diff --git a/.gitignore b/.gitignore index d0e39361..08278d08 100644 --- a/.gitignore +++ b/.gitignore @@ -73,3 +73,19 @@ test/include/test/jemalloc_test_defs.h /test/unit/*.out /VERSION + +*.pdb +*.sdf +*.opendb +*.opensdf +*.cachefile +*.suo +*.user +*.sln.docstates +*.tmp +/msvc/Win32/ +/msvc/x64/ +/msvc/projects/*/*/Debug*/ +/msvc/projects/*/*/Release*/ +/msvc/projects/*/*/Win32/ +/msvc/projects/*/*/x64/ diff --git a/ChangeLog b/ChangeLog index 8ed42cbe..9cbfbf96 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,79 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.1.0 (February 28, 2016) + + This release is primarily about optimizations, but it also incorporates a lot + of portability-motivated refactoring and enhancements. Many people worked on + this release, to an extent that even with the omission here of minor changes + (see git revision history), and of the people who reported and diagnosed + issues, so much of the work was contributed that starting with this release, + changes are annotated with author credits to help reflect the collaborative + effort involved. + + New features: + - Implement decay-based unused dirty page purging, a major optimization with + mallctl API impact. This is an alternative to the existing ratio-based + unused dirty page purging, and is intended to eventually become the sole + purging mechanism. New mallctls: + + opt.purge + + opt.decay_time + + arena..decay + + arena..decay_time + + arenas.decay_time + + stats.arenas..decay_time + (@jasone, @cevans87) + - Add --with-malloc-conf, which makes it possible to embed a default + options string during configuration. This was motivated by the desire to + specify --with-malloc-conf=purge:decay , since the default must remain + purge:ratio until the 5.0.0 release. (@jasone) + - Add MS Visual Studio 2015 support. (@rustyx, @yuslepukhin) + - Make *allocx() size class overflow behavior defined. The maximum + size class is now less than PTRDIFF_MAX to protect applications against + numerical overflow, and all allocation functions are guaranteed to indicate + errors rather than potentially crashing if the request size exceeds the + maximum size class. (@jasone) + - jeprof: + + Add raw heap profile support. (@jasone) + + Add --retain and --exclude for backtrace symbol filtering. (@jasone) + + Optimizations: + - Optimize the fast path to combine various bootstrapping and configuration + checks and execute more streamlined code in the common case. (@interwq) + - Use linear scan for small bitmaps (used for small object tracking). In + addition to speeding up bitmap operations on 64-bit systems, this reduces + allocator metadata overhead by approximately 0.2%. (@djwatson) + - Separate arena_avail trees, which substantially speeds up run tree + operations. (@djwatson) + - Use memoization (boot-time-computed table) for run quantization. Separate + arena_avail trees reduced the importance of this optimization. (@jasone) + - Attempt mmap-based in-place huge reallocation. This can dramatically speed + up incremental huge reallocation. (@jasone) + + Incompatible changes: + - Make opt.narenas unsigned rather than size_t. (@jasone) + + Bug fixes: + - Fix stats.cactive accounting regression. (@rustyx, @jasone) + - Handle unaligned keys in hash(). This caused problems for some ARM systems. + (@jasone, Christopher Ferris) + - Refactor arenas array. In addition to fixing a fork-related deadlock, this + makes arena lookups faster and simpler. (@jasone) + - Move retained memory allocation out of the default chunk allocation + function, to a location that gets executed even if the application installs + a custom chunk allocation function. This resolves a virtual memory leak. + (@buchgr) + - Fix a potential tsd cleanup leak. (Christopher Ferris, @jasone) + - Fix run quantization. In practice this bug had no impact unless + applications requested memory with alignment exceeding one page. + (@jasone, @djwatson) + - Fix LinuxThreads-specific bootstrapping deadlock. (Cosmin Paraschiv) + - jeprof: + + Don't discard curl options if timeout is not defined. (@djwatson) + + Detect failed profile fetches. (@djwatson) + - Fix stats.arenas..{dss,lg_dirty_mult,decay_time,pactive,pdirty} for + --disable-stats case. (@jasone) + * 4.0.4 (October 24, 2015) This bugfix release fixes another xallocx() regression. No other regressions diff --git a/INSTALL b/INSTALL index 8d396874..5c25054a 100644 --- a/INSTALL +++ b/INSTALL @@ -84,6 +84,14 @@ any of the following arguments (not a definitive list) to 'configure': versions of jemalloc can coexist in the same installation directory. For example, libjemalloc.so.0 becomes libjemalloc.so.0. +--with-malloc-conf= + Embed as a run-time options string that is processed prior to + the malloc_conf global variable, the /etc/malloc.conf symlink, and the + MALLOC_CONF environment variable. For example, to change the default chunk + size to 256 KiB: + + --with-malloc-conf=lg_chunk:18 + --disable-cc-silence Disable code that silences non-useful compiler warnings. This is mainly useful during development when auditing the set of warnings that are being diff --git a/Makefile.in b/Makefile.in index 1ac6f292..f60823f5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -78,15 +78,32 @@ LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) # Lists of files. BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h -C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ - $(srcroot)src/atomic.c $(srcroot)src/base.c $(srcroot)src/bitmap.c \ - $(srcroot)src/chunk.c $(srcroot)src/chunk_dss.c \ - $(srcroot)src/chunk_mmap.c $(srcroot)src/ckh.c $(srcroot)src/ctl.c \ - $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \ - $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \ - $(srcroot)src/prof.c $(srcroot)src/quarantine.c $(srcroot)src/rtree.c \ - $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/util.c \ - $(srcroot)src/tsd.c +C_SRCS := $(srcroot)src/jemalloc.c \ + $(srcroot)src/arena.c \ + $(srcroot)src/atomic.c \ + $(srcroot)src/base.c \ + $(srcroot)src/bitmap.c \ + $(srcroot)src/chunk.c \ + $(srcroot)src/chunk_dss.c \ + $(srcroot)src/chunk_mmap.c \ + $(srcroot)src/ckh.c \ + $(srcroot)src/ctl.c \ + $(srcroot)src/extent.c \ + $(srcroot)src/hash.c \ + $(srcroot)src/huge.c \ + $(srcroot)src/mb.c \ + $(srcroot)src/mutex.c \ + $(srcroot)src/nstime.c \ + $(srcroot)src/pages.c \ + $(srcroot)src/prng.c \ + $(srcroot)src/prof.c \ + $(srcroot)src/quarantine.c \ + $(srcroot)src/rtree.c \ + $(srcroot)src/stats.c \ + $(srcroot)src/tcache.c \ + $(srcroot)src/ticker.c \ + $(srcroot)src/tsd.c \ + $(srcroot)src/util.c ifeq ($(enable_valgrind), 1) C_SRCS += $(srcroot)src/valgrind.c endif @@ -116,10 +133,11 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c -C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c +C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ + $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ @@ -129,6 +147,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ + $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ $(srcroot)test/unit/prof_active.c \ $(srcroot)test/unit/prof_gdump.c \ @@ -140,9 +159,13 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/quarantine.c \ $(srcroot)test/unit/rb.c \ $(srcroot)test/unit/rtree.c \ + $(srcroot)test/unit/run_quantize.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_classes.c \ + $(srcroot)test/unit/smoothstep.c \ $(srcroot)test/unit/stats.c \ + $(srcroot)test/unit/ticker.c \ + $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ $(srcroot)test/unit/zero.c @@ -349,18 +372,22 @@ stress_dir: check_dir: check_unit_dir check_integration_dir check_unit: tests_unit check_unit_dir - $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:ratio" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) check_integration_prof: tests_integration check_integration_dir ifeq ($(enable_prof), 1) $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) endif +check_integration_decay: tests_integration check_integration_dir + $(MALLOC_CONF)="purge:decay,decay_time:-1" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:decay,decay_time:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) check_integration: tests_integration check_integration_dir $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) stress: tests_stress stress_dir $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) -check: tests check_dir check_integration_prof - $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) +check: check_unit check_integration check_integration_decay check_integration_prof ifeq ($(enable_code_coverage), 1) coverage_unit: check_unit diff --git a/bin/jeprof.in b/bin/jeprof.in index e7178078..42087fce 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -95,7 +95,7 @@ my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread my @KCACHEGRIND = ("kcachegrind"); my @PS2PDF = ("ps2pdf"); # These are used for dynamic profiles -my @URL_FETCHER = ("curl", "-s"); +my @URL_FETCHER = ("curl", "-s", "--fail"); # These are the web pages that servers need to support for dynamic profiles my $HEAP_PAGE = "/pprof/heap"; @@ -223,12 +223,14 @@ Call-graph Options: --nodefraction= Hide nodes below *total [default=.005] --edgefraction= Hide edges below *total [default=.001] --maxdegree= Max incoming/outgoing edges per node [default=8] - --focus= Focus on nodes matching + --focus= Focus on backtraces with nodes matching --thread= Show profile for thread - --ignore= Ignore nodes matching + --ignore= Ignore backtraces with nodes matching --scale= Set GV scaling [default=0] --heapcheck Make nodes with non-0 object counts (i.e. direct leak generators) more visible + --retain= Retain only nodes that match + --exclude= Exclude all nodes that match Miscellaneous: --tools=[,...] \$PATH for object tool pathnames @@ -339,6 +341,8 @@ sub Init() { $main::opt_ignore = ''; $main::opt_scale = 0; $main::opt_heapcheck = 0; + $main::opt_retain = ''; + $main::opt_exclude = ''; $main::opt_seconds = 30; $main::opt_lib = ""; @@ -410,6 +414,8 @@ sub Init() { "ignore=s" => \$main::opt_ignore, "scale=i" => \$main::opt_scale, "heapcheck" => \$main::opt_heapcheck, + "retain=s" => \$main::opt_retain, + "exclude=s" => \$main::opt_exclude, "inuse_space!" => \$main::opt_inuse_space, "inuse_objects!" => \$main::opt_inuse_objects, "alloc_space!" => \$main::opt_alloc_space, @@ -1160,8 +1166,21 @@ sub PrintSymbolizedProfile { } print '---', "\n"; - $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $profile_marker = $&; + my $profile_marker; + if ($main::profile_type eq 'heap') { + $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } elsif ($main::profile_type eq 'growth') { + $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } elsif ($main::profile_type eq 'contention') { + $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } else { # elsif ($main::profile_type eq 'cpu') + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } + print '--- ', $profile_marker, "\n"; if (defined($main::collected_profile)) { # if used with remote fetch, simply dump the collected profile to output. @@ -1171,6 +1190,12 @@ sub PrintSymbolizedProfile { } close(SRC); } else { + # --raw/http: For everything to work correctly for non-remote profiles, we + # would need to extend PrintProfileData() to handle all possible profile + # types, re-enable the code that is currently disabled in ReadCPUProfile() + # and FixCallerAddresses(), and remove the remote profile dumping code in + # the block above. + die "--raw/http: jeprof can only dump remote profiles for --raw\n"; # dump a cpu-format profile to standard out PrintProfileData($profile); } @@ -2821,6 +2846,43 @@ sub ExtractCalls { return $calls; } +sub FilterFrames { + my $symbols = shift; + my $profile = shift; + + if ($main::opt_retain eq '' && $main::opt_exclude eq '') { + return $profile; + } + + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my @path = (); + foreach my $a (@addrs) { + my $sym; + if (exists($symbols->{$a})) { + $sym = $symbols->{$a}->[0]; + } else { + $sym = $a; + } + if ($main::opt_retain ne '' && $sym !~ m/$main::opt_retain/) { + next; + } + if ($main::opt_exclude ne '' && $sym =~ m/$main::opt_exclude/) { + next; + } + push(@path, $a); + } + if (scalar(@path) > 0) { + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + } + + return $result; +} + sub RemoveUninterestingFrames { my $symbols = shift; my $profile = shift; @@ -2965,6 +3027,9 @@ sub RemoveUninterestingFrames { my $reduced_path = join("\n", @path); AddEntry($result, $reduced_path, $count); } + + $result = FilterFrames($symbols, $result); + return $result; } @@ -3274,7 +3339,7 @@ sub ResolveRedirectionForCurl { # Add a timeout flat to URL_FETCHER. Returns a new list. sub AddFetchTimeout { my $timeout = shift; - my @fetcher = shift; + my @fetcher = @_; if (defined($timeout)) { if (join(" ", @fetcher) =~ m/\bcurl -s/) { push(@fetcher, "--max-time", sprintf("%d", $timeout)); @@ -3320,6 +3385,27 @@ sub ReadSymbols { return $map; } +sub URLEncode { + my $str = shift; + $str =~ s/([^A-Za-z0-9\-_.!~*'()])/ sprintf "%%%02x", ord $1 /eg; + return $str; +} + +sub AppendSymbolFilterParams { + my $url = shift; + my @params = (); + if ($main::opt_retain ne '') { + push(@params, sprintf("retain=%s", URLEncode($main::opt_retain))); + } + if ($main::opt_exclude ne '') { + push(@params, sprintf("exclude=%s", URLEncode($main::opt_exclude))); + } + if (scalar @params > 0) { + $url = sprintf("%s?%s", $url, join("&", @params)); + } + return $url; +} + # Fetches and processes symbols to prepare them for use in the profile output # code. If the optional 'symbol_map' arg is not given, fetches symbols from # $SYMBOL_PAGE for all PC values found in profile. Otherwise, the raw symbols @@ -3344,9 +3430,11 @@ sub FetchSymbols { my $command_line; if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) { $url = ResolveRedirectionForCurl($url); + $url = AppendSymbolFilterParams($url); $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym", $url); } else { + $url = AppendSymbolFilterParams($url); $command_line = (ShellEscape(@URL_FETCHER, "--post", $url) . " < " . ShellEscape($main::tmpfile_sym)); } @@ -3427,12 +3515,22 @@ sub FetchDynamicProfile { } $url .= sprintf("seconds=%d", $main::opt_seconds); $fetch_timeout = $main::opt_seconds * 1.01 + 60; + # Set $profile_type for consumption by PrintSymbolizedProfile. + $main::profile_type = 'cpu'; } else { # For non-CPU profiles, we add a type-extension to # the target profile file name. my $suffix = $path; $suffix =~ s,/,.,g; $profile_file .= $suffix; + # Set $profile_type for consumption by PrintSymbolizedProfile. + if ($path =~ m/$HEAP_PAGE/) { + $main::profile_type = 'heap'; + } elsif ($path =~ m/$GROWTH_PAGE/) { + $main::profile_type = 'growth'; + } elsif ($path =~ m/$CONTENTION_PAGE/) { + $main::profile_type = 'contention'; + } } my $profile_dir = $ENV{"JEPROF_TMPDIR"} || ($ENV{HOME} . "/jeprof"); @@ -3730,6 +3828,8 @@ sub ReadProfile { my $symbol_marker = $&; $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash my $profile_marker = $&; + $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $heap_marker = $&; # Look at first line to see if it is a heap or a CPU profile. # CPU profile may start with no header at all, and just binary data @@ -3756,7 +3856,13 @@ sub ReadProfile { $header = ReadProfileHeader(*PROFILE) || ""; } + if ($header =~ m/^--- *($heap_marker|$growth_marker)/o) { + # Skip "--- ..." line for profile types that have their own headers. + $header = ReadProfileHeader(*PROFILE) || ""; + } + $main::profile_type = ''; + if ($header =~ m/^heap profile:.*$growth_marker/o) { $main::profile_type = 'growth'; $result = ReadHeapProfile($prog, *PROFILE, $header); @@ -3808,9 +3914,9 @@ sub ReadProfile { # independent implementation. sub FixCallerAddresses { my $stack = shift; - if ($main::use_symbolized_profile) { - return $stack; - } else { + # --raw/http: Always subtract one from pc's, because PrintSymbolizedProfile() + # dumps unadjusted profiles. + { $stack =~ /(\s)/; my $delimiter = $1; my @addrs = split(' ', $stack); @@ -3878,12 +3984,7 @@ sub ReadCPUProfile { for (my $j = 0; $j < $d; $j++) { my $pc = $slots->get($i+$j); # Subtract one from caller pc so we map back to call instr. - # However, don't do this if we're reading a symbolized profile - # file, in which case the subtract-one was done when the file - # was written. - if ($j > 0 && !$main::use_symbolized_profile) { - $pc--; - } + $pc--; $pc = sprintf("%0*x", $address_length, $pc); $pcs->{$pc} = 1; push @k, $pc; diff --git a/config.guess b/build-aux/config.guess similarity index 100% rename from config.guess rename to build-aux/config.guess diff --git a/config.sub b/build-aux/config.sub similarity index 100% rename from config.sub rename to build-aux/config.sub diff --git a/install-sh b/build-aux/install-sh similarity index 100% rename from install-sh rename to build-aux/install-sh diff --git a/configure.ac b/configure.ac index 7a1290e0..eb387ed9 100644 --- a/configure.ac +++ b/configure.ac @@ -1,6 +1,8 @@ dnl Process this file with autoconf to produce a configure script. AC_INIT([Makefile.in]) +AC_CONFIG_AUX_DIR([build-aux]) + dnl ============================================================================ dnl Custom macro definitions. @@ -138,6 +140,7 @@ if test "x$CFLAGS" = "x" ; then fi JE_CFLAGS_APPEND([-Wall]) JE_CFLAGS_APPEND([-Werror=declaration-after-statement]) + JE_CFLAGS_APPEND([-Wshorten-64-to-32]) JE_CFLAGS_APPEND([-pipe]) JE_CFLAGS_APPEND([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then @@ -164,13 +167,18 @@ if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat/C99" fi -AC_CHECK_SIZEOF([void *]) -if test "x${ac_cv_sizeof_void_p}" = "x8" ; then - LG_SIZEOF_PTR=3 -elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then - LG_SIZEOF_PTR=2 +if test "x${je_cv_msvc}" = "xyes" ; then + LG_SIZEOF_PTR=LG_SIZEOF_PTR_WIN + AC_MSG_RESULT([Using a predefined value for sizeof(void *): 4 for 32-bit, 8 for 64-bit]) else - AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}]) + AC_CHECK_SIZEOF([void *]) + if test "x${ac_cv_sizeof_void_p}" = "x8" ; then + LG_SIZEOF_PTR=3 + elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then + LG_SIZEOF_PTR=2 + else + AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}]) + fi fi AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR]) @@ -194,6 +202,16 @@ else fi AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG]) +AC_CHECK_SIZEOF([long long]) +if test "x${ac_cv_sizeof_long_long}" = "x8" ; then + LG_SIZEOF_LONG_LONG=3 +elif test "x${ac_cv_sizeof_long_long}" = "x4" ; then + LG_SIZEOF_LONG_LONG=2 +else + AC_MSG_ERROR([Unsupported long long size: ${ac_cv_sizeof_long_long}]) +fi +AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG_LONG], [$LG_SIZEOF_LONG_LONG]) + AC_CHECK_SIZEOF([intmax_t]) if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then LG_SIZEOF_INTMAX_T=4 @@ -211,12 +229,22 @@ dnl CPU-specific settings. CPU_SPINWAIT="" case "${host_cpu}" in i686|x86_64) - AC_CACHE_VAL([je_cv_pause], - [JE_COMPILABLE([pause instruction], [], - [[__asm__ volatile("pause"); return 0;]], - [je_cv_pause])]) - if test "x${je_cv_pause}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("pause")' + if test "x${je_cv_msvc}" = "xyes" ; then + AC_CACHE_VAL([je_cv_pause_msvc], + [JE_COMPILABLE([pause instruction MSVC], [], + [[_mm_pause(); return 0;]], + [je_cv_pause_msvc])]) + if test "x${je_cv_pause_msvc}" = "xyes" ; then + CPU_SPINWAIT='_mm_pause()' + fi + else + AC_CACHE_VAL([je_cv_pause], + [JE_COMPILABLE([pause instruction], [], + [[__asm__ volatile("pause"); return 0;]], + [je_cv_pause])]) + if test "x${je_cv_pause}" = "xyes" ; then + CPU_SPINWAIT='__asm__ volatile("pause")' + fi fi ;; powerpc) @@ -575,6 +603,15 @@ AC_ARG_WITH([install_suffix], install_suffix="$INSTALL_SUFFIX" AC_SUBST([install_suffix]) +dnl Specify default malloc_conf. +AC_ARG_WITH([malloc_conf], + [AS_HELP_STRING([--with-malloc-conf=], [config.malloc_conf options string])], + [JEMALLOC_CONFIG_MALLOC_CONF="$with_malloc_conf"], + [JEMALLOC_CONFIG_MALLOC_CONF=""] +) +config_malloc_conf="$JEMALLOC_CONFIG_MALLOC_CONF" +AC_DEFINE_UNQUOTED([JEMALLOC_CONFIG_MALLOC_CONF], ["$config_malloc_conf"]) + dnl Substitute @je_@ in jemalloc_protos.h.in, primarily to make generation of dnl jemalloc_protos_jet.h easy. je_="je_" @@ -1014,7 +1051,7 @@ dnl ============================================================================ dnl Check for __builtin_ffsl(), then ffsl(3), and fail if neither are found. dnl One of those two functions should (theoretically) exist on all platforms dnl that jemalloc currently has a chance of functioning on without modification. -dnl We additionally assume ffs() or __builtin_ffs() are defined if +dnl We additionally assume ffs[ll]() or __builtin_ffs[ll]() are defined if dnl ffsl() or __builtin_ffsl() are defined, respectively. JE_COMPILABLE([a program using __builtin_ffsl], [ #include @@ -1027,6 +1064,7 @@ JE_COMPILABLE([a program using __builtin_ffsl], [ } ], [je_cv_gcc_builtin_ffsl]) if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [__builtin_ffsll]) AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl]) AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs]) else @@ -1041,6 +1079,7 @@ else } ], [je_cv_function_ffsl]) if test "x${je_cv_function_ffsl}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [ffsll]) AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl]) AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs]) else @@ -1100,7 +1139,7 @@ if test "x$LG_PAGE" = "xdetect"; then if (f == NULL) { return 1; } - fprintf(f, "%d\n", result); + fprintf(f, "%d", result); fclose(f); return 0; @@ -1724,6 +1763,7 @@ AC_MSG_RESULT([JEMALLOC_PREFIX : ${JEMALLOC_PREFIX}]) AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE]) AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}]) AC_MSG_RESULT([install_suffix : ${install_suffix}]) +AC_MSG_RESULT([malloc_conf : ${config_malloc_conf}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) AC_MSG_RESULT([cc-silence : ${enable_cc_silence}]) AC_MSG_RESULT([debug : ${enable_debug}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 26a5e142..bc5dbd1d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -310,16 +310,14 @@ The mallocx function allocates at least size bytes of memory, and returns a pointer to the base address of the allocation. Behavior is undefined if - size is 0, or if request size - overflows due to size class and/or alignment constraints. + size is 0. The rallocx function resizes the allocation at ptr to be at least size bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location. Behavior is undefined if - size is 0, or if request size - overflows due to size class and/or alignment constraints. + size is 0. The xallocx function resizes the allocation at ptr in place to be at least @@ -354,10 +352,10 @@ memory, but it performs the same size computation as the mallocx function, and returns the real size of the allocation that would result from the equivalent - mallocx function call. Behavior is - undefined if size is 0, or if - request size overflows due to size class and/or alignment - constraints. + mallocx function call, or + 0 if the inputs exceed the maximum supported size + class and/or alignment. Behavior is undefined if + size is 0. The mallctl function provides a general interface for introspecting the memory allocator, as well as @@ -455,19 +453,20 @@ for (i = 0; i < nbins; i++) { routines, the allocator initializes its internals based in part on various options that can be specified at compile- or run-time. - The string pointed to by the global variable - malloc_conf, the “name” of the file - referenced by the symbolic link named /etc/malloc.conf, and the value of the + The string specified via , the + string pointed to by the global variable malloc_conf, the + “name” of the file referenced by the symbolic link named + /etc/malloc.conf, and the value of the environment variable MALLOC_CONF, will be interpreted, in that order, from left to right as options. Note that malloc_conf may be read before main is entered, so the declaration of malloc_conf should specify an initializer that contains - the final value to be read by jemalloc. malloc_conf is - a compile-time setting, whereas /etc/malloc.conf and MALLOC_CONF - can be safely set any time prior to program invocation. + the final value to be read by jemalloc. + and malloc_conf are compile-time mechanisms, whereas + /etc/malloc.conf and + MALLOC_CONF can be safely set any time prior to program + invocation. An options string is a comma-separated list of option:value pairs. There is one key corresponding to each - Memory is conceptually broken into equal-sized chunks, where the - chunk size is a power of two that is greater than the page size. Chunks - are always aligned to multiples of the chunk size. This alignment makes it - possible to find metadata for user objects very quickly. - - User objects are broken into three categories according to size: - small, large, and huge. Small and large objects are managed entirely by - arenas; huge objects are additionally aggregated in a single data structure - that is shared by all threads. Huge objects are typically used by - applications infrequently enough that this single data structure is not a - scalability issue. - - Each chunk that is managed by an arena tracks its contents as runs of + Memory is conceptually broken into equal-sized chunks, where the chunk + size is a power of two that is greater than the page size. Chunks are + always aligned to multiples of the chunk size. This alignment makes it + possible to find metadata for user objects very quickly. User objects are + broken into three categories according to size: small, large, and huge. + Multiple small and large objects can reside within a single chunk, whereas + huge objects each have one or more chunks backing them. Each chunk that + contains small and/or large objects tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one - large object). The combination of chunk alignment and chunk page maps - makes it possible to determine all metadata regarding small and large - allocations in constant time. + large object). The combination of chunk alignment and chunk page maps makes + it possible to determine all metadata regarding small and large allocations + in constant time. Small objects are managed in groups by page runs. Each run maintains a bitmap to track which regions are in use. Allocation requests that are no @@ -776,6 +770,17 @@ for (i = 0; i < nbins; i++) { during build configuration. + + + config.malloc_conf + (const char *) + r- + + Embedded configure-time-specified run-time options + string, empty unless was specified + during build configuration. + + config.munmap @@ -929,7 +934,7 @@ for (i = 0; i < nbins; i++) { opt.narenas - (size_t) + (unsigned) r- Maximum number of arenas to use for automatic @@ -937,6 +942,20 @@ for (i = 0; i < nbins; i++) { number of CPUs, or one if there is a single CPU. + + + opt.purge + (const char *) + r- + + Purge mode is “ratio” (default) or + “decay”. See opt.lg_dirty_mult + for details of the ratio mode. See opt.decay_time for + details of the decay mode. + + opt.lg_dirty_mult @@ -959,6 +978,26 @@ for (i = 0; i < nbins; i++) { for related dynamic control options. + + + opt.decay_time + (ssize_t) + r- + + Approximate time in seconds from the creation of a set + of unused dirty pages until an equivalent set of unused dirty pages is + purged and/or reused. The pages are incrementally purged according to a + sigmoidal decay curve that starts and ends with zero purge rate. A + decay time of 0 causes all unused dirty pages to be purged immediately + upon creation. A decay time of -1 disables purging. The default decay + time is 10 seconds. See arenas.decay_time + and arena.<i>.decay_time + for related dynamic control options. + + + opt.stats_print @@ -1150,7 +1189,8 @@ malloc_conf = "xmalloc:true";]]> the jeprof command, which is based on the pprof that is developed as part of the gperftools - package. + package. See HEAP PROFILE + FORMAT for heap profile format documentation. @@ -1467,7 +1507,7 @@ malloc_conf = "xmalloc:true";]]> Flush the specified thread-specific cache (tcache). The same considerations apply to this interface as to thread.tcache.flush, - except that the tcache will never be automatically be discarded. + except that the tcache will never be automatically discarded. @@ -1489,12 +1529,27 @@ malloc_conf = "xmalloc:true";]]> (void) -- - Purge unused dirty pages for arena <i>, or for + Purge all unused dirty pages for arena <i>, or for all arenas if <i> equals arenas.narenas. + + + arena.<i>.decay + (void) + -- + + Trigger decay-based purging of unused dirty pages for + arena <i>, or for all arenas if <i> equals arenas.narenas. + The proportion of unused dirty pages to be purged depends on the current + time; see opt.decay_time for + details. + + arena.<i>.dss @@ -1523,6 +1578,22 @@ malloc_conf = "xmalloc:true";]]> for additional information. + + + arena.<i>.decay_time + (ssize_t) + rw + + Current per-arena approximate time in seconds from the + creation of a set of unused dirty pages until an equivalent set of + unused dirty pages is purged and/or reused. Each time this interface is + set, all currently unused dirty pages are considered to have fully + decayed, which causes immediate purging of all unused dirty pages unless + the decay time is set to -1 (i.e. purging disabled). See opt.decay_time for + additional information. + + arena.<i>.chunk_hooks @@ -1757,6 +1828,21 @@ typedef struct { for additional information. + + + arenas.decay_time + (ssize_t) + rw + + Current default per-arena approximate time in seconds + from the creation of a set of unused dirty pages until an equivalent set + of unused dirty pages is purged and/or reused, used to initialize arena.<i>.decay_time + during arena creation. See opt.decay_time for + additional information. + + arenas.quantum @@ -2101,6 +2187,19 @@ typedef struct { for details. + + + stats.arenas.<i>.decay_time + (ssize_t) + r- + + Approximate time in seconds from the creation of a set + of unused dirty pages until an equivalent set of unused dirty pages is + purged and/or reused. See opt.decay_time + for details. + + stats.arenas.<i>.nthreads @@ -2523,6 +2622,53 @@ typedef struct { + + HEAP PROFILE FORMAT + Although the heap profiling functionality was originally designed to + be compatible with the + pprof command that is developed as part of the gperftools + package, the addition of per thread heap profiling functionality + required a different heap profile format. The jeprof + command is derived from pprof, with enhancements to + support the heap profile format described here. + + In the following hypothetical heap profile, [...] + indicates elision for the sake of compactness. The following matches the above heap profile, but most +tokens are replaced with <description> to indicate +descriptions of the corresponding fields. / + : : [: ] + [...] + : : [: ] + [...] + : : [: ] + [...] +@ [...] [...] + : : [: ] + : : [: ] + : : [: ] +[...] + +MAPPED_LIBRARIES: +/maps>]]> + + DEBUGGING MALLOC PROBLEMS When debugging, it is a good idea to configure/build jemalloc with diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 12c61797..3519873c 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -23,6 +23,18 @@ */ #define LG_DIRTY_MULT_DEFAULT 3 +typedef enum { + purge_mode_ratio = 0, + purge_mode_decay = 1, + + purge_mode_limit = 2 +} purge_mode_t; +#define PURGE_DEFAULT purge_mode_ratio +/* Default decay time in seconds. */ +#define DECAY_TIME_DEFAULT 10 +/* Number of event ticks between time checks. */ +#define DECAY_NTICKS_PER_UPDATE 1000 + typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t; typedef struct arena_run_s arena_run_t; typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; @@ -31,6 +43,7 @@ typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_bin_info_s arena_bin_info_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; +typedef struct arena_tdata_s arena_tdata_t; #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -154,15 +167,14 @@ struct arena_chunk_map_misc_s { /* Profile counters, used for large object runs. */ union { - void *prof_tctx_pun; - prof_tctx_t *prof_tctx; + void *prof_tctx_pun; + prof_tctx_t *prof_tctx; }; /* Small region run metadata. */ arena_run_t run; }; }; -typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t; typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t; #endif /* JEMALLOC_ARENA_STRUCTS_A */ @@ -220,28 +232,28 @@ struct arena_chunk_s { */ struct arena_bin_info_s { /* Size of regions in a run for this bin's size class. */ - size_t reg_size; + size_t reg_size; /* Redzone size. */ - size_t redzone_size; + size_t redzone_size; /* Interval between regions (reg_size + (redzone_size << 1)). */ - size_t reg_interval; + size_t reg_interval; /* Total size of a run for this bin's size class. */ - size_t run_size; + size_t run_size; /* Total number of regions in a run for this bin's size class. */ - uint32_t nregs; + uint32_t nregs; /* * Metadata used to manipulate bitmaps for runs associated with this * bin. */ - bitmap_info_t bitmap_info; + bitmap_info_t bitmap_info; /* Offset of first region in a run for this bin's size class. */ - uint32_t reg0_offset; + uint32_t reg0_offset; }; struct arena_bin_s { @@ -251,13 +263,13 @@ struct arena_bin_s { * which may be acquired while holding one or more bin locks, but not * vise versa. */ - malloc_mutex_t lock; + malloc_mutex_t lock; /* * Current run being used to service allocations of this bin's size * class. */ - arena_run_t *runcur; + arena_run_t *runcur; /* * Tree of non-full runs. This tree is used when looking for an @@ -266,10 +278,10 @@ struct arena_bin_s { * objects packed well, and it can also help reduce the number of * almost-empty chunks. */ - arena_run_tree_t runs; + arena_run_tree_t runs; /* Bin statistics. */ - malloc_bin_stats_t stats; + malloc_bin_stats_t stats; }; struct arena_s { @@ -278,14 +290,14 @@ struct arena_s { /* * Number of threads currently assigned to this arena. This field is - * protected by arenas_lock. + * synchronized via atomic operations. */ unsigned nthreads; /* * There are three classes of arena operations from a locking * perspective: - * 1) Thread assignment (modifies nthreads) is protected by arenas_lock. + * 1) Thread assignment (modifies nthreads) is synchronized via atomics. * 2) Bin-related operations are protected by bin locks. * 3) Chunk- and run-related operations are protected by this mutex. */ @@ -324,7 +336,7 @@ struct arena_s { /* Minimum ratio (log base 2) of nactive:ndirty. */ ssize_t lg_dirty_mult; - /* True if a thread is currently executing arena_purge(). */ + /* True if a thread is currently executing arena_purge_to_limit(). */ bool purging; /* Number of pages in active runs and huge regions. */ @@ -338,12 +350,6 @@ struct arena_s { */ size_t ndirty; - /* - * Size/address-ordered tree of this arena's available runs. The tree - * is used for first-best-fit run allocation. - */ - arena_avail_tree_t runs_avail; - /* * Unused dirty memory this arena manages. Dirty memory is conceptually * tracked as an arbitrarily interleaved LRU of dirty runs and cached @@ -375,6 +381,53 @@ struct arena_s { arena_runs_dirty_link_t runs_dirty; extent_node_t chunks_cache; + /* + * Approximate time in seconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + ssize_t decay_time; + /* decay_time / SMOOTHSTEP_NSTEPS. */ + nstime_t decay_interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + nstime_t decay_epoch; + /* decay_deadline randomness generator. */ + uint64_t decay_jitter_state; + /* + * Deadline for current epoch. This is the sum of decay_interval and + * per epoch jitter which is a uniform random variable in + * [0..decay_interval). Epochs always advance by precise multiples of + * decay_interval, but we randomize the deadline to reduce the + * likelihood of arenas purging in lockstep. + */ + nstime_t decay_deadline; + /* + * Number of dirty pages at beginning of current epoch. During epoch + * advancement we use the delta between decay_ndirty and ndirty to + * determine how many dirty pages, if any, were generated, and record + * the result in decay_backlog. + */ + size_t decay_ndirty; + /* + * Memoized result of arena_decay_backlog_npages_limit() corresponding + * to the current contents of decay_backlog, i.e. the limit on how many + * pages are allowed to exist for the decay epochs. + */ + size_t decay_backlog_npages_limit; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to decay_epoch. + */ + size_t decay_backlog[SMOOTHSTEP_NSTEPS]; + /* Extant huge allocations. */ ql_head(extent_node_t) huge; /* Synchronizes all huge allocation/update/deallocation. */ @@ -402,6 +455,17 @@ struct arena_s { /* bins is used to store trees of free regions. */ arena_bin_t bins[NBINS]; + + /* + * Quantized address-ordered trees of this arena's available runs. The + * trees are used for first-best-fit run allocation. + */ + arena_run_tree_t runs_avail[1]; /* Dynamically sized. */ +}; + +/* Used in conjunction with tsd for fast arena-related context lookup. */ +struct arena_tdata_s { + ticker_t decay_ticker; }; #endif /* JEMALLOC_ARENA_STRUCTS_B */ @@ -417,7 +481,10 @@ static const size_t large_pad = #endif ; +extern purge_mode_t opt_purge; +extern const char *purge_mode_names[]; extern ssize_t opt_lg_dirty_mult; +extern ssize_t opt_decay_time; extern arena_bin_info_t arena_bin_info[NBINS]; @@ -425,9 +492,15 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t map_misc_offset; extern size_t arena_maxrun; /* Max run size for arenas. */ extern size_t large_maxclass; /* Max large size class. */ +extern size_t run_quantize_max; /* Max run_quantize_*() input. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. */ +#ifdef JEMALLOC_JET +typedef size_t (run_quantize_t)(size_t); +extern run_quantize_t *run_quantize_floor; +extern run_quantize_t *run_quantize_ceil; +#endif void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, bool cache); void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, @@ -445,9 +518,11 @@ bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, size_t usize, bool *zero); ssize_t arena_lg_dirty_mult_get(arena_t *arena); bool arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult); +ssize_t arena_decay_time_get(arena_t *arena); +bool arena_decay_time_set(arena_t *arena, ssize_t decay_time); void arena_maybe_purge(arena_t *arena); -void arena_purge_all(arena_t *arena); -void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, +void arena_purge(arena_t *arena, bool all); +void arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); @@ -461,8 +536,9 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); #endif void arena_quarantine_junk_small(void *ptr, size_t usize); -void *arena_malloc_small(arena_t *arena, size_t size, bool zero); -void *arena_malloc_large(arena_t *arena, size_t size, bool zero); +void *arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t ind, bool zero); +void *arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, + bool zero, tcache_t *tcache); void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); void arena_prof_promoted(const void *ptr, size_t size); @@ -470,8 +546,8 @@ void arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm); void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind); +void arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t pageind); #ifdef JEMALLOC_JET typedef void (arena_dalloc_junk_large_t)(void *, size_t); extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; @@ -480,12 +556,13 @@ void arena_dalloc_junk_large(void *ptr, size_t usize); #endif void arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr); -void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); +void arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + void *ptr); #ifdef JEMALLOC_JET typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; #endif -bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, +bool arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); @@ -493,10 +570,18 @@ dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t arena_lg_dirty_mult_default_get(void); bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); -void arena_stats_merge(arena_t *arena, const char **dss, - ssize_t *lg_dirty_mult, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, +ssize_t arena_decay_time_default_get(void); +bool arena_decay_time_default_set(ssize_t decay_time); +void arena_basic_stats_merge(arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, + size_t *nactive, size_t *ndirty); +void arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, + ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, + size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); +unsigned arena_nthreads_get(arena_t *arena); +void arena_nthreads_inc(arena_t *arena); +void arena_nthreads_dec(arena_t *arena); arena_t *arena_new(unsigned ind); bool arena_boot(void); void arena_prefork(arena_t *arena); @@ -512,7 +597,7 @@ arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, size_t pageind); arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm); +size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm); void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm); arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); @@ -552,17 +637,19 @@ bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, +size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_tctx_t *arena_prof_tctx_get(const void *ptr); void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); void arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx); -void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - tcache_t *tcache); +void arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks); +void arena_decay_tick(tsd_t *tsd, arena_t *arena); +void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, + bool zero, tcache_t *tcache, bool slow_path); arena_t *arena_aalloc(const void *ptr); size_t arena_salloc(const void *ptr, bool demote); -void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); #endif @@ -590,7 +677,7 @@ arena_miscelm_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm) +arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk + @@ -970,7 +1057,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) run = &miscelm->run; run_binind = run->binind; bin = &arena->bins[run_binind]; - actual_binind = bin - arena->bins; + actual_binind = (szind_t)(bin - arena->bins); assert(run_binind == actual_binind); bin_info = &arena_bin_info[actual_binind]; rpages = arena_miscelm_to_rpages(miscelm); @@ -987,16 +1074,15 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) JEMALLOC_INLINE szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { - szind_t binind = bin - arena->bins; + szind_t binind = (szind_t)(bin - arena->bins); assert(binind < NBINS); return (binind); } -JEMALLOC_INLINE unsigned +JEMALLOC_INLINE size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) { - unsigned shift, diff, regind; - size_t interval; + size_t diff, interval, shift, regind; arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); void *rpages = arena_miscelm_to_rpages(miscelm); @@ -1011,12 +1097,12 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * Avoid doing division with a variable divisor if possible. Using * actual division here can reduce allocator throughput by over 20%! */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)rpages - + diff = (size_t)((uintptr_t)ptr - (uintptr_t)rpages - bin_info->reg0_offset); /* Rescale (factor powers of 2 out of the numerator and denominator). */ interval = bin_info->reg_interval; - shift = jemalloc_ffs(interval) - 1; + shift = ffs_zu(interval) - 1; diff >>= shift; interval >>= shift; @@ -1038,9 +1124,9 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * divide by 0, and 1 and 2 are both powers of two, which are * handled above. */ -#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) -#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) - static const unsigned interval_invs[] = { +#define SIZE_INV_SHIFT ((sizeof(size_t) << 3) - LG_RUN_MAXREGS) +#define SIZE_INV(s) (((ZU(1) << SIZE_INV_SHIFT) / (s)) + 1) + static const size_t interval_invs[] = { SIZE_INV(3), SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), @@ -1051,8 +1137,8 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) }; - if (likely(interval <= ((sizeof(interval_invs) / - sizeof(unsigned)) + 2))) { + if (likely(interval <= ((sizeof(interval_invs) / sizeof(size_t)) + + 2))) { regind = (diff * interval_invs[interval - 3]) >> SIZE_INV_SHIFT; } else @@ -1157,35 +1243,48 @@ arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, } } +JEMALLOC_ALWAYS_INLINE void +arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks) +{ + ticker_t *decay_ticker; + + if (unlikely(tsd == NULL)) + return; + decay_ticker = decay_ticker_get(tsd, arena->ind); + if (unlikely(decay_ticker == NULL)) + return; + if (unlikely(ticker_ticks(decay_ticker, nticks))) + arena_purge(arena, false); +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_tick(tsd_t *tsd, arena_t *arena) +{ + + arena_decay_ticks(tsd, arena, 1); +} + JEMALLOC_ALWAYS_INLINE void * -arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - tcache_t *tcache) +arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, + tcache_t *tcache, bool slow_path) { assert(size != 0); - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) - return (NULL); - - if (likely(size <= SMALL_MAXCLASS)) { - if (likely(tcache != NULL)) { + if (likely(tcache != NULL)) { + if (likely(size <= SMALL_MAXCLASS)) { return (tcache_alloc_small(tsd, arena, tcache, size, - zero)); - } else - return (arena_malloc_small(arena, size, zero)); - } else if (likely(size <= large_maxclass)) { - /* - * Initialize tcache after checking size in order to avoid - * infinite recursion during tcache initialization. - */ - if (likely(tcache != NULL) && size <= tcache_maxclass) { + ind, zero, slow_path)); + } + if (likely(size <= tcache_maxclass)) { return (tcache_alloc_large(tsd, arena, tcache, size, - zero)); - } else - return (arena_malloc_large(arena, size, zero)); - } else - return (huge_malloc(tsd, arena, size, zero, tcache)); + ind, zero, slow_path)); + } + /* (size > tcache_maxclass) case falls through. */ + assert(size > tcache_maxclass); + } + + return (arena_malloc_hard(tsd, arena, size, ind, zero, tcache)); } JEMALLOC_ALWAYS_INLINE arena_t * @@ -1251,7 +1350,7 @@ arena_salloc(const void *ptr, bool demote) } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) +arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { arena_chunk_t *chunk; size_t pageind, mapbits; @@ -1268,9 +1367,10 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) if (likely(tcache != NULL)) { szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); - tcache_dalloc_small(tsd, tcache, ptr, binind); + tcache_dalloc_small(tsd, tcache, ptr, binind, + slow_path); } else { - arena_dalloc_small(extent_node_arena_get( + arena_dalloc_small(tsd, extent_node_arena_get( &chunk->node), chunk, ptr, pageind); } } else { @@ -1283,9 +1383,9 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) if (likely(tcache != NULL) && size - large_pad <= tcache_maxclass) { tcache_dalloc_large(tsd, tcache, ptr, size - - large_pad); + large_pad, slow_path); } else { - arena_dalloc_large(extent_node_arena_get( + arena_dalloc_large(tsd, extent_node_arena_get( &chunk->node), chunk, ptr); } } @@ -1303,7 +1403,8 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) if (config_prof && opt_prof) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + assert(arena_mapbits_allocated_get(chunk, pageind) != + 0); if (arena_mapbits_large_get(chunk, pageind) != 0) { /* * Make sure to use promoted size, not request @@ -1319,21 +1420,23 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) /* Small allocation. */ if (likely(tcache != NULL)) { szind_t binind = size2index(size); - tcache_dalloc_small(tsd, tcache, ptr, binind); + tcache_dalloc_small(tsd, tcache, ptr, binind, + true); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_dalloc_small(extent_node_arena_get( + arena_dalloc_small(tsd, extent_node_arena_get( &chunk->node), chunk, ptr, pageind); } } else { assert(config_cache_oblivious || ((uintptr_t)ptr & PAGE_MASK) == 0); - if (likely(tcache != NULL) && size <= tcache_maxclass) - tcache_dalloc_large(tsd, tcache, ptr, size); - else { - arena_dalloc_large(extent_node_arena_get( + if (likely(tcache != NULL) && size <= tcache_maxclass) { + tcache_dalloc_large(tsd, tcache, ptr, size, + true); + } else { + arena_dalloc_large(tsd, extent_node_arena_get( &chunk->node), chunk, ptr); } } diff --git a/include/jemalloc/internal/assert.h b/include/jemalloc/internal/assert.h new file mode 100644 index 00000000..6f8f7eb9 --- /dev/null +++ b/include/jemalloc/internal/assert.h @@ -0,0 +1,45 @@ +/* + * Define a custom assert() in order to reduce the chances of deadlock during + * assertion failure. + */ +#ifndef assert +#define assert(e) do { \ + if (unlikely(config_debug && !(e))) { \ + malloc_printf( \ + ": %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef not_reached +#define not_reached() do { \ + if (config_debug) { \ + malloc_printf( \ + ": %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ + unreachable(); \ +} while (0) +#endif + +#ifndef not_implemented +#define not_implemented() do { \ + if (config_debug) { \ + malloc_printf(": %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef assert_not_implemented +#define assert_not_implemented(e) do { \ + if (unlikely(config_debug && !(e))) \ + not_implemented(); \ +} while (0) +#endif + + diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index a9aad35d..3f15ea14 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -28,8 +28,8 @@ * callers. * * atomic_read_( *p) { return (*p); } - * atomic_add_( *p, x) { return (*p + x); } - * atomic_sub_( *p, x) { return (*p - x); } + * atomic_add_( *p, x) { return (*p += x); } + * atomic_sub_( *p, x) { return (*p -= x); } * bool atomic_cas_( *p, c, s) * { * if (*p != c) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index fcc6005c..2594e3a4 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -15,6 +15,15 @@ typedef unsigned long bitmap_t; #define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) #define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) +/* + * Do some analysis on how big the bitmap is before we use a tree. For a brute + * force linear search, if we would have to call ffsl more than 2^3 times, use a + * tree instead. + */ +#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 +# define USE_TREE +#endif + /* Number of groups required to store a given number of bits. */ #define BITMAP_BITS2GROUPS(nbits) \ ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) @@ -48,6 +57,8 @@ typedef unsigned long bitmap_t; /* * Maximum number of groups required to support LG_BITMAP_MAXBITS. */ +#ifdef USE_TREE + #if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS # define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 @@ -65,6 +76,12 @@ typedef unsigned long bitmap_t; (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) +#else /* USE_TREE */ + +#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) + +#endif /* USE_TREE */ + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -78,6 +95,7 @@ struct bitmap_info_s { /* Logical number of bits in bitmap (stored at bottom level). */ size_t nbits; +#ifdef USE_TREE /* Number of levels necessary for nbits. */ unsigned nlevels; @@ -86,6 +104,10 @@ struct bitmap_info_s { * bottom to top (e.g. the bottom level is stored in levels[0]). */ bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; +#else /* USE_TREE */ + /* Number of groups necessary for nbits. */ + size_t ngroups; +#endif /* USE_TREE */ }; #endif /* JEMALLOC_H_STRUCTS */ @@ -93,9 +115,8 @@ struct bitmap_info_s { #ifdef JEMALLOC_H_EXTERNS void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -size_t bitmap_info_ngroups(const bitmap_info_t *binfo); -size_t bitmap_size(size_t nbits); void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); +size_t bitmap_size(const bitmap_info_t *binfo); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -113,10 +134,20 @@ void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); JEMALLOC_INLINE bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { - unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1; +#ifdef USE_TREE + size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; bitmap_t rg = bitmap[rgoff]; /* The bitmap is full iff the root group is 0. */ return (rg == 0); +#else + size_t i; + + for (i = 0; i < binfo->ngroups; i++) { + if (bitmap[i] != 0) + return (false); + } + return (true); +#endif } JEMALLOC_INLINE bool @@ -128,7 +159,7 @@ bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) assert(bit < binfo->nbits); goff = bit >> LG_BITMAP_GROUP_NBITS; g = bitmap[goff]; - return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))); + return (!(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)))); } JEMALLOC_INLINE void @@ -143,10 +174,11 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) goff = bit >> LG_BITMAP_GROUP_NBITS; gp = &bitmap[goff]; g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(bitmap_get(bitmap, binfo, bit)); +#ifdef USE_TREE /* Propagate group state transitions up the tree. */ if (g == 0) { unsigned i; @@ -155,13 +187,14 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) goff = bit >> LG_BITMAP_GROUP_NBITS; gp = &bitmap[binfo->levels[i].group_offset + goff]; g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; if (g != 0) break; } } +#endif } /* sfu: set first unset. */ @@ -174,15 +207,24 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) assert(!bitmap_full(bitmap, binfo)); +#ifdef USE_TREE i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; - bit = jemalloc_ffsl(g) - 1; + bit = ffs_lu(g) - 1; while (i > 0) { i--; g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (jemalloc_ffsl(g) - 1); + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); } - +#else + i = 0; + g = bitmap[0]; + while ((bit = ffs_lu(g)) == 0) { + i++; + g = bitmap[i]; + } + bit = (bit - 1) + (i << 6); +#endif bitmap_set(bitmap, binfo, bit); return (bit); } @@ -193,7 +235,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) size_t goff; bitmap_t *gp; bitmap_t g; - bool propagate; + UNUSED bool propagate; assert(bit < binfo->nbits); assert(bitmap_get(bitmap, binfo, bit)); @@ -201,10 +243,11 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) gp = &bitmap[goff]; g = *gp; propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(!bitmap_get(bitmap, binfo, bit)); +#ifdef USE_TREE /* Propagate group state transitions up the tree. */ if (propagate) { unsigned i; @@ -214,14 +257,15 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) gp = &bitmap[binfo->levels[i].group_offset + goff]; g = *gp; propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; if (!propagate) break; } } +#endif /* USE_TREE */ } #endif diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index 7d8014c5..6f2d0ac2 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -9,8 +9,8 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, - bool *commit); +void *chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit); bool chunk_dalloc_mmap(void *chunk, size_t size); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 75c1c979..f75ad90b 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -40,9 +40,7 @@ struct ckh_s { #endif /* Used for pseudo-random number generation. */ -#define CKH_A 1103515241 -#define CKH_C 12347 - uint32_t prng_state; + uint64_t prng_state; /* Total number of items. */ size_t count; @@ -74,7 +72,7 @@ bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); -bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); +bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); void ckh_string_hash(const void *key, size_t r_hash[2]); bool ckh_string_keycomp(const void *k1, const void *k2); void ckh_pointer_hash(const void *key, size_t r_hash[2]); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 751c14b5..9c5e9328 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -35,8 +35,12 @@ struct ctl_arena_stats_s { unsigned nthreads; const char *dss; ssize_t lg_dirty_mult; + ssize_t decay_time; size_t pactive; size_t pdirty; + + /* The remainder are only populated if config_stats is true. */ + arena_stats_t astats; /* Aggregate stats for small size classes, based on bin stats. */ diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index bcead337..864fda81 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -1,6 +1,6 @@ /* * The following hash function is based on MurmurHash3, placed into the public - * domain by Austin Appleby. See http://code.google.com/p/smhasher/ for + * domain by Austin Appleby. See https://github.com/aappleby/smhasher for * details. */ /******************************************************************************/ @@ -49,6 +49,14 @@ JEMALLOC_INLINE uint32_t hash_get_block_32(const uint32_t *p, int i) { + /* Handle unaligned read. */ + if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { + uint32_t ret; + + memcpy(&ret, &p[i], sizeof(uint32_t)); + return (ret); + } + return (p[i]); } @@ -56,6 +64,14 @@ JEMALLOC_INLINE uint64_t hash_get_block_64(const uint64_t *p, int i) { + /* Handle unaligned read. */ + if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { + uint64_t ret; + + memcpy(&ret, &p[i], sizeof(uint64_t)); + return (ret); + } + return (p[i]); } @@ -321,13 +337,18 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, JEMALLOC_INLINE void hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) { + + assert(len <= INT_MAX); /* Unfortunate implementation limitation. */ + #if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) - hash_x64_128(key, len, seed, (uint64_t *)r_hash); + hash_x64_128(key, (int)len, seed, (uint64_t *)r_hash); #else - uint64_t hashes[2]; - hash_x86_128(key, len, seed, hashes); - r_hash[0] = (size_t)hashes[0]; - r_hash[1] = (size_t)hashes[1]; + { + uint64_t hashes[2]; + hash_x86_128(key, (int)len, seed, hashes); + r_hash[0] = (size_t)hashes[0]; + r_hash[1] = (size_t)hashes[1]; + } #endif } #endif diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index ece7af98..cb6f69e6 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -9,12 +9,12 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, +void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero, tcache_t *tcache); -void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, +void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); -bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, - size_t usize_max, bool zero); +bool huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, + size_t usize_min, size_t usize_max, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 654cd088..3f54391f 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -49,6 +49,7 @@ static const bool config_lazy_lock = false #endif ; +static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF; static const bool config_prof = #ifdef JEMALLOC_PROF true @@ -355,12 +356,15 @@ typedef unsigned szind_t; # define VARIABLE_ARRAY(type, name, count) type name[(count)] #endif +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -383,12 +387,15 @@ typedef unsigned szind_t; /******************************************************************************/ #define JEMALLOC_H_STRUCTS +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -426,18 +433,24 @@ extern bool opt_redzone; extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_zero; -extern size_t opt_narenas; +extern unsigned opt_narenas; extern bool in_valgrind; /* Number of CPUs. */ -extern unsigned ncpus; +extern unsigned ncpus; + +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. + */ +extern arena_t **arenas; /* * index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by index2size_compute(). */ -extern size_t const index2size_tab[NSIZES]; +extern size_t const index2size_tab[NSIZES+1]; /* * size2index_tab is a compact lookup table that rounds request sizes up to * size classes. In order to reduce cache footprint, the table is compressed, @@ -445,35 +458,36 @@ extern size_t const index2size_tab[NSIZES]; */ extern uint8_t const size2index_tab[]; -arena_t *a0get(void); void *a0malloc(size_t size); void a0dalloc(void *ptr); void *bootstrap_malloc(size_t size); void *bootstrap_calloc(size_t num, size_t size); void bootstrap_free(void *ptr); arena_t *arenas_extend(unsigned ind); -arena_t *arena_init(unsigned ind); unsigned narenas_total_get(void); -arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing); +arena_t *arena_init(unsigned ind); +arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); arena_t *arena_choose_hard(tsd_t *tsd); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); -unsigned arena_nbound(unsigned ind); void thread_allocated_cleanup(tsd_t *tsd); void thread_deallocated_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); -void arenas_cache_cleanup(tsd_t *tsd); -void narenas_cache_cleanup(tsd_t *tsd); -void arenas_cache_bypass_cleanup(tsd_t *tsd); +void arenas_tdata_cleanup(tsd_t *tsd); +void narenas_tdata_cleanup(tsd_t *tsd); +void arenas_tdata_bypass_cleanup(tsd_t *tsd); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -496,12 +510,15 @@ void jemalloc_postfork_child(void); /******************************************************************************/ #define JEMALLOC_H_INLINES +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -526,8 +543,10 @@ size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); -arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, +arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); +arena_t *arena_get(unsigned ind, bool init_if_missing); +ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -537,27 +556,27 @@ size2index_compute(size_t size) #if (NTBINS != 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); + szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); } #endif { - size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? + szind_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) : lg_floor((size<<1)-1); - size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); - size_t grp = shift << LG_SIZE_CLASS_GROUP; + szind_t grp = shift << LG_SIZE_CLASS_GROUP; - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; size_t delta_inverse_mask = ZI(-1) << lg_delta; - size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - size_t index = NTBINS + grp + mod; + szind_t index = NTBINS + grp + mod; return (index); } } @@ -568,8 +587,7 @@ size2index_lookup(size_t size) assert(size <= LOOKUP_MAXCLASS); { - size_t ret = ((size_t)(size2index_tab[(size-1) >> - LG_TINY_MIN])); + szind_t ret = (size2index_tab[(size-1) >> LG_TINY_MIN]); assert(ret == size2index_compute(size)); return (ret); } @@ -635,7 +653,7 @@ s2u_compute(size_t size) #if (NTBINS > 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); + size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : (ZU(1) << lg_ceil)); } @@ -727,17 +745,16 @@ sa2u(size_t size, size_t alignment) return (usize); } - /* Huge size class. Beware of size_t overflow. */ + /* Huge size class. Beware of overflow. */ + + if (unlikely(alignment > HUGE_MAXCLASS)) + return (0); /* * We can't achieve subchunk alignment, so round up alignment to the * minimum that can actually be supported. */ alignment = CHUNK_CEILING(alignment); - if (alignment == 0) { - /* size_t overflow. */ - return (0); - } /* Make sure result is a huge size class. */ if (size <= chunksize) @@ -776,32 +793,56 @@ arena_choose(tsd_t *tsd, arena_t *arena) return (ret); } -JEMALLOC_INLINE arena_t * -arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, - bool refresh_if_missing) +JEMALLOC_INLINE arena_tdata_t * +arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { - arena_t *arena; - arena_t **arenas_cache = tsd_arenas_cache_get(tsd); + arena_tdata_t *tdata; + arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); - /* init_if_missing requires refresh_if_missing. */ - assert(!init_if_missing || refresh_if_missing); - - if (unlikely(arenas_cache == NULL)) { - /* arenas_cache hasn't been initialized yet. */ - return (arena_get_hard(tsd, ind, init_if_missing)); + if (unlikely(arenas_tdata == NULL)) { + /* arenas_tdata hasn't been initialized yet. */ + return (arena_tdata_get_hard(tsd, ind)); } - if (unlikely(ind >= tsd_narenas_cache_get(tsd))) { + if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) { /* - * ind is invalid, cache is old (too small), or arena to be + * ind is invalid, cache is old (too small), or tdata to be * initialized. */ - return (refresh_if_missing ? arena_get_hard(tsd, ind, - init_if_missing) : NULL); + return (refresh_if_missing ? arena_tdata_get_hard(tsd, ind) : + NULL); } - arena = arenas_cache[ind]; - if (likely(arena != NULL) || !refresh_if_missing) - return (arena); - return (arena_get_hard(tsd, ind, init_if_missing)); + + tdata = &arenas_tdata[ind]; + if (likely(tdata != NULL) || !refresh_if_missing) + return (tdata); + return (arena_tdata_get_hard(tsd, ind)); +} + +JEMALLOC_INLINE arena_t * +arena_get(unsigned ind, bool init_if_missing) +{ + arena_t *ret; + + assert(ind <= MALLOCX_ARENA_MAX); + + ret = arenas[ind]; + if (unlikely(ret == NULL)) { + ret = atomic_read_p((void *)&arenas[ind]); + if (init_if_missing && unlikely(ret == NULL)) + ret = arena_init(ind); + } + return (ret); +} + +JEMALLOC_INLINE ticker_t * +decay_ticker_get(tsd_t *tsd, unsigned ind) +{ + arena_tdata_t *tdata; + + tdata = arena_tdata_get(tsd, ind, true); + if (unlikely(tdata == NULL)) + return (NULL); + return (&tdata->decay_ticker); } #endif @@ -823,12 +864,14 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(const void *ptr); size_t isalloc(const void *ptr, bool demote); -void *iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, - bool is_metadata, arena_t *arena); -void *imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); -void *imalloc(tsd_t *tsd, size_t size); -void *icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); -void *icalloc(tsd_t *tsd, size_t size); +void *iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, + tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); +void *imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, + arena_t *arena); +void *imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path); +void *icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, + arena_t *arena); +void *icalloc(tsd_t *tsd, size_t size, szind_t ind); void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena); void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, @@ -837,10 +880,11 @@ void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); size_t ivsalloc(const void *ptr, bool demote); size_t u2rz(size_t usize); size_t p2rz(const void *ptr); -void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata); +void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, + bool slow_path); void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache); void idalloc(tsd_t *tsd, void *ptr); -void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, @@ -850,8 +894,8 @@ void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero); -bool ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero); +bool ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -881,14 +925,14 @@ isalloc(const void *ptr, bool demote) } JEMALLOC_ALWAYS_INLINE void * -iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata, - arena_t *arena) +iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, + bool is_metadata, arena_t *arena, bool slow_path) { void *ret; assert(size != 0); - ret = arena_malloc(tsd, arena, size, zero, tcache); + ret = arena_malloc(tsd, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_metadata && likely(ret != NULL)) { arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, config_prof)); @@ -897,31 +941,33 @@ iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata } JEMALLOC_ALWAYS_INLINE void * -imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) +imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena) { - return (iallocztm(tsd, size, false, tcache, false, arena)); + return (iallocztm(tsd, size, ind, false, tcache, false, arena, true)); } JEMALLOC_ALWAYS_INLINE void * -imalloc(tsd_t *tsd, size_t size) +imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path) { - return (iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL)); + return (iallocztm(tsd, size, ind, false, tcache_get(tsd, true), false, + NULL, slow_path)); } JEMALLOC_ALWAYS_INLINE void * -icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) +icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena) { - return (iallocztm(tsd, size, true, tcache, false, arena)); + return (iallocztm(tsd, size, ind, true, tcache, false, arena, true)); } JEMALLOC_ALWAYS_INLINE void * -icalloc(tsd_t *tsd, size_t size) +icalloc(tsd_t *tsd, size_t size, szind_t ind) { - return (iallocztm(tsd, size, true, tcache_get(tsd, true), false, NULL)); + return (iallocztm(tsd, size, ind, true, tcache_get(tsd, true), false, + NULL, true)); } JEMALLOC_ALWAYS_INLINE void * @@ -954,8 +1000,8 @@ JEMALLOC_ALWAYS_INLINE void * ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { - return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, - NULL), false, NULL)); + return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, true), + false, NULL)); } JEMALLOC_ALWAYS_INLINE size_t @@ -997,7 +1043,8 @@ p2rz(const void *ptr) } JEMALLOC_ALWAYS_INLINE void -idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata) +idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, + bool slow_path) { assert(ptr != NULL); @@ -1006,31 +1053,31 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata) config_prof)); } - arena_dalloc(tsd, ptr, tcache); + arena_dalloc(tsd, ptr, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache) { - idalloctm(tsd, ptr, tcache, false); + idalloctm(tsd, ptr, tcache, false, true); } JEMALLOC_ALWAYS_INLINE void idalloc(tsd_t *tsd, void *ptr) { - idalloctm(tsd, ptr, tcache_get(tsd, false), false); + idalloctm(tsd, ptr, tcache_get(tsd, false), false, true); } JEMALLOC_ALWAYS_INLINE void -iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) +iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { - if (config_fill && unlikely(opt_quarantine)) + if (slow_path && config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - idalloctm(tsd, ptr, tcache, false); + idalloctm(tsd, ptr, tcache, false, slow_path); } JEMALLOC_ALWAYS_INLINE void @@ -1058,7 +1105,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t usize, copysize; usize = sa2u(size + extra, alignment); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL) { @@ -1066,7 +1113,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, return (NULL); /* Try again, without extra this time. */ usize = sa2u(size, alignment); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL) @@ -1114,8 +1161,8 @@ iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, } JEMALLOC_ALWAYS_INLINE bool -ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, - bool zero) +ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) { assert(ptr != NULL); @@ -1127,7 +1174,7 @@ ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, return (true); } - return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); + return (arena_ralloc_no_move(tsd, ptr, oldsize, size, extra, zero)); } #endif diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index a601d6eb..2b8ca5d0 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -18,6 +18,7 @@ # endif # include # include +# include #endif #include diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index b0f8caaf..2c753719 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -189,9 +189,10 @@ #undef JEMALLOC_TLS /* - * ffs()/ffsl() functions to use for bitmapping. Don't use these directly; - * instead, use jemalloc_ffs() or jemalloc_ffsl() from util.h. + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. */ +#undef JEMALLOC_INTERNAL_FFSLL #undef JEMALLOC_INTERNAL_FFSL #undef JEMALLOC_INTERNAL_FFS @@ -241,6 +242,9 @@ /* sizeof(long) == 2^LG_SIZEOF_LONG. */ #undef LG_SIZEOF_LONG +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#undef LG_SIZEOF_LONG_LONG + /* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ #undef LG_SIZEOF_INTMAX_T @@ -259,4 +263,7 @@ */ #undef JEMALLOC_EXPORT +/* config.malloc_conf options string. */ +#undef JEMALLOC_CONFIG_MALLOC_CONF + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h new file mode 100644 index 00000000..bd04f04b --- /dev/null +++ b/include/jemalloc/internal/nstime.h @@ -0,0 +1,48 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ + && _POSIX_MONOTONIC_CLOCK >= 0 + +typedef struct nstime_s nstime_t; + +/* Maximum supported number of seconds (~584 years). */ +#define NSTIME_SEC_MAX 18446744072 + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct nstime_s { + uint64_t ns; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void nstime_init(nstime_t *time, uint64_t ns); +void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); +uint64_t nstime_ns(const nstime_t *time); +uint64_t nstime_sec(const nstime_t *time); +uint64_t nstime_nsec(const nstime_t *time); +void nstime_copy(nstime_t *time, const nstime_t *source); +int nstime_compare(const nstime_t *a, const nstime_t *b); +void nstime_add(nstime_t *time, const nstime_t *addend); +void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); +void nstime_imultiply(nstime_t *time, uint64_t multiplier); +void nstime_idivide(nstime_t *time, uint64_t divisor); +uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); +#ifdef JEMALLOC_JET +typedef bool (nstime_update_t)(nstime_t *); +extern nstime_update_t *nstime_update; +#else +bool nstime_update(nstime_t *time); +#endif + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index a90021aa..5880996a 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -1,8 +1,8 @@ a0dalloc -a0get a0malloc arena_aalloc arena_alloc_junk_small +arena_basic_stats_merge arena_bin_index arena_bin_info arena_bitselm_get @@ -25,18 +25,23 @@ arena_dalloc_junk_small arena_dalloc_large arena_dalloc_large_junked_locked arena_dalloc_small +arena_decay_tick +arena_decay_ticks +arena_decay_time_default_get +arena_decay_time_default_set +arena_decay_time_get +arena_decay_time_set arena_dss_prec_get arena_dss_prec_set arena_get -arena_get_hard arena_init arena_lg_dirty_mult_default_get arena_lg_dirty_mult_default_set arena_lg_dirty_mult_get arena_lg_dirty_mult_set arena_malloc +arena_malloc_hard arena_malloc_large -arena_malloc_small arena_mapbits_allocated_get arena_mapbits_binind_get arena_mapbits_decommitted_get @@ -47,9 +52,6 @@ arena_mapbits_large_binind_set arena_mapbits_large_get arena_mapbits_large_set arena_mapbits_large_size_get -arena_mapbitsp_get -arena_mapbitsp_read -arena_mapbitsp_write arena_mapbits_size_decode arena_mapbits_size_encode arena_mapbits_small_runind_get @@ -58,6 +60,9 @@ arena_mapbits_unallocated_set arena_mapbits_unallocated_size_get arena_mapbits_unallocated_size_set arena_mapbits_unzeroed_get +arena_mapbitsp_get +arena_mapbitsp_read +arena_mapbitsp_write arena_maxrun arena_maybe_purge arena_metadata_allocated_add @@ -67,10 +72,12 @@ arena_migrate arena_miscelm_get arena_miscelm_to_pageind arena_miscelm_to_rpages -arena_nbound arena_new arena_node_alloc arena_node_dalloc +arena_nthreads_dec +arena_nthreads_get +arena_nthreads_inc arena_palloc arena_postfork_child arena_postfork_parent @@ -83,7 +90,7 @@ arena_prof_tctx_get arena_prof_tctx_reset arena_prof_tctx_set arena_ptr_small_binind_get -arena_purge_all +arena_purge arena_quarantine_junk_small arena_ralloc arena_ralloc_junk_large @@ -93,11 +100,14 @@ arena_redzone_corruption arena_run_regind arena_run_to_miscelm arena_salloc -arenas_cache_bypass_cleanup -arenas_cache_cleanup arena_sdalloc arena_stats_merge arena_tcache_fill_small +arena_tdata_get +arena_tdata_get_hard +arenas +arenas_tdata_bypass_cleanup +arenas_tdata_cleanup atomic_add_p atomic_add_u atomic_add_uint32 @@ -122,7 +132,6 @@ base_stats_get bitmap_full bitmap_get bitmap_info_init -bitmap_info_ngroups bitmap_init bitmap_set bitmap_sfu @@ -162,9 +171,9 @@ chunk_prefork chunk_purge_arena chunk_purge_wrapper chunk_register +chunks_rtree chunksize chunksize_mask -chunks_rtree ckh_count ckh_delete ckh_insert @@ -183,6 +192,7 @@ ctl_nametomib ctl_postfork_child ctl_postfork_parent ctl_prefork +decay_ticker_get dss_prec_names extent_node_achunk_get extent_node_achunk_set @@ -234,6 +244,12 @@ extent_tree_szad_reverse_iter extent_tree_szad_reverse_iter_recurse extent_tree_szad_reverse_iter_start extent_tree_szad_search +ffs_llu +ffs_lu +ffs_u +ffs_u32 +ffs_u64 +ffs_zu get_errno hash hash_fmix_32 @@ -265,11 +281,11 @@ idalloct idalloctm imalloc imalloct +in_valgrind index2size index2size_compute index2size_lookup index2size_tab -in_valgrind ipalloc ipalloct ipallocztm @@ -311,11 +327,25 @@ map_bias map_misc_offset mb_write mutex_boot -narenas_cache_cleanup +narenas_tdata_cleanup narenas_total_get ncpus nhbins +nstime_add +nstime_compare +nstime_copy +nstime_divide +nstime_idivide +nstime_imultiply +nstime_init +nstime_init2 +nstime_ns +nstime_nsec +nstime_sec +nstime_subtract +nstime_update opt_abort +opt_decay_time opt_dss opt_junk opt_junk_alloc @@ -334,6 +364,7 @@ opt_prof_gdump opt_prof_leak opt_prof_prefix opt_prof_thread_active_init +opt_purge opt_quarantine opt_redzone opt_stats_print @@ -348,7 +379,11 @@ pages_map pages_purge pages_trim pages_unmap -pow2_ceil +pow2_ceil_u32 +pow2_ceil_u64 +pow2_ceil_zu +prng_lg_range +prng_range prof_active_get prof_active_get_unlocked prof_active_set @@ -393,6 +428,7 @@ prof_thread_active_init_set prof_thread_active_set prof_thread_name_get prof_thread_name_set +purge_mode_names quarantine quarantine_alloc_hook quarantine_alloc_hook_work @@ -413,6 +449,9 @@ rtree_subtree_read_hard rtree_subtree_tryread rtree_val_read rtree_val_write +run_quantize_ceil +run_quantize_floor +run_quantize_max s2u s2u_compute s2u_lookup @@ -451,15 +490,20 @@ tcache_flush tcache_get tcache_get_hard tcache_maxclass -tcaches tcache_salloc +tcache_stats_merge +tcaches tcaches_create tcaches_destroy tcaches_flush tcaches_get -tcache_stats_merge thread_allocated_cleanup thread_deallocated_cleanup +ticker_copy +ticker_init +ticker_read +ticker_tick +ticker_ticks tsd_arena_get tsd_arena_set tsd_boot @@ -477,6 +521,8 @@ tsd_init_check_recursion tsd_init_finish tsd_init_head tsd_nominal +tsd_prof_tdata_get +tsd_prof_tdata_set tsd_quarantine_get tsd_quarantine_set tsd_set @@ -484,14 +530,12 @@ tsd_tcache_enabled_get tsd_tcache_enabled_set tsd_tcache_get tsd_tcache_set -tsd_tls -tsd_tsd -tsd_prof_tdata_get -tsd_prof_tdata_set tsd_thread_allocated_get tsd_thread_allocated_set tsd_thread_deallocated_get tsd_thread_deallocated_set +tsd_tls +tsd_tsd u2rz valgrind_freelike_block valgrind_make_mem_defined diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 216d0ef4..5830f8b7 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -18,31 +18,9 @@ * proportional to bit position. For example, the lowest bit has a cycle of 2, * the next has a cycle of 4, etc. For this reason, we prefer to use the upper * bits. - * - * Macro parameters: - * uint32_t r : Result. - * unsigned lg_range : (0..32], number of least significant bits to return. - * uint32_t state : Seed value. - * const uint32_t a, c : See above discussion. */ -#define prng32(r, lg_range, state, a, c) do { \ - assert((lg_range) > 0); \ - assert((lg_range) <= 32); \ - \ - r = (state * (a)) + (c); \ - state = r; \ - r >>= (32 - (lg_range)); \ -} while (false) - -/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */ -#define prng64(r, lg_range, state, a, c) do { \ - assert((lg_range) > 0); \ - assert((lg_range) <= 64); \ - \ - r = (state * (a)) + (c); \ - state = r; \ - r >>= (64 - (lg_range)); \ -} while (false) +#define PRNG_A UINT64_C(6364136223846793005) +#define PRNG_C UINT64_C(1442695040888963407) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -56,5 +34,46 @@ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +uint64_t prng_lg_range(uint64_t *state, unsigned lg_range); +uint64_t prng_range(uint64_t *state, uint64_t range); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) +JEMALLOC_ALWAYS_INLINE uint64_t +prng_lg_range(uint64_t *state, unsigned lg_range) +{ + uint64_t ret; + + assert(lg_range > 0); + assert(lg_range <= 64); + + ret = (*state * PRNG_A) + PRNG_C; + *state = ret; + ret >>= (64 - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_range(uint64_t *state, uint64_t range) +{ + uint64_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range(state, lg_range); + } while (ret >= range); + + return (ret); +} +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index e5198c3e..a25502a9 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -436,16 +436,16 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, cassert(config_prof); tdata = prof_tdata_get(tsd, true); - if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) tdata = NULL; if (tdata_out != NULL) *tdata_out = tdata; - if (tdata == NULL) + if (unlikely(tdata == NULL)) return (true); - if (tdata->bytes_until_sample >= usize) { + if (likely(tdata->bytes_until_sample >= usize)) { if (update) tdata->bytes_until_sample -= usize; return (true); diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 2ca8e593..3770342f 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -42,7 +42,6 @@ struct { \ #define rb_tree(a_type) \ struct { \ a_type *rbt_root; \ - a_type rbt_nil; \ } /* Left accessors. */ @@ -79,6 +78,15 @@ struct { \ (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ } while (0) + +/* Node initializer. */ +#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ + /* Bookkeeping bit cannot be used by node pointer. */ \ + assert(((uintptr_t)(a_node) & 0x1) == 0); \ + rbtn_left_set(a_type, a_field, (a_node), NULL); \ + rbtn_right_set(a_type, a_field, (a_node), NULL); \ + rbtn_red_set(a_type, a_field, (a_node)); \ +} while (0) #else /* Right accessors. */ #define rbtn_right_get(a_type, a_field, a_node) \ @@ -99,28 +107,26 @@ struct { \ #define rbtn_black_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_red = false; \ } while (0) -#endif /* Node initializer. */ #define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ - rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ - rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_left_set(a_type, a_field, (a_node), NULL); \ + rbtn_right_set(a_type, a_field, (a_node), NULL); \ rbtn_red_set(a_type, a_field, (a_node)); \ } while (0) +#endif /* Tree initializer. */ #define rb_new(a_type, a_field, a_rbt) do { \ - (a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \ - rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \ - rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \ + (a_rbt)->rbt_root = NULL; \ } while (0) /* Internal utility macros. */ #define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ - if ((r_node) != &(a_rbt)->rbt_nil) { \ + if ((r_node) != NULL) { \ for (; \ - rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\ + rbtn_left_get(a_type, a_field, (r_node)) != NULL; \ (r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \ } \ } \ @@ -128,10 +134,9 @@ struct { \ #define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ - if ((r_node) != &(a_rbt)->rbt_nil) { \ - for (; rbtn_right_get(a_type, a_field, (r_node)) != \ - &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \ - (r_node))) { \ + if ((r_node) != NULL) { \ + for (; rbtn_right_get(a_type, a_field, (r_node)) != NULL; \ + (r_node) = rbtn_right_get(a_type, a_field, (r_node))) { \ } \ } \ } while (0) @@ -169,11 +174,11 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node); \ a_attr a_type * \ a_prefix##prev(a_rbt_type *rbtree, a_type *node); \ a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, a_type *key); \ +a_prefix##search(a_rbt_type *rbtree, const a_type *key); \ a_attr a_type * \ -a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \ +a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key); \ a_attr a_type * \ -a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \ +a_prefix##psearch(a_rbt_type *rbtree, const a_type *key); \ a_attr void \ a_prefix##insert(a_rbt_type *rbtree, a_type *node); \ a_attr void \ @@ -183,7 +188,10 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ a_rbt_type *, a_type *, void *), void *arg); \ a_attr a_type * \ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); \ +a_attr void \ +a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ + void *arg); /* * The rb_gen() macro generates a type-specific red-black tree implementation, @@ -254,7 +262,7 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * last/first. * * static ex_node_t * - * ex_search(ex_t *tree, ex_node_t *key); + * ex_search(ex_t *tree, const ex_node_t *key); * Description: Search for node that matches key. * Args: * tree: Pointer to an initialized red-black tree object. @@ -262,9 +270,9 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * Ret: Node in tree that matches key, or NULL if no match. * * static ex_node_t * - * ex_nsearch(ex_t *tree, ex_node_t *key); + * ex_nsearch(ex_t *tree, const ex_node_t *key); * static ex_node_t * - * ex_psearch(ex_t *tree, ex_node_t *key); + * ex_psearch(ex_t *tree, const ex_node_t *key); * Description: Search for node that matches key. If no match is found, * return what would be key's successor/predecessor, were * key in tree. @@ -312,6 +320,20 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * arg : Opaque pointer passed to cb(). * Ret: NULL if iteration completed, or the non-NULL callback return value * that caused termination of the iteration. + * + * static void + * ex_destroy(ex_t *tree, void (*cb)(ex_node_t *, void *), void *arg); + * Description: Iterate over the tree with post-order traversal, remove + * each node, and run the callback if non-null. This is + * used for destroying a tree without paying the cost to + * rebalance it. The tree must not be otherwise altered + * during traversal. + * Args: + * tree: Pointer to an initialized red-black tree object. + * cb : Callback function, which, if non-null, is called for each node + * during iteration. There is no way to stop iteration once it + * has begun. + * arg : Opaque pointer passed to cb(). */ #define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ a_attr void \ @@ -320,36 +342,30 @@ a_prefix##new(a_rbt_type *rbtree) { \ } \ a_attr bool \ a_prefix##empty(a_rbt_type *rbtree) { \ - return (rbtree->rbt_root == &rbtree->rbt_nil); \ + return (rbtree->rbt_root == NULL); \ } \ a_attr a_type * \ a_prefix##first(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##last(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ a_type *ret; \ - if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + if (rbtn_right_get(a_type, a_field, node) != NULL) { \ rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \ a_field, node), ret); \ } else { \ a_type *tnode = rbtree->rbt_root; \ - assert(tnode != &rbtree->rbt_nil); \ - ret = &rbtree->rbt_nil; \ + assert(tnode != NULL); \ + ret = NULL; \ while (true) { \ int cmp = (a_cmp)(node, tnode); \ if (cmp < 0) { \ @@ -360,24 +376,21 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ } else { \ break; \ } \ - assert(tnode != &rbtree->rbt_nil); \ + assert(tnode != NULL); \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ a_type *ret; \ - if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + if (rbtn_left_get(a_type, a_field, node) != NULL) { \ rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \ a_field, node), ret); \ } else { \ a_type *tnode = rbtree->rbt_root; \ - assert(tnode != &rbtree->rbt_nil); \ - ret = &rbtree->rbt_nil; \ + assert(tnode != NULL); \ + ret = NULL; \ while (true) { \ int cmp = (a_cmp)(node, tnode); \ if (cmp < 0) { \ @@ -388,20 +401,17 @@ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ } else { \ break; \ } \ - assert(tnode != &rbtree->rbt_nil); \ + assert(tnode != NULL); \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ +a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ int cmp; \ ret = rbtree->rbt_root; \ - while (ret != &rbtree->rbt_nil \ + while (ret != NULL \ && (cmp = (a_cmp)(key, ret)) != 0) { \ if (cmp < 0) { \ ret = rbtn_left_get(a_type, a_field, ret); \ @@ -409,17 +419,14 @@ a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ ret = rbtn_right_get(a_type, a_field, ret); \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ -a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ +a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ - ret = &rbtree->rbt_nil; \ - while (tnode != &rbtree->rbt_nil) { \ + ret = NULL; \ + while (tnode != NULL) { \ int cmp = (a_cmp)(key, tnode); \ if (cmp < 0) { \ ret = tnode; \ @@ -431,17 +438,14 @@ a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ break; \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ -a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \ +a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ - ret = &rbtree->rbt_nil; \ - while (tnode != &rbtree->rbt_nil) { \ + ret = NULL; \ + while (tnode != NULL) { \ int cmp = (a_cmp)(key, tnode); \ if (cmp < 0) { \ tnode = rbtn_left_get(a_type, a_field, tnode); \ @@ -453,9 +457,6 @@ a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \ break; \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr void \ @@ -467,7 +468,7 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbt_node_new(a_type, a_field, rbtree, node); \ /* Wind. */ \ path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + for (pathp = path; pathp->node != NULL; pathp++) { \ int cmp = pathp->cmp = a_cmp(node, pathp->node); \ assert(cmp != 0); \ if (cmp < 0) { \ @@ -487,7 +488,8 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbtn_left_set(a_type, a_field, cnode, left); \ if (rbtn_red_get(a_type, a_field, left)) { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ /* Fix up 4-node. */ \ a_type *tnode; \ rbtn_black_set(a_type, a_field, leftleft); \ @@ -502,7 +504,8 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, cnode, right); \ if (rbtn_red_get(a_type, a_field, right)) { \ a_type *left = rbtn_left_get(a_type, a_field, cnode); \ - if (rbtn_red_get(a_type, a_field, left)) { \ + if (left != NULL && rbtn_red_get(a_type, a_field, \ + left)) { \ /* Split 4-node. */ \ rbtn_black_set(a_type, a_field, left); \ rbtn_black_set(a_type, a_field, right); \ @@ -535,7 +538,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* Wind. */ \ nodep = NULL; /* Silence compiler warning. */ \ path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + for (pathp = path; pathp->node != NULL; pathp++) { \ int cmp = pathp->cmp = a_cmp(node, pathp->node); \ if (cmp < 0) { \ pathp[1].node = rbtn_left_get(a_type, a_field, \ @@ -547,7 +550,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* Find node's successor, in preparation for swap. */ \ pathp->cmp = 1; \ nodep = pathp; \ - for (pathp++; pathp->node != &rbtree->rbt_nil; \ + for (pathp++; pathp->node != NULL; \ pathp++) { \ pathp->cmp = -1; \ pathp[1].node = rbtn_left_get(a_type, a_field, \ @@ -590,7 +593,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } else { \ a_type *left = rbtn_left_get(a_type, a_field, node); \ - if (left != &rbtree->rbt_nil) { \ + if (left != NULL) { \ /* node has no successor, but it has a left child. */\ /* Splice node out, without losing the left child. */\ assert(!rbtn_red_get(a_type, a_field, node)); \ @@ -610,33 +613,32 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ return; \ } else if (pathp == path) { \ /* The tree only contained one node. */ \ - rbtree->rbt_root = &rbtree->rbt_nil; \ + rbtree->rbt_root = NULL; \ return; \ } \ } \ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ /* Prune red node, which requires no fixup. */ \ assert(pathp[-1].cmp < 0); \ - rbtn_left_set(a_type, a_field, pathp[-1].node, \ - &rbtree->rbt_nil); \ + rbtn_left_set(a_type, a_field, pathp[-1].node, NULL); \ return; \ } \ /* The node to be pruned is black, so unwind until balance is */\ /* restored. */\ - pathp->node = &rbtree->rbt_nil; \ + pathp->node = NULL; \ for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ assert(pathp->cmp != 0); \ if (pathp->cmp < 0) { \ rbtn_left_set(a_type, a_field, pathp->node, \ pathp[1].node); \ - assert(!rbtn_red_get(a_type, a_field, pathp[1].node)); \ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ a_type *right = rbtn_right_get(a_type, a_field, \ pathp->node); \ a_type *rightleft = rbtn_left_get(a_type, a_field, \ right); \ a_type *tnode; \ - if (rbtn_red_get(a_type, a_field, rightleft)) { \ + if (rightleft != NULL && rbtn_red_get(a_type, a_field, \ + rightleft)) { \ /* In the following diagrams, ||, //, and \\ */\ /* indicate the path to the removed node. */\ /* */\ @@ -679,7 +681,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ pathp->node); \ a_type *rightleft = rbtn_left_get(a_type, a_field, \ right); \ - if (rbtn_red_get(a_type, a_field, rightleft)) { \ + if (rightleft != NULL && rbtn_red_get(a_type, a_field, \ + rightleft)) { \ /* || */\ /* pathp(b) */\ /* // \ */\ @@ -733,7 +736,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ left); \ a_type *leftrightleft = rbtn_left_get(a_type, a_field, \ leftright); \ - if (rbtn_red_get(a_type, a_field, leftrightleft)) { \ + if (leftrightleft != NULL && rbtn_red_get(a_type, \ + a_field, leftrightleft)) { \ /* || */\ /* pathp(b) */\ /* / \\ */\ @@ -759,7 +763,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* (b) */\ /* / */\ /* (b) */\ - assert(leftright != &rbtree->rbt_nil); \ + assert(leftright != NULL); \ rbtn_red_set(a_type, a_field, leftright); \ rbtn_rotate_right(a_type, a_field, pathp->node, \ tnode); \ @@ -782,7 +786,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ return; \ } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ /* || */\ /* pathp(r) */\ /* / \\ */\ @@ -820,7 +825,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } else { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ /* || */\ /* pathp(b) */\ /* / \\ */\ @@ -866,13 +872,13 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ a_attr a_type * \ a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ - return (&rbtree->rbt_nil); \ + if (node == NULL) { \ + return (NULL); \ } else { \ a_type *ret; \ if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \ - a_field, node), cb, arg)) != &rbtree->rbt_nil \ - || (ret = cb(rbtree, node, arg)) != NULL) { \ + a_field, node), cb, arg)) != NULL || (ret = cb(rbtree, node, \ + arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ @@ -886,8 +892,8 @@ a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \ if (cmp < 0) { \ a_type *ret; \ if ((ret = a_prefix##iter_start(rbtree, start, \ - rbtn_left_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + rbtn_left_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ @@ -914,21 +920,18 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ } else { \ ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ - return (&rbtree->rbt_nil); \ + if (node == NULL) { \ + return (NULL); \ } else { \ a_type *ret; \ if ((ret = a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##reverse_iter_recurse(rbtree, \ @@ -943,8 +946,8 @@ a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \ if (cmp > 0) { \ a_type *ret; \ if ((ret = a_prefix##reverse_iter_start(rbtree, start, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##reverse_iter_recurse(rbtree, \ @@ -972,10 +975,29 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \ cb, arg); \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ +} \ +a_attr void \ +a_prefix##destroy_recurse(a_rbt_type *rbtree, a_type *node, void (*cb)( \ + a_type *, void *), void *arg) { \ + if (node == NULL) { \ + return; \ + } \ + a_prefix##destroy_recurse(rbtree, rbtn_left_get(a_type, a_field, \ + node), cb, arg); \ + rbtn_left_set(a_type, a_field, (node), NULL); \ + a_prefix##destroy_recurse(rbtree, rbtn_right_get(a_type, a_field, \ + node), cb, arg); \ + rbtn_right_set(a_type, a_field, (node), NULL); \ + if (cb) { \ + cb(node, arg); \ + } \ +} \ +a_attr void \ +a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ + void *arg) { \ + a_prefix##destroy_recurse(rbtree, rbtree->rbt_root, cb, arg); \ + rbtree->rbt_root = NULL; \ } #endif /* RB_H_ */ diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index fc82036d..2b0ca29a 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -142,10 +142,10 @@ size_classes() { # All remaining groups. lg_grp=$((${lg_grp} + ${lg_g})) - while [ ${lg_grp} -lt ${ptr_bits} ] ; do + while [ ${lg_grp} -lt $((${ptr_bits} - 1)) ] ; do sep_line ndelta=1 - if [ ${lg_grp} -eq $((${ptr_bits} - 1)) ] ; then + if [ ${lg_grp} -eq $((${ptr_bits} - 2)) ] ; then ndelta_limit=$((${g} - 1)) else ndelta_limit=${g} diff --git a/include/jemalloc/internal/smoothstep.h b/include/jemalloc/internal/smoothstep.h new file mode 100644 index 00000000..c5333cca --- /dev/null +++ b/include/jemalloc/internal/smoothstep.h @@ -0,0 +1,246 @@ +/* + * This file was generated by the following command: + * sh smoothstep.sh smoother 200 24 3 15 + */ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * This header defines a precomputed table based on the smoothstep family of + * sigmoidal curves (https://en.wikipedia.org/wiki/Smoothstep) that grow from 0 + * to 1 in 0 <= x <= 1. The table is stored as integer fixed point values so + * that floating point math can be avoided. + * + * 3 2 + * smoothstep(x) = -2x + 3x + * + * 5 4 3 + * smootherstep(x) = 6x - 15x + 10x + * + * 7 6 5 4 + * smootheststep(x) = -20x + 70x - 84x + 35x + */ + +#define SMOOTHSTEP_VARIANT "smoother" +#define SMOOTHSTEP_NSTEPS 200 +#define SMOOTHSTEP_BFP 24 +#define SMOOTHSTEP \ + /* STEP(step, h, x, y) */ \ + STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \ + STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \ + STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \ + STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \ + STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \ + STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \ + STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \ + STEP( 8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \ + STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \ + STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \ + STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \ + STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \ + STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \ + STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \ + STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \ + STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \ + STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \ + STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \ + STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \ + STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \ + STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \ + STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \ + STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \ + STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \ + STEP( 25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \ + STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \ + STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \ + STEP( 28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \ + STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \ + STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \ + STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \ + STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \ + STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \ + STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \ + STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \ + STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \ + STEP( 37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \ + STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \ + STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \ + STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \ + STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \ + STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \ + STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \ + STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \ + STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \ + STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \ + STEP( 47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \ + STEP( 48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \ + STEP( 49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750) \ + STEP( 50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \ + STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \ + STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \ + STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \ + STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \ + STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \ + STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \ + STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \ + STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \ + STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \ + STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \ + STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \ + STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \ + STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \ + STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \ + STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \ + STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \ + STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \ + STEP( 68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \ + STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \ + STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \ + STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \ + STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \ + STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \ + STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \ + STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \ + STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \ + STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \ + STEP( 78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000) \ + STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \ + STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \ + STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \ + STEP( 82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \ + STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \ + STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \ + STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \ + STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \ + STEP( 87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \ + STEP( 88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \ + STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \ + STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \ + STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \ + STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \ + STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \ + STEP( 94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \ + STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \ + STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \ + STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \ + STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \ + STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \ + STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \ + STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \ + STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \ + STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250) \ + STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \ + STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \ + STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000) \ + STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \ + STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \ + STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \ + STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \ + STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \ + STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \ + STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \ + STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \ + STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \ + STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \ + STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \ + STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \ + STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \ + STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \ + STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \ + STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \ + STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \ + STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \ + STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \ + STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \ + STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \ + STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \ + STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \ + STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \ + STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \ + STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \ + STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \ + STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000) \ + STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \ + STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \ + STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \ + STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \ + STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \ + STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \ + STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \ + STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \ + STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \ + STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \ + STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \ + STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \ + STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \ + STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \ + STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \ + STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \ + STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \ + STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \ + STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \ + STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \ + STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \ + STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \ + STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \ + STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \ + STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \ + STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000) \ + STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \ + STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000) \ + STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \ + STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \ + STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \ + STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \ + STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \ + STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \ + STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \ + STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \ + STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \ + STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \ + STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \ + STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \ + STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \ + STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \ + STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \ + STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \ + STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \ + STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \ + STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \ + STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \ + STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \ + STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \ + STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \ + STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \ + STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \ + STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \ + STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \ + STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000) \ + STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \ + STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \ + STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \ + STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \ + STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \ + STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \ + STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \ + STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \ + STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \ + STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \ + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/smoothstep.sh b/include/jemalloc/internal/smoothstep.sh new file mode 100755 index 00000000..8124693f --- /dev/null +++ b/include/jemalloc/internal/smoothstep.sh @@ -0,0 +1,115 @@ +#!/bin/sh +# +# Generate a discrete lookup table for a sigmoid function in the smoothstep +# family (https://en.wikipedia.org/wiki/Smoothstep), where the lookup table +# entries correspond to x in [1/nsteps, 2/nsteps, ..., nsteps/nsteps]. Encode +# the entries using a binary fixed point representation. +# +# Usage: smoothstep.sh +# +# is in {smooth, smoother, smoothest}. +# must be greater than zero. +# must be in [0..62]; reasonable values are roughly [10..30]. +# is x decimal precision. +# is y decimal precision. + +#set -x + +cmd="sh smoothstep.sh $*" +variant=$1 +nsteps=$2 +bfp=$3 +xprec=$4 +yprec=$5 + +case "${variant}" in + smooth) + ;; + smoother) + ;; + smoothest) + ;; + *) + echo "Unsupported variant" + exit 1 + ;; +esac + +smooth() { + step=$1 + y=`echo ${yprec} k ${step} ${nsteps} / sx _2 lx 3 ^ '*' 3 lx 2 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` +} + +smoother() { + step=$1 + y=`echo ${yprec} k ${step} ${nsteps} / sx 6 lx 5 ^ '*' _15 lx 4 ^ '*' + 10 lx 3 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` +} + +smoothest() { + step=$1 + y=`echo ${yprec} k ${step} ${nsteps} / sx _20 lx 7 ^ '*' 70 lx 6 ^ '*' + _84 lx 5 ^ '*' + 35 lx 4 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` +} + +cat < 0); + assert((size & chunksize_mask) == 0); + + cactive = atomic_add_z(&stats_cactive, size); + assert(cactive - size < cactive); } JEMALLOC_INLINE void stats_cactive_sub(size_t size) { + UNUSED size_t cactive; - atomic_sub_z(&stats_cactive, size); + assert(size > 0); + assert((size & chunksize_mask) == 0); + + cactive = atomic_sub_z(&stats_cactive, size); + assert(cactive + size > cactive); } #endif diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 5079cd26..8357820b 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -70,13 +70,20 @@ struct tcache_bin_s { int low_water; /* Min # cached since last GC. */ unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ unsigned ncached; /* # of cached objects. */ + /* + * To make use of adjacent cacheline prefetch, the items in the avail + * stack goes to higher address for newer allocations. avail points + * just above the available space, which means that + * avail[-ncached, ... -1] are available items and the lowest item will + * be allocated first. + */ void **avail; /* Stack of available objects. */ }; struct tcache_s { ql_elm(tcache_t) link; /* Used for aggregating stats. */ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ - unsigned ev_cnt; /* Event count since incremental GC. */ + ticker_t gc_ticker; /* Drives incremental GC. */ szind_t next_gc_bin; /* Next bin to GC. */ tcache_bin_t tbins[1]; /* Dynamically sized. */ /* @@ -108,7 +115,7 @@ extern tcache_bin_info_t *tcache_bin_info; * Number of tcache bins. There are NBINS small-object bins, plus 0 or more * large-object bins. */ -extern size_t nhbins; +extern unsigned nhbins; /* Maximum cached size class. */ extern size_t tcache_maxclass; @@ -126,7 +133,7 @@ extern tcaches_t *tcaches; size_t tcache_salloc(const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind); + tcache_bin_t *tbin, szind_t binind, bool *tcache_success); void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, unsigned rem); void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, @@ -155,15 +162,15 @@ void tcache_flush(void); bool tcache_enabled_get(void); tcache_t *tcache_get(tsd_t *tsd, bool create); void tcache_enabled_set(bool enabled); -void *tcache_alloc_easy(tcache_bin_t *tbin); +void *tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success); void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, bool zero); + size_t size, szind_t ind, bool zero, bool slow_path); void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, bool zero); + size_t size, szind_t ind, bool zero, bool slow_path); void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, - szind_t binind); + szind_t binind, bool slow_path); void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, - size_t size); + size_t size, bool slow_path); tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); #endif @@ -240,51 +247,74 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) if (TCACHE_GC_INCR == 0) return; - tcache->ev_cnt++; - assert(tcache->ev_cnt <= TCACHE_GC_INCR); - if (unlikely(tcache->ev_cnt == TCACHE_GC_INCR)) + if (unlikely(ticker_tick(&tcache->gc_ticker))) tcache_event_hard(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_easy(tcache_bin_t *tbin) +tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) { void *ret; if (unlikely(tbin->ncached == 0)) { tbin->low_water = -1; + *tcache_success = false; return (NULL); } + /* + * tcache_success (instead of ret) should be checked upon the return of + * this function. We avoid checking (ret == NULL) because there is + * never a null stored on the avail stack (which is unknown to the + * compiler), and eagerly checking ret would cause pipeline stall + * (waiting for the cacheline). + */ + *tcache_success = true; + ret = *(tbin->avail - tbin->ncached); tbin->ncached--; + if (unlikely((int)tbin->ncached < tbin->low_water)) tbin->low_water = tbin->ncached; - ret = tbin->avail[tbin->ncached]; + return (ret); } JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - bool zero) + szind_t binind, bool zero, bool slow_path) { void *ret; - szind_t binind; - size_t usize; tcache_bin_t *tbin; + bool tcache_success; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); - binind = size2index(size); assert(binind < NBINS); tbin = &tcache->tbins[binind]; - usize = index2size(binind); - ret = tcache_alloc_easy(tbin); - if (unlikely(ret == NULL)) { - ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind); - if (ret == NULL) + ret = tcache_alloc_easy(tbin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { + bool tcache_hard_success; + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind, + &tcache_hard_success); + if (tcache_hard_success == false) return (NULL); } - assert(tcache_salloc(ret) == usize); + + assert(ret); + /* + * Only compute usize if required. The checks in the following if + * statement are all static. + */ + if (config_prof || (slow_path && config_fill) || unlikely(zero)) { + usize = index2size(binind); + assert(tcache_salloc(ret) == usize); + } if (likely(!zero)) { - if (config_fill) { + if (slow_path && config_fill) { if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], false); @@ -292,7 +322,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, memset(ret, 0, usize); } } else { - if (config_fill && unlikely(opt_junk_alloc)) { + if (slow_path && config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } @@ -309,28 +339,38 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - bool zero) + szind_t binind, bool zero, bool slow_path) { void *ret; - szind_t binind; - size_t usize; tcache_bin_t *tbin; + bool tcache_success; - binind = size2index(size); - usize = index2size(binind); - assert(usize <= tcache_maxclass); assert(binind < nhbins); tbin = &tcache->tbins[binind]; - ret = tcache_alloc_easy(tbin); - if (unlikely(ret == NULL)) { + ret = tcache_alloc_easy(tbin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { /* * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. */ - ret = arena_malloc_large(arena, usize, zero); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + ret = arena_malloc_large(tsd, arena, binind, zero); if (ret == NULL) return (NULL); } else { + size_t usize JEMALLOC_CC_SILENCE_INIT(0); + + /* Only compute usize on demand */ + if (config_prof || (slow_path && config_fill) || + unlikely(zero)) { + usize = index2size(binind); + assert(usize <= tcache_maxclass); + } + if (config_prof && usize == LARGE_MINCLASS) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); @@ -340,7 +380,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, BININD_INVALID); } if (likely(!zero)) { - if (config_fill) { + if (slow_path && config_fill) { if (unlikely(opt_junk_alloc)) memset(ret, 0xa5, usize); else if (unlikely(opt_zero)) @@ -360,14 +400,15 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind) +tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, + bool slow_path) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); - if (config_fill && unlikely(opt_junk_free)) + if (slow_path && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); tbin = &tcache->tbins[binind]; @@ -377,14 +418,15 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind) (tbin_info->ncached_max >> 1)); } assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; tbin->ncached++; + *(tbin->avail - tbin->ncached) = ptr; tcache_event(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) +tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, + bool slow_path) { szind_t binind; tcache_bin_t *tbin; @@ -396,7 +438,7 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) binind = size2index(size); - if (config_fill && unlikely(opt_junk_free)) + if (slow_path && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_large(ptr, size); tbin = &tcache->tbins[binind]; @@ -406,8 +448,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) (tbin_info->ncached_max >> 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; tbin->ncached++; + *(tbin->avail - tbin->ncached) = ptr; tcache_event(tsd, tcache); } diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h new file mode 100644 index 00000000..4696e56d --- /dev/null +++ b/include/jemalloc/internal/ticker.h @@ -0,0 +1,75 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct ticker_s ticker_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct ticker_s { + int32_t tick; + int32_t nticks; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void ticker_init(ticker_t *ticker, int32_t nticks); +void ticker_copy(ticker_t *ticker, const ticker_t *other); +int32_t ticker_read(const ticker_t *ticker); +bool ticker_ticks(ticker_t *ticker, int32_t nticks); +bool ticker_tick(ticker_t *ticker); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TICKER_C_)) +JEMALLOC_INLINE void +ticker_init(ticker_t *ticker, int32_t nticks) +{ + + ticker->tick = nticks; + ticker->nticks = nticks; +} + +JEMALLOC_INLINE void +ticker_copy(ticker_t *ticker, const ticker_t *other) +{ + + *ticker = *other; +} + +JEMALLOC_INLINE int32_t +ticker_read(const ticker_t *ticker) +{ + + return (ticker->tick); +} + +JEMALLOC_INLINE bool +ticker_ticks(ticker_t *ticker, int32_t nticks) +{ + + if (unlikely(ticker->tick < nticks)) { + ticker->tick = ticker->nticks; + return (true); + } + ticker->tick -= nticks; + return(false); +} + +JEMALLOC_INLINE bool +ticker_tick(ticker_t *ticker) +{ + + return (ticker_ticks(ticker, 1)); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index eed7aa01..16cc2f17 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -537,9 +537,9 @@ struct tsd_init_head_s { O(thread_deallocated, uint64_t) \ O(prof_tdata, prof_tdata_t *) \ O(arena, arena_t *) \ - O(arenas_cache, arena_t **) \ - O(narenas_cache, unsigned) \ - O(arenas_cache_bypass, bool) \ + O(arenas_tdata, arena_tdata_t *) \ + O(narenas_tdata, unsigned) \ + O(arenas_tdata_bypass, bool) \ O(tcache_enabled, tcache_enabled_t) \ O(quarantine, quarantine_t *) \ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index b2ea740f..b8885bfa 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -81,49 +81,7 @@ # define unreachable() #endif -/* - * Define a custom assert() in order to reduce the chances of deadlock during - * assertion failure. - */ -#ifndef assert -#define assert(e) do { \ - if (unlikely(config_debug && !(e))) { \ - malloc_printf( \ - ": %s:%d: Failed assertion: \"%s\"\n", \ - __FILE__, __LINE__, #e); \ - abort(); \ - } \ -} while (0) -#endif - -#ifndef not_reached -#define not_reached() do { \ - if (config_debug) { \ - malloc_printf( \ - ": %s:%d: Unreachable code reached\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ - unreachable(); \ -} while (0) -#endif - -#ifndef not_implemented -#define not_implemented() do { \ - if (config_debug) { \ - malloc_printf(": %s:%d: Not implemented\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ -} while (0) -#endif - -#ifndef assert_not_implemented -#define assert_not_implemented(e) do { \ - if (unlikely(config_debug && !(e))) \ - not_implemented(); \ -} while (0) -#endif +#include "jemalloc/internal/assert.h" /* Use to assert a particular configuration, e.g., cassert(config_debug). */ #define cassert(c) do { \ @@ -163,10 +121,16 @@ void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -int jemalloc_ffsl(long bitmap); -int jemalloc_ffs(int bitmap); -size_t pow2_ceil(size_t x); -size_t lg_floor(size_t x); +unsigned ffs_llu(unsigned long long bitmap); +unsigned ffs_lu(unsigned long bitmap); +unsigned ffs_u(unsigned bitmap); +unsigned ffs_zu(size_t bitmap); +unsigned ffs_u64(uint64_t bitmap); +unsigned ffs_u32(uint32_t bitmap); +uint64_t pow2_ceil_u64(uint64_t x); +uint32_t pow2_ceil_u32(uint32_t x); +size_t pow2_ceil_zu(size_t x); +unsigned lg_floor(size_t x); void set_errno(int errnum); int get_errno(void); #endif @@ -174,27 +138,74 @@ int get_errno(void); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) /* Sanity check. */ -#if !defined(JEMALLOC_INTERNAL_FFSL) || !defined(JEMALLOC_INTERNAL_FFS) -# error Both JEMALLOC_INTERNAL_FFSL && JEMALLOC_INTERNAL_FFS should have been defined by configure +#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ + || !defined(JEMALLOC_INTERNAL_FFS) +# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure #endif -JEMALLOC_ALWAYS_INLINE int -jemalloc_ffsl(long bitmap) +JEMALLOC_ALWAYS_INLINE unsigned +ffs_llu(unsigned long long bitmap) +{ + + return (JEMALLOC_INTERNAL_FFSLL(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_lu(unsigned long bitmap) { return (JEMALLOC_INTERNAL_FFSL(bitmap)); } -JEMALLOC_ALWAYS_INLINE int -jemalloc_ffs(int bitmap) +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u(unsigned bitmap) { return (JEMALLOC_INTERNAL_FFS(bitmap)); } -/* Compute the smallest power of 2 that is >= x. */ -JEMALLOC_INLINE size_t -pow2_ceil(size_t x) +JEMALLOC_ALWAYS_INLINE unsigned +ffs_zu(size_t bitmap) +{ + +#if LG_SIZEOF_PTR == LG_SIZEOF_INT + return (ffs_u(bitmap)); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG + return (ffs_lu(bitmap)); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG + return (ffs_llu(bitmap)); +#else +#error No implementation for size_t ffs() +#endif +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u64(uint64_t bitmap) +{ + +#if LG_SIZEOF_LONG == 3 + return (ffs_lu(bitmap)); +#elif LG_SIZEOF_LONG_LONG == 3 + return (ffs_llu(bitmap)); +#else +#error No implementation for 64-bit ffs() +#endif +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u32(uint32_t bitmap) +{ + +#if LG_SIZEOF_INT == 2 + return (ffs_u(bitmap)); +#else +#error No implementation for 32-bit ffs() +#endif + return (ffs_u(bitmap)); +} + +JEMALLOC_INLINE uint64_t +pow2_ceil_u64(uint64_t x) { x--; @@ -203,15 +214,39 @@ pow2_ceil(size_t x) x |= x >> 4; x |= x >> 8; x |= x >> 16; -#if (LG_SIZEOF_PTR == 3) x |= x >> 32; -#endif x++; return (x); } -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE uint32_t +pow2_ceil_u32(uint32_t x) +{ + + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return (x); +} + +/* Compute the smallest power of 2 that is >= x. */ JEMALLOC_INLINE size_t +pow2_ceil_zu(size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return (pow2_ceil_u64(x)); +#else + return (pow2_ceil_u32(x)); +#endif +} + +#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE unsigned lg_floor(size_t x) { size_t ret; @@ -222,10 +257,11 @@ lg_floor(size_t x) : "=r"(ret) // Outputs. : "r"(x) // Inputs. ); - return (ret); + assert(ret < UINT_MAX); + return ((unsigned)ret); } #elif (defined(_MSC_VER)) -JEMALLOC_INLINE size_t +JEMALLOC_INLINE unsigned lg_floor(size_t x) { unsigned long ret; @@ -237,12 +273,13 @@ lg_floor(size_t x) #elif (LG_SIZEOF_PTR == 2) _BitScanReverse(&ret, x); #else -# error "Unsupported type sizes for lg_floor()" +# error "Unsupported type size for lg_floor()" #endif - return (ret); + assert(ret < UINT_MAX); + return ((unsigned)ret); } #elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) -JEMALLOC_INLINE size_t +JEMALLOC_INLINE unsigned lg_floor(size_t x) { @@ -253,11 +290,11 @@ lg_floor(size_t x) #elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x)); #else -# error "Unsupported type sizes for lg_floor()" +# error "Unsupported type size for lg_floor()" #endif } #else -JEMALLOC_INLINE size_t +JEMALLOC_INLINE unsigned lg_floor(size_t x) { @@ -268,20 +305,13 @@ lg_floor(size_t x) x |= (x >> 4); x |= (x >> 8); x |= (x >> 16); -#if (LG_SIZEOF_PTR == 3 && LG_SIZEOF_PTR == LG_SIZEOF_LONG) +#if (LG_SIZEOF_PTR == 3) x |= (x >> 32); - if (x == KZU(0xffffffffffffffff)) - return (63); - x++; - return (jemalloc_ffsl(x) - 2); -#elif (LG_SIZEOF_PTR == 2) - if (x == KZU(0xffffffff)) - return (31); - x++; - return (jemalloc_ffs(x) - 2); -#else -# error "Unsupported type sizes for lg_floor()" #endif + if (x == SIZE_T_MAX) + return ((8 << LG_SIZEOF_PTR) - 1); + x++; + return (ffs_zu(x) - 2); } #endif diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index ab13c375..6d89435c 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -33,5 +33,13 @@ */ #undef JEMALLOC_USE_CXX_THROW +#ifdef _MSC_VER +# ifdef _WIN64 +# define LG_SIZEOF_PTR_WIN 3 +# else +# define LG_SIZEOF_PTR_WIN 2 +# endif +#endif + /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ #undef LG_SIZEOF_PTR diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 7f64d9ff..9f356f98 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -11,12 +11,13 @@ #define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ #define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" -# define MALLOCX_LG_ALIGN(la) (la) +# define MALLOCX_LG_ALIGN(la) ((int)(la)) # if LG_SIZEOF_PTR == 2 -# define MALLOCX_ALIGN(a) (ffs(a)-1) +# define MALLOCX_ALIGN(a) ((int)(ffs(a)-1)) # else # define MALLOCX_ALIGN(a) \ - ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) + ((int)(((a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ + ffs((int)((a)>>32))+31)) # endif # define MALLOCX_ZERO ((int)0x40) /* @@ -36,32 +37,7 @@ # define JEMALLOC_CXX_THROW #endif -#ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ATTR(s) __attribute__((s)) -# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) -# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE -# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) -# else -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# endif -# ifndef JEMALLOC_EXPORT -# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) -# endif -# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) -# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) -# else -# define JEMALLOC_FORMAT_PRINTF(s, i) -# endif -# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) -# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) -# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) -# define JEMALLOC_RESTRICT_RETURN -# define JEMALLOC_ALLOCATOR -#elif _MSC_VER +#if _MSC_VER # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) __declspec(align(s)) # define JEMALLOC_ALLOC_SIZE(s) @@ -87,6 +63,31 @@ # else # define JEMALLOC_ALLOCATOR # endif +#elif defined(JEMALLOC_HAVE_ATTR) +# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE +# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) +# else +# define JEMALLOC_ALLOC_SIZE(s) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) +# endif +# ifndef JEMALLOC_EXPORT +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# endif +# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) +# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) +# else +# define JEMALLOC_FORMAT_PRINTF(s, i) +# endif +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_RESTRICT_RETURN +# define JEMALLOC_ALLOCATOR #else # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) diff --git a/include/msvc_compat/strings.h b/include/msvc_compat/strings.h index f01ffdd1..a3ee2506 100644 --- a/include/msvc_compat/strings.h +++ b/include/msvc_compat/strings.h @@ -21,7 +21,37 @@ static __forceinline int ffs(int x) return (ffsl(x)); } +# ifdef _M_X64 +# pragma intrinsic(_BitScanForward64) +# endif + +static __forceinline int ffsll(unsigned __int64 x) +{ + unsigned long i; +#ifdef _M_X64 + if (_BitScanForward64(&i, x)) + return (i + 1); + return (0); #else +// Fallback for 32-bit build where 64-bit version not available +// assuming little endian + union { + unsigned __int64 ll; + unsigned long l[2]; + } s; + + s.ll = x; + + if (_BitScanForward(&i, s.l[0])) + return (i + 1); + else if(_BitScanForward(&i, s.l[1])) + return (i + 33); + return (0); +#endif +} + +#else +# define ffsll(x) __builtin_ffsll(x) # define ffsl(x) __builtin_ffsl(x) # define ffs(x) __builtin_ffs(x) #endif diff --git a/msvc/ReadMe.txt b/msvc/ReadMe.txt new file mode 100644 index 00000000..02b97f74 --- /dev/null +++ b/msvc/ReadMe.txt @@ -0,0 +1,24 @@ + +How to build jemalloc for Windows +================================= + +1. Install Cygwin with at least the following packages: + * autoconf + * autogen + * gawk + * grep + * sed + +2. Install Visual Studio 2015 with Visual C++ + +3. Add Cygwin\bin to the PATH environment variable + +4. Open "VS2015 x86 Native Tools Command Prompt" + (note: x86/x64 doesn't matter at this point) + +5. Generate header files: + sh -c "./autogen.sh CC=cl --enable-lazy-lock=no" + +6. Now the project can be opened and built in Visual Studio: + msvc\jemalloc_vc2015.sln + diff --git a/msvc/jemalloc_vc2015.sln b/msvc/jemalloc_vc2015.sln new file mode 100644 index 00000000..aedd5e5e --- /dev/null +++ b/msvc/jemalloc_vc2015.sln @@ -0,0 +1,63 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.24720.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}" + ProjectSection(SolutionItems) = preProject + ReadMe.txt = ReadMe.txt + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2015\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2015\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Debug-static|x64 = Debug-static|x64 + Debug-static|x86 = Debug-static|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + Release-static|x64 = Release-static|x64 + Release-static|x86 = Release-static|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj new file mode 100644 index 00000000..d8ad505b --- /dev/null +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -0,0 +1,392 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} + Win32Proj + jemalloc + 8.1 + + + + DynamicLibrary + true + v140 + MultiByte + + + StaticLibrary + true + v140 + MultiByte + + + DynamicLibrary + false + v140 + true + MultiByte + + + StaticLibrary + false + v140 + true + MultiByte + + + DynamicLibrary + true + v140 + MultiByte + + + StaticLibrary + true + v140 + MultiByte + + + DynamicLibrary + false + v140 + true + MultiByte + + + StaticLibrary + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + + + + Level3 + Disabled + _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters new file mode 100644 index 00000000..89a51f76 --- /dev/null +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -0,0 +1,245 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {5697dfa3-16cf-4932-b428-6e0ec6e9f98e} + + + {0cbd2ca6-42a7-4f82-8517-d7e7a14fd986} + + + {0abe6f30-49b5-46dd-8aca-6e33363fa52c} + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\msvc_compat + + + Header Files\msvc_compat + + + Header Files\msvc_compat\C99 + + + Header Files\msvc_compat\C99 + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/msvc/projects/vc2015/test_threads/test_threads.cpp b/msvc/projects/vc2015/test_threads/test_threads.cpp new file mode 100644 index 00000000..c8cb7d66 --- /dev/null +++ b/msvc/projects/vc2015/test_threads/test_threads.cpp @@ -0,0 +1,89 @@ +// jemalloc C++ threaded test +// Author: Rustam Abdullaev +// Public Domain + +#include +#include +#include +#include +#include +#include +#include +#include + +using std::vector; +using std::thread; +using std::uniform_int_distribution; +using std::minstd_rand; + +int test_threads() +{ + je_malloc_conf = "narenas:3"; + int narenas = 0; + size_t sz = sizeof(narenas); + je_mallctl("opt.narenas", &narenas, &sz, NULL, 0); + if (narenas != 3) { + printf("Error: unexpected number of arenas: %d\n", narenas); + return 1; + } + static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 }; + static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0])); + vector workers; + static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50; + je_malloc_stats_print(NULL, NULL, NULL); + size_t allocated1; + size_t sz1 = sizeof(allocated1); + je_mallctl("stats.active", &allocated1, &sz1, NULL, 0); + printf("\nPress Enter to start threads...\n"); + getchar(); + printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2); + for (int i = 0; i < numThreads; i++) { + workers.emplace_back([tid=i]() { + uniform_int_distribution sizeDist(0, numSizes - 1); + minstd_rand rnd(tid * 17); + uint8_t* ptrs[numAllocsMax]; + int ptrsz[numAllocsMax]; + for (int i = 0; i < numIter1; ++i) { + thread t([&]() { + for (int i = 0; i < numIter2; ++i) { + const int numAllocs = numAllocsMax - sizeDist(rnd); + for (int j = 0; j < numAllocs; j += 64) { + const int x = sizeDist(rnd); + const int sz = sizes[x]; + ptrsz[j] = sz; + ptrs[j] = (uint8_t*)je_malloc(sz); + if (!ptrs[j]) { + printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d\n", sz, tid, i, j, x); + exit(1); + } + for (int k = 0; k < sz; k++) + ptrs[j][k] = tid + k; + } + for (int j = 0; j < numAllocs; j += 64) { + for (int k = 0, sz = ptrsz[j]; k < sz; k++) + if (ptrs[j][k] != (uint8_t)(tid + k)) { + printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); + exit(1); + } + je_free(ptrs[j]); + } + } + }); + t.join(); + } + }); + } + for (thread& t : workers) { + t.join(); + } + je_malloc_stats_print(NULL, NULL, NULL); + size_t allocated2; + je_mallctl("stats.active", &allocated2, &sz1, NULL, 0); + size_t leaked = allocated2 - allocated1; + printf("\nDone. Leaked: %zd bytes\n", leaked); + bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet) + printf("\nTest %s!\n", (failed ? "FAILED" : "successful")); + printf("\nPress Enter to continue...\n"); + getchar(); + return failed ? 1 : 0; +} diff --git a/msvc/projects/vc2015/test_threads/test_threads.h b/msvc/projects/vc2015/test_threads/test_threads.h new file mode 100644 index 00000000..64d0cdb3 --- /dev/null +++ b/msvc/projects/vc2015/test_threads/test_threads.h @@ -0,0 +1,3 @@ +#pragma once + +int test_threads(); diff --git a/msvc/projects/vc2015/test_threads/test_threads.vcxproj b/msvc/projects/vc2015/test_threads/test_threads.vcxproj new file mode 100644 index 00000000..b681e71e --- /dev/null +++ b/msvc/projects/vc2015/test_threads/test_threads.vcxproj @@ -0,0 +1,327 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {09028CFD-4EB7-491D-869C-0708DB97ED44} + Win32Proj + test_threads + 8.1 + + + + Application + true + v140 + MultiByte + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + Application + false + v140 + true + MultiByte + + + Application + true + v140 + MultiByte + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + _DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + {8d6bb292-9e1c-413d-9f98-4864bdc1514a} + + + + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters b/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters new file mode 100644 index 00000000..4c233407 --- /dev/null +++ b/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters @@ -0,0 +1,26 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/msvc/projects/vc2015/test_threads/test_threads_main.cpp b/msvc/projects/vc2015/test_threads/test_threads_main.cpp new file mode 100644 index 00000000..ffd96e6a --- /dev/null +++ b/msvc/projects/vc2015/test_threads/test_threads_main.cpp @@ -0,0 +1,12 @@ +#include "test_threads.h" +#include +#include +#include + +using namespace std::chrono_literals; + +int main(int argc, char** argv) +{ + int rc = test_threads(); + return rc; +} diff --git a/src/arena.c b/src/arena.c index 43733cc1..99e20fde 100644 --- a/src/arena.c +++ b/src/arena.c @@ -4,18 +4,32 @@ /******************************************************************************/ /* Data. */ +purge_mode_t opt_purge = PURGE_DEFAULT; +const char *purge_mode_names[] = { + "ratio", + "decay", + "N/A" +}; ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; static ssize_t lg_dirty_mult_default; +ssize_t opt_decay_time = DECAY_TIME_DEFAULT; +static ssize_t decay_time_default; + arena_bin_info_t arena_bin_info[NBINS]; size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ size_t large_maxclass; /* Max large size class. */ -static size_t small_maxrun; /* Max run size used for small size classes. */ +size_t run_quantize_max; /* Max run_quantize_*() input. */ +static size_t small_maxrun; /* Max run size for small size classes. */ static bool *small_run_tab; /* Valid small run page multiples. */ +static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */ +static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */ unsigned nlclasses; /* Number of large size classes. */ unsigned nhclasses; /* Number of huge size classes. */ +static szind_t runs_avail_bias; /* Size index for first runs_avail tree. */ +static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */ /******************************************************************************/ /* @@ -23,7 +37,7 @@ unsigned nhclasses; /* Number of huge size classes. */ * definition. */ -static void arena_purge(arena_t *arena, bool all); +static void arena_purge_to_limit(arena_t *arena, size_t ndirty_limit); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, bool decommitted); static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, @@ -33,42 +47,12 @@ static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, /******************************************************************************/ -#define CHUNK_MAP_KEY ((uintptr_t)0x1U) - -JEMALLOC_INLINE_C arena_chunk_map_misc_t * -arena_miscelm_key_create(size_t size) -{ - - return ((arena_chunk_map_misc_t *)(arena_mapbits_size_encode(size) | - CHUNK_MAP_KEY)); -} - -JEMALLOC_INLINE_C bool -arena_miscelm_is_key(const arena_chunk_map_misc_t *miscelm) -{ - - return (((uintptr_t)miscelm & CHUNK_MAP_KEY) != 0); -} - -#undef CHUNK_MAP_KEY - JEMALLOC_INLINE_C size_t -arena_miscelm_key_size_get(const arena_chunk_map_misc_t *miscelm) -{ - - assert(arena_miscelm_is_key(miscelm)); - - return (arena_mapbits_size_decode((uintptr_t)miscelm)); -} - -JEMALLOC_INLINE_C size_t -arena_miscelm_size_get(arena_chunk_map_misc_t *miscelm) +arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk; size_t pageind, mapbits; - assert(!arena_miscelm_is_key(miscelm)); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); pageind = arena_miscelm_to_pageind(miscelm); mapbits = arena_mapbits_get(chunk, pageind); @@ -76,7 +60,8 @@ arena_miscelm_size_get(arena_chunk_map_misc_t *miscelm) } JEMALLOC_INLINE_C int -arena_run_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) +arena_run_addr_comp(const arena_chunk_map_misc_t *a, + const arena_chunk_map_misc_t *b) { uintptr_t a_miscelm = (uintptr_t)a; uintptr_t b_miscelm = (uintptr_t)b; @@ -89,10 +74,10 @@ arena_run_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) /* Generate red-black tree functions. */ rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t, - rb_link, arena_run_comp) + rb_link, arena_run_addr_comp) static size_t -run_quantize(size_t size) +run_quantize_floor_compute(size_t size) { size_t qsize; @@ -110,13 +95,13 @@ run_quantize(size_t size) */ qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad; if (qsize <= SMALL_MAXCLASS + large_pad) - return (run_quantize(size - large_pad)); + return (run_quantize_floor_compute(size - large_pad)); assert(qsize <= size); return (qsize); } static size_t -run_quantize_next(size_t size) +run_quantize_ceil_compute_hard(size_t size) { size_t large_run_size_next; @@ -150,9 +135,9 @@ run_quantize_next(size_t size) } static size_t -run_quantize_first(size_t size) +run_quantize_ceil_compute(size_t size) { - size_t qsize = run_quantize(size); + size_t qsize = run_quantize_floor_compute(size); if (qsize < size) { /* @@ -163,65 +148,89 @@ run_quantize_first(size_t size) * search would potentially find sufficiently aligned available * memory somewhere lower. */ - qsize = run_quantize_next(size); + qsize = run_quantize_ceil_compute_hard(qsize); } return (qsize); } -JEMALLOC_INLINE_C int -arena_avail_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) +#ifdef JEMALLOC_JET +#undef run_quantize_floor +#define run_quantize_floor JEMALLOC_N(run_quantize_floor_impl) +#endif +static size_t +run_quantize_floor(size_t size) { - int ret; - uintptr_t a_miscelm = (uintptr_t)a; - size_t a_qsize = run_quantize(arena_miscelm_is_key(a) ? - arena_miscelm_key_size_get(a) : arena_miscelm_size_get(a)); - size_t b_qsize = run_quantize(arena_miscelm_size_get(b)); + size_t ret; - /* - * Compare based on quantized size rather than size, in order to sort - * equally useful runs only by address. - */ - ret = (a_qsize > b_qsize) - (a_qsize < b_qsize); - if (ret == 0) { - if (!arena_miscelm_is_key(a)) { - uintptr_t b_miscelm = (uintptr_t)b; - - ret = (a_miscelm > b_miscelm) - (a_miscelm < b_miscelm); - } else { - /* - * Treat keys as if they are lower than anything else. - */ - ret = -1; - } - } + assert(size > 0); + assert(size <= run_quantize_max); + assert((size & PAGE_MASK) == 0); + ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1]; + assert(ret == run_quantize_floor_compute(size)); return (ret); } +#ifdef JEMALLOC_JET +#undef run_quantize_floor +#define run_quantize_floor JEMALLOC_N(run_quantize_floor) +run_quantize_t *run_quantize_floor = JEMALLOC_N(run_quantize_floor_impl); +#endif -/* Generate red-black tree functions. */ -rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, - arena_chunk_map_misc_t, rb_link, arena_avail_comp) +#ifdef JEMALLOC_JET +#undef run_quantize_ceil +#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil_impl) +#endif +static size_t +run_quantize_ceil(size_t size) +{ + size_t ret; + + assert(size > 0); + assert(size <= run_quantize_max); + assert((size & PAGE_MASK) == 0); + + ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1]; + assert(ret == run_quantize_ceil_compute(size)); + return (ret); +} +#ifdef JEMALLOC_JET +#undef run_quantize_ceil +#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil) +run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl); +#endif + +static arena_run_tree_t * +arena_runs_avail_get(arena_t *arena, szind_t ind) +{ + + assert(ind >= runs_avail_bias); + assert(ind - runs_avail_bias < runs_avail_nclasses); + + return (&arena->runs_avail[ind - runs_avail_bias]); +} static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - + szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + arena_miscelm_get(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_avail_tree_insert(&arena->runs_avail, arena_miscelm_get(chunk, - pageind)); + arena_run_tree_insert(arena_runs_avail_get(arena, ind), + arena_miscelm_get(chunk, pageind)); } static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - + szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + arena_miscelm_get(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_avail_tree_remove(&arena->runs_avail, arena_miscelm_get(chunk, - pageind)); + arena_run_tree_remove(arena_runs_avail_get(arena, ind), + arena_miscelm_get(chunk, pageind)); } static void @@ -292,14 +301,14 @@ JEMALLOC_INLINE_C void * arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { void *ret; - unsigned regind; + size_t regind; arena_chunk_map_misc_t *miscelm; void *rpages; assert(run->nfree > 0); assert(!bitmap_full(run->bitmap, &bin_info->bitmap_info)); - regind = bitmap_sfu(run->bitmap, &bin_info->bitmap_info); + regind = (unsigned)bitmap_sfu(run->bitmap, &bin_info->bitmap_info); miscelm = arena_run_to_miscelm(run); rpages = arena_miscelm_to_rpages(miscelm); ret = (void *)((uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset + @@ -316,7 +325,7 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) size_t mapbits = arena_mapbits_get(chunk, pageind); szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; - unsigned regind = arena_run_regind(run, bin_info, ptr); + size_t regind = arena_run_regind(run, bin_info, ptr); assert(run->nfree < bin_info->nregs); /* Freeing an interior pointer can cause assertion failure. */ @@ -364,16 +373,30 @@ arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) } static void -arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages) +arena_nactive_add(arena_t *arena, size_t add_pages) { if (config_stats) { - ssize_t cactive_diff = CHUNK_CEILING((arena->nactive + add_pages - - sub_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << + size_t cactive_add = CHUNK_CEILING((arena->nactive + + add_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << LG_PAGE); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); + if (cactive_add != 0) + stats_cactive_add(cactive_add); } + arena->nactive += add_pages; +} + +static void +arena_nactive_sub(arena_t *arena, size_t sub_pages) +{ + + if (config_stats) { + size_t cactive_sub = CHUNK_CEILING(arena->nactive << LG_PAGE) - + CHUNK_CEILING((arena->nactive - sub_pages) << LG_PAGE); + if (cactive_sub != 0) + stats_cactive_sub(cactive_sub); + } + arena->nactive -= sub_pages; } static void @@ -394,8 +417,7 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, arena_avail_remove(arena, chunk, run_ind, total_pages); if (flag_dirty != 0) arena_run_dirty_remove(arena, chunk, run_ind, total_pages); - arena_cactive_update(arena, need_pages, 0); - arena->nactive += need_pages; + arena_nactive_add(arena, need_pages); /* Keep track of trailing unused pages for later use. */ if (rem_pages > 0) { @@ -711,7 +733,6 @@ arena_chunk_alloc(arena_t *arena) return (NULL); } - /* Insert the run into the runs_avail tree. */ arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias); return (chunk); @@ -732,10 +753,7 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) assert(arena_mapbits_decommitted_get(chunk, map_bias) == arena_mapbits_decommitted_get(chunk, chunk_npages-1)); - /* - * Remove run from the runs_avail tree, so that the arena does not use - * it. - */ + /* Remove run from runs_avail, so that the arena does not use it. */ arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias); if (arena->spare != NULL) { @@ -888,7 +906,7 @@ arena_chunk_alloc_huge_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, arena_huge_malloc_stats_update_undo(arena, usize); arena->stats.mapped -= usize; } - arena->nactive -= (usize >> LG_PAGE); + arena_nactive_sub(arena, usize >> LG_PAGE); malloc_mutex_unlock(&arena->lock); } @@ -910,7 +928,7 @@ arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, arena_huge_malloc_stats_update(arena, usize); arena->stats.mapped += usize; } - arena->nactive += (usize >> LG_PAGE); + arena_nactive_add(arena, usize >> LG_PAGE); ret = chunk_alloc_cache(arena, &chunk_hooks, NULL, csize, alignment, zero, true); @@ -920,8 +938,6 @@ arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, alignment, zero, csize); } - if (config_stats && ret != NULL) - stats_cactive_add(usize); return (ret); } @@ -936,9 +952,8 @@ arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize) if (config_stats) { arena_huge_dalloc_stats_update(arena, usize); arena->stats.mapped -= usize; - stats_cactive_sub(usize); } - arena->nactive -= (usize >> LG_PAGE); + arena_nactive_sub(arena, usize >> LG_PAGE); chunk_dalloc_cache(arena, &chunk_hooks, chunk, csize, true); malloc_mutex_unlock(&arena->lock); @@ -955,17 +970,10 @@ arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, size_t oldsize, malloc_mutex_lock(&arena->lock); if (config_stats) arena_huge_ralloc_stats_update(arena, oldsize, usize); - if (oldsize < usize) { - size_t udiff = usize - oldsize; - arena->nactive += udiff >> LG_PAGE; - if (config_stats) - stats_cactive_add(udiff); - } else { - size_t udiff = oldsize - usize; - arena->nactive -= udiff >> LG_PAGE; - if (config_stats) - stats_cactive_sub(udiff); - } + if (oldsize < usize) + arena_nactive_add(arena, (usize - oldsize) >> LG_PAGE); + else + arena_nactive_sub(arena, (oldsize - usize) >> LG_PAGE); malloc_mutex_unlock(&arena->lock); } @@ -979,12 +987,10 @@ arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, malloc_mutex_lock(&arena->lock); if (config_stats) { arena_huge_ralloc_stats_update(arena, oldsize, usize); - if (cdiff != 0) { + if (cdiff != 0) arena->stats.mapped -= cdiff; - stats_cactive_sub(udiff); - } } - arena->nactive -= udiff >> LG_PAGE; + arena_nactive_sub(arena, udiff >> LG_PAGE); if (cdiff != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; @@ -1014,7 +1020,7 @@ arena_chunk_ralloc_huge_expand_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, usize); arena->stats.mapped -= cdiff; } - arena->nactive -= (udiff >> LG_PAGE); + arena_nactive_sub(arena, udiff >> LG_PAGE); malloc_mutex_unlock(&arena->lock); } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { @@ -1042,7 +1048,7 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, arena_huge_ralloc_stats_update(arena, oldsize, usize); arena->stats.mapped += cdiff; } - arena->nactive += (udiff >> LG_PAGE); + arena_nactive_add(arena, udiff >> LG_PAGE); err = (chunk_alloc_cache(arena, &arena->chunk_hooks, nchunk, cdiff, chunksize, zero, true) == NULL); @@ -1058,26 +1064,28 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, err = true; } - if (config_stats && !err) - stats_cactive_add(udiff); return (err); } /* * Do first-best-fit run selection, i.e. select the lowest run that best fits. - * Run sizes are quantized, so not all candidate runs are necessarily exactly - * the same size. + * Run sizes are indexed, so not all candidate runs are necessarily exactly the + * same size. */ static arena_run_t * arena_run_first_best_fit(arena_t *arena, size_t size) { - size_t search_size = run_quantize_first(size); - arena_chunk_map_misc_t *key = arena_miscelm_key_create(search_size); - arena_chunk_map_misc_t *miscelm = - arena_avail_tree_nsearch(&arena->runs_avail, key); - if (miscelm == NULL) - return (NULL); - return (&miscelm->run); + szind_t ind, i; + + ind = size2index(run_quantize_ceil(size)); + for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) { + arena_chunk_map_misc_t *miscelm = arena_run_tree_first( + arena_runs_avail_get(arena, i)); + if (miscelm != NULL) + return (&miscelm->run); + } + + return (NULL); } static arena_run_t * @@ -1204,16 +1212,194 @@ arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult) return (false); } -void -arena_maybe_purge(arena_t *arena) +static void +arena_decay_deadline_init(arena_t *arena) { + assert(opt_purge == purge_mode_decay); + + /* + * Generate a new deadline that is uniformly random within the next + * epoch after the current one. + */ + nstime_copy(&arena->decay_deadline, &arena->decay_epoch); + nstime_add(&arena->decay_deadline, &arena->decay_interval); + if (arena->decay_time > 0) { + nstime_t jitter; + + nstime_init(&jitter, prng_range(&arena->decay_jitter_state, + nstime_ns(&arena->decay_interval))); + nstime_add(&arena->decay_deadline, &jitter); + } +} + +static bool +arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time) +{ + + assert(opt_purge == purge_mode_decay); + + return (nstime_compare(&arena->decay_deadline, time) <= 0); +} + +static size_t +arena_decay_backlog_npages_limit(const arena_t *arena) +{ + static const uint64_t h_steps[] = { +#define STEP(step, h, x, y) \ + h, + SMOOTHSTEP +#undef STEP + }; + uint64_t sum; + size_t npages_limit_backlog; + unsigned i; + + assert(opt_purge == purge_mode_decay); + + /* + * For each element of decay_backlog, multiply by the corresponding + * fixed-point smoothstep decay factor. Sum the products, then divide + * to round down to the nearest whole number of pages. + */ + sum = 0; + for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) + sum += arena->decay_backlog[i] * h_steps[i]; + npages_limit_backlog = (sum >> SMOOTHSTEP_BFP); + + return (npages_limit_backlog); +} + +static void +arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) +{ + uint64_t nadvance; + nstime_t delta; + size_t ndirty_delta; + + assert(opt_purge == purge_mode_decay); + assert(arena_decay_deadline_reached(arena, time)); + + nstime_copy(&delta, time); + nstime_subtract(&delta, &arena->decay_epoch); + nadvance = nstime_divide(&delta, &arena->decay_interval); + assert(nadvance > 0); + + /* Add nadvance decay intervals to epoch. */ + nstime_copy(&delta, &arena->decay_interval); + nstime_imultiply(&delta, nadvance); + nstime_add(&arena->decay_epoch, &delta); + + /* Set a new deadline. */ + arena_decay_deadline_init(arena); + + /* Update the backlog. */ + if (nadvance >= SMOOTHSTEP_NSTEPS) { + memset(arena->decay_backlog, 0, (SMOOTHSTEP_NSTEPS-1) * + sizeof(size_t)); + } else { + memmove(arena->decay_backlog, &arena->decay_backlog[nadvance], + (SMOOTHSTEP_NSTEPS - nadvance) * sizeof(size_t)); + if (nadvance > 1) { + memset(&arena->decay_backlog[SMOOTHSTEP_NSTEPS - + nadvance], 0, (nadvance-1) * sizeof(size_t)); + } + } + ndirty_delta = (arena->ndirty > arena->decay_ndirty) ? arena->ndirty - + arena->decay_ndirty : 0; + arena->decay_ndirty = arena->ndirty; + arena->decay_backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; + arena->decay_backlog_npages_limit = + arena_decay_backlog_npages_limit(arena); +} + +static size_t +arena_decay_npages_limit(arena_t *arena) +{ + size_t npages_limit; + + assert(opt_purge == purge_mode_decay); + + npages_limit = arena->decay_backlog_npages_limit; + + /* Add in any dirty pages created during the current epoch. */ + if (arena->ndirty > arena->decay_ndirty) + npages_limit += arena->ndirty - arena->decay_ndirty; + + return (npages_limit); +} + +static void +arena_decay_init(arena_t *arena, ssize_t decay_time) +{ + + arena->decay_time = decay_time; + if (decay_time > 0) { + nstime_init2(&arena->decay_interval, decay_time, 0); + nstime_idivide(&arena->decay_interval, SMOOTHSTEP_NSTEPS); + } + + nstime_init(&arena->decay_epoch, 0); + nstime_update(&arena->decay_epoch); + arena->decay_jitter_state = (uint64_t)(uintptr_t)arena; + arena_decay_deadline_init(arena); + arena->decay_ndirty = arena->ndirty; + arena->decay_backlog_npages_limit = 0; + memset(arena->decay_backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); +} + +static bool +arena_decay_time_valid(ssize_t decay_time) +{ + + return (decay_time >= -1 && decay_time <= NSTIME_SEC_MAX); +} + +ssize_t +arena_decay_time_get(arena_t *arena) +{ + ssize_t decay_time; + + malloc_mutex_lock(&arena->lock); + decay_time = arena->decay_time; + malloc_mutex_unlock(&arena->lock); + + return (decay_time); +} + +bool +arena_decay_time_set(arena_t *arena, ssize_t decay_time) +{ + + if (!arena_decay_time_valid(decay_time)) + return (true); + + malloc_mutex_lock(&arena->lock); + /* + * Restart decay backlog from scratch, which may cause many dirty pages + * to be immediately purged. It would conceptually be possible to map + * the old backlog onto the new backlog, but there is no justification + * for such complexity since decay_time changes are intended to be + * infrequent, either between the {-1, 0, >0} states, or a one-time + * arbitrary change during initial arena configuration. + */ + arena_decay_init(arena, decay_time); + arena_maybe_purge(arena); + malloc_mutex_unlock(&arena->lock); + + return (false); +} + +static void +arena_maybe_purge_ratio(arena_t *arena) +{ + + assert(opt_purge == purge_mode_ratio); + /* Don't purge if the option is disabled. */ if (arena->lg_dirty_mult < 0) return; - /* Don't recursively purge. */ - if (arena->purging) - return; + /* * Iterate, since preventing recursive purging could otherwise leave too * many dirty pages. @@ -1228,10 +1414,59 @@ arena_maybe_purge(arena_t *arena) */ if (arena->ndirty <= threshold) return; - arena_purge(arena, false); + arena_purge_to_limit(arena, threshold); } } +static void +arena_maybe_purge_decay(arena_t *arena) +{ + nstime_t time; + size_t ndirty_limit; + + assert(opt_purge == purge_mode_decay); + + /* Purge all or nothing if the option is disabled. */ + if (arena->decay_time <= 0) { + if (arena->decay_time == 0) + arena_purge_to_limit(arena, 0); + return; + } + + nstime_copy(&time, &arena->decay_epoch); + if (unlikely(nstime_update(&time))) { + /* Time went backwards. Force an epoch advance. */ + nstime_copy(&time, &arena->decay_deadline); + } + + if (arena_decay_deadline_reached(arena, &time)) + arena_decay_epoch_advance(arena, &time); + + ndirty_limit = arena_decay_npages_limit(arena); + + /* + * Don't try to purge unless the number of purgeable pages exceeds the + * current limit. + */ + if (arena->ndirty <= ndirty_limit) + return; + arena_purge_to_limit(arena, ndirty_limit); +} + +void +arena_maybe_purge(arena_t *arena) +{ + + /* Don't recursively purge. */ + if (arena->purging) + return; + + if (opt_purge == purge_mode_ratio) + arena_maybe_purge_ratio(arena); + else + arena_maybe_purge_decay(arena); +} + static size_t arena_dirty_count(arena_t *arena) { @@ -1267,35 +1502,15 @@ arena_dirty_count(arena_t *arena) } static size_t -arena_compute_npurge(arena_t *arena, bool all) -{ - size_t npurge; - - /* - * Compute the minimum number of pages that this thread should try to - * purge. - */ - if (!all) { - size_t threshold = (arena->nactive >> arena->lg_dirty_mult); - threshold = threshold < chunk_npages ? chunk_npages : threshold; - - npurge = arena->ndirty - threshold; - } else - npurge = arena->ndirty; - - return (npurge); -} - -static size_t -arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, - size_t npurge, arena_runs_dirty_link_t *purge_runs_sentinel, +arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, + size_t ndirty_limit, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { arena_runs_dirty_link_t *rdelm, *rdelm_next; extent_node_t *chunkselm; size_t nstashed = 0; - /* Stash at least npurge pages. */ + /* Stash runs/chunks according to ndirty_limit. */ for (rdelm = qr_next(&arena->runs_dirty, rd_link), chunkselm = qr_next(&arena->chunks_cache, cc_link); rdelm != &arena->runs_dirty; rdelm = rdelm_next) { @@ -1307,6 +1522,11 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, bool zero; UNUSED void *chunk; + npages = extent_node_size_get(chunkselm) >> LG_PAGE; + if (opt_purge == purge_mode_decay && arena->ndirty - + (nstashed + npages) < ndirty_limit) + break; + chunkselm_next = qr_next(chunkselm, cc_link); /* * Allocate. chunkselm remains valid due to the @@ -1321,7 +1541,8 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, assert(zero == extent_node_zeroed_get(chunkselm)); extent_node_dirty_insert(chunkselm, purge_runs_sentinel, purge_chunks_sentinel); - npages = extent_node_size_get(chunkselm) >> LG_PAGE; + assert(npages == (extent_node_size_get(chunkselm) >> + LG_PAGE)); chunkselm = chunkselm_next; } else { arena_chunk_t *chunk = @@ -1334,6 +1555,9 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, arena_mapbits_unallocated_size_get(chunk, pageind); npages = run_size >> LG_PAGE; + if (opt_purge == purge_mode_decay && arena->ndirty - + (nstashed + npages) < ndirty_limit) + break; assert(pageind + npages <= chunk_npages); assert(arena_mapbits_dirty_get(chunk, pageind) == @@ -1359,7 +1583,8 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, } nstashed += npages; - if (!all && nstashed >= npurge) + if (opt_purge == purge_mode_ratio && arena->ndirty - nstashed <= + ndirty_limit) break; } @@ -1499,11 +1724,20 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, } } +/* + * NB: ndirty_limit is interpreted differently depending on opt_purge: + * - purge_mode_ratio: Purge as few dirty run/chunks as possible to reach the + * desired state: + * (arena->ndirty <= ndirty_limit) + * - purge_mode_decay: Purge as many dirty runs/chunks as possible without + * violating the invariant: + * (arena->ndirty >= ndirty_limit) + */ static void -arena_purge(arena_t *arena, bool all) +arena_purge_to_limit(arena_t *arena, size_t ndirty_limit) { chunk_hooks_t chunk_hooks = chunk_hooks_get(arena); - size_t npurge, npurgeable, npurged; + size_t npurge, npurged; arena_runs_dirty_link_t purge_runs_sentinel; extent_node_t purge_chunks_sentinel; @@ -1517,33 +1751,38 @@ arena_purge(arena_t *arena, bool all) size_t ndirty = arena_dirty_count(arena); assert(ndirty == arena->ndirty); } - assert((arena->nactive >> arena->lg_dirty_mult) < arena->ndirty || all); + assert(opt_purge != purge_mode_ratio || (arena->nactive >> + arena->lg_dirty_mult) < arena->ndirty || ndirty_limit == 0); + + qr_new(&purge_runs_sentinel, rd_link); + extent_node_dirty_linkage_init(&purge_chunks_sentinel); + + npurge = arena_stash_dirty(arena, &chunk_hooks, ndirty_limit, + &purge_runs_sentinel, &purge_chunks_sentinel); + if (npurge == 0) + goto label_return; + npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel, + &purge_chunks_sentinel); + assert(npurged == npurge); + arena_unstash_purged(arena, &chunk_hooks, &purge_runs_sentinel, + &purge_chunks_sentinel); if (config_stats) arena->stats.npurge++; - npurge = arena_compute_npurge(arena, all); - qr_new(&purge_runs_sentinel, rd_link); - extent_node_dirty_linkage_init(&purge_chunks_sentinel); - - npurgeable = arena_stash_dirty(arena, &chunk_hooks, all, npurge, - &purge_runs_sentinel, &purge_chunks_sentinel); - assert(npurgeable >= npurge); - npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel, - &purge_chunks_sentinel); - assert(npurged == npurgeable); - arena_unstash_purged(arena, &chunk_hooks, &purge_runs_sentinel, - &purge_chunks_sentinel); - +label_return: arena->purging = false; } void -arena_purge_all(arena_t *arena) +arena_purge(arena_t *arena, bool all) { malloc_mutex_lock(&arena->lock); - arena_purge(arena, true); + if (all) + arena_purge_to_limit(arena, 0); + else + arena_maybe_purge(arena); malloc_mutex_unlock(&arena->lock); } @@ -1660,18 +1899,6 @@ arena_run_size_get(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, return (size); } -static bool -arena_run_decommit(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run) -{ - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - size_t run_ind = arena_miscelm_to_pageind(miscelm); - size_t offset = run_ind << LG_PAGE; - size_t length = arena_run_size_get(arena, chunk, run, run_ind); - - return (arena->chunk_hooks.decommit(chunk, chunksize, offset, length, - arena->ind)); -} - static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, bool decommitted) @@ -1687,8 +1914,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, assert(run_ind < chunk_npages); size = arena_run_size_get(arena, chunk, run, run_ind); run_pages = (size >> LG_PAGE); - arena_cactive_update(arena, 0, run_pages); - arena->nactive -= run_pages; + arena_nactive_sub(arena, run_pages); /* * The run is dirty if the caller claims to have dirtied it, as well as @@ -1749,15 +1975,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, arena_maybe_purge(arena); } -static void -arena_run_dalloc_decommit(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run) -{ - bool committed = arena_run_decommit(arena, chunk, run); - - arena_run_dalloc(arena, run, committed, false, !committed); -} - static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize) @@ -1986,8 +2203,8 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) } void -arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, - uint64_t prof_accumbytes) +arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, + szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; arena_bin_t *bin; @@ -2010,11 +2227,10 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, /* * OOM. tbin->avail isn't yet filled down to its first * element, so the successful allocations (if any) must - * be moved to the base of tbin->avail before bailing - * out. + * be moved just before tbin->avail before bailing out. */ if (i > 0) { - memmove(tbin->avail, &tbin->avail[nfill - i], + memmove(tbin->avail - i, tbin->avail - nfill, i * sizeof(void *)); } break; @@ -2024,7 +2240,7 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, true); } /* Insert such that low regions get used first. */ - tbin->avail[nfill - 1 - i] = ptr; + *(tbin->avail - nfill + i) = ptr; } if (config_stats) { bin->stats.nmalloc += i; @@ -2035,6 +2251,7 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, } malloc_mutex_unlock(&bin->lock); tbin->ncached = i; + arena_decay_tick(tsd, arena); } void @@ -2144,18 +2361,17 @@ arena_quarantine_junk_small(void *ptr, size_t usize) arena_redzones_validate(ptr, bin_info, true); } -void * -arena_malloc_small(arena_t *arena, size_t size, bool zero) +static void * +arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) { void *ret; arena_bin_t *bin; + size_t usize; arena_run_t *run; - szind_t binind; - binind = size2index(size); assert(binind < NBINS); bin = &arena->bins[binind]; - size = index2size(binind); + usize = index2size(binind); malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) @@ -2174,7 +2390,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) bin->stats.curregs++; } malloc_mutex_unlock(&bin->lock); - if (config_prof && !isthreaded && arena_prof_accum(arena, size)) + if (config_prof && !isthreaded && arena_prof_accum(arena, usize)) prof_idump(); if (!zero) { @@ -2183,23 +2399,24 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) arena_alloc_junk_small(ret, &arena_bin_info[binind], false); } else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, usize); } else { if (config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - memset(ret, 0, size); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, usize); + memset(ret, 0, usize); } + arena_decay_tick(tsd, arena); return (ret); } void * -arena_malloc_large(arena_t *arena, size_t size, bool zero) +arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) { void *ret; size_t usize; @@ -2209,7 +2426,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) UNUSED bool idump; /* Large allocation. */ - usize = s2u(size); + usize = index2size(binind); malloc_mutex_lock(&arena->lock); if (config_cache_oblivious) { uint64_t r; @@ -2219,9 +2436,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) * that is a multiple of the cacheline size, e.g. [0 .. 63) * 64 * for 4 KiB pages and 64-byte cachelines. */ - prng64(r, LG_PAGE - LG_CACHELINE, arena->offset_state, - UINT64_C(6364136223846793009), - UINT64_C(1442695040888963409)); + r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE); random_offset = ((uintptr_t)r) << LG_CACHELINE; } else random_offset = 0; @@ -2234,7 +2449,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) ret = (void *)((uintptr_t)arena_miscelm_to_rpages(miscelm) + random_offset); if (config_stats) { - szind_t index = size2index(usize) - NBINS; + szind_t index = binind - NBINS; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; @@ -2258,9 +2473,26 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) } } + arena_decay_tick(tsd, arena); return (ret); } +void * +arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, + bool zero, tcache_t *tcache) +{ + + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + if (likely(size <= SMALL_MAXCLASS)) + return (arena_malloc_small(tsd, arena, ind, zero)); + if (likely(size <= large_maxclass)) + return (arena_malloc_large(tsd, arena, ind, zero)); + return (huge_malloc(tsd, arena, index2size(ind), zero, tcache)); +} + /* Only handles large allocations that require more than page alignment. */ static void * arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, @@ -2344,6 +2576,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, else if (unlikely(opt_zero)) memset(ret, 0, usize); } + arena_decay_tick(tsd, arena); return (ret); } @@ -2356,7 +2589,8 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE && (usize & PAGE_MASK) == 0))) { /* Small; alignment doesn't require special run placement. */ - ret = arena_malloc(tsd, arena, usize, zero, tcache); + ret = arena_malloc(tsd, arena, usize, size2index(usize), zero, + tcache, true); } else if (usize <= large_maxclass && alignment <= PAGE) { /* * Large; alignment doesn't require special run placement. @@ -2364,7 +2598,8 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, * the base of the run, so do some bit manipulation to retrieve * the base. */ - ret = arena_malloc(tsd, arena, usize, zero, tcache); + ret = arena_malloc(tsd, arena, usize, size2index(usize), zero, + tcache, true); if (config_cache_oblivious) ret = (void *)((uintptr_t)ret & ~PAGE_MASK); } else { @@ -2441,7 +2676,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, malloc_mutex_unlock(&bin->lock); /******************************/ malloc_mutex_lock(&arena->lock); - arena_run_dalloc_decommit(arena, chunk, run); + arena_run_dalloc(arena, run, true, false, false); malloc_mutex_unlock(&arena->lock); /****************************/ malloc_mutex_lock(&bin->lock); @@ -2528,7 +2763,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, } void -arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, +arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind) { arena_chunk_map_bits_t *bitselm; @@ -2540,6 +2775,7 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, } bitselm = arena_bitselm_get(chunk, pageind); arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm); + arena_decay_tick(tsd, arena); } #ifdef JEMALLOC_JET @@ -2584,7 +2820,7 @@ arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, } } - arena_run_dalloc_decommit(arena, chunk, run); + arena_run_dalloc(arena, run, true, false, false); } void @@ -2596,12 +2832,13 @@ arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, } void -arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) +arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr) { malloc_mutex_lock(&arena->lock); arena_dalloc_large_locked_impl(arena, chunk, ptr, false); malloc_mutex_unlock(&arena->lock); + arena_decay_tick(tsd, arena); } static void @@ -2802,14 +3039,22 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, } bool -arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, - bool zero) +arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero) { size_t usize_min, usize_max; + /* Calls with non-zero extra had to clamp extra. */ + assert(extra == 0 || size + extra <= HUGE_MAXCLASS); + + if (unlikely(size > HUGE_MAXCLASS)) + return (true); + usize_min = s2u(size); usize_max = s2u(size + extra); if (likely(oldsize <= large_maxclass && usize_min <= large_maxclass)) { + arena_chunk_t *chunk; + /* * Avoid moving the allocation if the size class can be left the * same. @@ -2817,23 +3062,24 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, if (oldsize <= SMALL_MAXCLASS) { assert(arena_bin_info[size2index(oldsize)].reg_size == oldsize); - if ((usize_max <= SMALL_MAXCLASS && - size2index(usize_max) == size2index(oldsize)) || - (size <= oldsize && usize_max >= oldsize)) - return (false); + if ((usize_max > SMALL_MAXCLASS || + size2index(usize_max) != size2index(oldsize)) && + (size > oldsize || usize_max < oldsize)) + return (true); } else { - if (usize_max > SMALL_MAXCLASS) { - if (!arena_ralloc_large(ptr, oldsize, usize_min, - usize_max, zero)) - return (false); - } + if (usize_max <= SMALL_MAXCLASS) + return (true); + if (arena_ralloc_large(ptr, oldsize, usize_min, + usize_max, zero)) + return (true); } - /* Reallocation would require a move. */ - return (true); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena_decay_tick(tsd, extent_node_arena_get(&chunk->node)); + return (false); } else { - return (huge_ralloc_no_move(ptr, oldsize, usize_min, usize_max, - zero)); + return (huge_ralloc_no_move(tsd, ptr, oldsize, usize_min, + usize_max, zero)); } } @@ -2843,9 +3089,10 @@ arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, { if (alignment == 0) - return (arena_malloc(tsd, arena, usize, zero, tcache)); + return (arena_malloc(tsd, arena, usize, size2index(usize), zero, + tcache, true)); usize = sa2u(usize, alignment); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); } @@ -2858,14 +3105,14 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t usize; usize = s2u(size); - if (usize == 0) + if (unlikely(usize == 0 || size > HUGE_MAXCLASS)) return (NULL); if (likely(usize <= large_maxclass)) { size_t copysize; /* Try to avoid moving the allocation. */ - if (!arena_ralloc_no_move(ptr, oldsize, usize, 0, zero)) + if (!arena_ralloc_no_move(tsd, ptr, oldsize, usize, 0, zero)) return (ptr); /* @@ -2928,25 +3175,72 @@ bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult) { + if (opt_purge != purge_mode_ratio) + return (true); if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) return (true); atomic_write_z((size_t *)&lg_dirty_mult_default, (size_t)lg_dirty_mult); return (false); } +ssize_t +arena_decay_time_default_get(void) +{ + + return ((ssize_t)atomic_read_z((size_t *)&decay_time_default)); +} + +bool +arena_decay_time_default_set(ssize_t decay_time) +{ + + if (opt_purge != purge_mode_decay) + return (true); + if (!arena_decay_time_valid(decay_time)) + return (true); + atomic_write_z((size_t *)&decay_time_default, (size_t)decay_time); + return (false); +} + +static void +arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, + size_t *nactive, size_t *ndirty) +{ + + *nthreads += arena_nthreads_get(arena); + *dss = dss_prec_names[arena->dss_prec]; + *lg_dirty_mult = arena->lg_dirty_mult; + *decay_time = arena->decay_time; + *nactive += arena->nactive; + *ndirty += arena->ndirty; +} + void -arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, - size_t *nactive, size_t *ndirty, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, - malloc_huge_stats_t *hstats) +arena_basic_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, + ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, + size_t *ndirty) +{ + + malloc_mutex_lock(&arena->lock); + arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, + decay_time, nactive, ndirty); + malloc_mutex_unlock(&arena->lock); +} + +void +arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, + ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, + size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats) { unsigned i; + cassert(config_stats); + malloc_mutex_lock(&arena->lock); - *dss = dss_prec_names[arena->dss_prec]; - *lg_dirty_mult = arena->lg_dirty_mult; - *nactive += arena->nactive; - *ndirty += arena->ndirty; + arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, + decay_time, nactive, ndirty); astats->mapped += arena->stats.mapped; astats->npurge += arena->stats.npurge; @@ -2995,23 +3289,48 @@ arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, } } +unsigned +arena_nthreads_get(arena_t *arena) +{ + + return (atomic_read_u(&arena->nthreads)); +} + +void +arena_nthreads_inc(arena_t *arena) +{ + + atomic_add_u(&arena->nthreads, 1); +} + +void +arena_nthreads_dec(arena_t *arena) +{ + + atomic_sub_u(&arena->nthreads, 1); +} + arena_t * arena_new(unsigned ind) { arena_t *arena; + size_t arena_size; unsigned i; arena_bin_t *bin; + /* Compute arena size to incorporate sufficient runs_avail elements. */ + arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_tree_t) * + runs_avail_nclasses); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly * because there is no way to clean up if base_alloc() OOMs. */ if (config_stats) { - arena = (arena_t *)base_alloc(CACHELINE_CEILING(sizeof(arena_t)) - + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + + arena = (arena_t *)base_alloc(CACHELINE_CEILING(arena_size) + + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + nhclasses) * sizeof(malloc_huge_stats_t)); } else - arena = (arena_t *)base_alloc(sizeof(arena_t)); + arena = (arena_t *)base_alloc(arena_size); if (arena == NULL) return (NULL); @@ -3023,11 +3342,11 @@ arena_new(unsigned ind) if (config_stats) { memset(&arena->stats, 0, sizeof(arena_stats_t)); arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(sizeof(arena_t))); + + CACHELINE_CEILING(arena_size)); memset(arena->stats.lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(sizeof(arena_t)) + + + CACHELINE_CEILING(arena_size) + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t))); memset(arena->stats.hstats, 0, nhclasses * sizeof(malloc_huge_stats_t)); @@ -3059,10 +3378,14 @@ arena_new(unsigned ind) arena->nactive = 0; arena->ndirty = 0; - arena_avail_tree_new(&arena->runs_avail); + for(i = 0; i < runs_avail_nclasses; i++) + arena_run_tree_new(&arena->runs_avail[i]); qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); + if (opt_purge == purge_mode_decay) + arena_decay_init(arena, arena_decay_time_default_get()); + ql_new(&arena->huge); if (malloc_mutex_init(&arena->huge_mtx)) return (NULL); @@ -3117,8 +3440,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) * be twice as large in order to maintain alignment. */ if (config_fill && unlikely(opt_redzone)) { - size_t align_min = ZU(1) << (jemalloc_ffs(bin_info->reg_size) - - 1); + size_t align_min = ZU(1) << (ffs_zu(bin_info->reg_size) - 1); if (align_min <= REDZONE_MINSIZE) { bin_info->redzone_size = REDZONE_MINSIZE; pad_size = 0; @@ -3138,18 +3460,19 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) * size). */ try_run_size = PAGE; - try_nregs = try_run_size / bin_info->reg_size; + try_nregs = (uint32_t)(try_run_size / bin_info->reg_size); do { perfect_run_size = try_run_size; perfect_nregs = try_nregs; try_run_size += PAGE; - try_nregs = try_run_size / bin_info->reg_size; + try_nregs = (uint32_t)(try_run_size / bin_info->reg_size); } while (perfect_run_size != perfect_nregs * bin_info->reg_size); assert(perfect_nregs <= RUN_MAXREGS); actual_run_size = perfect_run_size; - actual_nregs = (actual_run_size - pad_size) / bin_info->reg_interval; + actual_nregs = (uint32_t)((actual_run_size - pad_size) / + bin_info->reg_interval); /* * Redzones can require enough padding that not even a single region can @@ -3161,8 +3484,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) assert(config_fill && unlikely(opt_redzone)); actual_run_size += PAGE; - actual_nregs = (actual_run_size - pad_size) / - bin_info->reg_interval; + actual_nregs = (uint32_t)((actual_run_size - pad_size) / + bin_info->reg_interval); } /* @@ -3170,8 +3493,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) */ while (actual_run_size > arena_maxrun) { actual_run_size -= PAGE; - actual_nregs = (actual_run_size - pad_size) / - bin_info->reg_interval; + actual_nregs = (uint32_t)((actual_run_size - pad_size) / + bin_info->reg_interval); } assert(actual_nregs > 0); assert(actual_run_size == s2u(actual_run_size)); @@ -3179,8 +3502,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) /* Copy final settings. */ bin_info->run_size = actual_run_size; bin_info->nregs = actual_nregs; - bin_info->reg0_offset = actual_run_size - (actual_nregs * - bin_info->reg_interval) - pad_size + bin_info->redzone_size; + bin_info->reg0_offset = (uint32_t)(actual_run_size - (actual_nregs * + bin_info->reg_interval) - pad_size + bin_info->redzone_size); if (actual_run_size > small_maxrun) small_maxrun = actual_run_size; @@ -3234,12 +3557,42 @@ small_run_size_init(void) return (false); } +static bool +run_quantize_init(void) +{ + unsigned i; + + run_quantize_max = chunksize + large_pad; + + run_quantize_floor_tab = (size_t *)base_alloc(sizeof(size_t) * + (run_quantize_max >> LG_PAGE)); + if (run_quantize_floor_tab == NULL) + return (true); + + run_quantize_ceil_tab = (size_t *)base_alloc(sizeof(size_t) * + (run_quantize_max >> LG_PAGE)); + if (run_quantize_ceil_tab == NULL) + return (true); + + for (i = 1; i <= run_quantize_max >> LG_PAGE; i++) { + size_t run_size = i << LG_PAGE; + + run_quantize_floor_tab[i-1] = + run_quantize_floor_compute(run_size); + run_quantize_ceil_tab[i-1] = + run_quantize_ceil_compute(run_size); + } + + return (false); +} + bool arena_boot(void) { unsigned i; arena_lg_dirty_mult_default_set(opt_lg_dirty_mult); + arena_decay_time_default_set(opt_decay_time); /* * Compute the header size such that it is large enough to contain the @@ -3281,7 +3634,15 @@ arena_boot(void) nhclasses = NSIZES - nlclasses - NBINS; bin_info_init(); - return (small_run_size_init()); + if (small_run_size_init()) + return (true); + if (run_quantize_init()) + return (true); + + runs_avail_bias = size2index(PAGE); + runs_avail_nclasses = size2index(run_quantize_max)+1 - runs_avail_bias; + + return (false); } void diff --git a/src/bitmap.c b/src/bitmap.c index c733372b..b1e66271 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -3,6 +3,8 @@ /******************************************************************************/ +#ifdef USE_TREE + void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { @@ -32,20 +34,11 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) binfo->nbits = nbits; } -size_t +static size_t bitmap_info_ngroups(const bitmap_info_t *binfo) { - return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP); -} - -size_t -bitmap_size(size_t nbits) -{ - bitmap_info_t binfo; - - bitmap_info_init(&binfo, nbits); - return (bitmap_info_ngroups(&binfo)); + return (binfo->levels[binfo->nlevels].group_offset); } void @@ -61,8 +54,7 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) * correspond to the first logical bit in the group, so extra bits * are the most significant bits of the last group. */ - memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset << - LG_SIZEOF_BITMAP); + memset(bitmap, 0xffU, bitmap_size(binfo)); extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; if (extra != 0) @@ -76,3 +68,47 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; } } + +#else /* USE_TREE */ + +void +bitmap_info_init(bitmap_info_t *binfo, size_t nbits) +{ + size_t i; + + assert(nbits > 0); + assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); + + i = nbits >> LG_BITMAP_GROUP_NBITS; + if (nbits % BITMAP_GROUP_NBITS != 0) + i++; + binfo->ngroups = i; + binfo->nbits = nbits; +} + +static size_t +bitmap_info_ngroups(const bitmap_info_t *binfo) +{ + + return (binfo->ngroups); +} + +void +bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t extra; + + memset(bitmap, 0xffU, bitmap_size(binfo)); + extra = (binfo->nbits % (binfo->ngroups * BITMAP_GROUP_NBITS)); + if (extra != 0) + bitmap[binfo->ngroups - 1] >>= (BITMAP_GROUP_NBITS - extra); +} + +#endif /* USE_TREE */ + +size_t +bitmap_size(const bitmap_info_t *binfo) +{ + + return (bitmap_info_ngroups(binfo) << LG_SIZEOF_BITMAP); +} diff --git a/src/chunk.c b/src/chunk.c index 6ba1ca7a..b179d213 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -332,30 +332,20 @@ chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) { void *ret; - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; assert(size != 0); assert((size & chunksize_mask) == 0); assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - /* Retained. */ - if ((ret = chunk_recycle(arena, &chunk_hooks, - &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, - new_addr, size, alignment, zero, commit, true)) != NULL) - return (ret); - /* "primary" dss. */ if (have_dss && dss_prec == dss_prec_primary && (ret = chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) != NULL) return (ret); - /* - * mmap. Requesting an address is not implemented for - * chunk_alloc_mmap(), so only call it if (new_addr == NULL). - */ - if (new_addr == NULL && (ret = chunk_alloc_mmap(size, alignment, zero, - commit)) != NULL) + /* mmap. */ + if ((ret = chunk_alloc_mmap(new_addr, size, alignment, zero, commit)) != + NULL) return (ret); /* "secondary" dss. */ if (have_dss && dss_prec == dss_prec_secondary && (ret = @@ -380,7 +370,7 @@ chunk_alloc_base(size_t size) */ zero = true; commit = true; - ret = chunk_alloc_mmap(size, chunksize, &zero, &commit); + ret = chunk_alloc_mmap(NULL, size, chunksize, &zero, &commit); if (ret == NULL) return (NULL); if (config_valgrind) @@ -418,9 +408,7 @@ chunk_arena_get(unsigned arena_ind) { arena_t *arena; - /* Dodge tsd for a0 in order to avoid bootstrapping issues. */ - arena = (arena_ind == 0) ? a0get() : arena_get(tsd_fetch(), arena_ind, - false, true); + arena = arena_get(arena_ind, false); /* * The arena we're allocating on behalf of must have been initialized * already. @@ -447,6 +435,21 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, return (ret); } +static void * +chunk_alloc_retained(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit) +{ + + assert(size != 0); + assert((size & chunksize_mask) == 0); + assert(alignment != 0); + assert((alignment & chunksize_mask) == 0); + + return (chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_retained, + &arena->chunks_ad_retained, false, new_addr, size, alignment, zero, + commit, true)); +} + void * chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) @@ -454,10 +457,16 @@ chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, void *ret; chunk_hooks_assure_initialized(arena, chunk_hooks); - ret = chunk_hooks->alloc(new_addr, size, alignment, zero, commit, - arena->ind); - if (ret == NULL) - return (NULL); + + ret = chunk_alloc_retained(arena, chunk_hooks, new_addr, size, + alignment, zero, commit); + if (ret == NULL) { + ret = chunk_hooks->alloc(new_addr, size, alignment, zero, + commit, arena->ind); + if (ret == NULL) + return (NULL); + } + if (config_valgrind && chunk_hooks->alloc != chunk_alloc_default) JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize); return (ret); @@ -716,7 +725,7 @@ chunk_boot(void) * so pages_map will always take fast path. */ if (!opt_lg_chunk) { - opt_lg_chunk = jemalloc_ffs((int)info.dwAllocationGranularity) + opt_lg_chunk = ffs_u((unsigned)info.dwAllocationGranularity) - 1; } #else @@ -732,8 +741,8 @@ chunk_boot(void) if (have_dss && chunk_dss_boot()) return (true); - if (rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) - - opt_lg_chunk, chunks_rtree_node_alloc, NULL)) + if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - + opt_lg_chunk), chunks_rtree_node_alloc, NULL)) return (true); return (false); diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index b9ba7419..56b2ee42 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -32,7 +32,8 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) } void * -chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, bool *commit) +chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, + bool *commit) { void *ret; size_t offset; @@ -53,9 +54,10 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, bool *commit) assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = pages_map(NULL, size); - if (ret == NULL) - return (NULL); + ret = pages_map(new_addr, size); + if (ret == NULL || ret == new_addr) + return (ret); + assert(new_addr == NULL); offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { pages_unmap(ret, size); diff --git a/src/ckh.c b/src/ckh.c index 53a1c1ef..3b423aa2 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -99,7 +99,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. */ - prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); + offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; @@ -141,7 +141,8 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * were an item for which both hashes indicated the same * bucket. */ - prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); + i = (unsigned)prng_lg_range(&ckh->prng_state, + LG_CKH_BUCKET_CELLS); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); @@ -247,8 +248,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) { bool ret; ckhc_t *tab, *ttab; - size_t lg_curcells; - unsigned lg_prevbuckets; + unsigned lg_prevbuckets, lg_curcells; #ifdef CKH_COUNT ckh->ngrows++; @@ -266,7 +266,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) lg_curcells++; usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (usize == 0) { + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { ret = true; goto label_return; } @@ -283,12 +283,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, tcache_get(tsd, false), true); + idalloctm(tsd, tab, tcache_get(tsd, false), true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -302,8 +302,8 @@ static void ckh_shrink(tsd_t *tsd, ckh_t *ckh) { ckhc_t *tab, *ttab; - size_t lg_curcells, usize; - unsigned lg_prevbuckets; + size_t usize; + unsigned lg_prevbuckets, lg_curcells; /* * It is possible (though unlikely, given well behaved hashes) that the @@ -312,7 +312,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return; tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, NULL); @@ -330,7 +330,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, tcache_get(tsd, false), true); + idalloctm(tsd, tab, tcache_get(tsd, false), true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -338,7 +338,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -387,7 +387,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh->keycomp = keycomp; usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); - if (usize == 0) { + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { ret = true; goto label_return; } @@ -421,7 +421,7 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); if (config_debug) memset(ckh, 0x5a, sizeof(ckh_t)); } diff --git a/src/ctl.c b/src/ctl.c index 3de8e602..17bd0719 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -24,7 +24,7 @@ ctl_named_node(const ctl_node_t *node) } JEMALLOC_INLINE_C const ctl_named_node_t * -ctl_named_children(const ctl_named_node_t *node, int index) +ctl_named_children(const ctl_named_node_t *node, size_t index) { const ctl_named_node_t *children = ctl_named_node(node->children); @@ -77,6 +77,7 @@ CTL_PROTO(config_cache_oblivious) CTL_PROTO(config_debug) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) +CTL_PROTO(config_malloc_conf) CTL_PROTO(config_munmap) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) @@ -91,7 +92,9 @@ CTL_PROTO(opt_abort) CTL_PROTO(opt_dss) CTL_PROTO(opt_lg_chunk) CTL_PROTO(opt_narenas) +CTL_PROTO(opt_purge) CTL_PROTO(opt_lg_dirty_mult) +CTL_PROTO(opt_decay_time) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) @@ -114,10 +117,12 @@ CTL_PROTO(opt_prof_accum) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) +static void arena_i_purge(unsigned arena_ind, bool all); CTL_PROTO(arena_i_purge) -static void arena_purge(unsigned arena_ind); +CTL_PROTO(arena_i_decay) CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_lg_dirty_mult) +CTL_PROTO(arena_i_decay_time) CTL_PROTO(arena_i_chunk_hooks) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) @@ -131,6 +136,7 @@ INDEX_PROTO(arenas_hchunk_i) CTL_PROTO(arenas_narenas) CTL_PROTO(arenas_initialized) CTL_PROTO(arenas_lg_dirty_mult) +CTL_PROTO(arenas_decay_time) CTL_PROTO(arenas_quantum) CTL_PROTO(arenas_page) CTL_PROTO(arenas_tcache_max) @@ -181,6 +187,7 @@ INDEX_PROTO(stats_arenas_i_hchunks_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_dss) CTL_PROTO(stats_arenas_i_lg_dirty_mult) +CTL_PROTO(stats_arenas_i_decay_time) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_mapped) @@ -241,6 +248,7 @@ static const ctl_named_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, + {NAME("malloc_conf"), CTL(config_malloc_conf)}, {NAME("munmap"), CTL(config_munmap)}, {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, @@ -258,7 +266,9 @@ static const ctl_named_node_t opt_node[] = { {NAME("dss"), CTL(opt_dss)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, + {NAME("purge"), CTL(opt_purge)}, {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, + {NAME("decay_time"), CTL(opt_decay_time)}, {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, @@ -288,8 +298,10 @@ static const ctl_named_node_t tcache_node[] = { static const ctl_named_node_t arena_i_node[] = { {NAME("purge"), CTL(arena_i_purge)}, + {NAME("decay"), CTL(arena_i_decay)}, {NAME("dss"), CTL(arena_i_dss)}, {NAME("lg_dirty_mult"), CTL(arena_i_lg_dirty_mult)}, + {NAME("decay_time"), CTL(arena_i_decay_time)}, {NAME("chunk_hooks"), CTL(arena_i_chunk_hooks)} }; static const ctl_named_node_t super_arena_i_node[] = { @@ -339,6 +351,7 @@ static const ctl_named_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, {NAME("lg_dirty_mult"), CTL(arenas_lg_dirty_mult)}, + {NAME("decay_time"), CTL(arenas_decay_time)}, {NAME("quantum"), CTL(arenas_quantum)}, {NAME("page"), CTL(arenas_page)}, {NAME("tcache_max"), CTL(arenas_tcache_max)}, @@ -439,6 +452,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("dss"), CTL(stats_arenas_i_dss)}, {NAME("lg_dirty_mult"), CTL(stats_arenas_i_lg_dirty_mult)}, + {NAME("decay_time"), CTL(stats_arenas_i_decay_time)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, {NAME("mapped"), CTL(stats_arenas_i_mapped)}, @@ -519,8 +533,10 @@ static void ctl_arena_clear(ctl_arena_stats_t *astats) { + astats->nthreads = 0; astats->dss = dss_prec_names[dss_prec_limit]; astats->lg_dirty_mult = -1; + astats->decay_time = -1; astats->pactive = 0; astats->pdirty = 0; if (config_stats) { @@ -542,16 +558,23 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) { unsigned i; - arena_stats_merge(arena, &cstats->dss, &cstats->lg_dirty_mult, - &cstats->pactive, &cstats->pdirty, &cstats->astats, cstats->bstats, - cstats->lstats, cstats->hstats); + if (config_stats) { + arena_stats_merge(arena, &cstats->nthreads, &cstats->dss, + &cstats->lg_dirty_mult, &cstats->decay_time, + &cstats->pactive, &cstats->pdirty, &cstats->astats, + cstats->bstats, cstats->lstats, cstats->hstats); - for (i = 0; i < NBINS; i++) { - cstats->allocated_small += cstats->bstats[i].curregs * - index2size(i); - cstats->nmalloc_small += cstats->bstats[i].nmalloc; - cstats->ndalloc_small += cstats->bstats[i].ndalloc; - cstats->nrequests_small += cstats->bstats[i].nrequests; + for (i = 0; i < NBINS; i++) { + cstats->allocated_small += cstats->bstats[i].curregs * + index2size(i); + cstats->nmalloc_small += cstats->bstats[i].nmalloc; + cstats->ndalloc_small += cstats->bstats[i].ndalloc; + cstats->nrequests_small += cstats->bstats[i].nrequests; + } + } else { + arena_basic_stats_merge(arena, &cstats->nthreads, &cstats->dss, + &cstats->lg_dirty_mult, &cstats->decay_time, + &cstats->pactive, &cstats->pdirty); } } @@ -560,57 +583,68 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) { unsigned i; + sstats->nthreads += astats->nthreads; sstats->pactive += astats->pactive; sstats->pdirty += astats->pdirty; - sstats->astats.mapped += astats->astats.mapped; - sstats->astats.npurge += astats->astats.npurge; - sstats->astats.nmadvise += astats->astats.nmadvise; - sstats->astats.purged += astats->astats.purged; + if (config_stats) { + sstats->astats.mapped += astats->astats.mapped; + sstats->astats.npurge += astats->astats.npurge; + sstats->astats.nmadvise += astats->astats.nmadvise; + sstats->astats.purged += astats->astats.purged; - sstats->astats.metadata_mapped += astats->astats.metadata_mapped; - sstats->astats.metadata_allocated += astats->astats.metadata_allocated; + sstats->astats.metadata_mapped += + astats->astats.metadata_mapped; + sstats->astats.metadata_allocated += + astats->astats.metadata_allocated; - sstats->allocated_small += astats->allocated_small; - sstats->nmalloc_small += astats->nmalloc_small; - sstats->ndalloc_small += astats->ndalloc_small; - sstats->nrequests_small += astats->nrequests_small; + sstats->allocated_small += astats->allocated_small; + sstats->nmalloc_small += astats->nmalloc_small; + sstats->ndalloc_small += astats->ndalloc_small; + sstats->nrequests_small += astats->nrequests_small; - sstats->astats.allocated_large += astats->astats.allocated_large; - sstats->astats.nmalloc_large += astats->astats.nmalloc_large; - sstats->astats.ndalloc_large += astats->astats.ndalloc_large; - sstats->astats.nrequests_large += astats->astats.nrequests_large; + sstats->astats.allocated_large += + astats->astats.allocated_large; + sstats->astats.nmalloc_large += astats->astats.nmalloc_large; + sstats->astats.ndalloc_large += astats->astats.ndalloc_large; + sstats->astats.nrequests_large += + astats->astats.nrequests_large; - sstats->astats.allocated_huge += astats->astats.allocated_huge; - sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge; - sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge; + sstats->astats.allocated_huge += astats->astats.allocated_huge; + sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge; + sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge; - for (i = 0; i < NBINS; i++) { - sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; - sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; - sstats->bstats[i].nrequests += astats->bstats[i].nrequests; - sstats->bstats[i].curregs += astats->bstats[i].curregs; - if (config_tcache) { - sstats->bstats[i].nfills += astats->bstats[i].nfills; - sstats->bstats[i].nflushes += - astats->bstats[i].nflushes; + for (i = 0; i < NBINS; i++) { + sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; + sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; + sstats->bstats[i].nrequests += + astats->bstats[i].nrequests; + sstats->bstats[i].curregs += astats->bstats[i].curregs; + if (config_tcache) { + sstats->bstats[i].nfills += + astats->bstats[i].nfills; + sstats->bstats[i].nflushes += + astats->bstats[i].nflushes; + } + sstats->bstats[i].nruns += astats->bstats[i].nruns; + sstats->bstats[i].reruns += astats->bstats[i].reruns; + sstats->bstats[i].curruns += astats->bstats[i].curruns; } - sstats->bstats[i].nruns += astats->bstats[i].nruns; - sstats->bstats[i].reruns += astats->bstats[i].reruns; - sstats->bstats[i].curruns += astats->bstats[i].curruns; - } - for (i = 0; i < nlclasses; i++) { - sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; - sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; - sstats->lstats[i].nrequests += astats->lstats[i].nrequests; - sstats->lstats[i].curruns += astats->lstats[i].curruns; - } + for (i = 0; i < nlclasses; i++) { + sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; + sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; + sstats->lstats[i].nrequests += + astats->lstats[i].nrequests; + sstats->lstats[i].curruns += astats->lstats[i].curruns; + } - for (i = 0; i < nhclasses; i++) { - sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc; - sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc; - sstats->hstats[i].curhchunks += astats->hstats[i].curhchunks; + for (i = 0; i < nhclasses; i++) { + sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc; + sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc; + sstats->hstats[i].curhchunks += + astats->hstats[i].curhchunks; + } } } @@ -621,19 +655,9 @@ ctl_arena_refresh(arena_t *arena, unsigned i) ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas]; ctl_arena_clear(astats); - - sstats->nthreads += astats->nthreads; - if (config_stats) { - ctl_arena_stats_amerge(astats, arena); - /* Merge into sum stats as well. */ - ctl_arena_stats_smerge(sstats, astats); - } else { - astats->pactive += arena->nactive; - astats->pdirty += arena->ndirty; - /* Merge into sum stats as well. */ - sstats->pactive += arena->nactive; - sstats->pdirty += arena->ndirty; - } + ctl_arena_stats_amerge(astats, arena); + /* Merge into sum stats as well. */ + ctl_arena_stats_smerge(sstats, astats); } static bool @@ -679,33 +703,17 @@ ctl_grow(void) static void ctl_refresh(void) { - tsd_t *tsd; unsigned i; - bool refreshed; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); /* * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). */ - ctl_stats.arenas[ctl_stats.narenas].nthreads = 0; ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); - tsd = tsd_fetch(); - for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) { - tarenas[i] = arena_get(tsd, i, false, false); - if (tarenas[i] == NULL && !refreshed) { - tarenas[i] = arena_get(tsd, i, false, true); - refreshed = true; - } - } - - for (i = 0; i < ctl_stats.narenas; i++) { - if (tarenas[i] != NULL) - ctl_stats.arenas[i].nthreads = arena_nbound(i); - else - ctl_stats.arenas[i].nthreads = 0; - } + for (i = 0; i < ctl_stats.narenas; i++) + tarenas[i] = arena_get(i, false); for (i = 0; i < ctl_stats.narenas; i++) { bool initialized = (tarenas[i] != NULL); @@ -960,7 +968,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, assert(node->nchildren > 0); if (ctl_named_node(node->children) != NULL) { /* Children are named. */ - if (node->nchildren <= mib[i]) { + if (node->nchildren <= (unsigned)mib[i]) { ret = ENOENT; goto label_return; } @@ -1199,17 +1207,17 @@ label_return: \ return (ret); \ } -#define CTL_RO_BOOL_CONFIG_GEN(n) \ +#define CTL_RO_CONFIG_GEN(n, t) \ static int \ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ void *newp, size_t newlen) \ { \ int ret; \ - bool oldval; \ + t oldval; \ \ READONLY(); \ oldval = n; \ - READ(oldval, bool); \ + READ(oldval, t); \ \ ret = 0; \ label_return: \ @@ -1241,28 +1249,31 @@ label_return: /******************************************************************************/ -CTL_RO_BOOL_CONFIG_GEN(config_cache_oblivious) -CTL_RO_BOOL_CONFIG_GEN(config_debug) -CTL_RO_BOOL_CONFIG_GEN(config_fill) -CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) -CTL_RO_BOOL_CONFIG_GEN(config_munmap) -CTL_RO_BOOL_CONFIG_GEN(config_prof) -CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) -CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) -CTL_RO_BOOL_CONFIG_GEN(config_stats) -CTL_RO_BOOL_CONFIG_GEN(config_tcache) -CTL_RO_BOOL_CONFIG_GEN(config_tls) -CTL_RO_BOOL_CONFIG_GEN(config_utrace) -CTL_RO_BOOL_CONFIG_GEN(config_valgrind) -CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) +CTL_RO_CONFIG_GEN(config_cache_oblivious, bool) +CTL_RO_CONFIG_GEN(config_debug, bool) +CTL_RO_CONFIG_GEN(config_fill, bool) +CTL_RO_CONFIG_GEN(config_lazy_lock, bool) +CTL_RO_CONFIG_GEN(config_malloc_conf, const char *) +CTL_RO_CONFIG_GEN(config_munmap, bool) +CTL_RO_CONFIG_GEN(config_prof, bool) +CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) +CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) +CTL_RO_CONFIG_GEN(config_stats, bool) +CTL_RO_CONFIG_GEN(config_tcache, bool) +CTL_RO_CONFIG_GEN(config_tls, bool) +CTL_RO_CONFIG_GEN(config_utrace, bool) +CTL_RO_CONFIG_GEN(config_valgrind, bool) +CTL_RO_CONFIG_GEN(config_xmalloc, bool) /******************************************************************************/ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) -CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) +CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) +CTL_RO_NL_GEN(opt_purge, purge_mode_names[opt_purge], const char *) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) +CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) @@ -1314,7 +1325,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* Initialize arena if necessary. */ - newarena = arena_get(tsd, newind, true, true); + newarena = arena_get(newind, true); if (newarena == NULL) { ret = EAGAIN; goto label_return; @@ -1536,34 +1547,44 @@ label_return: /******************************************************************************/ -/* ctl_mutex must be held during execution of this function. */ static void -arena_purge(unsigned arena_ind) +arena_i_purge(unsigned arena_ind, bool all) { - tsd_t *tsd; - unsigned i; - bool refreshed; - VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); - tsd = tsd_fetch(); - for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) { - tarenas[i] = arena_get(tsd, i, false, false); - if (tarenas[i] == NULL && !refreshed) { - tarenas[i] = arena_get(tsd, i, false, true); - refreshed = true; - } - } + malloc_mutex_lock(&ctl_mtx); + { + unsigned narenas = ctl_stats.narenas; - if (arena_ind == ctl_stats.narenas) { - unsigned i; - for (i = 0; i < ctl_stats.narenas; i++) { - if (tarenas[i] != NULL) - arena_purge_all(tarenas[i]); + if (arena_ind == narenas) { + unsigned i; + VARIABLE_ARRAY(arena_t *, tarenas, narenas); + + for (i = 0; i < narenas; i++) + tarenas[i] = arena_get(i, false); + + /* + * No further need to hold ctl_mtx, since narenas and + * tarenas contain everything needed below. + */ + malloc_mutex_unlock(&ctl_mtx); + + for (i = 0; i < narenas; i++) { + if (tarenas[i] != NULL) + arena_purge(tarenas[i], all); + } + } else { + arena_t *tarena; + + assert(arena_ind < narenas); + + tarena = arena_get(arena_ind, false); + + /* No further need to hold ctl_mtx. */ + malloc_mutex_unlock(&ctl_mtx); + + if (tarena != NULL) + arena_purge(tarena, all); } - } else { - assert(arena_ind < ctl_stats.narenas); - if (tarenas[arena_ind] != NULL) - arena_purge_all(tarenas[arena_ind]); } } @@ -1575,9 +1596,22 @@ arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, READONLY(); WRITEONLY(); - malloc_mutex_lock(&ctl_mtx); - arena_purge(mib[1]); - malloc_mutex_unlock(&ctl_mtx); + arena_i_purge((unsigned)mib[1], true); + + ret = 0; +label_return: + return (ret); +} + +static int +arena_i_decay_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + + READONLY(); + WRITEONLY(); + arena_i_purge((unsigned)mib[1], false); ret = 0; label_return: @@ -1590,7 +1624,7 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, { int ret; const char *dss = NULL; - unsigned arena_ind = mib[1]; + unsigned arena_ind = (unsigned)mib[1]; dss_prec_t dss_prec_old = dss_prec_limit; dss_prec_t dss_prec = dss_prec_limit; @@ -1615,7 +1649,7 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } if (arena_ind < ctl_stats.narenas) { - arena_t *arena = arena_get(tsd_fetch(), arena_ind, false, true); + arena_t *arena = arena_get(arena_ind, false); if (arena == NULL || (dss_prec != dss_prec_limit && arena_dss_prec_set(arena, dss_prec))) { ret = EFAULT; @@ -1645,10 +1679,10 @@ arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena_ind = mib[1]; + unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - arena = arena_get(tsd_fetch(), arena_ind, false, true); + arena = arena_get(arena_ind, false); if (arena == NULL) { ret = EFAULT; goto label_return; @@ -1674,17 +1708,51 @@ label_return: return (ret); } +static int +arena_i_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned arena_ind = (unsigned)mib[1]; + arena_t *arena; + + arena = arena_get(arena_ind, false); + if (arena == NULL) { + ret = EFAULT; + goto label_return; + } + + if (oldp != NULL && oldlenp != NULL) { + size_t oldval = arena_decay_time_get(arena); + READ(oldval, ssize_t); + } + if (newp != NULL) { + if (newlen != sizeof(ssize_t)) { + ret = EINVAL; + goto label_return; + } + if (arena_decay_time_set(arena, *(ssize_t *)newp)) { + ret = EFAULT; + goto label_return; + } + } + + ret = 0; +label_return: + return (ret); +} + static int arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena_ind = mib[1]; + unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; malloc_mutex_lock(&ctl_mtx); if (arena_ind < narenas_total_get() && (arena = - arena_get(tsd_fetch(), arena_ind, false, true)) != NULL) { + arena_get(arena_ind, false)) != NULL) { if (newp != NULL) { chunk_hooks_t old_chunk_hooks, new_chunk_hooks; WRITE(new_chunk_hooks, chunk_hooks_t); @@ -1758,7 +1826,7 @@ arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, if (*oldlenp != ctl_stats.narenas * sizeof(bool)) { ret = EINVAL; nread = (*oldlenp < ctl_stats.narenas * sizeof(bool)) - ? (*oldlenp / sizeof(bool)) : ctl_stats.narenas; + ? (unsigned)(*oldlenp / sizeof(bool)) : ctl_stats.narenas; } else { ret = 0; nread = ctl_stats.narenas; @@ -1798,6 +1866,32 @@ label_return: return (ret); } +static int +arenas_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + + if (oldp != NULL && oldlenp != NULL) { + size_t oldval = arena_decay_time_default_get(); + READ(oldval, ssize_t); + } + if (newp != NULL) { + if (newlen != sizeof(ssize_t)) { + ret = EINVAL; + goto label_return; + } + if (arena_decay_time_default_set(*(ssize_t *)newp)) { + ret = EFAULT; + goto label_return; + } + } + + ret = 0; +label_return: + return (ret); +} + CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) @@ -1816,7 +1910,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_nlruns, nlclasses, unsigned) -CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+mib[2]), size_t) +CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1827,7 +1921,8 @@ arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_nhchunks, nhclasses, unsigned) -CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+mib[2]), size_t) +CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+(szind_t)mib[2]), + size_t) static const ctl_named_node_t * arenas_hchunk_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1999,6 +2094,8 @@ CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) CTL_RO_GEN(stats_arenas_i_lg_dirty_mult, ctl_stats.arenas[mib[2]].lg_dirty_mult, ssize_t) +CTL_RO_GEN(stats_arenas_i_decay_time, ctl_stats.arenas[mib[2]].decay_time, + ssize_t) CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) diff --git a/src/extent.c b/src/extent.c index 13f94411..9f5146e5 100644 --- a/src/extent.c +++ b/src/extent.c @@ -15,7 +15,7 @@ extent_quantize(size_t size) } JEMALLOC_INLINE_C int -extent_szad_comp(extent_node_t *a, extent_node_t *b) +extent_szad_comp(const extent_node_t *a, const extent_node_t *b) { int ret; size_t a_qsize = extent_quantize(extent_node_size_get(a)); @@ -41,7 +41,7 @@ rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, szad_link, extent_szad_comp) JEMALLOC_INLINE_C int -extent_ad_comp(extent_node_t *a, extent_node_t *b) +extent_ad_comp(const extent_node_t *a, const extent_node_t *b) { uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a); uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b); diff --git a/src/huge.c b/src/huge.c index 1e9a6651..5f7ceaf1 100644 --- a/src/huge.c +++ b/src/huge.c @@ -31,35 +31,30 @@ huge_node_unset(const void *ptr, const extent_node_t *node) } void * -huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, +huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero, tcache_t *tcache) { - size_t usize; - usize = s2u(size); - if (usize == 0) { - /* size_t overflow. */ - return (NULL); - } + assert(usize == s2u(usize)); return (huge_palloc(tsd, arena, usize, chunksize, zero, tcache)); } void * -huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, +huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; - size_t usize; + size_t ausize; extent_node_t *node; bool is_zeroed; /* Allocate one or more contiguous chunks for this request. */ - usize = sa2u(size, alignment); - if (unlikely(usize == 0)) + ausize = sa2u(usize, alignment); + if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS)) return (NULL); - assert(usize >= chunksize); + assert(ausize >= chunksize); /* Allocate an extent node with which to track the chunk. */ node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), @@ -74,16 +69,16 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, is_zeroed = zero; arena = arena_choose(tsd, arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena, - size, alignment, &is_zeroed)) == NULL) { - idalloctm(tsd, node, tcache, true); + usize, alignment, &is_zeroed)) == NULL) { + idalloctm(tsd, node, tcache, true, true); return (NULL); } - extent_node_init(node, arena, ret, size, is_zeroed, true); + extent_node_init(node, arena, ret, usize, is_zeroed, true); if (huge_node_set(ret, node)) { - arena_chunk_dalloc_huge(arena, ret, size); - idalloctm(tsd, node, tcache, true); + arena_chunk_dalloc_huge(arena, ret, usize); + idalloctm(tsd, node, tcache, true, true); return (NULL); } @@ -95,10 +90,11 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed) - memset(ret, 0, size); + memset(ret, 0, usize); } else if (config_fill && unlikely(opt_junk_alloc)) - memset(ret, 0xa5, size); + memset(ret, 0xa5, usize); + arena_decay_tick(tsd, arena); return (ret); } @@ -280,11 +276,13 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) { } bool -huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, +huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { assert(s2u(oldsize) == oldsize); + /* The following should have been caught by callers. */ + assert(usize_min > 0 && usize_max <= HUGE_MAXCLASS); /* Both allocations must be huge to avoid a move. */ if (oldsize < chunksize || usize_max < chunksize) @@ -292,13 +290,18 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) { /* Attempt to expand the allocation in-place. */ - if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max, zero)) + if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max, + zero)) { + arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); + } /* Try again, this time with usize_min. */ if (usize_min < usize_max && CHUNK_CEILING(usize_min) > CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(ptr, - oldsize, usize_min, zero)) + oldsize, usize_min, zero)) { + arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); + } } /* @@ -309,12 +312,17 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) { huge_ralloc_no_move_similar(ptr, oldsize, usize_min, usize_max, zero); + arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); } /* Attempt to shrink the allocation in-place. */ - if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) - return (huge_ralloc_no_move_shrink(ptr, oldsize, usize_max)); + if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) { + if (!huge_ralloc_no_move_shrink(ptr, oldsize, usize_max)) { + arena_decay_tick(tsd, huge_aalloc(ptr)); + return (false); + } + } return (true); } @@ -335,8 +343,11 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, void *ret; size_t copysize; + /* The following should have been caught by callers. */ + assert(usize > 0 && usize <= HUGE_MAXCLASS); + /* Try to avoid moving the allocation. */ - if (!huge_ralloc_no_move(ptr, oldsize, usize, usize, zero)) + if (!huge_ralloc_no_move(tsd, ptr, oldsize, usize, usize, zero)) return (ptr); /* @@ -372,7 +383,9 @@ huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) extent_node_size_get(node)); arena_chunk_dalloc_huge(extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); - idalloctm(tsd, node, tcache, true); + idalloctm(tsd, node, tcache, true, true); + + arena_decay_tick(tsd, arena); } arena_t * diff --git a/src/jemalloc.c b/src/jemalloc.c index 5a2d3240..0735376e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -40,14 +40,14 @@ bool opt_redzone = false; bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; -size_t opt_narenas = 0; +unsigned opt_narenas = 0; /* Initialized to true if the process is running inside Valgrind. */ bool in_valgrind; unsigned ncpus; -/* Protects arenas initialization (arenas, narenas_total). */ +/* Protects arenas initialization. */ static malloc_mutex_t arenas_lock; /* * Arenas that are used to service external requests. Not all elements of the @@ -57,8 +57,8 @@ static malloc_mutex_t arenas_lock; * arenas. arenas[narenas_auto..narenas_total) are only used if the application * takes some action to create them and allocate from them. */ -static arena_t **arenas; -static unsigned narenas_total; +arena_t **arenas; +static unsigned narenas_total; /* Use narenas_total_*(). */ static arena_t *a0; /* arenas[0]; read-only after initialization. */ static unsigned narenas_auto; /* Read-only after initialization. */ @@ -70,12 +70,29 @@ typedef enum { } malloc_init_t; static malloc_init_t malloc_init_state = malloc_init_uninitialized; +/* 0 should be the common case. Set to true to trigger initialization. */ +static bool malloc_slow = true; + +/* When malloc_slow != 0, set the corresponding bits for sanity check. */ +enum { + flag_opt_junk_alloc = (1U), + flag_opt_junk_free = (1U << 1), + flag_opt_quarantine = (1U << 2), + flag_opt_zero = (1U << 3), + flag_opt_utrace = (1U << 4), + flag_in_valgrind = (1U << 5), + flag_opt_xmalloc = (1U << 6) +}; +static uint8_t malloc_slow_flags; + +/* Last entry for overflow detection only. */ JEMALLOC_ALIGNED(CACHELINE) -const size_t index2size_tab[NSIZES] = { +const size_t index2size_tab[NSIZES+1] = { #define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ ((ZU(1)< MALLOCX_ARENA_MAX) return (NULL); - if (ind == narenas_total) { - unsigned narenas_new = narenas_total + 1; - arena_t **arenas_new = - (arena_t **)a0malloc(CACHELINE_CEILING(narenas_new * - sizeof(arena_t *))); - if (arenas_new == NULL) - return (NULL); - memcpy(arenas_new, arenas, narenas_total * sizeof(arena_t *)); - arenas_new[ind] = NULL; - /* - * Deallocate only if arenas came from a0malloc() (not - * base_alloc()). - */ - if (narenas_total != narenas_auto) - a0dalloc(arenas); - arenas = arenas_new; - narenas_total = narenas_new; - } + if (ind == narenas_total_get()) + narenas_total_inc(); /* * Another thread may have already initialized arenas[ind] if it's an * auto arena. */ - arena = arenas[ind]; + arena = arena_get(ind, false); if (arena != NULL) { assert(ind < narenas_auto); return (arena); } /* Actually initialize the arena. */ - arena = arenas[ind] = arena_new(ind); + arena = arena_new(ind); + arena_set(ind, arena); return (arena); } @@ -428,37 +450,16 @@ arena_init(unsigned ind) return (arena); } -unsigned -narenas_total_get(void) -{ - unsigned narenas; - - malloc_mutex_lock(&arenas_lock); - narenas = narenas_total; - malloc_mutex_unlock(&arenas_lock); - - return (narenas); -} - -static void -arena_bind_locked(tsd_t *tsd, unsigned ind) -{ - arena_t *arena; - - arena = arenas[ind]; - arena->nthreads++; - - if (tsd_nominal(tsd)) - tsd_arena_set(tsd, arena); -} - static void arena_bind(tsd_t *tsd, unsigned ind) { + arena_t *arena; - malloc_mutex_lock(&arenas_lock); - arena_bind_locked(tsd, ind); - malloc_mutex_unlock(&arenas_lock); + arena = arena_get(ind, false); + arena_nthreads_inc(arena); + + if (tsd_nominal(tsd)) + tsd_arena_set(tsd, arena); } void @@ -466,107 +467,97 @@ arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { arena_t *oldarena, *newarena; - malloc_mutex_lock(&arenas_lock); - oldarena = arenas[oldind]; - newarena = arenas[newind]; - oldarena->nthreads--; - newarena->nthreads++; - malloc_mutex_unlock(&arenas_lock); + oldarena = arena_get(oldind, false); + newarena = arena_get(newind, false); + arena_nthreads_dec(oldarena); + arena_nthreads_inc(newarena); tsd_arena_set(tsd, newarena); } -unsigned -arena_nbound(unsigned ind) -{ - unsigned nthreads; - - malloc_mutex_lock(&arenas_lock); - nthreads = arenas[ind]->nthreads; - malloc_mutex_unlock(&arenas_lock); - return (nthreads); -} - static void arena_unbind(tsd_t *tsd, unsigned ind) { arena_t *arena; - malloc_mutex_lock(&arenas_lock); - arena = arenas[ind]; - arena->nthreads--; - malloc_mutex_unlock(&arenas_lock); + arena = arena_get(ind, false); + arena_nthreads_dec(arena); tsd_arena_set(tsd, NULL); } -arena_t * -arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing) +arena_tdata_t * +arena_tdata_get_hard(tsd_t *tsd, unsigned ind) { - arena_t *arena; - arena_t **arenas_cache = tsd_arenas_cache_get(tsd); - unsigned narenas_cache = tsd_narenas_cache_get(tsd); + arena_tdata_t *tdata, *arenas_tdata_old; + arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); + unsigned narenas_tdata_old, i; + unsigned narenas_tdata = tsd_narenas_tdata_get(tsd); unsigned narenas_actual = narenas_total_get(); - /* Deallocate old cache if it's too small. */ - if (arenas_cache != NULL && narenas_cache < narenas_actual) { - a0dalloc(arenas_cache); - arenas_cache = NULL; - narenas_cache = 0; - tsd_arenas_cache_set(tsd, arenas_cache); - tsd_narenas_cache_set(tsd, narenas_cache); + /* + * Dissociate old tdata array (and set up for deallocation upon return) + * if it's too small. + */ + if (arenas_tdata != NULL && narenas_tdata < narenas_actual) { + arenas_tdata_old = arenas_tdata; + narenas_tdata_old = narenas_tdata; + arenas_tdata = NULL; + narenas_tdata = 0; + tsd_arenas_tdata_set(tsd, arenas_tdata); + tsd_narenas_tdata_set(tsd, narenas_tdata); + } else { + arenas_tdata_old = NULL; + narenas_tdata_old = 0; } - /* Allocate cache if it's missing. */ - if (arenas_cache == NULL) { - bool *arenas_cache_bypassp = tsd_arenas_cache_bypassp_get(tsd); - assert(ind < narenas_actual || !init_if_missing); - narenas_cache = (ind < narenas_actual) ? narenas_actual : ind+1; + /* Allocate tdata array if it's missing. */ + if (arenas_tdata == NULL) { + bool *arenas_tdata_bypassp = tsd_arenas_tdata_bypassp_get(tsd); + narenas_tdata = (ind < narenas_actual) ? narenas_actual : ind+1; - if (tsd_nominal(tsd) && !*arenas_cache_bypassp) { - *arenas_cache_bypassp = true; - arenas_cache = (arena_t **)a0malloc(sizeof(arena_t *) * - narenas_cache); - *arenas_cache_bypassp = false; + if (tsd_nominal(tsd) && !*arenas_tdata_bypassp) { + *arenas_tdata_bypassp = true; + arenas_tdata = (arena_tdata_t *)a0malloc( + sizeof(arena_tdata_t) * narenas_tdata); + *arenas_tdata_bypassp = false; } - if (arenas_cache == NULL) { - /* - * This function must always tell the truth, even if - * it's slow, so don't let OOM, thread cleanup (note - * tsd_nominal check), nor recursive allocation - * avoidance (note arenas_cache_bypass check) get in the - * way. - */ - if (ind >= narenas_actual) - return (NULL); - malloc_mutex_lock(&arenas_lock); - arena = arenas[ind]; - malloc_mutex_unlock(&arenas_lock); - return (arena); + if (arenas_tdata == NULL) { + tdata = NULL; + goto label_return; } - assert(tsd_nominal(tsd) && !*arenas_cache_bypassp); - tsd_arenas_cache_set(tsd, arenas_cache); - tsd_narenas_cache_set(tsd, narenas_cache); + assert(tsd_nominal(tsd) && !*arenas_tdata_bypassp); + tsd_arenas_tdata_set(tsd, arenas_tdata); + tsd_narenas_tdata_set(tsd, narenas_tdata); } /* - * Copy to cache. It's possible that the actual number of arenas has - * increased since narenas_total_get() was called above, but that causes - * no correctness issues unless two threads concurrently execute the - * arenas.extend mallctl, which we trust mallctl synchronization to + * Copy to tdata array. It's possible that the actual number of arenas + * has increased since narenas_total_get() was called above, but that + * causes no correctness issues unless two threads concurrently execute + * the arenas.extend mallctl, which we trust mallctl synchronization to * prevent. */ - malloc_mutex_lock(&arenas_lock); - memcpy(arenas_cache, arenas, sizeof(arena_t *) * narenas_actual); - malloc_mutex_unlock(&arenas_lock); - if (narenas_cache > narenas_actual) { - memset(&arenas_cache[narenas_actual], 0, sizeof(arena_t *) * - (narenas_cache - narenas_actual)); + + /* Copy/initialize tickers. */ + for (i = 0; i < narenas_actual; i++) { + if (i < narenas_tdata_old) { + ticker_copy(&arenas_tdata[i].decay_ticker, + &arenas_tdata_old[i].decay_ticker); + } else { + ticker_init(&arenas_tdata[i].decay_ticker, + DECAY_NTICKS_PER_UPDATE); + } + } + if (narenas_tdata > narenas_actual) { + memset(&arenas_tdata[narenas_actual], 0, sizeof(arena_tdata_t) + * (narenas_tdata - narenas_actual)); } - /* Read the refreshed cache, and init the arena if necessary. */ - arena = arenas_cache[ind]; - if (init_if_missing && arena == NULL) - arena = arenas_cache[ind] = arena_init(ind); - return (arena); + /* Read the refreshed tdata array. */ + tdata = &arenas_tdata[ind]; +label_return: + if (arenas_tdata_old != NULL) + a0dalloc(arenas_tdata_old); + return (tdata); } /* Slow path, called only by arena_choose(). */ @@ -581,15 +572,16 @@ arena_choose_hard(tsd_t *tsd) choose = 0; first_null = narenas_auto; malloc_mutex_lock(&arenas_lock); - assert(a0get() != NULL); + assert(arena_get(0, false) != NULL); for (i = 1; i < narenas_auto; i++) { - if (arenas[i] != NULL) { + if (arena_get(i, false) != NULL) { /* * Choose the first arena that has the lowest * number of threads assigned to it. */ - if (arenas[i]->nthreads < - arenas[choose]->nthreads) + if (arena_nthreads_get(arena_get(i, false)) < + arena_nthreads_get(arena_get(choose, + false))) choose = i; } else if (first_null == narenas_auto) { /* @@ -605,13 +597,13 @@ arena_choose_hard(tsd_t *tsd) } } - if (arenas[choose]->nthreads == 0 + if (arena_nthreads_get(arena_get(choose, false)) == 0 || first_null == narenas_auto) { /* * Use an unloaded arena, or the least loaded arena if * all arenas are already initialized. */ - ret = arenas[choose]; + ret = arena_get(choose, false); } else { /* Initialize a new arena. */ choose = first_null; @@ -621,10 +613,10 @@ arena_choose_hard(tsd_t *tsd) return (NULL); } } - arena_bind_locked(tsd, choose); + arena_bind(tsd, choose); malloc_mutex_unlock(&arenas_lock); } else { - ret = a0get(); + ret = arena_get(0, false); arena_bind(tsd, 0); } @@ -656,26 +648,29 @@ arena_cleanup(tsd_t *tsd) } void -arenas_cache_cleanup(tsd_t *tsd) +arenas_tdata_cleanup(tsd_t *tsd) { - arena_t **arenas_cache; + arena_tdata_t *arenas_tdata; - arenas_cache = tsd_arenas_cache_get(tsd); - if (arenas_cache != NULL) { - tsd_arenas_cache_set(tsd, NULL); - a0dalloc(arenas_cache); + /* Prevent tsd->arenas_tdata from being (re)created. */ + *tsd_arenas_tdata_bypassp_get(tsd) = true; + + arenas_tdata = tsd_arenas_tdata_get(tsd); + if (arenas_tdata != NULL) { + tsd_arenas_tdata_set(tsd, NULL); + a0dalloc(arenas_tdata); } } void -narenas_cache_cleanup(tsd_t *tsd) +narenas_tdata_cleanup(tsd_t *tsd) { /* Do nothing. */ } void -arenas_cache_bypass_cleanup(tsd_t *tsd) +arenas_tdata_bypass_cleanup(tsd_t *tsd) { /* Do nothing. */ @@ -696,7 +691,7 @@ stats_print_atexit(void) * continue to allocate. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena = arenas[i]; + arena_t *arena = arena_get(i, false); if (arena != NULL) { tcache_t *tcache; @@ -838,6 +833,26 @@ malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, (int)vlen, v); } +static void +malloc_slow_flag_init(void) +{ + /* + * Combine the runtime options into malloc_slow for fast path. Called + * after processing all the options. + */ + malloc_slow_flags |= (opt_junk_alloc ? flag_opt_junk_alloc : 0) + | (opt_junk_free ? flag_opt_junk_free : 0) + | (opt_quarantine ? flag_opt_quarantine : 0) + | (opt_zero ? flag_opt_zero : 0) + | (opt_utrace ? flag_opt_utrace : 0) + | (opt_xmalloc ? flag_opt_xmalloc : 0); + + if (config_valgrind) + malloc_slow_flags |= (in_valgrind ? flag_in_valgrind : 0); + + malloc_slow = (malloc_slow_flags != 0); +} + static void malloc_conf_init(void) { @@ -864,10 +879,13 @@ malloc_conf_init(void) opt_tcache = false; } - for (i = 0; i < 3; i++) { + for (i = 0; i < 4; i++) { /* Get runtime configuration. */ switch (i) { case 0: + opts = config_malloc_conf; + break; + case 1: if (je_malloc_conf != NULL) { /* * Use options that were compiled into the @@ -880,8 +898,8 @@ malloc_conf_init(void) opts = buf; } break; - case 1: { - int linklen = 0; + case 2: { + ssize_t linklen = 0; #ifndef _WIN32 int saved_errno = errno; const char *linkname = @@ -907,7 +925,7 @@ malloc_conf_init(void) buf[linklen] = '\0'; opts = buf; break; - } case 2: { + } case 3: { const char *envname = #ifdef JEMALLOC_PREFIX JEMALLOC_CPREFIX"MALLOC_CONF" @@ -954,7 +972,7 @@ malloc_conf_init(void) if (cont) \ continue; \ } -#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ +#define CONF_HANDLE_T_U(t, o, n, min, max, clip) \ if (CONF_MATCH(n)) { \ uintmax_t um; \ char *end; \ @@ -968,11 +986,11 @@ malloc_conf_init(void) k, klen, v, vlen); \ } else if (clip) { \ if ((min) != 0 && um < (min)) \ - o = (min); \ + o = (t)(min); \ else if (um > (max)) \ - o = (max); \ + o = (t)(max); \ else \ - o = um; \ + o = (t)um; \ } else { \ if (((min) != 0 && um < (min)) \ || um > (max)) { \ @@ -981,10 +999,14 @@ malloc_conf_init(void) "conf value", \ k, klen, v, vlen); \ } else \ - o = um; \ + o = (t)um; \ } \ continue; \ } +#define CONF_HANDLE_UNSIGNED(o, n, min, max, clip) \ + CONF_HANDLE_T_U(unsigned, o, n, min, max, clip) +#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ + CONF_HANDLE_T_U(size_t, o, n, min, max, clip) #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ if (CONF_MATCH(n)) { \ long l; \ @@ -1052,10 +1074,29 @@ malloc_conf_init(void) } continue; } - CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, - SIZE_T_MAX, false) + CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, + UINT_MAX, false) + if (strncmp("purge", k, klen) == 0) { + int i; + bool match = false; + for (i = 0; i < purge_mode_limit; i++) { + if (strncmp(purge_mode_names[i], v, + vlen) == 0) { + opt_purge = (purge_mode_t)i; + match = true; + break; + } + } + if (!match) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; + } CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", -1, (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(opt_decay_time, "decay_time", -1, + NSTIME_SEC_MAX); CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true) if (config_fill) { if (CONF_MATCH("junk")) { @@ -1209,7 +1250,8 @@ malloc_init_hard_a0_locked(void) * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). */ - narenas_total = narenas_auto = 1; + narenas_auto = 1; + narenas_total_set(narenas_auto); arenas = &a0; memset(arenas, 0, sizeof(arena_t *) * narenas_auto); /* @@ -1238,26 +1280,37 @@ malloc_init_hard_a0(void) * * init_lock must be held. */ -static void +static bool malloc_init_hard_recursible(void) { + bool ret = false; malloc_init_state = malloc_init_recursible; malloc_mutex_unlock(&init_lock); + /* LinuxThreads' pthread_setspecific() allocates. */ + if (malloc_tsd_boot0()) { + ret = true; + goto label_return; + } + ncpus = malloc_ncpus(); #if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ && !defined(_WIN32) && !defined(__native_client__)) - /* LinuxThreads's pthread_atfork() allocates. */ + /* LinuxThreads' pthread_atfork() allocates. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { + ret = true; malloc_write(": Error in pthread_atfork()\n"); if (opt_abort) abort(); } #endif + +label_return: malloc_mutex_lock(&init_lock); + return (ret); } /* init_lock must be held. */ @@ -1280,30 +1333,26 @@ malloc_init_hard_finish(void) } narenas_auto = opt_narenas; /* - * Make sure that the arenas array can be allocated. In practice, this - * limit is enough to allow the allocator to function, but the ctl - * machinery will fail to allocate memory at far lower limits. + * Limit the number of arenas to the indexing range of MALLOCX_ARENA(). */ - if (narenas_auto > chunksize / sizeof(arena_t *)) { - narenas_auto = chunksize / sizeof(arena_t *); + if (narenas_auto > MALLOCX_ARENA_MAX) { + narenas_auto = MALLOCX_ARENA_MAX; malloc_printf(": Reducing narenas to limit (%d)\n", narenas_auto); } - narenas_total = narenas_auto; + narenas_total_set(narenas_auto); /* Allocate and initialize arenas. */ - arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas_total); + arenas = (arena_t **)base_alloc(sizeof(arena_t *) * + (MALLOCX_ARENA_MAX+1)); if (arenas == NULL) return (true); - /* - * Zero the array. In practice, this should always be pre-zeroed, - * since it was just mmap()ed, but let's be sure. - */ - memset(arenas, 0, sizeof(arena_t *) * narenas_total); /* Copy the pointer to the one arena that was already initialized. */ - arenas[0] = a0; + arena_set(0, a0); malloc_init_state = malloc_init_initialized; + malloc_slow_flag_init(); + return (false); } @@ -1325,16 +1374,16 @@ malloc_init_hard(void) malloc_mutex_unlock(&init_lock); return (true); } - if (malloc_tsd_boot0()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - if (config_prof && prof_boot2()) { + + if (malloc_init_hard_recursible()) { malloc_mutex_unlock(&init_lock); return (true); } - malloc_init_hard_recursible(); + if (config_prof && prof_boot2()) { + malloc_mutex_unlock(&init_lock); + return (true); + } if (malloc_init_hard_finish()) { malloc_mutex_unlock(&init_lock); @@ -1355,34 +1404,36 @@ malloc_init_hard(void) */ static void * -imalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) +imalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, + prof_tctx_t *tctx, bool slow_path) { void *p; if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = imalloc(tsd, LARGE_MINCLASS); + szind_t ind_large = size2index(LARGE_MINCLASS); + p = imalloc(tsd, LARGE_MINCLASS, ind_large, slow_path); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = imalloc(tsd, usize); + p = imalloc(tsd, usize, ind, slow_path); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imalloc_prof(tsd_t *tsd, size_t usize) +imalloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool slow_path) { void *p; prof_tctx_t *tctx; tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = imalloc_prof_sample(tsd, usize, tctx); + p = imalloc_prof_sample(tsd, usize, ind, tctx, slow_path); else - p = imalloc(tsd, usize); + p = imalloc(tsd, usize, ind, slow_path); if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); @@ -1393,23 +1444,44 @@ imalloc_prof(tsd_t *tsd, size_t usize) } JEMALLOC_ALWAYS_INLINE_C void * -imalloc_body(size_t size, tsd_t **tsd, size_t *usize) +imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path) { + szind_t ind; - if (unlikely(malloc_init())) + if (slow_path && unlikely(malloc_init())) return (NULL); *tsd = tsd_fetch(); + ind = size2index(size); + if (unlikely(ind >= NSIZES)) + return (NULL); - if (config_prof && opt_prof) { - *usize = s2u(size); - if (unlikely(*usize == 0)) - return (NULL); - return (imalloc_prof(*tsd, *usize)); + if (config_stats || (config_prof && opt_prof) || (slow_path && + config_valgrind && unlikely(in_valgrind))) { + *usize = index2size(ind); + assert(*usize > 0 && *usize <= HUGE_MAXCLASS); } - if (config_stats || (config_valgrind && unlikely(in_valgrind))) - *usize = s2u(size); - return (imalloc(*tsd, size)); + if (config_prof && opt_prof) + return (imalloc_prof(*tsd, *usize, ind, slow_path)); + + return (imalloc(*tsd, size, ind, slow_path)); +} + +JEMALLOC_ALWAYS_INLINE_C void +imalloc_post_check(void *ret, tsd_t *tsd, size_t usize, bool slow_path) +{ + if (unlikely(ret == NULL)) { + if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) { + malloc_write(": Error in malloc(): " + "out of memory\n"); + abort(); + } + set_errno(ENOMEM); + } + if (config_stats && likely(ret != NULL)) { + assert(usize == isalloc(ret, config_prof)); + *tsd_thread_allocatedp_get(tsd) += usize; + } } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1424,21 +1496,20 @@ je_malloc(size_t size) if (size == 0) size = 1; - ret = imalloc_body(size, &tsd, &usize); - if (unlikely(ret == NULL)) { - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in malloc(): " - "out of memory\n"); - abort(); - } - set_errno(ENOMEM); + if (likely(!malloc_slow)) { + /* + * imalloc_body() is inlined so that fast and slow paths are + * generated separately with statically known slow_path. + */ + ret = imalloc_body(size, &tsd, &usize, false); + imalloc_post_check(ret, tsd, usize, false); + } else { + ret = imalloc_body(size, &tsd, &usize, true); + imalloc_post_check(ret, tsd, usize, true); + UTRACE(0, size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); } - if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(ret, config_prof)); - *tsd_thread_allocatedp_get(tsd) += usize; - } - UTRACE(0, size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); + return (ret); } @@ -1515,7 +1586,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) } usize = sa2u(size, alignment); - if (unlikely(usize == 0)) { + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { result = NULL; goto label_oom; } @@ -1576,34 +1647,35 @@ je_aligned_alloc(size_t alignment, size_t size) } static void * -icalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) +icalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, prof_tctx_t *tctx) { void *p; if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = icalloc(tsd, LARGE_MINCLASS); + szind_t ind_large = size2index(LARGE_MINCLASS); + p = icalloc(tsd, LARGE_MINCLASS, ind_large); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = icalloc(tsd, usize); + p = icalloc(tsd, usize, ind); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -icalloc_prof(tsd_t *tsd, size_t usize) +icalloc_prof(tsd_t *tsd, size_t usize, szind_t ind) { void *p; prof_tctx_t *tctx; tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = icalloc_prof_sample(tsd, usize, tctx); + p = icalloc_prof_sample(tsd, usize, ind, tctx); else - p = icalloc(tsd, usize); + p = icalloc(tsd, usize, ind); if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); @@ -1621,6 +1693,7 @@ je_calloc(size_t num, size_t size) void *ret; tsd_t *tsd; size_t num_size; + szind_t ind; size_t usize JEMALLOC_CC_SILENCE_INIT(0); if (unlikely(malloc_init())) { @@ -1650,17 +1723,18 @@ je_calloc(size_t num, size_t size) goto label_return; } + ind = size2index(num_size); + if (unlikely(ind >= NSIZES)) { + ret = NULL; + goto label_return; + } if (config_prof && opt_prof) { - usize = s2u(num_size); - if (unlikely(usize == 0)) { - ret = NULL; - goto label_return; - } - ret = icalloc_prof(tsd, usize); + usize = index2size(ind); + ret = icalloc_prof(tsd, usize, ind); } else { if (config_stats || (config_valgrind && unlikely(in_valgrind))) - usize = s2u(num_size); - ret = icalloc(tsd, num_size); + usize = index2size(ind); + ret = icalloc(tsd, num_size, ind); } label_return: @@ -1725,7 +1799,7 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) } JEMALLOC_INLINE_C void -ifree(tsd_t *tsd, void *ptr, tcache_t *tcache) +ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); @@ -1740,10 +1814,15 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache) usize = isalloc(ptr, config_prof); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; - if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(ptr); - iqalloc(tsd, ptr, tcache); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); + + if (likely(!slow_path)) + iqalloc(tsd, ptr, tcache, false); + else { + if (config_valgrind && unlikely(in_valgrind)) + rzsize = p2rz(ptr); + iqalloc(tsd, ptr, tcache, true); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); + } } JEMALLOC_INLINE_C void @@ -1780,7 +1859,7 @@ je_realloc(void *ptr, size_t size) /* realloc(ptr, 0) is equivalent to free(ptr). */ UTRACE(ptr, 0, 0); tsd = tsd_fetch(); - ifree(tsd, ptr, tcache_get(tsd, false)); + ifree(tsd, ptr, tcache_get(tsd, false), true); return (NULL); } size = 1; @@ -1797,8 +1876,8 @@ je_realloc(void *ptr, size_t size) if (config_prof && opt_prof) { usize = s2u(size); - ret = unlikely(usize == 0) ? NULL : irealloc_prof(tsd, - ptr, old_usize, usize); + ret = unlikely(usize == 0 || usize > HUGE_MAXCLASS) ? + NULL : irealloc_prof(tsd, ptr, old_usize, usize); } else { if (config_stats || (config_valgrind && unlikely(in_valgrind))) @@ -1807,7 +1886,10 @@ je_realloc(void *ptr, size_t size) } } else { /* realloc(NULL, size) is equivalent to malloc(size). */ - ret = imalloc_body(size, &tsd, &usize); + if (likely(!malloc_slow)) + ret = imalloc_body(size, &tsd, &usize, false); + else + ret = imalloc_body(size, &tsd, &usize, true); } if (unlikely(ret == NULL)) { @@ -1836,7 +1918,10 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); - ifree(tsd, ptr, tcache_get(tsd, false)); + if (likely(!malloc_slow)) + ifree(tsd, ptr, tcache_get(tsd, false), false); + else + ifree(tsd, ptr, tcache_get(tsd, false), true); } } @@ -1923,7 +2008,8 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, *alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); *usize = sa2u(size, *alignment); } - assert(*usize != 0); + if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS)) + return (true); *zero = MALLOCX_ZERO_GET(flags); if ((flags & MALLOCX_TCACHE_MASK) != 0) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) @@ -1934,7 +2020,7 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, *tcache = tcache_get(tsd, true); if ((flags & MALLOCX_ARENA_MASK) != 0) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - *arena = arena_get(tsd, arena_ind, true, true); + *arena = arena_get(arena_ind, true); if (unlikely(*arena == NULL)) return (true); } else @@ -1949,7 +2035,8 @@ imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, if (likely(flags == 0)) { *usize = s2u(size); - assert(*usize != 0); + if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS)) + return (true); *alignment = 0; *zero = false; *tcache = tcache_get(tsd, true); @@ -1965,12 +2052,15 @@ JEMALLOC_ALWAYS_INLINE_C void * imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { + szind_t ind; if (unlikely(alignment != 0)) return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); + ind = size2index(usize); + assert(ind < NSIZES); if (unlikely(zero)) - return (icalloct(tsd, usize, tcache, arena)); - return (imalloct(tsd, usize, tcache, arena)); + return (icalloct(tsd, usize, ind, tcache, arena)); + return (imalloct(tsd, usize, ind, tcache, arena)); } static void * @@ -2034,9 +2124,15 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) arena_t *arena; if (likely(flags == 0)) { - if (config_stats || (config_valgrind && unlikely(in_valgrind))) - *usize = s2u(size); - return (imalloc(tsd, size)); + szind_t ind = size2index(size); + if (unlikely(ind >= NSIZES)) + return (NULL); + if (config_stats || (config_valgrind && + unlikely(in_valgrind))) { + *usize = index2size(ind); + assert(*usize > 0 && *usize <= HUGE_MAXCLASS); + } + return (imalloc(tsd, size, ind, true)); } if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize, @@ -2172,7 +2268,7 @@ je_rallocx(void *ptr, size_t size, int flags) if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena = arena_get(tsd, arena_ind, true, true); + arena = arena_get(arena_ind, true); if (unlikely(arena == NULL)) goto label_oom; } else @@ -2192,7 +2288,8 @@ je_rallocx(void *ptr, size_t size, int flags) if (config_prof && opt_prof) { usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); - assert(usize != 0); + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) + goto label_oom; p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, zero, tcache, arena); if (unlikely(p == NULL)) @@ -2225,12 +2322,12 @@ label_oom: } JEMALLOC_ALWAYS_INLINE_C size_t -ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, bool zero) +ixallocx_helper(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, + size_t extra, size_t alignment, bool zero) { size_t usize; - if (ixalloc(ptr, old_usize, size, extra, alignment, zero)) + if (ixalloc(tsd, ptr, old_usize, size, extra, alignment, zero)) return (old_usize); usize = isalloc(ptr, config_prof); @@ -2238,14 +2335,15 @@ ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, } static size_t -ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, bool zero, prof_tctx_t *tctx) +ixallocx_prof_sample(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, + size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx) { size_t usize; if (tctx == NULL) return (old_usize); - usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero); + usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, alignment, + zero); return (usize); } @@ -2266,16 +2364,29 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, * prof_alloc_prep() to decide whether to capture a backtrace. * prof_realloc() will use the actual usize to decide whether to sample. */ - usize_max = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, - alignment); - assert(usize_max != 0); + if (alignment == 0) { + usize_max = s2u(size+extra); + assert(usize_max > 0 && usize_max <= HUGE_MAXCLASS); + } else { + usize_max = sa2u(size+extra, alignment); + if (unlikely(usize_max == 0 || usize_max > HUGE_MAXCLASS)) { + /* + * usize_max is out of range, and chances are that + * allocation will fail, but use the maximum possible + * value and carry on with prof_alloc_prep(), just in + * case allocation succeeds. + */ + usize_max = HUGE_MAXCLASS; + } + } tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - usize = ixallocx_prof_sample(ptr, old_usize, size, extra, + usize = ixallocx_prof_sample(tsd, ptr, old_usize, size, extra, alignment, zero, tctx); } else { - usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero); + usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, + alignment, zero); } if (usize == old_usize) { prof_alloc_rollback(tsd, tctx, false); @@ -2305,15 +2416,21 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) old_usize = isalloc(ptr, config_prof); - /* Clamp extra if necessary to avoid (size + extra) overflow. */ - if (unlikely(size + extra > HUGE_MAXCLASS)) { - /* Check for size overflow. */ - if (unlikely(size > HUGE_MAXCLASS)) { - usize = old_usize; - goto label_not_resized; - } - extra = HUGE_MAXCLASS - size; + /* + * The API explicitly absolves itself of protecting against (size + + * extra) numerical overflow, but we may need to clamp extra to avoid + * exceeding HUGE_MAXCLASS. + * + * Ordinarily, size limit checking is handled deeper down, but here we + * have to check as part of (size + extra) clamping, since we need the + * clamped value in the above helper functions. + */ + if (unlikely(size > HUGE_MAXCLASS)) { + usize = old_usize; + goto label_not_resized; } + if (unlikely(HUGE_MAXCLASS - size < extra)) + extra = HUGE_MAXCLASS - size; if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); @@ -2322,8 +2439,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) usize = ixallocx_prof(tsd, ptr, old_usize, size, extra, alignment, zero); } else { - usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero); + usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, + alignment, zero); } if (unlikely(usize == old_usize)) goto label_not_resized; @@ -2375,7 +2492,7 @@ je_dallocx(void *ptr, int flags) tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - ifree(tsd_fetch(), ptr, tcache); + ifree(tsd_fetch(), ptr, tcache, true); } JEMALLOC_ALWAYS_INLINE_C size_t @@ -2387,7 +2504,6 @@ inallocx(size_t size, int flags) usize = s2u(size); else usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); - assert(usize != 0); return (usize); } @@ -2420,13 +2536,18 @@ JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW JEMALLOC_ATTR(pure) je_nallocx(size_t size, int flags) { + size_t usize; assert(size != 0); if (unlikely(malloc_init())) return (0); - return (inallocx(size, flags)); + usize = inallocx(size, flags); + if (unlikely(usize > HUGE_MAXCLASS)) + return (0); + + return (usize); } JEMALLOC_EXPORT int JEMALLOC_NOTHROW @@ -2523,7 +2644,7 @@ JEMALLOC_EXPORT void _malloc_prefork(void) #endif { - unsigned i; + unsigned i, narenas; #ifdef JEMALLOC_MUTEX_INIT_CB if (!malloc_initialized()) @@ -2535,9 +2656,11 @@ _malloc_prefork(void) ctl_prefork(); prof_prefork(); malloc_mutex_prefork(&arenas_lock); - for (i = 0; i < narenas_total; i++) { - if (arenas[i] != NULL) - arena_prefork(arenas[i]); + for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { + arena_t *arena; + + if ((arena = arena_get(i, false)) != NULL) + arena_prefork(arena); } chunk_prefork(); base_prefork(); @@ -2551,7 +2674,7 @@ JEMALLOC_EXPORT void _malloc_postfork(void) #endif { - unsigned i; + unsigned i, narenas; #ifdef JEMALLOC_MUTEX_INIT_CB if (!malloc_initialized()) @@ -2562,9 +2685,11 @@ _malloc_postfork(void) /* Release all mutexes, now that fork() has completed. */ base_postfork_parent(); chunk_postfork_parent(); - for (i = 0; i < narenas_total; i++) { - if (arenas[i] != NULL) - arena_postfork_parent(arenas[i]); + for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { + arena_t *arena; + + if ((arena = arena_get(i, false)) != NULL) + arena_postfork_parent(arena); } malloc_mutex_postfork_parent(&arenas_lock); prof_postfork_parent(); @@ -2574,16 +2699,18 @@ _malloc_postfork(void) void jemalloc_postfork_child(void) { - unsigned i; + unsigned i, narenas; assert(malloc_initialized()); /* Release all mutexes, now that fork() has completed. */ base_postfork_child(); chunk_postfork_child(); - for (i = 0; i < narenas_total; i++) { - if (arenas[i] != NULL) - arena_postfork_child(arenas[i]); + for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { + arena_t *arena; + + if ((arena = arena_get(i, false)) != NULL) + arena_postfork_child(arena); } malloc_mutex_postfork_child(&arenas_lock); prof_postfork_child(); diff --git a/src/nstime.c b/src/nstime.c new file mode 100644 index 00000000..4cf90b58 --- /dev/null +++ b/src/nstime.c @@ -0,0 +1,148 @@ +#include "jemalloc/internal/jemalloc_internal.h" + +#define BILLION UINT64_C(1000000000) + +void +nstime_init(nstime_t *time, uint64_t ns) +{ + + time->ns = ns; +} + +void +nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec) +{ + + time->ns = sec * BILLION + nsec; +} + +uint64_t +nstime_ns(const nstime_t *time) +{ + + return (time->ns); +} + +uint64_t +nstime_sec(const nstime_t *time) +{ + + return (time->ns / BILLION); +} + +uint64_t +nstime_nsec(const nstime_t *time) +{ + + return (time->ns % BILLION); +} + +void +nstime_copy(nstime_t *time, const nstime_t *source) +{ + + *time = *source; +} + +int +nstime_compare(const nstime_t *a, const nstime_t *b) +{ + + return ((a->ns > b->ns) - (a->ns < b->ns)); +} + +void +nstime_add(nstime_t *time, const nstime_t *addend) +{ + + assert(UINT64_MAX - time->ns >= addend->ns); + + time->ns += addend->ns; +} + +void +nstime_subtract(nstime_t *time, const nstime_t *subtrahend) +{ + + assert(nstime_compare(time, subtrahend) >= 0); + + time->ns -= subtrahend->ns; +} + +void +nstime_imultiply(nstime_t *time, uint64_t multiplier) +{ + + assert((((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) << + 2))) == 0) || ((time->ns * multiplier) / multiplier == time->ns)); + + time->ns *= multiplier; +} + +void +nstime_idivide(nstime_t *time, uint64_t divisor) +{ + + assert(divisor != 0); + + time->ns /= divisor; +} + +uint64_t +nstime_divide(const nstime_t *time, const nstime_t *divisor) +{ + + assert(divisor->ns != 0); + + return (time->ns / divisor->ns); +} + +#ifdef JEMALLOC_JET +#undef nstime_update +#define nstime_update JEMALLOC_N(nstime_update_impl) +#endif +bool +nstime_update(nstime_t *time) +{ + nstime_t old_time; + + nstime_copy(&old_time, time); + +#ifdef _WIN32 + { + FILETIME ft; + uint64_t ticks; + GetSystemTimeAsFileTime(&ft); + ticks = (((uint64_t)ft.dwHighDateTime) << 32) | + ft.dwLowDateTime; + time->ns = ticks * 100; + } +#elif JEMALLOC_CLOCK_GETTIME + { + struct timespec ts; + + if (sysconf(_SC_MONOTONIC_CLOCK) > 0) + clock_gettime(CLOCK_MONOTONIC, &ts); + else + clock_gettime(CLOCK_REALTIME, &ts); + time->ns = ts.tv_sec * BILLION + ts.tv_nsec; + } +#else + struct timeval tv; + gettimeofday(&tv, NULL); + time->ns = tv.tv_sec * BILLION + tv.tv_usec * 1000; +#endif + + /* Handle non-monotonic clocks. */ + if (unlikely(nstime_compare(&old_time, time) > 0)) { + nstime_copy(time, &old_time); + return (true); + } + + return (false); +} +#ifdef JEMALLOC_JET +#undef nstime_update +#define nstime_update JEMALLOC_N(nstime_update) +nstime_update_t *nstime_update = JEMALLOC_N(nstime_update_impl); +#endif diff --git a/src/prng.c b/src/prng.c new file mode 100644 index 00000000..76646a2a --- /dev/null +++ b/src/prng.c @@ -0,0 +1,2 @@ +#define JEMALLOC_PRNG_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/src/prof.c b/src/prof.c index 5d2b9598..b3872277 100644 --- a/src/prof.c +++ b/src/prof.c @@ -109,7 +109,7 @@ static char prof_dump_buf[ 1 #endif ]; -static unsigned prof_dump_buf_end; +static size_t prof_dump_buf_end; static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. */ @@ -551,9 +551,9 @@ prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) /* * Create a single allocation that has space for vec of length bt->len. */ - prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, offsetof(prof_gctx_t, - vec) + (bt->len * sizeof(void *)), false, tcache_get(tsd, true), - true, NULL); + size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); + prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, size, + size2index(size), false, tcache_get(tsd, true), true, NULL, true); if (gctx == NULL) return (NULL); gctx->lock = prof_gctx_mutex_choose(); @@ -594,7 +594,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, prof_leave(tsd, tdata_self); /* Destroy gctx. */ malloc_mutex_unlock(gctx->lock); - idalloctm(tsd, gctx, tcache_get(tsd, false), true); + idalloctm(tsd, gctx, tcache_get(tsd, false), true, true); } else { /* * Compensate for increment in prof_tctx_destroy() or @@ -701,7 +701,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_tdata_destroy(tsd, tdata, false); if (destroy_tctx) - idalloctm(tsd, tctx, tcache_get(tsd, false), true); + idalloctm(tsd, tctx, tcache_get(tsd, false), true, true); } static bool @@ -730,7 +730,8 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); - idalloctm(tsd, gctx.v, tcache_get(tsd, false), true); + idalloctm(tsd, gctx.v, tcache_get(tsd, false), true, + true); return (true); } new_gctx = true; @@ -789,8 +790,9 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) /* Link a prof_tctx_t into gctx for this thread. */ tcache = tcache_get(tsd, true); - ret.v = iallocztm(tsd, sizeof(prof_tctx_t), false, tcache, true, - NULL); + ret.v = iallocztm(tsd, sizeof(prof_tctx_t), + size2index(sizeof(prof_tctx_t)), false, tcache, true, NULL, + true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -810,7 +812,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - idalloctm(tsd, ret.v, tcache, true); + idalloctm(tsd, ret.v, tcache, true, true); return (NULL); } malloc_mutex_lock(gctx->lock); @@ -869,8 +871,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata) * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - prng64(r, 53, tdata->prng_state, UINT64_C(6364136223846793005), - UINT64_C(1442695040888963407)); + r = prng_lg_range(&tdata->prng_state, 53); u = (double)r * (1.0/9007199254740992.0L); tdata->bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) @@ -988,7 +989,7 @@ prof_dump_close(bool propagate_err) static bool prof_dump_write(bool propagate_err, const char *s) { - unsigned i, slen, n; + size_t i, slen, n; cassert(config_prof); @@ -1211,7 +1212,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) tctx_tree_remove(&gctx->tctxs, to_destroy); idalloctm(tsd, to_destroy, - tcache_get(tsd, false), true); + tcache_get(tsd, false), true, true); } else next = NULL; } while (next != NULL); @@ -1358,6 +1359,7 @@ label_return: return (ret); } +#ifndef _WIN32 JEMALLOC_FORMAT_PRINTF(1, 2) static int prof_open_maps(const char *format, ...) @@ -1373,6 +1375,18 @@ prof_open_maps(const char *format, ...) return (mfd); } +#endif + +static int +prof_getpid(void) +{ + +#ifdef _WIN32 + return (GetCurrentProcessId()); +#else + return (getpid()); +#endif +} static bool prof_dump_maps(bool propagate_err) @@ -1383,9 +1397,11 @@ prof_dump_maps(bool propagate_err) cassert(config_prof); #ifdef __FreeBSD__ mfd = prof_open_maps("/proc/curproc/map"); +#elif defined(_WIN32) + mfd = -1; // Not implemented #else { - int pid = getpid(); + int pid = prof_getpid(); mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); if (mfd == -1) @@ -1554,12 +1570,12 @@ prof_dump_filename(char *filename, char v, uint64_t vseq) /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"FMTu64".%c%"FMTu64".heap", - opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq); + opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); } else { /* "....heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"FMTu64".%c.heap", - opt_prof_prefix, (int)getpid(), prof_dump_seq, v); + opt_prof_prefix, prof_getpid(), prof_dump_seq, v); } prof_dump_seq++; } @@ -1714,8 +1730,8 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, /* Initialize an empty cache for this thread. */ tcache = tcache_get(tsd, true); - tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), false, - tcache, true, NULL); + tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), + size2index(sizeof(prof_tdata_t)), false, tcache, true, NULL, true); if (tdata == NULL) return (NULL); @@ -1729,7 +1745,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsd, tdata, tcache, true); + idalloctm(tsd, tdata, tcache, true, true); return (NULL); } @@ -1784,9 +1800,9 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, tcache = tcache_get(tsd, false); if (tdata->thread_name != NULL) - idalloctm(tsd, tdata->thread_name, tcache, true); + idalloctm(tsd, tdata->thread_name, tcache, true, true); ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd, tdata, tcache, true); + idalloctm(tsd, tdata, tcache, true, true); } static void @@ -1947,7 +1963,8 @@ prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) if (size == 1) return (""); - ret = iallocztm(tsd, size, false, tcache_get(tsd, true), true, NULL); + ret = iallocztm(tsd, size, size2index(size), false, tcache_get(tsd, + true), true, NULL, true); if (ret == NULL) return (NULL); memcpy(ret, thread_name, size); @@ -1980,7 +1997,7 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) if (tdata->thread_name != NULL) { idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false), - true); + true, true); tdata->thread_name = NULL; } if (strlen(s) > 0) diff --git a/src/quarantine.c b/src/quarantine.c index 6c43dfca..ff8801cb 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -23,12 +23,14 @@ static quarantine_t * quarantine_init(tsd_t *tsd, size_t lg_maxobjs) { quarantine_t *quarantine; + size_t size; assert(tsd_nominal(tsd)); - quarantine = (quarantine_t *)iallocztm(tsd, offsetof(quarantine_t, objs) - + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)), false, - tcache_get(tsd, true), true, NULL); + size = offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) * + sizeof(quarantine_obj_t)); + quarantine = (quarantine_t *)iallocztm(tsd, size, size2index(size), + false, tcache_get(tsd, true), true, NULL, true); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -55,7 +57,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (tsd_quarantine_get(tsd) == NULL) tsd_quarantine_set(tsd, quarantine); else - idalloctm(tsd, quarantine, tcache_get(tsd, false), true); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); } static quarantine_t * @@ -87,7 +89,7 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } - idalloctm(tsd, quarantine, tcache_get(tsd, false), true); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); tsd_quarantine_set(tsd, ret); return (ret); @@ -98,7 +100,7 @@ quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine) { quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; assert(obj->usize == isalloc(obj->ptr, config_prof)); - idalloctm(tsd, obj->ptr, NULL, false); + idalloctm(tsd, obj->ptr, NULL, false, true); quarantine->curbytes -= obj->usize; quarantine->curobjs--; quarantine->first = (quarantine->first + 1) & ((ZU(1) << @@ -123,7 +125,7 @@ quarantine(tsd_t *tsd, void *ptr) assert(opt_quarantine); if ((quarantine = tsd_quarantine_get(tsd)) == NULL) { - idalloctm(tsd, ptr, NULL, false); + idalloctm(tsd, ptr, NULL, false, true); return; } /* @@ -162,7 +164,7 @@ quarantine(tsd_t *tsd, void *ptr) } } else { assert(quarantine->curbytes == 0); - idalloctm(tsd, ptr, NULL, false); + idalloctm(tsd, ptr, NULL, false, true); } } @@ -177,7 +179,7 @@ quarantine_cleanup(tsd_t *tsd) quarantine = tsd_quarantine_get(tsd); if (quarantine != NULL) { quarantine_drain(tsd, quarantine, 0); - idalloctm(tsd, quarantine, tcache_get(tsd, false), true); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); tsd_quarantine_set(tsd, NULL); } } diff --git a/src/stats.c b/src/stats.c index 154c3e74..a7249479 100644 --- a/src/stats.c +++ b/src/stats.c @@ -258,7 +258,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, { unsigned nthreads; const char *dss; - ssize_t lg_dirty_mult; + ssize_t lg_dirty_mult, decay_time; size_t page, pactive, pdirty, mapped; size_t metadata_mapped, metadata_allocated; uint64_t npurge, nmadvise, purged; @@ -278,13 +278,23 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", dss); CTL_M2_GET("stats.arenas.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t); - if (lg_dirty_mult >= 0) { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: %u:1\n", - (1U << lg_dirty_mult)); - } else { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: N/A\n"); + if (opt_purge == purge_mode_ratio) { + if (lg_dirty_mult >= 0) { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: %u:1\n", + (1U << lg_dirty_mult)); + } else { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: N/A\n"); + } + } + CTL_M2_GET("stats.arenas.0.decay_time", i, &decay_time, ssize_t); + if (opt_purge == purge_mode_decay) { + if (decay_time >= 0) { + malloc_cprintf(write_cb, cbopaque, "decay time: %zd\n", + decay_time); + } else + malloc_cprintf(write_cb, cbopaque, "decay time: N/A\n"); } CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); @@ -292,9 +302,8 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_GET("stats.arenas.0.nmadvise", i, &nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.purged", i, &purged, uint64_t); malloc_cprintf(write_cb, cbopaque, - "dirty pages: %zu:%zu active:dirty, %"FMTu64" sweep%s, %"FMTu64 - " madvise%s, %"FMTu64" purged\n", pactive, pdirty, npurge, npurge == - 1 ? "" : "s", nmadvise, nmadvise == 1 ? "" : "s", purged); + "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64", " + "purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); malloc_cprintf(write_cb, cbopaque, " allocated nmalloc ndalloc" @@ -426,9 +435,10 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool bv; unsigned uv; ssize_t ssv; - size_t sv, bsz, ssz, sssz, cpsz; + size_t sv, bsz, usz, ssz, sssz, cpsz; bsz = sizeof(bool); + usz = sizeof(unsigned); ssz = sizeof(size_t); sssz = sizeof(ssize_t); cpsz = sizeof(const char *); @@ -438,6 +448,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("config.debug", &bv, bool); malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", bv ? "enabled" : "disabled"); + malloc_cprintf(write_cb, cbopaque, + "config.malloc_conf: \"%s\"\n", config_malloc_conf); #define OPT_WRITE_BOOL(n) \ if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0) { \ @@ -453,6 +465,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, : "false", bv2 ? "true" : "false"); \ } \ } +#define OPT_WRITE_UNSIGNED(n) \ + if (je_mallctl("opt."#n, &uv, &usz, NULL, 0) == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zu\n", sv); \ + } #define OPT_WRITE_SIZE_T(n) \ if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ @@ -483,8 +500,14 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(abort) OPT_WRITE_SIZE_T(lg_chunk) OPT_WRITE_CHAR_P(dss) - OPT_WRITE_SIZE_T(narenas) - OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, arenas.lg_dirty_mult) + OPT_WRITE_UNSIGNED(narenas) + OPT_WRITE_CHAR_P(purge) + if (opt_purge == purge_mode_ratio) { + OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, + arenas.lg_dirty_mult) + } + if (opt_purge == purge_mode_decay) + OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time) OPT_WRITE_BOOL(stats_print) OPT_WRITE_CHAR_P(junk) OPT_WRITE_SIZE_T(quarantine) @@ -529,13 +552,22 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); - if (ssv >= 0) { + if (opt_purge == purge_mode_ratio) { + if (ssv >= 0) { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "%u:1\n", (1U << ssv)); + } else { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "N/A\n"); + } + } + CTL_GET("arenas.decay_time", &ssv, ssize_t); + if (opt_purge == purge_mode_decay) { malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: %u:1\n", - (1U << ssv)); - } else { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: N/A\n"); + "Unused dirty page decay time: %zd%s\n", + ssv, (ssv < 0) ? " (no decay)" : ""); } if (je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0) == 0) { malloc_cprintf(write_cb, cbopaque, diff --git a/src/tcache.c b/src/tcache.c index fdafd0c6..6e32f404 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -10,7 +10,7 @@ ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; tcache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per tcache. */ -size_t nhbins; +unsigned nhbins; size_t tcache_maxclass; tcaches_t *tcaches; @@ -67,20 +67,19 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) tcache->next_gc_bin++; if (tcache->next_gc_bin == nhbins) tcache->next_gc_bin = 0; - tcache->ev_cnt = 0; } void * tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind) + tcache_bin_t *tbin, szind_t binind, bool *tcache_success) { void *ret; - arena_tcache_fill_small(arena, tbin, binind, config_prof ? + arena_tcache_fill_small(tsd, arena, tbin, binind, config_prof ? tcache->prof_accumbytes : 0); if (config_prof) tcache->prof_accumbytes = 0; - ret = tcache_alloc_easy(tbin); + ret = tcache_alloc_easy(tbin, tcache_success); return (ret); } @@ -102,7 +101,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); + *(tbin->avail - 1)); arena_t *bin_arena = extent_node_arena_get(&chunk->node); arena_bin_t *bin = &bin_arena->bins[binind]; @@ -122,7 +121,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, } ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; + ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (extent_node_arena_get(&chunk->node) == bin_arena) { @@ -139,11 +138,12 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * locked. Stash the object, so that it can be * handled in a future pass. */ - tbin->avail[ndeferred] = ptr; + *(tbin->avail - 1 - ndeferred) = ptr; ndeferred++; } } malloc_mutex_unlock(&bin->lock); + arena_decay_ticks(tsd, bin_arena, nflush - ndeferred); } if (config_stats && !merged_stats) { /* @@ -158,8 +158,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, malloc_mutex_unlock(&bin->lock); } - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); + memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * + sizeof(void *)); tbin->ncached = rem; if ((int)tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; @@ -182,7 +182,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. */ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); + *(tbin->avail - 1)); arena_t *locked_arena = extent_node_arena_get(&chunk->node); UNUSED bool idump; @@ -206,7 +206,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; + ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (extent_node_arena_get(&chunk->node) == @@ -220,13 +220,14 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, * Stash the object, so that it can be handled * in a future pass. */ - tbin->avail[ndeferred] = ptr; + *(tbin->avail - 1 - ndeferred) = ptr; ndeferred++; } } malloc_mutex_unlock(&locked_arena->lock); if (config_prof && idump) prof_idump(); + arena_decay_ticks(tsd, locked_arena, nflush - ndeferred); } if (config_stats && !merged_stats) { /* @@ -241,8 +242,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, malloc_mutex_unlock(&arena->lock); } - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); + memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * + sizeof(void *)); tbin->ncached = rem; if ((int)tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; @@ -324,18 +325,26 @@ tcache_create(tsd_t *tsd, arena_t *arena) /* Avoid false cacheline sharing. */ size = sa2u(size, CACHELINE); - tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, a0get()); + tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, + arena_get(0, false)); if (tcache == NULL) return (NULL); tcache_arena_associate(tcache, arena); + ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR); + assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); for (i = 0; i < nhbins; i++) { tcache->tbins[i].lg_fill_div = 1; + stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); + /* + * avail points past the available space. Allocations will + * access the slots toward higher addresses (for the benefit of + * prefetch). + */ tcache->tbins[i].avail = (void **)((uintptr_t)tcache + (uintptr_t)stack_offset); - stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); } return (tcache); @@ -379,7 +388,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_prof_accum(arena, tcache->prof_accumbytes)) prof_idump(); - idalloctm(tsd, tcache, false, true); + idalloctm(tsd, tcache, false, true, true); } void @@ -445,7 +454,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - tcache = tcache_create(tsd, a0get()); + tcache = tcache_create(tsd, arena_get(0, false)); if (tcache == NULL) return (true); @@ -453,7 +462,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) elm = tcaches_avail; tcaches_avail = tcaches_avail->next; elm->tcache = tcache; - *r_ind = elm - tcaches; + *r_ind = (unsigned)(elm - tcaches); } else { elm = &tcaches[tcaches_past]; elm->tcache = tcache; diff --git a/src/ticker.c b/src/ticker.c new file mode 100644 index 00000000..db090240 --- /dev/null +++ b/src/ticker.c @@ -0,0 +1,2 @@ +#define JEMALLOC_TICKER_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/src/tsd.c b/src/tsd.c index 9ffe9afe..34c1573c 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -113,7 +113,7 @@ malloc_tsd_boot0(void) ncleanups = 0; if (tsd_boot0()) return (true); - *tsd_arenas_cache_bypassp_get(tsd_fetch()) = true; + *tsd_arenas_tdata_bypassp_get(tsd_fetch()) = true; return (false); } @@ -122,7 +122,7 @@ malloc_tsd_boot1(void) { tsd_boot1(); - *tsd_arenas_cache_bypassp_get(tsd_fetch()) = false; + *tsd_arenas_tdata_bypassp_get(tsd_fetch()) = false; } #ifdef _WIN32 @@ -148,13 +148,15 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) #ifdef _MSC_VER # ifdef _M_IX86 # pragma comment(linker, "/INCLUDE:__tls_used") +# pragma comment(linker, "/INCLUDE:_tls_callback") # else # pragma comment(linker, "/INCLUDE:_tls_used") +# pragma comment(linker, "/INCLUDE:tls_callback") # endif # pragma section(".CRT$XLY",long,read) #endif JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) -static BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, +BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; #endif diff --git a/src/util.c b/src/util.c index 4cb0d6c1..02673c70 100644 --- a/src/util.c +++ b/src/util.c @@ -1,3 +1,7 @@ +/* + * Define simple versions of assertion macros that won't recurse in case + * of assertion failures in malloc_*printf(). + */ #define assert(e) do { \ if (config_debug && !(e)) { \ malloc_write(": Failed assertion\n"); \ @@ -49,10 +53,14 @@ wrtmessage(void *cbopaque, const char *s) * Use syscall(2) rather than write(2) when possible in order to avoid * the possibility of memory allocation within libc. This is necessary * on FreeBSD; most operating systems do not have this problem though. + * + * syscall() returns long or int, depending on platform, so capture the + * unused result in the widest plausible type to avoid compiler + * warnings. */ - UNUSED int result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); + UNUSED long result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); #else - UNUSED int result = write(STDERR_FILENO, s, strlen(s)); + UNUSED ssize_t result = write(STDERR_FILENO, s, strlen(s)); #endif } @@ -82,7 +90,7 @@ buferror(int err, char *buf, size_t buflen) #ifdef _WIN32 FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, - (LPSTR)buf, buflen, NULL); + (LPSTR)buf, (DWORD)buflen, NULL); return (0); #elif defined(__GLIBC__) && defined(_GNU_SOURCE) char *b = strerror_r(err, buf, buflen); @@ -577,7 +585,8 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) str[i] = '\0'; else str[size - 1] = '\0'; - ret = i; + assert(i < INT_MAX); + ret = (int)i; #undef APPEND_C #undef APPEND_S @@ -648,3 +657,12 @@ malloc_printf(const char *format, ...) malloc_vcprintf(NULL, NULL, format, ap); va_end(ap); } + +/* + * Restore normal assertion macros, in order to make it possible to compile all + * C files as a single concatenation. + */ +#undef assert +#undef not_reached +#undef not_implemented +#include "jemalloc/internal/assert.h" diff --git a/src/zone.c b/src/zone.c index 12e1734a..6859b3fe 100644 --- a/src/zone.c +++ b/src/zone.c @@ -121,9 +121,11 @@ zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { + size_t alloc_size; - if (ivsalloc(ptr, config_prof) != 0) { - assert(ivsalloc(ptr, config_prof) == size); + alloc_size = ivsalloc(ptr, config_prof); + if (alloc_size != 0) { + assert(alloc_size == size); je_free(ptr); return; } diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 455569da..0a3dbeac 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -11,7 +11,6 @@ #ifdef _WIN32 # include "msvc_compat/strings.h" #endif -#include #ifdef _WIN32 # include @@ -94,6 +93,7 @@ # define JEMALLOC_H_STRUCTS # define JEMALLOC_H_EXTERNS # define JEMALLOC_H_INLINES +# include "jemalloc/internal/nstime.h" # include "jemalloc/internal/util.h" # include "jemalloc/internal/qr.h" # include "jemalloc/internal/ql.h" diff --git a/test/include/test/timer.h b/test/include/test/timer.h index a7fefdfd..ace6191b 100644 --- a/test/include/test/timer.h +++ b/test/include/test/timer.h @@ -1,23 +1,8 @@ /* Simple timer, for use in benchmark reporting. */ -#include -#include - -#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ - && _POSIX_MONOTONIC_CLOCK >= 0 - typedef struct { -#ifdef _WIN32 - FILETIME ft0; - FILETIME ft1; -#elif JEMALLOC_CLOCK_GETTIME - struct timespec ts0; - struct timespec ts1; - int clock_id; -#else - struct timeval tv0; - struct timeval tv1; -#endif + nstime_t t0; + nstime_t t1; } timedelta_t; void timer_start(timedelta_t *timer); diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 6253175d..42eee105 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -46,6 +46,27 @@ get_huge_size(size_t ind) return (get_size_impl("arenas.hchunk.0.size", ind)); } +TEST_BEGIN(test_overflow) +{ + size_t hugemax; + + hugemax = get_huge_size(get_nhuge()-1); + + assert_ptr_null(mallocx(hugemax+1, 0), + "Expected OOM for mallocx(size=%#zx, 0)", hugemax+1); + + assert_ptr_null(mallocx(ZU(PTRDIFF_MAX)+1, 0), + "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); + + assert_ptr_null(mallocx(SIZE_T_MAX, 0), + "Expected OOM for mallocx(size=%#zx, 0)", SIZE_T_MAX); + + assert_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), + "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))", + ZU(PTRDIFF_MAX)+1); +} +TEST_END + TEST_BEGIN(test_oom) { size_t hugemax, size, alignment; @@ -176,6 +197,7 @@ main(void) { return (test( + test_overflow, test_oom, test_basic, test_alignment_and_size)); diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index be1b27b7..66ad8660 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -1,5 +1,51 @@ #include "test/jemalloc_test.h" +static unsigned +get_nsizes_impl(const char *cmd) +{ + unsigned ret; + size_t z; + + z = sizeof(unsigned); + assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + "Unexpected mallctl(\"%s\", ...) failure", cmd); + + return (ret); +} + +static unsigned +get_nhuge(void) +{ + + return (get_nsizes_impl("arenas.nhchunks")); +} + +static size_t +get_size_impl(const char *cmd, size_t ind) +{ + size_t ret; + size_t z; + size_t mib[4]; + size_t miblen = 4; + + z = sizeof(size_t); + assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + mib[2] = ind; + z = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); + + return (ret); +} + +static size_t +get_huge_size(size_t ind) +{ + + return (get_size_impl("arenas.hchunk.0.size", ind)); +} + TEST_BEGIN(test_grow_and_shrink) { void *p, *q; @@ -138,22 +184,22 @@ TEST_END TEST_BEGIN(test_lg_align_and_zero) { void *p, *q; - size_t lg_align, sz; + unsigned lg_align; + size_t sz; #define MAX_LG_ALIGN 25 #define MAX_VALIDATE (ZU(1) << 22) - lg_align = ZU(0); + lg_align = 0; p = mallocx(1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); assert_ptr_not_null(p, "Unexpected mallocx() error"); for (lg_align++; lg_align <= MAX_LG_ALIGN; lg_align++) { q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); assert_ptr_not_null(q, - "Unexpected rallocx() error for lg_align=%zu", lg_align); + "Unexpected rallocx() error for lg_align=%u", lg_align); assert_ptr_null( (void *)((uintptr_t)q & ((ZU(1) << lg_align)-1)), - "%p inadequately aligned for lg_align=%zu", - q, lg_align); + "%p inadequately aligned for lg_align=%u", q, lg_align); sz = sallocx(q, 0); if ((sz << 1) <= MAX_VALIDATE) { assert_false(validate_fill(q, 0, 0, sz), @@ -173,6 +219,33 @@ TEST_BEGIN(test_lg_align_and_zero) } TEST_END +TEST_BEGIN(test_overflow) +{ + size_t hugemax; + void *p; + + hugemax = get_huge_size(get_nhuge()-1); + + p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + assert_ptr_null(rallocx(p, hugemax+1, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", hugemax+1); + + assert_ptr_null(rallocx(p, ZU(PTRDIFF_MAX)+1, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); + + assert_ptr_null(rallocx(p, SIZE_T_MAX, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", SIZE_T_MAX); + + assert_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), + "Expected OOM for rallocx(p, size=1, MALLOCX_ALIGN(%#zx))", + ZU(PTRDIFF_MAX)+1); + + dallocx(p, 0); +} +TEST_END + int main(void) { @@ -181,5 +254,6 @@ main(void) test_grow_and_shrink, test_zero, test_align, - test_lg_align_and_zero)); + test_lg_align_and_zero, + test_overflow)); } diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 00451961..5c4998b6 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -305,63 +305,63 @@ TEST_END TEST_BEGIN(test_extra_huge) { int flags = MALLOCX_ARENA(arena_ind()); - size_t largemax, huge0, huge1, huge2, hugemax; + size_t largemax, huge1, huge2, huge3, hugemax; void *p; /* Get size classes. */ largemax = get_large_size(get_nlarge()-1); - huge0 = get_huge_size(0); huge1 = get_huge_size(1); huge2 = get_huge_size(2); + huge3 = get_huge_size(3); hugemax = get_huge_size(get_nhuge()-1); - p = mallocx(huge2, flags); + p = mallocx(huge3, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_zu_eq(xallocx(p, huge2, 0, flags), huge2, + assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); /* Test size decrease with zero extra. */ - assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, largemax, 0, flags), huge0, + assert_zu_ge(xallocx(p, largemax, 0, flags), huge1, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge2, 0, flags), huge2, + assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); /* Test size decrease with non-zero extra. */ - assert_zu_eq(xallocx(p, huge0, huge2 - huge0, flags), huge2, + assert_zu_eq(xallocx(p, huge1, huge3 - huge1, flags), huge3, + "Unexpected xallocx() behavior"); + assert_zu_eq(xallocx(p, huge2, huge3 - huge2, flags), huge3, "Unexpected xallocx() behavior"); assert_zu_eq(xallocx(p, huge1, huge2 - huge1, flags), huge2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge0, huge1 - huge0, flags), huge1, - "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, largemax, huge0 - largemax, flags), huge0, + assert_zu_ge(xallocx(p, largemax, huge1 - largemax, flags), huge1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); /* Test size increase with zero extra. */ - assert_zu_le(xallocx(p, huge2, 0, flags), huge2, + assert_zu_le(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, hugemax+1, 0, flags), huge2, + assert_zu_le(xallocx(p, hugemax+1, 0, flags), huge3, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, huge0, SIZE_T_MAX - huge0, flags), hugemax, + assert_zu_le(xallocx(p, huge1, SIZE_T_MAX - huge1, flags), hugemax, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, huge0, huge2 - huge0, flags), huge2, + assert_zu_le(xallocx(p, huge1, huge3 - huge1, flags), huge3, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge2, 0, flags), huge2, + assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); /* Test size+extra overflow. */ - assert_zu_le(xallocx(p, huge2, hugemax - huge2 + 1, flags), hugemax, + assert_zu_le(xallocx(p, huge3, hugemax - huge3 + 1, flags), hugemax, "Unexpected xallocx() behavior"); dallocx(p, flags); diff --git a/test/src/timer.c b/test/src/timer.c index 0c93abaf..e91b3cf2 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -4,50 +4,26 @@ void timer_start(timedelta_t *timer) { -#ifdef _WIN32 - GetSystemTimeAsFileTime(&timer->ft0); -#elif JEMALLOC_CLOCK_GETTIME - if (sysconf(_SC_MONOTONIC_CLOCK) <= 0) - timer->clock_id = CLOCK_REALTIME; - else - timer->clock_id = CLOCK_MONOTONIC; - clock_gettime(timer->clock_id, &timer->ts0); -#else - gettimeofday(&timer->tv0, NULL); -#endif + nstime_init(&timer->t0, 0); + nstime_update(&timer->t0); } void timer_stop(timedelta_t *timer) { -#ifdef _WIN32 - GetSystemTimeAsFileTime(&timer->ft0); -#elif JEMALLOC_CLOCK_GETTIME - clock_gettime(timer->clock_id, &timer->ts1); -#else - gettimeofday(&timer->tv1, NULL); -#endif + nstime_copy(&timer->t1, &timer->t0); + nstime_update(&timer->t1); } uint64_t timer_usec(const timedelta_t *timer) { + nstime_t delta; -#ifdef _WIN32 - uint64_t t0, t1; - t0 = (((uint64_t)timer->ft0.dwHighDateTime) << 32) | - timer->ft0.dwLowDateTime; - t1 = (((uint64_t)timer->ft1.dwHighDateTime) << 32) | - timer->ft1.dwLowDateTime; - return ((t1 - t0) / 10); -#elif JEMALLOC_CLOCK_GETTIME - return (((timer->ts1.tv_sec - timer->ts0.tv_sec) * 1000000) + - (timer->ts1.tv_nsec - timer->ts0.tv_nsec) / 1000); -#else - return (((timer->tv1.tv_sec - timer->tv0.tv_sec) * 1000000) + - timer->tv1.tv_usec - timer->tv0.tv_usec); -#endif + nstime_copy(&delta, &timer->t1); + nstime_subtract(&delta, &timer->t0); + return (nstime_ns(&delta) / 1000); } void diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 7da583d8..1ab0bb8e 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -6,7 +6,11 @@ TEST_BEGIN(test_bitmap_size) prev_size = 0; for (i = 1; i <= BITMAP_MAXBITS; i++) { - size_t size = bitmap_size(i); + bitmap_info_t binfo; + size_t size; + + bitmap_info_init(&binfo, i); + size = bitmap_size(&binfo); assert_true(size >= prev_size, "Bitmap size is smaller than expected"); prev_size = size; @@ -23,8 +27,8 @@ TEST_BEGIN(test_bitmap_init) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc( + bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) { @@ -46,8 +50,8 @@ TEST_BEGIN(test_bitmap_set) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc( + bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) @@ -69,8 +73,8 @@ TEST_BEGIN(test_bitmap_unset) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc( + bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) @@ -98,8 +102,8 @@ TEST_BEGIN(test_bitmap_sfu) bitmap_info_init(&binfo, i); { ssize_t j; - bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc( + bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); /* Iteratively set bits starting at the beginning. */ diff --git a/test/unit/decay.c b/test/unit/decay.c new file mode 100644 index 00000000..70a2e67a --- /dev/null +++ b/test/unit/decay.c @@ -0,0 +1,360 @@ +#include "test/jemalloc_test.h" + +const char *malloc_conf = "purge:decay,decay_time:1"; + +static nstime_update_t *nstime_update_orig; + +static unsigned nupdates_mock; +static nstime_t time_mock; +static bool nonmonotonic_mock; + +static bool +nstime_update_mock(nstime_t *time) +{ + + nupdates_mock++; + if (!nonmonotonic_mock) + nstime_copy(time, &time_mock); + return (nonmonotonic_mock); +} + +TEST_BEGIN(test_decay_ticks) +{ + ticker_t *decay_ticker; + unsigned tick0, tick1; + size_t sz, huge0, large0; + void *p; + + test_skip_if(opt_purge != purge_mode_decay); + + decay_ticker = decay_ticker_get(tsd_fetch(), 0); + assert_ptr_not_null(decay_ticker, + "Unexpected failure getting decay ticker"); + + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.hchunk.0.size", &huge0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + /* + * Test the standard APIs using a huge size class, since we can't + * control tcache interactions (except by completely disabling tcache + * for the entire test program). + */ + + /* malloc(). */ + tick0 = ticker_read(decay_ticker); + p = malloc(huge0); + assert_ptr_not_null(p, "Unexpected malloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during malloc()"); + /* free(). */ + tick0 = ticker_read(decay_ticker); + free(p); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during free()"); + + /* calloc(). */ + tick0 = ticker_read(decay_ticker); + p = calloc(1, huge0); + assert_ptr_not_null(p, "Unexpected calloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during calloc()"); + free(p); + + /* posix_memalign(). */ + tick0 = ticker_read(decay_ticker); + assert_d_eq(posix_memalign(&p, sizeof(size_t), huge0), 0, + "Unexpected posix_memalign() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during posix_memalign()"); + free(p); + + /* aligned_alloc(). */ + tick0 = ticker_read(decay_ticker); + p = aligned_alloc(sizeof(size_t), huge0); + assert_ptr_not_null(p, "Unexpected aligned_alloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during aligned_alloc()"); + free(p); + + /* realloc(). */ + /* Allocate. */ + tick0 = ticker_read(decay_ticker); + p = realloc(NULL, huge0); + assert_ptr_not_null(p, "Unexpected realloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + /* Reallocate. */ + tick0 = ticker_read(decay_ticker); + p = realloc(p, huge0); + assert_ptr_not_null(p, "Unexpected realloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + /* Deallocate. */ + tick0 = ticker_read(decay_ticker); + realloc(p, 0); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + + /* + * Test the *allocx() APIs using huge, large, and small size classes, + * with tcache explicitly disabled. + */ + { + unsigned i; + size_t allocx_sizes[3]; + allocx_sizes[0] = huge0; + allocx_sizes[1] = large0; + allocx_sizes[2] = 1; + + for (i = 0; i < sizeof(allocx_sizes) / sizeof(size_t); i++) { + sz = allocx_sizes[i]; + + /* mallocx(). */ + tick0 = ticker_read(decay_ticker); + p = mallocx(sz, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during mallocx() (sz=%zu)", + sz); + /* rallocx(). */ + tick0 = ticker_read(decay_ticker); + p = rallocx(p, sz, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected rallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during rallocx() (sz=%zu)", + sz); + /* xallocx(). */ + tick0 = ticker_read(decay_ticker); + xallocx(p, sz, 0, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during xallocx() (sz=%zu)", + sz); + /* dallocx(). */ + tick0 = ticker_read(decay_ticker); + dallocx(p, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during dallocx() (sz=%zu)", + sz); + /* sdallocx(). */ + p = mallocx(sz, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick0 = ticker_read(decay_ticker); + sdallocx(p, sz, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during sdallocx() " + "(sz=%zu)", sz); + } + } + + /* + * Test tcache fill/flush interactions for large and small size classes, + * using an explicit tcache. + */ + if (config_tcache) { + unsigned tcache_ind, i; + size_t tcache_sizes[2]; + tcache_sizes[0] = large0; + tcache_sizes[1] = 1; + + sz = sizeof(unsigned); + assert_d_eq(mallctl("tcache.create", &tcache_ind, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); + + for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) { + sz = tcache_sizes[i]; + + /* tcache fill. */ + tick0 = ticker_read(decay_ticker); + p = mallocx(sz, MALLOCX_TCACHE(tcache_ind)); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache fill " + "(sz=%zu)", sz); + /* tcache flush. */ + dallocx(p, MALLOCX_TCACHE(tcache_ind)); + tick0 = ticker_read(decay_ticker); + assert_d_eq(mallctl("tcache.flush", NULL, NULL, + &tcache_ind, sizeof(unsigned)), 0, + "Unexpected mallctl failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache flush " + "(sz=%zu)", sz); + } + } +} +TEST_END + +TEST_BEGIN(test_decay_ticker) +{ +#define NPS 1024 + int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); + void *ps[NPS]; + uint64_t epoch; + uint64_t npurge0 = 0; + uint64_t npurge1 = 0; + size_t sz, large; + unsigned i, nupdates0; + nstime_t time, decay_time, deadline; + + test_skip_if(opt_purge != purge_mode_decay); + + /* + * Allocate a bunch of large objects, pause the clock, deallocate the + * objects, restore the clock, then [md]allocx() in a tight loop to + * verify the ticker triggers purging. + */ + + if (config_tcache) { + size_t tcache_max; + + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + large = nallocx(tcache_max + 1, flags); + } else { + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.lrun.0.size", &large, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); + } + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, + "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), + config_stats ? 0 : ENOENT, "Unexpected mallctl result"); + + for (i = 0; i < NPS; i++) { + ps[i] = mallocx(large, flags); + assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); + } + + nupdates_mock = 0; + nstime_init(&time_mock, 0); + nstime_update(&time_mock); + nonmonotonic_mock = false; + + nstime_update_orig = nstime_update; + nstime_update = nstime_update_mock; + + for (i = 0; i < NPS; i++) { + dallocx(ps[i], flags); + nupdates0 = nupdates_mock; + assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + "Unexpected arena.0.decay failure"); + assert_u_gt(nupdates_mock, nupdates0, + "Expected nstime_update() to be called"); + } + + nstime_update = nstime_update_orig; + + nstime_init(&time, 0); + nstime_update(&time); + nstime_init2(&decay_time, opt_decay_time, 0); + nstime_copy(&deadline, &time); + nstime_add(&deadline, &decay_time); + do { + for (i = 0; i < DECAY_NTICKS_PER_UPDATE / 2; i++) { + void *p = mallocx(1, flags); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + dallocx(p, flags); + } + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, + sizeof(uint64_t)), 0, "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, + NULL, 0), config_stats ? 0 : ENOENT, + "Unexpected mallctl result"); + + nstime_update(&time); + } while (nstime_compare(&time, &deadline) <= 0 && npurge1 == npurge0); + + if (config_stats) + assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); +#undef NPS +} +TEST_END + +TEST_BEGIN(test_decay_nonmonotonic) +{ +#define NPS (SMOOTHSTEP_NSTEPS + 1) + int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); + void *ps[NPS]; + uint64_t epoch; + uint64_t npurge0 = 0; + uint64_t npurge1 = 0; + size_t sz, large0; + unsigned i, nupdates0; + + test_skip_if(opt_purge != purge_mode_decay); + + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, + "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), + config_stats ? 0 : ENOENT, "Unexpected mallctl result"); + + nupdates_mock = 0; + nstime_init(&time_mock, 0); + nstime_update(&time_mock); + nonmonotonic_mock = true; + + nstime_update_orig = nstime_update; + nstime_update = nstime_update_mock; + + for (i = 0; i < NPS; i++) { + ps[i] = mallocx(large0, flags); + assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); + } + + for (i = 0; i < NPS; i++) { + dallocx(ps[i], flags); + nupdates0 = nupdates_mock; + assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + "Unexpected arena.0.decay failure"); + assert_u_gt(nupdates_mock, nupdates0, + "Expected nstime_update() to be called"); + } + + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, + "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, NULL, 0), + config_stats ? 0 : ENOENT, "Unexpected mallctl result"); + + if (config_stats) + assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); + + nstime_update = nstime_update_orig; +#undef NPS +} +TEST_END + +int +main(void) +{ + + return (test( + test_decay_ticks, + test_decay_ticker, + test_decay_nonmonotonic)); +} diff --git a/test/unit/hash.c b/test/unit/hash.c index 77a8cede..f50ba81b 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -35,7 +35,7 @@ typedef enum { hash_variant_x64_128 } hash_variant_t; -static size_t +static int hash_variant_bits(hash_variant_t variant) { @@ -59,17 +59,17 @@ hash_variant_string(hash_variant_t variant) } } +#define KEY_SIZE 256 static void -hash_variant_verify(hash_variant_t variant) +hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { - const size_t hashbytes = hash_variant_bits(variant) / 8; - uint8_t key[256]; + const int hashbytes = hash_variant_bits(variant) / 8; VARIABLE_ARRAY(uint8_t, hashes, hashbytes * 256); VARIABLE_ARRAY(uint8_t, final, hashbytes); unsigned i; uint32_t computed, expected; - memset(key, 0, sizeof(key)); + memset(key, 0, KEY_SIZE); memset(hashes, 0, sizeof(hashes)); memset(final, 0, sizeof(final)); @@ -139,6 +139,19 @@ hash_variant_verify(hash_variant_t variant) hash_variant_string(variant), expected, computed); } +static void +hash_variant_verify(hash_variant_t variant) +{ +#define MAX_ALIGN 16 + uint8_t key[KEY_SIZE + (MAX_ALIGN - 1)]; + unsigned i; + + for (i = 0; i < MAX_ALIGN; i++) + hash_variant_verify_key(variant, &key[i]); +#undef MAX_ALIGN +} +#undef KEY_SIZE + TEST_BEGIN(test_hash_x86_32) { diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 31e354ca..69f8c20c 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -117,8 +117,8 @@ TEST_END TEST_BEGIN(test_mallctl_config) { -#define TEST_MALLCTL_CONFIG(config) do { \ - bool oldval; \ +#define TEST_MALLCTL_CONFIG(config, t) do { \ + t oldval; \ size_t sz = sizeof(oldval); \ assert_d_eq(mallctl("config."#config, &oldval, &sz, NULL, 0), \ 0, "Unexpected mallctl() failure"); \ @@ -126,20 +126,21 @@ TEST_BEGIN(test_mallctl_config) assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ } while (0) - TEST_MALLCTL_CONFIG(cache_oblivious); - TEST_MALLCTL_CONFIG(debug); - TEST_MALLCTL_CONFIG(fill); - TEST_MALLCTL_CONFIG(lazy_lock); - TEST_MALLCTL_CONFIG(munmap); - TEST_MALLCTL_CONFIG(prof); - TEST_MALLCTL_CONFIG(prof_libgcc); - TEST_MALLCTL_CONFIG(prof_libunwind); - TEST_MALLCTL_CONFIG(stats); - TEST_MALLCTL_CONFIG(tcache); - TEST_MALLCTL_CONFIG(tls); - TEST_MALLCTL_CONFIG(utrace); - TEST_MALLCTL_CONFIG(valgrind); - TEST_MALLCTL_CONFIG(xmalloc); + TEST_MALLCTL_CONFIG(cache_oblivious, bool); + TEST_MALLCTL_CONFIG(debug, bool); + TEST_MALLCTL_CONFIG(fill, bool); + TEST_MALLCTL_CONFIG(lazy_lock, bool); + TEST_MALLCTL_CONFIG(malloc_conf, const char *); + TEST_MALLCTL_CONFIG(munmap, bool); + TEST_MALLCTL_CONFIG(prof, bool); + TEST_MALLCTL_CONFIG(prof_libgcc, bool); + TEST_MALLCTL_CONFIG(prof_libunwind, bool); + TEST_MALLCTL_CONFIG(stats, bool); + TEST_MALLCTL_CONFIG(tcache, bool); + TEST_MALLCTL_CONFIG(tls, bool); + TEST_MALLCTL_CONFIG(utrace, bool); + TEST_MALLCTL_CONFIG(valgrind, bool); + TEST_MALLCTL_CONFIG(xmalloc, bool); #undef TEST_MALLCTL_CONFIG } @@ -162,8 +163,10 @@ TEST_BEGIN(test_mallctl_opt) TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(size_t, lg_chunk, always); TEST_MALLCTL_OPT(const char *, dss, always); - TEST_MALLCTL_OPT(size_t, narenas, always); + TEST_MALLCTL_OPT(unsigned, narenas, always); + TEST_MALLCTL_OPT(const char *, purge, always); TEST_MALLCTL_OPT(ssize_t, lg_dirty_mult, always); + TEST_MALLCTL_OPT(ssize_t, decay_time, always); TEST_MALLCTL_OPT(bool, stats_print, always); TEST_MALLCTL_OPT(const char *, junk, fill); TEST_MALLCTL_OPT(size_t, quarantine, fill); @@ -354,6 +357,8 @@ TEST_BEGIN(test_arena_i_lg_dirty_mult) ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult; size_t sz = sizeof(ssize_t); + test_skip_if(opt_purge != purge_mode_ratio); + assert_d_eq(mallctl("arena.0.lg_dirty_mult", &orig_lg_dirty_mult, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -381,6 +386,39 @@ TEST_BEGIN(test_arena_i_lg_dirty_mult) } TEST_END +TEST_BEGIN(test_arena_i_decay_time) +{ + ssize_t decay_time, orig_decay_time, prev_decay_time; + size_t sz = sizeof(ssize_t); + + test_skip_if(opt_purge != purge_mode_decay); + + assert_d_eq(mallctl("arena.0.decay_time", &orig_decay_time, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + + decay_time = -2; + assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, + &decay_time, sizeof(ssize_t)), EFAULT, + "Unexpected mallctl() success"); + + decay_time = 0x7fffffff; + assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, + &decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + + for (prev_decay_time = decay_time, decay_time = -1; + decay_time < 20; prev_decay_time = decay_time, decay_time++) { + ssize_t old_decay_time; + + assert_d_eq(mallctl("arena.0.decay_time", &old_decay_time, + &sz, &decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + assert_zd_eq(old_decay_time, prev_decay_time, + "Unexpected old arena.0.decay_time"); + } +} +TEST_END + TEST_BEGIN(test_arena_i_purge) { unsigned narenas; @@ -401,6 +439,26 @@ TEST_BEGIN(test_arena_i_purge) } TEST_END +TEST_BEGIN(test_arena_i_decay) +{ + unsigned narenas; + size_t sz = sizeof(unsigned); + size_t mib[3]; + size_t miblen = 3; + + assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl() failure"); + + assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = narenas; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} +TEST_END + TEST_BEGIN(test_arena_i_dss) { const char *dss_prec_old, *dss_prec_new; @@ -465,6 +523,8 @@ TEST_BEGIN(test_arenas_lg_dirty_mult) ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult; size_t sz = sizeof(ssize_t); + test_skip_if(opt_purge != purge_mode_ratio); + assert_d_eq(mallctl("arenas.lg_dirty_mult", &orig_lg_dirty_mult, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -492,6 +552,39 @@ TEST_BEGIN(test_arenas_lg_dirty_mult) } TEST_END +TEST_BEGIN(test_arenas_decay_time) +{ + ssize_t decay_time, orig_decay_time, prev_decay_time; + size_t sz = sizeof(ssize_t); + + test_skip_if(opt_purge != purge_mode_decay); + + assert_d_eq(mallctl("arenas.decay_time", &orig_decay_time, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + + decay_time = -2; + assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, + &decay_time, sizeof(ssize_t)), EFAULT, + "Unexpected mallctl() success"); + + decay_time = 0x7fffffff; + assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, + &decay_time, sizeof(ssize_t)), 0, + "Expected mallctl() failure"); + + for (prev_decay_time = decay_time, decay_time = -1; + decay_time < 20; prev_decay_time = decay_time, decay_time++) { + ssize_t old_decay_time; + + assert_d_eq(mallctl("arenas.decay_time", &old_decay_time, + &sz, &decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + assert_zd_eq(old_decay_time, prev_decay_time, + "Unexpected old arenas.decay_time"); + } +} +TEST_END + TEST_BEGIN(test_arenas_constants) { @@ -594,8 +687,10 @@ TEST_BEGIN(test_stats_arenas) 0), 0, "Unexpected mallctl() failure"); \ } while (0) - TEST_STATS_ARENAS(const char *, dss); TEST_STATS_ARENAS(unsigned, nthreads); + TEST_STATS_ARENAS(const char *, dss); + TEST_STATS_ARENAS(ssize_t, lg_dirty_mult); + TEST_STATS_ARENAS(ssize_t, decay_time); TEST_STATS_ARENAS(size_t, pactive); TEST_STATS_ARENAS(size_t, pdirty); @@ -620,10 +715,13 @@ main(void) test_tcache, test_thread_arena, test_arena_i_lg_dirty_mult, + test_arena_i_decay_time, test_arena_i_purge, + test_arena_i_decay, test_arena_i_dss, test_arenas_initialized, test_arenas_lg_dirty_mult, + test_arenas_decay_time, test_arenas_constants, test_arenas_bin_constants, test_arenas_lrun_constants, diff --git a/test/unit/nstime.c b/test/unit/nstime.c new file mode 100644 index 00000000..cd7d9a6d --- /dev/null +++ b/test/unit/nstime.c @@ -0,0 +1,220 @@ +#include "test/jemalloc_test.h" + +#define BILLION UINT64_C(1000000000) + +TEST_BEGIN(test_nstime_init) +{ + nstime_t nst; + + nstime_init(&nst, 42000000043); + assert_u64_eq(nstime_ns(&nst), 42000000043, "ns incorrectly read"); + assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); + assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); +} +TEST_END + +TEST_BEGIN(test_nstime_init2) +{ + nstime_t nst; + + nstime_init2(&nst, 42, 43); + assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); + assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); +} +TEST_END + +TEST_BEGIN(test_nstime_copy) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_init(&nstb, 0); + nstime_copy(&nstb, &nsta); + assert_u64_eq(nstime_sec(&nstb), 42, "sec incorrectly copied"); + assert_u64_eq(nstime_nsec(&nstb), 43, "nsec incorrectly copied"); +} +TEST_END + +TEST_BEGIN(test_nstime_compare) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, "Times should be equal"); + assert_d_eq(nstime_compare(&nstb, &nsta), 0, "Times should be equal"); + + nstime_init2(&nstb, 42, 42); + assert_d_eq(nstime_compare(&nsta, &nstb), 1, + "nsta should be greater than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), -1, + "nstb should be less than nsta"); + + nstime_init2(&nstb, 42, 44); + assert_d_eq(nstime_compare(&nsta, &nstb), -1, + "nsta should be less than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), 1, + "nstb should be greater than nsta"); + + nstime_init2(&nstb, 41, BILLION - 1); + assert_d_eq(nstime_compare(&nsta, &nstb), 1, + "nsta should be greater than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), -1, + "nstb should be less than nsta"); + + nstime_init2(&nstb, 43, 0); + assert_d_eq(nstime_compare(&nsta, &nstb), -1, + "nsta should be less than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), 1, + "nstb should be greater than nsta"); +} +TEST_END + +TEST_BEGIN(test_nstime_add) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_add(&nsta, &nstb); + nstime_init2(&nstb, 84, 86); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect addition result"); + + nstime_init2(&nsta, 42, BILLION - 1); + nstime_copy(&nstb, &nsta); + nstime_add(&nsta, &nstb); + nstime_init2(&nstb, 85, BILLION - 2); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect addition result"); +} +TEST_END + +TEST_BEGIN(test_nstime_subtract) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_subtract(&nsta, &nstb); + nstime_init(&nstb, 0); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect subtraction result"); + + nstime_init2(&nsta, 42, 43); + nstime_init2(&nstb, 41, 44); + nstime_subtract(&nsta, &nstb); + nstime_init2(&nstb, 0, BILLION - 1); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect subtraction result"); +} +TEST_END + +TEST_BEGIN(test_nstime_imultiply) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_imultiply(&nsta, 10); + nstime_init2(&nstb, 420, 430); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect multiplication result"); + + nstime_init2(&nsta, 42, 666666666); + nstime_imultiply(&nsta, 3); + nstime_init2(&nstb, 127, 999999998); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect multiplication result"); +} +TEST_END + +TEST_BEGIN(test_nstime_idivide) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + nstime_idivide(&nsta, 10); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect division result"); + + nstime_init2(&nsta, 42, 666666666); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 3); + nstime_idivide(&nsta, 3); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect division result"); +} +TEST_END + +TEST_BEGIN(test_nstime_divide) +{ + nstime_t nsta, nstb, nstc; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + assert_u64_eq(nstime_divide(&nsta, &nstb), 10, + "Incorrect division result"); + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + nstime_init(&nstc, 1); + nstime_add(&nsta, &nstc); + assert_u64_eq(nstime_divide(&nsta, &nstb), 10, + "Incorrect division result"); + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + nstime_init(&nstc, 1); + nstime_subtract(&nsta, &nstc); + assert_u64_eq(nstime_divide(&nsta, &nstb), 9, + "Incorrect division result"); +} +TEST_END + +TEST_BEGIN(test_nstime_update) +{ + nstime_t nst; + + nstime_init(&nst, 0); + + assert_false(nstime_update(&nst), "Basic time update failed."); + + /* Only Rip Van Winkle sleeps this long. */ + { + nstime_t addend; + nstime_init2(&addend, 631152000, 0); + nstime_add(&nst, &addend); + } + { + nstime_t nst0; + nstime_copy(&nst0, &nst); + assert_true(nstime_update(&nst), + "Update should detect time roll-back."); + assert_d_eq(nstime_compare(&nst, &nst0), 0, + "Time should not have been modified"); + } + +} +TEST_END + +int +main(void) +{ + + return (test( + test_nstime_init, + test_nstime_init2, + test_nstime_copy, + test_nstime_compare, + test_nstime_add, + test_nstime_subtract, + test_nstime_imultiply, + test_nstime_idivide, + test_nstime_divide, + test_nstime_update)); +} diff --git a/test/unit/prng.c b/test/unit/prng.c new file mode 100644 index 00000000..b22bd2f5 --- /dev/null +++ b/test/unit/prng.c @@ -0,0 +1,68 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_prng_lg_range) +{ + uint64_t sa, sb, ra, rb; + unsigned lg_range; + + sa = 42; + ra = prng_lg_range(&sa, 64); + sa = 42; + rb = prng_lg_range(&sa, 64); + assert_u64_eq(ra, rb, + "Repeated generation should produce repeated results"); + + sb = 42; + rb = prng_lg_range(&sb, 64); + assert_u64_eq(ra, rb, + "Equivalent generation should produce equivalent results"); + + sa = 42; + ra = prng_lg_range(&sa, 64); + rb = prng_lg_range(&sa, 64); + assert_u64_ne(ra, rb, + "Full-width results must not immediately repeat"); + + sa = 42; + ra = prng_lg_range(&sa, 64); + for (lg_range = 63; lg_range > 0; lg_range--) { + sb = 42; + rb = prng_lg_range(&sb, lg_range); + assert_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)), + 0, "High order bits should be 0, lg_range=%u", lg_range); + assert_u64_eq(rb, (ra >> (64 - lg_range)), + "Expected high order bits of full-width result, " + "lg_range=%u", lg_range); + } +} +TEST_END + +TEST_BEGIN(test_prng_range) +{ + uint64_t range; +#define MAX_RANGE 10000000 +#define RANGE_STEP 97 +#define NREPS 10 + + for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + uint64_t s; + unsigned rep; + + s = range; + for (rep = 0; rep < NREPS; rep++) { + uint64_t r = prng_range(&s, range); + + assert_u64_lt(r, range, "Out of range"); + } + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_prng_lg_range, + test_prng_range)); +} diff --git a/test/unit/rb.c b/test/unit/rb.c index b38eb0e3..cf3d3a78 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -3,7 +3,7 @@ #define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \ a_type *rbp_bh_t; \ for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; \ - rbp_bh_t != &(a_rbt)->rbt_nil; \ + rbp_bh_t != NULL; \ rbp_bh_t = rbtn_left_get(a_type, a_field, rbp_bh_t)) { \ if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) { \ (r_height)++; \ @@ -21,7 +21,7 @@ struct node_s { }; static int -node_cmp(node_t *a, node_t *b) { +node_cmp(const node_t *a, const node_t *b) { int ret; assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); @@ -68,38 +68,43 @@ TEST_BEGIN(test_rb_empty) TEST_END static unsigned -tree_recurse(node_t *node, unsigned black_height, unsigned black_depth, - node_t *nil) +tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { unsigned ret = 0; - node_t *left_node = rbtn_left_get(node_t, link, node); - node_t *right_node = rbtn_right_get(node_t, link, node); + node_t *left_node; + node_t *right_node; + + if (node == NULL) + return (ret); + + left_node = rbtn_left_get(node_t, link, node); + right_node = rbtn_right_get(node_t, link, node); if (!rbtn_red_get(node_t, link, node)) black_depth++; /* Red nodes must be interleaved with black nodes. */ if (rbtn_red_get(node_t, link, node)) { - assert_false(rbtn_red_get(node_t, link, left_node), - "Node should be black"); - assert_false(rbtn_red_get(node_t, link, right_node), - "Node should be black"); + if (left_node != NULL) + assert_false(rbtn_red_get(node_t, link, left_node), + "Node should be black"); + if (right_node != NULL) + assert_false(rbtn_red_get(node_t, link, right_node), + "Node should be black"); } - if (node == nil) - return (ret); /* Self. */ assert_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); /* Left subtree. */ - if (left_node != nil) - ret += tree_recurse(left_node, black_height, black_depth, nil); + if (left_node != NULL) + ret += tree_recurse(left_node, black_height, black_depth); else ret += (black_depth != black_height); /* Right subtree. */ - if (right_node != nil) - ret += tree_recurse(right_node, black_height, black_depth, nil); + if (right_node != NULL) + ret += tree_recurse(right_node, black_height, black_depth); else ret += (black_depth != black_height); @@ -181,8 +186,7 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) node->magic = 0; rbtn_black_height(node_t, link, tree, black_height); - imbalances = tree_recurse(tree->rbt_root, black_height, 0, - &(tree->rbt_nil)); + imbalances = tree_recurse(tree->rbt_root, black_height, 0); assert_u_eq(imbalances, 0, "Tree is unbalanced"); assert_u_eq(tree_iterate(tree), nnodes-1, "Unexpected node iteration count"); @@ -212,6 +216,15 @@ remove_reverse_iterate_cb(tree_t *tree, node_t *node, void *data) return (ret); } +static void +destroy_cb(node_t *node, void *data) +{ + unsigned *nnodes = (unsigned *)data; + + assert_u_gt(*nnodes, 0, "Destruction removed too many nodes"); + (*nnodes)--; +} + TEST_BEGIN(test_rb_random) { #define NNODES 25 @@ -244,7 +257,6 @@ TEST_BEGIN(test_rb_random) for (j = 1; j <= NNODES; j++) { /* Initialize tree and nodes. */ tree_new(&tree); - tree.rbt_nil.magic = 0; for (k = 0; k < j; k++) { nodes[k].magic = NODE_MAGIC; nodes[k].key = bag[k]; @@ -257,7 +269,7 @@ TEST_BEGIN(test_rb_random) rbtn_black_height(node_t, link, &tree, black_height); imbalances = tree_recurse(tree.rbt_root, - black_height, 0, &(tree.rbt_nil)); + black_height, 0); assert_u_eq(imbalances, 0, "Tree is unbalanced"); @@ -278,7 +290,7 @@ TEST_BEGIN(test_rb_random) } /* Remove nodes. */ - switch (i % 4) { + switch (i % 5) { case 0: for (k = 0; k < j; k++) node_remove(&tree, &nodes[k], j - k); @@ -314,6 +326,12 @@ TEST_BEGIN(test_rb_random) assert_u_eq(nnodes, 0, "Removal terminated early"); break; + } case 4: { + unsigned nnodes = j; + tree_destroy(&tree, destroy_cb, &nnodes); + assert_u_eq(nnodes, 0, + "Destruction terminated early"); + break; } default: not_reached(); } diff --git a/test/unit/run_quantize.c b/test/unit/run_quantize.c new file mode 100644 index 00000000..f6a2f74f --- /dev/null +++ b/test/unit/run_quantize.c @@ -0,0 +1,149 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_small_run_size) +{ + unsigned nbins, i; + size_t sz, run_size; + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); + + /* + * Iterate over all small size classes, get their run sizes, and verify + * that the quantized size is the same as the run size. + */ + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + assert_d_eq(mallctlnametomib("arenas.bin.0.run_size", mib, &miblen), 0, + "Unexpected mallctlnametomib failure"); + for (i = 0; i < nbins; i++) { + mib[2] = i; + sz = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, &run_size, &sz, NULL, 0), + 0, "Unexpected mallctlbymib failure"); + assert_zu_eq(run_size, run_quantize_floor(run_size), + "Small run quantization should be a no-op (run_size=%zu)", + run_size); + assert_zu_eq(run_size, run_quantize_ceil(run_size), + "Small run quantization should be a no-op (run_size=%zu)", + run_size); + } +} +TEST_END + +TEST_BEGIN(test_large_run_size) +{ + bool cache_oblivious; + unsigned nlruns, i; + size_t sz, run_size_prev, ceil_prev; + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); + + /* + * Iterate over all large size classes, get their run sizes, and verify + * that the quantized size is the same as the run size. + */ + + sz = sizeof(bool); + assert_d_eq(mallctl("config.cache_oblivious", &cache_oblivious, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + assert_d_eq(mallctlnametomib("arenas.lrun.0.size", mib, &miblen), 0, + "Unexpected mallctlnametomib failure"); + for (i = 0; i < nlruns; i++) { + size_t lrun_size, run_size, floor, ceil; + + mib[2] = i; + sz = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, &lrun_size, &sz, NULL, 0), + 0, "Unexpected mallctlbymib failure"); + run_size = cache_oblivious ? lrun_size + PAGE : lrun_size; + floor = run_quantize_floor(run_size); + ceil = run_quantize_ceil(run_size); + + assert_zu_eq(run_size, floor, + "Large run quantization should be a no-op for precise " + "size (lrun_size=%zu, run_size=%zu)", lrun_size, run_size); + assert_zu_eq(run_size, ceil, + "Large run quantization should be a no-op for precise " + "size (lrun_size=%zu, run_size=%zu)", lrun_size, run_size); + + if (i > 0) { + assert_zu_eq(run_size_prev, run_quantize_floor(run_size + - PAGE), "Floor should be a precise size"); + if (run_size_prev < ceil_prev) { + assert_zu_eq(ceil_prev, run_size, + "Ceiling should be a precise size " + "(run_size_prev=%zu, ceil_prev=%zu, " + "run_size=%zu)", run_size_prev, ceil_prev, + run_size); + } + } + run_size_prev = floor; + ceil_prev = run_quantize_ceil(run_size + PAGE); + } +} +TEST_END + +TEST_BEGIN(test_monotonic) +{ + unsigned nbins, nlruns, i; + size_t sz, floor_prev, ceil_prev; + + /* + * Iterate over all run sizes and verify that + * run_quantize_{floor,ceil}() are monotonic. + */ + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + floor_prev = 0; + ceil_prev = 0; + for (i = 1; i < run_quantize_max >> LG_PAGE; i++) { + size_t run_size, floor, ceil; + + run_size = i << LG_PAGE; + floor = run_quantize_floor(run_size); + ceil = run_quantize_ceil(run_size); + + assert_zu_le(floor, run_size, + "Floor should be <= (floor=%zu, run_size=%zu, ceil=%zu)", + floor, run_size, ceil); + assert_zu_ge(ceil, run_size, + "Ceiling should be >= (floor=%zu, run_size=%zu, ceil=%zu)", + floor, run_size, ceil); + + assert_zu_le(floor_prev, floor, "Floor should be monotonic " + "(floor_prev=%zu, floor=%zu, run_size=%zu, ceil=%zu)", + floor_prev, floor, run_size, ceil); + assert_zu_le(ceil_prev, ceil, "Ceiling should be monotonic " + "(floor=%zu, run_size=%zu, ceil_prev=%zu, ceil=%zu)", + floor, run_size, ceil_prev, ceil); + + floor_prev = floor; + ceil_prev = ceil; + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_small_run_size, + test_large_run_size, + test_monotonic)); +} diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index d3aaebd7..2e2caaf5 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -80,10 +80,33 @@ TEST_BEGIN(test_size_classes) } TEST_END +TEST_BEGIN(test_overflow) +{ + size_t max_size_class; + + max_size_class = get_max_size_class(); + + assert_u_ge(size2index(max_size_class+1), NSIZES, + "size2index() should return >= NSIZES on overflow"); + assert_u_ge(size2index(ZU(PTRDIFF_MAX)+1), NSIZES, + "size2index() should return >= NSIZES on overflow"); + assert_u_ge(size2index(SIZE_T_MAX), NSIZES, + "size2index() should return >= NSIZES on overflow"); + + assert_zu_gt(s2u(max_size_class+1), HUGE_MAXCLASS, + "s2u() should return > HUGE_MAXCLASS for unsupported size"); + assert_zu_gt(s2u(ZU(PTRDIFF_MAX)+1), HUGE_MAXCLASS, + "s2u() should return > HUGE_MAXCLASS for unsupported size"); + assert_zu_eq(s2u(SIZE_T_MAX), 0, + "s2u() should return 0 on overflow"); +} +TEST_END + int main(void) { return (test( - test_size_classes)); + test_size_classes, + test_overflow)); } diff --git a/test/unit/smoothstep.c b/test/unit/smoothstep.c new file mode 100644 index 00000000..4cfb2134 --- /dev/null +++ b/test/unit/smoothstep.c @@ -0,0 +1,106 @@ +#include "test/jemalloc_test.h" + +static const uint64_t smoothstep_tab[] = { +#define STEP(step, h, x, y) \ + h, + SMOOTHSTEP +#undef STEP +}; + +TEST_BEGIN(test_smoothstep_integral) +{ + uint64_t sum, min, max; + unsigned i; + + /* + * The integral of smoothstep in the [0..1] range equals 1/2. Verify + * that the fixed point representation's integral is no more than + * rounding error distant from 1/2. Regarding rounding, each table + * element is rounded down to the nearest fixed point value, so the + * integral may be off by as much as SMOOTHSTEP_NSTEPS ulps. + */ + sum = 0; + for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) + sum += smoothstep_tab[i]; + + max = (KQU(1) << (SMOOTHSTEP_BFP-1)) * (SMOOTHSTEP_NSTEPS+1); + min = max - SMOOTHSTEP_NSTEPS; + + assert_u64_ge(sum, min, + "Integral too small, even accounting for truncation"); + assert_u64_le(sum, max, "Integral exceeds 1/2"); + if (false) { + malloc_printf("%"FMTu64" ulps under 1/2 (limit %d)\n", + max - sum, SMOOTHSTEP_NSTEPS); + } +} +TEST_END + +TEST_BEGIN(test_smoothstep_monotonic) +{ + uint64_t prev_h; + unsigned i; + + /* + * The smoothstep function is monotonic in [0..1], i.e. its slope is + * non-negative. In practice we want to parametrize table generation + * such that piecewise slope is greater than zero, but do not require + * that here. + */ + prev_h = 0; + for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { + uint64_t h = smoothstep_tab[i]; + assert_u64_ge(h, prev_h, "Piecewise non-monotonic, i=%u", i); + prev_h = h; + } + assert_u64_eq(smoothstep_tab[SMOOTHSTEP_NSTEPS-1], + (KQU(1) << SMOOTHSTEP_BFP), "Last step must equal 1"); +} +TEST_END + +TEST_BEGIN(test_smoothstep_slope) +{ + uint64_t prev_h, prev_delta; + unsigned i; + + /* + * The smoothstep slope strictly increases until x=0.5, and then + * strictly decreases until x=1.0. Verify the slightly weaker + * requirement of monotonicity, so that inadequate table precision does + * not cause false test failures. + */ + prev_h = 0; + prev_delta = 0; + for (i = 0; i < SMOOTHSTEP_NSTEPS / 2 + SMOOTHSTEP_NSTEPS % 2; i++) { + uint64_t h = smoothstep_tab[i]; + uint64_t delta = h - prev_h; + assert_u64_ge(delta, prev_delta, + "Slope must monotonically increase in 0.0 <= x <= 0.5, " + "i=%u", i); + prev_h = h; + prev_delta = delta; + } + + prev_h = KQU(1) << SMOOTHSTEP_BFP; + prev_delta = 0; + for (i = SMOOTHSTEP_NSTEPS-1; i >= SMOOTHSTEP_NSTEPS / 2; i--) { + uint64_t h = smoothstep_tab[i]; + uint64_t delta = prev_h - h; + assert_u64_ge(delta, prev_delta, + "Slope must monotonically decrease in 0.5 <= x <= 1.0, " + "i=%u", i); + prev_h = h; + prev_delta = delta; + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_smoothstep_integral, + test_smoothstep_monotonic, + test_smoothstep_slope)); +} diff --git a/test/unit/stats.c b/test/unit/stats.c index 8e4bc631..6e803160 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -93,6 +93,10 @@ TEST_BEGIN(test_stats_arenas_summary) huge = mallocx(chunksize, 0); assert_ptr_not_null(huge, "Unexpected mallocx() failure"); + dallocx(little, 0); + dallocx(large, 0); + dallocx(huge, 0); + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -116,10 +120,6 @@ TEST_BEGIN(test_stats_arenas_summary) assert_u64_le(nmadvise, purged, "nmadvise should be no greater than purged"); } - - dallocx(little, 0); - dallocx(large, 0); - dallocx(huge, 0); } TEST_END diff --git a/test/unit/ticker.c b/test/unit/ticker.c new file mode 100644 index 00000000..e737020a --- /dev/null +++ b/test/unit/ticker.c @@ -0,0 +1,76 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_ticker_tick) +{ +#define NREPS 2 +#define NTICKS 3 + ticker_t ticker; + int32_t i, j; + + ticker_init(&ticker, NTICKS); + for (i = 0; i < NREPS; i++) { + for (j = 0; j < NTICKS; j++) { + assert_u_eq(ticker_read(&ticker), NTICKS - j, + "Unexpected ticker value (i=%d, j=%d)", i, j); + assert_false(ticker_tick(&ticker), + "Unexpected ticker fire (i=%d, j=%d)", i, j); + } + assert_u32_eq(ticker_read(&ticker), 0, + "Expected ticker depletion"); + assert_true(ticker_tick(&ticker), + "Expected ticker fire (i=%d)", i); + assert_u32_eq(ticker_read(&ticker), NTICKS, + "Expected ticker reset"); + } +#undef NTICKS +} +TEST_END + +TEST_BEGIN(test_ticker_ticks) +{ +#define NTICKS 3 + ticker_t ticker; + + ticker_init(&ticker, NTICKS); + + assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); + assert_false(ticker_ticks(&ticker, NTICKS), "Unexpected ticker fire"); + assert_u_eq(ticker_read(&ticker), 0, "Unexpected ticker value"); + assert_true(ticker_ticks(&ticker, NTICKS), "Expected ticker fire"); + assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); + + assert_true(ticker_ticks(&ticker, NTICKS + 1), "Expected ticker fire"); + assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); +#undef NTICKS +} +TEST_END + +TEST_BEGIN(test_ticker_copy) +{ +#define NTICKS 3 + ticker_t ta, tb; + + ticker_init(&ta, NTICKS); + ticker_copy(&tb, &ta); + assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); + assert_true(ticker_ticks(&tb, NTICKS + 1), "Expected ticker fire"); + assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); + + ticker_tick(&ta); + ticker_copy(&tb, &ta); + assert_u_eq(ticker_read(&tb), NTICKS - 1, "Unexpected ticker value"); + assert_true(ticker_ticks(&tb, NTICKS), "Expected ticker fire"); + assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); +#undef NTICKS +} +TEST_END + +int +main(void) +{ + + return (test( + test_ticker_tick, + test_ticker_ticks, + test_ticker_copy)); +} diff --git a/test/unit/util.c b/test/unit/util.c index 8ab39a45..2f65aad2 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -1,33 +1,54 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_pow2_ceil) +#define TEST_POW2_CEIL(t, suf, pri) do { \ + unsigned i, pow2; \ + t x; \ + \ + assert_zu_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ + \ + for (i = 0; i < sizeof(t) * 8; i++) { \ + assert_zu_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) << i, \ + "Unexpected result"); \ + } \ + \ + for (i = 2; i < sizeof(t) * 8; i++) { \ + assert_zu_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ + ((t)1) << i, "Unexpected result"); \ + } \ + \ + for (i = 0; i < sizeof(t) * 8 - 1; i++) { \ + assert_zu_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ + ((t)1) << (i+1), "Unexpected result"); \ + } \ + \ + for (pow2 = 1; pow2 < 25; pow2++) { \ + for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2; \ + x++) { \ + assert_zu_eq(pow2_ceil_##suf(x), \ + ((t)1) << pow2, \ + "Unexpected result, x=%"pri, x); \ + } \ + } \ +} while (0) + +TEST_BEGIN(test_pow2_ceil_u64) { - unsigned i, pow2; - size_t x; - assert_zu_eq(pow2_ceil(0), 0, "Unexpected result"); + TEST_POW2_CEIL(uint64_t, u64, FMTu64); +} +TEST_END - for (i = 0; i < sizeof(size_t) * 8; i++) { - assert_zu_eq(pow2_ceil(ZU(1) << i), ZU(1) << i, - "Unexpected result"); - } +TEST_BEGIN(test_pow2_ceil_u32) +{ - for (i = 2; i < sizeof(size_t) * 8; i++) { - assert_zu_eq(pow2_ceil((ZU(1) << i) - 1), ZU(1) << i, - "Unexpected result"); - } + TEST_POW2_CEIL(uint32_t, u32, FMTu32); +} +TEST_END - for (i = 0; i < sizeof(size_t) * 8 - 1; i++) { - assert_zu_eq(pow2_ceil((ZU(1) << i) + 1), ZU(1) << (i+1), - "Unexpected result"); - } +TEST_BEGIN(test_pow2_ceil_zu) +{ - for (pow2 = 1; pow2 < 25; pow2++) { - for (x = (ZU(1) << (pow2-1)) + 1; x <= ZU(1) << pow2; x++) { - assert_zu_eq(pow2_ceil(x), ZU(1) << pow2, - "Unexpected result, x=%zu", x); - } - } + TEST_POW2_CEIL(size_t, zu, "zu"); } TEST_END @@ -286,7 +307,9 @@ main(void) { return (test( - test_pow2_ceil, + test_pow2_ceil_u64, + test_pow2_ceil_u32, + test_pow2_ceil_zu, test_malloc_strtoumax_no_endptr, test_malloc_strtoumax, test_malloc_snprintf_truncated,