Merge branch 'dev'

This commit is contained in:
Jason Evans 2011-03-22 17:03:58 -07:00
commit fb4e26aa9e
35 changed files with 2008 additions and 646 deletions

View File

@ -6,6 +6,35 @@ found in the git revision history:
http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
git://canonware.com/jemalloc.git
* 2.2.0 (March 22, 2011)
This version incorporates several improvements to algorithms and data
structures that tend to reduce fragmentation and increase speed.
New features:
- Add the "stats.cactive" mallctl.
- Update pprof (from google-perftools 1.7).
- Improve backtracing-related configuration logic, and add the
--disable-prof-libgcc option.
Bug fixes:
- Change default symbol visibility from "internal" to "hidden", which
decreases the overhead of library-internal function calls.
- Fix symbol visibility so that it is also set on OS X.
- Fix a build dependency regression caused by the introduction of the .pic.o
suffix for PIC object files.
- Add missing checks for mutex initialization failures.
- Don't use libgcc-based backtracing except on x64, where it is known to work.
- Fix deadlocks on OS X that were due to memory allocation in
pthread_mutex_lock().
- Heap profiling-specific fixes:
+ Fix memory corruption due to integer overflow in small region index
computation, when using a small enough sample interval that profiling
context pointers are stored in small run headers.
+ Fix a bootstrap ordering bug that only occurred with TLS disabled.
+ Fix a rallocm() rsize bug.
+ Fix error detection bugs for aligned memory allocation.
* 2.1.3 (March 14, 2011)
Bug fixes:

View File

@ -62,18 +62,23 @@ any of the following arguments (not a definitive list) to 'configure':
--enable-prof
  Enable heap profiling and leak detection functionality.  See the "opt.prof"
  option documentation for usage details.  When enabled, there are several
  approaches to backtracing, and the configure script chooses the first one
  in the following list that appears to function correctly:
  + libunwind      (requires --enable-prof-libunwind)
  + libgcc         (unless --disable-prof-libgcc)
  + gcc intrinsics (unless --disable-prof-gcc)
--enable-prof-libunwind
  Use the libunwind library (http://www.nongnu.org/libunwind/) for stack
  backtracing.
--disable-prof-libgcc
  Disable the use of libgcc's backtracing functionality.
--disable-prof-gcc
  Disable the use of gcc intrinsics for backtracing.
--with-static-libunwind=<libunwind.a>
  Statically link against the specified libunwind.a rather than dynamically

View File

@ -45,13 +45,13 @@ endif
BINS := @srcroot@bin/pprof
CHDRS := @objroot@include/jemalloc/jemalloc@install_suffix@.h \
	@objroot@include/jemalloc/jemalloc_defs@install_suffix@.h
CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \
	@srcroot@src/base.c @srcroot@src/bitmap.c @srcroot@src/chunk.c \
	@srcroot@src/chunk_dss.c @srcroot@src/chunk_mmap.c \
	@srcroot@src/chunk_swap.c @srcroot@src/ckh.c @srcroot@src/ctl.c \
	@srcroot@src/extent.c @srcroot@src/hash.c @srcroot@src/huge.c \
	@srcroot@src/mb.c @srcroot@src/mutex.c @srcroot@src/prof.c \
	@srcroot@src/rtree.c @srcroot@src/stats.c @srcroot@src/tcache.c
ifeq (macho, @abi@)
CSRCS += @srcroot@src/zone.c
endif
@ -65,8 +65,9 @@ DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html)
DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3)
DOCS := $(DOCS_HTML) $(DOCS_MAN3) DOCS := $(DOCS_HTML) $(DOCS_MAN3)
CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \
	@srcroot@test/bitmap.c @srcroot@test/mremap.c \
	@srcroot@test/posix_memalign.c @srcroot@test/rallocm.c \
	@srcroot@test/thread_arena.c
.PHONY: all dist doc_html doc_man doc
.PHONY: install_bin install_include install_lib
@ -94,6 +95,8 @@ doc: $(DOCS)
# Include generated dependency files.
#
-include $(CSRCS:@srcroot@%.c=@objroot@%.d)
-include $(CSRCS:@srcroot@%.c=@objroot@%.pic.d)
-include $(CTESTS:@srcroot@%.c=@objroot@%.d)
@objroot@src/%.o: @srcroot@src/%.c @objroot@src/%.o: @srcroot@src/%.c
@mkdir -p $(@D) @mkdir -p $(@D)
@ -103,7 +106,7 @@ doc: $(DOCS)
@objroot@src/%.pic.o: @srcroot@src/%.c @objroot@src/%.pic.o: @srcroot@src/%.c
@mkdir -p $(@D) @mkdir -p $(@D)
$(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $< $(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $<
@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)" @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $(basename $@))))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.pic.o \2/g\" > $(@:%.o=%.d)"
%.$(SO) : %.$(SO).$(REV) %.$(SO) : %.$(SO).$(REV)
@mkdir -p $(@D) @mkdir -p $(@D)
@ -126,6 +129,9 @@ doc: $(DOCS)
$(CC) $(CFLAGS) -c $(CPPFLAGS) -I@objroot@test -o $@ $< $(CC) $(CFLAGS) -c $(CPPFLAGS) -I@objroot@test -o $@ $<
@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) -I@objroot@test $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)" @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) -I@objroot@test $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)"
# Automatic dependency generation misses #include "*.c".
@objroot@test/bitmap.o : @objroot@src/bitmap.o
@objroot@test/%: @objroot@test/%.o \ @objroot@test/%: @objroot@test/%.o \
@objroot@lib/libjemalloc@install_suffix@.$(SO) @objroot@lib/libjemalloc@install_suffix@.$(SO)
@mkdir -p $(@D) @mkdir -p $(@D)

View File

@ -72,7 +72,7 @@ use strict;
use warnings; use warnings;
use Getopt::Long; use Getopt::Long;
my $PPROF_VERSION = "1.7";
# These are the object tools we use which can come from a # These are the object tools we use which can come from a
# user-specified location using --tools, from the PPROF_TOOLS # user-specified location using --tools, from the PPROF_TOOLS
@ -89,6 +89,7 @@ my %obj_tool_map = (
); );
my $DOT = "dot";  # leave non-absolute, since it may be in /usr/local
my $GV = "gv";
my $EVINCE = "evince";  # could also be xpdf or perhaps acroread
my $KCACHEGRIND = "kcachegrind";
my $PS2PDF = "ps2pdf";
# These are used for dynamic profiles # These are used for dynamic profiles
@ -103,6 +104,7 @@ my $GROWTH_PAGE = "/pprof/growth";
my $CONTENTION_PAGE = "/pprof/contention";
my $WALL_PAGE = "/pprof/wall(?:\\?.*)?";  # accepts options like namefilter
my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?";
my $CENSUSPROFILE_PAGE = "/pprof/censusprofile";  # must support "?seconds=#"
my $SYMBOL_PAGE = "/pprof/symbol";  # must support symbol lookup via POST
my $PROGRAM_NAME_PAGE = "/pprof/cmdline";
@ -110,7 +112,7 @@ my $PROGRAM_NAME_PAGE = "/pprof/cmdline";
# All the alternatives must begin with /.
my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" .
               "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" .
               "$FILTEREDPROFILE_PAGE|$CENSUSPROFILE_PAGE)";
# default binary name # default binary name
my $UNKNOWN_BINARY = "(unknown)"; my $UNKNOWN_BINARY = "(unknown)";
@ -148,7 +150,7 @@ pprof [options] <profile>
The /<service> can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile,
$GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall,
$CENSUSPROFILE_PAGE, or /pprof/filteredprofile.
For instance: "pprof http://myserver.com:80$HEAP_PAGE".
If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling).
pprof --symbols <program> pprof --symbols <program>
@ -180,6 +182,7 @@ Output type:
--text              Generate text report
--callgrind         Generate callgrind format to stdout
--gv                Generate Postscript and display
--evince            Generate PDF and display
--web               Generate SVG and display
--list=<regexp>     Generate source listing of matching routines
--disasm=<regexp>   Generate disassembly of matching routines
@ -208,6 +211,7 @@ Call-graph Options:
--nodecount=<n>     Show at most so many nodes [default=80]
--nodefraction=<f>  Hide nodes below <f>*total [default=.005]
--edgefraction=<f>  Hide edges below <f>*total [default=.001]
--maxdegree=<n>     Max incoming/outgoing edges per node [default=8]
--focus=<regexp>    Focus on nodes matching <regexp>
--ignore=<regexp>   Ignore nodes matching <regexp>
--scale=<n>         Set GV scaling [default=0]
@ -304,6 +308,7 @@ sub Init() {
$main::opt_disasm = ""; $main::opt_disasm = "";
$main::opt_symbols = 0; $main::opt_symbols = 0;
$main::opt_gv = 0; $main::opt_gv = 0;
$main::opt_evince = 0;
$main::opt_web = 0; $main::opt_web = 0;
$main::opt_dot = 0; $main::opt_dot = 0;
$main::opt_ps = 0; $main::opt_ps = 0;
@ -315,6 +320,7 @@ sub Init() {
$main::opt_nodecount = 80; $main::opt_nodecount = 80;
$main::opt_nodefraction = 0.005; $main::opt_nodefraction = 0.005;
$main::opt_edgefraction = 0.001; $main::opt_edgefraction = 0.001;
$main::opt_maxdegree = 8;
$main::opt_focus = ''; $main::opt_focus = '';
$main::opt_ignore = ''; $main::opt_ignore = '';
$main::opt_scale = 0; $main::opt_scale = 0;
@ -372,6 +378,7 @@ sub Init() {
"disasm=s" => \$main::opt_disasm, "disasm=s" => \$main::opt_disasm,
"symbols!" => \$main::opt_symbols, "symbols!" => \$main::opt_symbols,
"gv!" => \$main::opt_gv, "gv!" => \$main::opt_gv,
"evince!" => \$main::opt_evince,
"web!" => \$main::opt_web, "web!" => \$main::opt_web,
"dot!" => \$main::opt_dot, "dot!" => \$main::opt_dot,
"ps!" => \$main::opt_ps, "ps!" => \$main::opt_ps,
@ -383,6 +390,7 @@ sub Init() {
"nodecount=i" => \$main::opt_nodecount, "nodecount=i" => \$main::opt_nodecount,
"nodefraction=f" => \$main::opt_nodefraction, "nodefraction=f" => \$main::opt_nodefraction,
"edgefraction=f" => \$main::opt_edgefraction, "edgefraction=f" => \$main::opt_edgefraction,
"maxdegree=i" => \$main::opt_maxdegree,
"focus=s" => \$main::opt_focus, "focus=s" => \$main::opt_focus,
"ignore=s" => \$main::opt_ignore, "ignore=s" => \$main::opt_ignore,
"scale=i" => \$main::opt_scale, "scale=i" => \$main::opt_scale,
@ -452,6 +460,7 @@ sub Init() {
($main::opt_disasm eq '' ? 0 : 1) + ($main::opt_disasm eq '' ? 0 : 1) +
($main::opt_symbols == 0 ? 0 : 1) + ($main::opt_symbols == 0 ? 0 : 1) +
$main::opt_gv + $main::opt_gv +
$main::opt_evince +
$main::opt_web + $main::opt_web +
$main::opt_dot + $main::opt_dot +
$main::opt_ps + $main::opt_ps +
@ -646,6 +655,8 @@ sub Main() {
if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
if ($main::opt_gv) { if ($main::opt_gv) {
RunGV(TempName($main::next_tmpfile, "ps"), ""); RunGV(TempName($main::next_tmpfile, "ps"), "");
} elsif ($main::opt_evince) {
RunEvince(TempName($main::next_tmpfile, "pdf"), "");
} elsif ($main::opt_web) { } elsif ($main::opt_web) {
my $tmp = TempName($main::next_tmpfile, "svg"); my $tmp = TempName($main::next_tmpfile, "svg");
RunWeb($tmp); RunWeb($tmp);
@ -708,6 +719,12 @@ sub RunGV {
} }
} }
sub RunEvince {
my $fname = shift;
my $bg = shift; # "" or " &" if we should run in background
system("$EVINCE " . $fname . $bg);
}
sub RunWeb { sub RunWeb {
my $fname = shift; my $fname = shift;
print STDERR "Loading web page file:///$fname\n"; print STDERR "Loading web page file:///$fname\n";
@ -805,6 +822,7 @@ sub InteractiveCommand {
$main::opt_disasm = 0; $main::opt_disasm = 0;
$main::opt_list = 0; $main::opt_list = 0;
$main::opt_gv = 0; $main::opt_gv = 0;
$main::opt_evince = 0;
$main::opt_cum = 0; $main::opt_cum = 0;
if (m/^\s*(text|top)(\d*)\s*(.*)/) { if (m/^\s*(text|top)(\d*)\s*(.*)/) {
@ -878,11 +896,14 @@ sub InteractiveCommand {
PrintDisassembly($libs, $flat, $cumulative, $routine, $total); PrintDisassembly($libs, $flat, $cumulative, $routine, $total);
return 1; return 1;
} }
if (m/^\s*(gv|web)\s*(.*)/) { if (m/^\s*(gv|web|evince)\s*(.*)/) {
$main::opt_gv = 0; $main::opt_gv = 0;
$main::opt_evince = 0;
$main::opt_web = 0; $main::opt_web = 0;
if ($1 eq "gv") { if ($1 eq "gv") {
$main::opt_gv = 1; $main::opt_gv = 1;
} elsif ($1 eq "evince") {
$main::opt_evince = 1;
} elsif ($1 eq "web") { } elsif ($1 eq "web") {
$main::opt_web = 1; $main::opt_web = 1;
} }
@ -902,6 +923,8 @@ sub InteractiveCommand {
if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
if ($main::opt_gv) { if ($main::opt_gv) {
RunGV(TempName($main::next_tmpfile, "ps"), " &"); RunGV(TempName($main::next_tmpfile, "ps"), " &");
} elsif ($main::opt_evince) {
RunEvince(TempName($main::next_tmpfile, "pdf"), " &");
} elsif ($main::opt_web) { } elsif ($main::opt_web) {
RunWeb(TempName($main::next_tmpfile, "svg")); RunWeb(TempName($main::next_tmpfile, "svg"));
} }
@ -1685,6 +1708,8 @@ sub PrintDot {
my $output; my $output;
if ($main::opt_gv) { if ($main::opt_gv) {
$output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps"); $output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps");
} elsif ($main::opt_evince) {
$output = "| $DOT -Tps2 | $PS2PDF - " . TempName($main::next_tmpfile, "pdf");
} elsif ($main::opt_ps) { } elsif ($main::opt_ps) {
$output = "| $DOT -Tps2"; $output = "| $DOT -Tps2";
} elsif ($main::opt_pdf) { } elsif ($main::opt_pdf) {
@ -1792,12 +1817,38 @@ sub PrintDot {
} }
} }
# Print edges (process in order of decreasing counts)
my %indegree = ();   # Number of incoming edges added per node so far
my %outdegree = ();  # Number of outgoing edges added per node so far
foreach my $e (sort { $edge{$b} <=> $edge{$a} } keys(%edge)) {
my @x = split(/\001/, $e);
$n = $edge{$e};
# Initialize degree of kept incoming and outgoing edges if necessary
my $src = $x[0];
my $dst = $x[1];
if (!exists($outdegree{$src})) { $outdegree{$src} = 0; }
if (!exists($indegree{$dst})) { $indegree{$dst} = 0; }
my $keep;
if ($indegree{$dst} == 0) {
# Keep edge if needed for reachability
$keep = 1;
} elsif (abs($n) <= $edgelimit) {
# Drop if we are below --edgefraction
$keep = 0;
} elsif ($outdegree{$src} >= $main::opt_maxdegree ||
$indegree{$dst} >= $main::opt_maxdegree) {
# Keep limited number of in/out edges per node
$keep = 0;
} else {
$keep = 1;
}
if ($keep) {
$outdegree{$src}++;
$indegree{$dst}++;
# Compute line width based on edge count
my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0);
if ($fraction > 1) { $fraction = 1; }
@ -2135,6 +2186,19 @@ function handleMouseUp(evt) {
EOF EOF
} }
# Return a small number that identifies the argument.
# Multiple calls with the same argument will return the same number.
# Calls with different arguments will return different numbers.
sub ShortIdFor {
my $key = shift;
my $id = $main::uniqueid{$key};
if (!defined($id)) {
$id = keys(%main::uniqueid) + 1;
$main::uniqueid{$key} = $id;
}
return $id;
}
# Translate a stack of addresses into a stack of symbols # Translate a stack of addresses into a stack of symbols
sub TranslateStack { sub TranslateStack {
my $symbols = shift; my $symbols = shift;
@ -2172,6 +2236,15 @@ sub TranslateStack {
if ($j > 2) { if ($j > 2) {
$func = "$func (inline)"; $func = "$func (inline)";
} }
# Do not merge nodes corresponding to Callback::Run since that
# causes confusing cycles in dot display. Instead, we synthesize
# a unique name for this frame per caller.
if ($func =~ m/Callback.*::Run$/) {
my $caller = ($i > 0) ? $addrs[$i-1] : 0;
$func = "Run#" . ShortIdFor($caller);
}
if ($main::opt_addresses) { if ($main::opt_addresses) {
push(@result, "$a $func $fileline"); push(@result, "$a $func $fileline");
} elsif ($main::opt_lines) { } elsif ($main::opt_lines) {
@ -2415,7 +2488,16 @@ sub RemoveUninterestingFrames {
# old code out of the system.
$skip_regexp = "TCMalloc|^tcmalloc::";
} elsif ($main::profile_type eq 'contention') {
foreach my $vname ('base::RecordLockProfileData',
'base::SubmitMutexProfileData',
'base::SubmitSpinLockProfileData',
'Mutex::Unlock',
'Mutex::UnlockSlow',
'Mutex::ReaderUnlock',
'MutexLock::~MutexLock',
'SpinLock::Unlock',
'SpinLock::SlowUnlock',
'SpinLockHolder::~SpinLockHolder') {
$skip{$vname} = 1; $skip{$vname} = 1;
} }
} elsif ($main::profile_type eq 'cpu') { } elsif ($main::profile_type eq 'cpu') {
@ -2955,7 +3037,7 @@ sub FetchDynamicProfile {
my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout); my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout);
my $cmd = "$fetcher '$url' > '$tmp_profile'"; my $cmd = "$fetcher '$url' > '$tmp_profile'";
if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){
print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n";
if ($encourage_patience) { if ($encourage_patience) {
print STDERR "Be patient...\n"; print STDERR "Be patient...\n";
@ -3154,24 +3236,47 @@ BEGIN {
} }
} }
# Reads the top, 'header' section of a profile, and returns the last
# line of the header, commonly called a 'header line'.  The header
# section of a profile consists of zero or more 'command' lines that
# are instructions to pprof, which pprof executes when reading the
# header.  All 'command' lines start with a %.  After the command
# lines is the 'header line', which is a profile-specific line that
# indicates what type of profile it is, and perhaps other global
# information about the profile.  For instance, here's a header line
# for a heap profile:
#   heap profile:     53:    38236 [  5525:  1284029] @ heapprofile
# For historical reasons, the CPU profile does not contain a text-
# readable header line.  If the profile looks like a CPU profile,
# this function returns "".  If no header line could be found, this
# function returns undef.
#
# The following commands are recognized:
#   %warn -- emit the rest of this line to stderr, prefixed by 'WARNING:'
#
# The input file should be in binmode.
sub ReadProfileHeader {
  local *PROFILE = shift;
  my $firstchar = "";
  my $line = "";
  read(PROFILE, $firstchar, 1);
  seek(PROFILE, -1, 1);                    # unread the firstchar
  if ($firstchar !~ /[[:print:]]/) {       # is not a text character
    return "";
  }
  while (defined($line = <PROFILE>)) {
    $line =~ s/\r//g;   # turn windows-looking lines into unix-looking lines
    if ($line =~ /^%warn\s+(.*)/) {        # 'warn' command
      # Note this matches both '%warn blah\n' and '%warn\n'.
      print STDERR "WARNING: $1\n";        # print the rest of the line
    } elsif ($line =~ /^%/) {
      print STDERR "Ignoring unknown command from profile header: $line";
    } else {
      # End of commands, must be the header line.
      return $line;
    }
  }
  return undef;     # got to EOF without seeing a header line
}
sub IsSymbolizedProfileFile { sub IsSymbolizedProfileFile {
@ -3182,7 +3287,7 @@ sub IsSymbolizedProfileFile {
# Check if the file contains a symbol-section marker. # Check if the file contains a symbol-section marker.
open(TFILE, "<$file_name"); open(TFILE, "<$file_name");
binmode TFILE; binmode TFILE;
my $firstline = ReadProfileLine(*TFILE); my $firstline = ReadProfileHeader(*TFILE);
close(TFILE); close(TFILE);
if (!$firstline) { if (!$firstline) {
return 0; return 0;
@ -3202,14 +3307,7 @@ sub IsSymbolizedProfileFile {
sub ReadProfile { sub ReadProfile {
my $prog = shift; my $prog = shift;
my $fname = shift; my $fname = shift;
my $result; # return value
if (IsSymbolizedProfileFile($fname) && !$main::use_symbolized_profile) {
# we have both a binary and symbolized profiles, abort
usage("Symbolized profile '$fname' cannot be used with a binary arg. " .
"Try again without passing '$prog'.");
}
$main::profile_type = '';
$CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash
my $contention_marker = $&; my $contention_marker = $&;
@ -3226,40 +3324,45 @@ sub ReadProfile {
# whole firstline, since it may be gigabytes(!) of data. # whole firstline, since it may be gigabytes(!) of data.
open(PROFILE, "<$fname") || error("$fname: $!\n"); open(PROFILE, "<$fname") || error("$fname: $!\n");
binmode PROFILE; # New perls do UTF-8 processing binmode PROFILE; # New perls do UTF-8 processing
my $header = ReadProfileLine(*PROFILE); my $header = ReadProfileHeader(*PROFILE);
if (!defined($header)) { # means "at EOF" if (!defined($header)) { # means "at EOF"
error("Profile is empty.\n"); error("Profile is empty.\n");
} }
my $symbols; my $symbols;
if ($header =~ m/^--- *$symbol_marker/o) { if ($header =~ m/^--- *$symbol_marker/o) {
# Verify that the user asked for a symbolized profile
if (!$main::use_symbolized_profile) {
# we have both a binary and symbolized profiles, abort
error("FATAL ERROR: Symbolized profile\n $fname\ncannot be used with " .
"a binary arg. Try again without passing\n $prog\n");
}
# Read the symbol section of the symbolized profile file. # Read the symbol section of the symbolized profile file.
$symbols = ReadSymbols(*PROFILE{IO}); $symbols = ReadSymbols(*PROFILE{IO});
# Read the next line to get the header for the remaining profile. # Read the next line to get the header for the remaining profile.
$header = ReadProfileLine(*PROFILE) || ""; $header = ReadProfileHeader(*PROFILE) || "";
} }
my $result; $main::profile_type = '';
if ($header =~ m/^heap profile:.*$growth_marker/o) { if ($header =~ m/^heap profile:.*$growth_marker/o) {
$main::profile_type = 'growth'; $main::profile_type = 'growth';
$result = ReadHeapProfile($prog, $fname, $header); $result = ReadHeapProfile($prog, *PROFILE, $header);
} elsif ($header =~ m/^heap profile:/) { } elsif ($header =~ m/^heap profile:/) {
$main::profile_type = 'heap'; $main::profile_type = 'heap';
$result = ReadHeapProfile($prog, $fname, $header); $result = ReadHeapProfile($prog, *PROFILE, $header);
} elsif ($header =~ m/^--- *$contention_marker/o) { } elsif ($header =~ m/^--- *$contention_marker/o) {
$main::profile_type = 'contention'; $main::profile_type = 'contention';
$result = ReadSynchProfile($prog, $fname); $result = ReadSynchProfile($prog, *PROFILE);
} elsif ($header =~ m/^--- *Stacks:/) { } elsif ($header =~ m/^--- *Stacks:/) {
print STDERR print STDERR
"Old format contention profile: mistakenly reports " . "Old format contention profile: mistakenly reports " .
"condition variable signals as lock contentions.\n"; "condition variable signals as lock contentions.\n";
$main::profile_type = 'contention'; $main::profile_type = 'contention';
$result = ReadSynchProfile($prog, $fname); $result = ReadSynchProfile($prog, *PROFILE);
} elsif ($header =~ m/^--- *$profile_marker/) { } elsif ($header =~ m/^--- *$profile_marker/) {
# the binary cpu profile data starts immediately after this line # the binary cpu profile data starts immediately after this line
$main::profile_type = 'cpu'; $main::profile_type = 'cpu';
$result = ReadCPUProfile($prog, $fname); $result = ReadCPUProfile($prog, $fname, *PROFILE);
} else { } else {
if (defined($symbols)) { if (defined($symbols)) {
# a symbolized profile contains a format we don't recognize, bail out # a symbolized profile contains a format we don't recognize, bail out
@ -3267,9 +3370,11 @@ sub ReadProfile {
} }
# no ascii header present -- must be a CPU profile # no ascii header present -- must be a CPU profile
$main::profile_type = 'cpu'; $main::profile_type = 'cpu';
$result = ReadCPUProfile($prog, $fname); $result = ReadCPUProfile($prog, $fname, *PROFILE);
} }
close(PROFILE);
# if we got symbols along with the profile, return those as well # if we got symbols along with the profile, return those as well
if (defined($symbols)) { if (defined($symbols)) {
$result->{symbols} = $symbols; $result->{symbols} = $symbols;
@ -3308,7 +3413,8 @@ sub FixCallerAddresses {
# CPU profile reader # CPU profile reader
sub ReadCPUProfile { sub ReadCPUProfile {
my $prog = shift; my $prog = shift;
my $fname = shift; my $fname = shift; # just used for logging
local *PROFILE = shift;
my $version; my $version;
my $period; my $period;
my $i; my $i;
@ -3375,7 +3481,6 @@ sub ReadCPUProfile {
my $map = ''; my $map = '';
seek(PROFILE, $i * 4, 0); seek(PROFILE, $i * 4, 0);
read(PROFILE, $map, (stat PROFILE)[7]); read(PROFILE, $map, (stat PROFILE)[7]);
close(PROFILE);
my $r = {}; my $r = {};
$r->{version} = $version; $r->{version} = $version;
@ -3389,7 +3494,7 @@ sub ReadCPUProfile {
sub ReadHeapProfile { sub ReadHeapProfile {
my $prog = shift; my $prog = shift;
my $fname = shift; local *PROFILE = shift;
my $header = shift; my $header = shift;
my $index = 1; my $index = 1;
@ -3574,7 +3679,9 @@ sub ReadHeapProfile {
} }
sub ReadSynchProfile { sub ReadSynchProfile {
my ($prog, $fname, $header) = @_; my $prog = shift;
local *PROFILE = shift;
my $header = shift;
my $map = ''; my $map = '';
my $profile = {}; my $profile = {};
@ -3649,7 +3756,6 @@ sub ReadSynchProfile {
$map .= $line; $map .= $line;
} }
} }
close PROFILE;
if (!$seen_clockrate) { if (!$seen_clockrate) {
printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n", printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n",
@ -4098,8 +4204,9 @@ sub ExtractSymbols {
# advance through the libraries as we advance the pc.  Sometimes the
# addresses of libraries may overlap with the addresses of the main
# binary, so to make sure the libraries 'win', we iterate over the
# libraries in reverse order (which assumes the binary doesn't start
# in the middle of a library, which seems a fair assumption).
my @pcs = (sort { $a cmp $b } keys(%{$pcset}));  # pcset is 0-extended strings
foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) { foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) {
my $libname = $lib->[0]; my $libname = $lib->[0];
my $start = $lib->[1]; my $start = $lib->[1];
@ -4109,14 +4216,18 @@ sub ExtractSymbols {
# Get list of pcs that belong in this library. # Get list of pcs that belong in this library.
my $contained = []; my $contained = [];
my ($start_pc_index, $finish_pc_index); my ($start_pc_index, $finish_pc_index);
# Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index].
for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0; for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0;
$finish_pc_index--) { $finish_pc_index--) {
last if $pcs[$finish_pc_index - 1] le $finish; last if $pcs[$finish_pc_index - 1] le $finish;
} }
# Find smallest start_pc_index such that $start <= $pc[$start_pc_index].
for ($start_pc_index = $finish_pc_index; $start_pc_index > 0; for ($start_pc_index = $finish_pc_index; $start_pc_index > 0;
$start_pc_index--) { $start_pc_index--) {
last if $pcs[$start_pc_index - 1] lt $start; last if $pcs[$start_pc_index - 1] lt $start;
} }
# This keeps PC values higher than $pc[$finish_pc_index] in @pcs,
# in case there are overlaps in libraries and the main binary.
@{$contained} = splice(@pcs, $start_pc_index, @{$contained} = splice(@pcs, $start_pc_index,
$finish_pc_index - $start_pc_index); $finish_pc_index - $start_pc_index);
# Map to symbols # Map to symbols

View File

@ -132,6 +132,16 @@ else
fi fi
AC_DEFINE_UNQUOTED([LG_SIZEOF_INT], [$LG_SIZEOF_INT]) AC_DEFINE_UNQUOTED([LG_SIZEOF_INT], [$LG_SIZEOF_INT])
AC_CHECK_SIZEOF([long])
if test "x${ac_cv_sizeof_long}" = "x8" ; then
LG_SIZEOF_LONG=3
elif test "x${ac_cv_sizeof_long}" = "x4" ; then
LG_SIZEOF_LONG=2
else
AC_MSG_ERROR([Unsupported long size: ${ac_cv_sizeof_long}])
fi
AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG])
AC_CANONICAL_HOST AC_CANONICAL_HOST
dnl CPU-specific settings. dnl CPU-specific settings.
CPU_SPINWAIT="" CPU_SPINWAIT=""
@ -157,17 +167,6 @@ case "${host_cpu}" in
esac esac
AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT])
JE_COMPILABLE([__attribute__ syntax],
[static __attribute__((unused)) void foo(void){}],
[],
[attribute])
if test "x${attribute}" = "xyes" ; then
AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ])
if test "x$GCC" = "xyes" -a "${abi}" = "xelf"; then
JE_CFLAGS_APPEND([-fvisibility=internal])
fi
fi
dnl Platform-specific settings. abi and RPATH can probably be determined dnl Platform-specific settings. abi and RPATH can probably be determined
dnl programmatically, but doing so is error-prone, which makes it generally dnl programmatically, but doing so is error-prone, which makes it generally
dnl not worth the trouble. dnl not worth the trouble.
@ -227,6 +226,17 @@ esac
AC_SUBST([abi]) AC_SUBST([abi])
AC_SUBST([RPATH]) AC_SUBST([RPATH])
JE_COMPILABLE([__attribute__ syntax],
[static __attribute__((unused)) void foo(void){}],
[],
[attribute])
if test "x${attribute}" = "xyes" ; then
AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ])
if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then
JE_CFLAGS_APPEND([-fvisibility=hidden])
fi
fi
JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [ JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [
#define _GNU_SOURCE #define _GNU_SOURCE
#include <sys/mman.h> #include <sys/mman.h>
@ -404,17 +414,12 @@ fi
], ],
[enable_prof="0"] [enable_prof="0"]
) )
AC_ARG_ENABLE([prof-libgcc], if test "x$enable_prof" = "x1" ; then
[AS_HELP_STRING([--disable-prof-libgcc], backtrace_method=""
[Do not use libgcc for backtracing])],
[if test "x$enable_prof_libgcc" = "xno" ; then
enable_prof_libgcc="0"
else else
enable_prof_libgcc="1" backtrace_method="N/A"
fi fi
],
[enable_prof_libgcc="1"]
)
AC_ARG_ENABLE([prof-libunwind], AC_ARG_ENABLE([prof-libunwind],
[AS_HELP_STRING([--enable-prof-libunwind], [Use libunwind for backtracing])], [AS_HELP_STRING([--enable-prof-libunwind], [Use libunwind for backtracing])],
[if test "x$enable_prof_libunwind" = "xno" ; then [if test "x$enable_prof_libunwind" = "xno" ; then
@ -438,10 +443,7 @@ else
fi, fi,
LUNWIND="-lunwind" LUNWIND="-lunwind"
) )
if test "x$enable_prof" = "x1" ; then if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then
LIBS="$LIBS -lm"
AC_DEFINE([JEMALLOC_PROF], [ ])
if test "x$enable_prof_libunwind" = "x1" ; then
AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"]) AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"])
if test "x$LUNWIND" = "x-lunwind" ; then if test "x$LUNWIND" = "x-lunwind" ; then
AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"], AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"],
@ -450,27 +452,81 @@ if test "x$enable_prof" = "x1" ; then
LIBS="$LIBS $LUNWIND" LIBS="$LIBS $LUNWIND"
fi fi
if test "x${enable_prof_libunwind}" = "x1" ; then if test "x${enable_prof_libunwind}" = "x1" ; then
backtrace_method="libunwind"
AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ]) AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ])
fi fi
fi
fi fi
AC_SUBST([enable_prof])
dnl If libunwind isn't enabled, try to use libgcc rather than gcc intrinsics AC_ARG_ENABLE([prof-libgcc],
dnl for backtracing. [AS_HELP_STRING([--disable-prof-libgcc],
if test "x$enable_prof" = "x1" -a "x$enable_prof_libgcc" = "x1" ; then [Do not use libgcc for backtracing])],
if test "x$enable_prof_libunwind" = "x0" -a "x$GCC" = "xyes" ; then [if test "x$enable_prof_libgcc" = "xno" ; then
enable_prof_libgcc="0"
else
enable_prof_libgcc="1" enable_prof_libgcc="1"
fi
],
[enable_prof_libgcc="1"]
)
if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \
-a "x$GCC" = "xyes" ; then
AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"]) AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"])
AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"]) AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"])
dnl The following is conservative, in that it only has entries for CPUs on
dnl which jemalloc has been tested.
AC_MSG_CHECKING([libgcc-based backtracing reliability on ${host_cpu}])
case "${host_cpu}" in
i[[3456]]86)
AC_MSG_RESULT([unreliable])
enable_prof_libgcc="0";
;;
x86_64)
AC_MSG_RESULT([reliable])
;;
*)
AC_MSG_RESULT([unreliable])
enable_prof_libgcc="0";
;;
esac
if test "x${enable_prof_libgcc}" = "x1" ; then if test "x${enable_prof_libgcc}" = "x1" ; then
backtrace_method="libgcc"
AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ]) AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ])
fi fi
else else
enable_prof_libgcc="0" enable_prof_libgcc="0"
fi
fi fi
AC_ARG_ENABLE([prof-gcc],
[AS_HELP_STRING([--disable-prof-gcc],
[Do not use gcc intrinsics for backtracing])],
[if test "x$enable_prof_gcc" = "xno" ; then
enable_prof_gcc="0"
else
enable_prof_gcc="1"
fi
],
[enable_prof_gcc="1"]
)
if test "x$backtrace_method" = "x" -a "x$enable_prof_gcc" = "x1" \
-a "x$GCC" = "xyes" ; then
backtrace_method="gcc intrinsics"
AC_DEFINE([JEMALLOC_PROF_GCC], [ ])
else
enable_prof_gcc="0"
fi
if test "x$backtrace_method" = "x" ; then
backtrace_method="none (disabling profiling)"
enable_prof="0"
fi
AC_MSG_CHECKING([configured backtracing method])
AC_MSG_RESULT([$backtrace_method])
if test "x$enable_prof" = "x1" ; then
LIBS="$LIBS -lm"
AC_DEFINE([JEMALLOC_PROF], [ ])
fi
AC_SUBST([enable_prof])
dnl Enable tiny allocations by default. dnl Enable tiny allocations by default.
AC_ARG_ENABLE([tiny], AC_ARG_ENABLE([tiny],
[AS_HELP_STRING([--disable-tiny], [Disable tiny (sub-quantum) allocations])], [AS_HELP_STRING([--disable-tiny], [Disable tiny (sub-quantum) allocations])],
@ -706,6 +762,51 @@ if test "x${enable_tls}" = "x0" ; then
AC_DEFINE_UNQUOTED([NO_TLS], [ ]) AC_DEFINE_UNQUOTED([NO_TLS], [ ])
fi fi
dnl ============================================================================
dnl Check for ffsl(3), and fail if not found. This function exists on all
dnl platforms that jemalloc currently has a chance of functioning on without
dnl modification.
AC_CHECK_FUNC([ffsl], [],
[AC_MSG_ERROR([Cannot build without ffsl(3)])])
dnl ============================================================================
dnl Check for atomic(3) operations as provided on Darwin.
JE_COMPILABLE([Darwin OSAtomic*()], [
#include <libkern/OSAtomic.h>
#include <inttypes.h>
], [
{
int32_t x32 = 0;
volatile int32_t *x32p = &x32;
OSAtomicAdd32(1, x32p);
}
{
int64_t x64 = 0;
volatile int64_t *x64p = &x64;
OSAtomicAdd64(1, x64p);
}
], [osatomic])
if test "x${osatomic}" = "xyes" ; then
AC_DEFINE([JEMALLOC_OSATOMIC])
fi
dnl ============================================================================
dnl Check for spinlock(3) operations as provided on Darwin.
JE_COMPILABLE([Darwin OSSpin*()], [
#include <libkern/OSAtomic.h>
#include <inttypes.h>
], [
OSSpinLock lock = 0;
OSSpinLockLock(&lock);
OSSpinLockUnlock(&lock);
], [osspin])
if test "x${osspin}" = "xyes" ; then
AC_DEFINE([JEMALLOC_OSSPIN])
fi
dnl ============================================================================ dnl ============================================================================
dnl Check for allocator-related functions that should be wrapped. dnl Check for allocator-related functions that should be wrapped.
@ -810,8 +911,9 @@ AC_MSG_RESULT([cc-silence : ${enable_cc_silence}])
AC_MSG_RESULT([debug : ${enable_debug}]) AC_MSG_RESULT([debug : ${enable_debug}])
AC_MSG_RESULT([stats : ${enable_stats}]) AC_MSG_RESULT([stats : ${enable_stats}])
AC_MSG_RESULT([prof : ${enable_prof}]) AC_MSG_RESULT([prof : ${enable_prof}])
AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}])
AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}])
AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}])
AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}])
AC_MSG_RESULT([tiny : ${enable_tiny}]) AC_MSG_RESULT([tiny : ${enable_tiny}])
AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([tcache : ${enable_tcache}])
AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([fill : ${enable_fill}])

View File

@ -1535,6 +1535,25 @@ malloc_conf = "xmalloc:true";]]></programlisting>
option for additional information.</para></listitem> option for additional information.</para></listitem>
</varlistentry> </varlistentry>
<varlistentry id="stats.cactive">
<term>
<mallctl>stats.cactive</mallctl>
(<type>size_t *</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
<listitem><para>Pointer to a counter that contains an approximate count
of the current number of bytes in active pages. The estimate may be
high, but never low, because each arena rounds up to the nearest
multiple of the chunk size when computing its contribution to the
counter. Note that the <link
linkend="epoch"><mallctl>epoch</mallctl></link> mallctl has no bearing
on this counter. Furthermore, counter consistency is maintained via
atomic operations, so it is necessary to use an atomic operation in
order to guarantee a consistent read when dereferencing the pointer.
</para></listitem>
</varlistentry>
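As a minimal illustrative sketch (not part of this change), the counter can be read from C roughly as follows, assuming a build with --enable-stats, the default unprefixed mallctl() interface, and a GCC-style compiler for the atomic read (all of which are assumptions here):

	#include <stdio.h>
	#include <jemalloc/jemalloc.h>

	int
	main(void)
	{
		size_t *cactive;
		size_t sz = sizeof(cactive);

		/* stats.cactive yields a pointer to the counter, not a snapshot. */
		if (mallctl("stats.cactive", &cactive, &sz, NULL, 0) != 0) {
			fprintf(stderr, "mallctl(\"stats.cactive\") failed\n");
			return (1);
		}
		/*
		 * The counter is maintained via atomic operations, so read it
		 * atomically; adding zero with a __sync builtin is one way to
		 * do that on GCC-style toolchains (an assumption, not part of
		 * the documented interface).
		 */
		printf("approximate active bytes: %zu\n",
		    (size_t)__sync_add_and_fetch(cactive, 0));
		return (0);
	}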
<varlistentry id="stats.allocated"> <varlistentry id="stats.allocated">
<term> <term>
<mallctl>stats.allocated</mallctl> <mallctl>stats.allocated</mallctl>
@ -1642,6 +1661,16 @@ malloc_conf = "xmalloc:true";]]></programlisting>
</para></listitem> </para></listitem>
</varlistentry> </varlistentry>
<varlistentry>
<term>
<mallctl>stats.arenas.&lt;i&gt;.nthreads</mallctl>
(<type>unsigned</type>)
<literal>r-</literal>
</term>
<listitem><para>Number of threads currently assigned to
arena.</para></listitem>
</varlistentry>
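For illustration only (the helper name, arena index handling, and error handling below are assumptions, not part of this change), the per-arena value can be read by translating the name to a MIB once and substituting the arena index, using the existing mallctlnametomib()/mallctlbymib() interfaces:

	#include <jemalloc/jemalloc.h>

	/* Hypothetical helper: read stats.arenas.<i>.nthreads for one arena. */
	static unsigned
	arena_nthreads(unsigned arena_ind)
	{
		size_t mib[4];
		size_t miblen = sizeof(mib) / sizeof(size_t);
		unsigned nthreads;
		size_t sz = sizeof(nthreads);

		if (mallctlnametomib("stats.arenas.0.nthreads", mib, &miblen) != 0)
			return (0);
		mib[2] = arena_ind;	/* Substitute the arena of interest. */
		if (mallctlbymib(mib, miblen, &nthreads, &sz, NULL, 0) != 0)
			return (0);
		return (nthreads);
	}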
<varlistentry> <varlistentry>
<term> <term>
<mallctl>stats.arenas.&lt;i&gt;.pactive</mallctl> <mallctl>stats.arenas.&lt;i&gt;.pactive</mallctl>

View File

@ -19,6 +19,7 @@
#ifdef JEMALLOC_TINY #ifdef JEMALLOC_TINY
/* Smallest size class to support. */ /* Smallest size class to support. */
# define LG_TINY_MIN LG_SIZEOF_PTR # define LG_TINY_MIN LG_SIZEOF_PTR
# define TINY_MIN (1U << LG_TINY_MIN)
#endif #endif
/* /*
@ -57,6 +58,10 @@
#define RUN_MAX_OVRHD 0x0000003dU #define RUN_MAX_OVRHD 0x0000003dU
#define RUN_MAX_OVRHD_RELAX 0x00001800U #define RUN_MAX_OVRHD_RELAX 0x00001800U
/* Maximum number of regions in one run. */
#define LG_RUN_MAXREGS 11
#define RUN_MAXREGS (1U << LG_RUN_MAXREGS)
/* /*
* The minimum ratio of active:dirty pages per arena is computed as: * The minimum ratio of active:dirty pages per arena is computed as:
* *
@ -70,6 +75,7 @@
typedef struct arena_chunk_map_s arena_chunk_map_t; typedef struct arena_chunk_map_s arena_chunk_map_t;
typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_chunk_s arena_chunk_t;
typedef struct arena_run_s arena_run_t; typedef struct arena_run_s arena_run_t;
typedef struct arena_bin_info_s arena_bin_info_t;
typedef struct arena_bin_s arena_bin_t; typedef struct arena_bin_s arena_bin_t;
typedef struct arena_s arena_t; typedef struct arena_s arena_t;
@ -207,16 +213,52 @@ struct arena_run_s {
/* Bin this run is associated with. */ /* Bin this run is associated with. */
arena_bin_t *bin; arena_bin_t *bin;
/* Index of next region that has never been allocated, or nregs. */
uint32_t nextind;
/* Number of free regions in run. */
unsigned nfree;
}; };
/*
* Read-only information associated with each element of arena_t's bins array
* is stored separately, partly to reduce memory usage (only one copy, rather
* than one per arena), but mainly to avoid false cacheline sharing.
*/
struct arena_bin_info_s {
/* Size of regions in a run for this bin's size class. */
size_t reg_size;
/* Total size of a run for this bin's size class. */
size_t run_size;
/* Total number of regions in a run for this bin's size class. */
uint32_t nregs;
/*
* Offset of first bitmap_t element in a run header for this bin's size
* class.
*/
uint32_t bitmap_offset;
/*
* Metadata used to manipulate bitmaps for runs associated with this
* bin.
*/
bitmap_info_t bitmap_info;
#ifdef JEMALLOC_PROF
/*
* Offset of first (prof_ctx_t *) in a run header for this bin's size
* class, or 0 if (opt_prof == false).
*/
uint32_t ctx0_offset;
#endif
/* Offset of first region in a run for this bin's size class. */
uint32_t reg0_offset;
};
struct arena_bin_s { struct arena_bin_s {
/* /*
* All operations on runcur, runs, and stats require that lock be * All operations on runcur, runs, and stats require that lock be
@ -241,26 +283,6 @@ struct arena_bin_s {
*/ */
arena_run_tree_t runs; arena_run_tree_t runs;
/* Size of regions in a run for this bin's size class. */
size_t reg_size;
/* Total size of a run for this bin's size class. */
size_t run_size;
/* Total number of regions in a run for this bin's size class. */
uint32_t nregs;
#ifdef JEMALLOC_PROF
/*
* Offset of first (prof_ctx_t *) in a run header for this bin's size
* class, or 0 if (opt_prof == false).
*/
uint32_t ctx0_offset;
#endif
/* Offset of first region in a run for this bin's size class. */
uint32_t reg0_offset;
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS
/* Bin statistics. */ /* Bin statistics. */
malloc_bin_stats_t stats; malloc_bin_stats_t stats;
@ -277,8 +299,18 @@ struct arena_s {
unsigned ind; unsigned ind;
/*
 * Number of threads currently assigned to this arena.  This field is
 * protected by arenas_lock.
 */
unsigned nthreads;
/*
 * There are three classes of arena operations from a locking
 * perspective:
 * 1) Thread assignment (modifies nthreads) is protected by
 *    arenas_lock.
 * 2) Bin-related operations are protected by bin locks.
 * 3) Chunk- and run-related operations are protected by this mutex.
 */
malloc_mutex_t lock;
@ -389,7 +421,15 @@ struct arena_s {
extern size_t opt_lg_qspace_max; extern size_t opt_lg_qspace_max;
extern size_t opt_lg_cspace_max; extern size_t opt_lg_cspace_max;
extern ssize_t opt_lg_dirty_mult; extern ssize_t opt_lg_dirty_mult;
/*
* small_size2bin is a compact lookup table that rounds request sizes up to
* size classes. In order to reduce cache footprint, the table is compressed,
* and all accesses are via the SMALL_SIZE2BIN macro.
*/
extern uint8_t const *small_size2bin;
#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN])
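/*
 * Worked example (assuming LG_TINY_MIN == 3, i.e. an 8-byte tiny minimum,
 * which is an illustrative assumption): requests of 1..8 bytes map to
 * small_size2bin[0], 9..16 bytes to small_size2bin[1], and so on, so the
 * table needs one byte per 8-byte step rather than one byte per size.
 */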
extern arena_bin_info_t *arena_bin_info;
/* Various bin-related settings. */ /* Various bin-related settings. */
#ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */ #ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */
@ -456,8 +496,9 @@ bool arena_boot(void);
#ifdef JEMALLOC_H_INLINES #ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE #ifndef JEMALLOC_ENABLE_INLINE
unsigned arena_run_regind(arena_run_t *run, arena_bin_t *bin, size_t arena_bin_index(arena_t *arena, arena_bin_t *bin);
const void *ptr, size_t size); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
const void *ptr);
# ifdef JEMALLOC_PROF # ifdef JEMALLOC_PROF
prof_ctx_t *arena_prof_ctx_get(const void *ptr); prof_ctx_t *arena_prof_ctx_get(const void *ptr);
void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
@ -466,21 +507,37 @@ void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
#endif #endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
JEMALLOC_INLINE size_t
arena_bin_index(arena_t *arena, arena_bin_t *bin)
{
size_t binind = bin - arena->bins;
assert(binind < nbins);
return (binind);
}
JEMALLOC_INLINE unsigned JEMALLOC_INLINE unsigned
arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr, arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
size_t size)
{ {
unsigned shift, diff, regind; unsigned shift, diff, regind;
size_t size;
assert(run->magic == ARENA_RUN_MAGIC); dassert(run->magic == ARENA_RUN_MAGIC);
/*
* Freeing a pointer lower than region zero can cause assertion
* failure.
*/
assert((uintptr_t)ptr >= (uintptr_t)run +
(uintptr_t)bin_info->reg0_offset);
/* /*
* Avoid doing division with a variable divisor if possible. Using * Avoid doing division with a variable divisor if possible. Using
* actual division here can reduce allocator throughput by over 20%! * actual division here can reduce allocator throughput by over 20%!
*/ */
diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset); diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run -
bin_info->reg0_offset);
/* Rescale (factor powers of 2 out of the numerator and denominator). */ /* Rescale (factor powers of 2 out of the numerator and denominator). */
size = bin_info->reg_size;
shift = ffs(size) - 1; shift = ffs(size) - 1;
diff >>= shift; diff >>= shift;
size >>= shift; size >>= shift;
@ -503,7 +560,7 @@ arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
* divide by 0, and 1 and 2 are both powers of two, which are * divide by 0, and 1 and 2 are both powers of two, which are
* handled above. * handled above.
*/ */
#define SIZE_INV_SHIFT 21 #define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS)
#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) #define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
static const unsigned size_invs[] = { static const unsigned size_invs[] = {
SIZE_INV(3), SIZE_INV(3),
@ -524,7 +581,7 @@ arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
#undef SIZE_INV_SHIFT #undef SIZE_INV_SHIFT
} }
assert(diff == regind * size);
assert(regind < bin_info->nregs);
return (regind);
}
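/*
 * Worked example of the reciprocal trick above (values are illustrative,
 * assuming a 32-bit unsigned type so that SIZE_INV_SHIFT == 21): for
 * reg_size == 48 == 16*3, shift == 4, so size rescales to 3 and diff is
 * divided by 16.  SIZE_INV(3) == (1U << 21)/3 + 1 == 699051, and a pointer
 * 96 bytes past region 0 rescales to diff == 6, giving
 * (6 * 699051) >> 21 == 2, i.e. region index 2, without any division.
 */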
@ -551,13 +608,14 @@ arena_prof_ctx_get(const void *ptr)
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT)); PAGE_SHIFT));
arena_bin_t *bin = run->bin; size_t binind = arena_bin_index(chunk->arena, run->bin);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
unsigned regind; unsigned regind;
assert(run->magic == ARENA_RUN_MAGIC); dassert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin, ptr, bin->reg_size); regind = arena_run_regind(run, bin_info, ptr);
ret = *(prof_ctx_t **)((uintptr_t)run + ret = *(prof_ctx_t **)((uintptr_t)run +
bin->ctx0_offset + (regind * bin_info->ctx0_offset + (regind *
sizeof(prof_ctx_t *))); sizeof(prof_ctx_t *)));
} }
} else } else
@ -585,12 +643,16 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT)); PAGE_SHIFT));
arena_bin_t *bin = run->bin; arena_bin_t *bin = run->bin;
size_t binind;
arena_bin_info_t *bin_info;
unsigned regind; unsigned regind;
assert(run->magic == ARENA_RUN_MAGIC); dassert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin, ptr, bin->reg_size); binind = arena_bin_index(chunk->arena, bin);
bin_info = &arena_bin_info[binind];
regind = arena_run_regind(run, bin_info, ptr);
*((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset *((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset
+ (regind * sizeof(prof_ctx_t *)))) = ctx; + (regind * sizeof(prof_ctx_t *)))) = ctx;
} else } else
assert((uintptr_t)ctx == (uintptr_t)1U); assert((uintptr_t)ctx == (uintptr_t)1U);
@ -606,7 +668,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
arena_chunk_map_t *mapelm; arena_chunk_map_t *mapelm;
assert(arena != NULL); assert(arena != NULL);
assert(arena->magic == ARENA_MAGIC); dassert(arena->magic == ARENA_MAGIC);
assert(chunk->arena == arena); assert(chunk->arena == arena);
assert(ptr != NULL); assert(ptr != NULL);
assert(CHUNK_ADDR2BASE(ptr) != ptr); assert(CHUNK_ADDR2BASE(ptr) != ptr);
@ -629,11 +691,18 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
run = (arena_run_t *)((uintptr_t)chunk + run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapelm->bits >> (uintptr_t)((pageind - (mapelm->bits >>
PAGE_SHIFT)) << PAGE_SHIFT)); PAGE_SHIFT)) << PAGE_SHIFT));
assert(run->magic == ARENA_RUN_MAGIC); dassert(run->magic == ARENA_RUN_MAGIC);
assert(((uintptr_t)ptr - ((uintptr_t)run +
(uintptr_t)run->bin->reg0_offset)) %
run->bin->reg_size == 0);
bin = run->bin; bin = run->bin;
#ifdef JEMALLOC_DEBUG
{
size_t binind = arena_bin_index(arena, bin);
arena_bin_info_t *bin_info =
&arena_bin_info[binind];
assert(((uintptr_t)ptr - ((uintptr_t)run +
(uintptr_t)bin_info->reg0_offset)) %
bin_info->reg_size == 0);
}
#endif
malloc_mutex_lock(&bin->lock); malloc_mutex_lock(&bin->lock);
arena_dalloc_bin(arena, chunk, ptr, mapelm); arena_dalloc_bin(arena, chunk, ptr, mapelm);
malloc_mutex_unlock(&bin->lock); malloc_mutex_unlock(&bin->lock);

View File

@ -0,0 +1,113 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#define atomic_read_uint64(p) atomic_add_uint64(p, 0)
#define atomic_read_uint32(p) atomic_add_uint32(p, 0)
#if (LG_SIZEOF_PTR == 3)
# define atomic_read_z(p) \
(size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)0)
# define atomic_add_z(p, x) \
(size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x)
# define atomic_sub_z(p, x) \
(size_t)atomic_sub_uint64((uint64_t *)p, (uint64_t)x)
#elif (LG_SIZEOF_PTR == 2)
# define atomic_read_z(p) \
(size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)0)
# define atomic_add_z(p, x) \
(size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x)
# define atomic_sub_z(p, x) \
(size_t)atomic_sub_uint32((uint32_t *)p, (uint32_t)x)
#endif
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
/* 64-bit operations. */
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
return (__sync_add_and_fetch(p, x));
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
return (__sync_sub_and_fetch(p, x));
}
#elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
return (OSAtomicAdd64((int64_t)x, (int64_t *)p));
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p));
}
#else
# if (LG_SIZEOF_PTR == 3)
# error "Missing implementation for 64-bit atomic operations"
# endif
#endif
/* 32-bit operations. */
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
return (__sync_add_and_fetch(p, x));
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
return (__sync_sub_and_fetch(p, x));
}
#elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
return (OSAtomicAdd32((int32_t)x, (int32_t *)p));
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p));
}
#else
# error "Missing implementation for 32-bit atomic operations"
#endif
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

View File

@ -0,0 +1,184 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */
#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS
typedef struct bitmap_level_s bitmap_level_t;
typedef struct bitmap_info_s bitmap_info_t;
typedef unsigned long bitmap_t;
#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG
/* Number of bits per group. */
#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3)
#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS)
#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1)
/* Maximum number of levels possible. */
#define BITMAP_MAX_LEVELS \
(LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \
+ !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP)
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
struct bitmap_level_s {
/* Offset of this level's groups within the array of groups. */
size_t group_offset;
};
struct bitmap_info_s {
/* Logical number of bits in bitmap (stored at bottom level). */
size_t nbits;
/* Number of levels necessary for nbits. */
unsigned nlevels;
/*
* Only the first (nlevels+1) elements are used, and levels are ordered
* bottom to top (e.g. the bottom level is stored in levels[0]).
*/
bitmap_level_t levels[BITMAP_MAX_LEVELS+1];
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
void bitmap_info_init(bitmap_info_t *binfo, size_t nbits);
size_t bitmap_info_ngroups(const bitmap_info_t *binfo);
size_t bitmap_size(size_t nbits);
void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo);
bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo);
void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_))
JEMALLOC_INLINE bool
bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo)
{
unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1;
bitmap_t rg = bitmap[rgoff];
/* The bitmap is full iff the root group is 0. */
return (rg == 0);
}
JEMALLOC_INLINE bool
bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
{
size_t goff;
bitmap_t g;
assert(bit < binfo->nbits);
goff = bit >> LG_BITMAP_GROUP_NBITS;
g = bitmap[goff];
return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))));
}
JEMALLOC_INLINE void
bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
{
size_t goff;
bitmap_t *gp;
bitmap_t g;
assert(bit < binfo->nbits);
assert(bitmap_get(bitmap, binfo, bit) == false);
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[goff];
g = *gp;
assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
assert(bitmap_get(bitmap, binfo, bit));
/* Propagate group state transitions up the tree. */
if (g == 0) {
unsigned i;
for (i = 1; i < binfo->nlevels; i++) {
bit = goff;
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[binfo->levels[i].group_offset + goff];
g = *gp;
assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
if (g != 0)
break;
}
}
}
/* sfu: set first unset. */
JEMALLOC_INLINE size_t
bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo)
{
size_t bit;
bitmap_t g;
unsigned i;
assert(bitmap_full(bitmap, binfo) == false);
i = binfo->nlevels - 1;
g = bitmap[binfo->levels[i].group_offset];
bit = ffsl(g) - 1;
while (i > 0) {
i--;
g = bitmap[binfo->levels[i].group_offset + bit];
bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1);
}
bitmap_set(bitmap, binfo, bit);
return (bit);
}
JEMALLOC_INLINE void
bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
{
size_t goff;
bitmap_t *gp;
bitmap_t g;
bool propagate;
assert(bit < binfo->nbits);
assert(bitmap_get(bitmap, binfo, bit));
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[goff];
g = *gp;
propagate = (g == 0);
assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0);
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
assert(bitmap_get(bitmap, binfo, bit) == false);
/* Propagate group state transitions up the tree. */
if (propagate) {
unsigned i;
for (i = 1; i < binfo->nlevels; i++) {
bit = goff;
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[binfo->levels[i].group_offset + goff];
g = *gp;
propagate = (g == 0);
assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))
== 0);
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
if (propagate == false)
break;
}
}
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
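A hedged usage sketch of the bitmap API above (illustration only; the function name is invented, and <stdlib.h>/<assert.h> are assumed for malloc(), free(), and assert()):

static void
bitmap_example(void)
{
	bitmap_info_t binfo;
	bitmap_t *bmp;
	size_t bit;

	bitmap_info_init(&binfo, 256);
	bmp = (bitmap_t *)malloc(bitmap_size(256));
	if (bmp == NULL)
		return;
	bitmap_init(bmp, &binfo);		/* All 256 bits start out unset. */

	bit = bitmap_sfu(bmp, &binfo);		/* Grab the first unset bit (0). */
	assert(bitmap_get(bmp, &binfo, bit));
	bitmap_unset(bmp, &binfo, bit);		/* Release it again. */
	free(bmp);
}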

jemalloc/include/jemalloc/internal/ctl.h

@ -29,6 +29,7 @@ struct ctl_node_s {
struct ctl_arena_stats_s { struct ctl_arena_stats_s {
bool initialized; bool initialized;
unsigned nthreads;
size_t pactive; size_t pactive;
size_t pdirty; size_t pdirty;
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS

jemalloc/include/jemalloc/internal/hash.h

@ -17,7 +17,7 @@
uint64_t hash(const void *key, size_t len, uint64_t seed); uint64_t hash(const void *key, size_t len, uint64_t seed);
#endif #endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(HASH_C_)) #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_))
/* /*
* The following hash function is based on MurmurHash64A(), placed into the * The following hash function is based on MurmurHash64A(), placed into the
* public domain by Austin Appleby. See http://murmurhash.googlepages.com/ for * public domain by Austin Appleby. See http://murmurhash.googlepages.com/ for

jemalloc/include/jemalloc/internal/jemalloc_internal.h.in

@ -33,6 +33,10 @@
#define JEMALLOC_MANGLE #define JEMALLOC_MANGLE
#include "../jemalloc@install_suffix@.h" #include "../jemalloc@install_suffix@.h"
#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
#include <libkern/OSAtomic.h>
#endif
#ifdef JEMALLOC_ZONE #ifdef JEMALLOC_ZONE
#include <mach/mach_error.h> #include <mach/mach_error.h>
#include <mach/mach_init.h> #include <mach/mach_init.h>
@ -55,7 +59,8 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
* Define a custom assert() in order to reduce the chances of deadlock during * Define a custom assert() in order to reduce the chances of deadlock during
* assertion failure. * assertion failure.
*/ */
#ifdef JEMALLOC_DEBUG #ifndef assert
# ifdef JEMALLOC_DEBUG
# define assert(e) do { \ # define assert(e) do { \
if (!(e)) { \ if (!(e)) { \
char line_buf[UMAX2S_BUFSIZE]; \ char line_buf[UMAX2S_BUFSIZE]; \
@ -70,8 +75,15 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
abort(); \ abort(); \
} \ } \
} while (0) } while (0)
# else
# define assert(e)
# endif
#endif
#ifdef JEMALLOC_DEBUG
# define dassert(e) assert(e)
#else #else
#define assert(e) # define dassert(e)
#endif #endif
/* /*
@ -146,7 +158,19 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
#define QUANTUM_CEILING(a) \ #define QUANTUM_CEILING(a) \
(((a) + QUANTUM_MASK) & ~QUANTUM_MASK) (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
#define LONG ((size_t)(1U << LG_SIZEOF_LONG))
#define LONG_MASK (LONG - 1)
/* Return the smallest long multiple that is >= a. */
#define LONG_CEILING(a) \
(((a) + LONG_MASK) & ~LONG_MASK)
#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) #define SIZEOF_PTR (1U << LG_SIZEOF_PTR)
#define PTR_MASK (SIZEOF_PTR - 1)
/* Return the smallest (void *) multiple that is >= a. */
#define PTR_CEILING(a) \
(((a) + PTR_MASK) & ~PTR_MASK)
/* /*
* Maximum size of L1 cache line. This is used to avoid cache line aliasing. * Maximum size of L1 cache line. This is used to avoid cache line aliasing.
@ -193,6 +217,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
#define PAGE_CEILING(s) \ #define PAGE_CEILING(s) \
(((s) + PAGE_MASK) & ~PAGE_MASK) (((s) + PAGE_MASK) & ~PAGE_MASK)
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prn.h" #include "jemalloc/internal/prn.h"
#include "jemalloc/internal/ckh.h" #include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/stats.h" #include "jemalloc/internal/stats.h"
@ -201,6 +226,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
#include "jemalloc/internal/mb.h" #include "jemalloc/internal/mb.h"
#include "jemalloc/internal/extent.h" #include "jemalloc/internal/extent.h"
#include "jemalloc/internal/arena.h" #include "jemalloc/internal/arena.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/base.h" #include "jemalloc/internal/base.h"
#include "jemalloc/internal/chunk.h" #include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h" #include "jemalloc/internal/huge.h"
@ -216,12 +242,14 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
/******************************************************************************/ /******************************************************************************/
#define JEMALLOC_H_STRUCTS #define JEMALLOC_H_STRUCTS
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prn.h" #include "jemalloc/internal/prn.h"
#include "jemalloc/internal/ckh.h" #include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/stats.h" #include "jemalloc/internal/stats.h"
#include "jemalloc/internal/ctl.h" #include "jemalloc/internal/ctl.h"
#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mb.h" #include "jemalloc/internal/mb.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/extent.h" #include "jemalloc/internal/extent.h"
#include "jemalloc/internal/arena.h" #include "jemalloc/internal/arena.h"
#include "jemalloc/internal/base.h" #include "jemalloc/internal/base.h"
@ -271,6 +299,7 @@ extern size_t lg_pagesize;
extern unsigned ncpus; extern unsigned ncpus;
extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */
extern pthread_key_t arenas_tsd;
#ifndef NO_TLS #ifndef NO_TLS
/* /*
* Map of pthread_self() --> arenas[???], used for selecting an arena to use * Map of pthread_self() --> arenas[???], used for selecting an arena to use
@ -280,9 +309,9 @@ extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
# define ARENA_GET() arenas_tls # define ARENA_GET() arenas_tls
# define ARENA_SET(v) do { \ # define ARENA_SET(v) do { \
arenas_tls = (v); \ arenas_tls = (v); \
pthread_setspecific(arenas_tsd, (void *)(v)); \
} while (0) } while (0)
#else #else
extern pthread_key_t arenas_tsd;
# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd)) # define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd))
# define ARENA_SET(v) do { \ # define ARENA_SET(v) do { \
pthread_setspecific(arenas_tsd, (void *)(v)); \ pthread_setspecific(arenas_tsd, (void *)(v)); \
@ -329,12 +358,14 @@ int buferror(int errnum, char *buf, size_t buflen);
void jemalloc_prefork(void); void jemalloc_prefork(void);
void jemalloc_postfork(void); void jemalloc_postfork(void);
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prn.h" #include "jemalloc/internal/prn.h"
#include "jemalloc/internal/ckh.h" #include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/stats.h" #include "jemalloc/internal/stats.h"
#include "jemalloc/internal/ctl.h" #include "jemalloc/internal/ctl.h"
#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mb.h" #include "jemalloc/internal/mb.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/extent.h" #include "jemalloc/internal/extent.h"
#include "jemalloc/internal/arena.h" #include "jemalloc/internal/arena.h"
#include "jemalloc/internal/base.h" #include "jemalloc/internal/base.h"
@ -352,6 +383,7 @@ void jemalloc_postfork(void);
/******************************************************************************/ /******************************************************************************/
#define JEMALLOC_H_INLINES #define JEMALLOC_H_INLINES
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prn.h" #include "jemalloc/internal/prn.h"
#include "jemalloc/internal/ckh.h" #include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/stats.h" #include "jemalloc/internal/stats.h"
@ -402,7 +434,7 @@ s2u(size_t size)
{ {
if (size <= small_maxclass) if (size <= small_maxclass)
return (arenas[0]->bins[small_size2bin[size]].reg_size); return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size);
if (size <= arena_maxclass) if (size <= arena_maxclass)
return (PAGE_CEILING(size)); return (PAGE_CEILING(size));
return (CHUNK_CEILING(size)); return (CHUNK_CEILING(size));
@ -446,10 +478,8 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p)
} }
if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { if (usize <= arena_maxclass && alignment <= PAGE_SIZE) {
if (usize <= small_maxclass) { if (usize <= small_maxclass)
return return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size);
(arenas[0]->bins[small_size2bin[usize]].reg_size);
}
return (PAGE_CEILING(usize)); return (PAGE_CEILING(usize));
} else { } else {
size_t run_size; size_t run_size;
@ -547,6 +577,7 @@ thread_allocated_get(void)
#endif #endif
#endif #endif
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/arena.h" #include "jemalloc/internal/arena.h"
@ -558,7 +589,7 @@ thread_allocated_get(void)
#ifndef JEMALLOC_ENABLE_INLINE #ifndef JEMALLOC_ENABLE_INLINE
void *imalloc(size_t size); void *imalloc(size_t size);
void *icalloc(size_t size); void *icalloc(size_t size);
void *ipalloc(size_t size, size_t alignment, bool zero); void *ipalloc(size_t usize, size_t alignment, bool zero);
size_t isalloc(const void *ptr); size_t isalloc(const void *ptr);
# ifdef JEMALLOC_IVSALLOC # ifdef JEMALLOC_IVSALLOC
size_t ivsalloc(const void *ptr); size_t ivsalloc(const void *ptr);
@ -592,28 +623,39 @@ icalloc(size_t size)
} }
JEMALLOC_INLINE void * JEMALLOC_INLINE void *
ipalloc(size_t size, size_t alignment, bool zero) ipalloc(size_t usize, size_t alignment, bool zero)
{ {
void *ret; void *ret;
size_t usize;
size_t run_size
# ifdef JEMALLOC_CC_SILENCE
= 0
# endif
;
usize = sa2u(size, alignment, &run_size); assert(usize != 0);
if (usize == 0) assert(usize == sa2u(usize, alignment, NULL));
return (NULL);
if (usize <= arena_maxclass && alignment <= PAGE_SIZE) if (usize <= arena_maxclass && alignment <= PAGE_SIZE)
ret = arena_malloc(usize, zero); ret = arena_malloc(usize, zero);
else if (run_size <= arena_maxclass) { else {
ret = arena_palloc(choose_arena(), usize, run_size, alignment, size_t run_size
zero); #ifdef JEMALLOC_CC_SILENCE
= 0
#endif
;
/*
* Ideally we would only ever call sa2u() once per aligned
* allocation request, and the caller of this function has
* already done so once. However, it's rather burdensome to
* require every caller to pass in run_size, especially given
* that it's only relevant to large allocations. Therefore,
* just call it again here in order to get run_size.
*/
sa2u(usize, alignment, &run_size);
if (run_size <= arena_maxclass) {
ret = arena_palloc(choose_arena(), usize, run_size,
alignment, zero);
} else if (alignment <= chunksize) } else if (alignment <= chunksize)
ret = huge_malloc(usize, zero); ret = huge_malloc(usize, zero);
else else
ret = huge_palloc(usize, alignment, zero); ret = huge_palloc(usize, alignment, zero);
}
assert(((uintptr_t)ret & (alignment - 1)) == 0); assert(((uintptr_t)ret & (alignment - 1)) == 0);
return (ret); return (ret);
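Since ipalloc() now takes a usable size rather than a request size, callers are expected to run sa2u() first and check for overflow; a caller-side sketch (the function name is invented), mirroring the iralloc() and posix_memalign() changes below:

static void *
aligned_alloc_sketch(size_t size, size_t alignment)
{
	size_t usize = sa2u(size, alignment, NULL);

	if (usize == 0)
		return (NULL);	/* The request would overflow size_t. */
	return (ipalloc(usize, alignment, false));
}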
@ -630,7 +672,7 @@ isalloc(const void *ptr)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) { if (chunk != ptr) {
/* Region. */ /* Region. */
assert(chunk->arena->magic == ARENA_MAGIC); dassert(chunk->arena->magic == ARENA_MAGIC);
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
ret = arena_salloc_demote(ptr); ret = arena_salloc_demote(ptr);
@ -684,7 +726,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
!= 0) { != 0) {
size_t copysize; size_t usize, copysize;
/* /*
 * Existing object alignment is inadequate; allocate new space * Existing object alignment is inadequate; allocate new space
@ -692,12 +734,18 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
*/ */
if (no_move) if (no_move)
return (NULL); return (NULL);
ret = ipalloc(size + extra, alignment, zero); usize = sa2u(size + extra, alignment, NULL);
if (usize == 0)
return (NULL);
ret = ipalloc(usize, alignment, zero);
if (ret == NULL) { if (ret == NULL) {
if (extra == 0) if (extra == 0)
return (NULL); return (NULL);
/* Try again, without extra this time. */ /* Try again, without extra this time. */
ret = ipalloc(size, alignment, zero); usize = sa2u(size, alignment, NULL);
if (usize == 0)
return (NULL);
ret = ipalloc(usize, alignment, zero);
if (ret == NULL) if (ret == NULL)
return (NULL); return (NULL);
} }

jemalloc/include/jemalloc/internal/mb.h

@ -17,7 +17,7 @@
void mb_write(void); void mb_write(void);
#endif #endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(MB_C_)) #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_))
#ifdef __i386__ #ifdef __i386__
/* /*
* According to the Intel Architecture Software Developer's Manual, current * According to the Intel Architecture Software Developer's Manual, current

jemalloc/include/jemalloc/internal/mutex.h

@ -1,7 +1,11 @@
/******************************************************************************/ /******************************************************************************/
#ifdef JEMALLOC_H_TYPES #ifdef JEMALLOC_H_TYPES
#ifdef JEMALLOC_OSSPIN
typedef OSSpinLock malloc_mutex_t;
#else
typedef pthread_mutex_t malloc_mutex_t; typedef pthread_mutex_t malloc_mutex_t;
#endif
#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP #ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP # define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
@ -41,17 +45,26 @@ JEMALLOC_INLINE void
malloc_mutex_lock(malloc_mutex_t *mutex) malloc_mutex_lock(malloc_mutex_t *mutex)
{ {
if (isthreaded) if (isthreaded) {
#ifdef JEMALLOC_OSSPIN
OSSpinLockLock(mutex);
#else
pthread_mutex_lock(mutex); pthread_mutex_lock(mutex);
#endif
}
} }
JEMALLOC_INLINE bool JEMALLOC_INLINE bool
malloc_mutex_trylock(malloc_mutex_t *mutex) malloc_mutex_trylock(malloc_mutex_t *mutex)
{ {
if (isthreaded) if (isthreaded) {
#ifdef JEMALLOC_OSSPIN
return (OSSpinLockTry(mutex) == false);
#else
return (pthread_mutex_trylock(mutex) != 0); return (pthread_mutex_trylock(mutex) != 0);
else #endif
} else
return (false); return (false);
} }
@ -59,8 +72,13 @@ JEMALLOC_INLINE void
malloc_mutex_unlock(malloc_mutex_t *mutex) malloc_mutex_unlock(malloc_mutex_t *mutex)
{ {
if (isthreaded) if (isthreaded) {
#ifdef JEMALLOC_OSSPIN
OSSpinLockUnlock(mutex);
#else
pthread_mutex_unlock(mutex); pthread_mutex_unlock(mutex);
#endif
}
} }
#endif #endif

jemalloc/include/jemalloc/internal/prof.h

@ -247,8 +247,22 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata)
double u; double u;
/* /*
* Compute prof_sample_threshold as a geometrically distributed random * Compute sample threshold as a geometrically distributed random
* variable with mean (2^opt_lg_prof_sample). * variable with mean (2^opt_lg_prof_sample).
*
 * prof_tdata->threshold = ceil(log(u) / log(1-p)),
 *     where p = 1 / 2^opt_lg_prof_sample
*
* For more information on the math, see:
*
* Non-Uniform Random Variate Generation
* Luc Devroye
* Springer-Verlag, New York, 1986
* pp 500
* (http://cg.scs.carleton.ca/~luc/rnbookindex.html)
*/ */
prn64(r, 53, prof_tdata->prn_state, prn64(r, 53, prof_tdata->prn_state,
(uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU); (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU);
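A standalone sketch of the same computation (illustration only; it takes a uniform deviate u in (0, 1) directly instead of deriving one via prn64()):

#include <math.h>
#include <stdint.h>

static uint64_t
sample_threshold(double u, unsigned lg_prof_sample)
{
	double p = 1.0 / (double)((uint64_t)1 << lg_prof_sample);

	/* Geometric variate with mean 2^lg_prof_sample, per the comment above. */
	return ((uint64_t)ceil(log(u) / log(1.0 - p)));
}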
@ -334,7 +348,7 @@ prof_ctx_get(const void *ptr)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) { if (chunk != ptr) {
/* Region. */ /* Region. */
assert(chunk->arena->magic == ARENA_MAGIC); dassert(chunk->arena->magic == ARENA_MAGIC);
ret = arena_prof_ctx_get(ptr); ret = arena_prof_ctx_get(ptr);
} else } else
@ -353,7 +367,7 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) { if (chunk != ptr) {
/* Region. */ /* Region. */
assert(chunk->arena->magic == ARENA_MAGIC); dassert(chunk->arena->magic == ARENA_MAGIC);
arena_prof_ctx_set(ptr, ctx); arena_prof_ctx_set(ptr, ctx);
} else } else
@ -374,7 +388,7 @@ prof_sample_accum_update(size_t size)
/* Take care to avoid integer overflow. */ /* Take care to avoid integer overflow. */
if (size >= prof_tdata->threshold - prof_tdata->accum) { if (size >= prof_tdata->threshold - prof_tdata->accum) {
prof_tdata->accum -= (prof_tdata->threshold - size); prof_tdata->accum -= (prof_tdata->threshold - size);
/* Compute new prof_sample_threshold. */ /* Compute new sample threshold. */
prof_sample_threshold_update(prof_tdata); prof_sample_threshold_update(prof_tdata);
while (prof_tdata->accum >= prof_tdata->threshold) { while (prof_tdata->accum >= prof_tdata->threshold) {
prof_tdata->accum -= prof_tdata->threshold; prof_tdata->accum -= prof_tdata->threshold;

jemalloc/include/jemalloc/internal/rtree.h

@ -49,7 +49,7 @@ void *rtree_get(rtree_t *rtree, uintptr_t key);
bool rtree_set(rtree_t *rtree, uintptr_t key, void *val); bool rtree_set(rtree_t *rtree, uintptr_t key, void *val);
#endif #endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(RTREE_C_)) #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
#define RTREE_GET_GENERATE(f) \ #define RTREE_GET_GENERATE(f) \
/* The least significant bits of the key are ignored. */ \ /* The least significant bits of the key are ignored. */ \
JEMALLOC_INLINE void * \ JEMALLOC_INLINE void * \

jemalloc/include/jemalloc/internal/stats.h

@ -154,6 +154,10 @@ struct chunk_stats_s {
extern bool opt_stats_print; extern bool opt_stats_print;
#ifdef JEMALLOC_STATS
extern size_t stats_cactive;
#endif
char *u2s(uint64_t x, unsigned base, char *s); char *u2s(uint64_t x, unsigned base, char *s);
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS
void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
@ -166,9 +170,38 @@ void stats_print(void (*write)(void *, const char *), void *cbopaque,
#endif /* JEMALLOC_H_EXTERNS */ #endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/ /******************************************************************************/
#ifdef JEMALLOC_STATS
#ifdef JEMALLOC_H_INLINES #ifdef JEMALLOC_H_INLINES
#ifdef JEMALLOC_STATS
#ifndef JEMALLOC_ENABLE_INLINE
size_t stats_cactive_get(void);
void stats_cactive_add(size_t size);
void stats_cactive_sub(size_t size);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_))
JEMALLOC_INLINE size_t
stats_cactive_get(void)
{
return (atomic_read_z(&stats_cactive));
}
JEMALLOC_INLINE void
stats_cactive_add(size_t size)
{
atomic_add_z(&stats_cactive, size);
}
JEMALLOC_INLINE void
stats_cactive_sub(size_t size)
{
atomic_sub_z(&stats_cactive, size);
}
#endif
#endif /* JEMALLOC_H_INLINES */
#endif /* JEMALLOC_STATS */ #endif /* JEMALLOC_STATS */
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/ /******************************************************************************/

jemalloc/include/jemalloc/internal/tcache.h

@ -2,6 +2,7 @@
/******************************************************************************/ /******************************************************************************/
#ifdef JEMALLOC_H_TYPES #ifdef JEMALLOC_H_TYPES
typedef struct tcache_bin_info_s tcache_bin_info_t;
typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_bin_s tcache_bin_t;
typedef struct tcache_s tcache_t; typedef struct tcache_s tcache_t;
@ -32,14 +33,22 @@ typedef struct tcache_s tcache_t;
/******************************************************************************/ /******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS #ifdef JEMALLOC_H_STRUCTS
/*
* Read-only information associated with each element of tcache_t's tbins array
* is stored separately, mainly to reduce memory usage.
*/
struct tcache_bin_info_s {
unsigned ncached_max; /* Upper limit on ncached. */
};
struct tcache_bin_s { struct tcache_bin_s {
# ifdef JEMALLOC_STATS # ifdef JEMALLOC_STATS
tcache_bin_stats_t tstats; tcache_bin_stats_t tstats;
# endif # endif
unsigned low_water; /* Min # cached since last GC. */ int low_water; /* Min # cached since last GC. */
unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */
unsigned ncached; /* # of cached objects. */ unsigned ncached; /* # of cached objects. */
unsigned ncached_max; /* Upper limit on ncached. */ void **avail; /* Stack of available objects. */
void *avail; /* Chain of available objects. */
}; };
struct tcache_s { struct tcache_s {
@ -53,6 +62,12 @@ struct tcache_s {
unsigned ev_cnt; /* Event count since incremental GC. */ unsigned ev_cnt; /* Event count since incremental GC. */
unsigned next_gc_bin; /* Next bin to GC. */ unsigned next_gc_bin; /* Next bin to GC. */
tcache_bin_t tbins[1]; /* Dynamically sized. */ tcache_bin_t tbins[1]; /* Dynamically sized. */
/*
* The pointer stacks associated with tbins follow as a contiguous
* array. During tcache initialization, the avail pointer in each
* element of tbins is initialized to point to the proper offset within
* this array.
*/
}; };
#endif /* JEMALLOC_H_STRUCTS */ #endif /* JEMALLOC_H_STRUCTS */
@ -63,6 +78,8 @@ extern bool opt_tcache;
extern ssize_t opt_lg_tcache_max; extern ssize_t opt_lg_tcache_max;
extern ssize_t opt_lg_tcache_gc_sweep; extern ssize_t opt_lg_tcache_gc_sweep;
extern tcache_bin_info_t *tcache_bin_info;
/* Map of thread-specific caches. */ /* Map of thread-specific caches. */
#ifndef NO_TLS #ifndef NO_TLS
extern __thread tcache_t *tcache_tls extern __thread tcache_t *tcache_tls
@ -109,7 +126,7 @@ void tcache_destroy(tcache_t *tcache);
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS
void tcache_stats_merge(tcache_t *tcache, arena_t *arena); void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
#endif #endif
void tcache_boot(void); bool tcache_boot(void);
#endif /* JEMALLOC_H_EXTERNS */ #endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/ /******************************************************************************/
@ -168,6 +185,7 @@ tcache_event(tcache_t *tcache)
if (tcache->ev_cnt == tcache_gc_incr) { if (tcache->ev_cnt == tcache_gc_incr) {
size_t binind = tcache->next_gc_bin; size_t binind = tcache->next_gc_bin;
tcache_bin_t *tbin = &tcache->tbins[binind]; tcache_bin_t *tbin = &tcache->tbins[binind];
tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
if (tbin->low_water > 0) { if (tbin->low_water > 0) {
/* /*
@ -191,6 +209,20 @@ tcache_event(tcache_t *tcache)
#endif #endif
); );
} }
/*
* Reduce fill count by 2X. Limit lg_fill_div such that
* the fill count is always at least 1.
*/
if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1))
>= 1)
tbin->lg_fill_div++;
} else if (tbin->low_water < 0) {
/*
* Increase fill count by 2X. Make sure lg_fill_div
* stays greater than 0.
*/
if (tbin->lg_fill_div > 1)
tbin->lg_fill_div--;
} }
tbin->low_water = tbin->ncached; tbin->low_water = tbin->ncached;
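A worked illustration of the adjustment above: the refill count is (ncached_max >> lg_fill_div), so with a hypothetical ncached_max of 200, lg_fill_div == 1 refills 100 objects per cache miss and lg_fill_div == 2 refills 50; GC passes that find unused objects (low_water > 0) keep halving the refill count as long as it stays at least 1, while a pass that observed a miss (low_water < 0) doubles it again, never letting lg_fill_div drop below 1.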
@ -206,13 +238,14 @@ tcache_alloc_easy(tcache_bin_t *tbin)
{ {
void *ret; void *ret;
if (tbin->ncached == 0) if (tbin->ncached == 0) {
tbin->low_water = -1;
return (NULL); return (NULL);
}
tbin->ncached--; tbin->ncached--;
if (tbin->ncached < tbin->low_water) if ((int)tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached; tbin->low_water = tbin->ncached;
ret = tbin->avail; ret = tbin->avail[tbin->ncached];
tbin->avail = *(void **)ret;
return (ret); return (ret);
} }
@ -223,7 +256,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
size_t binind; size_t binind;
tcache_bin_t *tbin; tcache_bin_t *tbin;
binind = small_size2bin[size]; binind = SMALL_SIZE2BIN(size);
assert(binind < nbins); assert(binind < nbins);
tbin = &tcache->tbins[binind]; tbin = &tcache->tbins[binind];
ret = tcache_alloc_easy(tbin); ret = tcache_alloc_easy(tbin);
@ -232,7 +265,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
if (ret == NULL) if (ret == NULL)
return (NULL); return (NULL);
} }
assert(arena_salloc(ret) == tcache->arena->bins[binind].reg_size); assert(arena_salloc(ret) == arena_bin_info[binind].reg_size);
if (zero == false) { if (zero == false) {
#ifdef JEMALLOC_FILL #ifdef JEMALLOC_FILL
@ -248,7 +281,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
tbin->tstats.nrequests++; tbin->tstats.nrequests++;
#endif #endif
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
tcache->prof_accumbytes += tcache->arena->bins[binind].reg_size; tcache->prof_accumbytes += arena_bin_info[binind].reg_size;
#endif #endif
tcache_event(tcache); tcache_event(tcache);
return (ret); return (ret);
@ -312,6 +345,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr)
arena_run_t *run; arena_run_t *run;
arena_bin_t *bin; arena_bin_t *bin;
tcache_bin_t *tbin; tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
size_t pageind, binind; size_t pageind, binind;
arena_chunk_map_t *mapelm; arena_chunk_map_t *mapelm;
@ -323,7 +357,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr)
mapelm = &chunk->map[pageind-map_bias]; mapelm = &chunk->map[pageind-map_bias];
run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
(mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
assert(run->magic == ARENA_RUN_MAGIC); dassert(run->magic == ARENA_RUN_MAGIC);
bin = run->bin; bin = run->bin;
binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
sizeof(arena_bin_t); sizeof(arena_bin_t);
@ -331,20 +365,21 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr)
#ifdef JEMALLOC_FILL #ifdef JEMALLOC_FILL
if (opt_junk) if (opt_junk)
memset(ptr, 0x5a, bin->reg_size); memset(ptr, 0x5a, arena_bin_info[binind].reg_size);
#endif #endif
tbin = &tcache->tbins[binind]; tbin = &tcache->tbins[binind];
if (tbin->ncached == tbin->ncached_max) { tbin_info = &tcache_bin_info[binind];
tcache_bin_flush_small(tbin, binind, (tbin->ncached_max >> 1) if (tbin->ncached == tbin_info->ncached_max) {
tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >>
1)
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache , tcache
#endif #endif
); );
} }
assert(tbin->ncached < tbin->ncached_max); assert(tbin->ncached < tbin_info->ncached_max);
*(void **)ptr = tbin->avail; tbin->avail[tbin->ncached] = ptr;
tbin->avail = ptr;
tbin->ncached++; tbin->ncached++;
tcache_event(tcache); tcache_event(tcache);
@ -357,6 +392,7 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
arena_chunk_t *chunk; arena_chunk_t *chunk;
size_t pageind, binind; size_t pageind, binind;
tcache_bin_t *tbin; tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
assert((size & PAGE_MASK) == 0); assert((size & PAGE_MASK) == 0);
assert(arena_salloc(ptr) > small_maxclass); assert(arena_salloc(ptr) > small_maxclass);
@ -373,16 +409,17 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
#endif #endif
tbin = &tcache->tbins[binind]; tbin = &tcache->tbins[binind];
if (tbin->ncached == tbin->ncached_max) { tbin_info = &tcache_bin_info[binind];
tcache_bin_flush_large(tbin, binind, (tbin->ncached_max >> 1) if (tbin->ncached == tbin_info->ncached_max) {
tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >>
1)
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache , tcache
#endif #endif
); );
} }
assert(tbin->ncached < tbin->ncached_max); assert(tbin->ncached < tbin_info->ncached_max);
*(void **)ptr = tbin->avail; tbin->avail[tbin->ncached] = ptr;
tbin->avail = ptr;
tbin->ncached++; tbin->ncached++;
tcache_event(tcache); tcache_event(tcache);

jemalloc/include/jemalloc/jemalloc_defs.h.in

@ -24,6 +24,18 @@
*/ */
#undef CPU_SPINWAIT #undef CPU_SPINWAIT
/*
* Defined if OSAtomic*() functions are available, as provided by Darwin, and
* documented in the atomic(3) manual page.
*/
#undef JEMALLOC_OSATOMIC
/*
* Defined if OSSpin*() functions are available, as provided by Darwin, and
* documented in the spinlock(3) manual page.
*/
#undef JEMALLOC_OSSPIN
/* Defined if __attribute__((...)) syntax is supported. */ /* Defined if __attribute__((...)) syntax is supported. */
#undef JEMALLOC_HAVE_ATTR #undef JEMALLOC_HAVE_ATTR
#ifdef JEMALLOC_HAVE_ATTR #ifdef JEMALLOC_HAVE_ATTR
@ -53,6 +65,9 @@
/* Use libgcc for profile backtracing if defined. */ /* Use libgcc for profile backtracing if defined. */
#undef JEMALLOC_PROF_LIBGCC #undef JEMALLOC_PROF_LIBGCC
/* Use gcc intrinsics for profile backtracing if defined. */
#undef JEMALLOC_PROF_GCC
/* /*
* JEMALLOC_TINY enables support for tiny objects, which are smaller than one * JEMALLOC_TINY enables support for tiny objects, which are smaller than one
* quantum. * quantum.
@ -137,4 +152,7 @@
/* sizeof(int) == 2^LG_SIZEOF_INT. */ /* sizeof(int) == 2^LG_SIZEOF_INT. */
#undef LG_SIZEOF_INT #undef LG_SIZEOF_INT
/* sizeof(long) == 2^LG_SIZEOF_LONG. */
#undef LG_SIZEOF_LONG
#endif /* JEMALLOC_DEFS_H_ */ #endif /* JEMALLOC_DEFS_H_ */

File diff suppressed because it is too large.

jemalloc/src/atomic.c (new file)

@ -0,0 +1,2 @@
#define JEMALLOC_ATOMIC_C_
#include "jemalloc/internal/jemalloc_internal.h"

jemalloc/src/bitmap.c (new file)

@ -0,0 +1,90 @@
#define JEMALLOC_BITMAP_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static size_t bits2groups(size_t nbits);
/******************************************************************************/
static size_t
bits2groups(size_t nbits)
{
return ((nbits >> LG_BITMAP_GROUP_NBITS) +
!!(nbits & BITMAP_GROUP_NBITS_MASK));
}
void
bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
{
unsigned i;
size_t group_count;
assert(nbits > 0);
assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS));
/*
* Compute the number of groups necessary to store nbits bits, and
* progressively work upward through the levels until reaching a level
* that requires only one group.
*/
binfo->levels[0].group_offset = 0;
group_count = bits2groups(nbits);
for (i = 1; group_count > 1; i++) {
assert(i < BITMAP_MAX_LEVELS);
binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
+ group_count;
group_count = bits2groups(group_count);
}
binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
+ group_count;
binfo->nlevels = i;
binfo->nbits = nbits;
}
size_t
bitmap_info_ngroups(const bitmap_info_t *binfo)
{
return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP);
}
size_t
bitmap_size(size_t nbits)
{
bitmap_info_t binfo;
bitmap_info_init(&binfo, nbits);
return (bitmap_info_ngroups(&binfo));
}
void
bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo)
{
size_t extra;
unsigned i;
/*
* Bits are actually inverted with regard to the external bitmap
* interface, so the bitmap starts out with all 1 bits, except for
* trailing unused bits (if any). Note that each group uses bit 0 to
* correspond to the first logical bit in the group, so extra bits
* are the most significant bits of the last group.
*/
memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset <<
LG_SIZEOF_BITMAP);
extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK))
& BITMAP_GROUP_NBITS_MASK;
if (extra != 0)
bitmap[binfo->levels[1].group_offset - 1] >>= extra;
for (i = 1; i < binfo->nlevels; i++) {
size_t group_count = binfo->levels[i].group_offset -
binfo->levels[i-1].group_offset;
extra = (BITMAP_GROUP_NBITS - (group_count &
BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK;
if (extra != 0)
bitmap[binfo->levels[i+1].group_offset - 1] >>= extra;
}
}
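A worked example of the construction above, assuming 64-bit groups (LG_SIZEOF_BITMAP == 3): for nbits == 512, bits2groups(512) == 8, so level 0 occupies 8 groups and level 1 collapses them into a single group, giving levels[0].group_offset == 0, levels[1].group_offset == 8, levels[2].group_offset == 9, and nlevels == 2; bitmap_size(512) is therefore 9 groups, i.e. 72 bytes.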

jemalloc/src/ckh.c

@ -34,7 +34,7 @@
* respectively. * respectively.
* *
******************************************************************************/ ******************************************************************************/
#define CKH_C_ #define JEMALLOC_CKH_C_
#include "jemalloc/internal/jemalloc_internal.h" #include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/ /******************************************************************************/
@ -73,7 +73,7 @@ ckh_isearch(ckh_t *ckh, const void *key)
size_t hash1, hash2, bucket, cell; size_t hash1, hash2, bucket, cell;
assert(ckh != NULL); assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIC); dassert(ckh->magic == CKH_MAGIC);
ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
@ -262,9 +262,15 @@ ckh_grow(ckh_t *ckh)
lg_prevbuckets = ckh->lg_curbuckets; lg_prevbuckets = ckh->lg_curbuckets;
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS;
while (true) { while (true) {
size_t usize;
lg_curcells++; lg_curcells++;
tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells, usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL);
ZU(1) << LG_CACHELINE, true); if (usize == 0) {
ret = true;
goto RETURN;
}
tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
if (tab == NULL) { if (tab == NULL) {
ret = true; ret = true;
goto RETURN; goto RETURN;
@ -295,7 +301,7 @@ static void
ckh_shrink(ckh_t *ckh) ckh_shrink(ckh_t *ckh)
{ {
ckhc_t *tab, *ttab; ckhc_t *tab, *ttab;
size_t lg_curcells; size_t lg_curcells, usize;
unsigned lg_prevbuckets; unsigned lg_prevbuckets;
/* /*
@ -304,8 +310,10 @@ ckh_shrink(ckh_t *ckh)
*/ */
lg_prevbuckets = ckh->lg_curbuckets; lg_prevbuckets = ckh->lg_curbuckets;
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells, usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL);
ZU(1) << LG_CACHELINE, true); if (usize == 0)
return;
tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
if (tab == NULL) { if (tab == NULL) {
/* /*
* An OOM error isn't worth propagating, since it doesn't * An OOM error isn't worth propagating, since it doesn't
@ -340,7 +348,7 @@ bool
ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
{ {
bool ret; bool ret;
size_t mincells; size_t mincells, usize;
unsigned lg_mincells; unsigned lg_mincells;
assert(minitems > 0); assert(minitems > 0);
@ -375,8 +383,12 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
ckh->hash = hash; ckh->hash = hash;
ckh->keycomp = keycomp; ckh->keycomp = keycomp;
ckh->tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_mincells, usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL);
(ZU(1) << LG_CACHELINE), true); if (usize == 0) {
ret = true;
goto RETURN;
}
ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
if (ckh->tab == NULL) { if (ckh->tab == NULL) {
ret = true; ret = true;
goto RETURN; goto RETURN;
@ -396,7 +408,7 @@ ckh_delete(ckh_t *ckh)
{ {
assert(ckh != NULL); assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIC); dassert(ckh->magic == CKH_MAGIC);
#ifdef CKH_VERBOSE #ifdef CKH_VERBOSE
malloc_printf( malloc_printf(
@ -421,7 +433,7 @@ ckh_count(ckh_t *ckh)
{ {
assert(ckh != NULL); assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIC); dassert(ckh->magic == CKH_MAGIC);
return (ckh->count); return (ckh->count);
} }
@ -452,7 +464,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data)
bool ret; bool ret;
assert(ckh != NULL); assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIC); dassert(ckh->magic == CKH_MAGIC);
assert(ckh_search(ckh, key, NULL, NULL)); assert(ckh_search(ckh, key, NULL, NULL));
#ifdef CKH_COUNT #ifdef CKH_COUNT
@ -477,7 +489,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
size_t cell; size_t cell;
assert(ckh != NULL); assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIC); dassert(ckh->magic == CKH_MAGIC);
cell = ckh_isearch(ckh, searchkey); cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) { if (cell != SIZE_T_MAX) {
@ -509,7 +521,7 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
size_t cell; size_t cell;
assert(ckh != NULL); assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIC); dassert(ckh->magic == CKH_MAGIC);
cell = ckh_isearch(ckh, searchkey); cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) { if (cell != SIZE_T_MAX) {

jemalloc/src/ctl.c

@ -182,6 +182,7 @@ CTL_PROTO(stats_arenas_i_lruns_j_highruns)
CTL_PROTO(stats_arenas_i_lruns_j_curruns) CTL_PROTO(stats_arenas_i_lruns_j_curruns)
INDEX_PROTO(stats_arenas_i_lruns_j) INDEX_PROTO(stats_arenas_i_lruns_j)
#endif #endif
CTL_PROTO(stats_arenas_i_nthreads)
CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pactive)
CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_pdirty)
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS
@ -192,6 +193,7 @@ CTL_PROTO(stats_arenas_i_purged)
#endif #endif
INDEX_PROTO(stats_arenas_i) INDEX_PROTO(stats_arenas_i)
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS
CTL_PROTO(stats_cactive)
CTL_PROTO(stats_allocated) CTL_PROTO(stats_allocated)
CTL_PROTO(stats_active) CTL_PROTO(stats_active)
CTL_PROTO(stats_mapped) CTL_PROTO(stats_mapped)
@ -434,6 +436,7 @@ static const ctl_node_t stats_arenas_i_lruns_node[] = {
#endif #endif
static const ctl_node_t stats_arenas_i_node[] = { static const ctl_node_t stats_arenas_i_node[] = {
{NAME("nthreads"), CTL(stats_arenas_i_nthreads)},
{NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)},
{NAME("pdirty"), CTL(stats_arenas_i_pdirty)} {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS
@ -458,6 +461,7 @@ static const ctl_node_t stats_arenas_node[] = {
static const ctl_node_t stats_node[] = { static const ctl_node_t stats_node[] = {
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS
{NAME("cactive"), CTL(stats_cactive)},
{NAME("allocated"), CTL(stats_allocated)}, {NAME("allocated"), CTL(stats_allocated)},
{NAME("active"), CTL(stats_active)}, {NAME("active"), CTL(stats_active)},
{NAME("mapped"), CTL(stats_mapped)}, {NAME("mapped"), CTL(stats_mapped)},
@ -620,6 +624,7 @@ ctl_arena_refresh(arena_t *arena, unsigned i)
ctl_arena_clear(astats); ctl_arena_clear(astats);
sstats->nthreads += astats->nthreads;
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS
ctl_arena_stats_amerge(astats, arena); ctl_arena_stats_amerge(astats, arena);
/* Merge into sum stats as well. */ /* Merge into sum stats as well. */
@ -657,10 +662,17 @@ ctl_refresh(void)
* Clear sum stats, since they will be merged into by * Clear sum stats, since they will be merged into by
* ctl_arena_refresh(). * ctl_arena_refresh().
*/ */
ctl_stats.arenas[narenas].nthreads = 0;
ctl_arena_clear(&ctl_stats.arenas[narenas]); ctl_arena_clear(&ctl_stats.arenas[narenas]);
malloc_mutex_lock(&arenas_lock); malloc_mutex_lock(&arenas_lock);
memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); memcpy(tarenas, arenas, sizeof(arena_t *) * narenas);
for (i = 0; i < narenas; i++) {
if (arenas[i] != NULL)
ctl_stats.arenas[i].nthreads = arenas[i]->nthreads;
else
ctl_stats.arenas[i].nthreads = 0;
}
malloc_mutex_unlock(&arenas_lock); malloc_mutex_unlock(&arenas_lock);
for (i = 0; i < narenas; i++) { for (i = 0; i < narenas; i++) {
bool initialized = (tarenas[i] != NULL); bool initialized = (tarenas[i] != NULL);
@ -1129,6 +1141,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
malloc_mutex_lock(&arenas_lock); malloc_mutex_lock(&arenas_lock);
if ((arena = arenas[newind]) == NULL) if ((arena = arenas[newind]) == NULL)
arena = arenas_extend(newind); arena = arenas_extend(newind);
arenas[oldind]->nthreads--;
arenas[newind]->nthreads++;
malloc_mutex_unlock(&arenas_lock); malloc_mutex_unlock(&arenas_lock);
if (arena == NULL) { if (arena == NULL) {
ret = EAGAIN; ret = EAGAIN;
@ -1289,9 +1303,9 @@ CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool)
/******************************************************************************/ /******************************************************************************/
CTL_RO_NL_GEN(arenas_bin_i_size, arenas[0]->bins[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t)
CTL_RO_NL_GEN(arenas_bin_i_nregs, arenas[0]->bins[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t)
CTL_RO_NL_GEN(arenas_bin_i_run_size, arenas[0]->bins[mib[2]].run_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t)
const ctl_node_t * const ctl_node_t *
arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i)
{ {
@ -1536,6 +1550,7 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
} }
#endif #endif
CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned)
CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t)
CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t)
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS
@ -1567,6 +1582,7 @@ RETURN:
} }
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS
CTL_RO_GEN(stats_cactive, &stats_cactive, size_t *)
CTL_RO_GEN(stats_allocated, ctl_stats.allocated, size_t) CTL_RO_GEN(stats_allocated, ctl_stats.allocated, size_t)
CTL_RO_GEN(stats_active, ctl_stats.active, size_t) CTL_RO_GEN(stats_active, ctl_stats.active, size_t)
CTL_RO_GEN(stats_mapped, ctl_stats.mapped, size_t) CTL_RO_GEN(stats_mapped, ctl_stats.mapped, size_t)
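A consumer-side sketch of the new "stats.cactive" control (illustration only; it assumes a default build with the unprefixed public mallctl() symbol and the installed <jemalloc/jemalloc.h> header). The control returns a pointer to the counter, which the application can poll afterwards without further mallctl() calls:

#include <stddef.h>
#include <jemalloc/jemalloc.h>

static size_t
cactive_read(void)
{
	size_t *cactive;
	size_t sz = sizeof(cactive);

	if (mallctl("stats.cactive", &cactive, &sz, NULL, 0) != 0)
		return (0);
	return (*cactive);
}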

jemalloc/src/hash.c

@ -1,2 +1,2 @@
#define HASH_C_ #define JEMALLOC_HASH_C_
#include "jemalloc/internal/jemalloc_internal.h" #include "jemalloc/internal/jemalloc_internal.h"

jemalloc/src/huge.c

@ -50,6 +50,7 @@ huge_malloc(size_t size, bool zero)
malloc_mutex_lock(&huge_mtx); malloc_mutex_lock(&huge_mtx);
extent_tree_ad_insert(&huge, node); extent_tree_ad_insert(&huge, node);
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS
stats_cactive_add(csize);
huge_nmalloc++; huge_nmalloc++;
huge_allocated += csize; huge_allocated += csize;
#endif #endif
@ -134,6 +135,7 @@ huge_palloc(size_t size, size_t alignment, bool zero)
malloc_mutex_lock(&huge_mtx); malloc_mutex_lock(&huge_mtx);
extent_tree_ad_insert(&huge, node); extent_tree_ad_insert(&huge, node);
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS
stats_cactive_add(chunk_size);
huge_nmalloc++; huge_nmalloc++;
huge_allocated += chunk_size; huge_allocated += chunk_size;
#endif #endif
@ -278,6 +280,7 @@ huge_dalloc(void *ptr, bool unmap)
extent_tree_ad_remove(&huge, node); extent_tree_ad_remove(&huge, node);
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS
stats_cactive_sub(node->size);
huge_ndalloc++; huge_ndalloc++;
huge_allocated -= node->size; huge_allocated -= node->size;
#endif #endif

jemalloc/src/jemalloc.c

@ -7,12 +7,10 @@
malloc_mutex_t arenas_lock; malloc_mutex_t arenas_lock;
arena_t **arenas; arena_t **arenas;
unsigned narenas; unsigned narenas;
static unsigned next_arena;
pthread_key_t arenas_tsd;
#ifndef NO_TLS #ifndef NO_TLS
__thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
#else
pthread_key_t arenas_tsd;
#endif #endif
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS
@ -30,7 +28,13 @@ static bool malloc_initialized = false;
static pthread_t malloc_initializer = (unsigned long)0; static pthread_t malloc_initializer = (unsigned long)0;
/* Used to avoid initialization races. */ /* Used to avoid initialization races. */
static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; static malloc_mutex_t init_lock =
#ifdef JEMALLOC_OSSPIN
0
#else
MALLOC_MUTEX_INITIALIZER
#endif
;
#ifdef DYNAMIC_PAGE_SHIFT #ifdef DYNAMIC_PAGE_SHIFT
size_t pagesize; size_t pagesize;
@ -70,6 +74,7 @@ size_t opt_narenas = 0;
static void wrtmessage(void *cbopaque, const char *s); static void wrtmessage(void *cbopaque, const char *s);
static void stats_print_atexit(void); static void stats_print_atexit(void);
static unsigned malloc_ncpus(void); static unsigned malloc_ncpus(void);
static void arenas_cleanup(void *arg);
#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) #if (defined(JEMALLOC_STATS) && defined(NO_TLS))
static void thread_allocated_cleanup(void *arg); static void thread_allocated_cleanup(void *arg);
#endif #endif
@ -147,13 +152,53 @@ choose_arena_hard(void)
arena_t *ret; arena_t *ret;
if (narenas > 1) { if (narenas > 1) {
unsigned i, choose, first_null;
choose = 0;
first_null = narenas;
malloc_mutex_lock(&arenas_lock); malloc_mutex_lock(&arenas_lock);
if ((ret = arenas[next_arena]) == NULL) assert(arenas[0] != NULL);
ret = arenas_extend(next_arena); for (i = 1; i < narenas; i++) {
next_arena = (next_arena + 1) % narenas; if (arenas[i] != NULL) {
/*
* Choose the first arena that has the lowest
* number of threads assigned to it.
*/
if (arenas[i]->nthreads <
arenas[choose]->nthreads)
choose = i;
} else if (first_null == narenas) {
/*
* Record the index of the first uninitialized
* arena, in case all extant arenas are in use.
*
* NB: It is possible for there to be
* discontinuities in terms of initialized
* versus uninitialized arenas, due to the
* "thread.arena" mallctl.
*/
first_null = i;
}
}
if (arenas[choose]->nthreads == 0 || first_null == narenas) {
/*
* Use an unloaded arena, or the least loaded arena if
* all arenas are already initialized.
*/
ret = arenas[choose];
} else {
/* Initialize a new arena. */
ret = arenas_extend(first_null);
}
ret->nthreads++;
malloc_mutex_unlock(&arenas_lock); malloc_mutex_unlock(&arenas_lock);
} else } else {
ret = arenas[0]; ret = arenas[0];
malloc_mutex_lock(&arenas_lock);
ret->nthreads++;
malloc_mutex_unlock(&arenas_lock);
}
ARENA_SET(ret); ARENA_SET(ret);
@ -259,6 +304,16 @@ malloc_ncpus(void)
return (ret); return (ret);
} }
static void
arenas_cleanup(void *arg)
{
arena_t *arena = (arena_t *)arg;
malloc_mutex_lock(&arenas_lock);
arena->nthreads--;
malloc_mutex_unlock(&arenas_lock);
}
#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) #if (defined(JEMALLOC_STATS) && defined(NO_TLS))
static void static void
thread_allocated_cleanup(void *arg) thread_allocated_cleanup(void *arg)
@ -693,7 +748,10 @@ malloc_init_hard(void)
} }
#ifdef JEMALLOC_TCACHE #ifdef JEMALLOC_TCACHE
tcache_boot(); if (tcache_boot()) {
malloc_mutex_unlock(&init_lock);
return (true);
}
#endif #endif
if (huge_boot()) { if (huge_boot()) {
@ -734,8 +792,15 @@ malloc_init_hard(void)
* threaded mode. * threaded mode.
*/ */
ARENA_SET(arenas[0]); ARENA_SET(arenas[0]);
arenas[0]->nthreads++;
malloc_mutex_init(&arenas_lock); if (malloc_mutex_init(&arenas_lock))
return (true);
if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) {
malloc_mutex_unlock(&init_lock);
return (true);
}
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (prof_boot2()) { if (prof_boot2()) {
@ -775,15 +840,6 @@ malloc_init_hard(void)
malloc_write(")\n"); malloc_write(")\n");
} }
next_arena = (narenas > 0) ? 1 : 0;
#ifdef NO_TLS
if (pthread_key_create(&arenas_tsd, NULL) != 0) {
malloc_mutex_unlock(&init_lock);
return (true);
}
#endif
/* Allocate and initialize arenas. */ /* Allocate and initialize arenas. */
arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
if (arenas == NULL) { if (arenas == NULL) {
@ -815,7 +871,6 @@ malloc_init_hard(void)
return (false); return (false);
} }
#ifdef JEMALLOC_ZONE #ifdef JEMALLOC_ZONE
JEMALLOC_ATTR(constructor) JEMALLOC_ATTR(constructor)
void void
@ -938,14 +993,12 @@ int
JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
{ {
int ret; int ret;
void *result;
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
size_t usize size_t usize
# ifdef JEMALLOC_CC_SILENCE #ifdef JEMALLOC_CC_SILENCE
= 0 = 0
# endif
;
#endif #endif
;
void *result;
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt prof_thr_cnt_t *cnt
# ifdef JEMALLOC_CC_SILENCE # ifdef JEMALLOC_CC_SILENCE
@ -995,34 +1048,37 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
goto RETURN; goto RETURN;
} }
usize = sa2u(size, alignment, NULL);
if (usize == 0) {
result = NULL;
ret = ENOMEM;
goto RETURN;
}
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof) { if (opt_prof) {
usize = sa2u(size, alignment, NULL);
if ((cnt = prof_alloc_prep(usize)) == NULL) { if ((cnt = prof_alloc_prep(usize)) == NULL) {
result = NULL; result = NULL;
ret = EINVAL; ret = EINVAL;
} else { } else {
if (prof_promote && (uintptr_t)cnt != if (prof_promote && (uintptr_t)cnt !=
(uintptr_t)1U && usize <= small_maxclass) { (uintptr_t)1U && usize <= small_maxclass) {
result = ipalloc(small_maxclass+1, assert(sa2u(small_maxclass+1,
alignment, false); alignment, NULL) != 0);
result = ipalloc(sa2u(small_maxclass+1,
alignment, NULL), alignment, false);
if (result != NULL) { if (result != NULL) {
arena_prof_promoted(result, arena_prof_promoted(result,
usize); usize);
} }
} else { } else {
result = ipalloc(size, alignment, result = ipalloc(usize, alignment,
false); false);
} }
} }
} else } else
#endif #endif
{ result = ipalloc(usize, alignment, false);
#ifdef JEMALLOC_STATS
usize = sa2u(size, alignment, NULL);
#endif
result = ipalloc(size, alignment, false);
}
} }
if (result == NULL) { if (result == NULL) {
@ -1476,15 +1532,18 @@ JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp,
} }
JEMALLOC_INLINE void * JEMALLOC_INLINE void *
iallocm(size_t size, size_t alignment, bool zero) iallocm(size_t usize, size_t alignment, bool zero)
{ {
assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment,
NULL)));
if (alignment != 0) if (alignment != 0)
return (ipalloc(size, alignment, zero)); return (ipalloc(usize, alignment, zero));
else if (zero) else if (zero)
return (icalloc(size)); return (icalloc(usize));
else else
return (imalloc(size)); return (imalloc(usize));
} }
JEMALLOC_ATTR(nonnull(1)) JEMALLOC_ATTR(nonnull(1))
@ -1507,20 +1566,27 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
if (malloc_init()) if (malloc_init())
goto OOM; goto OOM;
#ifdef JEMALLOC_PROF
if (opt_prof) {
usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment,
NULL); NULL);
if (usize == 0)
goto OOM;
#ifdef JEMALLOC_PROF
if (opt_prof) {
if ((cnt = prof_alloc_prep(usize)) == NULL) if ((cnt = prof_alloc_prep(usize)) == NULL)
goto OOM; goto OOM;
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <=
small_maxclass) { small_maxclass) {
p = iallocm(small_maxclass+1, alignment, zero); size_t usize_promoted = (alignment == 0) ?
s2u(small_maxclass+1) : sa2u(small_maxclass+1,
alignment, NULL);
assert(usize_promoted != 0);
p = iallocm(usize_promoted, alignment, zero);
if (p == NULL) if (p == NULL)
goto OOM; goto OOM;
arena_prof_promoted(p, usize); arena_prof_promoted(p, usize);
} else { } else {
p = iallocm(size, alignment, zero); p = iallocm(usize, alignment, zero);
if (p == NULL) if (p == NULL)
goto OOM; goto OOM;
} }
@ -1530,15 +1596,13 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
} else } else
#endif #endif
{ {
p = iallocm(size, alignment, zero); p = iallocm(usize, alignment, zero);
if (p == NULL) if (p == NULL)
goto OOM; goto OOM;
#ifndef JEMALLOC_STATS #ifndef JEMALLOC_STATS
if (rsize != NULL) if (rsize != NULL)
#endif #endif
{ {
usize = (alignment == 0) ? s2u(size) : sa2u(size,
alignment, NULL);
#ifdef JEMALLOC_STATS #ifdef JEMALLOC_STATS
if (rsize != NULL) if (rsize != NULL)
#endif #endif
@ -1622,6 +1686,8 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
usize = isalloc(q); usize = isalloc(q);
} }
prof_realloc(q, usize, cnt, old_size, old_ctx); prof_realloc(q, usize, cnt, old_size, old_ctx);
if (rsize != NULL)
*rsize = usize;
} else } else
#endif #endif
{ {

jemalloc/src/mb.c

@ -1,2 +1,2 @@
#define MB_C_ #define JEMALLOC_MB_C_
#include "jemalloc/internal/jemalloc_internal.h" #include "jemalloc/internal/jemalloc_internal.h"

jemalloc/src/mutex.c

@ -55,6 +55,9 @@ pthread_create(pthread_t *__restrict thread,
bool bool
malloc_mutex_init(malloc_mutex_t *mutex) malloc_mutex_init(malloc_mutex_t *mutex)
{ {
#ifdef JEMALLOC_OSSPIN
*mutex = 0;
#else
pthread_mutexattr_t attr; pthread_mutexattr_t attr;
if (pthread_mutexattr_init(&attr) != 0) if (pthread_mutexattr_init(&attr) != 0)
@ -70,6 +73,7 @@ malloc_mutex_init(malloc_mutex_t *mutex)
} }
pthread_mutexattr_destroy(&attr); pthread_mutexattr_destroy(&attr);
#endif
return (false); return (false);
} }
@ -77,8 +81,10 @@ void
malloc_mutex_destroy(malloc_mutex_t *mutex) malloc_mutex_destroy(malloc_mutex_t *mutex)
{ {
#ifndef JEMALLOC_OSSPIN
if (pthread_mutex_destroy(mutex) != 0) { if (pthread_mutex_destroy(mutex) != 0) {
malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n"); malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n");
abort(); abort();
} }
#endif
} }

jemalloc/src/prof.c

@ -3,15 +3,15 @@
#ifdef JEMALLOC_PROF
/******************************************************************************/
-#ifdef JEMALLOC_PROF_LIBGCC
-#include <unwind.h>
-#endif
-
#ifdef JEMALLOC_PROF_LIBUNWIND
#define UNW_LOCAL_ONLY
#include <libunwind.h>
#endif
+
+#ifdef JEMALLOC_PROF_LIBGCC
+#include <unwind.h>
+#endif
/******************************************************************************/
/* Data. */
@ -169,39 +169,7 @@ prof_leave(void)
		prof_gdump();
}
-#ifdef JEMALLOC_PROF_LIBGCC
-static _Unwind_Reason_Code
-prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
-{
-
-	return (_URC_NO_REASON);
-}
-
-static _Unwind_Reason_Code
-prof_unwind_callback(struct _Unwind_Context *context, void *arg)
-{
-	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
-
-	if (data->nignore > 0)
-		data->nignore--;
-	else {
-		data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
-		data->bt->len++;
-		if (data->bt->len == data->max)
-			return (_URC_END_OF_STACK);
-	}
-
-	return (_URC_NO_REASON);
-}
-
-void
-prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
-{
-	prof_unwind_data_t data = {bt, nignore, max};
-
-	_Unwind_Backtrace(prof_unwind_callback, &data);
-}
-#elif defined(JEMALLOC_PROF_LIBUNWIND)
+#ifdef JEMALLOC_PROF_LIBUNWIND
void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{
@ -236,7 +204,41 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
			break;
		}
	}
-#else
+#endif
+
+#ifdef JEMALLOC_PROF_LIBGCC
+static _Unwind_Reason_Code
+prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
+{
+
+	return (_URC_NO_REASON);
+}
+
+static _Unwind_Reason_Code
+prof_unwind_callback(struct _Unwind_Context *context, void *arg)
+{
+	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
+
+	if (data->nignore > 0)
+		data->nignore--;
+	else {
+		data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
+		data->bt->len++;
+		if (data->bt->len == data->max)
+			return (_URC_END_OF_STACK);
+	}
+
+	return (_URC_NO_REASON);
+}
+
+void
+prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
+{
+	prof_unwind_data_t data = {bt, nignore, max};
+
+	_Unwind_Backtrace(prof_unwind_callback, &data);
+}
+#endif
+
+#ifdef JEMALLOC_PROF_GCC
void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{

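With the reordering above, the libunwind implementation of prof_backtrace() now comes first in the file, ahead of the libgcc and gcc-intrinsics variants. For reference, a minimal self-contained libunwind walk looks roughly like this (fixed-size buffer, no frame skipping; compile with -lunwind):

#define UNW_LOCAL_ONLY
#include <libunwind.h>
#include <stdio.h>

int
main(void)
{
	unw_context_t uc;
	unw_cursor_t cursor;
	void *vec[32];
	unsigned i, len = 0;

	unw_getcontext(&uc);
	unw_init_local(&cursor, &uc);
	while (len < sizeof(vec) / sizeof(vec[0]) && unw_step(&cursor) > 0) {
		unw_word_t ip;

		unw_get_reg(&cursor, UNW_REG_IP, &ip);
		vec[len++] = (void *)ip;
	}
	for (i = 0; i < len; i++)
		printf("frame %u: %p\n", i, vec[i]);
	return (0);
}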
jemalloc/src/rtree.c

@ -1,4 +1,4 @@
-#define RTREE_C_
+#define JEMALLOC_RTREE_C_
#include "jemalloc/internal/jemalloc_internal.h"

rtree_t *
@ -20,7 +20,10 @@ rtree_new(unsigned bits)
	memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) *
	    height));

-	malloc_mutex_init(&ret->mutex);
+	if (malloc_mutex_init(&ret->mutex)) {
+		/* Leak the rtree. */
+		return (NULL);
+	}
	ret->height = height;
	if (bits_per_level * height > bits)
		ret->level2bits[0] = bits % bits_per_level;

jemalloc/src/stats.c

@ -39,6 +39,10 @@
bool	opt_stats_print = false;

+#ifdef JEMALLOC_STATS
+size_t	stats_cactive = 0;
+#endif
+
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
@ -319,6 +323,7 @@ static void
stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
    unsigned i)
{
+	unsigned nthreads;
	size_t pagesize, pactive, pdirty, mapped;
	uint64_t npurge, nmadvise, purged;
	size_t small_allocated;
@ -328,6 +333,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
	CTL_GET("arenas.pagesize", &pagesize, size_t);

+	CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned);
+	malloc_cprintf(write_cb, cbopaque,
+	    "assigned threads: %u\n", nthreads);
	CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t);
	CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t);
	CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t);
@ -669,21 +677,26 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
#ifdef JEMALLOC_STATS
	{
		int err;
-		size_t ssz;
+		size_t sszp, ssz;
+		size_t *cactive;
		size_t allocated, active, mapped;
		size_t chunks_current, chunks_high, swap_avail;
		uint64_t chunks_total;
		size_t huge_allocated;
		uint64_t huge_nmalloc, huge_ndalloc;

+		sszp = sizeof(size_t *);
		ssz = sizeof(size_t);

+		CTL_GET("stats.cactive", &cactive, size_t *);
		CTL_GET("stats.allocated", &allocated, size_t);
		CTL_GET("stats.active", &active, size_t);
		CTL_GET("stats.mapped", &mapped, size_t);
		malloc_cprintf(write_cb, cbopaque,
-		    "Allocated: %zu, active: %zu, mapped: %zu\n", allocated,
-		    active, mapped);
+		    "Allocated: %zu, active: %zu, mapped: %zu\n",
+		    allocated, active, mapped);
+		malloc_cprintf(write_cb, cbopaque,
+		    "Current active ceiling: %zu\n", atomic_read_z(cactive));

		/* Print chunk stats. */
		CTL_GET("stats.chunks.total", &chunks_total, uint64_t);

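The new "stats.cactive" mallctl hands back a pointer to the live counter rather than a copied value, which is why the code above reads it through atomic_read_z(). A hedged sketch of reading it from application code, assuming the default header and unprefixed function names:

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	size_t *cactive;
	size_t sz = sizeof(cactive);

	if (mallctl("stats.cactive", &cactive, &sz, NULL, 0) != 0)
		return (1);
	/* Dereferencing gives an approximate ceiling on active bytes. */
	printf("cactive ~= %zu bytes\n", *cactive);
	return (0);
}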
jemalloc/src/tcache.c

@ -8,6 +8,9 @@ bool opt_tcache = true;
ssize_t	opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
ssize_t	opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;

+tcache_bin_info_t	*tcache_bin_info;
+static unsigned		stack_nelms; /* Total stack elms per tcache. */
+
/* Map of thread-specific caches. */
#ifndef NO_TLS
__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
@ -55,21 +58,19 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
#endif
    )
{
-	void *flush, *deferred, *ptr;
+	void *ptr;
	unsigned i, nflush, ndeferred;
-	bool first_pass;
#ifdef JEMALLOC_STATS
	bool merged_stats = false;
#endif

	assert(binind < nbins);
	assert(rem <= tbin->ncached);
-	assert(tbin->ncached > 0 || tbin->avail == NULL);

-	for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass =
-	    true; flush != NULL; flush = deferred, nflush = ndeferred) {
+	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena bin associated with the first object. */
-		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
+		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
+		    tbin->avail[0]);
		arena_t *arena = chunk->arena;
		arena_bin_t *bin = &arena->bins[binind];
@ -92,12 +93,10 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
			tbin->tstats.nrequests = 0;
		}
#endif
-		deferred = NULL;
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
-			ptr = flush;
+			ptr = tbin->avail[i];
			assert(ptr != NULL);
-			flush = *(void **)ptr;
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (chunk->arena == arena) {
				size_t pageind = ((uintptr_t)ptr -
@ -112,17 +111,11 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
				 * locked.  Stash the object, so that it can be
				 * handled in a future pass.
				 */
-				*(void **)ptr = deferred;
-				deferred = ptr;
+				tbin->avail[ndeferred] = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(&bin->lock);
-		if (first_pass) {
-			tbin->avail = flush;
-			first_pass = false;
-		}
	}
#ifdef JEMALLOC_STATS
	if (merged_stats == false) {
@ -139,8 +132,10 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
	}
#endif

+	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
+	    rem * sizeof(void *));
	tbin->ncached = rem;
-	if (tbin->ncached < tbin->low_water)
+	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}
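The rewritten loop above flushes from an array instead of walking a linked list threaded through the cached objects: each pass locks the arena that owns avail[0], frees every object belonging to that arena, compacts the rest to the front of the array, and repeats; finally the newest rem objects are slid down to the base of the stack. A stripped-down sketch of that control flow (the types and callbacks below are stand-ins, not jemalloc's):

#include <string.h>

typedef struct {
	void		**avail;	/* Pointer stack of cached objects. */
	unsigned	ncached;
	int		low_water;
} bin_t;

static void
flush_keep_rem(bin_t *tbin, unsigned rem,
    void *(*arena_of)(void *ptr), void (*dalloc)(void *arena, void *ptr))
{
	unsigned i, nflush, ndeferred;

	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Each pass works on (and would lock) the arena owning avail[0]. */
		void *arena = arena_of(tbin->avail[0]);

		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			void *ptr = tbin->avail[i];

			if (arena_of(ptr) == arena)
				dalloc(arena, ptr);
			else {
				/* Wrong arena: stash at the front for a later pass. */
				tbin->avail[ndeferred++] = ptr;
			}
		}
	}

	/* Keep the most recently cached objects at the base of the stack. */
	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
	    rem * sizeof(void *));
	tbin->ncached = rem;
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

Progress is guaranteed because avail[0] always belongs to the arena chosen for the pass, so at least one object is freed per iteration.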
@ -151,18 +146,19 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#endif
    )
{
-	void *flush, *deferred, *ptr;
+	void *ptr;
	unsigned i, nflush, ndeferred;
-	bool first_pass;
+#ifdef JEMALLOC_STATS
+	bool merged_stats = false;
+#endif

	assert(binind < nhbins);
	assert(rem <= tbin->ncached);
-	assert(tbin->ncached > 0 || tbin->avail == NULL);

-	for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass =
-	    true; flush != NULL; flush = deferred, nflush = ndeferred) {
+	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena associated with the first object. */
-		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
+		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
+		    tbin->avail[0]);
		arena_t *arena = chunk->arena;

		malloc_mutex_lock(&arena->lock);
@ -174,6 +170,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
			tcache->prof_accumbytes = 0;
#endif
#ifdef JEMALLOC_STATS
+			merged_stats = true;
			arena->stats.nrequests_large += tbin->tstats.nrequests;
			arena->stats.lstats[binind - nbins].nrequests +=
			    tbin->tstats.nrequests;
@ -182,12 +179,10 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
		}
#endif
-		deferred = NULL;
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
-			ptr = flush;
+			ptr = tbin->avail[i];
			assert(ptr != NULL);
-			flush = *(void **)ptr;
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (chunk->arena == arena)
				arena_dalloc_large(arena, chunk, ptr);
@ -198,21 +193,32 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
				 * Stash the object, so that it can be handled
				 * in a future pass.
				 */
-				*(void **)ptr = deferred;
-				deferred = ptr;
+				tbin->avail[ndeferred] = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(&arena->lock);
-		if (first_pass) {
-			tbin->avail = flush;
-			first_pass = false;
-		}
	}
+#ifdef JEMALLOC_STATS
+	if (merged_stats == false) {
+		/*
+		 * The flush loop didn't happen to flush to this thread's
+		 * arena, so the stats didn't get merged.  Manually do so now.
+		 */
+		arena_t *arena = tcache->arena;
+		malloc_mutex_lock(&arena->lock);
+		arena->stats.nrequests_large += tbin->tstats.nrequests;
+		arena->stats.lstats[binind - nbins].nrequests +=
+		    tbin->tstats.nrequests;
+		tbin->tstats.nrequests = 0;
+		malloc_mutex_unlock(&arena->lock);
+	}
+#endif
+
+	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
+	    rem * sizeof(void *));
	tbin->ncached = rem;
-	if (tbin->ncached < tbin->low_water)
+	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}
@ -220,10 +226,14 @@ tcache_t *
tcache_create(arena_t *arena)
{
	tcache_t *tcache;
-	size_t size;
+	size_t size, stack_offset;
	unsigned i;

	size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
+	/* Naturally align the pointer stacks. */
+	size = PTR_CEILING(size);
+	stack_offset = size;
+	size += stack_nelms * sizeof(void *);
	/*
	 * Round up to the nearest multiple of the cacheline size, in order to
	 * avoid the possibility of false cacheline sharing.
@ -236,6 +246,8 @@ tcache_create(arena_t *arena)
	if (size <= small_maxclass)
		tcache = (tcache_t *)arena_malloc_small(arena, size, true);
+	else if (size <= tcache_maxclass)
+		tcache = (tcache_t *)arena_malloc_large(arena, size, true);
	else
		tcache = (tcache_t *)icalloc(size);
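tcache_create() now sizes a single allocation to hold the tcache header, the per-bin pointer stacks, and cacheline padding: the header is rounded up to pointer alignment, stack_nelms pointer slots are appended, and the total is rounded to a cacheline so adjacent tcaches never share a line. The roundings are ordinary power-of-two ceilings; a small worked example with illustrative constants (jemalloc derives the real values at build/boot time):

#include <stdio.h>
#include <stddef.h>

#define CACHELINE		64
#define CACHELINE_CEILING(s)	(((s) + CACHELINE - 1) & ~(size_t)(CACHELINE - 1))
#define PTR_CEILING(s)		(((s) + sizeof(void *) - 1) & ~(sizeof(void *) - 1))

int
main(void)
{
	size_t header = 420;		/* offsetof(tcache_t, tbins) + bin array */
	size_t stack_nelms = 200;	/* total cached-object slots, all bins */
	size_t size;

	size = PTR_CEILING(header);		/* 424 with 8-byte pointers */
	size += stack_nelms * sizeof(void *);	/* 424 + 1600 = 2024 */
	size = CACHELINE_CEILING(size);		/* 2048 */
	printf("%zu\n", size);
	return (0);
}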
@ -252,15 +264,12 @@ tcache_create(arena_t *arena)
	tcache->arena = arena;
	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
-	for (i = 0; i < nbins; i++) {
-		if ((arena->bins[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
-			tcache->tbins[i].ncached_max = (arena->bins[i].nregs <<
-			    1);
-		} else
-			tcache->tbins[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX;
+	for (i = 0; i < nhbins; i++) {
+		tcache->tbins[i].lg_fill_div = 1;
+		tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
+		    (uintptr_t)stack_offset);
+		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
	}
-	for (; i < nhbins; i++)
-		tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE;

	TCACHE_SET(tcache);
@ -271,6 +280,7 @@ void
tcache_destroy(tcache_t *tcache)
{
	unsigned i;
+	size_t tcache_size;

#ifdef JEMALLOC_STATS
	/* Unlink from list of extant tcaches. */
@ -327,7 +337,8 @@ tcache_destroy(tcache_t *tcache)
	}
#endif

-	if (arena_salloc(tcache) <= small_maxclass) {
+	tcache_size = arena_salloc(tcache);
+	if (tcache_size <= small_maxclass) {
		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
		arena_t *arena = chunk->arena;
		size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
@ -341,6 +352,13 @@ tcache_destroy(tcache_t *tcache)
		malloc_mutex_lock(&bin->lock);
		arena_dalloc_bin(arena, chunk, tcache, mapelm);
		malloc_mutex_unlock(&bin->lock);
+	} else if (tcache_size <= tcache_maxclass) {
+		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
+		arena_t *arena = chunk->arena;
+
+		malloc_mutex_lock(&arena->lock);
+		arena_dalloc_large(arena, chunk, tcache);
+		malloc_mutex_unlock(&arena->lock);
	} else
		idalloc(tcache);
}
@ -397,11 +415,13 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena)
}
#endif

-void
+bool
tcache_boot(void)
{

	if (opt_tcache) {
+		unsigned i;
+
		/*
		 * If necessary, clamp opt_lg_tcache_max, now that
		 * small_maxclass and arena_maxclass are known.
@ -416,6 +436,28 @@ tcache_boot(void)
		nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);

+		/* Initialize tcache_bin_info. */
+		tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
+		    sizeof(tcache_bin_info_t));
+		if (tcache_bin_info == NULL)
+			return (true);
+		stack_nelms = 0;
+		for (i = 0; i < nbins; i++) {
+			if ((arena_bin_info[i].nregs << 1) <=
+			    TCACHE_NSLOTS_SMALL_MAX) {
+				tcache_bin_info[i].ncached_max =
+				    (arena_bin_info[i].nregs << 1);
+			} else {
+				tcache_bin_info[i].ncached_max =
+				    TCACHE_NSLOTS_SMALL_MAX;
+			}
+			stack_nelms += tcache_bin_info[i].ncached_max;
+		}
+		for (; i < nhbins; i++) {
+			tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
+			stack_nelms += tcache_bin_info[i].ncached_max;
+		}
+
		/* Compute incremental GC event threshold. */
		if (opt_lg_tcache_gc_sweep >= 0) {
			tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
@ -431,6 +473,8 @@ tcache_boot(void)
			abort();
		}
	}
+
+	return (false);
}

/******************************************************************************/
#endif /* JEMALLOC_TCACHE */
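tcache_boot() now follows the library's usual convention of returning true on failure, so an out-of-memory base_alloc() during bootstrap can be surfaced instead of being silently ignored. A hypothetical caller-side sketch (every name other than tcache_boot is illustrative):

#include <stdbool.h>

bool	tcache_boot(void);		/* Returns true on failure. */
bool	other_subsystem_boot(void);	/* Illustrative placeholder. */

static bool
malloc_init_sketch(void)
{
	if (other_subsystem_boot())
		return (true);
	if (tcache_boot())
		return (true);	/* e.g. allocating tcache_bin_info failed */
	return (false);
}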

jemalloc/test/bitmap.c (new file)

@ -0,0 +1,157 @@
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
/*
* Avoid using the assert() from jemalloc_internal.h, since it requires
* internal libjemalloc functionality.
* */
#include <assert.h>
/*
* Directly include the bitmap code, since it isn't exposed outside
* libjemalloc.
*/
#include "../src/bitmap.c"
#if (LG_BITMAP_MAXBITS > 12)
# define MAXBITS 4500
#else
# define MAXBITS (1U << LG_BITMAP_MAXBITS)
#endif
static void
test_bitmap_size(void)
{
size_t i, prev_size;
prev_size = 0;
for (i = 1; i <= MAXBITS; i++) {
size_t size = bitmap_size(i);
assert(size >= prev_size);
prev_size = size;
}
}
static void
test_bitmap_init(void)
{
size_t i;
for (i = 1; i <= MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
size_t j;
bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
bitmap_init(bitmap, &binfo);
for (j = 0; j < i; j++)
assert(bitmap_get(bitmap, &binfo, j) == false);
}
}
}
static void
test_bitmap_set(void)
{
size_t i;
for (i = 1; i <= MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
size_t j;
bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
bitmap_init(bitmap, &binfo);
for (j = 0; j < i; j++)
bitmap_set(bitmap, &binfo, j);
assert(bitmap_full(bitmap, &binfo));
}
}
}
static void
test_bitmap_unset(void)
{
size_t i;
for (i = 1; i <= MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
size_t j;
bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
bitmap_init(bitmap, &binfo);
for (j = 0; j < i; j++)
bitmap_set(bitmap, &binfo, j);
assert(bitmap_full(bitmap, &binfo));
for (j = 0; j < i; j++)
bitmap_unset(bitmap, &binfo, j);
for (j = 0; j < i; j++)
bitmap_set(bitmap, &binfo, j);
assert(bitmap_full(bitmap, &binfo));
}
}
}
static void
test_bitmap_sfu(void)
{
size_t i;
for (i = 1; i <= MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
ssize_t j;
bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
bitmap_init(bitmap, &binfo);
/* Iteratively set bits starting at the beginning. */
for (j = 0; j < i; j++)
assert(bitmap_sfu(bitmap, &binfo) == j);
assert(bitmap_full(bitmap, &binfo));
/*
* Iteratively unset bits starting at the end, and
* verify that bitmap_sfu() reaches the unset bits.
*/
for (j = i - 1; j >= 0; j--) {
bitmap_unset(bitmap, &binfo, j);
assert(bitmap_sfu(bitmap, &binfo) == j);
bitmap_unset(bitmap, &binfo, j);
}
assert(bitmap_get(bitmap, &binfo, 0) == false);
/*
* Iteratively set bits starting at the beginning, and
* verify that bitmap_sfu() looks past them.
*/
for (j = 1; j < i; j++) {
bitmap_set(bitmap, &binfo, j - 1);
assert(bitmap_sfu(bitmap, &binfo) == j);
bitmap_unset(bitmap, &binfo, j);
}
assert(bitmap_sfu(bitmap, &binfo) == i - 1);
assert(bitmap_full(bitmap, &binfo));
}
}
}
int
main(void)
{
fprintf(stderr, "Test begin\n");
test_bitmap_size();
test_bitmap_init();
test_bitmap_set();
test_bitmap_unset();
test_bitmap_sfu();
fprintf(stderr, "Test end\n");
return (0);
}
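The test above exercises bitmap_sfu() ("set first unset") alongside get/set/unset. A much-simplified, single-level illustration of those operations follows; the real bitmap.c keeps a tree of summary groups so sfu stays logarithmic, and the simple_* names below are illustrative only:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

#define GROUP_BITS	(sizeof(unsigned long) * 8)

typedef struct {
	unsigned long *groups;	/* Caller-provided storage, zero-initialized. */
} simple_bitmap_t;

static bool
simple_get(simple_bitmap_t *b, size_t bit)
{
	return (b->groups[bit / GROUP_BITS] >> (bit % GROUP_BITS)) & 1;
}

static void
simple_set(simple_bitmap_t *b, size_t bit)
{
	b->groups[bit / GROUP_BITS] |= 1UL << (bit % GROUP_BITS);
}

static void
simple_unset(simple_bitmap_t *b, size_t bit)
{
	b->groups[bit / GROUP_BITS] &= ~(1UL << (bit % GROUP_BITS));
}

/* Set the first unset bit and return its index (assumes one exists). */
static size_t
simple_sfu(simple_bitmap_t *b)
{
	size_t g;

	for (g = 0; ; g++) {
		if (~b->groups[g] != 0) {
			size_t bit = g * GROUP_BITS +
			    (size_t)__builtin_ctzl(~b->groups[g]);
			simple_set(b, bit);
			return (bit);
		}
	}
}

int
main(void)
{
	unsigned long groups[2] = {0, 0};
	simple_bitmap_t b = {groups};
	size_t i;

	for (i = 0; i < 5; i++)
		assert(simple_sfu(&b) == i);	/* 0, 1, 2, 3, 4 in order */
	assert(simple_get(&b, 3));
	simple_unset(&b, 2);
	assert(simple_sfu(&b) == 2);		/* finds the hole again */
	return (0);
}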

jemalloc/test/bitmap.exp (new file)

@ -0,0 +1,2 @@
Test begin
Test end