Clean up the manpage and conditionalize various portions according to how
jemalloc is configured. Modify arena_malloc() API to avoid unnecessary choose_arena() calls. Remove unnecessary code from choose_arena(). Enable lazy-lock by default, now that choose_arena() is both faster and out of the critical path. Implement objdir support in the build system.
This commit is contained in:
parent
b7924f50c0
commit
cc00a15770
172
jemalloc/INSTALL
Normal file
172
jemalloc/INSTALL
Normal file
@ -0,0 +1,172 @@
|
||||
Building and installing jemalloc can be as simple as typing the following while
|
||||
in the root directory of the source tree:
|
||||
|
||||
./configure
|
||||
make
|
||||
make install
|
||||
|
||||
=== Advanced configuration =====================================================
|
||||
|
||||
The 'configure' script supports numerous options that allow control of which
|
||||
functionality is enabled, where jemalloc is installed, etc. Optionally, pass
|
||||
any of the following arguments (not a definitive list) to 'configure':
|
||||
|
||||
--help
|
||||
Print a definitive list of options.
|
||||
|
||||
--prefix=<install-root-dir>
|
||||
Set the base directory in which to install. For example:
|
||||
|
||||
./configure --prefix=/usr/local
|
||||
|
||||
will cause files to be installed into /usr/local/include, /usr/local/lib,
|
||||
and /usr/local/man.
|
||||
|
||||
--with-rpath=<colon-separated-rpath>
|
||||
Embed one or more library paths, so that libjemalloc can
|
||||
find the libraries it is linked to. This works only on ELF-based systems.
|
||||
|
||||
--enable-debug
|
||||
Enable assertions and validation code. This incurs a substantial
|
||||
performance hit, but is very useful during application development.
|
||||
|
||||
--enable-stats
|
||||
Enable statistics gathering functionality. Use the 'P' option to print
|
||||
detailed allocation statistics at exit, and/or the 'U' option to print a
|
||||
detailed allocation trace log.
|
||||
|
||||
--disable-tiny
|
||||
Disable tiny (sub-quantum-sized) object support. Technically it is not
|
||||
legal for a malloc implementation to allocate objects with less than
|
||||
quantum alignment (8 or 16 bytes, depending on architecture), but in
|
||||
practice it never causes any problems if, for example, 4-byte allocations
|
||||
are 4-byte-aligned.
|
||||
|
||||
--disable-mag
|
||||
Disable thread-specific caches for sub-page-sized objects. Objects are
|
||||
cached and released in bulk using "magazines" -- a term coined by the
|
||||
developers of Solaris's umem allocator.
|
||||
|
||||
--disable-balance
|
||||
Disable dynamic rebalancing of thread-->arena assignments.
|
||||
|
||||
--enable-dss
|
||||
Enable support for page allocation/deallocation via sbrk(2), in addition to
|
||||
mmap(2).
|
||||
|
||||
--enable-fill
|
||||
Enable support for junk/zero filling of memory. Use the 'J' option to
|
||||
control junk filling, or the 'Z' option to control zero filling.
|
||||
|
||||
--enable-xmalloc
|
||||
Enable support for optional immediate termination due to out-of-memory
|
||||
errors, as is commonly implemented by "xmalloc" wrapper functions for malloc.
|
||||
Use the 'X' option to control termination behavior.
|
||||
|
||||
--enable-sysv
|
||||
Enable support for System V semantics, wherein malloc(0) returns NULL
|
||||
rather than a minimal allocation. Use the 'V' option to control System V
|
||||
compatibility.
|
||||
|
||||
--enable-dynamic-page-shift
|
||||
Under most conditions, the system page size never changes (usually 4KiB or
|
||||
8KiB, depending on architecture and configuration), and unless this option
|
||||
is enabled, jemalloc assumes that page size can safely be determined during
|
||||
configuration and hard-coded. Enabling dynamic page size determination has
|
||||
a measurable impact on performance, since the compiler is forced to load
|
||||
the page size from memory rather than embedding immediate values.
|
||||
|
||||
--disable-lazy-lock
|
||||
Disable code that wraps pthread_create() to detect when an application
|
||||
switches from single-threaded to multi-threaded mode, so that it can avoid
|
||||
mutex locking/unlocking operations while in single-threaded mode. In
|
||||
practice, this feature usually has little impact on performance unless
|
||||
magazines are disabled.
|
||||
|
||||
The following environment variables (not a definitive list) impact configure's
|
||||
behavior:
|
||||
|
||||
CFLAGS="?"
|
||||
Pass these flags to the compiler. You probably shouldn't define this unless
|
||||
you know what you are doing. (Use EXTRA_CFLAGS instead.)
|
||||
|
||||
EXTRA_CFLAGS="?"
|
||||
Append these flags to CFLAGS. This makes it possible to add flags such as
|
||||
-Werror, while allowing the configure script to determine what other flags
|
||||
are appropriate for the specified configuration.
|
||||
|
||||
The configure script specifically checks whether an optimization flag (-O*)
|
||||
is specified in EXTRA_CFLAGS, and refrains from specifying an optimization
|
||||
level if it finds that one has already been specified.
|
||||
|
||||
CPPFLAGS="?"
|
||||
Pass these flags to the C preprocessor. Note that CFLAGS is not passed to
|
||||
'cpp' when 'configure' is looking for include files, so you must use
|
||||
CPPFLAGS instead if you need to help 'configure' find header files.
|
||||
|
||||
LD_LIBRARY_PATH="?"
|
||||
'ld' uses this colon-separated list to find libraries.
|
||||
|
||||
LDFLAGS="?"
|
||||
Pass these flags when linking.
|
||||
|
||||
PATH="?"
|
||||
'configure' uses this to find programs.
|
||||
|
||||
=== Advanced compilation =======================================================
|
||||
|
||||
To run integrated regression tests, type:
|
||||
|
||||
make check
|
||||
|
||||
To clean up build results to varying degrees, use the following make targets:
|
||||
|
||||
clean
|
||||
distclean
|
||||
relclean
|
||||
|
||||
=== Advanced installation ======================================================
|
||||
|
||||
Optionally, define make variables when invoking make, including (not
|
||||
exclusively):
|
||||
|
||||
INCLUDEDIR="?"
|
||||
Use this as the installation prefix for header files.
|
||||
|
||||
LIBDIR="?"
|
||||
Use this as the installation prefix for libraries.
|
||||
|
||||
MANDIR="?"
|
||||
Use this as the installation prefix for man pages.
|
||||
|
||||
CC="?"
|
||||
Use this to invoke the C compiler.
|
||||
|
||||
CFLAGS="?"
|
||||
Pass these flags to the compiler.
|
||||
|
||||
CPPFLAGS="?"
|
||||
Pass these flags to the C preprocessor.
|
||||
|
||||
LDFLAGS="?"
|
||||
Pass these flags when linking.
|
||||
|
||||
PATH="?"
|
||||
Use this to search for programs used during configuration and building.
|
||||
|
||||
=== Development ================================================================
|
||||
|
||||
If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh'
|
||||
script rather than 'configure'. This re-generates 'configure', enables
|
||||
configuration dependency rules, and enables re-generation of automatically
|
||||
generated source files.
|
||||
|
||||
The build system supports using an object directory separate from the source
|
||||
tree. For example, you can create an 'obj' directory, and from within that
|
||||
directory, issue configuration and build commands:
|
||||
|
||||
autoconf
|
||||
mkdir obj
|
||||
cd obj
|
||||
../configure --enable-autogen
|
||||
make
|
@ -11,10 +11,8 @@ SHELL := /bin/sh
|
||||
CC := @CC@
|
||||
|
||||
# Configuration parameters.
|
||||
BINDIR := @BINDIR@
|
||||
INCLUDEDIR := @INCLUDEDIR@
|
||||
LIBDIR := @LIBDIR@
|
||||
DATADIR := @DATADIR@
|
||||
MANDIR := @MANDIR@
|
||||
|
||||
# Build parameters.
|
||||
@ -34,20 +32,20 @@ endif
|
||||
REV := 0
|
||||
|
||||
# File lists.
|
||||
CHDRS := src/jemalloc.h
|
||||
CSRCS := src/jemalloc.c
|
||||
DSO := lib/libjemalloc.so.$(REV)
|
||||
MAN3 := doc/jemalloc.3
|
||||
CHDRS := @srcroot@src/jemalloc.h @objroot@src/jemalloc_defs.h
|
||||
CSRCS := @srcroot@src/jemalloc.c
|
||||
DSOS := @objroot@lib/libjemalloc.so.$(REV) @objroot@lib/libjemalloc.so
|
||||
MAN3 := @objroot@doc/jemalloc.3
|
||||
|
||||
.PHONY: all dist install check clean distclean relclean
|
||||
|
||||
# Default target.
|
||||
all: $(DSO)
|
||||
all: $(DSOS)
|
||||
|
||||
src/%.o: src/%.c
|
||||
@objroot@src/%.o: @srcroot@src/%.c
|
||||
$(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $+
|
||||
|
||||
$(DSO): $(CSRCS:%.c=%.o)
|
||||
$(DSOS): $(CSRCS:@srcroot@%.c=@objroot@%.o)
|
||||
@mkdir -p $(@D)
|
||||
gcc -shared -o $@ $+ $(LDFLAGS) $(LIBS)
|
||||
ln -sf libjemalloc.so.$(REV) lib/libjemalloc.so
|
||||
@ -59,7 +57,10 @@ install:
|
||||
install -m 644 $$h $(INCLUDEDIR); \
|
||||
done
|
||||
install -d $(LIBDIR)
|
||||
install -m 755 $(DSO) $(LIBDIR)
|
||||
@for s in $(DSOS); do \
|
||||
echo "install -m 755 $$s $(LIBDIR)"; \
|
||||
install -m 755 $$s $(LIBDIR); \
|
||||
done
|
||||
install -d $(MANDIR)
|
||||
@for m in $(MAN3); do \
|
||||
echo "install -m 644 $$m $(MANDIR)/man3"; \
|
||||
@ -69,9 +70,9 @@ done
|
||||
check:
|
||||
|
||||
clean:
|
||||
rm -f src/*.o
|
||||
rm -f lib/libjemalloc.so
|
||||
rm -f lib/libjemalloc.so.$(REV)
|
||||
rm -f @objroot@src/*.o
|
||||
rm -f @objroot@lib/libjemalloc.so
|
||||
rm -f @objroot@lib/libjemalloc.so.$(REV)
|
||||
|
||||
distclean: clean
|
||||
rm -f @objroot@config.log
|
||||
|
4
jemalloc/README
Normal file
4
jemalloc/README
Normal file
@ -0,0 +1,4 @@
|
||||
jemalloc is a general-purpose scalable concurrent malloc(3) implementation.
|
||||
|
||||
The INSTALL file contains information on how to configure, build, and install
|
||||
jemalloc.
|
@ -41,7 +41,7 @@ MANDIR=`eval echo $mandir`
|
||||
MANDIR=`eval echo $MANDIR`
|
||||
AC_SUBST([MANDIR])
|
||||
|
||||
cfgoutputs="Makefile"
|
||||
cfgoutputs="Makefile doc/jemalloc.3"
|
||||
cfghdrs="src/jemalloc_defs.h"
|
||||
|
||||
dnl If CFLAGS isn't defined and using gcc, set CFLAGS to something reasonable.
|
||||
@ -219,6 +219,12 @@ if test "x$enable_stats" = "x1" ; then
|
||||
AC_DEFINE([JEMALLOC_STATS], [ ])
|
||||
fi
|
||||
AC_SUBST([enable_stats])
|
||||
if test "x$enable_stats" = "x0" ; then
|
||||
roff_stats=".\\\" "
|
||||
else
|
||||
roff_stats=""
|
||||
fi
|
||||
AC_SUBST([roff_stats])
|
||||
|
||||
dnl Enable tiny allocations by default.
|
||||
AC_ARG_ENABLE([tiny],
|
||||
@ -235,6 +241,15 @@ if test "x$enable_tiny" = "x1" ; then
|
||||
AC_DEFINE([JEMALLOC_TINY], [ ])
|
||||
fi
|
||||
AC_SUBST([enable_tiny])
|
||||
if test "x$enable_tiny" = "x0" ; then
|
||||
roff_tiny=".\\\" "
|
||||
roff_no_tiny=""
|
||||
else
|
||||
roff_tiny=""
|
||||
roff_no_tiny=".\\\" "
|
||||
fi
|
||||
AC_SUBST([roff_tiny])
|
||||
AC_SUBST([roff_no_tiny])
|
||||
|
||||
dnl Enable magazines by default.
|
||||
AC_ARG_ENABLE([mag],
|
||||
@ -251,6 +266,12 @@ if test "x$enable_mag" = "x1" ; then
|
||||
AC_DEFINE([JEMALLOC_MAG], [ ])
|
||||
fi
|
||||
AC_SUBST([enable_mag])
|
||||
if test "x$enable_mag" = "x0" ; then
|
||||
roff_mag=".\\\" "
|
||||
else
|
||||
roff_mag=""
|
||||
fi
|
||||
AC_SUBST([roff_mag])
|
||||
|
||||
dnl Enable dynamic arena load balancing by default.
|
||||
AC_ARG_ENABLE([balance],
|
||||
@ -267,6 +288,12 @@ if test "x$enable_balance" = "x1" ; then
|
||||
AC_DEFINE([JEMALLOC_BALANCE], [ ])
|
||||
fi
|
||||
AC_SUBST([enable_balance])
|
||||
if test "x$enable_balance" = "x0" ; then
|
||||
roff_balance=".\\\" "
|
||||
else
|
||||
roff_balance=""
|
||||
fi
|
||||
AC_SUBST([roff_balance])
|
||||
|
||||
dnl Do not enable allocation from DSS by default.
|
||||
AC_ARG_ENABLE([dss],
|
||||
@ -283,6 +310,12 @@ if test "x$enable_dss" = "x1" ; then
|
||||
AC_DEFINE([JEMALLOC_DSS], [ ])
|
||||
fi
|
||||
AC_SUBST([enable_dss])
|
||||
if test "x$enable_dss" = "x0" ; then
|
||||
roff_dss=".\\\" "
|
||||
else
|
||||
roff_dss=""
|
||||
fi
|
||||
AC_SUBST([roff_dss])
|
||||
|
||||
dnl Do not support the junk/zero filling option by default.
|
||||
AC_ARG_ENABLE([fill],
|
||||
@ -299,6 +332,12 @@ if test "x$enable_fill" = "x1" ; then
|
||||
AC_DEFINE([JEMALLOC_FILL], [ ])
|
||||
fi
|
||||
AC_SUBST([enable_fill])
|
||||
if test "x$enable_fill" = "x0" ; then
|
||||
roff_fill=".\\\" "
|
||||
else
|
||||
roff_fill=""
|
||||
fi
|
||||
AC_SUBST([roff_fill])
|
||||
|
||||
dnl Do not support the xmalloc option by default.
|
||||
AC_ARG_ENABLE([xmalloc],
|
||||
@ -315,6 +354,12 @@ if test "x$enable_xmalloc" = "x1" ; then
|
||||
AC_DEFINE([JEMALLOC_XMALLOC], [ ])
|
||||
fi
|
||||
AC_SUBST([enable_xmalloc])
|
||||
if test "x$enable_xmalloc" = "x0" ; then
|
||||
roff_xmalloc=".\\\" "
|
||||
else
|
||||
roff_xmalloc=""
|
||||
fi
|
||||
AC_SUBST([roff_xmalloc])
|
||||
|
||||
dnl Do not support the SYSV option by default.
|
||||
AC_ARG_ENABLE([sysv],
|
||||
@ -331,6 +376,12 @@ if test "x$enable_sysv" = "x1" ; then
|
||||
AC_DEFINE([JEMALLOC_SYSV], [ ])
|
||||
fi
|
||||
AC_SUBST([enable_sysv])
|
||||
if test "x$enable_sysv" = "x0" ; then
|
||||
roff_sysv=".\\\" "
|
||||
else
|
||||
roff_sysv=""
|
||||
fi
|
||||
AC_SUBST([roff_sysv])
|
||||
|
||||
dnl Do not determine page shift at run time by default.
|
||||
AC_ARG_ENABLE([dynamic_page_shift],
|
||||
@ -380,6 +431,7 @@ dnl ============================================================================
|
||||
dnl jemalloc configuration.
|
||||
dnl
|
||||
jemalloc_version=`cat ${srcroot}VERSION`
|
||||
AC_DEFINE_UNQUOTED([JEMALLOC_VERSION], ["$jemalloc_version"])
|
||||
AC_SUBST([jemalloc_version])
|
||||
|
||||
dnl ============================================================================
|
||||
@ -400,21 +452,24 @@ AC_RUN_IFELSE([AC_LANG_PROGRAM(
|
||||
|
||||
return 0;
|
||||
]])],
|
||||
AC_MSG_RESULT([yes]),
|
||||
AC_MSG_RESULT([yes])
|
||||
roff_tls="",
|
||||
AC_MSG_RESULT([no])
|
||||
roff_tls=".\\\" "
|
||||
AC_DEFINE_UNQUOTED([NO_TLS], [ ]))
|
||||
AC_SUBST([roff_tls])
|
||||
|
||||
dnl Do not enable lazy locking by default.
|
||||
dnl Enable lazy locking by default.
|
||||
AC_ARG_ENABLE([lazy_lock],
|
||||
[AS_HELP_STRING([--enable-lazy-lock],
|
||||
[Enable lazy locking (avoid locking unless multiple threads)])],
|
||||
[Disable lazy locking (always lock, even when single-threaded)])],
|
||||
[if test "x$enable_lazy_lock" = "xno" ; then
|
||||
enable_lazy_lock="0"
|
||||
else
|
||||
enable_lazy_lock="1"
|
||||
fi
|
||||
],
|
||||
[enable_lazy_lock="0"]
|
||||
[enable_lazy_lock="1"]
|
||||
)
|
||||
if test "x$enable_lazy_lock" = "x1" ; then
|
||||
AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])])
|
||||
|
@ -1,5 +1,5 @@
|
||||
.\" Copyright (c) 2006-2008 Jason Evans <jasone@canonware.com>.
|
||||
.\" Copyright (c) 2009 Facebook, Inc. All rights reserved.
|
||||
.\" Copyright (c) 2006-2008 Jason Evans <jasone@canonware.com>.
|
||||
.\" All rights reserved.
|
||||
.\" Copyright (c) 1980, 1991, 1993
|
||||
.\" The Regents of the University of California. All rights reserved.
|
||||
@ -42,7 +42,7 @@
|
||||
.Nm malloc , calloc , posix_memalign , realloc , free , malloc_usable_size
|
||||
.Nd general purpose memory allocation functions
|
||||
.Sh LIBRARY
|
||||
.Lb libc
|
||||
.Lb libjemalloc
|
||||
.Sh SYNOPSIS
|
||||
.In stdlib.h
|
||||
.Ft void *
|
||||
@ -55,22 +55,23 @@
|
||||
.Fn realloc "void *ptr" "size_t size"
|
||||
.Ft void
|
||||
.Fn free "void *ptr"
|
||||
.In jemalloc.h
|
||||
.Ft size_t
|
||||
.Fn malloc_usable_size "const void *ptr"
|
||||
.Ft const char *
|
||||
.Va jemalloc_options ;
|
||||
.Ft void
|
||||
.Fo \*(lp*jemalloc_message\*(rp
|
||||
.Fa "const char *p1" "const char *p2" "const char *p3" "const char *p4"
|
||||
.Fc
|
||||
.In malloc_np.h
|
||||
.Ft size_t
|
||||
.Fn malloc_usable_size "const void *ptr"
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Fn malloc
|
||||
function allocates
|
||||
.Fa size
|
||||
bytes of uninitialized memory.
|
||||
The allocated space is suitably aligned (after possible pointer coercion)
|
||||
The allocated space is suitably aligned
|
||||
@roff_tiny@(after possible pointer coercion)
|
||||
for storage of any type of object.
|
||||
.Pp
|
||||
The
|
||||
@ -187,31 +188,32 @@ flags being set) become fatal.
|
||||
The process will call
|
||||
.Xr abort 3
|
||||
in these cases.
|
||||
.It B
|
||||
Double/halve the per-arena lock contention threshold at which a thread is
|
||||
randomly re-assigned to an arena.
|
||||
This dynamic load balancing tends to push threads away from highly contended
|
||||
arenas, which avoids worst case contention scenarios in which threads
|
||||
disproportionately utilize arenas.
|
||||
However, due to the highly dynamic load that applications may place on the
|
||||
allocator, it is impossible for the allocator to know in advance how sensitive
|
||||
it should be to contention over arenas.
|
||||
Therefore, some applications may benefit from increasing or decreasing this
|
||||
threshold parameter.
|
||||
This option is not available for some configurations (non-PIC).
|
||||
@roff_balance@@roff_tls@.It B
|
||||
@roff_balance@@roff_tls@Double/halve the per-arena lock contention threshold at
|
||||
@roff_balance@@roff_tls@which a thread is randomly re-assigned to an arena.
|
||||
@roff_balance@@roff_tls@This dynamic load balancing tends to push threads away
|
||||
@roff_balance@@roff_tls@from highly contended arenas, which avoids worst case
|
||||
@roff_balance@@roff_tls@contention scenarios in which threads disproportionately
|
||||
@roff_balance@@roff_tls@utilize arenas.
|
||||
@roff_balance@@roff_tls@However, due to the highly dynamic load that
|
||||
@roff_balance@@roff_tls@applications may place on the allocator, it is
|
||||
@roff_balance@@roff_tls@impossible for the allocator to know in advance how
|
||||
@roff_balance@@roff_tls@sensitive it should be to contention over arenas.
|
||||
@roff_balance@@roff_tls@Therefore, some applications may benefit from increasing
|
||||
@roff_balance@@roff_tls@or decreasing this threshold parameter.
|
||||
.It C
|
||||
Double/halve the size of the maximum size class that is a multiple of the
|
||||
cacheline size (64).
|
||||
Above this size, subpage spacing (256 bytes) is used for size classes.
|
||||
The default value is 512 bytes.
|
||||
.It D
|
||||
Use
|
||||
.Xr sbrk 2
|
||||
to acquire memory in the data storage segment (DSS).
|
||||
This option is enabled by default.
|
||||
See the
|
||||
.Dq M
|
||||
option for related information and interactions.
|
||||
@roff_dss@.It D
|
||||
@roff_dss@Use
|
||||
@roff_dss@.Xr sbrk 2
|
||||
@roff_dss@to acquire memory in the data storage segment (DSS).
|
||||
@roff_dss@This option is enabled by default.
|
||||
@roff_dss@See the
|
||||
@roff_dss@.Dq M
|
||||
@roff_dss@option for related information and interactions.
|
||||
.It F
|
||||
Double/halve the per-arena maximum number of dirty unused pages that are
|
||||
allowed to accumulate before informing the kernel about at least half of those
|
||||
@ -222,46 +224,48 @@ physical memory becomes scarce and the pages remain unused.
|
||||
The default is 512 pages per arena;
|
||||
.Ev JEMALLOC_OPTIONS=10f
|
||||
will prevent any dirty unused pages from accumulating.
|
||||
.It G
|
||||
When there are multiple threads, use thread-specific caching for objects that
|
||||
are smaller than one page.
|
||||
This option is enabled by default.
|
||||
Thread-specific caching allows many allocations to be satisfied without
|
||||
performing any thread synchronization, at the cost of increased memory use.
|
||||
See the
|
||||
.Dq R
|
||||
option for related tuning information.
|
||||
This option is not available for some configurations (non-PIC).
|
||||
.It J
|
||||
Each byte of new memory allocated by
|
||||
.Fn malloc
|
||||
or
|
||||
.Fn realloc
|
||||
will be initialized to 0xa5.
|
||||
All memory returned by
|
||||
.Fn free
|
||||
or
|
||||
.Fn realloc
|
||||
will be initialized to 0x5a.
|
||||
This is intended for debugging and will impact performance negatively.
|
||||
@roff_mag@@roff_tls@.It G
|
||||
@roff_mag@@roff_tls@When there are multiple threads, use thread-specific caching
|
||||
@roff_mag@@roff_tls@for objects that are smaller than one page.
|
||||
@roff_mag@@roff_tls@This option is enabled by default.
|
||||
@roff_mag@@roff_tls@Thread-specific caching allows many allocations to be
|
||||
@roff_mag@@roff_tls@satisfied without performing any thread synchronization, at
|
||||
@roff_mag@@roff_tls@the cost of increased memory use.
|
||||
@roff_mag@@roff_tls@See the
|
||||
@roff_mag@@roff_tls@.Dq R
|
||||
@roff_mag@@roff_tls@option for related tuning information.
|
||||
@roff_fill@.It J
|
||||
@roff_fill@Each byte of new memory allocated by
|
||||
@roff_fill@.Fn malloc
|
||||
@roff_fill@or
|
||||
@roff_fill@.Fn realloc
|
||||
@roff_fill@will be initialized to 0xa5.
|
||||
@roff_fill@All memory returned by
|
||||
@roff_fill@.Fn free
|
||||
@roff_fill@or
|
||||
@roff_fill@.Fn realloc
|
||||
@roff_fill@will be initialized to 0x5a.
|
||||
@roff_fill@This is intended for debugging and will impact performance
|
||||
@roff_fill@negatively.
|
||||
.It K
|
||||
Double/halve the virtual memory chunk size.
|
||||
The default chunk size is 1 MB.
|
||||
.It M
|
||||
Use
|
||||
.Xr mmap 2
|
||||
to acquire anonymously mapped memory.
|
||||
This option is enabled by default.
|
||||
If both the
|
||||
.Dq D
|
||||
and
|
||||
.Dq M
|
||||
options are enabled, the allocator prefers the DSS over anonymous mappings,
|
||||
but allocation only fails if memory cannot be acquired via either method.
|
||||
If neither option is enabled, then the
|
||||
.Dq M
|
||||
option is implicitly enabled in order to assure that there is a method for
|
||||
acquiring memory.
|
||||
@roff_dss@.It M
|
||||
@roff_dss@Use
|
||||
@roff_dss@.Xr mmap 2
|
||||
@roff_dss@to acquire anonymously mapped memory.
|
||||
@roff_dss@This option is enabled by default.
|
||||
@roff_dss@If both the
|
||||
@roff_dss@.Dq D
|
||||
@roff_dss@and
|
||||
@roff_dss@.Dq M
|
||||
@roff_dss@options are enabled, the allocator prefers the DSS over anonymous
|
||||
@roff_dss@mappings, but allocation only fails if memory cannot be acquired via
|
||||
@roff_dss@either method.
|
||||
@roff_dss@If neither option is enabled, then the
|
||||
@roff_dss@.Dq M
|
||||
@roff_dss@option is implicitly enabled in order to assure that there is a method
|
||||
@roff_dss@for acquiring memory.
|
||||
.It N
|
||||
Double/halve the number of arenas.
|
||||
The default number of arenas is two times the number of CPUs, or one if there
|
||||
@ -279,88 +283,70 @@ Double/halve the size of the maximum size class that is a multiple of the
|
||||
quantum (8 or 16 bytes, depending on architecture).
|
||||
Above this size, cacheline spacing is used for size classes.
|
||||
The default value is 128 bytes.
|
||||
.It R
|
||||
Double/halve magazine size, which approximately doubles/halves the number of
|
||||
rounds in each magazine.
|
||||
Magazines are used by the thread-specific caching machinery to acquire and
|
||||
release objects in bulk.
|
||||
Increasing the magazine size decreases locking overhead, at the expense of
|
||||
increased memory usage.
|
||||
This option is not available for some configurations (non-PIC).
|
||||
.It U
|
||||
Generate
|
||||
.Dq utrace
|
||||
entries for
|
||||
.Xr ktrace 1 ,
|
||||
for all operations.
|
||||
Consult the source for details on this option.
|
||||
.It V
|
||||
Attempting to allocate zero bytes will return a
|
||||
.Dv NULL
|
||||
pointer instead of
|
||||
a valid pointer.
|
||||
(The default behavior is to make a minimal allocation and return a
|
||||
pointer to it.)
|
||||
This option is provided for System V compatibility.
|
||||
This option is incompatible with the
|
||||
.Dq X
|
||||
option.
|
||||
.It X
|
||||
Rather than return failure for any allocation function,
|
||||
display a diagnostic message on
|
||||
.Dv stderr
|
||||
and cause the program to drop
|
||||
core (using
|
||||
.Xr abort 3 ) .
|
||||
This option should be set at compile time by including the following in
|
||||
the source code:
|
||||
.Bd -literal -offset indent
|
||||
jemalloc_options = "X";
|
||||
.Ed
|
||||
.It Z
|
||||
Each byte of new memory allocated by
|
||||
.Fn malloc
|
||||
or
|
||||
.Fn realloc
|
||||
will be initialized to 0.
|
||||
Note that this initialization only happens once for each byte, so
|
||||
.Fn realloc
|
||||
calls do not zero memory that was previously allocated.
|
||||
This is intended for debugging and will impact performance negatively.
|
||||
@roff_mag@@roff_tls@.It R
|
||||
@roff_mag@@roff_tls@Double/halve magazine size, which approximately
|
||||
@roff_mag@@roff_tls@doubles/halves the number of rounds in each magazine.
|
||||
@roff_mag@@roff_tls@Magazines are used by the thread-specific caching machinery
|
||||
@roff_mag@@roff_tls@to acquire and release objects in bulk.
|
||||
@roff_mag@@roff_tls@Increasing the magazine size decreases locking overhead, at
|
||||
@roff_mag@@roff_tls@the expense of increased memory usage.
|
||||
@roff_stats@.It U
|
||||
@roff_stats@Generate a verbose trace log via
|
||||
@roff_stats@.Fn jemalloc_message
|
||||
@roff_stats@for all allocation operations.
|
||||
@roff_sysv@.It V
|
||||
@roff_sysv@Attempting to allocate zero bytes will return a
|
||||
@roff_sysv@.Dv NULL
|
||||
@roff_sysv@pointer instead of a valid pointer.
|
||||
@roff_sysv@(The default behavior is to make a minimal allocation and return a
|
||||
@roff_sysv@pointer to it.)
|
||||
@roff_sysv@This option is provided for System V compatibility.
|
||||
@roff_sysv@@roff_xmalloc@This option is incompatible with the
|
||||
@roff_sysv@@roff_xmalloc@.Dq X
|
||||
@roff_sysv@@roff_xmalloc@option.
|
||||
@roff_xmalloc@.It X
|
||||
@roff_xmalloc@Rather than return failure for any allocation function, display a
|
||||
@roff_xmalloc@diagnostic message on
|
||||
@roff_xmalloc@.Dv stderr
|
||||
@roff_xmalloc@and cause the program to drop core (using
|
||||
@roff_xmalloc@.Xr abort 3 ) .
|
||||
@roff_xmalloc@This option should be set at compile time by including the
|
||||
@roff_xmalloc@following in the source code:
|
||||
@roff_xmalloc@.Bd -literal -offset indent
|
||||
@roff_xmalloc@jemalloc_options = "X";
|
||||
@roff_xmalloc@.Ed
|
||||
@roff_fill@.It Z
|
||||
@roff_fill@Each byte of new memory allocated by
|
||||
@roff_fill@.Fn malloc
|
||||
@roff_fill@or
|
||||
@roff_fill@.Fn realloc
|
||||
@roff_fill@will be initialized to 0.
|
||||
@roff_fill@Note that this initialization only happens once for each byte, so
|
||||
@roff_fill@.Fn realloc
|
||||
@roff_fill@calls do not zero memory that was previously allocated.
|
||||
@roff_fill@This is intended for debugging and will impact performance
|
||||
@roff_fill@negatively.
|
||||
.El
|
||||
.Pp
|
||||
The
|
||||
.Dq J
|
||||
and
|
||||
.Dq Z
|
||||
options are intended for testing and debugging.
|
||||
An application which changes its behavior when these options are used
|
||||
is flawed.
|
||||
@roff_fill@The
|
||||
@roff_fill@.Dq J
|
||||
@roff_fill@and
|
||||
@roff_fill@.Dq Z
|
||||
@roff_fill@options are intended for testing and debugging.
|
||||
@roff_fill@An application which changes its behavior when these options are used
|
||||
@roff_fill@is flawed.
|
||||
.Sh IMPLEMENTATION NOTES
|
||||
Traditionally, allocators have used
|
||||
.Xr sbrk 2
|
||||
to obtain memory, which is suboptimal for several reasons, including race
|
||||
conditions, increased fragmentation, and artificial limitations on maximum
|
||||
usable memory.
|
||||
This allocator uses both
|
||||
.Xr sbrk 2
|
||||
and
|
||||
.Xr mmap 2
|
||||
by default, but it can be configured at run time to use only one or the other.
|
||||
If resource limits are not a primary concern, the preferred configuration is
|
||||
.Ev JEMALLOC_OPTIONS=dM
|
||||
or
|
||||
.Ev JEMALLOC_OPTIONS=DM .
|
||||
When so configured, the
|
||||
.Ar datasize
|
||||
resource limit has little practical effect for typical applications; use
|
||||
.Ev JEMALLOC_OPTIONS=Dm
|
||||
if that is a concern.
|
||||
Regardless of allocator configuration, the
|
||||
.Ar vmemoryuse
|
||||
resource limit can be used to bound the total virtual memory used by a
|
||||
process, as described in
|
||||
.Xr limits 1 .
|
||||
@roff_dss@Traditionally, allocators have used
|
||||
@roff_dss@.Xr sbrk 2
|
||||
@roff_dss@to obtain memory, which is suboptimal for several reasons, including
|
||||
@roff_dss@race conditions, increased fragmentation, and artificial limitations
|
||||
@roff_dss@on maximum usable memory.
|
||||
@roff_dss@This allocator uses both
|
||||
@roff_dss@.Xr sbrk 2
|
||||
@roff_dss@and
|
||||
@roff_dss@.Xr mmap 2
|
||||
@roff_dss@by default, but it can be configured at run time to use only one or
|
||||
@roff_dss@the other.
|
||||
.Pp
|
||||
This allocator uses multiple arenas in order to reduce lock contention for
|
||||
threaded programs on multi-processor systems.
|
||||
@ -375,13 +361,14 @@ improve performance, mainly due to reduced cache performance.
|
||||
However, it may make sense to reduce the number of arenas if an application
|
||||
does not make much use of the allocation functions.
|
||||
.Pp
|
||||
In addition to multiple arenas, this allocator supports thread-specific
|
||||
caching for small objects (smaller than one page), in order to make it
|
||||
possible to completely avoid synchronization for most small allocation requests.
|
||||
Such caching allows very fast allocation in the common case, but it increases
|
||||
memory usage and fragmentation, since a bounded number of objects can remain
|
||||
allocated in each thread cache.
|
||||
.Pp
|
||||
@roff_mag@In addition to multiple arenas, this allocator supports
|
||||
@roff_mag@thread-specific caching for small objects (smaller than one page), in
|
||||
@roff_mag@order to make it possible to completely avoid synchronization for most
|
||||
@roff_mag@small allocation requests.
|
||||
@roff_mag@Such caching allows very fast allocation in the common case, but it
|
||||
@roff_mag@increases memory usage and fragmentation, since a bounded number of
|
||||
@roff_mag@objects can remain allocated in each thread cache.
|
||||
@roff_mag@.Pp
|
||||
Memory is conceptually broken into equal-sized chunks, where the chunk size is
|
||||
a power of two that is greater than the page size.
|
||||
Chunks are always aligned to multiples of the chunk size.
|
||||
@ -406,12 +393,16 @@ determine all metadata regarding small and large allocations in constant time.
|
||||
.Pp
|
||||
Small objects are managed in groups by page runs.
|
||||
Each run maintains a bitmap that tracks which regions are in use.
|
||||
Allocation requests that are no more than half the quantum (8 or 16, depending
|
||||
on architecture) are rounded up to the nearest power of two.
|
||||
Allocation requests that are more than half the quantum, but no more than the
|
||||
minimum cacheline-multiple size class (see the
|
||||
@roff_tiny@Allocation requests that are no more than half the quantum (8 or 16,
|
||||
@roff_tiny@depending on architecture) are rounded up to the nearest power of
|
||||
@roff_tiny@two.
|
||||
Allocation requests that are
|
||||
@roff_tiny@more than half the quantum, but
|
||||
no more than the minimum cacheline-multiple size class (see the
|
||||
.Dq Q
|
||||
option) are rounded up to the nearest multiple of the quantum.
|
||||
option) are rounded up to the nearest multiple of the
|
||||
@roff_tiny@quantum.
|
||||
@roff_no_tiny@quantum (8 or 16, depending on architecture).
|
||||
Allocation requests that are more than the minimum cacheline-multiple size
|
||||
class, but no more than the minimum subpage-multiple size class (see the
|
||||
.Dq C
|
||||
@ -440,26 +431,26 @@ rather than the normal policy of trying to continue if at all possible.
|
||||
It is probably also a good idea to recompile the program with suitable
|
||||
options and symbols for debugger support.
|
||||
.Pp
|
||||
If the program starts to give unusual results, coredump or generally behave
|
||||
differently without emitting any of the messages mentioned in the next
|
||||
section, it is likely because it depends on the storage being filled with
|
||||
zero bytes.
|
||||
Try running it with the
|
||||
.Dq Z
|
||||
option set;
|
||||
if that improves the situation, this diagnosis has been confirmed.
|
||||
If the program still misbehaves,
|
||||
the likely problem is accessing memory outside the allocated area.
|
||||
.Pp
|
||||
Alternatively, if the symptoms are not easy to reproduce, setting the
|
||||
.Dq J
|
||||
option may help provoke the problem.
|
||||
.Pp
|
||||
In truly difficult cases, the
|
||||
.Dq U
|
||||
option, if supported by the kernel, can provide a detailed trace of
|
||||
all calls made to these functions.
|
||||
.Pp
|
||||
@roff_fill@If the program starts to give unusual results, coredump or generally
|
||||
@roff_fill@behave differently without emitting any of the messages mentioned in
|
||||
@roff_fill@the next section, it is likely because it depends on the storage
|
||||
@roff_fill@being filled with zero bytes.
|
||||
@roff_fill@Try running it with the
|
||||
@roff_fill@.Dq Z
|
||||
@roff_fill@option set;
|
||||
@roff_fill@if that improves the situation, this diagnosis has been confirmed.
|
||||
@roff_fill@If the program still misbehaves,
|
||||
@roff_fill@the likely problem is accessing memory outside the allocated area.
|
||||
@roff_fill@.Pp
|
||||
@roff_fill@Alternatively, if the symptoms are not easy to reproduce, setting the
|
||||
@roff_fill@.Dq J
|
||||
@roff_fill@option may help provoke the problem.
|
||||
@roff_fill@.Pp
|
||||
@roff_stats@In truly difficult cases, the
|
||||
@roff_stats@.Dq U
|
||||
@roff_stats@option can provide a detailed trace of all calls made to these
|
||||
@roff_stats@functions.
|
||||
@roff_stats@.Pp
|
||||
Unfortunately this implementation does not provide much detail about
|
||||
the problems it detects; the performance impact for storing such information
|
||||
would be prohibitive.
|
||||
@ -476,7 +467,7 @@ If the
|
||||
option is set, all warnings are treated as errors.
|
||||
.Pp
|
||||
The
|
||||
.Va _malloc_message
|
||||
.Va jemalloc_message
|
||||
variable allows the programmer to override the function which emits
|
||||
the text strings forming the errors and warnings if for some reason
|
||||
the
|
||||
@ -486,7 +477,7 @@ Please note that doing anything which tries to allocate memory in
|
||||
this function is likely to result in a crash or deadlock.
|
||||
.Pp
|
||||
All messages are prefixed by
|
||||
.Dq Ao Ar progname Ac Ns Li : (malloc) .
|
||||
.Dq <jemalloc>: .
|
||||
.Sh RETURN VALUES
|
||||
The
|
||||
.Fn malloc
|
||||
@ -564,15 +555,12 @@ on calls to these functions:
|
||||
jemalloc_options = "X";
|
||||
.Ed
|
||||
.Sh SEE ALSO
|
||||
.Xr limits 1 ,
|
||||
.Xr madvise 2 ,
|
||||
.Xr mmap 2 ,
|
||||
.Xr sbrk 2 ,
|
||||
.Xr alloca 3 ,
|
||||
.Xr atexit 3 ,
|
||||
.Xr getpagesize 3 ,
|
||||
.Xr memory 3 ,
|
||||
.Xr posix_memalign 3
|
||||
.Xr getpagesize 3
|
||||
.Sh STANDARDS
|
||||
The
|
||||
.Fn malloc ,
|
@ -1178,8 +1178,8 @@ static bool size2bin_init_hard(void);
|
||||
static unsigned malloc_ncpus(void);
|
||||
static bool malloc_init_hard(void);
|
||||
static void thread_cleanup(void *arg);
|
||||
void jemalloc_prefork(void);
|
||||
void jemalloc_postfork(void);
|
||||
static void jemalloc_prefork(void);
|
||||
static void jemalloc_postfork(void);
|
||||
|
||||
/*
|
||||
* End function prototypes.
|
||||
@ -1231,9 +1231,10 @@ umax2s(uintmax_t x, char *s)
|
||||
# define assert(e) do { \
|
||||
if (!(e)) { \
|
||||
char line_buf[UMAX2S_BUFSIZE]; \
|
||||
jemalloc_message(__FILE__, ":", umax2s(__LINE__, \
|
||||
line_buf), ": Failed assertion: "); \
|
||||
jemalloc_message("\"", #e, "\"\n", ""); \
|
||||
jemalloc_message("<jemalloc>: ", __FILE__, ":", \
|
||||
umax2s(__LINE__, line_buf)); \
|
||||
jemalloc_message(": Failed assertion: ", "\"", #e, \
|
||||
"\"\n"); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
@ -1250,15 +1251,17 @@ utrace(const void *addr, size_t len)
|
||||
assert(len == sizeof(malloc_utrace_t));
|
||||
|
||||
if (ut->p == NULL && ut->s == 0 && ut->r == NULL)
|
||||
malloc_printf("%d x USER malloc_init()\n", getpid());
|
||||
malloc_printf("<jemalloc>:utrace: %d malloc_init()\n",
|
||||
getpid());
|
||||
else if (ut->p == NULL && ut->r != NULL) {
|
||||
malloc_printf("%d x USER %p = malloc(%zu)\n", getpid(), ut->r,
|
||||
ut->s);
|
||||
malloc_printf("<jemalloc>:utrace: %d %p = malloc(%zu)\n",
|
||||
getpid(), ut->r, ut->s);
|
||||
} else if (ut->p != NULL && ut->r != NULL) {
|
||||
malloc_printf("%d x USER %p = realloc(%p, %zu)\n", getpid(),
|
||||
ut->r, ut->p, ut->s);
|
||||
malloc_printf("<jemalloc>:utrace: %d %p = realloc(%p, %zu)\n",
|
||||
getpid(), ut->r, ut->p, ut->s);
|
||||
} else
|
||||
malloc_printf("%d x USER free(%p)\n", getpid(), ut->p);
|
||||
malloc_printf("<jemalloc>:utrace: %d free(%p)\n", getpid(),
|
||||
ut->p);
|
||||
|
||||
return (0);
|
||||
}
|
||||
@ -2247,11 +2250,6 @@ choose_arena(void)
|
||||
* introduces a bootstrapping issue.
|
||||
*/
|
||||
#ifndef NO_TLS
|
||||
if (isthreaded == false) {
|
||||
/* Avoid the overhead of TLS for single-threaded operation. */
|
||||
return (arenas[0]);
|
||||
}
|
||||
|
||||
ret = arenas_map;
|
||||
if (ret == NULL) {
|
||||
ret = choose_arena_hard();
|
||||
@ -3405,11 +3403,9 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero)
|
||||
}
|
||||
|
||||
static inline void *
|
||||
arena_malloc(arena_t *arena, size_t size, bool zero)
|
||||
arena_malloc(size_t size, bool zero)
|
||||
{
|
||||
|
||||
assert(arena != NULL);
|
||||
assert(arena->magic == ARENA_MAGIC);
|
||||
assert(size != 0);
|
||||
assert(QUANTUM_CEILING(size) <= arena_maxclass);
|
||||
|
||||
@ -3418,7 +3414,7 @@ arena_malloc(arena_t *arena, size_t size, bool zero)
|
||||
if (opt_mag) {
|
||||
mag_rack_t *rack = mag_rack;
|
||||
if (rack == NULL) {
|
||||
rack = mag_rack_create(arena);
|
||||
rack = mag_rack_create(choose_arena());
|
||||
if (rack == NULL)
|
||||
return (NULL);
|
||||
mag_rack = rack;
|
||||
@ -3427,9 +3423,9 @@ arena_malloc(arena_t *arena, size_t size, bool zero)
|
||||
return (mag_rack_alloc(rack, size, zero));
|
||||
} else
|
||||
#endif
|
||||
return (arena_malloc_small(arena, size, zero));
|
||||
return (arena_malloc_small(choose_arena(), size, zero));
|
||||
} else
|
||||
return (arena_malloc_large(arena, size, zero));
|
||||
return (arena_malloc_large(choose_arena(), size, zero));
|
||||
}
|
||||
|
||||
static inline void *
|
||||
@ -3439,7 +3435,7 @@ imalloc(size_t size)
|
||||
assert(size != 0);
|
||||
|
||||
if (size <= arena_maxclass)
|
||||
return (arena_malloc(choose_arena(), size, false));
|
||||
return (arena_malloc(size, false));
|
||||
else
|
||||
return (huge_malloc(size, false));
|
||||
}
|
||||
@ -3449,7 +3445,7 @@ icalloc(size_t size)
|
||||
{
|
||||
|
||||
if (size <= arena_maxclass)
|
||||
return (arena_malloc(choose_arena(), size, true));
|
||||
return (arena_malloc(size, true));
|
||||
else
|
||||
return (huge_malloc(size, true));
|
||||
}
|
||||
@ -3553,7 +3549,7 @@ ipalloc(size_t alignment, size_t size)
|
||||
|
||||
if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
|
||||
&& ceil_size <= arena_maxclass))
|
||||
ret = arena_malloc(choose_arena(), ceil_size, false);
|
||||
ret = arena_malloc(ceil_size, false);
|
||||
else {
|
||||
size_t run_size;
|
||||
|
||||
@ -4113,7 +4109,7 @@ arena_ralloc(void *ptr, size_t size, size_t oldsize)
|
||||
* need to move the object. In that case, fall back to allocating new
|
||||
* space and copying.
|
||||
*/
|
||||
ret = arena_malloc(choose_arena(), size, false);
|
||||
ret = arena_malloc(size, false);
|
||||
if (ret == NULL)
|
||||
return (NULL);
|
||||
|
||||
@ -5725,7 +5721,7 @@ thread_cleanup(void *arg)
|
||||
* is threaded here.
|
||||
*/
|
||||
|
||||
void
|
||||
static void
|
||||
jemalloc_prefork(void)
|
||||
{
|
||||
bool again;
|
||||
@ -5773,7 +5769,7 @@ jemalloc_prefork(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
jemalloc_postfork(void)
|
||||
{
|
||||
unsigned i;
|
||||
|
@ -28,10 +28,24 @@
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef JEMALLOC_H_
|
||||
#define JEMALLOC_H_
|
||||
|
||||
#include "jemalloc_defs.h"
|
||||
|
||||
size_t malloc_usable_size(const void *ptr);
|
||||
|
||||
extern const char *jemalloc_options;
|
||||
extern void (*jemalloc_message)(const char *p1, const char *p2,
|
||||
const char *p3, const char *p4);
|
||||
|
||||
void jemalloc_thread_cleanup(void);
|
||||
void jemalloc_prefork(void);
|
||||
void jemalloc_postfork(void);
|
||||
#endif /* JEMALLOC_H_ */
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
||||
|
||||
|
@ -28,6 +28,14 @@
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef JEMALLOC_DEFS_H_
|
||||
#define JEMALLOC_DEFS_H_
|
||||
|
||||
/*
|
||||
* jemalloc version string.
|
||||
*/
|
||||
#undef JEMALLOC_VERSION
|
||||
|
||||
/*
|
||||
* Hyper-threaded CPUs may need a special instruction inside spin loops in
|
||||
* order to yield to another virtual CPU.
|
||||
@ -92,3 +100,5 @@
|
||||
|
||||
/* sizeof(void *) == 2^SIZEOF_PTR_2POW. */
|
||||
#undef SIZEOF_PTR_2POW
|
||||
|
||||
#endif /* JEMALLOC_DEFS_H_ */
|
||||
|
Loading…
Reference in New Issue
Block a user