diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL index 1bf51588..c5697c6d 100644 --- a/jemalloc/INSTALL +++ b/jemalloc/INSTALL @@ -27,9 +27,17 @@ any of the following arguments (not a definitive list) to 'configure': it is linked to. This works only on ELF-based systems. --with-jemalloc-prefix=<prefix> - Prefix all public APIs with <prefix>, so that, for example, malloc() - becomes <prefix>malloc(). This makes it possible to use jemalloc at the - same time as the system allocator. + Prefix all public APIs with <prefix>. For example, if <prefix> is + "prefix_", API changes like the following occur: + + malloc() --> prefix_malloc() + malloc_conf --> prefix_malloc_conf + /etc/malloc.conf --> /etc/prefix_malloc.conf + MALLOC_CONF --> PREFIX_MALLOC_CONF + + This makes it possible to use jemalloc at the same time as the + system allocator, or even to use multiple copies of jemalloc + simultaneously. By default, the prefix is "", except on OS X, where it is "je_". On OS X, jemalloc overlays the default malloc zone, but makes no attempt to actually diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac index b27955de..b613cb1b 100644 --- a/jemalloc/configure.ac +++ b/jemalloc/configure.ac @@ -256,9 +256,13 @@ else fi] ) if test "x$JEMALLOC_PREFIX" != "x" ; then - AC_DEFINE([JEMALLOC_PREFIX], [ ]) + JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"` + AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"]) + AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"]) jemalloc_prefix="$JEMALLOC_PREFIX" + jemalloc_cprefix="$JEMALLOC_CPREFIX" AC_SUBST([jemalloc_prefix]) + AC_SUBST([jemalloc_cprefix]) AC_DEFINE_UNQUOTED([JEMALLOC_P(string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix)], [${JEMALLOC_PREFIX}##string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix]) fi @@ -325,6 +329,15 @@ if test "x$enable_debug" = "x1" ; then AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) fi AC_SUBST([enable_debug]) +if test "x$enable_debug" = "x0" ; then + roff_debug=".\\\" " + roff_no_debug="" +else + roff_debug="" + roff_no_debug=".\\\" " +fi +AC_SUBST([roff_debug]) +AC_SUBST([roff_no_debug]) dnl Only optimize if not debugging. if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in index 5202a2bf..1557ecbd 100644 --- a/jemalloc/doc/jemalloc.3.in +++ b/jemalloc/doc/jemalloc.3.in @@ -38,7 +38,7 @@ .\" @(#)malloc.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD: head/lib/libc/stdlib/malloc.3 182225 2008-08-27 02:00:53Z jasone $ .\" -.Dd September 30, 2010 +.Dd October 23, 2010 .Dt JEMALLOC 3 .Os .Sh NAME @@ -85,7 +85,7 @@ .Ft int .Fn @jemalloc_prefix@mallctlbymib "const size_t *mib" "size_t miblen" "void *oldp" "size_t *oldlenp" "void *newp" "size_t newlen" .Ft const char * -.Va @jemalloc_prefix@malloc_options ; +.Va @jemalloc_prefix@malloc_conf ; .Ft void .Fn \*(lp*@jemalloc_prefix@malloc_message\*(rp "void *cbopaque" "const char *s" .Ss Experimental API @@ -381,8 +381,8 @@ is non-zero, an attempt is made to resize the allocation to be at least .Fa ( size + .Fa extra ) -bytes, though an inability to allocate the extra byte(s) will not by itself -result in failure. +bytes, though inability to allocate the extra byte(s) will not by itself result +in failure. Behavior is undefined if .Fa ( size + @@ -402,292 +402,33 @@ function causes the memory referenced by .Fa ptr to be made available for future allocations.
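The prefix machinery shown in the INSTALL hunk above composes with the malloc_conf interface that the rest of this patch introduces. As a minimal sketch (not part of the patch), assuming a build configured with --with-jemalloc-prefix="je_", so that the public API is je_malloc(), je_free(), and je_malloc_conf, jemalloc and the system allocator can coexist in one program:

    /*
     * Sketch only: assumes jemalloc was configured with
     * --with-jemalloc-prefix="je_".  Defining the conf symbol in source
     * follows the idiom shown in the man page's EXAMPLES section.
     */
    #include <stdlib.h>             /* System allocator: malloc()/free(). */
    #include <jemalloc/jemalloc.h>  /* Prefixed jemalloc API. */

    /* Options are read once, when the first allocation occurs. */
    const char *je_malloc_conf = "abort:true,narenas:1";

    int
    main(void)
    {
            void *a = malloc(64);           /* From the system allocator. */
            void *b = je_malloc(64);        /* From jemalloc. */

            /* Return each pointer to the allocator that created it. */
            free(a);
            je_free(b);
            return (0);
    }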
.Sh TUNING -Once, when the first call is made to one of these memory allocation -routines, various flags will be set or reset, which affects the -workings of this allocator implementation. +Once, when the first call is made to one of the memory allocation routines, the +allocator initializes its internals based in part on various options that can +be specified at compile- or run-time. .Pp -The +The string pointed to by the global variable +.Va @jemalloc_prefix@malloc_conf , +the .Dq name of the file referenced by the symbolic link named -.Pa /etc/jemalloc.conf , -the value of the environment variable -.Ev JEMALLOC_OPTIONS , -and the string pointed to by the global variable -.Va @jemalloc_prefix@malloc_options -will be interpreted, in that order, from left to right as flags. +.Pa /etc/@jemalloc_prefix@malloc.conf , +and the value of the environment variable +.Ev @jemalloc_cprefix@MALLOC_CONF , +will be interpreted, in that order, from left to right as options. .Pp -Each flag is a single letter, optionally prefixed by a non-negative base 10 -integer repetition count. +An options string is a comma-separated list of option:value pairs. +There is one key corresponding to each +.Dq opt.* +mallctl. For example, -.Dq 3N -is equivalent to -.Dq NNN . -Some flags control parameter magnitudes, where uppercase increases the -magnitude, and lowercase decreases the magnitude. -Other flags control boolean parameters, where uppercase indicates that a -behavior is set, or on, and lowercase means that a behavior is not set, or off. -.Bl -tag -width indent -.It A -All warnings (except for the warning about unknown -flags being set) become fatal. -The process will call -.Xr abort 3 -in these cases. -@roff_prof@.It B -@roff_prof@Double/halve the maximum backtrace depth when profiling memory -@roff_prof@allocation activity. -@roff_prof@The default is 128. -.It C -Double/halve the size of the maximum size class that is a multiple of the -cacheline size (64). -Above this size, subpage spacing (256 bytes) is used for size classes. -The default value is 512 bytes. -.It D -Halve/double the per-arena minimum ratio of active to dirty pages. -Some dirty unused pages may be allowed to accumulate, within the limit set by -the ratio (or one chunk worth of dirty pages, whichever is greater), before -informing the kernel about some of those pages via -.Xr madvise 2 . -This provides the kernel with sufficient information to recycle dirty pages if -physical memory becomes scarce and the pages remain unused. -The default minimum ratio is 32:1; -.Ev JEMALLOC_OPTIONS=6D -will disable dirty page purging. -@roff_prof@.It E -@roff_prof@Activate/deactivate profiling. -@roff_prof@This is a secondary control mechanism that makes it possible to -@roff_prof@start the application with profiling enabled (see the -@roff_prof@.Dq F -@roff_prof@option) but inactive, then toggle profiling at any time during -@roff_prof@program execution with the -@roff_prof@.Dq prof.active -@roff_prof@mallctl. -@roff_prof@This option is enabled by default. -@roff_prof@.It F -@roff_prof@Profile memory allocation activity, and use an -@roff_prof@.Xr atexit 3 -@roff_prof@function to dump final memory usage to a file named according to -@roff_prof@the pattern -@roff_prof@.Pa <prefix>.<pid>.<seq>.f.heap , -@roff_prof@where -@roff_prof@.Pa <prefix> -@roff_prof@is controlled by the -@roff_prof@JEMALLOC_PROF_PREFIX -@roff_prof@environment variable. -@roff_prof@See the -@roff_prof@.Dq B -@roff_prof@option for backtrace depth control.
-@roff_prof@See the -@roff_prof@.Dq E -@roff_prof@option for on-the-fly activation/deactivation. -@roff_prof@See the -@roff_prof@.Dq S -@roff_prof@option for probabilistic sampling control. -@roff_prof@See the -@roff_prof@.Dq R -@roff_prof@option for control of cumulative sample reporting. -@roff_prof@See the -@roff_prof@.Dq T -@roff_prof@option for control of per thread backtrace caching. -@roff_prof@See the -@roff_prof@.Dq I -@roff_prof@option for information on interval-triggered profile dumping, and the -@roff_prof@.Dq U -@roff_prof@option for information on high-water-triggered profile dumping. -@roff_prof@Profile output is compatible with the included pprof Perl script, -@roff_prof@which originates from the google-perftools package -@roff_prof@(http://code.google.com/p/google-perftools/). -@roff_tcache@.It G -@roff_tcache@Double/halve the approximate interval (counted in terms of -@roff_tcache@thread-specific cache allocation/deallocation events) between full -@roff_tcache@thread-specific cache garbage collection sweeps. -@roff_tcache@Garbage collection is actually performed incrementally, one size -@roff_tcache@class at a time, in order to avoid large collection pauses. -@roff_tcache@The default sweep interval is 8192; -@roff_tcache@.Ev JEMALLOC_OPTIONS=14g -@roff_tcache@will disable garbage collection. -@roff_tcache@.It H -@roff_tcache@Enable/disable thread-specific caching. -@roff_tcache@When there are multiple threads, each thread uses a -@roff_tcache@thread-specific cache for objects up to a certain size. -@roff_tcache@Thread-specific caching allows many allocations to be satisfied -@roff_tcache@without performing any thread synchronization, at the cost of -@roff_tcache@increased memory use. -@roff_tcache@See the -@roff_tcache@.Dq G -@roff_tcache@and -@roff_tcache@.Dq M -@roff_tcache@options for related tuning information. -@roff_tcache@This option is enabled by default. -@roff_prof@.It I -@roff_prof@Double/halve the average interval between memory profile dumps, as -@roff_prof@measured in bytes of allocation activity. -@roff_prof@The actual interval between dumps may be sporadic because -@roff_prof@decentralized allocation counters are used to avoid synchronization -@roff_prof@bottlenecks. -@roff_prof@Profiles are dumped to files named according to the pattern -@roff_prof@.Pa <prefix>.<pid>.<seq>.i.heap , -@roff_prof@where -@roff_prof@.Pa <prefix> -@roff_prof@is controlled by the -@roff_prof@JEMALLOC_PROF_PREFIX -@roff_prof@environment variable. -@roff_prof@By default, interval-triggered profile dumping is disabled. -@roff_prof@This is internally encoded as (1 << -1), and each -@roff_prof@.Dq I -@roff_prof@that is specified increments the shift amount. -@roff_prof@Therefore, e.g. -@roff_prof@.Ev JEMALLOC_OPTIONS=31I -@roff_prof@specifies a dump interval of 1 GiB. -@roff_fill@.It J -@roff_fill@Each byte of new memory allocated by -@roff_fill@.Fn @jemalloc_prefix@malloc -@roff_fill@or -@roff_fill@.Fn @jemalloc_prefix@realloc -@roff_fill@will be initialized to 0xa5. -@roff_fill@All memory returned by -@roff_fill@.Fn @jemalloc_prefix@free -@roff_fill@or -@roff_fill@.Fn @jemalloc_prefix@realloc -@roff_fill@will be initialized to 0x5a. -@roff_fill@This is intended for debugging and will impact performance -@roff_fill@negatively. -.It K -Double/halve the virtual memory chunk size. -The default chunk size is 4 MiB. -@roff_prof@.It L -@roff_prof@Use an -@roff_prof@.Xr atexit 3 -@roff_prof@function to report memory leaks. -@roff_prof@See the -@roff_prof@.Dq B -@roff_prof@option for backtrace depth control.
-@roff_prof@See the -@roff_prof@.Dq F option for information on analyzing heap profile output. -@roff_prof@This option is disabled by default. -@roff_tcache@.It M -@roff_tcache@Double/halve the maximum size class to cache. -@roff_tcache@At a minimum, all small size classes are cached, and at a maximum -@roff_tcache@all large size classes are cached. -@roff_tcache@The default maximum is 32 KiB. -.It N -Double/halve the number of arenas. -The default number of arenas is four times the number of CPUs, or one if there -is a single CPU. -@roff_swap@.It O -@roff_swap@Over-commit memory as a side effect of using anonymous -@roff_swap@.Xr mmap 2 -@roff_swap@@roff_dss@ and -@roff_swap@@roff_dss@.Xr sbrk 2 -@roff_swap@for virtual memory allocation. -@roff_swap@In order for overcommit to be disabled, the -@roff_swap@.Dq swap.fds -@roff_swap@mallctl must have been successfully written to. -@roff_swap@This option is enabled by default. -.It P -The -.Fn @jemalloc_prefix@malloc_stats_print -function is called at program exit via an -.Xr atexit 3 -function. -@roff_stats@This has the potential to cause deadlock for a multi-threaded -@roff_stats@process that exits while one or more threads are executing in the -@roff_stats@memory allocation functions. -@roff_stats@Therefore, this option should only be used with care; it is -@roff_stats@primarily intended as a performance tuning aid during application -@roff_stats@development. -.It Q -Double/halve the size of the maximum size class that is a multiple of the -quantum (8 or 16 bytes, depending on architecture). -Above this size, cacheline spacing is used for size classes. -The default value is 128 bytes. -@roff_prof@.It R -@roff_prof@Enable/disable reporting of cumulative object/byte counts in profile -@roff_prof@dumps. -@roff_prof@If this option is enabled, every unique backtrace must be stored for -@roff_prof@the duration of execution. -@roff_prof@Depending on the application, this can impose a large memory -@roff_prof@overhead, and the cumulative counts are not always of interest. -@roff_prof@See the -@roff_prof@.Dq T -@roff_prof@option for control of per thread backtrace caching, which has -@roff_prof@important interactions. -@roff_prof@This option is enabled by default. -@roff_prof@.It S -@roff_prof@Double/halve the average interval between allocation samples, as -@roff_prof@measured in bytes of allocation activity. -@roff_prof@Increasing the sampling interval decreases profile fidelity, but -@roff_prof@also decreases the computational overhead. -@roff_prof@The default sample interval is one (i.e. all allocations are -@roff_prof@sampled). -@roff_prof@.It T -@roff_prof@Double/halve the maximum per thread backtrace cache used for heap -@roff_prof@profiling. -@roff_prof@A backtrace can only be discarded if the -@roff_prof@.Dq R -@roff_prof@option is disabled, and no thread caches currently refer to the -@roff_prof@backtrace. -@roff_prof@Therefore, a backtrace cache limit should be imposed if the -@roff_prof@intention is to limit how much memory is used by backtraces. -@roff_prof@By default, no limit is imposed. -@roff_prof@This is internally encoded as (1 << -1), and each -@roff_prof@.Dq T -@roff_prof@that is specified increments the shift amount. -@roff_prof@Therefore, e.g. -@roff_prof@.Ev JEMALLOC_OPTIONS=11T -@roff_prof@specifies a backtrace cache limit of 1024 backtraces. -@roff_prof@.It U -@roff_prof@Trigger a memory profile dump every time the total virtual memory -@roff_prof@exceeds the previous maximum. 
-@roff_prof@Profiles are dumped to files named according to the pattern -@roff_prof@.Pa <prefix>.<pid>.<seq>.u.heap , -@roff_prof@where -@roff_prof@.Pa <prefix> -@roff_prof@is controlled by the -@roff_prof@JEMALLOC_PROF_PREFIX -@roff_prof@environment variable. -@roff_prof@This option is disabled by default. -@roff_sysv@.It V -@roff_sysv@Attempting to allocate zero bytes will return a -@roff_sysv@.Dv NULL -@roff_sysv@pointer instead of a valid pointer. -@roff_sysv@(The default behavior is to make a minimal allocation and return a -@roff_sysv@pointer to it.) -@roff_sysv@This option is provided for System V compatibility. -@roff_sysv@@roff_xmalloc@This option is incompatible with the -@roff_sysv@@roff_xmalloc@.Dq X -@roff_sysv@@roff_xmalloc@option. -@roff_xmalloc@.It X -@roff_xmalloc@Rather than return failure for any allocation function, display a -@roff_xmalloc@diagnostic message on -@roff_xmalloc@.Dv STDERR_FILENO -@roff_xmalloc@and cause the program to drop core (using -@roff_xmalloc@.Xr abort 3 ) . -@roff_xmalloc@This option should be set at compile time by including the -@roff_xmalloc@following in the source code: -@roff_xmalloc@.Bd -literal -offset indent -@roff_xmalloc@@jemalloc_prefix@malloc_options = "X"; -@roff_xmalloc@.Ed -@roff_fill@.It Z -@roff_fill@Each byte of new memory allocated by -@roff_fill@.Fn @jemalloc_prefix@malloc -@roff_fill@or -@roff_fill@.Fn @jemalloc_prefix@realloc -@roff_fill@will be initialized to 0. -@roff_fill@Note that this initialization only happens once for each byte, so -@roff_fill@.Fn @jemalloc_prefix@realloc -@roff_fill@calls do not zero memory that was previously allocated. -@roff_fill@This is intended for debugging and will impact performance -@roff_fill@negatively. -.El -.Pp -@roff_fill@The -@roff_fill@.Dq J -@roff_fill@and -@roff_fill@.Dq Z -@roff_fill@options are intended for testing and debugging. -@roff_fill@An application which changes its behavior when these options are used -@roff_fill@is flawed. +.Dq abort:true,narenas:1 +sets the +.Dq opt.abort +and +.Dq opt.narenas +options. +Some options have boolean values (true/false), others have integer values (base +8, 10, or 16, depending on prefix), and yet others have raw string values. .Sh IMPLEMENTATION NOTES @roff_dss@Traditionally, allocators have used @roff_dss@.Xr sbrk 2 @@ -715,7 +456,7 @@ does not make much use of the allocation functions. .Pp @roff_tcache@In addition to multiple arenas, this allocator supports @roff_tcache@thread-specific caching for small and large objects, in order to -@roff_tcache@make it possible to completely avoid synchronization for most small +@roff_tcache@make it possible to completely avoid synchronization for most @roff_tcache@allocation requests. @roff_tcache@Such caching allows very fast allocation in the common case, but it @roff_tcache@increases memory usage and fragmentation, since a bounded number of @@ -744,31 +485,37 @@ The combination of chunk alignment and chunk page maps makes it possible to determine all metadata regarding small and large allocations in constant time. .Pp Small objects are managed in groups by page runs. -Each run maintains a bitmap that tracks which regions are in use. +Each run maintains a frontier and free list to track which regions are in use. @roff_tiny@Allocation requests that are no more than half the quantum (8 or 16, @roff_tiny@depending on architecture) are rounded up to the nearest power of @roff_tiny@two.
Allocation requests that are @roff_tiny@more than half the quantum, but no more than the minimum cacheline-multiple size class (see the -.Dq Q +.Dq opt.lg_qspace_max option) are rounded up to the nearest multiple of the @roff_tiny@quantum. @roff_no_tiny@quantum (8 or 16, depending on architecture). Allocation requests that are more than the minimum cacheline-multiple size class, but no more than the minimum subpage-multiple size class (see the -.Dq C +.Dq opt.lg_cspace_max option) are rounded up to the nearest multiple of the cacheline size (64). Allocation requests that are more than the minimum subpage-multiple size class, but no more than the maximum subpage-multiple size class are rounded up to the nearest multiple of the subpage size (256). Allocation requests that are more than the maximum subpage-multiple size class, but small enough to fit in an arena-managed chunk (see the -.Dq K +.Dq opt.lg_chunk option), are rounded up to the nearest run size. Allocation requests that are too large to fit in an arena-managed chunk are rounded up to the nearest multiple of the chunk size. .Pp +Allocations are packed tightly together, which can be an issue for +multi-threaded applications. +If you need to assure that allocations do not suffer from cacheline sharing, +round your allocation requests up to the nearest multiple of the cacheline +size, or specify cacheline alignment when allocating. +.Pp Assuming 4 MiB chunks, 4 KiB pages, and a 16 byte quantum on a 64-bit system, the size classes in each category are as follows: .TS @@ -825,12 +572,6 @@ Huge;4 MiB ;;12 MiB ;;... .TE -.Pp -Allocations are packed tightly together, which can be an issue for -multi-threaded applications. -If you need to assure that allocations do not suffer from cacheline sharing, -round your allocation requests up to the nearest multiple of the cacheline -size, or specify cacheline alignment when allocating. .Sh MALLCTL NAMESPACE The following names are defined in the namespace accessible via the .Fn @jemalloc_prefix@mallctl* @@ -845,6 +586,10 @@ introspection. @roff_stats@ equal to @roff_stats@.Dq arenas.narenas @roff_stats@can be used to access the summation of statistics from all arenas. +.Pp +Take special note of the +.Dq epoch +mallctl, which controls refreshing of cached dynamic statistics. .Bl -ohang .\"----------------------------------------------------------------------------- .It Sy "version (const char *) r-" @@ -861,48 +606,6 @@ Return the current epoch. This is useful for detecting whether another thread caused a refresh. .Ed .\"----------------------------------------------------------------------------- -@roff_tcache@.It Sy "tcache.flush (void) --" -@roff_tcache@.Bd -ragged -offset indent -compact -@roff_tcache@Flush calling thread's tcache. -@roff_tcache@This interface releases all cached objects and internal data -@roff_tcache@structures associated with the calling thread's thread-specific -@roff_tcache@cache. -@roff_tcache@Ordinarily, this interface need not be called, since automatic -@roff_tcache@periodic incremental garbage collection occurs, and the thread -@roff_tcache@cache is automatically discarded when a thread exits. -@roff_tcache@However, garbage collection is triggered by allocation activity, -@roff_tcache@so it is possible for a thread that stops allocating/deallocating -@roff_tcache@to retain its cache indefinitely, in which case the developer may -@roff_tcache@find manual flushing useful. 
-.Ed -.\"----------------------------------------------------------------------------- -.It Sy "thread.arena (unsigned) rw" -.Bd -ragged -offset indent -compact -Get or set the arena associated with the calling thread. -The arena index must be less than the maximum number of arenas (see the -.Dq arenas.narenas -mallctl). -If the specified arena was not initialized beforehand (see the -.Dq arenas.initialized -mallctl), it will be automatically initialized as a side effect of calling this -interface. -.Ed -.\"----------------------------------------------------------------------------- -@roff_stats@.It Sy "thread.allocated (uint64_t) r-" -@roff_stats@.Bd -ragged -offset indent -compact -@roff_stats@Get the total number of bytes ever allocated by the calling thread. -@roff_stats@This counter has the potential to wrap around; it is up to the -@roff_stats@application to appropriately interpret the counter in such cases. -@roff_stats@.Ed -.\"----------------------------------------------------------------------------- -@roff_stats@.It Sy "thread.deallocated (uint64_t) r-" -@roff_stats@.Bd -ragged -offset indent -compact -@roff_stats@Get the total number of bytes ever deallocated by the calling -@roff_stats@thread. -@roff_stats@This counter has the potential to wrap around; it is up to the -@roff_stats@application to appropriately interpret the counter in such cases. -@roff_stats@.Ed -.\"----------------------------------------------------------------------------- .It Sy "config.debug (bool) r-" .Bd -ragged -offset indent -compact --enable-debug was specified during build configuration. @@ -980,150 +683,384 @@ interface. .\"----------------------------------------------------------------------------- .It Sy "opt.abort (bool) r-" .Bd -ragged -offset indent -compact -See the -.Dq A -option. +Abort-on-warning enabled/disabled. +If true, most warnings are fatal. +The process will call +.Xr abort 3 +in these cases. +This option is +@roff_debug@enabled +@roff_no_debug@disabled +by default. .Ed .\"----------------------------------------------------------------------------- -@roff_fill@.It Sy "opt.junk (bool) r-" -@roff_fill@.Bd -ragged -offset indent -compact -@roff_fill@See the -@roff_fill@.Dq J -@roff_fill@option. -@roff_fill@.Ed -.\"----------------------------------------------------------------------------- -@roff_fill@.It Sy "opt.zero (bool) r-" -@roff_fill@.Bd -ragged -offset indent -compact -@roff_fill@See the -@roff_fill@.Dq Z -@roff_fill@option. -@roff_fill@.Ed -.\"----------------------------------------------------------------------------- -@roff_xmalloc@.It Sy "opt.xmalloc (bool) r-" -@roff_xmalloc@.Bd -ragged -offset indent -compact -@roff_xmalloc@See the -@roff_xmalloc@.Dq X -@roff_xmalloc@option. -@roff_xmalloc@.Ed -.\"----------------------------------------------------------------------------- -@roff_tcache@.It Sy "opt.tcache (bool) r-" -@roff_tcache@.Bd -ragged -offset indent -compact -@roff_tcache@See the -@roff_tcache@.Dq H -@roff_tcache@option. -@roff_tcache@.Ed -.\"----------------------------------------------------------------------------- -@roff_tcache@.It Sy "opt.lg_tcache_gc_sweep (ssize_t) r-" -@roff_tcache@.Bd -ragged -offset indent -compact -@roff_tcache@See the -@roff_tcache@.Dq G -@roff_tcache@option. -@roff_tcache@.Ed -.\"----------------------------------------------------------------------------- -.It Sy "opt.stats_print (bool) r-" -.Bd -ragged -offset indent -compact -See the -.Dq P -option. 
-.Ed -.\"----------------------------------------------------------------------------- -@roff_prof@.It Sy "opt.prof (bool) r-" -@roff_prof@.Bd -ragged -offset indent -compact -@roff_prof@See the -@roff_prof@.Dq F -@roff_prof@option. -@roff_prof@.Ed -.\"----------------------------------------------------------------------------- -@roff_prof@.It Sy "opt.lg_prof_bt_max (size_t) r-" -@roff_prof@.Bd -ragged -offset indent -compact -@roff_prof@See the -@roff_prof@.Dq B -@roff_prof@option. -@roff_prof@.Ed -.\"----------------------------------------------------------------------------- -@roff_prof@.It Sy "opt.prof_accum (bool) r-" -@roff_prof@.Bd -ragged -offset indent -compact -@roff_prof@See the -@roff_prof@.Dq R -@roff_prof@option. -@roff_prof@.Ed -.\"----------------------------------------------------------------------------- -@roff_prof@.It Sy "opt.lg_prof_tcmax (ssize_t) r-" -@roff_prof@.Bd -ragged -offset indent -compact -@roff_prof@See the -@roff_prof@.Dq T -@roff_prof@option. -@roff_prof@.Ed -.\"----------------------------------------------------------------------------- -@roff_prof@.It Sy "opt.lg_prof_sample (ssize_t) r-" -@roff_prof@.Bd -ragged -offset indent -compact -@roff_prof@See the -@roff_prof@.Dq S -@roff_prof@option. -@roff_prof@.Ed -.\"----------------------------------------------------------------------------- -@roff_prof@.It Sy "opt.lg_prof_interval (ssize_t) r-" -@roff_prof@.Bd -ragged -offset indent -compact -@roff_prof@See the -@roff_prof@.Dq I -@roff_prof@option. -@roff_prof@.Ed -.\"----------------------------------------------------------------------------- -@roff_prof@.It Sy "opt.prof_udump (bool) r-" -@roff_prof@.Bd -ragged -offset indent -compact -@roff_prof@See the -@roff_prof@.Dq U -@roff_prof@option. -@roff_prof@.Ed -.\"----------------------------------------------------------------------------- -@roff_prof@.It Sy "opt.prof_leak (bool) r-" -@roff_prof@.Bd -ragged -offset indent -compact -@roff_prof@See the -@roff_prof@.Dq L -@roff_prof@option. -@roff_prof@.Ed -.\"----------------------------------------------------------------------------- .It Sy "opt.lg_qspace_max (size_t) r-" .Bd -ragged -offset indent -compact -See the -.Dq Q -option. +Size (log base 2) of the maximum size class that is a multiple of the quantum +(8 or 16 bytes, depending on architecture). +Above this size, cacheline spacing is used for size classes. +The default value is 128 bytes (2^7). .Ed .\"----------------------------------------------------------------------------- .It Sy "opt.lg_cspace_max (size_t) r-" .Bd -ragged -offset indent -compact -See the -.Dq C -option. -.Ed -.\"----------------------------------------------------------------------------- -.It Sy "opt.lg_dirty_mult (ssize_t) r-" -.Bd -ragged -offset indent -compact -See the -.Dq D -option. +Size (log base 2) of the maximum size class that is a multiple of the cacheline +size (64). +Above this size, subpage spacing (256 bytes) is used for size classes. +The default value is 512 bytes (2^9). .Ed .\"----------------------------------------------------------------------------- .It Sy "opt.lg_chunk (size_t) r-" .Bd -ragged -offset indent -compact -See the -.Dq K -option. +Virtual memory chunk size (log base 2). +The default chunk size is 4 MiB (2^22). .Ed .\"----------------------------------------------------------------------------- +.It Sy "opt.narenas (size_t) r-" +.Bd -ragged -offset indent -compact +Maximum number of arenas to use. 
+The default maximum number of arenas is four times the number of CPUs, or one +if there is a single CPU. +.Ed +.\"----------------------------------------------------------------------------- +.It Sy "opt.lg_dirty_mult (ssize_t) r-" +.Bd -ragged -offset indent -compact +Per-arena minimum ratio (log base 2) of active to dirty pages. +Some dirty unused pages may be allowed to accumulate, within the limit set by +the ratio (or one chunk worth of dirty pages, whichever is greater), before +informing the kernel about some of those pages via +.Xr madvise 2 +or a similar system call. +This provides the kernel with sufficient information to recycle dirty pages if +physical memory becomes scarce and the pages remain unused. +The default minimum ratio is 32:1 (2^5:1); an option value of -1 will disable +dirty page purging. +.Ed +.\"----------------------------------------------------------------------------- +.It Sy "opt.stats_print (bool) r-" +.Bd -ragged -offset indent -compact +Enable/disable statistics printing at exit. +If enabled, the +.Fn @jemalloc_prefix@malloc_stats_print +function is called at program exit via an +.Xr atexit 3 +function. +@roff_stats@This has the potential to cause deadlock for a multi-threaded +@roff_stats@process that exits while one or more threads are executing in the +@roff_stats@memory allocation functions. +@roff_stats@Therefore, this option should only be used with care; it is +@roff_stats@primarily intended as a performance tuning aid during application +@roff_stats@development. +This option is disabled by default. +.Ed +.\"----------------------------------------------------------------------------- +@roff_fill@.It Sy "opt.junk (bool) r-" +@roff_fill@.Bd -ragged -offset indent -compact +@roff_fill@Junk filling enabled/disabled. +@roff_fill@If enabled, each byte of uninitialized allocated memory will be +@roff_fill@initialized to 0xa5. +@roff_fill@All deallocated memory will be initialized to 0x5a. +@roff_fill@This is intended for debugging and will impact performance +@roff_fill@negatively. +@roff_fill@This option is +@roff_fill@@roff_debug@enabled +@roff_fill@@roff_no_debug@disabled +@roff_fill@by default. +@roff_fill@.Ed +.\"----------------------------------------------------------------------------- +@roff_fill@.It Sy "opt.zero (bool) r-" +@roff_fill@.Bd -ragged -offset indent -compact +@roff_fill@Zero filling enabled/disabled. +@roff_fill@If enabled, each byte of uninitialized allocated memory will be +@roff_fill@initialized to 0. +@roff_fill@Note that this initialization only happens once for each byte, so +@roff_fill@.Fn @jemalloc_prefix@realloc +@roff_fill@calls do not zero memory that was previously allocated. +@roff_fill@This is intended for debugging and will impact performance +@roff_fill@negatively. +@roff_fill@This option is disabled by default. +@roff_fill@.Ed +.\"----------------------------------------------------------------------------- +@roff_sysv@.It Sy "opt.sysv (bool) r-" +@roff_sysv@.Bd -ragged -offset indent -compact +@roff_sysv@If enabled, attempting to allocate zero bytes will return a +@roff_sysv@.Dv NULL +@roff_sysv@pointer instead of a valid pointer. +@roff_sysv@(The default behavior is to make a minimal allocation and return a +@roff_sysv@pointer to it.) +@roff_sysv@This option is provided for System V compatibility. +@roff_sysv@@roff_xmalloc@This option is incompatible with the +@roff_sysv@@roff_xmalloc@.Dq opt.xmalloc +@roff_sysv@@roff_xmalloc@option. +@roff_sysv@This option is disabled by default. 
+@roff_sysv@.Ed +.\"----------------------------------------------------------------------------- +@roff_xmalloc@.It Sy "opt.xmalloc (bool) r-" +@roff_xmalloc@.Bd -ragged -offset indent -compact +@roff_xmalloc@Abort-on-out-of-memory enabled/disabled. +@roff_xmalloc@If enabled, rather than returning failure for any allocation +@roff_xmalloc@function, display a diagnostic message on +@roff_xmalloc@.Dv STDERR_FILENO +@roff_xmalloc@and cause the program to drop core (using +@roff_xmalloc@.Xr abort 3 ) . +@roff_xmalloc@If an application is designed to depend on this behavior, set the +@roff_xmalloc@option at compile time by including the following in the source +@roff_xmalloc@code: +@roff_xmalloc@.Bd -literal -offset indent +@roff_xmalloc@@jemalloc_prefix@malloc_conf = "xmalloc:true"; +@roff_xmalloc@.Ed +@roff_xmalloc@.Pp +@roff_xmalloc@This option is disabled by default. +@roff_xmalloc@.Ed +.\"----------------------------------------------------------------------------- +@roff_tcache@.It Sy "opt.tcache (bool) r-" +@roff_tcache@.Bd -ragged -offset indent -compact +@roff_tcache@Thread-specific caching enabled/disabled. +@roff_tcache@When there are multiple threads, each thread uses a +@roff_tcache@thread-specific cache for objects up to a certain size. +@roff_tcache@Thread-specific caching allows many allocations to be satisfied +@roff_tcache@without performing any thread synchronization, at the cost of +@roff_tcache@increased memory use. +@roff_tcache@See the +@roff_tcache@.Dq opt.lg_tcache_gc_sweep +@roff_tcache@and +@roff_tcache@.Dq opt.lg_tcache_max +@roff_tcache@options for related tuning information. +@roff_tcache@This option is enabled by default. +@roff_tcache@.Ed +.\"----------------------------------------------------------------------------- +@roff_tcache@.It Sy "opt.lg_tcache_gc_sweep (ssize_t) r-" +@roff_tcache@.Bd -ragged -offset indent -compact +@roff_tcache@Approximate interval (log base 2) between full thread-specific +@roff_tcache@cache garbage collection sweeps, counted in terms of +@roff_tcache@thread-specific cache allocation/deallocation events. +@roff_tcache@Garbage collection is actually performed incrementally, one size +@roff_tcache@class at a time, in order to avoid large collection pauses. +@roff_tcache@The default sweep interval is 8192 (2^13); setting this option to +@roff_tcache@-1 will disable garbage collection. +@roff_tcache@.Ed +.\"----------------------------------------------------------------------------- +@roff_tcache@.It Sy "opt.lg_tcache_max (size_t) r-" +@roff_tcache@.Bd -ragged -offset indent -compact +@roff_tcache@Maximum size class (log base 2) to cache in the thread-specific +@roff_tcache@cache. +@roff_tcache@At a minimum, all small size classes are cached, and at a maximum +@roff_tcache@all large size classes are cached. +@roff_tcache@The default maximum is 32 KiB (2^15). +@roff_tcache@.Ed +.\"----------------------------------------------------------------------------- +@roff_prof@.It Sy "opt.prof (bool) r-" +@roff_prof@.Bd -ragged -offset indent -compact +@roff_prof@Memory profiling enabled/disabled. +@roff_prof@If enabled, profile memory allocation activity, and use an +@roff_prof@.Xr atexit 3 +@roff_prof@function to dump final memory usage to a file named according to +@roff_prof@the pattern +@roff_prof@.Pa <prefix>.<pid>.<seq>.f.heap , +@roff_prof@where +@roff_prof@.Pa <prefix> +@roff_prof@is controlled by the +@roff_prof@.Dq opt.prof_prefix +@roff_prof@option. +@roff_prof@See the +@roff_prof@.Dq opt.lg_prof_bt_max +@roff_prof@option for backtrace depth control.
+@roff_prof@See the +@roff_prof@.Dq opt.prof_active +@roff_prof@option for on-the-fly activation/deactivation. +@roff_prof@See the +@roff_prof@.Dq opt.lg_prof_sample +@roff_prof@option for probabilistic sampling control. +@roff_prof@See the +@roff_prof@.Dq opt.prof_accum +@roff_prof@option for control of cumulative sample reporting. +@roff_prof@See the +@roff_prof@.Dq opt.lg_prof_tcmax +@roff_prof@option for control of per thread backtrace caching. +@roff_prof@See the +@roff_prof@.Dq opt.lg_prof_interval +@roff_prof@option for information on interval-triggered profile dumping, and the +@roff_prof@.Dq opt.prof_gdump +@roff_prof@option for information on high-water-triggered profile dumping. +@roff_prof@Profile output is compatible with the included pprof Perl script, +@roff_prof@which originates from the google-perftools package +@roff_prof@(http://code.google.com/p/google-perftools/). +@roff_prof@.Ed +.\"----------------------------------------------------------------------------- +@roff_prof@.It Sy "opt.prof_prefix (const char *) r-" +@roff_prof@.Bd -ragged -offset indent -compact +@roff_prof@Filename prefix for profile dumps. +@roff_prof@If the prefix is set to the empty string, no automatic dumps will +@roff_prof@occur; this is primarily useful for disabling the automatic final +@roff_prof@heap dump (which also disables leak reporting, if enabled). +@roff_prof@The default prefix is +@roff_prof@.Pa jeprof . +@roff_prof@.Ed +.\"----------------------------------------------------------------------------- +@roff_prof@.It Sy "opt.lg_prof_bt_max (size_t) r-" +@roff_prof@.Bd -ragged -offset indent -compact +@roff_prof@Maximum backtrace depth (log base 2) when profiling memory +@roff_prof@allocation activity. +@roff_prof@The default is 128 (2^7). +@roff_prof@.Ed +.\"----------------------------------------------------------------------------- +@roff_prof@.It Sy "opt.prof_active (bool) r-" +@roff_prof@.Bd -ragged -offset indent -compact +@roff_prof@Profiling activated/deactivated. +@roff_prof@This is a secondary control mechanism that makes it possible to +@roff_prof@start the application with profiling enabled (see the +@roff_prof@.Dq opt.prof +@roff_prof@option) but inactive, then toggle profiling at any time during +@roff_prof@program execution with the +@roff_prof@.Dq prof.active +@roff_prof@mallctl. +@roff_prof@This option is enabled by default. +@roff_prof@.Ed +.\"----------------------------------------------------------------------------- +@roff_prof@.It Sy "opt.lg_prof_sample (ssize_t) r-" +@roff_prof@.Bd -ragged -offset indent -compact +@roff_prof@Average interval (log base 2) between allocation samples, as +@roff_prof@measured in bytes of allocation activity. +@roff_prof@Increasing the sampling interval decreases profile fidelity, but +@roff_prof@also decreases the computational overhead. +@roff_prof@The default sample interval is 1 (2^0) (i.e. all allocations are +@roff_prof@sampled). +@roff_prof@.Ed +.\"----------------------------------------------------------------------------- +@roff_prof@.It Sy "opt.prof_accum (bool) r-" +@roff_prof@.Bd -ragged -offset indent -compact +@roff_prof@Reporting of cumulative object/byte counts in profile dumps +@roff_prof@enabled/disabled. +@roff_prof@If this option is enabled, every unique backtrace must be stored for +@roff_prof@the duration of execution. +@roff_prof@Depending on the application, this can impose a large memory +@roff_prof@overhead, and the cumulative counts are not always of interest. 
+@roff_prof@See the +@roff_prof@.Dq opt.lg_prof_tcmax +@roff_prof@option for control of per thread backtrace caching, which has +@roff_prof@important interactions. +@roff_prof@This option is enabled by default. +@roff_prof@.Ed +.\"----------------------------------------------------------------------------- +@roff_prof@.It Sy "opt.lg_prof_tcmax (ssize_t) r-" +@roff_prof@.Bd -ragged -offset indent -compact +@roff_prof@Maximum per thread backtrace cache (log base 2) used for heap +@roff_prof@profiling. +@roff_prof@A backtrace can only be discarded if the +@roff_prof@.Dq opt.prof_accum +@roff_prof@option is disabled, and no thread caches currently refer to the +@roff_prof@backtrace. +@roff_prof@Therefore, a backtrace cache limit should be imposed if the +@roff_prof@intention is to limit how much memory is used by backtraces. +@roff_prof@By default, no limit is imposed (encoded as -1). +@roff_prof@.Ed +.\"----------------------------------------------------------------------------- +@roff_prof@.It Sy "opt.lg_prof_interval (ssize_t) r-" +@roff_prof@.Bd -ragged -offset indent -compact +@roff_prof@Average interval (log base 2) between memory profile dumps, as +@roff_prof@measured in bytes of allocation activity. +@roff_prof@The actual interval between dumps may be sporadic because +@roff_prof@decentralized allocation counters are used to avoid synchronization +@roff_prof@bottlenecks. +@roff_prof@Profiles are dumped to files named according to the pattern +@roff_prof@.Pa <prefix>.<pid>.<seq>.i.heap , +@roff_prof@where +@roff_prof@.Pa <prefix> +@roff_prof@is controlled by the +@roff_prof@.Dq opt.prof_prefix +@roff_prof@option. +@roff_prof@By default, interval-triggered profile dumping is disabled (encoded +@roff_prof@as -1). +@roff_prof@.Ed +.\"----------------------------------------------------------------------------- +@roff_prof@.It Sy "opt.prof_gdump (bool) r-" +@roff_prof@.Bd -ragged -offset indent -compact +@roff_prof@Trigger a memory profile dump every time the total virtual memory +@roff_prof@exceeds the previous maximum. +@roff_prof@Profiles are dumped to files named according to the pattern +@roff_prof@.Pa <prefix>.<pid>.<seq>.u.heap , +@roff_prof@where +@roff_prof@.Pa <prefix> +@roff_prof@is controlled by the +@roff_prof@.Dq opt.prof_prefix +@roff_prof@option. +@roff_prof@This option is disabled by default. +@roff_prof@.Ed +.\"----------------------------------------------------------------------------- +@roff_prof@.It Sy "opt.prof_leak (bool) r-" +@roff_prof@.Bd -ragged -offset indent -compact +@roff_prof@Leak reporting enabled/disabled. +@roff_prof@If enabled, use an +@roff_prof@.Xr atexit 3 +@roff_prof@function to report memory leaks detected by allocation sampling. +@roff_prof@See the +@roff_prof@.Dq opt.lg_prof_bt_max +@roff_prof@option for backtrace depth control. +@roff_prof@See the +@roff_prof@.Dq opt.prof +@roff_prof@option for information on analyzing heap profile output. +@roff_prof@This option is disabled by default. +@roff_prof@.Ed +.\"----------------------------------------------------------------------------- .It Sy "opt.overcommit (bool) r-" .Bd -ragged -offset indent -compact -See the -.Dq O -option. +@roff_swap@Over-commit enabled/disabled. +@roff_swap@If enabled, over-commit memory as a side effect of using anonymous +@roff_swap@.Xr mmap 2 +@roff_swap@@roff_dss@ and +@roff_swap@@roff_dss@.Xr sbrk 2 +@roff_swap@for virtual memory allocation. +@roff_swap@In order for overcommit to be disabled, the +@roff_swap@.Dq swap.fds +@roff_swap@mallctl must have been successfully written to.
+@roff_swap@This option is enabled by default. .Ed .\"----------------------------------------------------------------------------- +@roff_tcache@.It Sy "tcache.flush (void) --" +@roff_tcache@.Bd -ragged -offset indent -compact +@roff_tcache@Flush calling thread's tcache. +@roff_tcache@This interface releases all cached objects and internal data +@roff_tcache@structures associated with the calling thread's thread-specific +@roff_tcache@cache. +@roff_tcache@Ordinarily, this interface need not be called, since automatic +@roff_tcache@periodic incremental garbage collection occurs, and the thread +@roff_tcache@cache is automatically discarded when a thread exits. +@roff_tcache@However, garbage collection is triggered by allocation activity, +@roff_tcache@so it is possible for a thread that stops allocating/deallocating +@roff_tcache@to retain its cache indefinitely, in which case the developer may +@roff_tcache@find manual flushing useful. +.Ed +.\"----------------------------------------------------------------------------- +.It Sy "thread.arena (unsigned) rw" +.Bd -ragged -offset indent -compact +Get or set the arena associated with the calling thread. +The arena index must be less than the maximum number of arenas (see the +.Dq arenas.narenas +mallctl). +If the specified arena was not initialized beforehand (see the +.Dq arenas.initialized +mallctl), it will be automatically initialized as a side effect of calling this +interface. +.Ed +.\"----------------------------------------------------------------------------- +@roff_stats@.It Sy "thread.allocated (uint64_t) r-" +@roff_stats@.Bd -ragged -offset indent -compact +@roff_stats@Get the total number of bytes ever allocated by the calling thread. +@roff_stats@This counter has the potential to wrap around; it is up to the +@roff_stats@application to appropriately interpret the counter in such cases. +@roff_stats@.Ed +.\"----------------------------------------------------------------------------- +@roff_stats@.It Sy "thread.deallocated (uint64_t) r-" +@roff_stats@.Bd -ragged -offset indent -compact +@roff_stats@Get the total number of bytes ever deallocated by the calling +@roff_stats@thread. +@roff_stats@This counter has the potential to wrap around; it is up to the +@roff_stats@application to appropriately interpret the counter in such cases. +@roff_stats@.Ed +.\"----------------------------------------------------------------------------- .It Sy "arenas.narenas (unsigned) r-" .Bd -ragged -offset indent -compact Maximum number of arenas. -See the -.Dq N -option. .Ed .\"----------------------------------------------------------------------------- .It Sy "arenas.initialized (bool *) r-" @@ -1269,7 +1206,7 @@ specified. @roff_prof@.Bd -ragged -offset indent -compact @roff_prof@Control whether sampling is currently active. @roff_prof@See the -@roff_prof@.Dq E +@roff_prof@.Dq opt.prof_active @roff_prof@option for additional information. @roff_prof@.Ed .\"----------------------------------------------------------------------------- @@ -1281,8 +1218,8 @@ specified. @roff_prof@where @roff_prof@.Pa <prefix> @roff_prof@is controlled by the -@roff_prof@JEMALLOC_PROF_PREFIX -@roff_prof@environment variable. +@roff_prof@.Dq opt.prof_prefix +@roff_prof@option. @roff_prof@.Ed .\"----------------------------------------------------------------------------- @roff_prof@.It Sy "prof.interval (uint64_t) r-" @roff_prof@.Bd -ragged -offset indent -compact @roff_prof@Average number of bytes allocated between interval-based profile @roff_prof@dumps.
@roff_prof@See the -@roff_prof@.Dq I +@roff_prof@.Dq opt.lg_prof_interval @roff_prof@option for additional information. @roff_prof@.Ed .\"----------------------------------------------------------------------------- @@ -1544,10 +1481,9 @@ has not been called. .\"----------------------------------------------------------------------------- .El .Sh DEBUGGING MALLOC PROBLEMS -The first thing to do is to set the -.Dq A -option. -This option forces a coredump (if possible) at the first sign of trouble, +Start by setting the +.Dq opt.abort +option, which forces a coredump (if possible) at the first sign of trouble, rather than the normal policy of trying to continue if at all possible. .Pp It is probably also a good idea to recompile the program with suitable @@ -1558,19 +1494,19 @@ options and symbols for debugger support. @roff_fill@the next section, it is likely because it depends on the storage @roff_fill@being filled with zero bytes. @roff_fill@Try running it with the -@roff_fill@.Dq Z +@roff_fill@.Dq opt.zero @roff_fill@option set; @roff_fill@if that improves the situation, this diagnosis has been confirmed. @roff_fill@If the program still misbehaves, @roff_fill@the likely problem is accessing memory outside the allocated area. @roff_fill@.Pp @roff_fill@Alternatively, if the symptoms are not easy to reproduce, setting the -@roff_fill@.Dq J +@roff_fill@.Dq opt.junk @roff_fill@option may help provoke the problem. @roff_fill@.Pp -Unfortunately this implementation does not provide much detail about -the problems it detects; the performance impact for storing such information -would be prohibitive. +This implementation does not provide much detail about the problems it detects, +because the performance impact for storing such information would be +prohibitive. There are a number of allocator implementations available on the Internet which focus on detecting and pinpointing problems by trading performance for extra sanity checks and detailed diagnostics. @@ -1580,7 +1516,7 @@ warning condition, a message will be printed to file descriptor .Dv STDERR_FILENO . Errors will result in the process dumping core. If the -.Dq A +.Dq opt.abort option is set, all warnings are treated as errors. .Pp The @@ -1736,33 +1672,24 @@ was specified, but the reallocation request could not be serviced without moving the object. .El .Sh ENVIRONMENT -The following environment variables affect the execution of the allocation +The following environment variable affects the execution of the allocation functions: -@roff_prof@.Bl -tag -width ".Ev JEMALLOC_PROF_PREFIX" -@roff_no_prof@.Bl -tag -width ".Ev JEMALLOC_OPTIONS" -.It Ev JEMALLOC_OPTIONS +.Bl -tag -width ".Ev @jemalloc_cprefix@MALLOC_CONF" +.It Ev @jemalloc_cprefix@MALLOC_CONF If the environment variable -.Ev JEMALLOC_OPTIONS -is set, the characters it contains will be interpreted as flags to the -allocation functions. -@roff_prof@.It Ev JEMALLOC_PROF_PREFIX -@roff_prof@If the environment variable -@roff_prof@.Ev JEMALLOC_PROF_PREFIX -@roff_prof@is set, use it as the filename prefix for profile dumps; otherwise -@roff_prof@use -@roff_prof@.Pa jeprof -@roff_prof@as the prefix. +.Ev @jemalloc_cprefix@MALLOC_CONF +is set, the characters it contains will be interpreted as options. 
.El .Sh EXAMPLES To dump core whenever a problem occurs: .Pp .Bd -literal -offset indent -ln -s 'A' /etc/jemalloc.conf +ln -s 'abort:true' /etc/@jemalloc_prefix@malloc.conf .Ed .Pp -To specify in the source a chunk size that is twice the default: +To specify in the source a chunk size that is 16 MiB: .Bd -literal -offset indent -@jemalloc_prefix@malloc_options = "K"; +@jemalloc_prefix@malloc_conf = "lg_chunk:24"; .Ed .Sh SEE ALSO .Xr madvise 2 , diff --git a/jemalloc/include/jemalloc/internal/ctl.h b/jemalloc/include/jemalloc/internal/ctl.h index 7bbf21e0..8776ad13 100644 --- a/jemalloc/include/jemalloc/internal/ctl.h +++ b/jemalloc/include/jemalloc/internal/ctl.h @@ -82,9 +82,9 @@ bool ctl_boot(void); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (JEMALLOC_P(mallctl)(name, oldp, oldlenp, newp, newlen) \ != 0) { \ - malloc_write("<jemalloc>: Invalid xmallctl(\""); \ + malloc_write("<jemalloc>: Failure in xmallctl(\""); \ malloc_write(name); \ - malloc_write("\", ...) call\n"); \ + malloc_write("\", ...)\n"); \ abort(); \ } \ } while (0) @@ -92,9 +92,9 @@ bool ctl_boot(void); #define xmallctlnametomib(name, mibp, miblenp) do { \ if (JEMALLOC_P(mallctlnametomib)(name, mibp, miblenp) != 0) { \ malloc_write( \ - "<jemalloc>: Invalid xmallctlnametomib(\""); \ + "<jemalloc>: Failure in xmallctlnametomib(\""); \ malloc_write(name); \ - malloc_write("\", ...) call\n"); \ + malloc_write("\", ...)\n"); \ abort(); \ } \ } while (0) @@ -103,7 +103,7 @@ bool ctl_boot(void); if (JEMALLOC_P(mallctlbymib)(mib, miblen, oldp, oldlenp, newp, \ newlen) != 0) { \ malloc_write( \ - "<jemalloc>: Invalid xmallctlbymib() call\n"); \ + "<jemalloc>: Failure in xmallctlbymib()\n"); \ abort(); \ } \ } while (0) diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in index 99746bbd..3d253001 100644 --- a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in +++ b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in @@ -24,6 +24,7 @@ #include #include #include +#include <limits.h> #include #include #include @@ -61,7 +62,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); malloc_write("<jemalloc>: "); \ malloc_write(__FILE__); \ malloc_write(":"); \ - malloc_write(umax2s(__LINE__, 10, line_buf)); \ + malloc_write(u2s(__LINE__, 10, line_buf)); \ malloc_write(": Failed assertion: "); \ malloc_write("\""); \ malloc_write(#e); \ @@ -256,6 +257,7 @@ extern bool opt_xmalloc; #ifdef JEMALLOC_FILL extern bool opt_zero; #endif +extern size_t opt_narenas; #ifdef DYNAMIC_PAGE_SHIFT extern size_t pagesize; diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h index 3e85bdab..7864000b 100644 --- a/jemalloc/include/jemalloc/internal/prof.h +++ b/jemalloc/include/jemalloc/internal/prof.h @@ -9,6 +9,7 @@ typedef struct prof_ctx_s prof_ctx_t; typedef struct prof_tdata_s prof_tdata_t; /* Option defaults. */ +#define PROF_PREFIX_DEFAULT "jeprof" #define LG_PROF_BT_MAX_DEFAULT 7 #define LG_PROF_SAMPLE_DEFAULT 0 #define LG_PROF_INTERVAL_DEFAULT -1 @@ -164,10 +165,11 @@ extern bool opt_prof_active; extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */ extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_udump; /* High-water memory dumping. */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ extern bool opt_prof_leak; /* Dump leak summary at exit. */ extern bool opt_prof_accum; /* Report cumulative bytes.
*/ extern ssize_t opt_lg_prof_tcmax; /* lg(max per thread backtrace cache) */ +extern char opt_prof_prefix[PATH_MAX + 1]; /* * Profile dump interval, measured in bytes allocated. Each arena triggers a @@ -215,10 +217,11 @@ void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max); prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); void prof_idump(void); bool prof_mdump(const char *filename); -void prof_udump(void); +void prof_gdump(void); prof_tdata_t *prof_tdata_init(void); void prof_boot0(void); -bool prof_boot1(void); +void prof_boot1(void); +bool prof_boot2(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/stats.h b/jemalloc/include/jemalloc/internal/stats.h index cbf035ff..3fc2080a 100644 --- a/jemalloc/include/jemalloc/internal/stats.h +++ b/jemalloc/include/jemalloc/internal/stats.h @@ -154,7 +154,7 @@ struct chunk_stats_s { extern bool opt_stats_print; -char *umax2s(uintmax_t x, unsigned base, char *s); +char *u2s(uint64_t x, unsigned base, char *s); #ifdef JEMALLOC_STATS void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); diff --git a/jemalloc/include/jemalloc/internal/tcache.h b/jemalloc/include/jemalloc/internal/tcache.h index 168a3069..1ad91a9b 100644 --- a/jemalloc/include/jemalloc/internal/tcache.h +++ b/jemalloc/include/jemalloc/internal/tcache.h @@ -17,7 +17,7 @@ typedef struct tcache_s tcache_t; /* Number of cache slots for large size classes. */ #define TCACHE_NSLOTS_LARGE 20 -/* (1U << opt_lg_tcache_maxclass) is used to compute tcache_maxclass. */ +/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ #define LG_TCACHE_MAXCLASS_DEFAULT 15 /* @@ -61,7 +61,7 @@ struct tcache_s { #ifdef JEMALLOC_H_EXTERNS extern bool opt_tcache; -extern ssize_t opt_lg_tcache_maxclass; +extern ssize_t opt_lg_tcache_max; extern ssize_t opt_lg_tcache_gc_sweep; /* Map of thread-specific caches. */ diff --git a/jemalloc/include/jemalloc/jemalloc.h.in b/jemalloc/include/jemalloc/jemalloc.h.in index e3983078..4dd3981a 100644 --- a/jemalloc/include/jemalloc/jemalloc.h.in +++ b/jemalloc/include/jemalloc/jemalloc.h.in @@ -32,7 +32,7 @@ extern "C" { #define ALLOCM_ERR_OOM 1 #define ALLOCM_ERR_NOT_MOVED 2 -extern const char *JEMALLOC_P(malloc_options); +extern const char *JEMALLOC_P(malloc_conf); extern void (*JEMALLOC_P(malloc_message))(void *, const char *); void *JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc); diff --git a/jemalloc/include/jemalloc/jemalloc_defs.h.in b/jemalloc/include/jemalloc/jemalloc_defs.h.in index fe35170c..54e5d943 100644 --- a/jemalloc/include/jemalloc/jemalloc_defs.h.in +++ b/jemalloc/include/jemalloc/jemalloc_defs.h.in @@ -13,6 +13,7 @@ * the API prefixing. */ #undef JEMALLOC_PREFIX +#undef JEMALLOC_CPREFIX #if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE)) #undef JEMALLOC_P #endif diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c index d811f658..00f425fa 100644 --- a/jemalloc/src/arena.c +++ b/jemalloc/src/arena.c @@ -290,7 +290,7 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) assert((uintptr_t)ptr >= (uintptr_t)run + (uintptr_t)run->bin->reg0_offset); /* - * Freeing a pointer in the run's wilderness can cause assertion + * Freeing a pointer past the run's frontier can cause assertion failure.
*/ assert((uintptr_t)ptr < (uintptr_t)run->next); @@ -2532,7 +2532,7 @@ arena_boot(void) if (nbins > 255) { char line_buf[UMAX2S_BUFSIZE]; malloc_write("<jemalloc>: Too many small size classes ("); - malloc_write(umax2s(nbins, 10, line_buf)); + malloc_write(u2s(nbins, 10, line_buf)); malloc_write(" > max 255)\n"); abort(); } @@ -2541,7 +2541,7 @@ arena_boot(void) if (nbins > 256) { char line_buf[UMAX2S_BUFSIZE]; malloc_write("<jemalloc>: Too many small size classes ("); - malloc_write(umax2s(nbins, 10, line_buf)); + malloc_write(u2s(nbins, 10, line_buf)); malloc_write(" > max 256)\n"); abort(); } diff --git a/jemalloc/src/chunk.c b/jemalloc/src/chunk.c index 0be24fbd..00bf50a0 100644 --- a/jemalloc/src/chunk.c +++ b/jemalloc/src/chunk.c @@ -78,7 +78,7 @@ RETURN: #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) if (ret != NULL) { # ifdef JEMALLOC_PROF - bool udump; + bool gdump; # endif malloc_mutex_lock(&chunks_mtx); # ifdef JEMALLOC_STATS @@ -88,17 +88,17 @@ RETURN: if (stats_chunks.curchunks > stats_chunks.highchunks) { stats_chunks.highchunks = stats_chunks.curchunks; # ifdef JEMALLOC_PROF - udump = true; + gdump = true; # endif } # ifdef JEMALLOC_PROF else - udump = false; + gdump = false; # endif malloc_mutex_unlock(&chunks_mtx); # ifdef JEMALLOC_PROF - if (opt_prof && opt_prof_udump && udump) - prof_udump(); + if (opt_prof && opt_prof_gdump && gdump) + prof_gdump(); # endif } #endif diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c index dbc5cd42..c83ee4f1 100644 --- a/jemalloc/src/ctl.c +++ b/jemalloc/src/ctl.c @@ -62,8 +62,15 @@ CTL_PROTO(config_tiny) CTL_PROTO(config_tls) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) +CTL_PROTO(opt_lg_qspace_max) +CTL_PROTO(opt_lg_cspace_max) +CTL_PROTO(opt_lg_chunk) +CTL_PROTO(opt_narenas) +CTL_PROTO(opt_lg_dirty_mult) +CTL_PROTO(opt_stats_print) #ifdef JEMALLOC_FILL CTL_PROTO(opt_junk) +CTL_PROTO(opt_zero) #endif #ifdef JEMALLOC_SYSV CTL_PROTO(opt_sysv) @@ -71,29 +78,22 @@ CTL_PROTO(opt_sysv) #ifdef JEMALLOC_XMALLOC CTL_PROTO(opt_xmalloc) #endif -#ifdef JEMALLOC_ZERO -CTL_PROTO(opt_zero) -#endif #ifdef JEMALLOC_TCACHE CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_gc_sweep) #endif #ifdef JEMALLOC_PROF CTL_PROTO(opt_prof) +CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) CTL_PROTO(opt_lg_prof_bt_max) CTL_PROTO(opt_lg_prof_sample) CTL_PROTO(opt_lg_prof_interval) -CTL_PROTO(opt_prof_udump) +CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_lg_prof_tcmax) #endif -CTL_PROTO(opt_stats_print) -CTL_PROTO(opt_lg_qspace_max) -CTL_PROTO(opt_lg_cspace_max) -CTL_PROTO(opt_lg_dirty_mult) -CTL_PROTO(opt_lg_chunk) #ifdef JEMALLOC_SWAP CTL_PROTO(opt_overcommit) #endif @@ -247,38 +247,43 @@ static const ctl_node_t config_node[] = { static const ctl_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, + {NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)}, + {NAME("lg_cspace_max"), CTL(opt_lg_cspace_max)}, + {NAME("lg_chunk"), CTL(opt_lg_chunk)}, + {NAME("narenas"), CTL(opt_narenas)}, + {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, + {NAME("stats_print"), CTL(opt_stats_print)} #ifdef JEMALLOC_FILL + , {NAME("junk"), CTL(opt_junk)}, + {NAME("zero"), CTL(opt_zero)} #endif #ifdef JEMALLOC_SYSV - {NAME("sysv"), CTL(opt_sysv)}, + , + {NAME("sysv"), CTL(opt_sysv)} #endif #ifdef JEMALLOC_XMALLOC - {NAME("xmalloc"), CTL(opt_xmalloc)}, -#endif -#ifdef JEMALLOC_ZERO - {NAME("zero"), CTL(opt_zero)}, + , + {NAME("xmalloc"), CTL(opt_xmalloc)} #endif #ifdef JEMALLOC_TCACHE + , {NAME("tcache"), CTL(opt_tcache)}, - {NAME("lg_tcache_gc_sweep"),
CTL(opt_lg_tcache_gc_sweep)}, + {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)} #endif #ifdef JEMALLOC_PROF + , {NAME("prof"), CTL(opt_prof)}, + {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, {NAME("lg_prof_bt_max"), CTL(opt_lg_prof_bt_max)}, {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, - {NAME("prof_udump"), CTL(opt_prof_udump)}, + {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, - {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)}, + {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)} #endif - {NAME("stats_print"), CTL(opt_stats_print)}, - {NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)}, - {NAME("lg_cspace_max"), CTL(opt_lg_cspace_max)}, - {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, - {NAME("lg_chunk"), CTL(opt_lg_chunk)} #ifdef JEMALLOC_SWAP , {NAME("overcommit"), CTL(opt_overcommit)} @@ -1201,8 +1206,15 @@ CTL_RO_FALSE_GEN(config_xmalloc) /******************************************************************************/ CTL_RO_GEN(opt_abort, opt_abort, bool) +CTL_RO_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t) +CTL_RO_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t) +CTL_RO_GEN(opt_lg_chunk, opt_lg_chunk, size_t) +CTL_RO_GEN(opt_narenas, opt_narenas, size_t) +CTL_RO_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) +CTL_RO_GEN(opt_stats_print, opt_stats_print, bool) #ifdef JEMALLOC_FILL CTL_RO_GEN(opt_junk, opt_junk, bool) +CTL_RO_GEN(opt_zero, opt_zero, bool) #endif #ifdef JEMALLOC_SYSV CTL_RO_GEN(opt_sysv, opt_sysv, bool) @@ -1210,29 +1222,22 @@ CTL_RO_GEN(opt_sysv, opt_sysv, bool) #ifdef JEMALLOC_XMALLOC CTL_RO_GEN(opt_xmalloc, opt_xmalloc, bool) #endif -#ifdef JEMALLOC_ZERO -CTL_RO_GEN(opt_zero, opt_zero, bool) -#endif #ifdef JEMALLOC_TCACHE CTL_RO_GEN(opt_tcache, opt_tcache, bool) CTL_RO_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t) #endif #ifdef JEMALLOC_PROF CTL_RO_GEN(opt_prof, opt_prof, bool) +CTL_RO_GEN(opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_GEN(opt_prof_active, opt_prof_active, bool) CTL_RO_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t) CTL_RO_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t) CTL_RO_GEN(opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) -CTL_RO_GEN(opt_prof_udump, opt_prof_udump, bool) +CTL_RO_GEN(opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_GEN(opt_prof_leak, opt_prof_leak, bool) CTL_RO_GEN(opt_prof_accum, opt_prof_accum, bool) CTL_RO_GEN(opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t) #endif -CTL_RO_GEN(opt_stats_print, opt_stats_print, bool) -CTL_RO_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t) -CTL_RO_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t) -CTL_RO_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) -CTL_RO_GEN(opt_lg_chunk, opt_lg_chunk, size_t) #ifdef JEMALLOC_SWAP CTL_RO_GEN(opt_overcommit, opt_overcommit, bool) #endif diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c index dedf011a..012c4a8f 100644 --- a/jemalloc/src/jemalloc.c +++ b/jemalloc/src/jemalloc.c @@ -41,8 +41,7 @@ size_t lg_pagesize; unsigned ncpus; /* Runtime configuration options. 
*/ -const char *JEMALLOC_P(malloc_options) - JEMALLOC_ATTR(visibility("default")); +const char *JEMALLOC_P(malloc_conf) JEMALLOC_ATTR(visibility("default")); #ifdef JEMALLOC_DEBUG bool opt_abort = true; # ifdef JEMALLOC_FILL @@ -63,7 +62,7 @@ bool opt_xmalloc = false; #ifdef JEMALLOC_FILL bool opt_zero = false; #endif -static int opt_narenas_lshift = 0; +size_t opt_narenas = 0; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -74,6 +73,11 @@ static unsigned malloc_ncpus(void); #if (defined(JEMALLOC_STATS) && defined(NO_TLS)) static void thread_allocated_cleanup(void *arg); #endif +static bool malloc_conf_next(char const **opts_p, char const **k_p, + size_t *klen_p, char const **v_p, size_t *vlen_p); +static void malloc_conf_error(const char *msg, const char *k, size_t klen, + const char *v, size_t vlen); +static void malloc_conf_init(void); static bool malloc_init_hard(void); /******************************************************************************/ @@ -260,12 +264,323 @@ malloc_init(void) } static bool -malloc_init_hard(void) +malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, + char const **v_p, size_t *vlen_p) +{ + bool accept; + const char *opts = *opts_p; + + *k_p = opts; + + for (accept = false; accept == false;) { + switch (*opts) { + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '_': + opts++; + break; + case ':': + opts++; + *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; + *v_p = opts; + accept = true; + break; + case '\0': + if (opts != *opts_p) { + malloc_write("<jemalloc>: Conf string " + "ends with key\n"); + } + return (true); + default: + malloc_write("<jemalloc>: Malformed conf " + "string\n"); + return (true); + } + } + + for (accept = false; accept == false;) { + switch (*opts) { + case ',': + opts++; + /* + * Look ahead one character here, because the + * next time this function is called, it will + * assume that end of input has been cleanly + * reached if no input remains, but we have + * optimistically already consumed the comma if + * one exists. 
+ */ + if (*opts == '\0') { + malloc_write("<jemalloc>: Conf string " + "ends with comma\n"); + } + *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; + accept = true; + break; + case '\0': + *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; + accept = true; + break; + default: + opts++; + break; + } + } + + *opts_p = opts; + return (false); +} + +static void +malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, + size_t vlen) +{ + char buf[PATH_MAX + 1]; + + malloc_write("<jemalloc>: "); + malloc_write(msg); + malloc_write(": "); + memcpy(buf, k, klen); + memcpy(&buf[klen], ":", 1); + memcpy(&buf[klen+1], v, vlen); + buf[klen+1+vlen] = '\0'; + malloc_write(buf); + malloc_write("\n"); +} + +static void +malloc_conf_init(void) { unsigned i; - int linklen; char buf[PATH_MAX + 1]; - const char *opts; + const char *opts, *k, *v; + size_t klen, vlen; + + for (i = 0; i < 3; i++) { + /* Get runtime configuration. */ + switch (i) { + case 0: + if (JEMALLOC_P(malloc_conf) != NULL) { + /* + * Use options that were compiled into the + * program. + */ + opts = JEMALLOC_P(malloc_conf); + } else { + /* No configuration specified. */ + buf[0] = '\0'; + opts = buf; + } + break; + case 1: { + int linklen; + const char *linkname = +#ifdef JEMALLOC_PREFIX + "/etc/"JEMALLOC_PREFIX"malloc.conf" +#else + "/etc/malloc.conf" +#endif + ; + + if ((linklen = readlink(linkname, buf, + sizeof(buf) - 1)) != -1) { + /* + * Use the contents of the "/etc/malloc.conf" + * symbolic link's name. + */ + buf[linklen] = '\0'; + opts = buf; + } else { + /* No configuration specified. */ + buf[0] = '\0'; + opts = buf; + } + break; + } + case 2: { + const char *envname = +#ifdef JEMALLOC_PREFIX + JEMALLOC_CPREFIX"MALLOC_CONF" +#else + "MALLOC_CONF" +#endif + ; + + if ((opts = getenv(envname)) != NULL) { + /* + * Do nothing; opts is already initialized to + * the value of the MALLOC_CONF + * environment variable. + */ + } else { + /* No configuration specified. 
*/ + buf[0] = '\0'; + opts = buf; + } + break; + } + default: + /* NOTREACHED */ + assert(false); + buf[0] = '\0'; + opts = buf; + } + + while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, + &vlen) == false) { +#define CONF_HANDLE_BOOL(n) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + if (strncmp("true", v, vlen) == 0 && \ + vlen == sizeof("true")-1) \ + opt_##n = true; \ + else if (strncmp("false", v, vlen) == \ + 0 && vlen == sizeof("false")-1) \ + opt_##n = false; \ + else { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } \ + continue; \ + } +#define CONF_HANDLE_SIZE_T(n, min, max) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + unsigned long ul; \ + char *end; \ + \ + errno = 0; \ + ul = strtoul(v, &end, 0); \ + if (errno != 0 || (uintptr_t)end - \ + (uintptr_t)v != vlen) { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } else if (ul < min || ul > max) { \ + malloc_conf_error( \ + "Out-of-range conf value", \ + k, klen, v, vlen); \ + } else \ + opt_##n = ul; \ + continue; \ + } +#define CONF_HANDLE_SSIZE_T(n, min, max) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + long l; \ + char *end; \ + \ + errno = 0; \ + l = strtol(v, &end, 0); \ + if (errno != 0 || (uintptr_t)end - \ + (uintptr_t)v != vlen) { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } else if (l < (ssize_t)min || l > \ + (ssize_t)max) { \ + malloc_conf_error( \ + "Out-of-range conf value", \ + k, klen, v, vlen); \ + } else \ + opt_##n = l; \ + continue; \ + } +#define CONF_HANDLE_CHAR_P(n, d) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + size_t cpylen = (vlen <= \ + sizeof(opt_##n)-1) ? vlen : \ + sizeof(opt_##n)-1; \ + strncpy(opt_##n, v, cpylen); \ + opt_##n[cpylen] = '\0'; \ + continue; \ + } + + CONF_HANDLE_BOOL(abort) + CONF_HANDLE_SIZE_T(lg_qspace_max, LG_QUANTUM, + PAGE_SHIFT-1) + CONF_HANDLE_SIZE_T(lg_cspace_max, LG_QUANTUM, + PAGE_SHIFT-1) + /* + * Chunks always require at least one header page, + * plus one data page. + */ + CONF_HANDLE_SIZE_T(lg_chunk, PAGE_SHIFT+1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SIZE_T(narenas, 1, SIZE_T_MAX) + CONF_HANDLE_SSIZE_T(lg_dirty_mult, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_BOOL(stats_print) +#ifdef JEMALLOC_FILL + CONF_HANDLE_BOOL(junk) + CONF_HANDLE_BOOL(zero) +#endif +#ifdef JEMALLOC_SYSV + CONF_HANDLE_BOOL(sysv) +#endif +#ifdef JEMALLOC_XMALLOC + CONF_HANDLE_BOOL(xmalloc) +#endif +#ifdef JEMALLOC_TCACHE + CONF_HANDLE_BOOL(tcache) + CONF_HANDLE_SSIZE_T(lg_tcache_gc_sweep, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(lg_tcache_max, -1, + (sizeof(size_t) << 3) - 1) +#endif +#ifdef JEMALLOC_PROF + CONF_HANDLE_BOOL(prof) + CONF_HANDLE_CHAR_P(prof_prefix, "jeprof") + CONF_HANDLE_SIZE_T(lg_prof_bt_max, 0, LG_PROF_BT_MAX) + CONF_HANDLE_BOOL(prof_active) + CONF_HANDLE_SSIZE_T(lg_prof_sample, 0, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(prof_accum) + CONF_HANDLE_SSIZE_T(lg_prof_tcmax, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(lg_prof_interval, -1, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(prof_gdump) + CONF_HANDLE_BOOL(prof_leak) +#endif +#ifdef JEMALLOC_SWAP + CONF_HANDLE_BOOL(overcommit) +#endif + malloc_conf_error("Invalid conf pair", k, klen, v, + vlen); +#undef CONF_HANDLE_BOOL +#undef CONF_HANDLE_SIZE_T +#undef CONF_HANDLE_SSIZE_T +#undef CONF_HANDLE_CHAR_P + } + + /* Validate configuration of options that are inter-related. 
*/ + if (opt_lg_qspace_max+1 >= opt_lg_cspace_max) { + malloc_write("<jemalloc>: Invalid lg_[qc]space_max " + "relationship; restoring defaults\n"); + opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; + opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; + } + } +} + +static bool +malloc_init_hard(void) +{ arena_t *init_arenas[1]; malloc_mutex_lock(&init_lock); @@ -308,302 +623,9 @@ malloc_init_hard(void) } #endif - for (i = 0; i < 3; i++) { - unsigned j; + prof_boot0(); - /* Get runtime configuration. */ - switch (i) { - case 0: - if ((linklen = readlink("/etc/jemalloc.conf", buf, - sizeof(buf) - 1)) != -1) { - /* - * Use the contents of the "/etc/jemalloc.conf" - * symbolic link's name. - */ - buf[linklen] = '\0'; - opts = buf; - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - case 1: - if ((opts = getenv("JEMALLOC_OPTIONS")) != NULL) { - /* - * Do nothing; opts is already initialized to - * the value of the JEMALLOC_OPTIONS - * environment variable. - */ - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - case 2: - if (JEMALLOC_P(malloc_options) != NULL) { - /* - * Use options that were compiled into the - * program. - */ - opts = JEMALLOC_P(malloc_options); - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - default: - /* NOTREACHED */ - assert(false); - buf[0] = '\0'; - opts = buf; - } - - for (j = 0; opts[j] != '\0'; j++) { - unsigned k, nreps; - bool nseen; - - /* Parse repetition count, if any. */ - for (nreps = 0, nseen = false;; j++, nseen = true) { - switch (opts[j]) { - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - case '8': case '9': - nreps *= 10; - nreps += opts[j] - '0'; - break; - default: - goto MALLOC_OUT; - } - } -MALLOC_OUT: - if (nseen == false) - nreps = 1; - - for (k = 0; k < nreps; k++) { - switch (opts[j]) { - case 'a': - opt_abort = false; - break; - case 'A': - opt_abort = true; - break; -#ifdef JEMALLOC_PROF - case 'b': - if (opt_lg_prof_bt_max > 0) - opt_lg_prof_bt_max--; - break; - case 'B': - if (opt_lg_prof_bt_max < LG_PROF_BT_MAX) - opt_lg_prof_bt_max++; - break; -#endif - case 'c': - if (opt_lg_cspace_max - 1 > - opt_lg_qspace_max && - opt_lg_cspace_max > - LG_CACHELINE) - opt_lg_cspace_max--; - break; - case 'C': - if (opt_lg_cspace_max < PAGE_SHIFT - - 1) - opt_lg_cspace_max++; - break; - case 'd': - if (opt_lg_dirty_mult + 1 < - (sizeof(size_t) << 3)) - opt_lg_dirty_mult++; - break; - case 'D': - if (opt_lg_dirty_mult >= 0) - opt_lg_dirty_mult--; - break; -#ifdef JEMALLOC_PROF - case 'e': - opt_prof_active = false; - break; - case 'E': - opt_prof_active = true; - break; - case 'f': - opt_prof = false; - break; - case 'F': - opt_prof = true; - break; -#endif -#ifdef JEMALLOC_TCACHE - case 'g': - if (opt_lg_tcache_gc_sweep >= 0) - opt_lg_tcache_gc_sweep--; - break; - case 'G': - if (opt_lg_tcache_gc_sweep + 1 < - (sizeof(size_t) << 3)) - opt_lg_tcache_gc_sweep++; - break; - case 'h': - opt_tcache = false; - break; - case 'H': - opt_tcache = true; - break; -#endif -#ifdef JEMALLOC_PROF - case 'i': - if (opt_lg_prof_interval >= 0) - opt_lg_prof_interval--; - break; - case 'I': - if (opt_lg_prof_interval + 1 < - (sizeof(uint64_t) << 3)) - opt_lg_prof_interval++; - break; -#endif -#ifdef JEMALLOC_FILL - case 'j': - opt_junk = false; - break; - case 'J': - opt_junk = true; - break; -#endif - case 'k': - /* - * Chunks always require at least one - * header page, plus one data page. 
- */ - if ((1U << (opt_lg_chunk - 1)) >= - (2U << PAGE_SHIFT)) - opt_lg_chunk--; - break; - case 'K': - if (opt_lg_chunk + 1 < - (sizeof(size_t) << 3)) - opt_lg_chunk++; - break; -#ifdef JEMALLOC_PROF - case 'l': - opt_prof_leak = false; - break; - case 'L': - opt_prof_leak = true; - break; -#endif -#ifdef JEMALLOC_TCACHE - case 'm': - if (opt_lg_tcache_maxclass >= 0) - opt_lg_tcache_maxclass--; - break; - case 'M': - if (opt_lg_tcache_maxclass + 1 < - (sizeof(size_t) << 3)) - opt_lg_tcache_maxclass++; - break; -#endif - case 'n': - opt_narenas_lshift--; - break; - case 'N': - opt_narenas_lshift++; - break; -#ifdef JEMALLOC_SWAP - case 'o': - opt_overcommit = false; - break; - case 'O': - opt_overcommit = true; - break; -#endif - case 'p': - opt_stats_print = false; - break; - case 'P': - opt_stats_print = true; - break; - case 'q': - if (opt_lg_qspace_max > LG_QUANTUM) - opt_lg_qspace_max--; - break; - case 'Q': - if (opt_lg_qspace_max + 1 < - opt_lg_cspace_max) - opt_lg_qspace_max++; - break; -#ifdef JEMALLOC_PROF - case 'r': - opt_prof_accum = false; - break; - case 'R': - opt_prof_accum = true; - break; - case 's': - if (opt_lg_prof_sample > 0) - opt_lg_prof_sample--; - break; - case 'S': - if (opt_lg_prof_sample + 1 < - (sizeof(uint64_t) << 3)) - opt_lg_prof_sample++; - break; - case 't': - if (opt_lg_prof_tcmax >= 0) - opt_lg_prof_tcmax--; - break; - case 'T': - if (opt_lg_prof_tcmax + 1 < - (sizeof(size_t) << 3)) - opt_lg_prof_tcmax++; - break; - case 'u': - opt_prof_udump = false; - break; - case 'U': - opt_prof_udump = true; - break; -#endif -#ifdef JEMALLOC_SYSV - case 'v': - opt_sysv = false; - break; - case 'V': - opt_sysv = true; - break; -#endif -#ifdef JEMALLOC_XMALLOC - case 'x': - opt_xmalloc = false; - break; - case 'X': - opt_xmalloc = true; - break; -#endif -#ifdef JEMALLOC_FILL - case 'z': - opt_zero = false; - break; - case 'Z': - opt_zero = true; - break; -#endif - default: { - char cbuf[2]; - - cbuf[0] = opts[j]; - cbuf[1] = '\0'; - malloc_write( - "<jemalloc>: Unsupported character " - "in malloc options: '"); - malloc_write(cbuf); - malloc_write("'\n"); - } - } - } - } - } + malloc_conf_init(); /* Register fork handlers. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork, @@ -638,7 +660,7 @@ MALLOC_OUT: } #ifdef JEMALLOC_PROF - prof_boot0(); + prof_boot1(); #endif if (arena_boot()) { @@ -692,7 +714,7 @@ MALLOC_OUT: malloc_mutex_init(&arenas_lock); #ifdef JEMALLOC_PROF - if (prof_boot1()) { + if (prof_boot2()) { malloc_mutex_unlock(&init_lock); return (true); } @@ -704,31 +726,29 @@ MALLOC_OUT: ncpus = malloc_ncpus(); malloc_mutex_lock(&init_lock); - if (ncpus > 1) { /* * For SMP systems, create more than one arena per CPU by * default. */ - opt_narenas_lshift += 2; + if (ncpus > 1) + opt_narenas = ncpus << 2; + else + opt_narenas = 1; } + narenas = opt_narenas; + /* + * Make sure that the arenas array can be allocated. In practice, this + * limit is enough to allow the allocator to function, but the ctl + * machinery will fail to allocate memory at far lower limits. + */ + if (narenas > chunksize / sizeof(arena_t *)) { + char buf[UMAX2S_BUFSIZE]; - /* Determine how many arenas to use. */ - narenas = ncpus; - if (opt_narenas_lshift > 0) { - if ((narenas << opt_narenas_lshift) > narenas) - narenas <<= opt_narenas_lshift; - /* - * Make sure not to exceed the limits of what base_alloc() can - * handle. 
- */ - if (narenas * sizeof(arena_t *) > chunksize) - narenas = chunksize / sizeof(arena_t *); - } else if (opt_narenas_lshift < 0) { - if ((narenas >> -opt_narenas_lshift) < narenas) - narenas >>= -opt_narenas_lshift; - /* Make sure there is at least one arena. */ - if (narenas == 0) - narenas = 1; + narenas = chunksize / sizeof(arena_t *); + malloc_write("<jemalloc>: Reducing narenas to limit ("); + malloc_write(u2s(narenas, 10, buf)); + malloc_write(")\n"); } next_arena = (narenas > 0) ? 1 : 0; diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c index fb0e7659..84ce1ba0 100644 --- a/jemalloc/src/prof.c +++ b/jemalloc/src/prof.c @@ -20,10 +20,11 @@ bool opt_prof_active = true; size_t opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; -bool opt_prof_udump = false; +bool opt_prof_gdump = false; bool opt_prof_leak = false; bool opt_prof_accum = true; ssize_t opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT; +char opt_prof_prefix[PATH_MAX + 1]; uint64_t prof_interval; bool prof_promote; @@ -64,7 +65,7 @@ static bool prof_booted = false; static malloc_mutex_t enq_mtx; static bool enq; static bool enq_idump; -static bool enq_udump; +static bool enq_gdump; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -150,7 +151,7 @@ prof_enter(void) static inline void prof_leave(void) { - bool idump, udump; + bool idump, gdump; malloc_mutex_unlock(&bt2ctx_mtx); @@ -158,14 +159,14 @@ prof_leave(void) enq = false; idump = enq_idump; enq_idump = false; - udump = enq_udump; - enq_udump = false; + gdump = enq_gdump; + enq_gdump = false; malloc_mutex_unlock(&enq_mtx); if (idump) prof_idump(); - if (udump) - prof_udump(); + if (gdump) + prof_gdump(); } #ifdef JEMALLOC_PROF_LIBGCC @@ -681,22 +682,22 @@ prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err) return (false); } - if (prof_write(umax2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err) + if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) - || prof_write(umax2s(ctx->cnt_summed.curbytes, 10, buf), + || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf), propagate_err) || prof_write(" [", propagate_err) - || prof_write(umax2s(ctx->cnt_summed.accumobjs, 10, buf), + || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) - || prof_write(umax2s(ctx->cnt_summed.accumbytes, 10, buf), + || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf), propagate_err) || prof_write("] @", propagate_err)) return (true); for (i = 0; i < bt->len; i++) { if (prof_write(" 0x", propagate_err) - || prof_write(umax2s((uintptr_t)bt->vec[i], 16, buf), + || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf), propagate_err)) return (true); } @@ -725,7 +726,7 @@ prof_dump_maps(bool propagate_err) memcpy(&mpath[i], s, slen); i += slen; - s = umax2s(getpid(), 10, buf); + s = u2s(getpid(), 10, buf); slen = strlen(s); memcpy(&mpath[i], s, slen); i += slen; @@ -799,13 +800,13 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) /* Dump profile header. 
*/ if (prof_write("heap profile: ", propagate_err) - || prof_write(umax2s(cnt_all.curobjs, 10, buf), propagate_err) + || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) - || prof_write(umax2s(cnt_all.curbytes, 10, buf), propagate_err) + || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err) || prof_write(" [", propagate_err) - || prof_write(umax2s(cnt_all.accumobjs, 10, buf), propagate_err) + || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) - || prof_write(umax2s(cnt_all.accumbytes, 10, buf), propagate_err)) + || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err)) goto ERROR; if (opt_lg_prof_sample == 0) { @@ -813,7 +814,7 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) goto ERROR; } else { if (prof_write("] @ heap_v2/", propagate_err) - || prof_write(umax2s((uint64_t)1U << opt_lg_prof_sample, 10, + || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10, buf), propagate_err) || prof_write("\n", propagate_err)) goto ERROR; @@ -837,12 +838,12 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) if (leakcheck && cnt_all.curbytes != 0) { malloc_write("<jemalloc>: Leak summary: "); - malloc_write(umax2s(cnt_all.curbytes, 10, buf)); + malloc_write(u2s(cnt_all.curbytes, 10, buf)); malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, "); - malloc_write(umax2s(cnt_all.curobjs, 10, buf)); + malloc_write(u2s(cnt_all.curobjs, 10, buf)); malloc_write((cnt_all.curobjs != 1) ? " objects, " : " object, "); - malloc_write(umax2s(leak_nctx, 10, buf)); + malloc_write(u2s(leak_nctx, 10, buf)); malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n"); malloc_write("<jemalloc>: Run pprof on \""); malloc_write(filename); @@ -872,31 +873,11 @@ prof_dump_filename(char *filename, char v, int64_t vseq) * Construct a filename of the form: * * <prefix>.<pid>.<seq>.v<vseq>.heap\0 - * or - * jeprof.<pid>.<seq>.v<vseq>.heap\0 */ i = 0; - /* - * Use JEMALLOC_PROF_PREFIX if it's set, and if it is short enough to - * avoid overflowing DUMP_FILENAME_BUFSIZE. The result may exceed - * PATH_MAX, but creat(2) will catch that problem. 
- */ - if ((s = getenv("JEMALLOC_PROF_PREFIX")) != NULL - && strlen(s) + (DUMP_FILENAME_BUFSIZE - PATH_MAX) <= PATH_MAX) { - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = "."; - } else - s = "jeprof."; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = umax2s(getpid(), 10, buf); + s = opt_prof_prefix; slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; @@ -906,7 +887,17 @@ prof_dump_filename(char *filename, char v, int64_t vseq) memcpy(&filename[i], s, slen); i += slen; - s = umax2s(prof_dump_seq, 10, buf); + s = u2s(getpid(), 10, buf); + slen = strlen(s); + memcpy(&filename[i], s, slen); + i += slen; + + s = "."; + slen = strlen(s); + memcpy(&filename[i], s, slen); + i += slen; + + s = u2s(prof_dump_seq, 10, buf); prof_dump_seq++; slen = strlen(s); memcpy(&filename[i], s, slen); @@ -921,7 +912,7 @@ prof_dump_filename(char *filename, char v, int64_t vseq) i++; if (vseq != 0xffffffffffffffffLLU) { - s = umax2s(vseq, 10, buf); + s = u2s(vseq, 10, buf); slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; @@ -943,10 +934,12 @@ prof_fdump(void) if (prof_booted == false) return; - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU); - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, opt_prof_leak, false); + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU); + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(filename, opt_prof_leak, false); + } } void @@ -964,11 +957,13 @@ prof_idump(void) } malloc_mutex_unlock(&enq_mtx); - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'i', prof_dump_iseq); - prof_dump_iseq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, false, false); + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'i', prof_dump_iseq); + prof_dump_iseq++; + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(filename, false, false); + } } bool @@ -981,6 +976,8 @@ prof_mdump(const char *filename) if (filename == NULL) { /* No filename specified, so automatically generate one. */ + if (opt_prof_prefix[0] == '\0') + return (true); malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename_buf, 'm', prof_dump_mseq); prof_dump_mseq++; @@ -991,7 +988,7 @@ prof_mdump(const char *filename) } void -prof_udump(void) +prof_gdump(void) { char filename[DUMP_FILENAME_BUFSIZE]; @@ -999,17 +996,19 @@ prof_udump(void) return; malloc_mutex_lock(&enq_mtx); if (enq) { - enq_udump = true; + enq_gdump = true; malloc_mutex_unlock(&enq_mtx); return; } malloc_mutex_unlock(&enq_mtx); - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'u', prof_dump_useq); - prof_dump_useq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, false, false); + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'u', prof_dump_useq); + prof_dump_useq++; + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(filename, false, false); + } } static void @@ -1120,6 +1119,14 @@ prof_tdata_cleanup(void *arg) void prof_boot0(void) +{ + + memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, + sizeof(PROF_PREFIX_DEFAULT)); +} + +void +prof_boot1(void) { /* @@ -1133,7 +1140,7 @@ prof_boot0(void) * automatically dumped. 
*/ opt_prof = true; - opt_prof_udump = false; + opt_prof_gdump = false; prof_interval = 0; } else if (opt_prof) { if (opt_lg_prof_interval >= 0) { @@ -1147,7 +1154,7 @@ } bool -prof_boot1(void) +prof_boot2(void) { if (opt_prof) { @@ -1171,7 +1178,7 @@ return (true); enq = false; enq_idump = false; - enq_udump = false; + enq_gdump = false; if (atexit(prof_fdump) != 0) { malloc_write("<jemalloc>: Error in atexit()\n"); diff --git a/jemalloc/src/stats.c b/jemalloc/src/stats.c index 9b3271b2..3dfe0d23 100644 --- a/jemalloc/src/stats.c +++ b/jemalloc/src/stats.c @@ -57,12 +57,12 @@ static void stats_arena_print(void (*write_cb)(void *, const char *), /* * We don't want to depend on vsnprintf() for production builds, since that can - * cause unnecessary bloat for static binaries. umax2s() provides minimal - * integer printing functionality, so that malloc_printf() use can be limited to + * cause unnecessary bloat for static binaries. u2s() provides minimal integer + * printing functionality, so that malloc_printf() use can be limited to * JEMALLOC_STATS code. */ char * -umax2s(uintmax_t x, unsigned base, char *s) +u2s(uint64_t x, unsigned base, char *s) { unsigned i; @@ -72,8 +72,8 @@ umax2s(uintmax_t x, unsigned base, char *s) case 10: do { i--; - s[i] = "0123456789"[x % 10]; - x /= 10; + s[i] = "0123456789"[x % (uint64_t)10]; + x /= (uint64_t)10; } while (x > 0); break; case 16: @@ -86,8 +86,9 @@ umax2s(uintmax_t x, unsigned base, char *s) default: do { i--; - s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % base]; - x /= base; + s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % + (uint64_t)base]; + x /= (uint64_t)base; } while (x > 0); } @@ -374,6 +375,7 @@ void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { + int err; uint64_t epoch; size_t u64sz; char s[UMAX2S_BUFSIZE]; @@ -383,10 +385,27 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool bins = true; bool large = true; - /* Refresh stats, in case mallctl() was called by the application. */ + /* + * Refresh stats, in case mallctl() was called by the application. + * + * Check for OOM here, since refreshing the ctl cache can trigger + * allocation. In practice, none of the subsequent mallctl()-related + * calls in this function will cause OOM if this one succeeds. + */ epoch = 1; u64sz = sizeof(uint64_t); - xmallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t)); + err = JEMALLOC_P(mallctl)("epoch", &epoch, &u64sz, &epoch, + sizeof(uint64_t)); + if (err != 0) { + if (err == EAGAIN) { + malloc_write("<jemalloc>: Memory allocation failure in " + "mallctl(\"epoch\", ...)\n"); + return; + } + malloc_write("<jemalloc>: Failure in mallctl(\"epoch\", " + "...)\n"); + abort(); + } if (write_cb == NULL) { /* @@ -430,10 +449,12 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool bv; unsigned uv; ssize_t ssv; - size_t sv, bsz, ssz; + size_t sv, bsz, ssz, sssz, cpsz; bsz = sizeof(bool); ssz = sizeof(size_t); + sssz = sizeof(ssize_t); + cpsz = sizeof(const char *); CTL_GET("version", &cpv, const char *); write_cb(cbopaque, "Version: "); @@ -444,116 +465,140 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, bv ? "enabled" : "disabled"); write_cb(cbopaque, "\n"); - write_cb(cbopaque, "Boolean JEMALLOC_OPTIONS: "); - if ((err = JEMALLOC_P(mallctl)("opt.abort", &bv, &bsz, NULL, 0)) - == 0) - write_cb(cbopaque, bv ? 
"A" : "a"); - if ((err = JEMALLOC_P(mallctl)("prof.active", &bv, &bsz, - NULL, 0)) == 0) - write_cb(cbopaque, bv ? "E" : "e"); - if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0)) - == 0) - write_cb(cbopaque, bv ? "F" : "f"); - if ((err = JEMALLOC_P(mallctl)("opt.tcache", &bv, &bsz, NULL, - 0)) == 0) - write_cb(cbopaque, bv ? "H" : "h"); - if ((err = JEMALLOC_P(mallctl)("opt.junk", &bv, &bsz, NULL, 0)) - == 0) - write_cb(cbopaque, bv ? "J" : "j"); - if ((err = JEMALLOC_P(mallctl)("opt.prof_leak", &bv, &bsz, NULL, - 0)) == 0) - write_cb(cbopaque, bv ? "L" : "l"); - if ((err = JEMALLOC_P(mallctl)("opt.overcommit", &bv, &bsz, - NULL, 0)) == 0) - write_cb(cbopaque, bv ? "O" : "o"); - if ((err = JEMALLOC_P(mallctl)("opt.stats_print", &bv, &bsz, - NULL, 0)) == 0) - write_cb(cbopaque, bv ? "P" : "p"); - if ((err = JEMALLOC_P(mallctl)("opt.prof_accum", &bv, &bsz, - NULL, 0)) == 0) - write_cb(cbopaque, bv ? "R" : "r"); - if ((err = JEMALLOC_P(mallctl)("opt.prof_udump", &bv, &bsz, - NULL, 0)) == 0) - write_cb(cbopaque, bv ? "U" : "u"); - if ((err = JEMALLOC_P(mallctl)("opt.sysv", &bv, &bsz, NULL, 0)) - == 0) - write_cb(cbopaque, bv ? "V" : "v"); - if ((err = JEMALLOC_P(mallctl)("opt.xmalloc", &bv, &bsz, NULL, - 0)) == 0) - write_cb(cbopaque, bv ? "X" : "x"); - if ((err = JEMALLOC_P(mallctl)("opt.zero", &bv, &bsz, NULL, 0)) - == 0) - write_cb(cbopaque, bv ? "Z" : "z"); - write_cb(cbopaque, "\n"); +#define OPT_WRITE_BOOL(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &bv, &bsz, \ + NULL, 0)) == 0) { \ + write_cb(cbopaque, " opt."#n": "); \ + write_cb(cbopaque, bv ? "true" : "false"); \ + write_cb(cbopaque, "\n"); \ + } +#define OPT_WRITE_SIZE_T(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &sv, &ssz, \ + NULL, 0)) == 0) { \ + write_cb(cbopaque, " opt."#n": "); \ + write_cb(cbopaque, u2s(sv, 10, s)); \ + write_cb(cbopaque, "\n"); \ + } +#define OPT_WRITE_SSIZE_T(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &ssv, &sssz, \ + NULL, 0)) == 0) { \ + if (ssv >= 0) { \ + write_cb(cbopaque, " opt."#n": "); \ + write_cb(cbopaque, u2s(ssv, 10, s)); \ + } else { \ + write_cb(cbopaque, " opt."#n": -"); \ + write_cb(cbopaque, u2s(-ssv, 10, s)); \ + } \ + write_cb(cbopaque, "\n"); \ + } +#define OPT_WRITE_CHAR_P(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &cpv, &cpsz, \ + NULL, 0)) == 0) { \ + write_cb(cbopaque, " opt."#n": \""); \ + write_cb(cbopaque, cpv); \ + write_cb(cbopaque, "\"\n"); \ + } + + write_cb(cbopaque, "Run-time option settings:\n"); + OPT_WRITE_BOOL(abort) + OPT_WRITE_SIZE_T(lg_qspace_max) + OPT_WRITE_SIZE_T(lg_cspace_max) + OPT_WRITE_SIZE_T(lg_chunk) + OPT_WRITE_SIZE_T(narenas) + OPT_WRITE_SSIZE_T(lg_dirty_mult) + OPT_WRITE_BOOL(stats_print) + OPT_WRITE_BOOL(junk) + OPT_WRITE_BOOL(zero) + OPT_WRITE_BOOL(sysv) + OPT_WRITE_BOOL(xmalloc) + OPT_WRITE_BOOL(tcache) + OPT_WRITE_SSIZE_T(lg_tcache_gc_sweep) + OPT_WRITE_SSIZE_T(lg_tcache_max) + OPT_WRITE_BOOL(prof) + OPT_WRITE_CHAR_P(prof_prefix) + OPT_WRITE_SIZE_T(lg_prof_bt_max) + OPT_WRITE_BOOL(prof_active) + OPT_WRITE_SSIZE_T(lg_prof_sample) + OPT_WRITE_BOOL(prof_accum) + OPT_WRITE_SSIZE_T(lg_prof_tcmax) + OPT_WRITE_SSIZE_T(lg_prof_interval) + OPT_WRITE_BOOL(prof_gdump) + OPT_WRITE_BOOL(prof_leak) + OPT_WRITE_BOOL(overcommit) + +#undef OPT_WRITE_BOOL +#undef OPT_WRITE_SIZE_T +#undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_CHAR_P write_cb(cbopaque, "CPUs: "); - write_cb(cbopaque, umax2s(ncpus, 10, s)); + write_cb(cbopaque, u2s(ncpus, 10, s)); write_cb(cbopaque, "\n"); CTL_GET("arenas.narenas", &uv, unsigned); write_cb(cbopaque, "Max 
arenas: "); - write_cb(cbopaque, umax2s(uv, 10, s)); + write_cb(cbopaque, u2s(uv, 10, s)); write_cb(cbopaque, "\n"); write_cb(cbopaque, "Pointer size: "); - write_cb(cbopaque, umax2s(sizeof(void *), 10, s)); + write_cb(cbopaque, u2s(sizeof(void *), 10, s)); write_cb(cbopaque, "\n"); CTL_GET("arenas.quantum", &sv, size_t); write_cb(cbopaque, "Quantum size: "); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); CTL_GET("arenas.cacheline", &sv, size_t); write_cb(cbopaque, "Cacheline size (assumed): "); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); CTL_GET("arenas.subpage", &sv, size_t); write_cb(cbopaque, "Subpage spacing: "); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); if ((err = JEMALLOC_P(mallctl)("arenas.tspace_min", &sv, &ssz, NULL, 0)) == 0) { write_cb(cbopaque, "Tiny 2^n-spaced sizes: ["); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ".."); CTL_GET("arenas.tspace_max", &sv, size_t); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "]\n"); } CTL_GET("arenas.qspace_min", &sv, size_t); write_cb(cbopaque, "Quantum-spaced sizes: ["); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ".."); CTL_GET("arenas.qspace_max", &sv, size_t); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "]\n"); CTL_GET("arenas.cspace_min", &sv, size_t); write_cb(cbopaque, "Cacheline-spaced sizes: ["); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ".."); CTL_GET("arenas.cspace_max", &sv, size_t); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "]\n"); CTL_GET("arenas.sspace_min", &sv, size_t); write_cb(cbopaque, "Subpage-spaced sizes: ["); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ".."); CTL_GET("arenas.sspace_max", &sv, size_t); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "]\n"); CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); if (ssv >= 0) { write_cb(cbopaque, "Min active:dirty page ratio per arena: "); - write_cb(cbopaque, umax2s((1U << ssv), 10, s)); + write_cb(cbopaque, u2s((1U << ssv), 10, s)); write_cb(cbopaque, ":1\n"); } else { write_cb(cbopaque, @@ -563,7 +608,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, &ssz, NULL, 0)) == 0) { write_cb(cbopaque, "Maximum thread-cached size class: "); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); } if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv, @@ -573,50 +618,51 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("opt.tcache", &tcache_enabled, bool); write_cb(cbopaque, "Thread cache GC sweep interval: "); write_cb(cbopaque, tcache_enabled && ssv >= 0 ? 
- umax2s(tcache_gc_sweep, 10, s) : "N/A"); + u2s(tcache_gc_sweep, 10, s) : "N/A"); write_cb(cbopaque, "\n"); } if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0)) == 0 && bv) { CTL_GET("opt.lg_prof_bt_max", &sv, size_t); write_cb(cbopaque, "Maximum profile backtrace depth: "); - write_cb(cbopaque, umax2s((1U << sv), 10, s)); + write_cb(cbopaque, u2s((1U << sv), 10, s)); write_cb(cbopaque, "\n"); CTL_GET("opt.lg_prof_tcmax", &ssv, ssize_t); write_cb(cbopaque, "Maximum per thread backtrace cache: "); if (ssv >= 0) { - write_cb(cbopaque, umax2s((1U << ssv), 10, s)); + write_cb(cbopaque, u2s((1U << ssv), 10, s)); write_cb(cbopaque, " (2^"); - write_cb(cbopaque, umax2s(ssv, 10, s)); + write_cb(cbopaque, u2s(ssv, 10, s)); write_cb(cbopaque, ")\n"); } else write_cb(cbopaque, "N/A\n"); CTL_GET("opt.lg_prof_sample", &sv, size_t); write_cb(cbopaque, "Average profile sample interval: "); - write_cb(cbopaque, umax2s((1U << sv), 10, s)); + write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s)); write_cb(cbopaque, " (2^"); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ")\n"); CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); write_cb(cbopaque, "Average profile dump interval: "); if (ssv >= 0) { - write_cb(cbopaque, umax2s((1U << ssv), 10, s)); + write_cb(cbopaque, u2s((((uint64_t)1U) << ssv), + 10, s)); write_cb(cbopaque, " (2^"); - write_cb(cbopaque, umax2s(ssv, 10, s)); + write_cb(cbopaque, u2s(ssv, 10, s)); write_cb(cbopaque, ")\n"); } else write_cb(cbopaque, "N/A\n"); } CTL_GET("arenas.chunksize", &sv, size_t); write_cb(cbopaque, "Chunk size: "); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); CTL_GET("opt.lg_chunk", &sv, size_t); write_cb(cbopaque, " (2^"); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ")\n"); } diff --git a/jemalloc/src/tcache.c b/jemalloc/src/tcache.c index 3fb8f2bd..cbbe7a11 100644 --- a/jemalloc/src/tcache.c +++ b/jemalloc/src/tcache.c @@ -5,7 +5,7 @@ /* Data. */ bool opt_tcache = true; -ssize_t opt_lg_tcache_maxclass = LG_TCACHE_MAXCLASS_DEFAULT; +ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT; /* Map of thread-specific caches. */ @@ -384,16 +384,16 @@ tcache_boot(void) if (opt_tcache) { /* - * If necessary, clamp opt_lg_tcache_maxclass, now that + * If necessary, clamp opt_lg_tcache_max, now that * small_maxclass and arena_maxclass are known. 
*/ - if (opt_lg_tcache_maxclass < 0 || (1U << - opt_lg_tcache_maxclass) < small_maxclass) + if (opt_lg_tcache_max < 0 || (1U << + opt_lg_tcache_max) < small_maxclass) tcache_maxclass = small_maxclass; - else if ((1U << opt_lg_tcache_maxclass) > arena_maxclass) + else if ((1U << opt_lg_tcache_max) > arena_maxclass) tcache_maxclass = arena_maxclass; else - tcache_maxclass = (1U << opt_lg_tcache_maxclass); + tcache_maxclass = (1U << opt_lg_tcache_max); nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT); diff --git a/jemalloc/test/rallocm.c b/jemalloc/test/rallocm.c index 7e8a271c..a8cadebc 100644 --- a/jemalloc/test/rallocm.c +++ b/jemalloc/test/rallocm.c @@ -14,14 +14,14 @@ main(void) fprintf(stderr, "Test begin\n"); - r = allocm(&p, &sz, 42, 0); + r = JEMALLOC_P(allocm)(&p, &sz, 42, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); abort(); } q = p; - r = rallocm(&q, &tsz, sz, 0, ALLOCM_NO_MOVE); + r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) @@ -32,7 +32,7 @@ main(void) } q = p; - r = rallocm(&q, &tsz, sz, 5, ALLOCM_NO_MOVE); + r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 5, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) @@ -43,7 +43,7 @@ main(void) } q = p; - r = rallocm(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE); + r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_ERR_NOT_MOVED) fprintf(stderr, "Unexpected rallocm() result\n"); if (q != p) @@ -54,7 +54,7 @@ main(void) } q = p; - r = rallocm(&q, &tsz, sz + 5, 0, 0); + r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, 0); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q == p) @@ -66,7 +66,7 @@ main(void) p = q; sz = tsz; - r = rallocm(&q, &tsz, 8192, 0, 0); + r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, 0); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q == p) @@ -78,7 +78,7 @@ main(void) p = q; sz = tsz; - r = rallocm(&q, &tsz, 16384, 0, 0); + r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, 0); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (tsz == sz) { @@ -88,7 +88,7 @@ main(void) p = q; sz = tsz; - r = rallocm(&q, &tsz, 8192, 0, ALLOCM_NO_MOVE); + r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) @@ -99,7 +99,7 @@ main(void) } sz = tsz; - r = rallocm(&q, &tsz, 16384, 0, ALLOCM_NO_MOVE); + r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) @@ -110,7 +110,7 @@ main(void) } sz = tsz; - dallocm(p, 0); + JEMALLOC_P(dallocm)(p, 0); fprintf(stderr, "Test end\n"); return (0);
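[Editor's note: the examples below are not part of the patch. They are usage sketches for the interfaces this commit introduces, assuming a default build with an empty --with-jemalloc-prefix (so the public names are malloc_conf and mallctl, and the environment variable is MALLOC_CONF), and assuming the public header is installed as <jemalloc/jemalloc.h>.]

Sketch 1: supplying a compile-time option string through the malloc_conf global that replaces malloc_options above. The keys are the opt.* names handled by malloc_conf_init(); lg_chunk, narenas, and stats_print are used here purely for illustration.

#include <stdlib.h>

extern const char *malloc_conf;	/* Defined by the allocator (jemalloc.c). */

int
main(void)
{
	void *p;

	/*
	 * Set before the allocator initializes: all three option sources
	 * are read exactly once, from malloc_init_hard().  The
	 * /etc/malloc.conf symlink and then the MALLOC_CONF environment
	 * variable are parsed after this string, so they override it.
	 */
	malloc_conf = "lg_chunk:24,narenas:4,stats_print:true";

	p = malloc(42);
	if (p == NULL)
		return (1);
	free(p);
	return (0);	/* stats_print:true prints statistics via atexit(). */
}

The same settings can be injected without recompiling, e.g. MALLOC_CONF="lg_chunk:24,narenas:4" ./app; because the environment variable is parsed last, it takes precedence over both the compiled-in string and the symbolic link.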
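Sketch 2: reading the reorganized opt.* mallctl namespace back at run time. Every option handled by malloc_conf_init() gains a read-only entry in opt_node above, so a program can verify what the allocator actually booted with. The header path is an assumption, as noted earlier.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

static void
print_boot_options(void)
{
	bool abort_opt;
	size_t lg_chunk, narenas;
	size_t bsz = sizeof(bool);
	size_t ssz = sizeof(size_t);

	/* Lookups fail (ENOENT) for options that were compiled out. */
	if (mallctl("opt.abort", &abort_opt, &bsz, NULL, 0) == 0)
		printf("opt.abort: %s\n", abort_opt ? "true" : "false");
	if (mallctl("opt.lg_chunk", &lg_chunk, &ssz, NULL, 0) == 0)
		printf("opt.lg_chunk: %zu\n", lg_chunk);
	if (mallctl("opt.narenas", &narenas, &ssz, NULL, 0) == 0)
		printf("opt.narenas: %zu\n", narenas);
}

int
main(void)
{

	print_boot_options();
	return (0);
}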
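A closing note on the profiling changes: dump file names are now derived from opt.prof_prefix (default "jeprof") rather than the old JEMALLOC_PROF_PREFIX environment variable, following the pattern <prefix>.<pid>.<seq>.v<vseq>.heap, where v is 'f' for the final atexit() dump, 'i' for interval dumps, 'm' for manual dumps, and 'u' for the renamed high-water (gdump) dumps. Setting prof_prefix to the empty string suppresses all automatically triggered dumps. For example, with the defaults and pid 12345, the final dump is written to jeprof.12345.0.f.heap, whereas running MALLOC_CONF="prof:true,prof_prefix:/tmp/myapp" ./app would produce /tmp/myapp.12345.0.f.heap instead.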