4201af0542
Add malloc_swap_enable(). Add the O/o JEMALLOC_OPTIONS flags, which control memory overcommit. Fix mapped memory stats reporting for arenas.
699 lines
25 KiB
Groff
699 lines
25 KiB
Groff
.\" Copyright (c) 2006-2010 Jason Evans <jasone@canonware.com>.
|
|
.\" All rights reserved.
|
|
.\" Copyright (c) 2009 Facebook, Inc. All rights reserved.
|
|
.\"
|
|
.\" See COPYING for licensing terms provided by the above copyright holders.
|
|
.\"
|
|
.\" Copyright (c) 1980, 1991, 1993
|
|
.\" The Regents of the University of California. All rights reserved.
|
|
.\"
|
|
.\" This code is derived from software contributed to Berkeley by
|
|
.\" the American National Standards Committee X3, on Information
|
|
.\" Processing Systems.
|
|
.\"
|
|
.\" Redistribution and use in source and binary forms, with or without
|
|
.\" modification, are permitted provided that the following conditions
|
|
.\" are met:
|
|
.\" 1. Redistributions of source code must retain the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer.
|
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer in the
|
|
.\" documentation and/or other materials provided with the distribution.
|
|
.\" 3. Neither the name of the University nor the names of its contributors
|
|
.\" may be used to endorse or promote products derived from this software
|
|
.\" without specific prior written permission.
|
|
.\"
|
|
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
.\" SUCH DAMAGE.
|
|
.\"
|
|
.\" @(#)malloc.3 8.1 (Berkeley) 6/4/93
|
|
.\" $FreeBSD: head/lib/libc/stdlib/malloc.3 182225 2008-08-27 02:00:53Z jasone $
|
|
.\"
|
|
.Dd January 23, 2010
|
|
.Dt JEMALLOC 3
|
|
.Os
|
|
.Sh NAME
|
|
.Nm @jemalloc_prefix@malloc ,
|
|
.Nm @jemalloc_prefix@calloc ,
|
|
.Nm @jemalloc_prefix@posix_memalign ,
|
|
.Nm @jemalloc_prefix@realloc ,
|
|
.Nm @jemalloc_prefix@free ,
|
|
.Nm @jemalloc_prefix@malloc_usable_size ,
|
|
@roff_swap@.Nm @jemalloc_prefix@malloc_swap_enable ,
|
|
@roff_tcache@.Nm @jemalloc_prefix@malloc_tcache_flush ,
|
|
.Nm @jemalloc_prefix@malloc_stats_print
|
|
.Nd general purpose memory allocation functions
|
|
.Sh LIBRARY
|
|
.Lb libjemalloc@install_suffix@
|
|
.Sh SYNOPSIS
|
|
.In stdlib.h
|
|
.In jemalloc@install_suffix@.h
|
|
.Ft void *
|
|
.Fn @jemalloc_prefix@malloc "size_t size"
|
|
.Ft void *
|
|
.Fn @jemalloc_prefix@calloc "size_t number" "size_t size"
|
|
.Ft int
|
|
.Fn @jemalloc_prefix@posix_memalign "void **ptr" "size_t alignment" "size_t size"
|
|
.Ft void *
|
|
.Fn @jemalloc_prefix@realloc "void *ptr" "size_t size"
|
|
.Ft void
|
|
.Fn @jemalloc_prefix@free "void *ptr"
|
|
.Ft size_t
|
|
.Fn @jemalloc_prefix@malloc_usable_size "const void *ptr"
|
|
@roff_swap@.Ft int
|
|
@roff_swap@.Fn @jemalloc_prefix@malloc_swap_enable "const int *fds" "unsigned nfds" "int prezeroed"
|
|
@roff_tcache@.Ft void
|
|
@roff_tcache@.Fn @jemalloc_prefix@malloc_tcache_flush "void"
|
|
.Ft void
|
|
.Fn @jemalloc_prefix@malloc_stats_print "void (*write4)(void *" "const char *" "const char *" "const char *" "const char *)" "void *w4opaque" "const char *opts"
|
|
.Ft const char *
|
|
.Va @jemalloc_prefix@malloc_options ;
|
|
.Ft void
|
|
.Fo \*(lp*@jemalloc_prefix@malloc_message\*(rp
|
|
.Fa "void *w4opaque" "const char *p1" "const char *p2" "const char *p3" "const char *p4"
|
|
.Fc
|
|
.Sh DESCRIPTION
|
|
The
|
|
.Fn @jemalloc_prefix@malloc
|
|
function allocates
|
|
.Fa size
|
|
bytes of uninitialized memory.
|
|
The allocated space is suitably aligned
|
|
@roff_tiny@(after possible pointer coercion)
|
|
for storage of any type of object.
|
|
.Pp
|
|
The
|
|
.Fn @jemalloc_prefix@calloc
|
|
function allocates space for
|
|
.Fa number
|
|
objects,
|
|
each
|
|
.Fa size
|
|
bytes in length.
|
|
The result is identical to calling
|
|
.Fn @jemalloc_prefix@malloc
|
|
with an argument of
|
|
.Dq "number * size" ,
|
|
with the exception that the allocated memory is explicitly initialized
|
|
to zero bytes.
|
|
.Pp
|
|
The
|
|
.Fn @jemalloc_prefix@posix_memalign
|
|
function allocates
|
|
.Fa size
|
|
bytes of memory such that the allocation's base address is an even multiple of
|
|
.Fa alignment ,
|
|
and returns the allocation in the value pointed to by
|
|
.Fa ptr .
|
|
The requested
|
|
.Fa alignment
|
|
must be a power of 2 at least as large as
|
|
.Fn sizeof "void *" .
|
|
.Pp
|
|
The
|
|
.Fn @jemalloc_prefix@realloc
|
|
function changes the size of the previously allocated memory referenced by
|
|
.Fa ptr
|
|
to
|
|
.Fa size
|
|
bytes.
|
|
The contents of the memory are unchanged up to the lesser of the new and
|
|
old sizes.
|
|
If the new size is larger,
|
|
the contents of the newly allocated portion of the memory are undefined.
|
|
Upon success, the memory referenced by
|
|
.Fa ptr
|
|
is freed and a pointer to the newly allocated memory is returned.
|
|
Note that
|
|
.Fn @jemalloc_prefix@realloc
|
|
may move the memory allocation, resulting in a different return value than
|
|
.Fa ptr .
|
|
If
|
|
.Fa ptr
|
|
is
|
|
.Dv NULL ,
|
|
the
|
|
.Fn @jemalloc_prefix@realloc
|
|
function behaves identically to
|
|
.Fn @jemalloc_prefix@malloc
|
|
for the specified size.
|
|
.Pp
|
|
The
|
|
.Fn @jemalloc_prefix@free
|
|
function causes the allocated memory referenced by
|
|
.Fa ptr
|
|
to be made available for future allocations.
|
|
If
|
|
.Fa ptr
|
|
is
|
|
.Dv NULL ,
|
|
no action occurs.
|
|
.Pp
|
|
The
|
|
.Fn @jemalloc_prefix@malloc_usable_size
|
|
function returns the usable size of the allocation pointed to by
|
|
.Fa ptr .
|
|
The return value may be larger than the size that was requested during
|
|
allocation.
|
|
The
|
|
.Fn @jemalloc_prefix@malloc_usable_size
|
|
function is not a mechanism for in-place
|
|
.Fn @jemalloc_prefix@realloc ;
|
|
rather it is provided solely as a tool for introspection purposes.
|
|
Any discrepancy between the requested allocation size and the size reported by
|
|
.Fn @jemalloc_prefix@malloc_usable_size
|
|
should not be depended on, since such behavior is entirely
|
|
implementation-dependent.
|
|
@roff_swap@.Pp
|
|
@roff_swap@The
|
|
@roff_swap@.Fn @jemalloc_prefix@malloc_swap_enable
|
|
@roff_swap@function opens and contiguously maps a list of
|
|
@roff_swap@.Fa nfds
|
|
@roff_swap@file descriptors pointed to by
|
|
@roff_swap@.Fa fds
|
|
@roff_swap@via
|
|
@roff_swap@.Xr mmap 2 .
|
|
@roff_swap@The resulting virtual memory region is preferred over anonymous
|
|
@roff_swap@.Xr mmap 2
|
|
@roff_swap@@roff_dss@and
|
|
@roff_swap@@roff_dss@.Xr sbrk 2
|
|
@roff_swap@memory.
|
|
@roff_swap@Note that if a file's size is not a multiple of the page size, it is
|
|
@roff_swap@automatically truncated to the nearest page size multiple.
|
|
@roff_swap@If
|
|
@roff_swap@.Fa prezeroed
|
|
@roff_swap@is non-zero, the allocator assumes that the file(s) contain nothing
|
|
@roff_swap@but nil bytes.
|
|
@roff_swap@If this assumption is violated, allocator behavior is undefined.
|
|
@roff_tcache@.Pp
|
|
@roff_tcache@The
|
|
@roff_tcache@.Fn @jemalloc_prefix@malloc_tcache_flush
|
|
@roff_tcache@function releases all cached objects and internal data structures
|
|
@roff_tcache@associated with the calling thread's thread-specific cache.
|
|
@roff_tcache@Ordinarily, this function need not be called, since automatic
|
|
@roff_tcache@periodic incremental garbage collection occurs, and the thread
|
|
@roff_tcache@cache is automatically discarded when a thread exits.
|
|
@roff_tcache@However, garbage collection is triggered by allocation activity,
|
|
@roff_tcache@so it is possible for a thread that stops allocating/deallocating
|
|
@roff_tcache@to retain its cache indefinitely, in which case the developer may
|
|
@roff_tcache@find this function useful.
|
|
.Pp
|
|
The
|
|
.Fn @jemalloc_prefix@malloc_stats_print
|
|
function writes human-readable summary statistics via the
|
|
.Fa write4
|
|
callback function pointer and
|
|
.Fa w4opaque
|
|
data passed to
|
|
.Fn write4 ,
|
|
or
|
|
.Fn @jemalloc_prefix@malloc_message
|
|
if
|
|
.Fa write4
|
|
is
|
|
.Dv NULL .
|
|
This function can be called repeatedly.
|
|
General information that never changes
|
|
during execution can be omitted by specifying
|
|
.Dq g
|
|
as a character within the
|
|
.Fa opts
|
|
string.
|
|
@roff_stats@.Dq m
|
|
@roff_stats@and
|
|
@roff_stats@.Dq a
|
|
@roff_stats@can be specified to omit merged arena and per arena statistics,
|
|
@roff_stats@respectively.
|
|
@roff_stats@.Dq b
|
|
@roff_stats@and
|
|
@roff_stats@.Dq l
|
|
@roff_stats@can be specified to omit per size class statistics for bins and
|
|
@roff_stats@large objects, respectively.
|
|
Unrecognized characters are silently ignored.
|
|
@roff_tcache@Note that thread caching may prevent some statistics from being
|
|
@roff_tcache@completely up to date, since extra locking would be required to
|
|
@roff_tcache@merge counters that track thread cache operations.
|
|
.Sh TUNING
|
|
Once, when the first call is made to one of these memory allocation
|
|
routines, various flags will be set or reset, which affects the
|
|
workings of this allocator implementation.
|
|
.Pp
|
|
The
|
|
.Dq name
|
|
of the file referenced by the symbolic link named
|
|
.Pa /etc/jemalloc.conf ,
|
|
the value of the environment variable
|
|
.Ev JEMALLOC_OPTIONS ,
|
|
and the string pointed to by the global variable
|
|
.Va @jemalloc_prefix@malloc_options
|
|
will be interpreted, in that order, from left to right as flags.
|
|
.Pp
|
|
Each flag is a single letter, optionally prefixed by a non-negative base 10
|
|
integer repetition count.
|
|
For example,
|
|
.Dq 3N
|
|
is equivalent to
|
|
.Dq NNN .
|
|
Some flags control parameter magnitudes, where uppercase increases the
|
|
magnitude, and lowercase decreases the magnitude.
|
|
Other flags control boolean parameters, where uppercase indicates that a
|
|
behavior is set, or on, and lowercase means that a behavior is not set, or off.
|
|
.Bl -tag -width indent
|
|
.It A
|
|
All warnings (except for the warning about unknown
|
|
flags being set) become fatal.
|
|
The process will call
|
|
.Xr abort 3
|
|
in these cases.
|
|
.It C
|
|
Double/halve the size of the maximum size class that is a multiple of the
|
|
cacheline size (64).
|
|
Above this size, subpage spacing (256 bytes) is used for size classes.
|
|
The default value is 512 bytes.
|
|
.It D
|
|
Halve/double the per-arena minimum ratio of active to dirty pages.
|
|
Some dirty unused pages may be allowed to accumulate, within the limit set by
|
|
the ratio, before informing the kernel about at least half of those pages via
|
|
.Xr madvise 2 .
|
|
This provides the kernel with sufficient information to recycle dirty pages if
|
|
physical memory becomes scarce and the pages remain unused.
|
|
The default minimum ratio is 32:1;
|
|
.Ev JEMALLOC_OPTIONS=6D
|
|
will disable dirty page purging.
|
|
@roff_tcache@.It G
|
|
@roff_tcache@Double/halve the approximate interval (counted in terms of
|
|
@roff_tcache@thread-specific cache allocation/deallocation events) between full
|
|
@roff_tcache@thread-specific cache garbage collection sweeps.
|
|
@roff_tcache@Garbage collection is actually performed incrementally, one size
|
|
@roff_tcache@class at a time, in order to avoid large collection pauses.
|
|
@roff_tcache@The default sweep interval is 8192;
|
|
@roff_tcache@.Ev JEMALLOC_OPTIONS=14g
|
|
@roff_tcache@will disable garbage collection.
|
|
@roff_tcache@.It H
|
|
@roff_tcache@Double/halve the number of thread-specific cache slots per size
|
|
@roff_tcache@class.
|
|
@roff_tcache@When there are multiple threads, each thread uses a
|
|
@roff_tcache@thread-specific cache for small and medium objects.
|
|
@roff_tcache@Thread-specific caching allows many allocations to be satisfied
|
|
@roff_tcache@without performing any thread synchronization, at the cost of
|
|
@roff_tcache@increased memory use.
|
|
@roff_tcache@See the
|
|
@roff_tcache@.Dq G
|
|
@roff_tcache@option for related tuning information.
|
|
@roff_tcache@The default number of cache slots is 128;
|
|
@roff_tcache@.Ev JEMALLOC_OPTIONS=7h
|
|
@roff_tcache@will disable thread-specific caching.
|
|
@roff_tcache@Note that one cache slot per size class is not a valid
|
|
@roff_tcache@configuration due to implementation details.
|
|
@roff_fill@.It J
|
|
@roff_fill@Each byte of new memory allocated by
|
|
@roff_fill@.Fn @jemalloc_prefix@malloc
|
|
@roff_fill@or
|
|
@roff_fill@.Fn @jemalloc_prefix@realloc
|
|
@roff_fill@will be initialized to 0xa5.
|
|
@roff_fill@All memory returned by
|
|
@roff_fill@.Fn @jemalloc_prefix@free
|
|
@roff_fill@or
|
|
@roff_fill@.Fn @jemalloc_prefix@realloc
|
|
@roff_fill@will be initialized to 0x5a.
|
|
@roff_fill@This is intended for debugging and will impact performance
|
|
@roff_fill@negatively.
|
|
.It K
|
|
Double/halve the virtual memory chunk size.
|
|
The default chunk size is 4 MiB.
|
|
.It M
|
|
Double/halve the size of the maximum medium size class.
|
|
The valid range is from one page to one half chunk.
|
|
The default value is 32 KiB.
|
|
.It N
|
|
Double/halve the number of arenas.
|
|
The default number of arenas is
|
|
@roff_tcache@two
|
|
@roff_no_tcache@four
|
|
times the number of CPUs, or one if there is a single CPU.
|
|
@roff_swap@.It O
|
|
@roff_swap@Over-commit memory as a side effect of using anonymous
|
|
@roff_swap@.Xr mmap 2
|
|
@roff_swap@@roff_dss@and
|
|
@roff_swap@@roff_dss@.Xr sbrk 2
|
|
@roff_swap@for virtual memory allocation.
|
|
@roff_swap@In order for overcommit to be disabled, the
|
|
@roff_swap@.Fn @jemalloc_prefix@malloc_swap_enable
|
|
@roff_swap@function must have been successfully called.
|
|
@roff_swap@This option is enabled by default.
|
|
.It P
|
|
The
|
|
.Fn @jemalloc_prefix@malloc_stats_print
|
|
function is called at program exit via an
|
|
.Xr atexit 3
|
|
function.
|
|
@roff_stats@This has the potential to cause deadlock for a multi-threaded
|
|
@roff_stats@process that exits while one or more threads are executing in the
|
|
@roff_stats@memory allocation functions.
|
|
@roff_stats@Therefore, this option should only be used with care; it is
|
|
@roff_stats@primarily intended as a performance tuning aid during application
|
|
@roff_stats@development.
|
|
.It Q
|
|
Double/halve the size of the maximum size class that is a multiple of the
|
|
quantum (8 or 16 bytes, depending on architecture).
|
|
Above this size, cacheline spacing is used for size classes.
|
|
The default value is 128 bytes.
|
|
@roff_tcache@.It S
|
|
@roff_tcache@Sort the objects of a particular size class that are stored in a
|
|
@roff_tcache@thread-specific cache just before flushing some of them from the
|
|
@roff_tcache@cache, such that the objects highest in memory are preferentially
|
|
@roff_tcache@freed.
|
|
@roff_tcache@This tends to reduce fragmentation, but sorting is O(n lg n), and in
|
|
@roff_tcache@practice it is expensive enough to have a moderate performance
|
|
@roff_tcache@impact.
|
|
@roff_tcache@This option is enabled by default.
|
|
@roff_trace@.It T
|
|
@roff_trace@Write a verbose trace log to a set of files named according to the
|
|
@roff_trace@pattern
|
|
@roff_trace@.Pa jemtr.<pid>.<arena>
|
|
@roff_trace@for all allocation operations.
|
|
@roff_trace@The result can be converted from
|
|
@roff_trace@.Nm jemtr
|
|
@roff_trace@to
|
|
@roff_trace@.Nm mtr
|
|
@roff_trace@format via
|
|
@roff_trace@.Xr jemtr2mtr 1 ,
|
|
@roff_trace@the output of which can be used by
|
|
@roff_trace@.Xr mtrplay 1
|
|
@roff_trace@and
|
|
@roff_trace@.Xr mtrgraph 1 .
|
|
@roff_sysv@.It V
|
|
@roff_sysv@Attempting to allocate zero bytes will return a
|
|
@roff_sysv@.Dv NULL
|
|
@roff_sysv@pointer instead of a valid pointer.
|
|
@roff_sysv@(The default behavior is to make a minimal allocation and return a
|
|
@roff_sysv@pointer to it.)
|
|
@roff_sysv@This option is provided for System V compatibility.
|
|
@roff_sysv@@roff_xmalloc@This option is incompatible with the
|
|
@roff_sysv@@roff_xmalloc@.Dq X
|
|
@roff_sysv@@roff_xmalloc@option.
|
|
@roff_xmalloc@.It X
|
|
@roff_xmalloc@Rather than return failure for any allocation function, display a
|
|
@roff_xmalloc@diagnostic message on
|
|
@roff_xmalloc@.Dv STDERR_FILENO
|
|
@roff_xmalloc@and cause the program to drop core (using
|
|
@roff_xmalloc@.Xr abort 3 ) .
|
|
@roff_xmalloc@This option should be set at compile time by including the
|
|
@roff_xmalloc@following in the source code:
|
|
@roff_xmalloc@.Bd -literal -offset indent
|
|
@roff_xmalloc@@jemalloc_prefix@malloc_options = "X";
|
|
@roff_xmalloc@.Ed
|
|
@roff_fill@.It Z
|
|
@roff_fill@Each byte of new memory allocated by
|
|
@roff_fill@.Fn @jemalloc_prefix@malloc
|
|
@roff_fill@or
|
|
@roff_fill@.Fn @jemalloc_prefix@realloc
|
|
@roff_fill@will be initialized to 0.
|
|
@roff_fill@Note that this initialization only happens once for each byte, so
|
|
@roff_fill@.Fn @jemalloc_prefix@realloc
|
|
@roff_fill@calls do not zero memory that was previously allocated.
|
|
@roff_fill@This is intended for debugging and will impact performance
|
|
@roff_fill@negatively.
|
|
.El
|
|
@roff_fill@.Pp
|
|
@roff_fill@The
|
|
@roff_fill@.Dq J
|
|
@roff_fill@and
|
|
@roff_fill@.Dq Z
|
|
@roff_fill@options are intended for testing and debugging.
|
|
@roff_fill@An application which changes its behavior when these options are used
|
|
@roff_fill@is flawed.
|
|
.Sh IMPLEMENTATION NOTES
|
|
@roff_dss@Traditionally, allocators have used
|
|
@roff_dss@.Xr sbrk 2
|
|
@roff_dss@to obtain memory, which is suboptimal for several reasons, including
|
|
@roff_dss@race conditions, increased fragmentation, and artificial limitations
|
|
@roff_dss@on maximum usable memory.
|
|
@roff_dss@This allocator uses both
|
|
@roff_dss@.Xr sbrk 2
|
|
@roff_dss@and
|
|
@roff_dss@.Xr mmap 2 ,
|
|
@roff_dss@in that order of preference.
|
|
.Pp
|
|
This allocator uses multiple arenas in order to reduce lock contention for
|
|
threaded programs on multi-processor systems.
|
|
This works well with regard to threading scalability, but incurs some costs.
|
|
There is a small fixed per-arena overhead, and additionally, arenas manage
|
|
memory completely independently of each other, which means a small fixed
|
|
increase in overall memory fragmentation.
|
|
These overheads are not generally an issue, given the number of arenas normally
|
|
used.
|
|
Note that using substantially more arenas than the default is not likely to
|
|
improve performance, mainly due to reduced cache performance.
|
|
However, it may make sense to reduce the number of arenas if an application
|
|
does not make much use of the allocation functions.
|
|
.Pp
|
|
@roff_tcache@In addition to multiple arenas, this allocator supports
|
|
@roff_tcache@thread-specific caching for small and medium objects, in order to
|
|
@roff_tcache@make it possible to completely avoid synchronization for most small
|
|
@roff_tcache@and medium allocation requests.
|
|
@roff_tcache@Such caching allows very fast allocation in the common case, but it
|
|
@roff_tcache@increases memory usage and fragmentation, since a bounded number of
|
|
@roff_tcache@objects can remain allocated in each thread cache.
|
|
@roff_tcache@.Pp
|
|
Memory is conceptually broken into equal-sized chunks, where the chunk size is
|
|
a power of two that is greater than the page size.
|
|
Chunks are always aligned to multiples of the chunk size.
|
|
This alignment makes it possible to find metadata for user objects very
|
|
quickly.
|
|
.Pp
|
|
User objects are broken into four categories according to size: small, medium,
|
|
large, and huge.
|
|
Small objects are smaller than one page.
|
|
Medium objects range from one page to an upper limit determined at run time (see
|
|
the
|
|
.Dq M
|
|
option).
|
|
Large objects are smaller than the chunk size.
|
|
Huge objects are a multiple of the chunk size.
|
|
Small, medium, and large objects are managed by arenas; huge objects are managed
|
|
separately in a single data structure that is shared by all threads.
|
|
Huge objects are used by applications infrequently enough that this single
|
|
data structure is not a scalability issue.
|
|
.Pp
|
|
Each chunk that is managed by an arena tracks its contents as runs of
|
|
contiguous pages (unused, backing a set of small or medium objects, or backing
|
|
one large object).
|
|
The combination of chunk alignment and chunk page maps makes it possible to
|
|
determine all metadata regarding small and large allocations in constant time.
|
|
.Pp
|
|
Small and medium objects are managed in groups by page runs.
|
|
Each run maintains a bitmap that tracks which regions are in use.
|
|
@roff_tiny@Allocation requests that are no more than half the quantum (8 or 16,
|
|
@roff_tiny@depending on architecture) are rounded up to the nearest power of
|
|
@roff_tiny@two.
|
|
Allocation requests that are
|
|
@roff_tiny@more than half the quantum, but
|
|
no more than the minimum cacheline-multiple size class (see the
|
|
.Dq Q
|
|
option) are rounded up to the nearest multiple of the
|
|
@roff_tiny@quantum.
|
|
@roff_no_tiny@quantum (8 or 16, depending on architecture).
|
|
Allocation requests that are more than the minimum cacheline-multiple size
|
|
class, but no more than the minimum subpage-multiple size class (see the
|
|
.Dq C
|
|
option) are rounded up to the nearest multiple of the cacheline size (64).
|
|
Allocation requests that are more than the minimum subpage-multiple size class,
|
|
but no more than the maximum subpage-multiple size class are rounded up to the
|
|
nearest multiple of the subpage size (256).
|
|
Allocation requests that are more than the maximum subpage-multiple size class,
|
|
but no more than the maximum medium size class (see the
|
|
.Dq M
|
|
option) are rounded up to the nearest medium size class; spacing is an
|
|
automatically determined power of two and ranges from the subpage size to the
|
|
page size.
|
|
Allocation requests that are more than the maximum medium size class, but small
|
|
enough to fit in an arena-managed chunk (see the
|
|
.Dq K
|
|
option), are rounded up to the nearest run size.
|
|
Allocation requests that are too large to fit in an arena-managed chunk are
|
|
rounded up to the nearest multiple of the chunk size.
|
|
.Pp
|
|
Allocations are packed tightly together, which can be an issue for
|
|
multi-threaded applications.
|
|
If you need to ensure that allocations do not suffer from cacheline sharing,
|
|
round your allocation requests up to the nearest multiple of the cacheline
|
|
size.
|
|
.Sh DEBUGGING MALLOC PROBLEMS
|
|
The first thing to do is to set the
|
|
.Dq A
|
|
option.
|
|
This option forces a coredump (if possible) at the first sign of trouble,
|
|
rather than the normal policy of trying to continue if at all possible.
|
|
.Pp
|
|
It is probably also a good idea to recompile the program with suitable
|
|
options and symbols for debugger support.
|
|
.Pp
|
|
@roff_fill@If the program starts to give unusual results, coredump or generally
|
|
@roff_fill@behave differently without emitting any of the messages mentioned in
|
|
@roff_fill@the next section, it is likely because it depends on the storage
|
|
@roff_fill@being filled with zero bytes.
|
|
@roff_fill@Try running it with the
|
|
@roff_fill@.Dq Z
|
|
@roff_fill@option set;
|
|
@roff_fill@if that improves the situation, this diagnosis has been confirmed.
|
|
@roff_fill@If the program still misbehaves,
|
|
@roff_fill@the likely problem is accessing memory outside the allocated area.
|
|
@roff_fill@.Pp
|
|
@roff_fill@Alternatively, if the symptoms are not easy to reproduce, setting the
|
|
@roff_fill@.Dq J
|
|
@roff_fill@option may help provoke the problem.
|
|
@roff_fill@.Pp
|
|
@roff_trace@In truly difficult cases, the
|
|
@roff_trace@.Dq T
|
|
@roff_trace@option can provide a detailed trace of all calls made to these
|
|
@roff_trace@functions.
|
|
@roff_trace@.Pp
|
|
Unfortunately this implementation does not provide much detail about
|
|
the problems it detects; the performance impact for storing such information
|
|
would be prohibitive.
|
|
There are a number of allocator implementations available on the Internet
|
|
which focus on detecting and pinpointing problems by trading performance for
|
|
extra sanity checks and detailed diagnostics.
|
|
.Sh DIAGNOSTIC MESSAGES
|
|
If any of the memory allocation/deallocation functions detect an error or
|
|
warning condition, a message will be printed to file descriptor
|
|
.Dv STDERR_FILENO .
|
|
Errors will result in the process dumping core.
|
|
If the
|
|
.Dq A
|
|
option is set, all warnings are treated as errors.
|
|
.Pp
|
|
The
|
|
.Va @jemalloc_prefix@malloc_message
|
|
variable allows the programmer to override the function which emits the text
|
|
strings forming the errors and warnings if for some reason the
|
|
.Dv STDERR_FILENO
|
|
file descriptor is not suitable for this.
|
|
.Va @jemalloc_prefix@malloc_message
|
|
takes the
|
|
.Fa w4opaque
|
|
pointer argument that is
|
|
.Dv NULL
|
|
unless overridden by the arguments in a call to
|
|
.Fn @jemalloc_prefix@malloc_stats_print ,
|
|
followed by four string pointers.
|
|
Please note that doing anything which tries to allocate memory in this function
|
|
is likely to result in a crash or deadlock.
|
|
.Pp
|
|
All messages are prefixed by
|
|
.Dq <jemalloc>: .
|
|
.Sh RETURN VALUES
|
|
The
|
|
.Fn @jemalloc_prefix@malloc
|
|
and
|
|
.Fn @jemalloc_prefix@calloc
|
|
functions return a pointer to the allocated memory if successful; otherwise
|
|
a
|
|
.Dv NULL
|
|
pointer is returned and
|
|
.Va errno
|
|
is set to
|
|
.Er ENOMEM .
|
|
.Pp
|
|
The
|
|
.Fn @jemalloc_prefix@posix_memalign
|
|
function returns the value 0 if successful; otherwise it returns an error value.
|
|
The
|
|
.Fn @jemalloc_prefix@posix_memalign
|
|
function will fail if:
|
|
.Bl -tag -width Er
|
|
.It Bq Er EINVAL
|
|
The
|
|
.Fa alignment
|
|
parameter is not a power of 2 at least as large as
|
|
.Fn sizeof "void *" .
|
|
.It Bq Er ENOMEM
|
|
Memory allocation error.
|
|
.El
|
|
.Pp
|
|
The
|
|
.Fn @jemalloc_prefix@realloc
|
|
function returns a pointer, possibly identical to
|
|
.Fa ptr ,
|
|
to the allocated memory
|
|
if successful; otherwise a
|
|
.Dv NULL
|
|
pointer is returned, and
|
|
.Va errno
|
|
is set to
|
|
.Er ENOMEM
|
|
if the error was the result of an allocation failure.
|
|
The
|
|
.Fn @jemalloc_prefix@realloc
|
|
function always leaves the original buffer intact
|
|
when an error occurs.
|
|
.Pp
|
|
The
|
|
.Fn @jemalloc_prefix@free
|
|
function returns no value.
|
|
.Pp
|
|
The
|
|
.Fn @jemalloc_prefix@malloc_usable_size
|
|
function returns the usable size of the allocation pointed to by
|
|
.Fa ptr .
|
|
@roff_swap@.Pp
|
|
@roff_swap@The
|
|
@roff_swap@.Fn @jemalloc_prefix@malloc_swap_enable
|
|
@roff_swap@function returns the value 0 if successful; otherwise it returns a
|
|
@roff_swap@non-zero value.
|
|
.Sh ENVIRONMENT
|
|
The following environment variables affect the execution of the allocation
|
|
functions:
|
|
.Bl -tag -width ".Ev JEMALLOC_OPTIONS"
|
|
.It Ev JEMALLOC_OPTIONS
|
|
If the environment variable
|
|
.Ev JEMALLOC_OPTIONS
|
|
is set, the characters it contains will be interpreted as flags to the
|
|
allocation functions.
|
|
.El
|
|
.Sh EXAMPLES
|
|
To dump core whenever a problem occurs:
|
|
.Pp
|
|
.Bd -literal -offset indent
|
|
ln -s 'A' /etc/jemalloc.conf
|
|
.Ed
|
|
.Pp
|
|
To specify in the source a chunk size that is twice the default:
|
|
.Bd -literal -offset indent
|
|
@jemalloc_prefix@malloc_options = "K";
|
|
.Ed
|
|
.Sh SEE ALSO
|
|
.Xr mtrgraph 1 ,
|
|
.Xr mtrplay 1 ,
|
|
.Xr jemtr2mtr 1 ,
|
|
.Xr madvise 2 ,
|
|
.Xr mmap 2 ,
|
|
@roff_dss@.Xr sbrk 2 ,
|
|
.Xr alloca 3 ,
|
|
.Xr atexit 3 ,
|
|
.Xr getpagesize 3
|
|
.Sh STANDARDS
|
|
The
|
|
.Fn @jemalloc_prefix@malloc ,
|
|
.Fn @jemalloc_prefix@calloc ,
|
|
.Fn @jemalloc_prefix@realloc
|
|
and
|
|
.Fn @jemalloc_prefix@free
|
|
functions conform to
|
|
.St -isoC .
|
|
.Pp
|
|
The
|
|
.Fn @jemalloc_prefix@posix_memalign
|
|
function conforms to
|
|
.St -p1003.1-2001 .
|