Implement allocation profiling and leak checking.

Add the --enable-prof and --enable-prof-libunwind configure options.

Add the B/b, F/f, I/i, L/l, and U/u JEMALLOC_OPTIONS.

Interval-based profile dump triggering is not yet implemented.

Add supporting generic code:
* Add memory barriers.
* Add prn (LCG PRNG).
* Add hash (Murmur hash function).
* Add ckh (cuckoo hash tables).
Jason Evans 2010-02-10 10:37:56 -08:00
parent 13668262d1
commit 6109fe07a1
25 changed files with 2824 additions and 166 deletions


@ -44,6 +44,17 @@ any of the following arguments (not a definitive list) to 'configure':
Enable statistics gathering functionality. Use the 'P' option to print
detailed allocation statistics at exit.
--enable-prof
Enable heap profiling and leak detection functionality. Use the 'B', 'F',
'I', 'L', and 'U' options to control these features.
--enable-prof-libunwind
Use the libunwind library (http://www.nongnu.org/libunwind/) for stack
backtracing, rather than frame pointers. libunwind is quite slow in
comparison to frame pointer-based backtracing, but it has the advantage of
working on applications/libraries that were compiled with
-fomit-frame-pointer.
--disable-tiny
Disable tiny (sub-quantum-sized) object support. Technically it is not
legal for a malloc implementation to allocate objects with less than


@ -40,10 +40,12 @@ CHDRS := @objroot@src/jemalloc@install_suffix@.h \
CSRCS := @srcroot@src/jemalloc.c @srcroot@src/jemalloc_arena.c \
@srcroot@src/jemalloc_base.c @srcroot@src/jemalloc_chunk.c \
@srcroot@src/jemalloc_chunk_dss.c @srcroot@src/jemalloc_chunk_mmap.c \
@srcroot@src/jemalloc_chunk_swap.c @srcroot@src/ckh.c \
@srcroot@src/jemalloc_ctl.c @srcroot@src/jemalloc_extent.c \
@srcroot@src/hash.c @srcroot@src/jemalloc_huge.c @srcroot@src/mb.c \
@srcroot@src/jemalloc_mutex.c @srcroot@src/prof.c \
@srcroot@src/jemalloc_stats.c @srcroot@src/jemalloc_tcache.c \
@srcroot@src/jemalloc_trace.c
DSOS := @objroot@lib/libjemalloc@install_suffix@.so.$(REV) \
@objroot@lib/libjemalloc@install_suffix@.so \
@objroot@lib/libjemalloc@install_suffix@_pic.a


@ -362,6 +362,29 @@ else
fi
AC_SUBST([roff_trace])
dnl Do not enable profiling by default.
AC_ARG_ENABLE([prof],
[AS_HELP_STRING([--enable-prof], [Enable allocation profiling])],
[if test "x$enable_prof" = "xno" ; then
enable_prof="0"
else
enable_prof="1"
fi
],
[enable_prof="0"]
)
AC_ARG_ENABLE([prof-libunwind],
[AS_HELP_STRING([--enable-prof-libunwind], [Use libunwind for backtracing])],
[if test "x$enable_prof_libunwind" = "xno" ; then
enable_prof_libunwind="0"
else
enable_prof_libunwind="1"
fi
],
[enable_prof_libunwind="0"]
)
dnl Finish prof-related definitions below, once TLS configuration is done.
dnl Enable tiny allocations by default.
AC_ARG_ENABLE([tiny],
[AS_HELP_STRING([--disable-tiny], [Disable tiny (sub-quantum) allocations])],
@ -636,6 +659,29 @@ fi
AC_SUBST([roff_tcache])
AC_SUBST([roff_no_tcache])
dnl Finish prof-related definitions, now that TLS configuration is done.
if test "x$enable_tls" = "x0" ; then
enable_prof="0"
fi
if test "x$enable_prof" = "x1" ; then
AC_DEFINE([JEMALLOC_PROF], [ ])
if test "x$enable_prof_libunwind" = "x1" ; then
AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"])
AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS -lunwind"],
[enable_prof_libunwind="0"])
if test "x${enable_prof_libunwind}" = "x1" ; then
AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ])
fi
fi
fi
AC_SUBST([enable_prof])
if test "x$enable_prof" = "x0" ; then
roff_prof=".\\\" "
else
roff_prof=""
fi
AC_SUBST([roff_prof])
dnl ============================================================================
dnl Configure libgd for mtrgraph.
bins="${objroot}bin/jemtr2mtr${install_suffix}"
@ -705,6 +751,8 @@ AC_MSG_RESULT([autogen : ${enable_autogen}])
AC_MSG_RESULT([debug : ${enable_debug}])
AC_MSG_RESULT([stats : ${enable_stats}])
AC_MSG_RESULT([trace : ${enable_trace}])
AC_MSG_RESULT([prof : ${enable_prof}])
AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}])
AC_MSG_RESULT([tiny : ${enable_tiny}])
AC_MSG_RESULT([tcache : ${enable_tcache}])
AC_MSG_RESULT([fill : ${enable_fill}])


@ -321,6 +321,10 @@ flags being set) become fatal.
The process will call
.Xr abort 3
in these cases.
@roff_prof@.It B
@roff_prof@Double/halve the maximum backtrace depth when profiling memory
@roff_prof@allocation activity.
@roff_prof@The default is 4.
.It C
Double/halve the size of the maximum size class that is a multiple of the
cacheline size (64).
@ -336,6 +340,23 @@ physical memory becomes scarce and the pages remain unused.
The default minimum ratio is 32:1;
.Ev JEMALLOC_OPTIONS=6D
will disable dirty page purging.
@roff_prof@.It F
@roff_prof@Profile memory allocation activity, and use an
@roff_prof@.Xr atexit 3
@roff_prof@function to dump final memory usage to a file named according to
@roff_prof@the pattern
@roff_prof@.Pa jeprof.<pid>.<seq>.f.heap .
@roff_prof@See the
@roff_prof@.Dq B
@roff_prof@option for backtrace depth control.
@roff_prof@See the
@roff_prof@.Dq I
@roff_prof@option for information on interval-triggered profile dumping, and the
@roff_prof@.Dq U
@roff_prof@option for information on high-water-triggered profile dumping.
@roff_prof@Profile output is compatible with the pprof Perl script, which is
@roff_prof@part of the google-perftools package
@roff_prof@(http://code.google.com/p/google-perftools/).
@roff_tcache@.It G
@roff_tcache@Double/halve the approximate interval (counted in terms of
@roff_tcache@thread-specific cache allocation/deallocation events) between full
@ -361,6 +382,16 @@ will disable dirty page purging.
@roff_tcache@will disable thread-specific caching.
@roff_tcache@Note that one cache slot per size class is not a valid
@roff_tcache@configuration due to implementation details.
@roff_prof@.It I
@roff_prof@Double/halve the maximum interval between memory profile dumps, as
@roff_prof@measured in bytes of allocation activity.
@roff_prof@On average, profiles are written four times as often as the maximum
@roff_prof@interval requires.
@roff_prof@This is an artifact of the concurrent algorithm that is used to
@roff_prof@track allocation activity.
@roff_prof@Profiles are dumped to files named according to the pattern
@roff_prof@.Pa jeprof.<pid>.<seq>.i<iseq>.heap .
@roff_prof@The default maximum interval is 4 GiB.
@roff_fill@.It J
@roff_fill@Each byte of new memory allocated by
@roff_fill@.Fn @jemalloc_prefix@malloc
@ -377,6 +408,16 @@ will disable dirty page purging.
.It K
Double/halve the virtual memory chunk size.
The default chunk size is 4 MiB.
@roff_prof@.It L
@roff_prof@Use an
@roff_prof@.Xr atexit 3
@roff_prof@function to report memory leaks.
@roff_prof@See the
@roff_prof@.Dq B
@roff_prof@option for backtrace depth control.
@roff_prof@See the
@roff_prof@.Dq F option for information on analyzing heap profile output.
@roff_prof@This option is disabled by default.
.It M
Double/halve the size of the maximum medium size class.
The valid range is from one page to one half chunk.
@ -429,6 +470,12 @@ The default value is 128 bytes.
@roff_trace@.Xr mtrplay 1
@roff_trace@and
@roff_trace@.Xr mtrgraph 1 .
@roff_prof@.It U
@roff_prof@Trigger a memory profile dump every time the total virtual memory
@roff_prof@exceeds the previous maximum.
@roff_prof@Profiles are dumped to files named according to the pattern
@roff_prof@.Pa jeprof.<pid>.<seq>.u<useq>.heap .
@roff_prof@This option is disabled by default.
@roff_sysv@.It V
@roff_sysv@Attempting to allocate zero bytes will return a
@roff_sysv@.Dv NULL
@ -920,6 +967,12 @@ Total number of large size classes.
Maximum size supported by this large size class.
.Ed
.\"-----------------------------------------------------------------------------
@roff_prof@.It Sy "prof.dump (void) --"
@roff_prof@.Bd -ragged -offset indent -compact
@roff_prof@Dump a memory profile to a file according to the pattern
@roff_prof@.Pa jeprof.<pid>.<seq>.m<mseq>.heap .
@roff_prof@.Ed
.\"-----------------------------------------------------------------------------
@roff_stats@.It Sy "stats.allocated (size_t) r-"
@roff_stats@.Bd -ragged -offset indent -compact
@roff_stats@Total number of bytes allocated by the application.
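The prof.dump control documented above can be exercised through jemalloc's mallctl interface. The sketch below is illustrative only: it restates the prototype by hand and assumes an unprefixed build (a prefixed build would call the corresponding @jemalloc_prefix@mallctl symbol instead).

#include <stddef.h>

/* Prototype restated for illustration; normally it comes from the installed
 * jemalloc header. */
int mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen);

static void
dump_heap_profile(void)
{
    /* Writes a profile named according to jeprof.<pid>.<seq>.m<mseq>.heap. */
    (void)mallctl("prof.dump", NULL, NULL, NULL, 0);
}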

jemalloc/src/ckh.c (new file)

@ -0,0 +1,601 @@
/*
*******************************************************************************
* Implementation of (2^1+,2) cuckoo hashing, where 2^1+ indicates that each
* hash bucket contains 2^n cells, for n >= 1, and 2 indicates that two hash
* functions are employed. The original cuckoo hashing algorithm was described
* in:
*
* Pagh, R., F.F. Rodler (2004) Cuckoo Hashing. Journal of Algorithms
* 51(2):122-144.
*
* Generalization of cuckoo hashing was discussed in:
*
* Erlingsson, U., M. Manasse, F. McSherry (2006) A cool and practical
* alternative to traditional hash tables. In Proceedings of the 7th
* Workshop on Distributed Data and Structures (WDAS'06), Santa Clara, CA,
* January 2006.
*
* This implementation uses precisely two hash functions because that is the
* fewest that can work, and supporting multiple hashes is an implementation
* burden. Here is a reproduction of Figure 1 from Erlingsson et al. (2006)
* that shows approximate expected maximum load factors for various
* configurations:
*
* | #cells/bucket |
* #hashes | 1 | 2 | 4 | 8 |
* --------+-------+-------+-------+-------+
* 1 | 0.006 | 0.006 | 0.03 | 0.12 |
* 2 | 0.49 | 0.86 |>0.93< |>0.96< |
* 3 | 0.91 | 0.97 | 0.98 | 0.999 |
* 4 | 0.97 | 0.99 | 0.999 | |
*
* The number of cells per bucket is chosen such that a bucket fits in one cache
* line. So, on 32- and 64-bit systems, we use (8,2) and (4,2) cuckoo hashing,
* respectively.
*
******************************************************************************/
#define CKH_C_
#include "internal/jemalloc_internal.h"
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static bool ckh_grow(ckh_t *ckh);
static void ckh_shrink(ckh_t *ckh);
/******************************************************************************/
/*
* Search bucket for key and return the cell number if found; SIZE_T_MAX
* otherwise.
*/
JEMALLOC_INLINE size_t
ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key)
{
ckhc_t *cell;
unsigned i;
for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
if (cell->key != NULL && ckh->keycomp(key, cell->key))
return ((bucket << LG_CKH_BUCKET_CELLS) + i);
}
return (SIZE_T_MAX);
}
/*
* Search table for key and return cell number if found; SIZE_T_MAX otherwise.
*/
JEMALLOC_INLINE size_t
ckh_isearch(ckh_t *ckh, const void *key)
{
size_t hash1, hash2, bucket, cell;
assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIG);
ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
/* Search primary bucket. */
bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
cell = ckh_bucket_search(ckh, bucket, key);
if (cell != SIZE_T_MAX)
return (cell);
/* Search secondary bucket. */
bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
cell = ckh_bucket_search(ckh, bucket, key);
return (cell);
}
JEMALLOC_INLINE bool
ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
const void *data)
{
ckhc_t *cell;
unsigned offset, i;
/*
* Cycle through the cells in the bucket, starting at a random position.
* The randomness avoids worst-case search overhead as buckets fill up.
*/
prn(offset, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
if (cell->key == NULL) {
cell->key = key;
cell->data = data;
ckh->count++;
return (false);
}
}
return (true);
}
/*
* No space is available in bucket. Randomly evict an item, then try to find an
* alternate location for that item. Iteratively repeat this
* eviction/relocation procedure until either success or detection of an
* eviction/relocation bucket cycle.
*/
JEMALLOC_INLINE bool
ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
void const **argdata)
{
const void *key, *data, *tkey, *tdata;
ckhc_t *cell;
size_t hash1, hash2, bucket, tbucket;
unsigned i;
bucket = argbucket;
key = *argkey;
data = *argdata;
while (true) {
/*
* Choose a random item within the bucket to evict. This is
* critical to correct function, because without (eventually)
* evicting all items within a bucket during iteration, it
* would be possible to get stuck in an infinite loop if there
* were an item for which both hashes indicated the same
* bucket.
*/
prn(i, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
assert(cell->key != NULL);
/* Swap cell->{key,data} and {key,data} (evict). */
tkey = cell->key; tdata = cell->data;
cell->key = key; cell->data = data;
key = tkey; data = tdata;
#ifdef CKH_COUNT
ckh->nrelocs++;
#endif
/* Find the alternate bucket for the evicted item. */
ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
tbucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
if (tbucket == bucket) {
tbucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
/*
* It may be that (tbucket == bucket) still, if the
* item's hashes both indicate this bucket. However,
* we are guaranteed to eventually escape this bucket
* during iteration, assuming pseudo-random item
* selection (true randomness would make infinite
* looping a remote possibility). The reason we can
* never get trapped forever is that there are two
* cases:
*
* 1) This bucket == argbucket, so we will quickly
* detect an eviction cycle and terminate.
* 2) An item was evicted to this bucket from another,
* which means that at least one item in this bucket
* has hashes that indicate distinct buckets.
*/
}
/* Check for a cycle. */
if (tbucket == argbucket) {
*argkey = key;
*argdata = data;
return (true);
}
bucket = tbucket;
if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
return (false);
}
}
JEMALLOC_INLINE bool
ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)
{
size_t hash1, hash2, bucket;
const void *key = *argkey;
const void *data = *argdata;
ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
/* Try to insert in primary bucket. */
bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
return (false);
/* Try to insert in secondary bucket. */
bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
return (false);
/*
* Try to find a place for this item via iterative eviction/relocation.
*/
return (ckh_evict_reloc_insert(ckh, bucket, argkey, argdata));
}
/*
* Try to rebuild the hash table from scratch by inserting all items from the
* old table into the new.
*/
JEMALLOC_INLINE bool
ckh_rebuild(ckh_t *ckh, ckhc_t *aTab)
{
size_t count, i, nins;
const void *key, *data;
count = ckh->count;
ckh->count = 0;
for (i = nins = 0; nins < count; i++) {
if (aTab[i].key != NULL) {
key = aTab[i].key;
data = aTab[i].data;
if (ckh_try_insert(ckh, &key, &data)) {
ckh->count = count;
return (true);
}
nins++;
}
}
return (false);
}
static bool
ckh_grow(ckh_t *ckh)
{
bool ret;
ckhc_t *tab, *ttab;
size_t lg_curcells;
unsigned lg_prevbuckets;
#ifdef CKH_COUNT
ckh->ngrows++;
#endif
/*
* It is possible (though unlikely, given well behaved hashes) that the
* table will have to be doubled more than once in order to create a
* usable table.
*/
lg_prevbuckets = ckh->lg_curbuckets;
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS;
while (true) {
lg_curcells++;
tab = (ckhc_t *) ipalloc((ZU(1) << LG_CACHELINE),
sizeof(ckhc_t) << lg_curcells);
if (tab == NULL) {
ret = true;
goto RETURN;
}
memset(tab, 0, sizeof(ckhc_t) << lg_curcells);
/* Swap in new table. */
ttab = ckh->tab;
ckh->tab = tab;
tab = ttab;
ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
if (ckh_rebuild(ckh, tab) == false) {
idalloc(tab);
break;
}
/* Rebuilding failed, so back out partially rebuilt table. */
idalloc(ckh->tab);
ckh->tab = tab;
ckh->lg_curbuckets = lg_prevbuckets;
}
ret = false;
RETURN:
return (ret);
}
static void
ckh_shrink(ckh_t *ckh)
{
ckhc_t *tab, *ttab;
size_t lg_curcells;
unsigned lg_prevbuckets;
/*
* It is possible (though unlikely, given well behaved hashes) that the
* table rebuild will fail.
*/
lg_prevbuckets = ckh->lg_curbuckets;
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
tab = (ckhc_t *)ipalloc((ZU(1) << LG_CACHELINE),
sizeof(ckhc_t) << lg_curcells);
if (tab == NULL) {
/*
* An OOM error isn't worth propagating, since it doesn't
* prevent this or future operations from proceeding.
*/
return;
}
memset(tab, 0, sizeof(ckhc_t) << lg_curcells);
/* Swap in new table. */
ttab = ckh->tab;
ckh->tab = tab;
tab = ttab;
ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
if (ckh_rebuild(ckh, tab) == false) {
idalloc(tab);
#ifdef CKH_COUNT
ckh->nshrinks++;
#endif
return;
}
/* Rebuilding failed, so back out partially rebuilt table. */
idalloc(ckh->tab);
ckh->tab = tab;
ckh->lg_curbuckets = lg_prevbuckets;
#ifdef CKH_COUNT
ckh->nshrinkfails++;
#endif
}
bool
ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
{
bool ret;
size_t mincells;
unsigned lg_mincells;
assert(minitems > 0);
assert(hash != NULL);
assert(keycomp != NULL);
#ifdef CKH_COUNT
ckh->ngrows = 0;
ckh->nshrinks = 0;
ckh->nshrinkfails = 0;
ckh->ninserts = 0;
ckh->nrelocs = 0;
#endif
ckh->prn_state = 42; /* Value doesn't really matter. */
ckh->count = 0;
/*
* Find the minimum power of 2 that is large enough to fit minitems
* entries. We are using (2+,2) cuckoo hashing, which has an expected
* maximum load factor of at least ~0.86, so 0.75 is a conservative load
* factor that will typically allow minitems entries to fit without ever
* growing the table.
*/
assert(LG_CKH_BUCKET_CELLS > 0);
mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2;
for (lg_mincells = LG_CKH_BUCKET_CELLS;
(ZU(1) << lg_mincells) < mincells;
lg_mincells++)
; /* Do nothing. */
ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
ckh->hash = hash;
ckh->keycomp = keycomp;
ckh->tab = (ckhc_t *)ipalloc((ZU(1) << LG_CACHELINE),
sizeof(ckhc_t) << lg_mincells);
if (ckh->tab == NULL) {
ret = true;
goto RETURN;
}
memset(ckh->tab, 0, sizeof(ckhc_t) << lg_mincells);
#ifdef JEMALLOC_DEBUG
ckh->magic = CKH_MAGIG;
#endif
ret = false;
RETURN:
return (ret);
}
void
ckh_delete(ckh_t *ckh)
{
assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIG);
#ifdef CKH_VERBOSE
malloc_printf(
"%s(%p): ngrows: %"PRIu64", nshrinks: %"PRIu64","
" nshrinkfails: %"PRIu64", ninserts: %"PRIu64","
" nrelocs: %"PRIu64"\n", __func__, ckh,
(unsigned long long)ckh->ngrows,
(unsigned long long)ckh->nshrinks,
(unsigned long long)ckh->nshrinkfails,
(unsigned long long)ckh->ninserts,
(unsigned long long)ckh->nrelocs);
#endif
idalloc(ckh->tab);
#ifdef JEMALLOC_DEBUG
memset(ckh, 0x5a, sizeof(ckh_t));
#endif
}
size_t
ckh_count(ckh_t *ckh)
{
assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIG);
return (ckh->count);
}
bool
ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data)
{
size_t i, ncells;
for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets +
LG_CKH_BUCKET_CELLS)); i < ncells; i++) {
if (ckh->tab[i].key != NULL) {
if (key != NULL)
*key = (void *)ckh->tab[i].key;
if (data != NULL)
*data = (void *)ckh->tab[i].data;
*tabind = i + 1;
return (false);
}
}
return (true);
}
bool
ckh_insert(ckh_t *ckh, const void *key, const void *data)
{
bool ret;
assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIG);
assert(ckh_search(ckh, key, NULL, NULL));
#ifdef CKH_COUNT
ckh->ninserts++;
#endif
while (ckh_try_insert(ckh, &key, &data)) {
if (ckh_grow(ckh)) {
ret = true;
goto RETURN;
}
}
ret = false;
RETURN:
return (ret);
}
bool
ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
{
size_t cell;
assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIG);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {
if (key != NULL)
*key = (void *)ckh->tab[cell].key;
if (data != NULL)
*data = (void *)ckh->tab[cell].data;
ckh->tab[cell].key = NULL;
ckh->tab[cell].data = NULL; /* Not necessary. */
ckh->count--;
/* Try to halve the table if it is less than 1/4 full. */
if (ckh->count < (ZU(1) << (ckh->lg_curbuckets
+ LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets
> ckh->lg_minbuckets) {
/* Ignore error due to OOM. */
ckh_shrink(ckh);
}
return (false);
}
return (true);
}
bool
ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
{
size_t cell;
assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIG);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {
if (key != NULL)
*key = (void *)ckh->tab[cell].key;
if (data != NULL)
*data = (void *)ckh->tab[cell].data;
return (false);
}
return (true);
}
void
ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
{
size_t ret1, ret2;
uint64_t h;
assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
assert(hash1 != NULL);
assert(hash2 != NULL);
h = hash(key, strlen((const char *)key), 0x94122f335b332aeaLLU);
if (minbits <= 32) {
/*
* Avoid doing multiple hashes, since a single hash provides
* enough bits.
*/
ret1 = h & ZU(0xffffffffU);
ret2 = h >> 32;
} else {
ret1 = h;
ret2 = hash(key, strlen((const char *)key),
0x8432a476666bbc13U);
}
*hash1 = ret1;
*hash2 = ret2;
}
bool
ckh_string_keycomp(const void *k1, const void *k2)
{
assert(k1 != NULL);
assert(k2 != NULL);
return (strcmp((char *)k1, (char *)k2) ? false : true);
}
void
ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
size_t *hash2)
{
size_t ret1, ret2;
uint64_t h;
assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
assert(hash1 != NULL);
assert(hash2 != NULL);
h = hash(&key, sizeof(void *), 0xd983396e68886082LLU);
if (minbits <= 32) {
/*
* Avoid doing multiple hashes, since a single hash provides
* enough bits.
*/
ret1 = h & ZU(0xffffffffU);
ret2 = h >> 32;
} else {
assert(SIZEOF_PTR == 8);
ret1 = h;
ret2 = hash(&key, sizeof(void *), 0x5e2be9aff8709a5dLLU);
}
*hash1 = ret1;
*hash2 = ret2;
}
bool
ckh_pointer_keycomp(const void *k1, const void *k2)
{
return ((k1 == k2) ? true : false);
}
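A minimal usage sketch for the ckh API above (hypothetical code: it assumes the internal headers and a bootstrapped allocator, since ckh allocates its table with ipalloc()). Note that false return values indicate success, and that keys must remain valid for the life of the table because only pointers are stored.

static void
ckh_usage_sketch(void)
{
    ckh_t table;
    void *data;

    /* Table keyed by NUL-terminated strings; sized for ~64 items. */
    if (ckh_new(&table, 64, ckh_string_hash, ckh_string_keycomp))
        return; /* OOM. */

    if (ckh_insert(&table, "example key", (void *)0x1) == false) {
        /* ckh_search() fills in key/data when the key is present. */
        if (ckh_search(&table, "example key", NULL, &data) == false)
            assert(data == (void *)0x1);
    }

    ckh_delete(&table);
}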

jemalloc/src/hash.c (new file)

@ -0,0 +1,2 @@
#define HASH_C_
#include "internal/jemalloc_internal.h"


@ -0,0 +1,95 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct ckh_s ckh_t;
typedef struct ckhc_s ckhc_t;
/* Typedefs to allow easy function pointer passing. */
typedef void ckh_hash_t (const void *, unsigned, size_t *, size_t *);
typedef bool ckh_keycomp_t (const void *, const void *);
/* Maintain counters used to get an idea of performance. */
/* #define CKH_COUNT */
/* Print counter values in ckh_delete() (requires CKH_COUNT). */
/* #define CKH_VERBOSE */
/*
* There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit
* one bucket per L1 cache line.
*/
#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1)
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
/* Hash table cell. */
struct ckhc_s {
const void *key;
const void *data;
};
struct ckh_s {
#ifdef JEMALLOC_DEBUG
#define CKH_MAGIG 0x3af2489d
uint32_t magic;
#endif
#ifdef CKH_COUNT
/* Counters used to get an idea of performance. */
uint64_t ngrows;
uint64_t nshrinks;
uint64_t nshrinkfails;
uint64_t ninserts;
uint64_t nrelocs;
#endif
/* Used for pseudo-random number generation. */
#define CKH_A 12345
#define CKH_C 12347
uint32_t prn_state;
/* Total number of items. */
size_t count;
/*
* Minimum and current number of hash table buckets. There are
* 2^LG_CKH_BUCKET_CELLS cells per bucket.
*/
unsigned lg_minbuckets;
unsigned lg_curbuckets;
/* Hash and comparison functions. */
ckh_hash_t *hash;
ckh_keycomp_t *keycomp;
/* Hash table with 2^lg_curbuckets buckets. */
ckhc_t *tab;
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
ckh_keycomp_t *keycomp);
void ckh_delete(ckh_t *ckh);
size_t ckh_count(ckh_t *ckh);
bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data);
bool ckh_insert(ckh_t *ckh, const void *key, const void *data);
bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key,
void **data);
bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data);
void ckh_string_hash(const void *key, unsigned minbits, size_t *hash1,
size_t *hash2);
bool ckh_string_keycomp(const void *k1, const void *k2);
void ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
size_t *hash2);
bool ckh_pointer_keycomp(const void *k1, const void *k2);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/


@ -0,0 +1,67 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
uint64_t hash(const void *key, size_t len, uint64_t seed);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(HASH_C_))
/*
* The following hash function is based on MurmurHash64A(), placed into the
* public domain by Austin Appleby. See http://murmurhash.googlepages.com/ for
* details.
*/
JEMALLOC_INLINE uint64_t
hash(const void *key, size_t len, uint64_t seed)
{
const uint64_t m = 0xc6a4a7935bd1e995;
const int r = 47;
uint64_t h = seed ^ (len * m);
const uint64_t *data = (const uint64_t *)key;
const unsigned char *data2 = (const unsigned char*)data;
const uint64_t *end = data + (len/8);
while(data != end) {
uint64_t k = *data++;
k *= m;
k ^= k >> r;
k *= m;
h ^= k;
h *= m;
}
switch(len & 7) {
case 7: h ^= ((uint64_t)(data2[6])) << 48;
case 6: h ^= ((uint64_t)(data2[5])) << 40;
case 5: h ^= ((uint64_t)(data2[4])) << 32;
case 4: h ^= ((uint64_t)(data2[3])) << 24;
case 3: h ^= ((uint64_t)(data2[2])) << 16;
case 2: h ^= ((uint64_t)(data2[1])) << 8;
case 1: h ^= ((uint64_t)(data2[0]));
h *= m;
}
h ^= h >> r;
h *= m;
h ^= h >> r;
return h;
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
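As a small sketch, a caller that needs two narrower hashes can split one hash() result, much as ckh_string_hash() does earlier in this commit; the seed below is arbitrary and the snippet assumes this header plus the usual libc declarations.

static void
hash_split_sketch(const char *key, uint32_t *h1, uint32_t *h2)
{
    /* One 64-bit Murmur hash yields two independent 32-bit values. */
    uint64_t h = hash(key, strlen(key), 0x1234567890abcdefLLU);

    *h1 = (uint32_t)(h & 0xffffffffU);
    *h2 = (uint32_t)(h >> 32);
}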


@ -110,6 +110,11 @@ struct arena_chunk_map_s {
*/
rb_node(arena_chunk_map_t) link;
#ifdef JEMALLOC_PROF
/* Profile counters, used for large object runs. */
prof_thr_cnt_t *prof_cnt;
#endif
/*
* Run address (or size) and various flags are stored together. The bit
* layout looks like (assuming 32-bit system):
@ -238,6 +243,14 @@ struct arena_bin_s {
/* Number of elements in a run's regs_mask for this bin's size class. */
uint32_t regs_mask_nelms;
#ifdef JEMALLOC_PROF
/*
* Offset of first (prof_cnt_t *) in a run header for this bin's size
* class, or 0 if (opt_prof == false).
*/
uint32_t cnt0_offset;
#endif
/* Offset of first region in a run for this bin's size class. */
uint32_t reg0_offset;
@ -253,6 +266,9 @@ struct arena_s {
# define ARENA_MAGIC 0x947d3d24
#endif
/* This arena's index within the arenas array. */
unsigned ind;
/* All operations on this arena require that lock be locked. */
malloc_mutex_t lock;
@ -403,6 +419,10 @@ void *arena_malloc(size_t size, bool zero);
void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
size_t alloc_size);
size_t arena_salloc(const void *ptr);
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *arena_prof_cnt_get(const void *ptr);
void arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
#endif
void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
arena_chunk_map_t *mapelm);
void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);


@ -11,17 +11,22 @@ typedef struct extent_node_s extent_node_t;
struct extent_node_s {
#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
/* Linkage for the size/address-ordered tree. */
rb_node(extent_node_t) link_szad;
#endif
/* Linkage for the address-ordered tree. */
rb_node(extent_node_t) link_ad;
#ifdef JEMALLOC_PROF
/* Profile counters, used for huge objects. */
prof_thr_cnt_t *prof_cnt;
#endif
/* Pointer to the extent that this tree node is responsible for. */
void *addr;
/* Total region size. */
size_t size;
};
typedef rb_tree(extent_node_t) extent_tree_t;


@ -24,6 +24,10 @@ void *huge_palloc(size_t alignment, size_t size);
void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
void huge_dalloc(void *ptr);
size_t huge_salloc(const void *ptr);
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *huge_prof_cnt_get(const void *ptr);
void huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
#endif
bool huge_boot(void);
#endif /* JEMALLOC_H_EXTERNS */


@ -75,6 +75,8 @@ extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1,
/******************************************************************************/
#define JEMALLOC_H_TYPES
#define ZU(z) ((size_t)z)
#ifndef __DECONST
# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))
#endif
@ -101,7 +103,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1,
#ifdef __alpha__
# define LG_QUANTUM 4
#endif
#ifdef __sparc64__
# define LG_QUANTUM 4
#endif
#ifdef __amd64__
@ -169,6 +171,10 @@ extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1,
#define PAGE_CEILING(s) \
(((s) + PAGE_MASK) & ~PAGE_MASK)
#include "internal/prn.h"
#include "internal/hash.h"
#include "internal/mb.h"
#include "internal/ckh.h"
#include "internal/jemalloc_stats.h"
#include "internal/jemalloc_ctl.h"
#include "internal/jemalloc_mutex.h"
@ -179,11 +185,16 @@ extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1,
#include "internal/jemalloc_huge.h"
#include "internal/jemalloc_tcache.h"
#include "internal/jemalloc_trace.h"
#include "internal/prof.h"
#undef JEMALLOC_H_TYPES
/******************************************************************************/
#define JEMALLOC_H_STRUCTS
#include "internal/prn.h"
#include "internal/hash.h"
#include "internal/mb.h"
#include "internal/ckh.h"
#include "internal/jemalloc_stats.h"
#include "internal/jemalloc_ctl.h"
#include "internal/jemalloc_mutex.h"
@ -194,6 +205,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1,
#include "internal/jemalloc_huge.h"
#include "internal/jemalloc_tcache.h"
#include "internal/jemalloc_trace.h"
#include "internal/prof.h"
#undef JEMALLOC_H_STRUCTS
/******************************************************************************/
@ -242,6 +254,10 @@ arena_t *arenas_extend(unsigned ind);
arena_t *choose_arena_hard(void);
#endif
#include "internal/prn.h"
#include "internal/hash.h"
#include "internal/mb.h"
#include "internal/ckh.h"
#include "internal/jemalloc_stats.h"
#include "internal/jemalloc_ctl.h"
#include "internal/jemalloc_mutex.h"
@ -252,11 +268,16 @@ arena_t *choose_arena_hard(void);
#include "internal/jemalloc_huge.h"
#include "internal/jemalloc_tcache.h"
#include "internal/jemalloc_trace.h"
#include "internal/prof.h"
#undef JEMALLOC_H_EXTERNS
/******************************************************************************/
#define JEMALLOC_H_INLINES
#include "internal/prn.h"
#include "internal/hash.h"
#include "internal/mb.h"
#include "internal/ckh.h"
#include "internal/jemalloc_stats.h"
#include "internal/jemalloc_ctl.h"
#include "internal/jemalloc_mutex.h"
@ -355,12 +376,15 @@ choose_arena(void)
#include "internal/jemalloc_tcache.h"
#include "internal/jemalloc_arena.h"
#include "internal/jemalloc_trace.h"
#include "internal/prof.h"
#ifndef JEMALLOC_ENABLE_INLINE
void *imalloc(size_t size);
void *icalloc(size_t size);
void *ipalloc(size_t alignment, size_t size);
size_t isalloc(const void *ptr);
void *iralloc(void *ptr, size_t size);
void idalloc(void *ptr);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
@ -386,18 +410,99 @@ icalloc(size_t size)
return (huge_malloc(size, true));
}
JEMALLOC_INLINE void *
ipalloc(size_t alignment, size_t size)
{
void *ret;
size_t ceil_size;
/*
* Round size up to the nearest multiple of alignment.
*
* This done, we can take advantage of the fact that for each small
* size class, every object is aligned at the smallest power of two
* that is non-zero in the base two representation of the size. For
* example:
*
* Size | Base 2 | Minimum alignment
* -----+----------+------------------
* 96 | 1100000 | 32
* 144 | 10100000 | 32
* 192 | 11000000 | 64
*
* Depending on runtime settings, it is possible that arena_malloc()
* will further round up to a power of two, but that never causes
* correctness issues.
*/
ceil_size = (size + (alignment - 1)) & (-alignment);
/*
* (ceil_size < size) protects against the combination of maximal
* alignment and size greater than maximal alignment.
*/
if (ceil_size < size) {
/* size_t overflow. */
return (NULL);
}
if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
&& ceil_size <= arena_maxclass))
ret = arena_malloc(ceil_size, false);
else {
size_t run_size;
/*
* We can't achieve subpage alignment, so round up alignment
* permanently; it makes later calculations simpler.
*/
alignment = PAGE_CEILING(alignment);
ceil_size = PAGE_CEILING(size);
/*
* (ceil_size < size) protects against very large sizes within
* PAGE_SIZE of SIZE_T_MAX.
*
* (ceil_size + alignment < ceil_size) protects against the
* combination of maximal alignment and ceil_size large enough
* to cause overflow. This is similar to the first overflow
* check above, but it needs to be repeated due to the new
* ceil_size value, which may now be *equal* to maximal
* alignment, whereas before we only detected overflow if the
* original size was *greater* than maximal alignment.
*/
if (ceil_size < size || ceil_size + alignment < ceil_size) {
/* size_t overflow. */
return (NULL);
}
/*
* Calculate the size of the over-size run that arena_palloc()
* would need to allocate in order to guarantee the alignment.
*/
if (ceil_size >= alignment)
run_size = ceil_size + alignment - PAGE_SIZE;
else {
/*
* It is possible that (alignment << 1) will cause
* overflow, but it doesn't matter because we also
* subtract PAGE_SIZE, which in the case of overflow
* leaves us with a very large run_size. That causes
* the first conditional below to fail, which means
* that the bogus run_size value never gets used for
* anything important.
*/
run_size = (alignment << 1) - PAGE_SIZE;
}
if (run_size <= arena_maxclass) {
ret = arena_palloc(choose_arena(), alignment, ceil_size,
run_size);
} else if (alignment <= chunksize)
ret = huge_malloc(ceil_size, false);
else
ret = huge_palloc(alignment, ceil_size);
}
assert(((uintptr_t)ret & (alignment - 1)) == 0);
return (ret);
}
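/*
 * Worked example of the rounding performed by ipalloc() above (hypothetical
 * helper, not used anywhere): for size 100 and alignment 32,
 * (100 + 31) & -32 == 128, the smallest multiple of 32 that can hold 100
 * bytes.
 */
JEMALLOC_INLINE size_t
ipalloc_ceil_size_sketch(void)
{
    size_t size = 100;
    size_t alignment = 32;
    size_t ceil_size = (size + (alignment - 1)) & (-alignment);

    assert(ceil_size == 128);
    return (ceil_size);
}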
JEMALLOC_INLINE size_t
@ -419,6 +524,36 @@ isalloc(const void *ptr)
return (ret);
}
JEMALLOC_INLINE void *
iralloc(void *ptr, size_t size)
{
size_t oldsize;
assert(ptr != NULL);
assert(size != 0);
oldsize = isalloc(ptr);
if (size <= arena_maxclass)
return (arena_ralloc(ptr, size, oldsize));
else
return (huge_ralloc(ptr, size, oldsize));
}
JEMALLOC_INLINE void
idalloc(void *ptr)
{
arena_chunk_t *chunk;
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr)
arena_dalloc(chunk->arena, chunk, ptr);
else
huge_dalloc(ptr);
}
#endif
#undef JEMALLOC_H_INLINES


@ -13,9 +13,10 @@ typedef struct chunk_stats_s chunk_stats_t;
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_STATS
#ifdef JEMALLOC_H_STRUCTS
#ifdef JEMALLOC_STATS
#ifdef JEMALLOC_TCACHE
struct tcache_bin_stats_s {
/*
@ -104,10 +105,14 @@ struct arena_stats_s {
*/
malloc_large_stats_t *lstats;
};
#endif /* JEMALLOC_STATS */
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
struct chunk_stats_s {
# ifdef JEMALLOC_STATS
/* Number of chunks that were allocated. */
uint64_t nchunks;
# endif
/* High-water mark for number of chunks allocated. */
size_t highchunks;
@ -119,9 +124,9 @@ struct chunk_stats_s {
*/
size_t curchunks;
};
#endif /* JEMALLOC_STATS */
#endif /* JEMALLOC_H_STRUCTS */
#endif /* JEMALLOC_STATS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

jemalloc/src/internal/mb.h (new file)

@ -0,0 +1,108 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
void mb_write(void);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(MB_C_))
#ifdef __i386__
/*
* According to the Intel Architecture Software Developer's Manual, current
* processors execute instructions in order from the perspective of other
* processors in a multiprocessor system, but 1) Intel reserves the right to
* change that, and 2) the compiler's optimizer could re-order instructions if
* there weren't some form of barrier. Therefore, even if running on an
* architecture that does not need memory barriers (everything through at least
* i686), an "optimizer barrier" is necessary.
*/
JEMALLOC_INLINE void
mb_write(void)
{
# if 0
/* This is a true memory barrier. */
asm volatile ("pusha;"
"xor %%eax,%%eax;"
"cpuid;"
"popa;"
: /* Outputs. */
: /* Inputs. */
: "memory" /* Clobbers. */
);
#else
/*
* This is hopefully enough to keep the compiler from reordering
* instructions around this one.
*/
asm volatile ("nop;"
: /* Outputs. */
: /* Inputs. */
: "memory" /* Clobbers. */
);
#endif
}
#elif defined(__amd64__)
JEMALLOC_INLINE void
mb_write(void)
{
asm volatile ("sfence"
: /* Outputs. */
: /* Inputs. */
: "memory" /* Clobbers. */
);
}
#elif defined(__powerpc__)
JEMALLOC_INLINE void
mb_write(void)
{
asm volatile ("eieio"
: /* Outputs. */
: /* Inputs. */
: "memory" /* Clobbers. */
);
}
#elif defined(__sparc64__)
JEMALLOC_INLINE void
mb_write(void)
{
asm volatile ("membar #StoreStore"
: /* Outputs. */
: /* Inputs. */
: "memory" /* Clobbers. */
);
}
#else
/*
* This is much slower than a simple memory barrier, but the semantics of mutex
* unlock make this work.
*/
JEMALLOC_INLINE void
mb_write(void)
{
malloc_mutex_t mtx;
malloc_mutex_init(&mtx);
malloc_mutex_lock(&mtx);
malloc_mutex_unlock(&mtx);
}
#endif
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/


@ -0,0 +1,50 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
/*
* Simple linear congruential pseudo-random number generator:
*
* prn(y) = (a*x + c) % m
*
* where the following constants ensure maximal period:
*
* a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
* c == Odd number (relatively prime to 2^n).
* m == 2^32
*
* See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
*
* This choice of m has the disadvantage that the quality of the bits is
* proportional to bit position. For example, the lowest bit has a cycle of 2,
* the next has a cycle of 4, etc. For this reason, we prefer to use the upper
* bits.
*
* Macro parameters:
* uint32_t r : Result.
* unsigned lg_range : (0..32], number of least significant bits to return.
* uint32_t state : Seed value.
* const uint32_t a, c : See above discussion.
*/
#define prn(r, lg_range, state, a, c) do { \
assert(lg_range > 0); \
assert(lg_range <= 32); \
\
r = (state * (a)) + (c); \
state = r; \
r >>= (32 - lg_range); \
} while (false)
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
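A small usage sketch for the prn() macro (illustrative only; it assumes this header and reuses the CKH_A/CKH_C constants that ckh.h defines as suitable a/c values):

static uint32_t
prn_sketch(void)
{
    uint32_t state = 42; /* Seed; the value doesn't really matter. */
    uint32_t r;

    /* Take the top 8 bits of the LCG state: r is in [0, 256). */
    prn(r, 8, state, 12345, 12347);
    assert(r < 256);
    return (r);
}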


@ -0,0 +1,136 @@
#ifdef JEMALLOC_PROF
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
typedef struct prof_s prof_t;
#define LG_PROF_INTERVAL_DEFAULT 32
/*
* Hard limit on stack backtrace depth. Note that the version of
* prof_backtrace() that is based on __builtin_return_address() necessarily has
* a hard-coded number of backtrace frame handlers, so increasing
* LG_PROF_BT_MAX requires changing prof_backtrace().
*/
#define LG_PROF_BT_MAX 7
#define PROF_BT_MAX (1U << LG_PROF_BT_MAX)
/* Initial hash table size. */
#define PROF_CKH_MINITEMS 64
/* Size of memory buffer to use when writing dump files. */
#define PROF_DUMP_BUF_SIZE 65536
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
struct prof_bt_s {
/* Backtrace, stored as len program counters. */
void **vec;
unsigned len;
};
struct prof_cnt_s {
/*
* Profiling counters. An allocation/deallocation pair can operate on
* different prof_thr_cnt_t objects that are linked into the same
* prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
* negative. In principle it is possible for the *bytes counters to
* overflow/underflow, but a general solution would require some form
* of 128-bit counter solution; this implementation doesn't bother to
* solve that problem.
*/
int64_t curobjs;
int64_t curbytes;
uint64_t accumobjs;
uint64_t accumbytes;
};
struct prof_thr_cnt_s {
/* Linkage into prof_ctx_t's cnts_ql. */
ql_elm(prof_thr_cnt_t) link;
/*
* Associated context. If a thread frees an object that it did not
* allocate, it is possible that the context is not cached in the
* thread's hash table, in which case it must be able to look up the
* context, insert a new prof_thr_cnt_t into the thread's hash table,
* and link it into the prof_ctx_t's cnts_ql.
*/
prof_ctx_t *ctx;
/*
* Threads use memory barriers to update the counters. Since there is
* only ever one writer, the only challenge is for the reader to get a
* consistent read of the counters.
*
* The writer uses this series of operations:
*
* 1) Increment epoch to an odd number.
* 2) Update counters.
* 3) Increment epoch to an even number.
*
* The reader must assure 1) that the epoch is even while it reads the
* counters, and 2) that the epoch doesn't change between the time it
* starts and finishes reading the counters.
*/
unsigned epoch;
/* Profiling counters. */
prof_cnt_t cnts;
};
struct prof_ctx_s {
/* Protects cnt_merged and cnts_ql. */
malloc_mutex_t lock;
/* Temporary storage for aggregation during dump. */
prof_cnt_t cnt_dump;
/* When threads exit, they merge their stats into cnt_merged. */
prof_cnt_t cnt_merged;
/*
* List of profile counters, one for each thread that has allocated in
* this context.
*/
ql_head(prof_thr_cnt_t) cnts_ql;
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern bool opt_prof;
extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */
extern size_t opt_lg_prof_interval;
extern bool opt_prof_udump; /* High-water memory dumping. */
extern bool opt_prof_leak; /* Dump leak summary at exit. */
bool prof_init(prof_t *prof, bool master);
void prof_destroy(prof_t *prof);
prof_thr_cnt_t *prof_alloc_prep(void);
prof_thr_cnt_t *prof_cnt_get(const void *ptr);
void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt);
void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
size_t old_size, prof_thr_cnt_t *old_cnt);
void prof_free(const void *ptr);
void prof_idump(void);
void prof_mdump(void);
void prof_udump(void);
void prof_boot0(void);
bool prof_boot1(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
#endif /* JEMALLOC_PROF */
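The epoch discipline described in the prof_thr_cnt_s comment can be sketched as follows (hypothetical helpers; prof.c's actual code may differ, and on weakly ordered CPUs the reader would also need read barriers):

/* Writer: only the owning thread updates its counters. */
static void
prof_cnt_update_sketch(prof_thr_cnt_t *cnt, size_t size)
{
    cnt->epoch++; /* Odd: a concurrent reader must retry. */
    mb_write();
    cnt->cnts.curobjs++;
    cnt->cnts.curbytes += size;
    cnt->cnts.accumobjs++;
    cnt->cnts.accumbytes += size;
    mb_write();
    cnt->epoch++; /* Even again: the counters are consistent. */
}

/* Reader: retry until an even, unchanged epoch brackets the copy. */
static prof_cnt_t
prof_cnt_read_sketch(prof_thr_cnt_t *cnt)
{
    prof_cnt_t snap;
    unsigned e0, e1;

    do {
        e0 = cnt->epoch;
        snap = cnt->cnts;
        e1 = cnt->epoch;
    } while ((e0 & 1U) != 0 || e0 != e1);

    return (snap);
}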


@ -241,101 +241,6 @@ choose_arena_hard(void)
}
#endif
static inline void *
ipalloc(size_t alignment, size_t size)
{
void *ret;
size_t ceil_size;
/*
* Round size up to the nearest multiple of alignment.
*
* This done, we can take advantage of the fact that for each small
* size class, every object is aligned at the smallest power of two
* that is non-zero in the base two representation of the size. For
* example:
*
* Size | Base 2 | Minimum alignment
* -----+----------+------------------
* 96 | 1100000 | 32
* 144 | 10100000 | 32
* 192 | 11000000 | 64
*
* Depending on runtime settings, it is possible that arena_malloc()
* will further round up to a power of two, but that never causes
* correctness issues.
*/
ceil_size = (size + (alignment - 1)) & (-alignment);
/*
* (ceil_size < size) protects against the combination of maximal
* alignment and size greater than maximal alignment.
*/
if (ceil_size < size) {
/* size_t overflow. */
return (NULL);
}
if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
&& ceil_size <= arena_maxclass))
ret = arena_malloc(ceil_size, false);
else {
size_t run_size;
/*
* We can't achieve subpage alignment, so round up alignment
* permanently; it makes later calculations simpler.
*/
alignment = PAGE_CEILING(alignment);
ceil_size = PAGE_CEILING(size);
/*
* (ceil_size < size) protects against very large sizes within
* PAGE_SIZE of SIZE_T_MAX.
*
* (ceil_size + alignment < ceil_size) protects against the
* combination of maximal alignment and ceil_size large enough
* to cause overflow. This is similar to the first overflow
* check above, but it needs to be repeated due to the new
* ceil_size value, which may now be *equal* to maximal
* alignment, whereas before we only detected overflow if the
* original size was *greater* than maximal alignment.
*/
if (ceil_size < size || ceil_size + alignment < ceil_size) {
/* size_t overflow. */
return (NULL);
}
/*
* Calculate the size of the over-size run that arena_palloc()
* would need to allocate in order to guarantee the alignment.
*/
if (ceil_size >= alignment)
run_size = ceil_size + alignment - PAGE_SIZE;
else {
/*
* It is possible that (alignment << 1) will cause
* overflow, but it doesn't matter because we also
* subtract PAGE_SIZE, which in the case of overflow
* leaves us with a very large run_size. That causes
* the first conditional below to fail, which means
* that the bogus run_size value never gets used for
* anything important.
*/
run_size = (alignment << 1) - PAGE_SIZE;
}
if (run_size <= arena_maxclass) {
ret = arena_palloc(choose_arena(), alignment, ceil_size,
run_size);
} else if (alignment <= chunksize)
ret = huge_malloc(ceil_size, false);
else
ret = huge_palloc(alignment, ceil_size);
}
assert(((uintptr_t)ret & (alignment - 1)) == 0);
return (ret);
}
static void
stats_print_atexit(void)
{
@ -365,22 +270,6 @@ stats_print_atexit(void)
JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL);
}
static inline void *
iralloc(void *ptr, size_t size)
{
size_t oldsize;
assert(ptr != NULL);
assert(size != 0);
oldsize = isalloc(ptr);
if (size <= arena_maxclass)
return (arena_ralloc(ptr, size, oldsize));
else
return (huge_ralloc(ptr, size, oldsize));
}
/*
* End miscellaneous support functions.
*/
@ -550,6 +439,16 @@ MALLOC_OUT:
case 'A':
opt_abort = true;
break;
#ifdef JEMALLOC_PROF
case 'b':
if (opt_lg_prof_bt_max > 0)
opt_lg_prof_bt_max--;
break;
case 'B':
if (opt_lg_prof_bt_max < LG_PROF_BT_MAX)
opt_lg_prof_bt_max++;
break;
#endif
case 'c':
if (opt_lg_cspace_max - 1 >
opt_lg_qspace_max &&
@ -571,6 +470,14 @@ MALLOC_OUT:
if (opt_lg_dirty_mult >= 0)
opt_lg_dirty_mult--;
break;
#ifdef JEMALLOC_PROF
case 'f':
opt_prof = false;
break;
case 'F':
opt_prof = true;
break;
#endif
#ifdef JEMALLOC_TCACHE
case 'g':
if (opt_lg_tcache_gc_sweep >= 0)
@ -591,6 +498,17 @@ MALLOC_OUT:
opt_lg_tcache_nslots++;
break;
#endif
#ifdef JEMALLOC_PROF
case 'i':
if (opt_lg_prof_interval > 0)
opt_lg_prof_interval--;
break;
case 'I':
if (opt_lg_prof_interval + 1 <
(sizeof(uint64_t) << 3))
opt_lg_prof_interval++;
break;
#endif
#ifdef JEMALLOC_FILL
case 'j':
opt_junk = false;
@ -617,6 +535,14 @@ MALLOC_OUT:
(sizeof(size_t) << 3))
opt_lg_chunk++;
break;
#ifdef JEMALLOC_PROF
case 'l':
opt_prof_leak = false;
break;
case 'L':
opt_prof_leak = true;
break;
#endif
case 'm':
if (opt_lg_medium_max > PAGE_SHIFT)
opt_lg_medium_max--;
@ -663,6 +589,14 @@ MALLOC_OUT:
opt_trace = true; opt_trace = true;
break; break;
#endif #endif
#ifdef JEMALLOC_PROF
case 'u':
opt_prof_udump = false;
break;
case 'U':
opt_prof_udump = true;
break;
#endif
#ifdef JEMALLOC_SYSV #ifdef JEMALLOC_SYSV
case 'v': case 'v':
opt_sysv = false; opt_sysv = false;
@ -702,6 +636,10 @@ MALLOC_OUT:
} }
} }
#ifdef JEMALLOC_PROF
prof_boot0();
#endif
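
    The new option letters follow the existing JEMALLOC_OPTIONS convention: a
    lowercase letter decreases or disables a setting, the matching uppercase
    letter increases or enables it. The sketch below applies an options string to
    stand-in copies of the profiling settings; the ex_ names, default values, and
    EX_LG_PROF_BT_MAX bound are assumptions, only the per-letter behavior is
    taken from the hunks above.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins for the settings toggled above (default values are guesses). */
    static bool   ex_opt_prof = true;		/* F/f */
    static bool   ex_opt_prof_leak = false;	/* L/l */
    static bool   ex_opt_prof_udump = false;	/* U/u */
    static size_t ex_opt_lg_prof_bt_max = 2;	/* B/b */
    static size_t ex_opt_lg_prof_interval = 30;	/* I/i */
    #define EX_LG_PROF_BT_MAX 7			/* assumed upper bound */

    /* Lowercase letters decrease/disable, uppercase letters increase/enable. */
    static void
    ex_apply_prof_opts(const char *opts)
    {
        for (const char *p = opts; *p != '\0'; p++) {
            switch (*p) {
            case 'b':
                if (ex_opt_lg_prof_bt_max > 0)
                    ex_opt_lg_prof_bt_max--;
                break;
            case 'B':
                if (ex_opt_lg_prof_bt_max < EX_LG_PROF_BT_MAX)
                    ex_opt_lg_prof_bt_max++;
                break;
            case 'f': ex_opt_prof = false; break;
            case 'F': ex_opt_prof = true; break;
            case 'i':
                if (ex_opt_lg_prof_interval > 0)
                    ex_opt_lg_prof_interval--;
                break;
            case 'I':
                if (ex_opt_lg_prof_interval + 1 < (sizeof(uint64_t) << 3))
                    ex_opt_lg_prof_interval++;
                break;
            case 'l': ex_opt_prof_leak = false; break;
            case 'L': ex_opt_prof_leak = true; break;
            case 'u': ex_opt_prof_udump = false; break;
            case 'U': ex_opt_prof_udump = true; break;
            default: break;	/* remaining letters are handled elsewhere */
            }
        }
    }

    int
    main(void)
    {
        ex_apply_prof_opts("FLBB");	/* e.g. JEMALLOC_OPTIONS=FLBB */
        printf("prof=%d leak=%d udump=%d lg_bt_max=%zu lg_interval=%zu\n",
            ex_opt_prof, ex_opt_prof_leak, ex_opt_prof_udump,
            ex_opt_lg_prof_bt_max, ex_opt_lg_prof_interval);
        return (0);
    }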
/* Register fork handlers. */ /* Register fork handlers. */
if (pthread_atfork(jemalloc_prefork, jemalloc_postfork, if (pthread_atfork(jemalloc_prefork, jemalloc_postfork,
jemalloc_postfork) != 0) { jemalloc_postfork) != 0) {
@ -799,7 +737,16 @@ MALLOC_OUT:
* default. * default.
*/ */
#ifdef JEMALLOC_TCACHE #ifdef JEMALLOC_TCACHE
if (tcache_nslots) { if (tcache_nslots
# ifdef JEMALLOC_PROF
/*
 * Profile data storage concurrency scales with the number of
 * arenas, so only reduce the number of arenas to compensate for
 * tcache when profiling is disabled.
 */
&& opt_prof == false
# endif
) {
/* /*
* Only large object allocation/deallocation is * Only large object allocation/deallocation is
* guaranteed to acquire an arena mutex, so we can get * guaranteed to acquire an arena mutex, so we can get
@ -868,6 +815,13 @@ MALLOC_OUT:
next_arena = 0; next_arena = 0;
#endif #endif
#ifdef JEMALLOC_PROF
if (prof_boot1()) {
malloc_mutex_unlock(&init_lock);
return (true);
}
#endif
/* Allocate and initialize arenas. */ /* Allocate and initialize arenas. */
arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
if (arenas == NULL) { if (arenas == NULL) {
@ -901,6 +855,9 @@ void *
JEMALLOC_P(malloc)(size_t size) JEMALLOC_P(malloc)(size_t size)
{ {
void *ret; void *ret;
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt;
#endif
if (malloc_init()) { if (malloc_init()) {
ret = NULL; ret = NULL;
@ -928,6 +885,13 @@ JEMALLOC_P(malloc)(size_t size)
#endif #endif
} }
#ifdef JEMALLOC_PROF
if (opt_prof && (cnt = prof_alloc_prep()) == NULL) {
ret = NULL;
goto OOM;
}
#endif
ret = imalloc(size); ret = imalloc(size);
OOM: OOM:
@ -946,6 +910,10 @@ OOM:
#ifdef JEMALLOC_SYSV #ifdef JEMALLOC_SYSV
RETURN: RETURN:
#endif #endif
#ifdef JEMALLOC_PROF
if (opt_prof && ret != NULL)
prof_malloc(ret, cnt);
#endif
#ifdef JEMALLOC_TRACE #ifdef JEMALLOC_TRACE
if (opt_trace) if (opt_trace)
trace_malloc(ret, size); trace_malloc(ret, size);
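
    The same prepare/commit pattern recurs in posix_memalign(), calloc(), and
    realloc() below: capture a per-backtrace counter before allocating, then
    attribute the allocation only on success. A condensed, hypothetical sketch;
    the ex_ counter type and wrappers are stubs, not the prof.c interfaces (the
    real prof_malloc() takes only the pointer and counter, as shown in the diff).

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { unsigned long curobjs, curbytes; } ex_prof_thr_cnt_t;
    static bool ex_opt_prof = true;
    static ex_prof_thr_cnt_t ex_cnt_stub;

    static ex_prof_thr_cnt_t *
    ex_prof_alloc_prep(void)
    {
        /* The real function walks the stack and finds/creates a counter. */
        return (&ex_cnt_stub);
    }

    static void
    ex_prof_malloc(void *ptr, size_t size, ex_prof_thr_cnt_t *cnt)
    {
        cnt->curobjs++;
        cnt->curbytes += size;
        printf("attributed %zu bytes at %p\n", size, ptr);
    }

    static void *
    ex_malloc(size_t size)
    {
        void *ret;
        ex_prof_thr_cnt_t *cnt = NULL;

        /* Capture the per-backtrace counter *before* allocating. */
        if (ex_opt_prof && (cnt = ex_prof_alloc_prep()) == NULL)
            return (NULL);

        ret = malloc(size);

        /* Attribute the allocation only if it succeeded. */
        if (ex_opt_prof && ret != NULL)
            ex_prof_malloc(ret, size, cnt);
        return (ret);
    }

    int
    main(void)
    {
        free(ex_malloc(64));
        return (0);
    }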
@ -960,6 +928,9 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
{ {
int ret; int ret;
void *result; void *result;
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt;
#endif
if (malloc_init()) if (malloc_init())
result = NULL; result = NULL;
@ -1003,7 +974,14 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
goto RETURN; goto RETURN;
} }
result = ipalloc(alignment, size); #ifdef JEMALLOC_PROF
if (opt_prof && (cnt = prof_alloc_prep()) == NULL) {
result = NULL;
ret = EINVAL;
} else
#endif
result = ipalloc(alignment, size);
} }
if (result == NULL) { if (result == NULL) {
@ -1023,6 +1001,10 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
ret = 0; ret = 0;
RETURN: RETURN:
#ifdef JEMALLOC_PROF
if (opt_prof && result != NULL)
prof_malloc(result, cnt);
#endif
#ifdef JEMALLOC_TRACE #ifdef JEMALLOC_TRACE
if (opt_trace) if (opt_trace)
trace_posix_memalign(result, alignment, size); trace_posix_memalign(result, alignment, size);
@ -1037,6 +1019,9 @@ JEMALLOC_P(calloc)(size_t num, size_t size)
{ {
void *ret; void *ret;
size_t num_size; size_t num_size;
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt;
#endif
if (malloc_init()) { if (malloc_init()) {
num_size = 0; num_size = 0;
@ -1068,6 +1053,13 @@ JEMALLOC_P(calloc)(size_t num, size_t size)
goto RETURN; goto RETURN;
} }
#ifdef JEMALLOC_PROF
if (opt_prof && (cnt = prof_alloc_prep()) == NULL) {
ret = NULL;
goto RETURN;
}
#endif
ret = icalloc(num_size); ret = icalloc(num_size);
RETURN: RETURN:
@ -1083,6 +1075,10 @@ RETURN:
errno = ENOMEM; errno = ENOMEM;
} }
#ifdef JEMALLOC_PROF
if (opt_prof && ret != NULL)
prof_malloc(ret, cnt);
#endif
#ifdef JEMALLOC_TRACE #ifdef JEMALLOC_TRACE
if (opt_trace) if (opt_trace)
trace_calloc(ret, num, size); trace_calloc(ret, num, size);
@ -1095,8 +1091,26 @@ void *
JEMALLOC_P(realloc)(void *ptr, size_t size) JEMALLOC_P(realloc)(void *ptr, size_t size)
{ {
void *ret; void *ret;
#ifdef JEMALLOC_TRACE #if (defined(JEMALLOC_PROF) || defined(JEMALLOC_TRACE))
size_t old_size; size_t old_size;
#endif
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt, *old_cnt;
#endif
/*
 * Both profiling and tracing may need old_size, and the cpp conditionals
 * that decide whether to call isalloc() are messy.  Define need_old_size
 * once here to avoid repeating them below.
 */
#ifdef JEMALLOC_PROF
# ifdef JEMALLOC_TRACE
# define need_old_size opt_prof || opt_trace
# else
# define need_old_size opt_prof
# endif
#elif (defined(JEMALLOC_TRACE))
# define need_old_size opt_trace
#endif #endif
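
    To make the cpp shortcut concrete, here is a tiny standalone program that
    assumes a build with both profiling and tracing enabled; EX_PROF and
    EX_TRACE are stand-ins for the real JEMALLOC_* defines.

    #include <stdbool.h>
    #include <stdio.h>

    /* Assume a build with both profiling and tracing enabled. */
    #define EX_PROF
    #define EX_TRACE

    static bool ex_opt_prof = true, ex_opt_trace = false;

    #ifdef EX_PROF
    # ifdef EX_TRACE
    #  define ex_need_old_size (ex_opt_prof || ex_opt_trace)
    # else
    #  define ex_need_old_size (ex_opt_prof)
    # endif
    #elif defined(EX_TRACE)
    # define ex_need_old_size (ex_opt_trace)
    #endif

    int
    main(void)
    {
        /* A single runtime test guards all of the old_size bookkeeping. */
        if (ex_need_old_size)
            printf("would record old_size = isalloc(ptr)\n");
        else
            printf("old_size not needed\n");
        return (0);
    }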
if (size == 0) { if (size == 0) {
@ -1107,12 +1121,26 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
#ifdef JEMALLOC_SYSV #ifdef JEMALLOC_SYSV
else { else {
if (ptr != NULL) { if (ptr != NULL) {
#ifdef JEMALLOC_TRACE #if (defined(JEMALLOC_PROF) || defined(JEMALLOC_TRACE))
if (opt_trace) if (need_old_size) {
old_size = isalloc(ptr); old_size = isalloc(ptr);
# ifdef JEMALLOC_PROF
old_cnt = prof_cnt_get(ptr);
cnt = NULL;
# endif
}
#endif #endif
idalloc(ptr); idalloc(ptr);
} }
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_TRACE))
else if (need_old_size) {
old_size = 0;
# ifdef JEMALLOC_PROF
old_cnt = NULL;
cnt = NULL;
# endif
}
#endif
ret = NULL; ret = NULL;
goto RETURN; goto RETURN;
} }
@ -1123,13 +1151,24 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
assert(malloc_initialized || malloc_initializer == assert(malloc_initialized || malloc_initializer ==
pthread_self()); pthread_self());
#ifdef JEMALLOC_TRACE #if (defined(JEMALLOC_PROF) || defined(JEMALLOC_TRACE))
if (opt_trace) if (need_old_size) {
old_size = isalloc(ptr); old_size = isalloc(ptr);
# ifdef JEMALLOC_PROF
old_cnt = prof_cnt_get(ptr);
if ((cnt = prof_alloc_prep()) == NULL) {
ret = NULL;
goto OOM;
}
# endif
}
#endif #endif
ret = iralloc(ptr, size); ret = iralloc(ptr, size);
#ifdef JEMALLOC_PROF
OOM:
#endif
if (ret == NULL) { if (ret == NULL) {
#ifdef JEMALLOC_XMALLOC #ifdef JEMALLOC_XMALLOC
if (opt_xmalloc) { if (opt_xmalloc) {
@ -1142,15 +1181,28 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
errno = ENOMEM; errno = ENOMEM;
} }
} else { } else {
if (malloc_init()) #if (defined(JEMALLOC_PROF) || defined(JEMALLOC_TRACE))
ret = NULL; if (need_old_size) {
else
ret = imalloc(size);
#ifdef JEMALLOC_TRACE
if (opt_trace)
old_size = 0; old_size = 0;
# ifdef JEMALLOC_PROF
old_cnt = NULL;
# endif
}
#endif #endif
if (malloc_init()) {
#ifdef JEMALLOC_PROF
if (opt_prof)
cnt = NULL;
#endif
ret = NULL;
} else {
#ifdef JEMALLOC_PROF
if (opt_prof && (cnt = prof_alloc_prep()) == NULL) {
ret = NULL;
} else
#endif
ret = imalloc(size);
}
if (ret == NULL) { if (ret == NULL) {
#ifdef JEMALLOC_XMALLOC #ifdef JEMALLOC_XMALLOC
@ -1171,8 +1223,15 @@ RETURN:
#ifdef JEMALLOC_TRACE #ifdef JEMALLOC_TRACE
if (opt_trace) if (opt_trace)
trace_realloc(ret, ptr, size, old_size); trace_realloc(ret, ptr, size, old_size);
#endif
#ifdef JEMALLOC_PROF
if (opt_prof)
prof_realloc(ret, cnt, ptr, old_size, old_cnt);
#endif #endif
return (ret); return (ret);
#ifdef need_old_size
# undef need_old_size
#endif
} }
JEMALLOC_ATTR(visibility("default")) JEMALLOC_ATTR(visibility("default"))
@ -1184,6 +1243,10 @@ JEMALLOC_P(free)(void *ptr)
assert(malloc_initialized || malloc_initializer == assert(malloc_initialized || malloc_initializer ==
pthread_self()); pthread_self());
#ifdef JEMALLOC_PROF
if (opt_prof)
prof_free(ptr);
#endif
#ifdef JEMALLOC_TRACE #ifdef JEMALLOC_TRACE
if (opt_trace) if (opt_trace)
trace_free(ptr, isalloc(ptr)); trace_free(ptr, isalloc(ptr));

View File

@ -416,10 +416,11 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
return (NULL); return (NULL);
} }
static inline void static inline unsigned
arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size) arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
size_t size)
{ {
unsigned shift, diff, regind, elm, bit; unsigned shift, diff, regind;
assert(run->magic == ARENA_RUN_MAGIC); assert(run->magic == ARENA_RUN_MAGIC);
@ -475,6 +476,15 @@ arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
assert(diff == regind * size); assert(diff == regind * size);
assert(regind < bin->nregs); assert(regind < bin->nregs);
return (regind);
}
static inline void
arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
{
unsigned regind, elm, bit;
regind = arena_run_regind(run, bin, ptr, size);
elm = regind >> (LG_SIZEOF_INT + 3); elm = regind >> (LG_SIZEOF_INT + 3);
if (elm < run->regs_minelm) if (elm < run->regs_minelm)
run->regs_minelm = elm; run->regs_minelm = elm;
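
    The elm/bit bookkeeping that arena_run_reg_dalloc() performs after calling
    arena_run_regind() packs one bit per region into unsigned words. A minimal
    standalone sketch, assuming 4-byte unsigned ints:

    #include <assert.h>
    #include <stdio.h>

    #define EX_LG_SIZEOF_INT 2	/* assume 4-byte unsigned, as on most targets */

    /* Mark region 'regind' free in a per-run bitmask. */
    static void
    ex_mask_set(unsigned *regs_mask, unsigned regind)
    {
        unsigned elm = regind >> (EX_LG_SIZEOF_INT + 3);		/* word index */
        unsigned bit = regind & ((1U << (EX_LG_SIZEOF_INT + 3)) - 1);	/* bit index */

        assert((regs_mask[elm] & (1U << bit)) == 0);
        regs_mask[elm] |= (1U << bit);
    }

    int
    main(void)
    {
        unsigned mask[2] = {0, 0};

        ex_mask_set(mask, 37);			/* 37 = word 1, bit 5 */
        printf("mask[1] = 0x%x\n", mask[1]);	/* prints 0x20 */
        return (0);
    }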
@ -1094,8 +1104,13 @@ static size_t
arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
{ {
size_t try_run_size, good_run_size; size_t try_run_size, good_run_size;
unsigned good_nregs, good_mask_nelms, good_reg0_offset; uint32_t try_nregs, good_nregs;
unsigned try_nregs, try_mask_nelms, try_reg0_offset; uint32_t try_mask_nelms, good_mask_nelms;
uint32_t try_hdr_size, good_hdr_size;
#ifdef JEMALLOC_PROF
uint32_t try_cnt0_offset, good_cnt0_offset;
#endif
uint32_t try_reg0_offset, good_reg0_offset;
assert(min_run_size >= PAGE_SIZE); assert(min_run_size >= PAGE_SIZE);
assert(min_run_size <= arena_maxclass); assert(min_run_size <= arena_maxclass);
@ -1118,9 +1133,20 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
try_nregs--; try_nregs--;
try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) + try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) +
((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ? 1 : 0); ((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ? 1 : 0);
try_hdr_size = sizeof(arena_run_t) + (sizeof(unsigned) *
(try_mask_nelms - 1));
#ifdef JEMALLOC_PROF
if (opt_prof) {
/* Pad to a quantum boundary. */
try_hdr_size = QUANTUM_CEILING(try_hdr_size);
try_cnt0_offset = try_hdr_size;
/* Add space for one (prof_thr_cnt_t *) per region. */
try_hdr_size += try_nregs * sizeof(prof_thr_cnt_t *);
} else
try_cnt0_offset = 0;
#endif
try_reg0_offset = try_run_size - (try_nregs * bin->reg_size); try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
} while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1)) } while (try_hdr_size > try_reg0_offset);
> try_reg0_offset);
/* run_size expansion loop. */ /* run_size expansion loop. */
do { do {
@ -1130,6 +1156,10 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
good_run_size = try_run_size; good_run_size = try_run_size;
good_nregs = try_nregs; good_nregs = try_nregs;
good_mask_nelms = try_mask_nelms; good_mask_nelms = try_mask_nelms;
good_hdr_size = try_hdr_size;
#ifdef JEMALLOC_PROF
good_cnt0_offset = try_cnt0_offset;
#endif
good_reg0_offset = try_reg0_offset; good_reg0_offset = try_reg0_offset;
/* Try more aggressive settings. */ /* Try more aggressive settings. */
@ -1141,24 +1171,39 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) + try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) +
((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ? ((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ?
1 : 0); 1 : 0);
try_hdr_size = sizeof(arena_run_t) + (sizeof(unsigned) *
(try_mask_nelms - 1));
#ifdef JEMALLOC_PROF
if (opt_prof) {
/* Pad to a quantum boundary. */
try_hdr_size = QUANTUM_CEILING(try_hdr_size);
try_cnt0_offset = try_hdr_size;
/*
* Add space for one (prof_thr_cnt_t *) per
* region.
*/
try_hdr_size += try_nregs *
sizeof(prof_thr_cnt_t *);
}
#endif
try_reg0_offset = try_run_size - (try_nregs * try_reg0_offset = try_run_size - (try_nregs *
bin->reg_size); bin->reg_size);
} while (sizeof(arena_run_t) + (sizeof(unsigned) * } while (try_hdr_size > try_reg0_offset);
(try_mask_nelms - 1)) > try_reg0_offset);
} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL } while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
&& RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
&& (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
&& (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))) && try_hdr_size < PAGE_SIZE);
< PAGE_SIZE);
assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1)) assert(good_hdr_size <= good_reg0_offset);
<= good_reg0_offset);
assert((good_mask_nelms << (LG_SIZEOF_INT + 3)) >= good_nregs); assert((good_mask_nelms << (LG_SIZEOF_INT + 3)) >= good_nregs);
/* Copy final settings. */ /* Copy final settings. */
bin->run_size = good_run_size; bin->run_size = good_run_size;
bin->nregs = good_nregs; bin->nregs = good_nregs;
bin->regs_mask_nelms = good_mask_nelms; bin->regs_mask_nelms = good_mask_nelms;
#ifdef JEMALLOC_PROF
bin->cnt0_offset = good_cnt0_offset;
#endif
bin->reg0_offset = good_reg0_offset; bin->reg0_offset = good_reg0_offset;
return (good_run_size); return (good_run_size);
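
    A standalone sketch of the new header-size calculation: pad the base header
    to a quantum boundary, then reserve one counter pointer per region.
    EX_QUANTUM, the stand-in counter struct, and the sample numbers in main()
    are assumptions; base_hdr stands for sizeof(arena_run_t), which already
    contains one mask word.

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EX_QUANTUM ((size_t)16)	/* assumed quantum */
    #define EX_QUANTUM_CEILING(s) (((s) + (EX_QUANTUM - 1)) & ~(EX_QUANTUM - 1))

    typedef struct { uint64_t opaque[4]; } ex_prof_thr_cnt_t;	/* stand-in */

    /* Header bytes for a run with 'nregs' regions and 'mask_nelms' mask words. */
    static size_t
    ex_run_hdr_size(uint32_t nregs, uint32_t mask_nelms, int prof_enabled,
        size_t base_hdr)
    {
        size_t hdr = base_hdr + sizeof(unsigned) * (mask_nelms - 1);

        if (prof_enabled) {
            hdr = EX_QUANTUM_CEILING(hdr);	/* cnt0_offset starts here */
            hdr += (size_t)nregs * sizeof(ex_prof_thr_cnt_t *);
        }
        return (hdr);
    }

    int
    main(void)
    {
        /* e.g. 60 regions, 2 mask words, a 40-byte arena_run_t stand-in. */
        printf("hdr without prof: %zu\n", ex_run_hdr_size(60, 2, 0, 40));
        printf("hdr with prof:    %zu\n", ex_run_hdr_size(60, 2, 1, 40));
        return (0);
    }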
@ -1438,6 +1483,70 @@ arena_salloc(const void *ptr)
return (ret); return (ret);
} }
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *
arena_prof_cnt_get(const void *ptr)
{
prof_thr_cnt_t *ret;
arena_chunk_t *chunk;
size_t pageind, mapbits;
assert(ptr != NULL);
assert(CHUNK_ADDR2BASE(ptr) != ptr);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
mapbits = chunk->map[pageind].bits;
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - ((mapbits & CHUNK_MAP_PG_MASK) >>
CHUNK_MAP_PG_SHIFT)) << PAGE_SHIFT));
arena_bin_t *bin = run->bin;
unsigned regind;
assert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin, ptr, bin->reg_size);
ret = *(prof_thr_cnt_t **)((uintptr_t)run + bin->cnt0_offset +
(regind * sizeof(prof_thr_cnt_t *)));
} else {
ret = chunk->map[pageind].prof_cnt;
}
return (ret);
}
void
arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
{
arena_chunk_t *chunk;
size_t pageind, mapbits;
assert(ptr != NULL);
assert(CHUNK_ADDR2BASE(ptr) != ptr);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
mapbits = chunk->map[pageind].bits;
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - ((mapbits & CHUNK_MAP_PG_MASK) >>
CHUNK_MAP_PG_SHIFT)) << PAGE_SHIFT));
arena_bin_t *bin = run->bin;
unsigned regind;
assert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin, ptr, bin->reg_size);
*((prof_thr_cnt_t **)((uintptr_t)run + bin->cnt0_offset +
(regind * sizeof(prof_thr_cnt_t *)))) = cnt;
} else {
chunk->map[pageind].prof_cnt = cnt;
}
}
#endif
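
    The address arithmetic in arena_prof_cnt_get()/arena_prof_cnt_set() amounts
    to indexing an array of counter pointers that starts cnt0_offset bytes into
    the run. A hypothetical sketch, with a plain memory blob standing in for the
    run header and made-up layout numbers:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { unsigned long curobjs; } ex_prof_thr_cnt_t;	/* stand-in */

    /* Locate region 'regind's counter slot inside a run header blob. */
    static ex_prof_thr_cnt_t **
    ex_cnt_slot(void *run, uint32_t cnt0_offset, unsigned regind)
    {
        return ((ex_prof_thr_cnt_t **)((uintptr_t)run + cnt0_offset +
            regind * sizeof(ex_prof_thr_cnt_t *)));
    }

    int
    main(void)
    {
        uint32_t cnt0_offset = 48;	/* assumed header layout */
        unsigned nregs = 60;
        void *run = calloc(1, cnt0_offset + nregs * sizeof(ex_prof_thr_cnt_t *));
        ex_prof_thr_cnt_t cnt = {0};

        if (run == NULL)
            return (1);
        *ex_cnt_slot(run, cnt0_offset, 7) = &cnt;			/* set */
        printf("%p\n", (void *)*ex_cnt_slot(run, cnt0_offset, 7));	/* get */
        free(run);
        return (0);
    }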
static void static void
arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
arena_bin_t *bin) arena_bin_t *bin)
@ -1864,6 +1973,8 @@ arena_new(arena_t *arena, unsigned ind)
arena_bin_t *bin; arena_bin_t *bin;
size_t prev_run_size; size_t prev_run_size;
arena->ind = ind;
if (malloc_mutex_init(&arena->lock)) if (malloc_mutex_init(&arena->lock))
return (true); return (true);
@ -1918,7 +2029,7 @@ arena_new(arena_t *arena, unsigned ind)
arena->trace_fd = creat(filename, 0644); arena->trace_fd = creat(filename, 0644);
if (arena->trace_fd == -1) { if (arena->trace_fd == -1) {
malloc_write4("<jemalloc>", malloc_write4("<jemalloc>",
": creat(\"", filename, "\", O_RDWR) failed\n"); ": creat(\"", filename, "\", 0644) failed\n");
abort(); abort();
} }
} }

View File

@ -9,7 +9,7 @@ size_t opt_lg_chunk = LG_CHUNK_DEFAULT;
bool opt_overcommit = true; bool opt_overcommit = true;
#endif #endif
#ifdef JEMALLOC_STATS #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
malloc_mutex_t chunks_mtx; malloc_mutex_t chunks_mtx;
chunk_stats_t stats_chunks; chunk_stats_t stats_chunks;
#endif #endif
@ -63,14 +63,31 @@ chunk_alloc(size_t size, bool *zero)
/* All strategies for allocation failed. */ /* All strategies for allocation failed. */
ret = NULL; ret = NULL;
RETURN: RETURN:
#ifdef JEMALLOC_STATS #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
if (ret != NULL) { if (ret != NULL) {
# ifdef JEMALLOC_PROF
bool udump;
# endif
malloc_mutex_lock(&chunks_mtx); malloc_mutex_lock(&chunks_mtx);
# ifdef JEMALLOC_STATS
stats_chunks.nchunks += (size / chunksize); stats_chunks.nchunks += (size / chunksize);
# endif
stats_chunks.curchunks += (size / chunksize); stats_chunks.curchunks += (size / chunksize);
if (stats_chunks.curchunks > stats_chunks.highchunks) if (stats_chunks.curchunks > stats_chunks.highchunks) {
stats_chunks.highchunks = stats_chunks.curchunks; stats_chunks.highchunks = stats_chunks.curchunks;
# ifdef JEMALLOC_PROF
udump = true;
# endif
}
# ifdef JEMALLOC_PROF
else
udump = false;
# endif
malloc_mutex_unlock(&chunks_mtx); malloc_mutex_unlock(&chunks_mtx);
# ifdef JEMALLOC_PROF
if (opt_prof && opt_prof_udump && udump)
prof_udump();
# endif
} }
#endif #endif
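
    The udump trigger follows a familiar pattern: detect a new high-water mark
    while holding the stats lock, but defer the (potentially slow) dump until
    the lock has been dropped. A condensed sketch with hypothetical ex_ names:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    static pthread_mutex_t ex_chunks_mtx = PTHREAD_MUTEX_INITIALIZER;
    static size_t ex_curchunks, ex_highchunks;
    static bool ex_opt_prof_udump = true;

    static void
    ex_prof_udump(void)
    {
        /* The real prof_udump() writes a heap profile to a file. */
        printf("memory usage high-water mark: %zu chunks\n", ex_highchunks);
    }

    static void
    ex_chunk_alloc_record(size_t nchunks)
    {
        bool udump = false;

        pthread_mutex_lock(&ex_chunks_mtx);
        ex_curchunks += nchunks;
        if (ex_curchunks > ex_highchunks) {
            ex_highchunks = ex_curchunks;
            udump = true;
        }
        pthread_mutex_unlock(&ex_chunks_mtx);

        /* Dump outside the lock to avoid holding it during file I/O. */
        if (ex_opt_prof_udump && udump)
            ex_prof_udump();
    }

    int
    main(void)
    {
        ex_chunk_alloc_record(4);
        return (0);
    }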
@ -87,7 +104,7 @@ chunk_dealloc(void *chunk, size_t size)
assert(size != 0); assert(size != 0);
assert((size & chunksize_mask) == 0); assert((size & chunksize_mask) == 0);
#ifdef JEMALLOC_STATS #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
malloc_mutex_lock(&chunks_mtx); malloc_mutex_lock(&chunks_mtx);
stats_chunks.curchunks -= (size / chunksize); stats_chunks.curchunks -= (size / chunksize);
malloc_mutex_unlock(&chunks_mtx); malloc_mutex_unlock(&chunks_mtx);
@ -114,7 +131,7 @@ chunk_boot(void)
chunksize_mask = chunksize - 1; chunksize_mask = chunksize - 1;
chunk_npages = (chunksize >> PAGE_SHIFT); chunk_npages = (chunksize >> PAGE_SHIFT);
#ifdef JEMALLOC_STATS #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
if (malloc_mutex_init(&chunks_mtx)) if (malloc_mutex_init(&chunks_mtx))
return (true); return (true);
memset(&stats_chunks, 0, sizeof(chunk_stats_t)); memset(&stats_chunks, 0, sizeof(chunk_stats_t));

View File

@ -34,6 +34,9 @@ CTL_PROTO(epoch)
#ifdef JEMALLOC_TCACHE #ifdef JEMALLOC_TCACHE
CTL_PROTO(tcache_flush) CTL_PROTO(tcache_flush)
#endif #endif
#ifdef JEMALLOC_PROF
CTL_PROTO(prof_dump)
#endif
CTL_PROTO(config_debug) CTL_PROTO(config_debug)
CTL_PROTO(config_dss) CTL_PROTO(config_dss)
CTL_PROTO(config_dynamic_page_shift) CTL_PROTO(config_dynamic_page_shift)
@ -185,6 +188,12 @@ static const ctl_node_t tcache_node[] = {
}; };
#endif #endif
#ifdef JEMALLOC_PROF
static const ctl_node_t prof_node[] = {
{NAME("dump"), CTL(prof_dump)}
};
#endif
static const ctl_node_t config_node[] = { static const ctl_node_t config_node[] = {
{NAME("debug"), CTL(config_debug)}, {NAME("debug"), CTL(config_debug)},
{NAME("dss"), CTL(config_dss)}, {NAME("dss"), CTL(config_dss)},
@ -404,6 +413,9 @@ static const ctl_node_t root_node[] = {
{NAME("epoch"), CTL(epoch)}, {NAME("epoch"), CTL(epoch)},
#ifdef JEMALLOC_TCACHE #ifdef JEMALLOC_TCACHE
{NAME("tcache"), CHILD(tcache)}, {NAME("tcache"), CHILD(tcache)},
#endif
#ifdef JEMALLOC_PROF
{NAME("prof"), CHILD(prof)},
#endif #endif
{NAME("config"), CHILD(config)}, {NAME("config"), CHILD(config)},
{NAME("opt"), CHILD(opt)}, {NAME("opt"), CHILD(opt)},
@ -926,6 +938,23 @@ RETURN:
} }
#endif #endif
#ifdef JEMALLOC_PROF
static int
prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
void *newp, size_t newlen)
{
int ret;
VOID();
prof_mdump();
ret = 0;
RETURN:
return (ret);
}
#endif
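
    With this node registered, a manual dump can be requested through the
    mallctl interface. A usage sketch, assuming the library is built without a
    symbol prefix so that JEMALLOC_P(mallctl) is plain mallctl:

    #include <stddef.h>
    #include <stdio.h>

    /* Prototype as exported by jemalloc (modulo the configured prefix). */
    int mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp,
        size_t newlen);

    int
    main(void)
    {
        /* Force a heap profile dump; the node takes no input or output. */
        if (mallctl("prof.dump", NULL, NULL, NULL, 0) != 0)
            fprintf(stderr, "prof.dump failed (profiling disabled?)\n");
        return (0);
    }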
/******************************************************************************/ /******************************************************************************/
#ifdef JEMALLOC_DEBUG #ifdef JEMALLOC_DEBUG

View File

@ -48,6 +48,12 @@
/* JEMALLOC_TRACE enables allocation tracing. */ /* JEMALLOC_TRACE enables allocation tracing. */
#undef JEMALLOC_TRACE #undef JEMALLOC_TRACE
/* JEMALLOC_PROF enables allocation profiling. */
#undef JEMALLOC_PROF
/* Use libunwind for profile backtracing if defined. */
#undef JEMALLOC_PROF_LIBUNWIND
/* /*
* JEMALLOC_TINY enables support for tiny objects, which are smaller than one * JEMALLOC_TINY enables support for tiny objects, which are smaller than one
* quantum. * quantum.

View File

@ -240,6 +240,45 @@ huge_salloc(const void *ptr)
return (ret); return (ret);
} }
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *
huge_prof_cnt_get(const void *ptr)
{
prof_thr_cnt_t *ret;
extent_node_t *node, key;
malloc_mutex_lock(&huge_mtx);
/* Extract from tree of huge allocations. */
key.addr = __DECONST(void *, ptr);
node = extent_tree_ad_search(&huge, &key);
assert(node != NULL);
ret = node->prof_cnt;
malloc_mutex_unlock(&huge_mtx);
return (ret);
}
void
huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
{
extent_node_t *node, key;
malloc_mutex_lock(&huge_mtx);
/* Extract from tree of huge allocations. */
key.addr = __DECONST(void *, ptr);
node = extent_tree_ad_search(&huge, &key);
assert(node != NULL);
node->prof_cnt = cnt;
malloc_mutex_unlock(&huge_mtx);
}
#endif
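
    huge_prof_cnt_get()/huge_prof_cnt_set() reuse the address-ordered huge tree:
    look the node up by address under huge_mtx, then read or write its prof_cnt
    field. A sketch of that shape, with a linear scan standing in for
    extent_tree_ad_search() and ex_ names that are not the real ones:

    #include <pthread.h>
    #include <stdio.h>

    typedef struct { unsigned long curobjs; } ex_prof_thr_cnt_t;	/* stand-in */
    typedef struct {
        void *addr;			/* tree key: allocation base address */
        ex_prof_thr_cnt_t *prof_cnt;	/* mirrors the new extent_node_t field */
    } ex_node_t;

    static pthread_mutex_t ex_huge_mtx = PTHREAD_MUTEX_INITIALIZER;
    static ex_node_t ex_nodes[8];	/* stub for the huge extent tree */

    /* Stub for extent_tree_ad_search(): linear scan, not a red-black tree. */
    static ex_node_t *
    ex_search(const void *ptr)
    {
        for (unsigned i = 0; i < 8; i++) {
            if (ex_nodes[i].addr == ptr)
                return (&ex_nodes[i]);
        }
        return (NULL);
    }

    static void
    ex_huge_prof_cnt_set(const void *ptr, ex_prof_thr_cnt_t *cnt)
    {
        pthread_mutex_lock(&ex_huge_mtx);
        ex_node_t *node = ex_search(ptr);	/* key is the address itself */
        if (node != NULL)
            node->prof_cnt = cnt;
        pthread_mutex_unlock(&ex_huge_mtx);
    }

    int
    main(void)
    {
        static char blob[16];
        static ex_prof_thr_cnt_t cnt;

        ex_nodes[0].addr = blob;
        ex_huge_prof_cnt_set(blob, &cnt);
        printf("prof_cnt set: %d\n", ex_nodes[0].prof_cnt == &cnt);
        return (0);
    }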
bool bool
huge_boot(void) huge_boot(void)
{ {

View File

@ -50,7 +50,8 @@ trace_flush(arena_t *arena)
if (err == -1) { if (err == -1) {
malloc_write4("<jemalloc>", malloc_write4("<jemalloc>",
": write() failed during trace flush", "\n", ""); ": write() failed during trace flush", "\n", "");
abort(); if (opt_abort)
abort();
} }
arena->trace_buf_end = 0; arena->trace_buf_end = 0;
} }

2
jemalloc/src/mb.c Normal file
View File

@ -0,0 +1,2 @@
#define MB_C_
#include "internal/jemalloc_internal.h"

1048
jemalloc/src/prof.c Normal file

File diff suppressed because it is too large