Use bitmaps to track small regions.

The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills.  Fix this in two places:

  - arena_run_t: Use a new bitmap implementation to track which regions
                 are available.  Furthermore, revert to preferring the
                 lowest available region (as jemalloc did with its old
                 bitmap-based approach).

  - tcache_t: Move read-only tcache_bin_t metadata into
              tcache_bin_info_t, and add a contiguous array of pointers
              to tcache_t in order to track cached objects.  This
              substantially increases the size of tcache_t, but results
              in much higher data locality for common tcache operations.
              As a side benefit, it is again possible to efficiently
              flush the least recently used cached objects, so this
              commit switches flushing from MRU to LRU.

The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast.  In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.

Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.

Use JEMALLOC_DEBUG rather than NDEBUG.

Add dassert(), and use it for debug-only asserts.
This commit is contained in:
Jason Evans 2011-03-16 10:30:13 -07:00
parent 77f350be08
commit 84c8eefeff
15 changed files with 702 additions and 139 deletions

View File

@ -46,7 +46,7 @@ BINS := @srcroot@bin/pprof
CHDRS := @objroot@include/jemalloc/jemalloc@install_suffix@.h \
@objroot@include/jemalloc/jemalloc_defs@install_suffix@.h
CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/base.c \
@srcroot@src/chunk.c @srcroot@src/chunk_dss.c \
@srcroot@src/bitmap.c @srcroot@src/chunk.c @srcroot@src/chunk_dss.c \
@srcroot@src/chunk_mmap.c @srcroot@src/chunk_swap.c @srcroot@src/ckh.c \
@srcroot@src/ctl.c @srcroot@src/extent.c @srcroot@src/hash.c \
@srcroot@src/huge.c @srcroot@src/mb.c @srcroot@src/mutex.c \
@ -65,8 +65,9 @@ DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html)
DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3)
DOCS := $(DOCS_HTML) $(DOCS_MAN3)
CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \
@srcroot@test/mremap.c @srcroot@test/posix_memalign.c \
@srcroot@test/rallocm.c @srcroot@test/thread_arena.c
@srcroot@test/bitmap.c @srcroot@test/mremap.c \
@srcroot@test/posix_memalign.c @srcroot@test/rallocm.c \
@srcroot@test/thread_arena.c
.PHONY: all dist doc_html doc_man doc
.PHONY: install_bin install_include install_lib
@ -127,6 +128,9 @@ doc: $(DOCS)
$(CC) $(CFLAGS) -c $(CPPFLAGS) -I@objroot@test -o $@ $<
@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) -I@objroot@test $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)"
# Automatic dependency generation misses #include "*.c".
@objroot@test/bitmap.o : @objroot@src/bitmap.o
@objroot@test/%: @objroot@test/%.o \
@objroot@lib/libjemalloc@install_suffix@.$(SO)
@mkdir -p $(@D)

View File

@ -132,6 +132,16 @@ else
fi
AC_DEFINE_UNQUOTED([LG_SIZEOF_INT], [$LG_SIZEOF_INT])
AC_CHECK_SIZEOF([long])
if test "x${ac_cv_sizeof_long}" = "x8" ; then
LG_SIZEOF_LONG=3
elif test "x${ac_cv_sizeof_long}" = "x4" ; then
LG_SIZEOF_LONG=2
else
AC_MSG_ERROR([Unsupported long size: ${ac_cv_sizeof_long}])
fi
AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG])
AC_CANONICAL_HOST
dnl CPU-specific settings.
CPU_SPINWAIT=""
@ -752,6 +762,14 @@ if test "x${enable_tls}" = "x0" ; then
AC_DEFINE_UNQUOTED([NO_TLS], [ ])
fi
dnl ============================================================================
dnl Check for ffsl(3), and fail if not found. This function exists on all
dnl platforms that jemalloc currently has a chance of functioning on without
dnl modification.
AC_CHECK_FUNC([ffsl], [],
[AC_MSG_ERROR([Cannot build without ffsl(3)])])
dnl ============================================================================
dnl Check for allocator-related functions that should be wrapped.

View File

@ -209,18 +209,15 @@ struct arena_run_s {
/* Bin this run is associated with. */
arena_bin_t *bin;
/* Stack of available freed regions, or NULL. */
void *avail;
/* Next region that has never been allocated, or run boundary. */
void *next;
/* Index of next region that has never been allocated, or nregs. */
uint32_t nextind;
/* Number of free regions in run. */
unsigned nfree;
};
/*
* Read-only information associated with each element for arena_t's bins array
* Read-only information associated with each element of arena_t's bins array
* is stored separately, partly to reduce memory usage (only one copy, rather
* than one per arena), but mainly to avoid false cacheline sharing.
*/
@ -234,6 +231,18 @@ struct arena_bin_info_s {
/* Total number of regions in a run for this bin's size class. */
uint32_t nregs;
/*
* Offset of first bitmap_t element in a run header for this bin's size
* class.
*/
uint32_t bitmap_offset;
/*
* Metadata used to manipulate bitmaps for runs associated with this
* bin.
*/
bitmap_info_t bitmap_info;
#ifdef JEMALLOC_PROF
/*
* Offset of first (prof_ctx_t *) in a run header for this bin's size
@ -498,7 +507,13 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
unsigned shift, diff, regind;
size_t size;
assert(run->magic == ARENA_RUN_MAGIC);
dassert(run->magic == ARENA_RUN_MAGIC);
/*
* Freeing a pointer lower than region zero can cause assertion
* failure.
*/
assert((uintptr_t)ptr >= (uintptr_t)run +
(uintptr_t)bin_info->reg0_offset);
/*
* Avoid doing division with a variable divisor if possible. Using
@ -583,7 +598,7 @@ arena_prof_ctx_get(const void *ptr)
arena_bin_info_t *bin_info = &arena_bin_info[binind];
unsigned regind;
assert(run->magic == ARENA_RUN_MAGIC);
dassert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin_info, ptr);
ret = *(prof_ctx_t **)((uintptr_t)run +
bin_info->ctx0_offset + (regind *
@ -618,7 +633,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
arena_bin_info_t *bin_info;
unsigned regind;
assert(run->magic == ARENA_RUN_MAGIC);
dassert(run->magic == ARENA_RUN_MAGIC);
binind = arena_bin_index(chunk->arena, bin);
bin_info = &arena_bin_info[binind];
regind = arena_run_regind(run, bin_info, ptr);
@ -639,7 +654,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
arena_chunk_map_t *mapelm;
assert(arena != NULL);
assert(arena->magic == ARENA_MAGIC);
dassert(arena->magic == ARENA_MAGIC);
assert(chunk->arena == arena);
assert(ptr != NULL);
assert(CHUNK_ADDR2BASE(ptr) != ptr);
@ -662,9 +677,9 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapelm->bits >>
PAGE_SHIFT)) << PAGE_SHIFT));
assert(run->magic == ARENA_RUN_MAGIC);
dassert(run->magic == ARENA_RUN_MAGIC);
bin = run->bin;
#ifndef NDEBUG
#ifdef JEMALLOC_DEBUG
{
size_t binind = arena_bin_index(arena, bin);
arena_bin_info_t *bin_info =

View File

@ -0,0 +1,184 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */
#define LG_BITMAP_MAXBITS 18
typedef struct bitmap_level_s bitmap_level_t;
typedef struct bitmap_info_s bitmap_info_t;
/*
 * A bitmap is an array of bitmap_t elements; each element is one "group" of
 * bits.
 */
typedef unsigned long bitmap_t;
/* lg(sizeof(bitmap_t)), in bytes; bitmap_t is unsigned long. */
#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG
/*
 * Number of bits per group.  The +3 converts lg(bytes) to lg(bits), i.e. it
 * assumes 8-bit bytes.
 */
#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3)
#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS)
/* Mask that extracts a bit's position within its group. */
#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1)
/*
 * Maximum number of levels possible: ceil(LG_BITMAP_MAXBITS /
 * LG_SIZEOF_BITMAP).  Each level actually reduces the bit count by a factor of
 * 2^LG_BITMAP_GROUP_NBITS, which is larger than 2^LG_SIZEOF_BITMAP, so this is
 * a conservative upper bound.
 *
 * The replacement list is fully parenthesized so that the macro behaves as a
 * single operand in any surrounding expression (e.g. BITMAP_MAX_LEVELS * 2).
 */
#define BITMAP_MAX_LEVELS \
	((LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \
	+ !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP))
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
/* Per-level metadata for a multi-level bitmap. */
struct bitmap_level_s {
/*
 * Offset of this level's groups within the array of groups, i.e. the index
 * of the level's first bitmap_t element in the bitmap array.
 */
size_t group_offset;
};
/*
 * Layout description for a bitmap of a given logical size.  Filled in by
 * bitmap_info_init() and passed (read-only) to every bitmap operation.
 */
struct bitmap_info_s {
/* Logical number of bits in bitmap (stored at bottom level). */
size_t nbits;
/* Number of levels necessary for nbits. */
unsigned nlevels;
/*
 * Only the first (nlevels+1) elements are used, and levels are ordered
 * bottom to top (e.g. the bottom level is stored in levels[0]).
 * levels[nlevels] is a sentinel: its group_offset is one past the root
 * group, which makes it the total number of groups in the bitmap.
 */
bitmap_level_t levels[BITMAP_MAX_LEVELS+1];
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
void bitmap_info_init(bitmap_info_t *binfo, size_t nbits);
size_t bitmap_info_ngroups(const bitmap_info_t *binfo);
size_t bitmap_size(size_t nbits);
void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo);
bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo);
void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_))
/*
 * Report whether every logical bit in the bitmap is set.
 *
 * Bits are stored inverted (a stored 1 means "logically unset"), and each
 * summary level keeps a stored 1 for every lower-level group that still has an
 * unset bit.  The root group is the last group in the array, located just
 * before the sentinel offset levels[nlevels].group_offset.
 *
 * The offset is kept as size_t, matching group_offset's type and the other
 * bitmap accessors, rather than being narrowed to unsigned.
 */
JEMALLOC_INLINE bool
bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo)
{
	size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1;
	bitmap_t rg = bitmap[rgoff];

	/* The bitmap is full iff the root group is 0. */
	return (rg == 0);
}
/*
 * Return the logical value of a bit: true if it is set, false if it is unset.
 * Only the bottom level (which starts at index 0 of the array) is consulted.
 */
JEMALLOC_INLINE bool
bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
{
	bitmap_t group, mask;

	assert(bit < binfo->nbits);
	group = bitmap[bit >> LG_BITMAP_GROUP_NBITS];
	mask = 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
	/* Storage is inverted: a cleared stored bit means logically set. */
	return ((group & mask) == 0);
}
/*
 * Set a bit that is currently unset.
 *
 * The stored (inverted) bit is cleared in its bottom-level group.  Whenever a
 * group becomes 0 (no unset bits remain in it), the corresponding bit in the
 * next level's summary group is cleared as well, and so on up the tree until a
 * group stays non-zero or the top level has been updated.
 */
JEMALLOC_INLINE void
bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
{
	size_t group_ind;
	bitmap_t *group;
	bitmap_t mask;
	unsigned level;

	assert(bit < binfo->nbits);
	assert(bitmap_get(bitmap, binfo, bit) == false);

	group_ind = bit >> LG_BITMAP_GROUP_NBITS;
	group = &bitmap[group_ind];
	mask = 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
	assert(*group & mask);
	*group ^= mask;
	assert(bitmap_get(bitmap, binfo, bit));

	/* Propagate group state transitions up the tree. */
	for (level = 1; *group == 0 && level < binfo->nlevels; level++) {
		/* The group just exhausted is one bit in the level above. */
		bit = group_ind;
		group_ind = bit >> LG_BITMAP_GROUP_NBITS;
		group = &bitmap[binfo->levels[level].group_offset + group_ind];
		mask = 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
		assert(*group & mask);
		*group ^= mask;
	}
}
/*
 * sfu: set first unset.
 *
 * Find the lowest-numbered unset bit, set it, and return its index.  The
 * bitmap must not be full.  The search starts in the root group and descends
 * one level at a time; at each level ffsl() picks the lowest stored 1 bit,
 * which identifies the lowest child group that still contains an unset bit.
 */
JEMALLOC_INLINE size_t
bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo)
{
	size_t bit;
	unsigned level;

	assert(bitmap_full(bitmap, binfo) == false);

	level = binfo->nlevels - 1;
	bit = ffsl(bitmap[binfo->levels[level].group_offset]) - 1;
	while (level-- > 0) {
		bitmap_t group = bitmap[binfo->levels[level].group_offset +
		    bit];

		bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(group) - 1);
	}

	bitmap_set(bitmap, binfo, bit);
	return (bit);
}
/*
 * Unset a bit that is currently set.
 *
 * The stored (inverted) bit is turned back on in its bottom-level group.  If
 * that group was 0 beforehand (it had no unset bits), the matching bit in the
 * next level's summary group is turned on too; this repeats upward for as long
 * as each updated group was 0 before the update.
 */
JEMALLOC_INLINE void
bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
{
	size_t group_ind;
	bitmap_t *group;
	bitmap_t mask;
	bool was_full;
	unsigned level;

	assert(bit < binfo->nbits);
	assert(bitmap_get(bitmap, binfo, bit));

	group_ind = bit >> LG_BITMAP_GROUP_NBITS;
	group = &bitmap[group_ind];
	mask = 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
	was_full = (*group == 0);
	assert((*group & mask) == 0);
	*group ^= mask;
	assert(bitmap_get(bitmap, binfo, bit) == false);

	/* Propagate group state transitions up the tree. */
	for (level = 1; was_full && level < binfo->nlevels; level++) {
		/* The group just reopened is one bit in the level above. */
		bit = group_ind;
		group_ind = bit >> LG_BITMAP_GROUP_NBITS;
		group = &bitmap[binfo->levels[level].group_offset + group_ind];
		mask = 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
		was_full = (*group == 0);
		assert((*group & mask) == 0);
		*group ^= mask;
	}
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

View File

@ -55,7 +55,8 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
* Define a custom assert() in order to reduce the chances of deadlock during
* assertion failure.
*/
#ifdef JEMALLOC_DEBUG
#ifndef assert
# ifdef JEMALLOC_DEBUG
# define assert(e) do { \
if (!(e)) { \
char line_buf[UMAX2S_BUFSIZE]; \
@ -70,8 +71,15 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
abort(); \
} \
} while (0)
# else
# define assert(e)
# endif
#endif
#ifdef JEMALLOC_DEBUG
# define dassert(e) assert(e)
#else
#define assert(e)
# define dassert(e)
#endif
/*
@ -146,7 +154,19 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
#define QUANTUM_CEILING(a) \
(((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
/*
 * Size of a long in bytes (2^LG_SIZEOF_LONG).  The value is cast to size_t so
 * that LONG_MASK and its complement are full-width when combined with size_t
 * operands.
 */
#define LONG ((size_t)(1U << LG_SIZEOF_LONG))
#define LONG_MASK (LONG - 1)
/* Return the smallest long multiple that is >= a. */
#define LONG_CEILING(a) \
(((a) + LONG_MASK) & ~LONG_MASK)
/*
 * Size of a pointer in bytes (2^LG_SIZEOF_PTR).  As with LONG, the value is
 * cast to size_t: with a plain unsigned value, ~PTR_MASK would be a 32-bit
 * quantity that zero-extends when combined with a 64-bit size_t, silently
 * clearing the upper bits of the operand in PTR_CEILING().
 */
#define SIZEOF_PTR ((size_t)(1U << LG_SIZEOF_PTR))
#define PTR_MASK (SIZEOF_PTR - 1)
/* Return the smallest (void *) multiple that is >= a. */
#define PTR_CEILING(a) \
	(((a) + PTR_MASK) & ~PTR_MASK)
/*
* Maximum size of L1 cache line. This is used to avoid cache line aliasing.
@ -199,6 +219,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
#include "jemalloc/internal/ctl.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mb.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/extent.h"
#include "jemalloc/internal/arena.h"
#include "jemalloc/internal/base.h"
@ -222,6 +243,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
#include "jemalloc/internal/ctl.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mb.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/extent.h"
#include "jemalloc/internal/arena.h"
#include "jemalloc/internal/base.h"
@ -335,6 +357,7 @@ void jemalloc_postfork(void);
#include "jemalloc/internal/ctl.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mb.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/extent.h"
#include "jemalloc/internal/arena.h"
#include "jemalloc/internal/base.h"
@ -545,6 +568,7 @@ thread_allocated_get(void)
#endif
#endif
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/arena.h"
@ -628,7 +652,7 @@ isalloc(const void *ptr)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
assert(chunk->arena->magic == ARENA_MAGIC);
dassert(chunk->arena->magic == ARENA_MAGIC);
#ifdef JEMALLOC_PROF
ret = arena_salloc_demote(ptr);

View File

@ -348,7 +348,7 @@ prof_ctx_get(const void *ptr)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
assert(chunk->arena->magic == ARENA_MAGIC);
dassert(chunk->arena->magic == ARENA_MAGIC);
ret = arena_prof_ctx_get(ptr);
} else
@ -367,7 +367,7 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
assert(chunk->arena->magic == ARENA_MAGIC);
dassert(chunk->arena->magic == ARENA_MAGIC);
arena_prof_ctx_set(ptr, ctx);
} else

View File

@ -2,6 +2,7 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct tcache_bin_info_s tcache_bin_info_t;
typedef struct tcache_bin_s tcache_bin_t;
typedef struct tcache_s tcache_t;
@ -32,14 +33,21 @@ typedef struct tcache_s tcache_t;
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
/*
* Read-only information associated with each element of tcache_t's tbins array
* is stored separately, mainly to reduce memory usage.
*/
struct tcache_bin_info_s {
unsigned ncached_max; /* Upper limit on ncached. */
};
struct tcache_bin_s {
# ifdef JEMALLOC_STATS
tcache_bin_stats_t tstats;
# endif
unsigned low_water; /* Min # cached since last GC. */
unsigned ncached; /* # of cached objects. */
unsigned ncached_max; /* Upper limit on ncached. */
void *avail; /* Chain of available objects. */
void **avail; /* Stack of available objects. */
};
struct tcache_s {
@ -53,6 +61,12 @@ struct tcache_s {
unsigned ev_cnt; /* Event count since incremental GC. */
unsigned next_gc_bin; /* Next bin to GC. */
tcache_bin_t tbins[1]; /* Dynamically sized. */
/*
* The pointer stacks associated with tbins follow as a contiguous
* array. During tcache initialization, the avail pointer in each
* element of tbins is initialized to point to the proper offset within
* this array.
*/
};
#endif /* JEMALLOC_H_STRUCTS */
@ -63,6 +77,8 @@ extern bool opt_tcache;
extern ssize_t opt_lg_tcache_max;
extern ssize_t opt_lg_tcache_gc_sweep;
extern tcache_bin_info_t *tcache_bin_info;
/* Map of thread-specific caches. */
#ifndef NO_TLS
extern __thread tcache_t *tcache_tls
@ -109,7 +125,7 @@ void tcache_destroy(tcache_t *tcache);
#ifdef JEMALLOC_STATS
void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
#endif
void tcache_boot(void);
bool tcache_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
@ -211,8 +227,7 @@ tcache_alloc_easy(tcache_bin_t *tbin)
tbin->ncached--;
if (tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
ret = tbin->avail;
tbin->avail = *(void **)ret;
ret = tbin->avail[tbin->ncached];
return (ret);
}
@ -312,6 +327,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr)
arena_run_t *run;
arena_bin_t *bin;
tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
size_t pageind, binind;
arena_chunk_map_t *mapelm;
@ -323,7 +339,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr)
mapelm = &chunk->map[pageind-map_bias];
run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
(mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
assert(run->magic == ARENA_RUN_MAGIC);
dassert(run->magic == ARENA_RUN_MAGIC);
bin = run->bin;
binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
sizeof(arena_bin_t);
@ -335,16 +351,17 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr)
#endif
tbin = &tcache->tbins[binind];
if (tbin->ncached == tbin->ncached_max) {
tcache_bin_flush_small(tbin, binind, (tbin->ncached_max >> 1)
tbin_info = &tcache_bin_info[binind];
if (tbin->ncached == tbin_info->ncached_max) {
tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >>
1)
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache
#endif
);
}
assert(tbin->ncached < tbin->ncached_max);
*(void **)ptr = tbin->avail;
tbin->avail = ptr;
assert(tbin->ncached < tbin_info->ncached_max);
tbin->avail[tbin->ncached] = ptr;
tbin->ncached++;
tcache_event(tcache);
@ -357,6 +374,7 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
arena_chunk_t *chunk;
size_t pageind, binind;
tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
assert((size & PAGE_MASK) == 0);
assert(arena_salloc(ptr) > small_maxclass);
@ -373,16 +391,17 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
#endif
tbin = &tcache->tbins[binind];
if (tbin->ncached == tbin->ncached_max) {
tcache_bin_flush_large(tbin, binind, (tbin->ncached_max >> 1)
tbin_info = &tcache_bin_info[binind];
if (tbin->ncached == tbin_info->ncached_max) {
tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >>
1)
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache
#endif
);
}
assert(tbin->ncached < tbin->ncached_max);
*(void **)ptr = tbin->avail;
tbin->avail = ptr;
assert(tbin->ncached < tbin_info->ncached_max);
tbin->avail[tbin->ncached] = ptr;
tbin->ncached++;
tcache_event(tcache);

View File

@ -140,4 +140,7 @@
/* sizeof(int) == 2^LG_SIZEOF_INT. */
#undef LG_SIZEOF_INT
/* sizeof(long) == 2^LG_SIZEOF_LONG. */
#undef LG_SIZEOF_LONG
#endif /* JEMALLOC_DEFS_H_ */

View File

@ -253,59 +253,45 @@ static inline void *
arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info)
{
void *ret;
unsigned regind;
bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
(uintptr_t)bin_info->bitmap_offset);
assert(run->magic == ARENA_RUN_MAGIC);
dassert(run->magic == ARENA_RUN_MAGIC);
assert(run->nfree > 0);
assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false);
regind = bitmap_sfu(bitmap, &bin_info->bitmap_info);
ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset +
(uintptr_t)(bin_info->reg_size * regind));
run->nfree--;
ret = run->avail;
if (ret != NULL) {
/* Double free can cause assertion failure.*/
assert(ret != NULL);
/* Write-after free can cause assertion failure. */
assert((uintptr_t)ret >= (uintptr_t)run +
(uintptr_t)bin_info->reg0_offset);
assert((uintptr_t)ret < (uintptr_t)run->next);
assert(((uintptr_t)ret - ((uintptr_t)run +
(uintptr_t)bin_info->reg0_offset)) %
(uintptr_t)bin_info->reg_size == 0);
run->avail = *(void **)ret;
return (ret);
}
ret = run->next;
run->next = (void *)((uintptr_t)ret + (uintptr_t)bin_info->reg_size);
assert(ret != NULL);
if (regind == run->nextind)
run->nextind++;
assert(regind < run->nextind);
return (ret);
}
static inline void
arena_run_reg_dalloc(arena_run_t *run, void *ptr)
{
#ifndef NDEBUG
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
size_t binind = arena_bin_index(chunk->arena, run->bin);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
unsigned regind = arena_run_regind(run, bin_info, ptr);
bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
(uintptr_t)bin_info->bitmap_offset);
assert(run->nfree < bin_info->nregs);
/* Freeing an interior pointer can cause assertion failure. */
assert(((uintptr_t)ptr - ((uintptr_t)run +
(uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size
== 0);
/*
* Freeing a pointer lower than region zero can cause assertion
* failure.
*/
assert((uintptr_t)ptr >= (uintptr_t)run +
(uintptr_t)bin_info->reg0_offset);
/*
* Freeing a pointer past in the run's frontier can cause assertion
* failure.
*/
assert((uintptr_t)ptr < (uintptr_t)run->next);
#endif
/* Freeing an unallocated pointer can cause assertion failure. */
assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind));
*(void **)ptr = run->avail;
run->avail = ptr;
bitmap_unset(bitmap, &bin_info->bitmap_info, regind);
run->nfree++;
}
@ -772,7 +758,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
chunk + (uintptr_t)(pageind << PAGE_SHIFT));
assert((mapelm->bits >> PAGE_SHIFT) == 0);
assert(run->magic == ARENA_RUN_MAGIC);
dassert(run->magic == ARENA_RUN_MAGIC);
size_t binind = arena_bin_index(arena,
run->bin);
arena_bin_info_t *bin_info =
@ -1224,12 +1210,14 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
malloc_mutex_lock(&arena->lock);
run = arena_run_alloc(arena, bin_info->run_size, false, false);
if (run != NULL) {
bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
(uintptr_t)bin_info->bitmap_offset);
/* Initialize run internals. */
run->bin = bin;
run->avail = NULL;
run->next = (void *)((uintptr_t)run +
(uintptr_t)bin_info->reg0_offset);
run->nextind = 0;
run->nfree = bin_info->nregs;
bitmap_init(bitmap, &bin_info->bitmap_info);
#ifdef JEMALLOC_DEBUG
run->magic = ARENA_RUN_MAGIC;
#endif
@ -1289,12 +1277,11 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
bin->runcur = NULL;
run = arena_bin_nonfull_run_get(arena, bin);
if (bin->runcur != NULL && bin->runcur->nfree > 0) {
/*
* Another thread updated runcur while this one ran without the
* bin lock in arena_bin_nonfull_run_get().
*/
assert(bin->runcur->magic == ARENA_RUN_MAGIC);
dassert(bin->runcur->magic == ARENA_RUN_MAGIC);
assert(bin->runcur->nfree > 0);
ret = arena_run_reg_alloc(bin->runcur, bin_info);
if (run != NULL) {
@ -1302,7 +1289,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
/*
* arena_run_alloc() may have allocated run, or it may
* have pulled it from the bin's run tree. Therefore
* have pulled run from the bin's run tree. Therefore
* it is unsafe to make any assumptions about how run
* has previously been used, and arena_bin_lower_run()
* must be called, as if a region were just deallocated
@ -1322,7 +1309,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
bin->runcur = run;
assert(bin->runcur->magic == ARENA_RUN_MAGIC);
dassert(bin->runcur->magic == ARENA_RUN_MAGIC);
assert(bin->runcur->nfree > 0);
return (arena_run_reg_alloc(bin->runcur, bin_info));
@ -1365,15 +1352,15 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
#endif
bin = &arena->bins[binind];
malloc_mutex_lock(&bin->lock);
for (i = 0, nfill = (tbin->ncached_max >> 1); i < nfill; i++) {
for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> 1);
i < nfill; i++) {
if ((run = bin->runcur) != NULL && run->nfree > 0)
ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]);
else
ptr = arena_bin_malloc_hard(arena, bin);
if (ptr == NULL)
break;
*(void **)ptr = tbin->avail;
tbin->avail = ptr;
tbin->avail[i] = ptr;
}
#ifdef JEMALLOC_STATS
bin->stats.allocated += (i - tbin->ncached) *
@ -1607,7 +1594,7 @@ arena_salloc(const void *ptr)
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
assert(run->magic == ARENA_RUN_MAGIC);
dassert(run->magic == ARENA_RUN_MAGIC);
size_t binind = arena_bin_index(chunk->arena, run->bin);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
assert(((uintptr_t)ptr - ((uintptr_t)run +
@ -1660,7 +1647,7 @@ arena_salloc_demote(const void *ptr)
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
assert(run->magic == ARENA_RUN_MAGIC);
dassert(run->magic == ARENA_RUN_MAGIC);
size_t binind = arena_bin_index(chunk->arena, run->bin);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
assert(((uintptr_t)ptr - ((uintptr_t)run +
@ -1730,8 +1717,9 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
/******************************/
npages = bin_info->run_size >> PAGE_SHIFT;
run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT);
past = (size_t)((PAGE_CEILING((uintptr_t)run->next) - (uintptr_t)chunk)
>> PAGE_SHIFT);
past = (size_t)(PAGE_CEILING((uintptr_t)run +
(uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind *
bin_info->reg_size) - (uintptr_t)chunk) >> PAGE_SHIFT);
malloc_mutex_lock(&arena->lock);
/*
@ -1817,7 +1805,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
(mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
assert(run->magic == ARENA_RUN_MAGIC);
dassert(run->magic == ARENA_RUN_MAGIC);
bin = run->bin;
size_t binind = arena_bin_index(arena, bin);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
@ -2065,7 +2053,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra,
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
arena = chunk->arena;
assert(arena->magic == ARENA_MAGIC);
dassert(arena->magic == ARENA_MAGIC);
if (psize < oldsize) {
#ifdef JEMALLOC_FILL
@ -2405,8 +2393,8 @@ small_size2bin_init_hard(void)
* *) bin_info->run_size <= arena_maxclass
* *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
*
* bin_info->nregs and bin_info->reg0_offset are also calculated here, since
* these settings are all interdependent.
* bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also
* calculated here, since these settings are all interdependent.
*/
static size_t
bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
@ -2414,6 +2402,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
size_t try_run_size, good_run_size;
uint32_t try_nregs, good_nregs;
uint32_t try_hdr_size, good_hdr_size;
uint32_t try_bitmap_offset, good_bitmap_offset;
#ifdef JEMALLOC_PROF
uint32_t try_ctx0_offset, good_ctx0_offset;
#endif
@ -2438,6 +2427,11 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
do {
try_nregs--;
try_hdr_size = sizeof(arena_run_t);
/* Pad to a long boundary. */
try_hdr_size = LONG_CEILING(try_hdr_size);
try_bitmap_offset = try_hdr_size;
/* Add space for bitmap. */
try_hdr_size += bitmap_size(try_nregs);
#ifdef JEMALLOC_PROF
if (opt_prof && prof_promote == false) {
/* Pad to a quantum boundary. */
@ -2460,6 +2454,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
good_run_size = try_run_size;
good_nregs = try_nregs;
good_hdr_size = try_hdr_size;
good_bitmap_offset = try_bitmap_offset;
#ifdef JEMALLOC_PROF
good_ctx0_offset = try_ctx0_offset;
#endif
@ -2473,6 +2468,11 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
do {
try_nregs--;
try_hdr_size = sizeof(arena_run_t);
/* Pad to a long boundary. */
try_hdr_size = LONG_CEILING(try_hdr_size);
try_bitmap_offset = try_hdr_size;
/* Add space for bitmap. */
try_hdr_size += bitmap_size(try_nregs);
#ifdef JEMALLOC_PROF
if (opt_prof && prof_promote == false) {
/* Pad to a quantum boundary. */
@ -2498,6 +2498,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
/* Copy final settings. */
bin_info->run_size = good_run_size;
bin_info->nregs = good_nregs;
bin_info->bitmap_offset = good_bitmap_offset;
#ifdef JEMALLOC_PROF
bin_info->ctx0_offset = good_ctx0_offset;
#endif
@ -2525,6 +2526,7 @@ bin_info_init(void)
bin_info = &arena_bin_info[i];
bin_info->reg_size = (1U << (LG_TINY_MIN + i));
prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
}
#endif
@ -2533,6 +2535,7 @@ bin_info_init(void)
bin_info = &arena_bin_info[i];
bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM;
prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
}
/* Cacheline-spaced bins. */
@ -2541,6 +2544,7 @@ bin_info_init(void)
bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
LG_CACHELINE);
prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
}
/* Subpage-spaced bins. */
@ -2549,6 +2553,7 @@ bin_info_init(void)
bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins +
ncbins)) << LG_SUBPAGE);
prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
}
return (false);

90
jemalloc/src/bitmap.c Normal file
View File

@ -0,0 +1,90 @@
#define JEMALLOC_BITMAP_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static size_t bits2groups(size_t nbits);
/******************************************************************************/
/*
 * Return the number of groups needed to hold nbits bits, i.e. nbits divided by
 * the group width, rounded up.
 */
static size_t
bits2groups(size_t nbits)
{
	size_t whole_groups = nbits >> LG_BITMAP_GROUP_NBITS;
	size_t partial_group = ((nbits & BITMAP_GROUP_NBITS_MASK) != 0);

	return (whole_groups + partial_group);
}
/*
 * Compute the level layout for a bitmap holding nbits logical bits.
 *
 * Level 0 holds the bits themselves; each higher level holds one summary bit
 * per group of the level below, and levels are added until a level fits in a
 * single group (the root).  levels[nlevels] receives a sentinel offset one
 * past the root group.
 */
void
bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
{
	unsigned level;
	size_t ngroups, offset;

	assert(nbits > 0);
	assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS));

	level = 0;
	offset = 0;
	binfo->levels[0].group_offset = 0;
	ngroups = bits2groups(nbits);
	while (ngroups > 1) {
		/* The current level needs a summary level above it. */
		level++;
		assert(level < BITMAP_MAX_LEVELS);
		offset += ngroups;
		binfo->levels[level].group_offset = offset;
		ngroups = bits2groups(ngroups);
	}

	/* Sentinel entry: one past the single root group. */
	level++;
	binfo->levels[level].group_offset = offset + ngroups;
	binfo->nlevels = level;
	binfo->nbits = nbits;
}
/*
 * Return the total number of groups (bitmap_t elements) across all levels of
 * the bitmap described by binfo.  This is the sentinel offset stored in
 * levels[nlevels].
 *
 * Previously this returned the group count shifted by LG_SIZEOF_BITMAP (a byte
 * count), which contradicted the function's name; the conversion to bytes now
 * lives in bitmap_size().
 */
size_t
bitmap_info_ngroups(const bitmap_info_t *binfo)
{

	return (binfo->levels[binfo->nlevels].group_offset);
}

/*
 * Return the number of bytes required to store a bitmap of nbits logical bits,
 * including all summary levels.
 */
size_t
bitmap_size(size_t nbits)
{
	bitmap_info_t binfo;

	bitmap_info_init(&binfo, nbits);
	return (bitmap_info_ngroups(&binfo) << LG_SIZEOF_BITMAP);
}
/*
 * Initialize a bitmap so that every logical bit is unset.
 *
 * Bits are actually inverted with regard to the external bitmap interface, so
 * the bitmap starts out with all 1 bits, except for trailing unused bits (if
 * any).  Each group uses bit 0 for its first logical bit, so the unused bits
 * are the most significant bits of the last group of each level.
 */
void
bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo)
{
	unsigned level;

	memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset <<
	    LG_SIZEOF_BITMAP);

	/*
	 * For each level, clear the unused high bits of its last group.  Level
	 * 0 holds nbits bits; every other level holds one bit per group of
	 * the level below it.
	 */
	level = 0;
	do {
		size_t nused, nunused;

		if (level == 0)
			nused = binfo->nbits;
		else {
			nused = binfo->levels[level].group_offset -
			    binfo->levels[level-1].group_offset;
		}
		nunused = (BITMAP_GROUP_NBITS - (nused &
		    BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK;
		if (nunused != 0) {
			bitmap[binfo->levels[level+1].group_offset - 1] >>=
			    nunused;
		}
	} while (++level < binfo->nlevels);
}

View File

@ -73,7 +73,7 @@ ckh_isearch(ckh_t *ckh, const void *key)
size_t hash1, hash2, bucket, cell;
assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIC);
dassert(ckh->magic == CKH_MAGIC);
ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
@ -396,7 +396,7 @@ ckh_delete(ckh_t *ckh)
{
assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIC);
dassert(ckh->magic == CKH_MAGIC);
#ifdef CKH_VERBOSE
malloc_printf(
@ -421,7 +421,7 @@ ckh_count(ckh_t *ckh)
{
assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIC);
dassert(ckh->magic == CKH_MAGIC);
return (ckh->count);
}
@ -452,7 +452,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data)
bool ret;
assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIC);
dassert(ckh->magic == CKH_MAGIC);
assert(ckh_search(ckh, key, NULL, NULL));
#ifdef CKH_COUNT
@ -477,7 +477,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
size_t cell;
assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIC);
dassert(ckh->magic == CKH_MAGIC);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {
@ -509,7 +509,7 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
size_t cell;
assert(ckh != NULL);
assert(ckh->magic == CKH_MAGIC);
dassert(ckh->magic == CKH_MAGIC);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {

View File

@ -693,7 +693,10 @@ malloc_init_hard(void)
}
#ifdef JEMALLOC_TCACHE
tcache_boot();
if (tcache_boot()) {
malloc_mutex_unlock(&init_lock);
return (true);
}
#endif
if (huge_boot()) {

View File

@ -8,6 +8,9 @@ bool opt_tcache = true;
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
tcache_bin_info_t *tcache_bin_info;
static unsigned stack_nelms; /* Total stack elms per tcache. */
/* Map of thread-specific caches. */
#ifndef NO_TLS
__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
@ -55,21 +58,19 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
#endif
)
{
void *flush, *deferred, *ptr;
void *ptr;
unsigned i, nflush, ndeferred;
bool first_pass;
#ifdef JEMALLOC_STATS
bool merged_stats = false;
#endif
assert(binind < nbins);
assert(rem <= tbin->ncached);
assert(tbin->ncached > 0 || tbin->avail == NULL);
for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass =
true; flush != NULL; flush = deferred, nflush = ndeferred) {
for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
/* Lock the arena bin associated with the first object. */
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
tbin->avail[0]);
arena_t *arena = chunk->arena;
arena_bin_t *bin = &arena->bins[binind];
@ -92,12 +93,10 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
tbin->tstats.nrequests = 0;
}
#endif
deferred = NULL;
ndeferred = 0;
for (i = 0; i < nflush; i++) {
ptr = flush;
ptr = tbin->avail[i];
assert(ptr != NULL);
flush = *(void **)ptr;
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk->arena == arena) {
size_t pageind = ((uintptr_t)ptr -
@ -112,17 +111,11 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
* locked. Stash the object, so that it can be
* handled in a future pass.
*/
*(void **)ptr = deferred;
deferred = ptr;
tbin->avail[ndeferred] = ptr;
ndeferred++;
}
}
malloc_mutex_unlock(&bin->lock);
if (first_pass) {
tbin->avail = flush;
first_pass = false;
}
}
#ifdef JEMALLOC_STATS
if (merged_stats == false) {
@ -139,6 +132,8 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
}
#endif
memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
rem * sizeof(void *));
tbin->ncached = rem;
if (tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
@ -151,18 +146,19 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#endif
)
{
void *flush, *deferred, *ptr;
void *ptr;
unsigned i, nflush, ndeferred;
bool first_pass;
#ifdef JEMALLOC_STATS
bool merged_stats = false;
#endif
assert(binind < nhbins);
assert(rem <= tbin->ncached);
assert(tbin->ncached > 0 || tbin->avail == NULL);
for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass =
true; flush != NULL; flush = deferred, nflush = ndeferred) {
for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
/* Lock the arena associated with the first object. */
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
tbin->avail[0]);
arena_t *arena = chunk->arena;
malloc_mutex_lock(&arena->lock);
@ -174,6 +170,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
tcache->prof_accumbytes = 0;
#endif
#ifdef JEMALLOC_STATS
merged_stats = true;
arena->stats.nrequests_large += tbin->tstats.nrequests;
arena->stats.lstats[binind - nbins].nrequests +=
tbin->tstats.nrequests;
@ -182,12 +179,10 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
}
#endif
deferred = NULL;
ndeferred = 0;
for (i = 0; i < nflush; i++) {
ptr = flush;
ptr = tbin->avail[i];
assert(ptr != NULL);
flush = *(void **)ptr;
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk->arena == arena)
arena_dalloc_large(arena, chunk, ptr);
@ -198,19 +193,30 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
* Stash the object, so that it can be handled
* in a future pass.
*/
*(void **)ptr = deferred;
deferred = ptr;
tbin->avail[ndeferred] = ptr;
ndeferred++;
}
}
malloc_mutex_unlock(&arena->lock);
if (first_pass) {
tbin->avail = flush;
first_pass = false;
}
#ifdef JEMALLOC_STATS
if (merged_stats == false) {
/*
* The flush loop didn't happen to flush to this thread's
* arena, so the stats didn't get merged. Manually do so now.
*/
arena_t *arena = tcache->arena;
malloc_mutex_lock(&arena->lock);
arena->stats.nrequests_large += tbin->tstats.nrequests;
arena->stats.lstats[binind - nbins].nrequests +=
tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
malloc_mutex_unlock(&arena->lock);
}
#endif
memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
rem * sizeof(void *));
tbin->ncached = rem;
if (tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
@ -220,10 +226,14 @@ tcache_t *
tcache_create(arena_t *arena)
{
tcache_t *tcache;
size_t size;
size_t size, stack_offset;
unsigned i;
size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
/* Naturally align the pointer stacks. */
size = PTR_CEILING(size);
stack_offset = size;
size += stack_nelms * sizeof(void *);
/*
* Round up to the nearest multiple of the cacheline size, in order to
* avoid the possibility of false cacheline sharing.
@ -236,6 +246,8 @@ tcache_create(arena_t *arena)
if (size <= small_maxclass)
tcache = (tcache_t *)arena_malloc_small(arena, size, true);
else if (size <= tcache_maxclass)
tcache = (tcache_t *)arena_malloc_large(arena, size, true);
else
tcache = (tcache_t *)icalloc(size);
@ -252,15 +264,11 @@ tcache_create(arena_t *arena)
tcache->arena = arena;
assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
for (i = 0; i < nbins; i++) {
if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
tcache->tbins[i].ncached_max = (arena_bin_info[i].nregs
<< 1);
} else
tcache->tbins[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX;
for (i = 0; i < nhbins; i++) {
tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
(uintptr_t)stack_offset);
stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
}
for (; i < nhbins; i++)
tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE;
TCACHE_SET(tcache);
@ -271,6 +279,7 @@ void
tcache_destroy(tcache_t *tcache)
{
unsigned i;
size_t tcache_size;
#ifdef JEMALLOC_STATS
/* Unlink from list of extant tcaches. */
@ -327,7 +336,8 @@ tcache_destroy(tcache_t *tcache)
}
#endif
if (arena_salloc(tcache) <= small_maxclass) {
tcache_size = arena_salloc(tcache);
if (tcache_size <= small_maxclass) {
arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
arena_t *arena = chunk->arena;
size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
@ -341,6 +351,13 @@ tcache_destroy(tcache_t *tcache)
malloc_mutex_lock(&bin->lock);
arena_dalloc_bin(arena, chunk, tcache, mapelm);
malloc_mutex_unlock(&bin->lock);
} else if (tcache_size <= tcache_maxclass) {
arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
arena_t *arena = chunk->arena;
malloc_mutex_lock(&arena->lock);
arena_dalloc_large(arena, chunk, tcache);
malloc_mutex_unlock(&arena->lock);
} else
idalloc(tcache);
}
@ -397,11 +414,13 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena)
}
#endif
void
bool
tcache_boot(void)
{
if (opt_tcache) {
unsigned i;
/*
* If necessary, clamp opt_lg_tcache_max, now that
* small_maxclass and arena_maxclass are known.
@ -416,6 +435,28 @@ tcache_boot(void)
nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);
/* Initialize tcache_bin_info. */
tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
sizeof(tcache_bin_info_t));
if (tcache_bin_info == NULL)
return (true);
stack_nelms = 0;
for (i = 0; i < nbins; i++) {
if ((arena_bin_info[i].nregs << 1) <=
TCACHE_NSLOTS_SMALL_MAX) {
tcache_bin_info[i].ncached_max =
(arena_bin_info[i].nregs << 1);
} else {
tcache_bin_info[i].ncached_max =
TCACHE_NSLOTS_SMALL_MAX;
}
stack_nelms += tcache_bin_info[i].ncached_max;
}
for (; i < nhbins; i++) {
tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
stack_nelms += tcache_bin_info[i].ncached_max;
}
/* Compute incremental GC event threshold. */
if (opt_lg_tcache_gc_sweep >= 0) {
tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
@ -431,6 +472,8 @@ tcache_boot(void)
abort();
}
}
return (false);
}
/******************************************************************************/
#endif /* JEMALLOC_TCACHE */

153
jemalloc/test/bitmap.c Normal file
View File

@ -0,0 +1,153 @@
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
/*
 * Avoid using the assert() from jemalloc_internal.h, since it requires
 * internal libjemalloc functionality.
 */
#include <assert.h>
/*
* Directly include the bitmap code, since it isn't exposed outside
* libjemalloc.
*/
#include "../src/bitmap.c"
#define MAXBITS 4500
/*
 * Check that bitmap_size() never decreases as the requested number of bits
 * grows from 1 to MAXBITS.
 */
static void
test_bitmap_size(void)
{
	size_t nbits;
	size_t last = 0;

	for (nbits = 1; nbits <= MAXBITS; nbits++) {
		size_t cur = bitmap_size(nbits);

		assert(cur >= last);
		last = cur;
	}
}
/*
 * For every bitmap length from 1 to MAXBITS, check that a freshly initialized
 * bitmap reports each of its bits as unset.
 */
static void
test_bitmap_init(void)
{
	size_t nbits;

	for (nbits = 1; nbits <= MAXBITS; nbits++) {
		bitmap_info_t info;

		bitmap_info_init(&info, nbits);
		{
			/* Sized at run time from the descriptor. */
			bitmap_t map[bitmap_info_ngroups(&info)];
			size_t bit = 0;

			bitmap_init(map, &info);
			while (bit < nbits) {
				assert(bitmap_get(map, &info, bit) == false);
				bit++;
			}
		}
	}
}
/*
 * For every bitmap length from 1 to MAXBITS, set each bit in ascending order
 * and check that the bitmap then reports itself as full.
 */
static void
test_bitmap_set(void)
{
	size_t nbits;

	for (nbits = 1; nbits <= MAXBITS; nbits++) {
		bitmap_info_t info;

		bitmap_info_init(&info, nbits);
		{
			/* Sized at run time from the descriptor. */
			bitmap_t map[bitmap_info_ngroups(&info)];
			size_t bit = 0;

			bitmap_init(map, &info);
			while (bit < nbits) {
				bitmap_set(map, &info, bit);
				bit++;
			}
			assert(bitmap_full(map, &info));
		}
	}
}
/*
 * For every bitmap length from 1 to MAXBITS: fill the bitmap, clear every
 * bit again, then refill it, checking that it reports full after each fill.
 */
static void
test_bitmap_unset(void)
{
	size_t nbits;

	for (nbits = 1; nbits <= MAXBITS; nbits++) {
		bitmap_info_t info;

		bitmap_info_init(&info, nbits);
		{
			/* Sized at run time from the descriptor. */
			bitmap_t map[bitmap_info_ngroups(&info)];
			size_t bit;

			bitmap_init(map, &info);

			/* First fill. */
			for (bit = 0; bit < nbits; bit++)
				bitmap_set(map, &info, bit);
			assert(bitmap_full(map, &info));

			/* Clear everything ... */
			for (bit = 0; bit < nbits; bit++)
				bitmap_unset(map, &info, bit);

			/* ... and make sure the bitmap can be filled again. */
			for (bit = 0; bit < nbits; bit++)
				bitmap_set(map, &info, bit);
			assert(bitmap_full(map, &info));
		}
	}
}
/*
 * Exercise bitmap_sfu() for every bitmap length from 1 to MAXBITS.
 *
 * bitmap_sfu() changes the bitmap, so every call is made as a statement of
 * its own and only its result is asserted on.  Keeping the call outside
 * assert() means the sequence of bitmap operations stays the same even when
 * assert() is compiled out via NDEBUG.
 */
static void
test_bitmap_sfu(void)
{
	size_t i;

	for (i = 1; i <= MAXBITS; i++) {
		bitmap_info_t binfo;
		bitmap_info_init(&binfo, i);
		{
			size_t j, bit;
			bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
			bitmap_init(bitmap, &binfo);

			/* Iteratively set bits starting at the beginning. */
			for (j = 0; j < i; j++) {
				bit = bitmap_sfu(bitmap, &binfo);
				assert(bit == j);
			}
			assert(bitmap_full(bitmap, &binfo));

			/*
			 * Iteratively unset bits starting at the end, and
			 * verify that bitmap_sfu() reaches the unset bits.
			 * The index is unsigned, so count down with a
			 * post-decrement test instead of "j >= 0".
			 */
			for (j = i; j-- > 0;) {
				bitmap_unset(bitmap, &binfo, j);
				bit = bitmap_sfu(bitmap, &binfo);
				assert(bit == j);
				bitmap_unset(bitmap, &binfo, j);
			}
			assert(bitmap_get(bitmap, &binfo, 0) == false);

			/*
			 * Iteratively set bits starting at the beginning, and
			 * verify that bitmap_sfu() looks past them.
			 */
			for (j = 1; j < i; j++) {
				bitmap_set(bitmap, &binfo, j - 1);
				bit = bitmap_sfu(bitmap, &binfo);
				assert(bit == j);
				bitmap_unset(bitmap, &binfo, j);
			}
			bit = bitmap_sfu(bitmap, &binfo);
			assert(bit == i - 1);
			assert(bitmap_full(bitmap, &binfo));

			/* Keep "bit" referenced when assert() expands to nothing. */
			(void)bit;
		}
	}
}
int
main(void)
{
fprintf(stderr, "Test begin\n");
test_bitmap_size();
test_bitmap_init();
test_bitmap_set();
test_bitmap_unset();
test_bitmap_sfu();
fprintf(stderr, "Test end\n");
return (0);
}

2
jemalloc/test/bitmap.exp Normal file
View File

@ -0,0 +1,2 @@
Test begin
Test end