diff --git a/jemalloc/Makefile.in b/jemalloc/Makefile.in index 7a13f210..8ee4c938 100644 --- a/jemalloc/Makefile.in +++ b/jemalloc/Makefile.in @@ -46,7 +46,7 @@ BINS := @srcroot@bin/pprof CHDRS := @objroot@include/jemalloc/jemalloc@install_suffix@.h \ @objroot@include/jemalloc/jemalloc_defs@install_suffix@.h CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/base.c \ - @srcroot@src/chunk.c @srcroot@src/chunk_dss.c \ + @srcroot@src/bitmap.c @srcroot@src/chunk.c @srcroot@src/chunk_dss.c \ @srcroot@src/chunk_mmap.c @srcroot@src/chunk_swap.c @srcroot@src/ckh.c \ @srcroot@src/ctl.c @srcroot@src/extent.c @srcroot@src/hash.c \ @srcroot@src/huge.c @srcroot@src/mb.c @srcroot@src/mutex.c \ @@ -65,8 +65,9 @@ DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html) DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \ - @srcroot@test/mremap.c @srcroot@test/posix_memalign.c \ - @srcroot@test/rallocm.c @srcroot@test/thread_arena.c + @srcroot@test/bitmap.c @srcroot@test/mremap.c \ + @srcroot@test/posix_memalign.c @srcroot@test/rallocm.c \ + @srcroot@test/thread_arena.c .PHONY: all dist doc_html doc_man doc .PHONY: install_bin install_include install_lib @@ -127,6 +128,9 @@ doc: $(DOCS) $(CC) $(CFLAGS) -c $(CPPFLAGS) -I@objroot@test -o $@ $< @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) -I@objroot@test $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)" +# Automatic dependency generation misses #include "*.c". +@objroot@test/bitmap.o : @objroot@src/bitmap.o + @objroot@test/%: @objroot@test/%.o \ @objroot@lib/libjemalloc@install_suffix@.$(SO) @mkdir -p $(@D) diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac index dfe2b9b4..dc77d752 100644 --- a/jemalloc/configure.ac +++ b/jemalloc/configure.ac @@ -132,6 +132,16 @@ else fi AC_DEFINE_UNQUOTED([LG_SIZEOF_INT], [$LG_SIZEOF_INT]) +AC_CHECK_SIZEOF([long]) +if test "x${ac_cv_sizeof_long}" = "x8" ; then + LG_SIZEOF_LONG=3 +elif test "x${ac_cv_sizeof_long}" = "x4" ; then + LG_SIZEOF_LONG=2 +else + AC_MSG_ERROR([Unsupported long size: ${ac_cv_sizeof_long}]) +fi +AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG]) + AC_CANONICAL_HOST dnl CPU-specific settings. CPU_SPINWAIT="" @@ -752,6 +762,14 @@ if test "x${enable_tls}" = "x0" ; then AC_DEFINE_UNQUOTED([NO_TLS], [ ]) fi +dnl ============================================================================ +dnl Check for ffsl(3), and fail if not found. This function exists on all +dnl platforms that jemalloc currently has a chance of functioning on without +dnl modification. + +AC_CHECK_FUNC([ffsl], [], + [AC_MSG_ERROR([Cannot build without ffsl(3)])]) + dnl ============================================================================ dnl Check for allocator-related functions that should be wrapped. diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h index bd983f24..1744b45b 100644 --- a/jemalloc/include/jemalloc/internal/arena.h +++ b/jemalloc/include/jemalloc/internal/arena.h @@ -209,18 +209,15 @@ struct arena_run_s { /* Bin this run is associated with. */ arena_bin_t *bin; - /* Stack of available freed regions, or NULL. */ - void *avail; - - /* Next region that has never been allocated, or run boundary. */ - void *next; + /* Index of next region that has never been allocated, or nregs. */ + uint32_t nextind; /* Number of free regions in run. 
*/ unsigned nfree; }; /* - * Read-only information associated with each element for arena_t's bins array + * Read-only information associated with each element of arena_t's bins array * is stored separately, partly to reduce memory usage (only one copy, rather * than one per arena), but mainly to avoid false cacheline sharing. */ @@ -234,6 +231,18 @@ struct arena_bin_info_s { /* Total number of regions in a run for this bin's size class. */ uint32_t nregs; + /* + * Offset of first bitmap_t element in a run header for this bin's size + * class. + */ + uint32_t bitmap_offset; + + /* + * Metadata used to manipulate bitmaps for runs associated with this + * bin. + */ + bitmap_info_t bitmap_info; + #ifdef JEMALLOC_PROF /* * Offset of first (prof_ctx_t *) in a run header for this bin's size @@ -397,7 +406,7 @@ struct arena_s { extern size_t opt_lg_qspace_max; extern size_t opt_lg_cspace_max; -extern ssize_t opt_lg_dirty_mult; +extern ssize_t opt_lg_dirty_mult; /* * small_size2bin is a compact lookup table that rounds request sizes up to * size classes. In order to reduce cache footprint, the table is compressed, @@ -498,7 +507,13 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) unsigned shift, diff, regind; size_t size; - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); + /* + * Freeing a pointer lower than region zero can cause assertion + * failure. + */ + assert((uintptr_t)ptr >= (uintptr_t)run + + (uintptr_t)bin_info->reg0_offset); /* * Avoid doing division with a variable divisor if possible. Using @@ -583,7 +598,7 @@ arena_prof_ctx_get(const void *ptr) arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); regind = arena_run_regind(run, bin_info, ptr); ret = *(prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset + (regind * @@ -618,7 +633,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) arena_bin_info_t *bin_info; unsigned regind; - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); binind = arena_bin_index(chunk->arena, bin); bin_info = &arena_bin_info[binind]; regind = arena_run_regind(run, bin_info, ptr); @@ -639,7 +654,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_chunk_map_t *mapelm; assert(arena != NULL); - assert(arena->magic == ARENA_MAGIC); + dassert(arena->magic == ARENA_MAGIC); assert(chunk->arena == arena); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -662,9 +677,9 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; -#ifndef NDEBUG +#ifdef JEMALLOC_DEBUG { size_t binind = arena_bin_index(arena, bin); arena_bin_info_t *bin_info = diff --git a/jemalloc/include/jemalloc/internal/bitmap.h b/jemalloc/include/jemalloc/internal/bitmap.h new file mode 100644 index 00000000..4bb2212c --- /dev/null +++ b/jemalloc/include/jemalloc/internal/bitmap.h @@ -0,0 +1,184 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. 
*/ +#define LG_BITMAP_MAXBITS 18 + +typedef struct bitmap_level_s bitmap_level_t; +typedef struct bitmap_info_s bitmap_info_t; +typedef unsigned long bitmap_t; +#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG + +/* Number of bits per group. */ +#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) +#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) +#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) + +/* Maximum number of levels possible. */ +#define BITMAP_MAX_LEVELS \ + (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct bitmap_level_s { + /* Offset of this level's groups within the array of groups. */ + size_t group_offset; +}; + +struct bitmap_info_s { + /* Logical number of bits in bitmap (stored at bottom level). */ + size_t nbits; + + /* Number of levels necessary for nbits. */ + unsigned nlevels; + + /* + * Only the first (nlevels+1) elements are used, and levels are ordered + * bottom to top (e.g. the bottom level is stored in levels[0]). + */ + bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); +size_t bitmap_info_ngroups(const bitmap_info_t *binfo); +size_t bitmap_size(size_t nbits); +void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); +bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); +void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) +JEMALLOC_INLINE bool +bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1; + bitmap_t rg = bitmap[rgoff]; + /* The bitmap is full iff the root group is 0. */ + return (rg == 0); +} + +JEMALLOC_INLINE bool +bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t g; + + assert(bit < binfo->nbits); + goff = bit >> LG_BITMAP_GROUP_NBITS; + g = bitmap[goff]; + return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))); +} + +JEMALLOC_INLINE void +bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t *gp; + bitmap_t g; + + assert(bit < binfo->nbits); + assert(bitmap_get(bitmap, binfo, bit) == false); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(bitmap_get(bitmap, binfo, bit)); + /* Propagate group state transitions up the tree. 
*/ + if (g == 0) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (g != 0) + break; + } + } +} + +/* sfu: set first unset. */ +JEMALLOC_INLINE size_t +bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t bit; + bitmap_t g; + unsigned i; + + assert(bitmap_full(bitmap, binfo) == false); + + i = binfo->nlevels - 1; + g = bitmap[binfo->levels[i].group_offset]; + bit = ffsl(g) - 1; + while (i > 0) { + i--; + g = bitmap[binfo->levels[i].group_offset + bit]; + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1); + } + + bitmap_set(bitmap, binfo, bit); + return (bit); +} + +JEMALLOC_INLINE void +bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t *gp; + bitmap_t g; + bool propagate; + + assert(bit < binfo->nbits); + assert(bitmap_get(bitmap, binfo, bit)); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + propagate = (g == 0); + assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(bitmap_get(bitmap, binfo, bit) == false); + /* Propagate group state transitions up the tree. */ + if (propagate) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + propagate = (g == 0); + assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) + == 0); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (propagate == false) + break; + } + } +} + +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in index 34b2a232..a80fc7cd 100644 --- a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in +++ b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in @@ -55,8 +55,9 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); * Define a custom assert() in order to reduce the chances of deadlock during * assertion failure. */ -#ifdef JEMALLOC_DEBUG -# define assert(e) do { \ +#ifndef assert +# ifdef JEMALLOC_DEBUG +# define assert(e) do { \ if (!(e)) { \ char line_buf[UMAX2S_BUFSIZE]; \ malloc_write(": "); \ @@ -70,8 +71,15 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); abort(); \ } \ } while (0) +# else +# define assert(e) +# endif +#endif + +#ifdef JEMALLOC_DEBUG +# define dassert(e) assert(e) #else -#define assert(e) +# define dassert(e) #endif /* @@ -146,7 +154,19 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #define QUANTUM_CEILING(a) \ (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) +#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) +#define LONG_MASK (LONG - 1) + +/* Return the smallest long multiple that is >= a. */ +#define LONG_CEILING(a) \ + (((a) + LONG_MASK) & ~LONG_MASK) + #define SIZEOF_PTR (1U << LG_SIZEOF_PTR) +#define PTR_MASK (SIZEOF_PTR - 1) + +/* Return the smallest (void *) multiple that is >= a. */ +#define PTR_CEILING(a) \ + (((a) + PTR_MASK) & ~PTR_MASK) /* * Maximum size of L1 cache line. This is used to avoid cache line aliasing. 
@@ -199,6 +219,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" +#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" @@ -222,6 +243,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" +#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" @@ -335,6 +357,7 @@ void jemalloc_postfork(void); #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" +#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" @@ -545,6 +568,7 @@ thread_allocated_get(void) #endif #endif +#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/arena.h" @@ -628,7 +652,7 @@ isalloc(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - assert(chunk->arena->magic == ARENA_MAGIC); + dassert(chunk->arena->magic == ARENA_MAGIC); #ifdef JEMALLOC_PROF ret = arena_salloc_demote(ptr); diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h index db63465f..f9438735 100644 --- a/jemalloc/include/jemalloc/internal/prof.h +++ b/jemalloc/include/jemalloc/internal/prof.h @@ -348,7 +348,7 @@ prof_ctx_get(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - assert(chunk->arena->magic == ARENA_MAGIC); + dassert(chunk->arena->magic == ARENA_MAGIC); ret = arena_prof_ctx_get(ptr); } else @@ -367,7 +367,7 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - assert(chunk->arena->magic == ARENA_MAGIC); + dassert(chunk->arena->magic == ARENA_MAGIC); arena_prof_ctx_set(ptr, ctx); } else diff --git a/jemalloc/include/jemalloc/internal/tcache.h b/jemalloc/include/jemalloc/internal/tcache.h index ab02545f..5434d32d 100644 --- a/jemalloc/include/jemalloc/internal/tcache.h +++ b/jemalloc/include/jemalloc/internal/tcache.h @@ -2,6 +2,7 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +typedef struct tcache_bin_info_s tcache_bin_info_t; typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; @@ -32,14 +33,21 @@ typedef struct tcache_s tcache_t; /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +/* + * Read-only information associated with each element of tcache_t's tbins array + * is stored separately, mainly to reduce memory usage. + */ +struct tcache_bin_info_s { + unsigned ncached_max; /* Upper limit on ncached. */ +}; + struct tcache_bin_s { # ifdef JEMALLOC_STATS tcache_bin_stats_t tstats; # endif unsigned low_water; /* Min # cached since last GC. */ unsigned ncached; /* # of cached objects. */ - unsigned ncached_max; /* Upper limit on ncached. */ - void *avail; /* Chain of available objects. */ + void **avail; /* Stack of available objects. 
*/ }; struct tcache_s { @@ -53,6 +61,12 @@ struct tcache_s { unsigned ev_cnt; /* Event count since incremental GC. */ unsigned next_gc_bin; /* Next bin to GC. */ tcache_bin_t tbins[1]; /* Dynamically sized. */ + /* + * The pointer stacks associated with tbins follow as a contiguous + * array. During tcache initialization, the avail pointer in each + * element of tbins is initialized to point to the proper offset within + * this array. + */ }; #endif /* JEMALLOC_H_STRUCTS */ @@ -63,6 +77,8 @@ extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; extern ssize_t opt_lg_tcache_gc_sweep; +extern tcache_bin_info_t *tcache_bin_info; + /* Map of thread-specific caches. */ #ifndef NO_TLS extern __thread tcache_t *tcache_tls @@ -109,7 +125,7 @@ void tcache_destroy(tcache_t *tcache); #ifdef JEMALLOC_STATS void tcache_stats_merge(tcache_t *tcache, arena_t *arena); #endif -void tcache_boot(void); +bool tcache_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -211,8 +227,7 @@ tcache_alloc_easy(tcache_bin_t *tbin) tbin->ncached--; if (tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; - ret = tbin->avail; - tbin->avail = *(void **)ret; + ret = tbin->avail[tbin->ncached]; return (ret); } @@ -312,6 +327,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) arena_run_t *run; arena_bin_t *bin; tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; size_t pageind, binind; arena_chunk_map_t *mapelm; @@ -323,7 +339,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) mapelm = &chunk->map[pageind-map_bias]; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / sizeof(arena_bin_t); @@ -335,16 +351,17 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) #endif tbin = &tcache->tbins[binind]; - if (tbin->ncached == tbin->ncached_max) { - tcache_bin_flush_small(tbin, binind, (tbin->ncached_max >> 1) + tbin_info = &tcache_bin_info[binind]; + if (tbin->ncached == tbin_info->ncached_max) { + tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> + 1) #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) , tcache #endif ); } - assert(tbin->ncached < tbin->ncached_max); - *(void **)ptr = tbin->avail; - tbin->avail = ptr; + assert(tbin->ncached < tbin_info->ncached_max); + tbin->avail[tbin->ncached] = ptr; tbin->ncached++; tcache_event(tcache); @@ -357,6 +374,7 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) arena_chunk_t *chunk; size_t pageind, binind; tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); assert(arena_salloc(ptr) > small_maxclass); @@ -373,16 +391,17 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) #endif tbin = &tcache->tbins[binind]; - if (tbin->ncached == tbin->ncached_max) { - tcache_bin_flush_large(tbin, binind, (tbin->ncached_max >> 1) + tbin_info = &tcache_bin_info[binind]; + if (tbin->ncached == tbin_info->ncached_max) { + tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> + 1) #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) , tcache #endif ); } - assert(tbin->ncached < tbin->ncached_max); - *(void **)ptr = tbin->avail; - tbin->avail = ptr; + assert(tbin->ncached < tbin_info->ncached_max); + tbin->avail[tbin->ncached] = ptr; tbin->ncached++; tcache_event(tcache); diff --git 
a/jemalloc/include/jemalloc/jemalloc_defs.h.in b/jemalloc/include/jemalloc/jemalloc_defs.h.in index 773c9f8c..d6698416 100644 --- a/jemalloc/include/jemalloc/jemalloc_defs.h.in +++ b/jemalloc/include/jemalloc/jemalloc_defs.h.in @@ -140,4 +140,7 @@ /* sizeof(int) == 2^LG_SIZEOF_INT. */ #undef LG_SIZEOF_INT +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#undef LG_SIZEOF_LONG + #endif /* JEMALLOC_DEFS_H_ */ diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c index e49b8edf..87bd9bb6 100644 --- a/jemalloc/src/arena.c +++ b/jemalloc/src/arena.c @@ -253,59 +253,45 @@ static inline void * arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { void *ret; + unsigned regind; + bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + + (uintptr_t)bin_info->bitmap_offset); - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); assert(run->nfree > 0); + assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); + regind = bitmap_sfu(bitmap, &bin_info->bitmap_info); + ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + + (uintptr_t)(bin_info->reg_size * regind)); run->nfree--; - ret = run->avail; - if (ret != NULL) { - /* Double free can cause assertion failure.*/ - assert(ret != NULL); - /* Write-after free can cause assertion failure. */ - assert((uintptr_t)ret >= (uintptr_t)run + - (uintptr_t)bin_info->reg0_offset); - assert((uintptr_t)ret < (uintptr_t)run->next); - assert(((uintptr_t)ret - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % - (uintptr_t)bin_info->reg_size == 0); - run->avail = *(void **)ret; - return (ret); - } - ret = run->next; - run->next = (void *)((uintptr_t)ret + (uintptr_t)bin_info->reg_size); - assert(ret != NULL); + if (regind == run->nextind) + run->nextind++; + assert(regind < run->nextind); return (ret); } static inline void arena_run_reg_dalloc(arena_run_t *run, void *ptr) { - -#ifndef NDEBUG arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; + unsigned regind = arena_run_regind(run, bin_info, ptr); + bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + + (uintptr_t)bin_info->bitmap_offset); + assert(run->nfree < bin_info->nregs); /* Freeing an interior pointer can cause assertion failure. */ assert(((uintptr_t)ptr - ((uintptr_t)run + (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size == 0); - /* - * Freeing a pointer lower than region zero can cause assertion - * failure. - */ assert((uintptr_t)ptr >= (uintptr_t)run + (uintptr_t)bin_info->reg0_offset); - /* - * Freeing a pointer past in the run's frontier can cause assertion - * failure. - */ - assert((uintptr_t)ptr < (uintptr_t)run->next); -#endif + /* Freeing an unallocated pointer can cause assertion failure. 
*/ + assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind)); - *(void **)ptr = run->avail; - run->avail = ptr; + bitmap_unset(bitmap, &bin_info->bitmap_info, regind); run->nfree++; } @@ -772,7 +758,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) chunk + (uintptr_t)(pageind << PAGE_SHIFT)); assert((mapelm->bits >> PAGE_SHIFT) == 0); - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); size_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = @@ -1224,12 +1210,14 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) malloc_mutex_lock(&arena->lock); run = arena_run_alloc(arena, bin_info->run_size, false, false); if (run != NULL) { + bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + + (uintptr_t)bin_info->bitmap_offset); + /* Initialize run internals. */ run->bin = bin; - run->avail = NULL; - run->next = (void *)((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset); + run->nextind = 0; run->nfree = bin_info->nregs; + bitmap_init(bitmap, &bin_info->bitmap_info); #ifdef JEMALLOC_DEBUG run->magic = ARENA_RUN_MAGIC; #endif @@ -1289,12 +1277,11 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) bin->runcur = NULL; run = arena_bin_nonfull_run_get(arena, bin); if (bin->runcur != NULL && bin->runcur->nfree > 0) { - /* * Another thread updated runcur while this one ran without the * bin lock in arena_bin_nonfull_run_get(). */ - assert(bin->runcur->magic == ARENA_RUN_MAGIC); + dassert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); ret = arena_run_reg_alloc(bin->runcur, bin_info); if (run != NULL) { @@ -1302,7 +1289,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) /* * arena_run_alloc() may have allocated run, or it may - * have pulled it from the bin's run tree. Therefore + * have pulled run from the bin's run tree. 
Therefore * it is unsafe to make any assumptions about how run * has previously been used, and arena_bin_lower_run() * must be called, as if a region were just deallocated @@ -1322,7 +1309,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) bin->runcur = run; - assert(bin->runcur->magic == ARENA_RUN_MAGIC); + dassert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); return (arena_run_reg_alloc(bin->runcur, bin_info)); @@ -1365,15 +1352,15 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind #endif bin = &arena->bins[binind]; malloc_mutex_lock(&bin->lock); - for (i = 0, nfill = (tbin->ncached_max >> 1); i < nfill; i++) { + for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> 1); + i < nfill; i++) { if ((run = bin->runcur) != NULL && run->nfree > 0) ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); else ptr = arena_bin_malloc_hard(arena, bin); if (ptr == NULL) break; - *(void **)ptr = tbin->avail; - tbin->avail = ptr; + tbin->avail[i] = ptr; } #ifdef JEMALLOC_STATS bin->stats.allocated += (i - tbin->ncached) * @@ -1607,7 +1594,7 @@ arena_salloc(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + @@ -1660,7 +1647,7 @@ arena_salloc_demote(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + @@ -1730,8 +1717,9 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, /******************************/ npages = bin_info->run_size >> PAGE_SHIFT; run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); - past = (size_t)((PAGE_CEILING((uintptr_t)run->next) - (uintptr_t)chunk) - >> PAGE_SHIFT); + past = (size_t)(PAGE_CEILING((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * + bin_info->reg_size) - (uintptr_t)chunk) >> PAGE_SHIFT); malloc_mutex_lock(&arena->lock); /* @@ -1817,7 +1805,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; size_t binind = arena_bin_index(arena, bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; @@ -2065,7 +2053,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - assert(arena->magic == ARENA_MAGIC); + dassert(arena->magic == ARENA_MAGIC); if (psize < oldsize) { #ifdef JEMALLOC_FILL @@ -2405,8 +2393,8 @@ small_size2bin_init_hard(void) * *) bin_info->run_size <= arena_maxclass * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). * - * bin_info->nregs and bin_info->reg0_offset are also calculated here, since - * these settings are all interdependent. 
+ * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also + * calculated here, since these settings are all interdependent. */ static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) @@ -2414,6 +2402,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) size_t try_run_size, good_run_size; uint32_t try_nregs, good_nregs; uint32_t try_hdr_size, good_hdr_size; + uint32_t try_bitmap_offset, good_bitmap_offset; #ifdef JEMALLOC_PROF uint32_t try_ctx0_offset, good_ctx0_offset; #endif @@ -2438,6 +2427,11 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) do { try_nregs--; try_hdr_size = sizeof(arena_run_t); + /* Pad to a long boundary. */ + try_hdr_size = LONG_CEILING(try_hdr_size); + try_bitmap_offset = try_hdr_size; + /* Add space for bitmap. */ + try_hdr_size += bitmap_size(try_nregs); #ifdef JEMALLOC_PROF if (opt_prof && prof_promote == false) { /* Pad to a quantum boundary. */ @@ -2460,6 +2454,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) good_run_size = try_run_size; good_nregs = try_nregs; good_hdr_size = try_hdr_size; + good_bitmap_offset = try_bitmap_offset; #ifdef JEMALLOC_PROF good_ctx0_offset = try_ctx0_offset; #endif @@ -2473,6 +2468,11 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) do { try_nregs--; try_hdr_size = sizeof(arena_run_t); + /* Pad to a long boundary. */ + try_hdr_size = LONG_CEILING(try_hdr_size); + try_bitmap_offset = try_hdr_size; + /* Add space for bitmap. */ + try_hdr_size += bitmap_size(try_nregs); #ifdef JEMALLOC_PROF if (opt_prof && prof_promote == false) { /* Pad to a quantum boundary. */ @@ -2498,6 +2498,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) /* Copy final settings. */ bin_info->run_size = good_run_size; bin_info->nregs = good_nregs; + bin_info->bitmap_offset = good_bitmap_offset; #ifdef JEMALLOC_PROF bin_info->ctx0_offset = good_ctx0_offset; #endif @@ -2525,6 +2526,7 @@ bin_info_init(void) bin_info = &arena_bin_info[i]; bin_info->reg_size = (1U << (LG_TINY_MIN + i)); prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); } #endif @@ -2533,6 +2535,7 @@ bin_info_init(void) bin_info = &arena_bin_info[i]; bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM; prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); } /* Cacheline-spaced bins. */ @@ -2541,6 +2544,7 @@ bin_info_init(void) bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) << LG_CACHELINE); prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); } /* Subpage-spaced bins. */ @@ -2549,6 +2553,7 @@ bin_info_init(void) bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins)) << LG_SUBPAGE); prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); } return (false); diff --git a/jemalloc/src/bitmap.c b/jemalloc/src/bitmap.c new file mode 100644 index 00000000..b47e2629 --- /dev/null +++ b/jemalloc/src/bitmap.c @@ -0,0 +1,90 @@ +#define JEMALLOC_BITMAP_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. 
*/ + +static size_t bits2groups(size_t nbits); + +/******************************************************************************/ + +static size_t +bits2groups(size_t nbits) +{ + + return ((nbits >> LG_BITMAP_GROUP_NBITS) + + !!(nbits & BITMAP_GROUP_NBITS_MASK)); +} + +void +bitmap_info_init(bitmap_info_t *binfo, size_t nbits) +{ + unsigned i; + size_t group_count; + + assert(nbits > 0); + assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); + + /* + * Compute the number of groups necessary to store nbits bits, and + * progressively work upward through the levels until reaching a level + * that requires only one group. + */ + binfo->levels[0].group_offset = 0; + group_count = bits2groups(nbits); + for (i = 1; group_count > 1; i++) { + assert(i < BITMAP_MAX_LEVELS); + binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + + group_count; + group_count = bits2groups(group_count); + } + binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + + group_count; + binfo->nlevels = i; + binfo->nbits = nbits; +} + +size_t +bitmap_info_ngroups(const bitmap_info_t *binfo) +{ + + return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP); +} + +size_t +bitmap_size(size_t nbits) +{ + bitmap_info_t binfo; + + bitmap_info_init(&binfo, nbits); + return (bitmap_info_ngroups(&binfo)); +} + +void +bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t extra; + unsigned i; + + /* + * Bits are actually inverted with regard to the external bitmap + * interface, so the bitmap starts out with all 1 bits, except for + * trailing unused bits (if any). Note that each group uses bit 0 to + * correspond to the first logical bit in the group, so extra bits + * are the most significant bits of the last group. + */ + memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset << + LG_SIZEOF_BITMAP); + extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) + & BITMAP_GROUP_NBITS_MASK; + if (extra != 0) + bitmap[binfo->levels[1].group_offset - 1] >>= extra; + for (i = 1; i < binfo->nlevels; i++) { + size_t group_count = binfo->levels[i].group_offset - + binfo->levels[i-1].group_offset; + extra = (BITMAP_GROUP_NBITS - (group_count & + BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; + if (extra != 0) + bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; + } +} diff --git a/jemalloc/src/ckh.c b/jemalloc/src/ckh.c index e386a531..75ae7fd2 100644 --- a/jemalloc/src/ckh.c +++ b/jemalloc/src/ckh.c @@ -73,7 +73,7 @@ ckh_isearch(ckh_t *ckh, const void *key) size_t hash1, hash2, bucket, cell; assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); + dassert(ckh->magic == CKH_MAGIC); ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); @@ -396,7 +396,7 @@ ckh_delete(ckh_t *ckh) { assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); + dassert(ckh->magic == CKH_MAGIC); #ifdef CKH_VERBOSE malloc_printf( @@ -421,7 +421,7 @@ ckh_count(ckh_t *ckh) { assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); + dassert(ckh->magic == CKH_MAGIC); return (ckh->count); } @@ -452,7 +452,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data) bool ret; assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); + dassert(ckh->magic == CKH_MAGIC); assert(ckh_search(ckh, key, NULL, NULL)); #ifdef CKH_COUNT @@ -477,7 +477,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); + dassert(ckh->magic == CKH_MAGIC); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { @@ -509,7 +509,7 
@@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); + dassert(ckh->magic == CKH_MAGIC); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c index c1aaddae..9f2fa92e 100644 --- a/jemalloc/src/jemalloc.c +++ b/jemalloc/src/jemalloc.c @@ -693,7 +693,10 @@ malloc_init_hard(void) } #ifdef JEMALLOC_TCACHE - tcache_boot(); + if (tcache_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } #endif if (huge_boot()) { diff --git a/jemalloc/src/tcache.c b/jemalloc/src/tcache.c index 88e1cc7c..2f4804ee 100644 --- a/jemalloc/src/tcache.c +++ b/jemalloc/src/tcache.c @@ -8,6 +8,9 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT; +tcache_bin_info_t *tcache_bin_info; +static unsigned stack_nelms; /* Total stack elms per tcache. */ + /* Map of thread-specific caches. */ #ifndef NO_TLS __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); @@ -55,21 +58,19 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem #endif ) { - void *flush, *deferred, *ptr; + void *ptr; unsigned i, nflush, ndeferred; - bool first_pass; #ifdef JEMALLOC_STATS bool merged_stats = false; #endif assert(binind < nbins); assert(rem <= tbin->ncached); - assert(tbin->ncached > 0 || tbin->avail == NULL); - for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass = - true; flush != NULL; flush = deferred, nflush = ndeferred) { + for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush); + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( + tbin->avail[0]); arena_t *arena = chunk->arena; arena_bin_t *bin = &arena->bins[binind]; @@ -92,12 +93,10 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem tbin->tstats.nrequests = 0; } #endif - deferred = NULL; ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = flush; + ptr = tbin->avail[i]; assert(ptr != NULL); - flush = *(void **)ptr; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) { size_t pageind = ((uintptr_t)ptr - @@ -112,17 +111,11 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem * locked. Stash the object, so that it can be * handled in a future pass. 
*/ - *(void **)ptr = deferred; - deferred = ptr; + tbin->avail[ndeferred] = ptr; ndeferred++; } } malloc_mutex_unlock(&bin->lock); - - if (first_pass) { - tbin->avail = flush; - first_pass = false; - } } #ifdef JEMALLOC_STATS if (merged_stats == false) { @@ -139,6 +132,8 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem } #endif + memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], + rem * sizeof(void *)); tbin->ncached = rem; if (tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; @@ -151,18 +146,19 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem #endif ) { - void *flush, *deferred, *ptr; + void *ptr; unsigned i, nflush, ndeferred; - bool first_pass; +#ifdef JEMALLOC_STATS + bool merged_stats = false; +#endif assert(binind < nhbins); assert(rem <= tbin->ncached); - assert(tbin->ncached > 0 || tbin->avail == NULL); - for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass = - true; flush != NULL; flush = deferred, nflush = ndeferred) { + for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. */ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush); + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( + tbin->avail[0]); arena_t *arena = chunk->arena; malloc_mutex_lock(&arena->lock); @@ -174,6 +170,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem tcache->prof_accumbytes = 0; #endif #ifdef JEMALLOC_STATS + merged_stats = true; arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[binind - nbins].nrequests += tbin->tstats.nrequests; @@ -182,12 +179,10 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem #if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) } #endif - deferred = NULL; ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = flush; + ptr = tbin->avail[i]; assert(ptr != NULL); - flush = *(void **)ptr; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) arena_dalloc_large(arena, chunk, ptr); @@ -198,19 +193,30 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem * Stash the object, so that it can be handled * in a future pass. */ - *(void **)ptr = deferred; - deferred = ptr; + tbin->avail[ndeferred] = ptr; ndeferred++; } } malloc_mutex_unlock(&arena->lock); - - if (first_pass) { - tbin->avail = flush; - first_pass = false; - } } +#ifdef JEMALLOC_STATS + if (merged_stats == false) { + /* + * The flush loop didn't happen to flush to this thread's + * arena, so the stats didn't get merged. Manually do so now. + */ + arena_t *arena = tcache->arena; + malloc_mutex_lock(&arena->lock); + arena->stats.nrequests_large += tbin->tstats.nrequests; + arena->stats.lstats[binind - nbins].nrequests += + tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + malloc_mutex_unlock(&arena->lock); + } +#endif + memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], + rem * sizeof(void *)); tbin->ncached = rem; if (tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; @@ -220,10 +226,14 @@ tcache_t * tcache_create(arena_t *arena) { tcache_t *tcache; - size_t size; + size_t size, stack_offset; unsigned i; size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins); + /* Naturally align the pointer stacks. 
*/ + size = PTR_CEILING(size); + stack_offset = size; + size += stack_nelms * sizeof(void *); /* * Round up to the nearest multiple of the cacheline size, in order to * avoid the possibility of false cacheline sharing. @@ -236,6 +246,8 @@ tcache_create(arena_t *arena) if (size <= small_maxclass) tcache = (tcache_t *)arena_malloc_small(arena, size, true); + else if (size <= tcache_maxclass) + tcache = (tcache_t *)arena_malloc_large(arena, size, true); else tcache = (tcache_t *)icalloc(size); @@ -252,15 +264,11 @@ tcache_create(arena_t *arena) tcache->arena = arena; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - for (i = 0; i < nbins; i++) { - if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { - tcache->tbins[i].ncached_max = (arena_bin_info[i].nregs - << 1); - } else - tcache->tbins[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX; + for (i = 0; i < nhbins; i++) { + tcache->tbins[i].avail = (void **)((uintptr_t)tcache + + (uintptr_t)stack_offset); + stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); } - for (; i < nhbins; i++) - tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE; TCACHE_SET(tcache); @@ -271,6 +279,7 @@ void tcache_destroy(tcache_t *tcache) { unsigned i; + size_t tcache_size; #ifdef JEMALLOC_STATS /* Unlink from list of extant tcaches. */ @@ -327,7 +336,8 @@ tcache_destroy(tcache_t *tcache) } #endif - if (arena_salloc(tcache) <= small_maxclass) { + tcache_size = arena_salloc(tcache); + if (tcache_size <= small_maxclass) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> @@ -341,6 +351,13 @@ tcache_destroy(tcache_t *tcache) malloc_mutex_lock(&bin->lock); arena_dalloc_bin(arena, chunk, tcache, mapelm); malloc_mutex_unlock(&bin->lock); + } else if (tcache_size <= tcache_maxclass) { + arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); + arena_t *arena = chunk->arena; + + malloc_mutex_lock(&arena->lock); + arena_dalloc_large(arena, chunk, tcache); + malloc_mutex_unlock(&arena->lock); } else idalloc(tcache); } @@ -397,11 +414,13 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) } #endif -void +bool tcache_boot(void) { if (opt_tcache) { + unsigned i; + /* * If necessary, clamp opt_lg_tcache_max, now that * small_maxclass and arena_maxclass are known. @@ -416,6 +435,28 @@ tcache_boot(void) nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT); + /* Initialize tcache_bin_info. */ + tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * + sizeof(tcache_bin_info_t)); + if (tcache_bin_info == NULL) + return (true); + stack_nelms = 0; + for (i = 0; i < nbins; i++) { + if ((arena_bin_info[i].nregs << 1) <= + TCACHE_NSLOTS_SMALL_MAX) { + tcache_bin_info[i].ncached_max = + (arena_bin_info[i].nregs << 1); + } else { + tcache_bin_info[i].ncached_max = + TCACHE_NSLOTS_SMALL_MAX; + } + stack_nelms += tcache_bin_info[i].ncached_max; + } + for (; i < nhbins; i++) { + tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; + stack_nelms += tcache_bin_info[i].ncached_max; + } + /* Compute incremental GC event threshold. 
*/ if (opt_lg_tcache_gc_sweep >= 0) { tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) / @@ -431,6 +472,8 @@ tcache_boot(void) abort(); } } + + return (false); } /******************************************************************************/ #endif /* JEMALLOC_TCACHE */ diff --git a/jemalloc/test/bitmap.c b/jemalloc/test/bitmap.c new file mode 100644 index 00000000..7a017c86 --- /dev/null +++ b/jemalloc/test/bitmap.c @@ -0,0 +1,153 @@ +#define JEMALLOC_MANGLE +#include "jemalloc_test.h" + +/* + * Avoid using the assert() from jemalloc_internal.h, since it requires + * internal libjemalloc functionality. + * */ +#include + +/* + * Directly include the bitmap code, since it isn't exposed outside + * libjemalloc. + */ +#include "../src/bitmap.c" + +#define MAXBITS 4500 + +static void +test_bitmap_size(void) +{ + size_t i, prev_size; + + prev_size = 0; + for (i = 1; i <= MAXBITS; i++) { + size_t size = bitmap_size(i); + assert(size >= prev_size); + prev_size = size; + } +} + +static void +test_bitmap_init(void) +{ + size_t i; + + for (i = 1; i <= MAXBITS; i++) { + bitmap_info_t binfo; + bitmap_info_init(&binfo, i); + { + size_t j; + bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_init(bitmap, &binfo); + + for (j = 0; j < i; j++) + assert(bitmap_get(bitmap, &binfo, j) == false); + + } + } +} + +static void +test_bitmap_set(void) +{ + size_t i; + + for (i = 1; i <= MAXBITS; i++) { + bitmap_info_t binfo; + bitmap_info_init(&binfo, i); + { + size_t j; + bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_init(bitmap, &binfo); + + for (j = 0; j < i; j++) + bitmap_set(bitmap, &binfo, j); + assert(bitmap_full(bitmap, &binfo)); + } + } +} + +static void +test_bitmap_unset(void) +{ + size_t i; + + for (i = 1; i <= MAXBITS; i++) { + bitmap_info_t binfo; + bitmap_info_init(&binfo, i); + { + size_t j; + bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_init(bitmap, &binfo); + + for (j = 0; j < i; j++) + bitmap_set(bitmap, &binfo, j); + assert(bitmap_full(bitmap, &binfo)); + for (j = 0; j < i; j++) + bitmap_unset(bitmap, &binfo, j); + for (j = 0; j < i; j++) + bitmap_set(bitmap, &binfo, j); + assert(bitmap_full(bitmap, &binfo)); + } + } +} + +static void +test_bitmap_sfu(void) +{ + size_t i; + + for (i = 1; i <= MAXBITS; i++) { + bitmap_info_t binfo; + bitmap_info_init(&binfo, i); + { + ssize_t j; + bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_init(bitmap, &binfo); + + /* Iteratively set bits starting at the beginning. */ + for (j = 0; j < i; j++) + assert(bitmap_sfu(bitmap, &binfo) == j); + assert(bitmap_full(bitmap, &binfo)); + + /* + * Iteratively unset bits starting at the end, and + * verify that bitmap_sfu() reaches the unset bits. + */ + for (j = i - 1; j >= 0; j--) { + bitmap_unset(bitmap, &binfo, j); + assert(bitmap_sfu(bitmap, &binfo) == j); + bitmap_unset(bitmap, &binfo, j); + } + assert(bitmap_get(bitmap, &binfo, 0) == false); + + /* + * Iteratively set bits starting at the beginning, and + * verify that bitmap_sfu() looks past them. 
+ */ + for (j = 1; j < i; j++) { + bitmap_set(bitmap, &binfo, j - 1); + assert(bitmap_sfu(bitmap, &binfo) == j); + bitmap_unset(bitmap, &binfo, j); + } + assert(bitmap_sfu(bitmap, &binfo) == i - 1); + assert(bitmap_full(bitmap, &binfo)); + } + } +} + +int +main(void) +{ + fprintf(stderr, "Test begin\n"); + + test_bitmap_size(); + test_bitmap_init(); + test_bitmap_set(); + test_bitmap_unset(); + test_bitmap_sfu(); + + fprintf(stderr, "Test end\n"); + return (0); +} diff --git a/jemalloc/test/bitmap.exp b/jemalloc/test/bitmap.exp new file mode 100644 index 00000000..369a88dd --- /dev/null +++ b/jemalloc/test/bitmap.exp @@ -0,0 +1,2 @@ +Test begin +Test end
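
[Illustration — not part of the patch.] The heart of the change above is that each small-object run now tracks its free regions with a bitmap whose bits are inverted relative to the external interface: a 1 bit means the region is still free, so ffsl(3) finds the lowest free region directly, and clearing that bit allocates it. Below is a minimal standalone sketch of that bookkeeping, under stated assumptions: the names (freemap, run_init(), reg_alloc(), reg_dalloc()) and the region count NREGS are hypothetical, and the patch's multi-level tree is flattened into a linear scan over groups for brevity, whereas the real bitmap_sfu() performs one ffsl() per level, walking down from the root group.

#define _GNU_SOURCE	/* glibc declares ffsl() in <string.h> under _GNU_SOURCE */
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>	/* BSD systems declare ffsl() here instead */

#define NREGS	100	/* hypothetical region count, for illustration only */
#define GRPBITS	(sizeof(unsigned long) * 8)
#define NGRPS	((NREGS + GRPBITS - 1) / GRPBITS)

static unsigned long freemap[NGRPS];

/* All regions start out free: every tracked bit is 1 (inverted sense). */
static void
run_init(void)
{

	memset(freemap, 0xff, sizeof(freemap));
	/* Clear the unused high bits of the last group. */
	if (NREGS % GRPBITS != 0)
		freemap[NGRPS - 1] >>= GRPBITS - (NREGS % GRPBITS);
}

/* Analogue of bitmap_sfu(): find the lowest free region and mark it used. */
static size_t
reg_alloc(void)
{
	size_t g;

	for (g = 0; g < NGRPS; g++) {
		if (freemap[g] != 0) {
			size_t bit = (size_t)(ffsl((long)freemap[g]) - 1);

			freemap[g] &= ~(1UL << bit);	/* 0 means allocated */
			return (g * GRPBITS + bit);
		}
	}
	assert(0);	/* Caller must check for a full run first. */
	return (0);
}

/* Analogue of bitmap_unset(): mark a region free again. */
static void
reg_dalloc(size_t regind)
{

	assert(regind < NREGS);
	/* A double free trips this, like the new assertions in arena.c. */
	assert((freemap[regind / GRPBITS] & (1UL << (regind % GRPBITS))) == 0);
	freemap[regind / GRPBITS] |= 1UL << (regind % GRPBITS);
}

int
main(void)
{
	size_t i;

	run_init();
	for (i = 0; i < NREGS; i++)
		assert(reg_alloc() == i);	/* allocation is lowest-fit */
	reg_dalloc(42);
	assert(reg_alloc() == 42);	/* a freed region is reused first */
	printf("bitmap sketch OK\n");
	return (0);
}

The asserts in main() mirror what test/bitmap.c checks against the real implementation: allocation is lowest-fit, and a freed region is the next one handed out. The tree levels in the patch exist so that this "find the lowest 1 bit" step stays cheap even for runs with thousands of regions.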
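
[Also illustration only, not part of the patch.] The tcache side of the change replaces the intrusive free chain (objects linked through their own first words) with a per-bin pointer stack: ncached_max moves into the shared, read-only tcache_bin_info array, and every bin's stack is carved from one contiguous region allocated immediately after the tbins array, with each avail pointer aimed at its slice. The sketch below models just that layout and the push/pop paths; all names and sizes (cache_t, NBINS, the ncached_max values) are hypothetical.

#include <assert.h>
#include <stdlib.h>

#define NBINS	4	/* hypothetical bin count */

typedef struct {
	unsigned ncached_max;	/* read-only limit, shared by all caches */
} bin_info_t;

typedef struct {
	unsigned ncached;	/* current stack depth */
	void **avail;		/* points into the shared stack region */
} bin_t;

typedef struct {
	bin_t tbins[NBINS];
	/* Pointer stacks follow contiguously, as in tcache_create(). */
} cache_t;

/* Hypothetical per-bin limits; the patch derives these in tcache_boot(). */
static bin_info_t bin_info[NBINS] = {{8}, {8}, {16}, {4}};

static cache_t *
cache_create(void)
{
	size_t size = sizeof(cache_t);
	/* sizeof(cache_t) is already pointer-aligned; the patch uses
	 * PTR_CEILING() to guarantee this before placing the stacks. */
	size_t stack_offset = size;
	unsigned i;
	cache_t *c;

	for (i = 0; i < NBINS; i++)
		size += bin_info[i].ncached_max * sizeof(void *);
	if ((c = calloc(1, size)) == NULL)
		return (NULL);
	for (i = 0; i < NBINS; i++) {
		c->tbins[i].avail = (void **)((char *)c + stack_offset);
		stack_offset += bin_info[i].ncached_max * sizeof(void *);
	}
	return (c);
}

/* Pop, as in tcache_alloc_easy(). */
static void *
cache_pop(cache_t *c, unsigned binind)
{
	bin_t *tbin = &c->tbins[binind];

	if (tbin->ncached == 0)
		return (NULL);	/* caller would fall back to the arena */
	tbin->ncached--;
	return (tbin->avail[tbin->ncached]);
}

/* Push, as in tcache_dalloc_small(); a full bin would be flushed first. */
static void
cache_push(cache_t *c, unsigned binind, void *ptr)
{
	bin_t *tbin = &c->tbins[binind];

	assert(tbin->ncached < bin_info[binind].ncached_max);
	tbin->avail[tbin->ncached] = ptr;
	tbin->ncached++;
}

int
main(void)
{
	cache_t *c = cache_create();
	int x;

	assert(c != NULL);
	cache_push(c, 2, &x);
	assert(cache_pop(c, 2) == &x);
	assert(cache_pop(c, 2) == NULL);
	free(c);
	return (0);
}

With this layout the flush paths can index cached objects directly (tbin->avail[i]) and retain the top rem pointers with a single memmove(), rather than re-threading a linked chain on every pass.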