From 3ee7a5c5b0abe610fa6a9b3b8ad0a5e8e99312d4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 29 Dec 2009 00:09:15 -0800 Subject: [PATCH] Remove the dynamic rebalancing code, since magazines reduce its utility. --- jemalloc/INSTALL | 3 - jemalloc/configure.ac | 23 -- jemalloc/doc/jemalloc.3.in | 13 - jemalloc/src/jemalloc.c | 447 +++++--------------------------- jemalloc/src/jemalloc_defs.h.in | 7 - 5 files changed, 69 insertions(+), 424 deletions(-) diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL index 0572039a..a7841f59 100644 --- a/jemalloc/INSTALL +++ b/jemalloc/INSTALL @@ -51,9 +51,6 @@ any of the following arguments (not a definitive list) to 'configure': cached and released in bulk using "magazines" -- a term coined by the developers of Solaris's umem allocator. ---disable-balance - Disable dynamic rebalancing of thread-->arena assignments. - --enable-dss Enable support for page allocation/deallocation via sbrk(2), in addition to mmap(2). diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac index bf5224a4..7c6a7fb7 100644 --- a/jemalloc/configure.ac +++ b/jemalloc/configure.ac @@ -381,28 +381,6 @@ else fi AC_SUBST([roff_mag]) -dnl Enable dynamic arena load balancing by default. -AC_ARG_ENABLE([balance], - [AS_HELP_STRING([--disable-balance], [Disable dynamic arena load balancing])], -[if test "x$enable_balance" = "xno" ; then - enable_balance="0" -else - enable_balance="1" -fi -], -[enable_balance="1"] -) -if test "x$enable_balance" = "x1" ; then - AC_DEFINE([JEMALLOC_BALANCE], [ ]) -fi -AC_SUBST([enable_balance]) -if test "x$enable_balance" = "x0" ; then - roff_balance=".\\\" " -else - roff_balance="" -fi -AC_SUBST([roff_balance]) - dnl Do not enable allocation from DSS by default. AC_ARG_ENABLE([dss], [AS_HELP_STRING([--enable-dss], [Enable allocation from DSS])], @@ -654,7 +632,6 @@ AC_MSG_RESULT([stats : ${enable_stats}]) AC_MSG_RESULT([trace : ${enable_trace}]) AC_MSG_RESULT([tiny : ${enable_tiny}]) AC_MSG_RESULT([mag : ${enable_mag}]) -AC_MSG_RESULT([balance : ${enable_balance}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) AC_MSG_RESULT([sysv : ${enable_sysv}]) diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in index 5e696f7d..ae42f62a 100644 --- a/jemalloc/doc/jemalloc.3.in +++ b/jemalloc/doc/jemalloc.3.in @@ -188,19 +188,6 @@ flags being set) become fatal. The process will call .Xr abort 3 in these cases. -@roff_balance@@roff_tls@.It B -@roff_balance@@roff_tls@Double/halve the per-arena lock contention threshold at -@roff_balance@@roff_tls@which a thread is randomly re-assigned to an arena. -@roff_balance@@roff_tls@This dynamic load balancing tends to push threads away -@roff_balance@@roff_tls@from highly contended arenas, which avoids worst case -@roff_balance@@roff_tls@contention scenarios in which threads disproportionately -@roff_balance@@roff_tls@utilize arenas. -@roff_balance@@roff_tls@However, due to the highly dynamic load that -@roff_balance@@roff_tls@applications may place on the allocator, it is -@roff_balance@@roff_tls@impossible for the allocator to know in advance how -@roff_balance@@roff_tls@sensitive it should be to contention over arenas. -@roff_balance@@roff_tls@Therefore, some applications may benefit from increasing -@roff_balance@@roff_tls@or decreasing this threshold parameter. .It C Double/halve the size of the maximum size class that is a multiple of the cacheline size (64). 
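[Note] The deleted `B` flag documentation above describes the balancer in words: track per-arena lock contention as an exponential moving average and randomly re-assign a thread once the average crosses a threshold. As a standalone illustration of that update (mirroring the arena_lock_balance() code removed further down in this patch, with 1/alpha = 2^9 as in BALANCE_ALPHA_INV_2POW), here is a minimal sketch; toy_arena_t and should_rebalance() are illustrative names, not jemalloc types.

#include <stdbool.h>
#include <stdint.h>

#define ALPHA_INV_2POW 9	/* 1/alpha ~= 2^9: a long contention history window */

typedef struct {
	uint32_t contention;	/* exponentially averaged lock contention */
} toy_arena_t;

/*
 * avg <- (avg * (2^k - 1) + sample) >> k, in integer math.  As the deleted
 * comment notes, integer rounding makes this decay somewhat faster than an
 * exact exponential moving average would.
 */
static uint32_t
contention_ema(uint32_t avg, uint32_t sample)
{
	return ((uint32_t)((((uint64_t)avg * ((1U << ALPHA_INV_2POW) - 1))
	    + sample) >> ALPHA_INV_2POW));
}

/*
 * Fold the spin count observed while acquiring an arena lock into the
 * average; past the threshold, tell the caller to move this thread to a
 * different (randomly chosen) arena, as arena_lock_balance_hard() did.
 */
static bool
should_rebalance(toy_arena_t *arena, uint32_t spins, uint32_t threshold)
{
	arena->contention = contention_ema(arena->contention, spins);
	if (arena->contention >= threshold) {
		arena->contention = 0;
		return (true);
	}
	return (false);
}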
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c index b4dbced8..cdadc5e6 100644 --- a/jemalloc/src/jemalloc.c +++ b/jemalloc/src/jemalloc.c @@ -237,10 +237,6 @@ __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.183 2008/12/01 10:20:59 jas /* JEMALLOC_MAG requires TLS. */ # ifdef JEMALLOC_MAG # undef JEMALLOC_MAG -# endif - /* JEMALLOC_BALANCE requires TLS. */ -# ifdef JEMALLOC_BALANCE -# undef JEMALLOC_BALANCE # endif #endif @@ -315,20 +311,6 @@ __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.183 2008/12/01 10:20:59 jas /* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */ #define RUN_MAX_SMALL (12 * PAGE_SIZE) -/* - * Adaptive spinning must eventually switch to blocking, in order to avoid the - * potential for priority inversion deadlock. Backing off past a certain point - * can actually waste time. - */ -#define SPIN_LIMIT_2POW 11 - -/* - * Conversion from spinning to blocking is expensive; we use (1U << - * BLOCK_COST_2POW) to estimate how many more times costly blocking is than - * worst-case spinning. - */ -#define BLOCK_COST_2POW 4 - #ifdef JEMALLOC_MAG /* * Default magazine size, in bytes. max_rounds is calculated to make @@ -338,28 +320,9 @@ __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.183 2008/12/01 10:20:59 jas # define MAG_SIZE_2POW_DEFAULT 9 #endif -#ifdef JEMALLOC_BALANCE - /* - * We use an exponential moving average to track recent lock contention, - * where the size of the history window is N, and alpha=2/(N+1). - * - * Due to integer math rounding, very small values here can cause - * substantial degradation in accuracy, thus making the moving average decay - * faster than it would with precise calculation. - */ -# define BALANCE_ALPHA_INV_2POW 9 - - /* - * Threshold value for the exponential moving contention average at which to - * re-assign a thread. - */ -# define BALANCE_THRESHOLD_DEFAULT (1U << (SPIN_LIMIT_2POW-4)) -#endif - /******************************************************************************/ typedef pthread_mutex_t malloc_mutex_t; -typedef pthread_mutex_t malloc_spinlock_t; /* Set to true once the allocator has been initialized. */ static bool malloc_initialized = false; @@ -428,11 +391,6 @@ struct arena_stats_s { size_t allocated_large; uint64_t nmalloc_large; uint64_t ndalloc_large; - -#ifdef JEMALLOC_BALANCE - /* Number of times this arena reassigned a thread due to contention. */ - uint64_t nbalance; -#endif }; typedef struct chunk_stats_s chunk_stats_t; @@ -628,7 +586,7 @@ struct arena_s { #endif /* All operations on this arena require that lock be locked. */ - pthread_mutex_t lock; + malloc_mutex_t lock; #ifdef JEMALLOC_STATS arena_stats_t stats; @@ -670,14 +628,6 @@ struct arena_s { */ arena_avail_tree_t runs_avail; -#ifdef JEMALLOC_BALANCE - /* - * The arena load balancing machinery needs to keep track of how much - * lock contention there is. This value is exponentially averaged. - */ - uint32_t contention; -#endif - /* * bins is used to store rings of free regions of the following sizes, * assuming a 16-byte quantum, 4kB page size, and default @@ -1010,13 +960,9 @@ static size_t base_mapped; static arena_t **arenas; static unsigned narenas; #ifndef NO_TLS -# ifdef JEMALLOC_BALANCE -static unsigned narenas_2pow; -# else static unsigned next_arena; -# endif #endif -static pthread_mutex_t arenas_lock; /* Protects arenas initialization. */ +static malloc_mutex_t arenas_lock; /* Protects arenas initialization. 
*/ #ifndef NO_TLS /* @@ -1083,9 +1029,6 @@ static bool opt_mag = true; static size_t opt_mag_size_2pow = MAG_SIZE_2POW_DEFAULT; #endif static size_t opt_dirty_max = DIRTY_MAX_DEFAULT; -#ifdef JEMALLOC_BALANCE -static uint64_t opt_balance_threshold = BALANCE_THRESHOLD_DEFAULT; -#endif static bool opt_print_stats = false; static size_t opt_qspace_max_2pow = QSPACE_MAX_2POW_DEFAULT; static size_t opt_cspace_max_2pow = CSPACE_MAX_2POW_DEFAULT; @@ -1110,7 +1053,9 @@ static int opt_narenas_lshift = 0; */ static bool malloc_mutex_init(malloc_mutex_t *mutex); -static bool malloc_spin_init(pthread_mutex_t *lock); +#ifdef JEMALLOC_TINY +static size_t pow2_ceil(size_t x); +#endif static void wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4); #ifdef JEMALLOC_STATS @@ -1161,9 +1106,6 @@ static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size); -#ifdef JEMALLOC_BALANCE -static void arena_lock_balance_hard(arena_t *arena); -#endif #ifdef JEMALLOC_MAG static void mag_load(mag_t *mag); #endif @@ -1396,71 +1338,6 @@ malloc_mutex_unlock(malloc_mutex_t *mutex) * End mutex. */ /******************************************************************************/ -/* - * Begin spin lock. Spin locks here are actually adaptive mutexes that block - * after a period of spinning, because unbounded spinning would allow for - * priority inversion. - */ - -static bool -malloc_spin_init(pthread_mutex_t *lock) -{ - - if (pthread_mutex_init(lock, NULL) != 0) - return (true); - - return (false); -} - -static inline unsigned -malloc_spin_lock(pthread_mutex_t *lock) -{ - unsigned ret = 0; - - if (isthreaded) { - if (pthread_mutex_trylock(lock) != 0) { - /* Exponentially back off if there are multiple CPUs. */ - if (ncpus > 1) { - unsigned i; - volatile unsigned j; - - for (i = 1; i <= SPIN_LIMIT_2POW; i++) { - for (j = 0; j < (1U << i); j++) { - ret++; - CPU_SPINWAIT; - } - - if (pthread_mutex_trylock(lock) == 0) - return (ret); - } - } - - /* - * Spinning failed. Block until the lock becomes - * available, in order to avoid indefinite priority - * inversion. - */ - pthread_mutex_lock(lock); - assert((ret << BLOCK_COST_2POW) != 0 || ncpus == 1); - return (ret << BLOCK_COST_2POW); - } - } - - return (ret); -} - -static inline void -malloc_spin_unlock(pthread_mutex_t *lock) -{ - - if (isthreaded) - pthread_mutex_unlock(lock); -} - -/* - * End spin lock. - */ -/******************************************************************************/ /* * Begin Utility functions/macros. */ @@ -1495,7 +1372,7 @@ malloc_spin_unlock(pthread_mutex_t *lock) #ifdef JEMALLOC_TINY /* Compute the smallest power of 2 that is >= x. */ -static inline size_t +static size_t pow2_ceil(size_t x) { @@ -1513,56 +1390,6 @@ pow2_ceil(size_t x) } #endif -#ifdef JEMALLOC_BALANCE -/* - * Use a simple linear congruential pseudo-random number generator: - * - * prn(y) = (a*x + c) % m - * - * where the following constants ensure maximal period: - * - * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4. - * c == Odd number (relatively prime to 2^n). - * m == 2^32 - * - * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. - * - * This choice of m has the disadvantage that the quality of the bits is - * proportional to bit position. For example. 
the lowest bit has a cycle of 2, - * the next has a cycle of 4, etc. For this reason, we prefer to use the upper - * bits. - */ -# define PRN_DEFINE(suffix, var, a, c) \ -static inline void \ -sprn_##suffix(uint32_t seed) \ -{ \ - var = seed; \ -} \ - \ -static inline uint32_t \ -prn_##suffix(uint32_t lg_range) \ -{ \ - uint32_t ret, x; \ - \ - assert(lg_range > 0); \ - assert(lg_range <= 32); \ - \ - x = (var * (a)) + (c); \ - var = x; \ - ret = x >> (32 - lg_range); \ - \ - return (ret); \ -} -# define SPRN(suffix, seed) sprn_##suffix(seed) -# define PRN(suffix, lg_range) prn_##suffix(lg_range) -#endif - -#ifdef JEMALLOC_BALANCE -/* Define the PRNG used for arena assignment. */ -static __thread uint32_t balance_x; -PRN_DEFINE(balance, balance_x, 1297, 1301) -#endif - /******************************************************************************/ #ifdef JEMALLOC_DSS @@ -2347,12 +2174,12 @@ choose_arena(void) * Avoid races with another thread that may have already * initialized arenas[ind]. */ - malloc_spin_lock(&arenas_lock); + malloc_mutex_lock(&arenas_lock); if (arenas[ind] == NULL) ret = arenas_extend((unsigned)ind); else ret = arenas[ind]; - malloc_spin_unlock(&arenas_lock); + malloc_mutex_unlock(&arenas_lock); } } else ret = arenas[0]; @@ -2374,29 +2201,12 @@ choose_arena_hard(void) assert(isthreaded); -#ifdef JEMALLOC_BALANCE - /* Seed the PRNG used for arena load balancing. */ - SPRN(balance, (uint32_t)(uintptr_t)(pthread_self())); -#endif - if (narenas > 1) { -#ifdef JEMALLOC_BALANCE - unsigned ind; - - ind = PRN(balance, narenas_2pow); - if ((ret = arenas[ind]) == NULL) { - malloc_spin_lock(&arenas_lock); - if ((ret = arenas[ind]) == NULL) - ret = arenas_extend(ind); - malloc_spin_unlock(&arenas_lock); - } -#else - malloc_spin_lock(&arenas_lock); + malloc_mutex_lock(&arenas_lock); if ((ret = arenas[next_arena]) == NULL) ret = arenas_extend(next_arena); next_arena = (next_arena + 1) % narenas; - malloc_spin_unlock(&arenas_lock); -#endif + malloc_mutex_unlock(&arenas_lock); } else ret = arenas[0]; @@ -3225,50 +3035,6 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) return (good_run_size); } -#ifdef JEMALLOC_BALANCE -static inline void -arena_lock_balance(arena_t *arena) -{ - unsigned contention; - - contention = malloc_spin_lock(&arena->lock); - if (narenas > 1) { - /* - * Calculate the exponentially averaged contention for this - * arena. Due to integer math always rounding down, this value - * decays somewhat faster than normal. 
- */ - arena->contention = (((uint64_t)arena->contention - * (uint64_t)((1U << BALANCE_ALPHA_INV_2POW)-1)) - + (uint64_t)contention) >> BALANCE_ALPHA_INV_2POW; - if (arena->contention >= opt_balance_threshold) - arena_lock_balance_hard(arena); - } -} - -static void -arena_lock_balance_hard(arena_t *arena) -{ - uint32_t ind; - - arena->contention = 0; -#ifdef JEMALLOC_STATS - arena->stats.nbalance++; -#endif - ind = PRN(balance, narenas_2pow); - if (arenas[ind] != NULL) - arenas_map = arenas[ind]; - else { - malloc_spin_lock(&arenas_lock); - if (arenas[ind] != NULL) - arenas_map = arenas[ind]; - else - arenas_map = arenas_extend(ind); - malloc_spin_unlock(&arenas_lock); - } -} -#endif - #ifdef JEMALLOC_MAG static inline void * mag_alloc(mag_t *mag) @@ -3292,11 +3058,7 @@ mag_load(mag_t *mag) arena = choose_arena(); bin = &arena->bins[mag->binind]; -#ifdef JEMALLOC_BALANCE - arena_lock_balance(arena); -#else - malloc_spin_lock(&arena->lock); -#endif + malloc_mutex_lock(&arena->lock); for (i = mag->nrounds; i < max_rounds; i++) { if ((run = bin->runcur) != NULL && run->nfree > 0) round = arena_bin_malloc_easy(arena, bin, run); @@ -3311,7 +3073,7 @@ mag_load(mag_t *mag) arena->stats.nmalloc_small += (i - mag->nrounds); arena->stats.allocated_small += (i - mag->nrounds) * bin->reg_size; #endif - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); mag->nrounds = i; } @@ -3391,18 +3153,14 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) bin = &arena->bins[binind]; size = bin->reg_size; -#ifdef JEMALLOC_BALANCE - arena_lock_balance(arena); -#else - malloc_spin_lock(&arena->lock); -#endif + malloc_mutex_lock(&arena->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) ret = arena_bin_malloc_easy(arena, bin, run); else ret = arena_bin_malloc_hard(arena, bin); if (ret == NULL) { - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); return (NULL); } @@ -3411,7 +3169,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) arena->stats.nmalloc_small++; arena->stats.allocated_small += size; #endif - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); if (zero == false) { #ifdef JEMALLOC_FILL @@ -3433,21 +3191,17 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) /* Large allocation. 
*/ size = PAGE_CEILING(size); -#ifdef JEMALLOC_BALANCE - arena_lock_balance(arena); -#else - malloc_spin_lock(&arena->lock); -#endif + malloc_mutex_lock(&arena->lock); ret = (void *)arena_run_alloc(arena, size, true, zero); if (ret == NULL) { - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); return (NULL); } #ifdef JEMALLOC_STATS arena->stats.nmalloc_large++; arena->stats.allocated_large += size; #endif - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); if (zero == false) { #ifdef JEMALLOC_FILL @@ -3520,14 +3274,10 @@ arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size) assert((size & PAGE_MASK) == 0); assert((alignment & PAGE_MASK) == 0); -#ifdef JEMALLOC_BALANCE - arena_lock_balance(arena); -#else - malloc_spin_lock(&arena->lock); -#endif + malloc_mutex_lock(&arena->lock); ret = (void *)arena_run_alloc(arena, alloc_size, true, false); if (ret == NULL) { - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); return (NULL); } @@ -3561,7 +3311,7 @@ arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size) arena->stats.nmalloc_large++; arena->stats.allocated_large += size; #endif - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); #ifdef JEMALLOC_FILL if (opt_junk) @@ -3826,11 +3576,7 @@ mag_unload(mag_t *mag) /* Lock the arena associated with the first round. */ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mag->rounds[0]); arena = chunk->arena; -#ifdef JEMALLOC_BALANCE - arena_lock_balance(arena); -#else - malloc_spin_lock(&arena->lock); -#endif + malloc_mutex_lock(&arena->lock); /* Deallocate every round that belongs to the locked arena. */ for (i = ndeferred = 0; i < nrounds; i++) { round = mag->rounds[i]; @@ -3852,7 +3598,7 @@ mag_unload(mag_t *mag) ndeferred++; } } - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); } mag->nrounds = 0; @@ -3893,9 +3639,9 @@ mag_rack_dalloc(mag_rack_t *rack, void *ptr) assert(bin_mags->sparemag == NULL); mag = mag_create(choose_arena(), binind); if (mag == NULL) { - malloc_spin_lock(&arena->lock); + malloc_mutex_lock(&arena->lock); arena_dalloc_small(arena, chunk, ptr, mapelm); - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); return; } bin_mags->curmag = mag; @@ -3934,7 +3680,7 @@ static void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) { /* Large allocation. */ - malloc_spin_lock(&arena->lock); + malloc_mutex_lock(&arena->lock); #ifdef JEMALLOC_FILL #ifndef JEMALLOC_STATS @@ -3963,7 +3709,7 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) #endif arena_run_dalloc(arena, (arena_run_t *)ptr, true); - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); } static inline void @@ -3992,10 +3738,10 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) if (rack == NULL) { rack = mag_rack_create(arena); if (rack == NULL) { - malloc_spin_lock(&arena->lock); + malloc_mutex_lock(&arena->lock); arena_dalloc_small(arena, chunk, ptr, mapelm); - malloc_spin_unlock( + malloc_mutex_unlock( &arena->lock); } mag_rack = rack; @@ -4007,17 +3753,17 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) * directly deallocate. 
*/ assert(rack == (void *)(uintptr_t)1); - malloc_spin_lock(&arena->lock); + malloc_mutex_lock(&arena->lock); arena_dalloc_small(arena, chunk, ptr, mapelm); - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); } } } else { #endif - malloc_spin_lock(&arena->lock); + malloc_mutex_lock(&arena->lock); arena_dalloc_small(arena, chunk, ptr, mapelm); - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); #ifdef JEMALLOC_MAG } #endif @@ -4050,17 +3796,13 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, * Shrink the run, and make trailing pages available for other * allocations. */ -#ifdef JEMALLOC_BALANCE - arena_lock_balance(arena); -#else - malloc_spin_lock(&arena->lock); -#endif + malloc_mutex_lock(&arena->lock); arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size, true); #ifdef JEMALLOC_STATS arena->stats.allocated_large -= oldsize - size; #endif - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); } static bool @@ -4074,11 +3816,7 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, /* Try to extend the run. */ assert(size > oldsize); -#ifdef JEMALLOC_BALANCE - arena_lock_balance(arena); -#else - malloc_spin_lock(&arena->lock); -#endif + malloc_mutex_lock(&arena->lock); if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits & CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits & ~PAGE_MASK) >= size - oldsize) { @@ -4099,10 +3837,10 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, #ifdef JEMALLOC_STATS arena->stats.allocated_large += size - oldsize; #endif - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); return (false); } - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); return (true); } @@ -4225,7 +3963,7 @@ arena_new(arena_t *arena, unsigned ind) arena_bin_t *bin; size_t prev_run_size; - if (malloc_spin_init(&arena->lock)) + if (malloc_mutex_init(&arena->lock)) return (true); #ifdef JEMALLOC_STATS @@ -4283,10 +4021,6 @@ arena_new(arena_t *arena, unsigned ind) arena_avail_tree_new(&arena->runs_avail); -#ifdef JEMALLOC_BALANCE - arena->contention = 0; -#endif - /* Initialize bins. 
*/ prev_run_size = PAGE_SIZE; @@ -4429,9 +4163,9 @@ mag_destroy(mag_t *mag) assert(mag->nrounds == 0); if (sizeof(mag_t) + (sizeof(void *) * (max_rounds - 1)) <= bin_maxclass) { - malloc_spin_lock(&arena->lock); + malloc_mutex_lock(&arena->lock); arena_dalloc_small(arena, chunk, mag, mapelm); - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); } else idalloc(mag); } @@ -4474,9 +4208,9 @@ mag_rack_destroy(mag_rack_t *rack) pageind = (((uintptr_t)rack - (uintptr_t)chunk) >> PAGE_SHIFT); mapelm = &chunk->map[pageind]; - malloc_spin_lock(&arena->lock); + malloc_mutex_lock(&arena->lock); arena_dalloc_small(arena, chunk, rack, mapelm); - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); } #endif @@ -4767,9 +4501,9 @@ malloc_trace_flush_all(void) for (i = 0; i < narenas; i++) { if (arenas[i] != NULL) { - malloc_spin_lock(&arenas[i]->lock); + malloc_mutex_lock(&arenas[i]->lock); trace_flush(arenas[i]); - malloc_spin_unlock(&arenas[i]->lock); + malloc_mutex_unlock(&arenas[i]->lock); } } } @@ -4780,7 +4514,7 @@ trace_malloc(const void *ptr, size_t size) char buf[UMAX2S_BUFSIZE]; arena_t *arena = trace_arena(ptr); - malloc_spin_lock(&arena->lock); + malloc_mutex_lock(&arena->lock); trace_write(arena, umax2s(trace_get_tid(), 10, buf)); trace_write(arena, " m 0x"); @@ -4789,7 +4523,7 @@ trace_malloc(const void *ptr, size_t size) trace_write(arena, umax2s(size, 10, buf)); trace_write(arena, "\n"); - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); } static void @@ -4798,7 +4532,7 @@ trace_calloc(const void *ptr, size_t number, size_t size) char buf[UMAX2S_BUFSIZE]; arena_t *arena = trace_arena(ptr); - malloc_spin_lock(&arena->lock); + malloc_mutex_lock(&arena->lock); trace_write(arena, umax2s(trace_get_tid(), 10, buf)); trace_write(arena, " c 0x"); @@ -4809,7 +4543,7 @@ trace_calloc(const void *ptr, size_t number, size_t size) trace_write(arena, umax2s(size, 10, buf)); trace_write(arena, "\n"); - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); } static void @@ -4818,7 +4552,7 @@ trace_posix_memalign(const void *ptr, size_t alignment, size_t size) char buf[UMAX2S_BUFSIZE]; arena_t *arena = trace_arena(ptr); - malloc_spin_lock(&arena->lock); + malloc_mutex_lock(&arena->lock); trace_write(arena, umax2s(trace_get_tid(), 10, buf)); trace_write(arena, " a 0x"); @@ -4829,7 +4563,7 @@ trace_posix_memalign(const void *ptr, size_t alignment, size_t size) trace_write(arena, umax2s(size, 10, buf)); trace_write(arena, "\n"); - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); } static void @@ -4839,7 +4573,7 @@ trace_realloc(const void *ptr, const void *old_ptr, size_t size, char buf[UMAX2S_BUFSIZE]; arena_t *arena = trace_arena(ptr); - malloc_spin_lock(&arena->lock); + malloc_mutex_lock(&arena->lock); trace_write(arena, umax2s(trace_get_tid(), 10, buf)); trace_write(arena, " r 0x"); @@ -4852,7 +4586,7 @@ trace_realloc(const void *ptr, const void *old_ptr, size_t size, trace_write(arena, umax2s(old_size, 10, buf)); trace_write(arena, "\n"); - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); } static void @@ -4861,7 +4595,7 @@ trace_free(const void *ptr, size_t size) char buf[UMAX2S_BUFSIZE]; arena_t *arena = trace_arena(ptr); - malloc_spin_lock(&arena->lock); + malloc_mutex_lock(&arena->lock); trace_write(arena, umax2s(trace_get_tid(), 10, buf)); trace_write(arena, " f 0x"); @@ -4870,7 +4604,7 @@ trace_free(const void *ptr, size_t size) trace_write(arena, umax2s(isalloc(ptr), 10, buf)); 
trace_write(arena, "\n"); - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); } static void @@ -4879,7 +4613,7 @@ trace_malloc_usable_size(size_t size, const void *ptr) char buf[UMAX2S_BUFSIZE]; arena_t *arena = trace_arena(ptr); - malloc_spin_lock(&arena->lock); + malloc_mutex_lock(&arena->lock); trace_write(arena, umax2s(trace_get_tid(), 10, buf)); trace_write(arena, " s "); @@ -4888,7 +4622,7 @@ trace_malloc_usable_size(size_t size, const void *ptr) trace_write(arena, umax2s((uintptr_t)ptr, 16, buf)); trace_write(arena, "\n"); - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); } static void @@ -4897,12 +4631,12 @@ trace_thread_exit(void) char buf[UMAX2S_BUFSIZE]; arena_t *arena = choose_arena(); - malloc_spin_lock(&arena->lock); + malloc_mutex_lock(&arena->lock); trace_write(arena, umax2s(trace_get_tid(), 10, buf)); trace_write(arena, " x\n"); - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); } #endif @@ -4950,10 +4684,6 @@ malloc_print_stats(void) malloc_message("CPUs: ", umax2s(ncpus, 10, s), "\n", ""); malloc_message("Max arenas: ", umax2s(narenas, 10, s), "\n", ""); -#ifdef JEMALLOC_BALANCE - malloc_message("Arena balance threshold: ", - umax2s(opt_balance_threshold, 10, s), "\n", ""); -#endif malloc_message("Pointer size: ", umax2s(sizeof(void *), 10, s), "\n", ""); malloc_message("Quantum size: ", umax2s(QUANTUM, 10, s), "\n", ""); @@ -4986,9 +4716,6 @@ malloc_print_stats(void) #ifdef JEMALLOC_STATS { size_t allocated, mapped; -#ifdef JEMALLOC_BALANCE - uint64_t nbalance = 0; -#endif unsigned i; arena_t *arena; @@ -4997,13 +4724,10 @@ malloc_print_stats(void) /* arenas. */ for (i = 0, allocated = 0; i < narenas; i++) { if (arenas[i] != NULL) { - malloc_spin_lock(&arenas[i]->lock); + malloc_mutex_lock(&arenas[i]->lock); allocated += arenas[i]->stats.allocated_small; allocated += arenas[i]->stats.allocated_large; -#ifdef JEMALLOC_BALANCE - nbalance += arenas[i]->stats.nbalance; -#endif - malloc_spin_unlock(&arenas[i]->lock); + malloc_mutex_unlock(&arenas[i]->lock); } } @@ -5020,10 +4744,6 @@ malloc_print_stats(void) malloc_printf("Allocated: %zu, mapped: %zu\n", allocated, mapped); -#ifdef JEMALLOC_BALANCE - malloc_printf("Arena balance reassignments: %llu\n", nbalance); -#endif - /* Print chunk stats. */ { chunk_stats_t chunks_stats; @@ -5050,9 +4770,9 @@ malloc_print_stats(void) arena = arenas[i]; if (arena != NULL) { malloc_printf("\narenas[%u]:\n", i); - malloc_spin_lock(&arena->lock); + malloc_mutex_lock(&arena->lock); stats_print(arena); - malloc_spin_unlock(&arena->lock); + malloc_mutex_unlock(&arena->lock); } } } @@ -5337,20 +5057,6 @@ MALLOC_OUT: case 'A': opt_abort = true; break; - case 'b': -#ifdef JEMALLOC_BALANCE - opt_balance_threshold >>= 1; -#endif - break; - case 'B': -#ifdef JEMALLOC_BALANCE - if (opt_balance_threshold == 0) - opt_balance_threshold = 1; - else if ((opt_balance_threshold << 1) - > opt_balance_threshold) - opt_balance_threshold <<= 1; -#endif - break; case 'c': if (opt_cspace_max_2pow - 1 > opt_qspace_max_2pow && @@ -5675,15 +5381,8 @@ MALLOC_OUT: abort(); } #endif - /* - * Seed here for the initial thread, since choose_arena_hard() is only - * called for other threads. The seed value doesn't really matter. - */ -#ifdef JEMALLOC_BALANCE - SPRN(balance, 42); -#endif - malloc_spin_init(&arenas_lock); + malloc_mutex_init(&arenas_lock); /* Get number of CPUs. 
*/ malloc_initializer = pthread_self(); @@ -5717,12 +5416,6 @@ MALLOC_OUT: if (narenas == 0) narenas = 1; } -#ifdef JEMALLOC_BALANCE - assert(narenas != 0); - for (narenas_2pow = 0; - (narenas >> (narenas_2pow + 1)) != 0; - narenas_2pow++); -#endif #ifdef NO_TLS if (narenas > 1) { @@ -5752,9 +5445,7 @@ MALLOC_OUT: #endif #ifndef NO_TLS -# ifndef JEMALLOC_BALANCE next_arena = 0; -# endif #endif /* Allocate and initialize arenas. */ @@ -6120,16 +5811,16 @@ jemalloc_prefork(void) do { again = false; - malloc_spin_lock(&arenas_lock); + malloc_mutex_lock(&arenas_lock); for (i = 0; i < narenas; i++) { if (arenas[i] != larenas[i]) { memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); - malloc_spin_unlock(&arenas_lock); + malloc_mutex_unlock(&arenas_lock); for (j = 0; j < narenas; j++) { if (larenas[j] != tarenas[j]) { larenas[j] = tarenas[j]; - malloc_spin_lock( + malloc_mutex_lock( &larenas[j]->lock); } } @@ -6165,10 +5856,10 @@ jemalloc_postfork(void) malloc_mutex_unlock(&base_mtx); memcpy(larenas, arenas, sizeof(arena_t *) * narenas); - malloc_spin_unlock(&arenas_lock); + malloc_mutex_unlock(&arenas_lock); for (i = 0; i < narenas; i++) { if (larenas[i] != NULL) - malloc_spin_unlock(&larenas[i]->lock); + malloc_mutex_unlock(&larenas[i]->lock); } } diff --git a/jemalloc/src/jemalloc_defs.h.in b/jemalloc/src/jemalloc_defs.h.in index 12fc2aaa..5a822112 100644 --- a/jemalloc/src/jemalloc_defs.h.in +++ b/jemalloc/src/jemalloc_defs.h.in @@ -97,13 +97,6 @@ */ #undef JEMALLOC_MAG -/* - * JEMALLOC_BALANCE enables monitoring of arena lock contention and dynamically - * re-balances arena load if exponentially averaged contention exceeds a - * certain threshold. - */ -#undef JEMALLOC_BALANCE - /* * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage * segment (DSS).
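[Note] For reference, the adaptive spin locks this patch replaces with plain malloc_mutex_t worked as follows: try the lock, spin with exponential backoff for up to 2^SPIN_LIMIT_2POW iterations, then block so that priority inversion cannot stall the lock holder indefinitely, and report the acquisition cost in spin-equivalents, charging a block as 2^BLOCK_COST_2POW worst-case spins. A condensed standalone version, assuming POSIX threads and omitting the CPU_SPINWAIT pause hint:

#include <pthread.h>

#define SPIN_LIMIT_2POW	11	/* stop spinning after ~2^11 iterations */
#define BLOCK_COST_2POW	4	/* one block ~= 2^4 worst-case spins */

/*
 * Acquire lock, spinning briefly before blocking.  Returns an estimate of
 * the acquisition cost in spin iterations, which the removed balancer fed
 * into its per-arena contention average.
 */
static unsigned
adaptive_lock(pthread_mutex_t *lock, unsigned ncpus)
{
	unsigned ret = 0;

	if (pthread_mutex_trylock(lock) == 0)
		return (0);

	if (ncpus > 1) {
		unsigned i;
		volatile unsigned j;

		/* Exponential backoff: 2, 4, 8, ... iterations per retry. */
		for (i = 1; i <= SPIN_LIMIT_2POW; i++) {
			for (j = 0; j < (1U << i); j++)
				ret++;
			if (pthread_mutex_trylock(lock) == 0)
				return (ret);
		}
	}

	/*
	 * Spinning failed (or there is only one CPU): block, to avoid
	 * unbounded priority inversion, and charge it at a fixed spin cost.
	 */
	pthread_mutex_lock(lock);
	return (ret << BLOCK_COST_2POW);
}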
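[Note] The deleted PRN_DEFINE macro generated random arena indices with a 32-bit linear congruential generator, x = (a*x + c) mod 2^32, taking the requested bits from the top of the word because the low bit of such a generator has period 2, the next bit period 4, and so on. The same idea as plain functions, using the a = 1297, c = 1301 constants the removed balance PRNG used, and process-global rather than per-thread state for brevity:

#include <assert.h>
#include <stdint.h>

static uint32_t prn_state;	/* the removed code kept this in __thread storage */

static void
prn_seed(uint32_t seed)
{
	prn_state = seed;
}

/* Return lg_range pseudo-random bits taken from the high end of the word. */
static uint32_t
prn(uint32_t lg_range)
{
	assert(lg_range > 0 && lg_range <= 32);

	prn_state = (prn_state * 1297U) + 1301U;
	return (prn_state >> (32 - lg_range));
}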
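[Note] With the balancer gone, thread-to-arena assignment reduces to the round-robin scheme visible in the new choose_arena_hard(): under arenas_lock, hand out the next arena (creating it lazily) and cache the result in the thread's TLS slot. Roughly, with toy_arena_create() standing in for arenas_extend():

#include <pthread.h>
#include <stdlib.h>

#define NARENAS	4

static pthread_mutex_t arenas_lock = PTHREAD_MUTEX_INITIALIZER;
static void *arenas[NARENAS];		/* stand-in for arena_t *arenas[] */
static unsigned next_arena;		/* protected by arenas_lock */
static __thread void *arenas_map;	/* this thread's cached assignment */

static void *
toy_arena_create(unsigned ind)
{
	(void)ind;
	return (malloc(64));		/* stand-in for arenas_extend(ind) */
}

/* Slow path, run once per thread; later allocations just use arenas_map. */
static void *
choose_arena_slow(void)
{
	void *ret;

	pthread_mutex_lock(&arenas_lock);
	if ((ret = arenas[next_arena]) == NULL)
		ret = arenas[next_arena] = toy_arena_create(next_arena);
	next_arena = (next_arena + 1) % NARENAS;
	pthread_mutex_unlock(&arenas_lock);

	arenas_map = ret;
	return (ret);
}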
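[Note] Finally, the commit message's rationale: with --enable-mag, each thread allocates and frees through a magazine, a fixed-capacity per-thread stack of cached objects ("rounds") for a given size class, and only takes an arena lock when a magazine must be loaded or unloaded in bulk (mag_load()/mag_unload() above). Since threads rarely contend on arena locks in that configuration, contention-driven rebalancing buys little. A minimal sketch of the magazine fast path; toy_mag_t is a simplified stand-in for jemalloc's mag_t:

#include <stdbool.h>
#include <stddef.h>

typedef struct {
	size_t nrounds;		/* number of cached objects currently held */
	size_t max_rounds;	/* capacity */
	void *rounds[1];	/* really max_rounds entries, allocated inline */
} toy_mag_t;

/* Allocation fast path: pop a cached object; no arena lock involved. */
static void *
mag_pop(toy_mag_t *mag)
{
	if (mag->nrounds == 0)
		return (NULL);	/* empty: refill in bulk under arena->lock */
	return (mag->rounds[--mag->nrounds]);
}

/* Deallocation fast path: push the object back if there is room. */
static bool
mag_push(toy_mag_t *mag, void *obj)
{
	if (mag->nrounds == mag->max_rounds)
		return (false);	/* full: flush rounds to their arenas in bulk */
	mag->rounds[mag->nrounds++] = obj;
	return (true);
}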