From e2bcf037d445a84a71c7997670819ebd0a893b4a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 13 Oct 2016 12:18:38 -0700 Subject: [PATCH] Make dss operations lockless. Rather than protecting dss operations with a mutex, use atomic operations. This has negligible impact on synchronization overhead during typical dss allocation, but is a substantial improvement for chunk_in_dss() and the newly added chunk_dss_mergeable(), which can be called multiple times during chunk deallocations. This change also has the advantage of avoiding tsd in deallocation paths associated with purging, which resolves potential deadlocks during thread exit due to attempted tsd resurrection. This resolves #425. --- include/jemalloc/internal/chunk.h | 3 - include/jemalloc/internal/chunk_dss.h | 12 +- include/jemalloc/internal/huge.h | 2 +- include/jemalloc/internal/private_symbols.txt | 7 +- src/arena.c | 2 +- src/chunk.c | 46 +---- src/chunk_dss.c | 193 ++++++++++-------- src/ctl.c | 4 +- src/huge.c | 8 +- src/jemalloc.c | 6 +- test/unit/junk.c | 4 +- 11 files changed, 134 insertions(+), 153 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index c9fd4ecb..e199a037 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -71,9 +71,6 @@ bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); bool chunk_boot(void); -void chunk_prefork(tsdn_t *tsdn); -void chunk_postfork_parent(tsdn_t *tsdn); -void chunk_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 724fa579..da8511ba 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -21,15 +21,13 @@ extern const char *dss_prec_names[]; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -dss_prec_t chunk_dss_prec_get(tsdn_t *tsdn); -bool chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec); +dss_prec_t chunk_dss_prec_get(void); +bool chunk_dss_prec_set(dss_prec_t dss_prec); void *chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -bool chunk_in_dss(tsdn_t *tsdn, void *chunk); -bool chunk_dss_boot(void); -void chunk_dss_prefork(tsdn_t *tsdn); -void chunk_dss_postfork_parent(tsdn_t *tsdn); -void chunk_dss_postfork_child(tsdn_t *tsdn); +bool chunk_in_dss(void *chunk); +bool chunk_dss_mergeable(void *chunk_a, void *chunk_b); +void chunk_dss_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index b5fa9e63..22184d9b 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -17,7 +17,7 @@ bool huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET -typedef void (huge_dalloc_junk_t)(tsdn_t *, void *, size_t); +typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif void huge_dalloc(tsdn_t *tsdn, void *ptr); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index cd6681c8..642c3de7 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -167,20 +167,15 @@ chunk_dalloc_mmap chunk_dalloc_wrapper chunk_deregister chunk_dss_boot -chunk_dss_postfork_child -chunk_dss_postfork_parent +chunk_dss_mergeable chunk_dss_prec_get chunk_dss_prec_set -chunk_dss_prefork chunk_hooks_default chunk_hooks_get chunk_hooks_set chunk_in_dss chunk_lookup chunk_npages -chunk_postfork_child -chunk_postfork_parent -chunk_prefork chunk_purge_wrapper chunk_register chunks_rtree diff --git a/src/arena.c b/src/arena.c index 90b9d822..76514955 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3499,7 +3499,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) (uint64_t)(uintptr_t)arena; } - arena->dss_prec = chunk_dss_prec_get(tsdn); + arena->dss_prec = chunk_dss_prec_get(); ql_new(&arena->achunks); diff --git a/src/chunk.c b/src/chunk.c index dff537f5..302b98cb 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -611,10 +611,10 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } static bool -chunk_dalloc_default_impl(tsdn_t *tsdn, void *chunk, size_t size) +chunk_dalloc_default_impl(void *chunk, size_t size) { - if (!have_dss || !chunk_in_dss(tsdn, chunk)) + if (!have_dss || !chunk_in_dss(chunk)) return (chunk_dalloc_mmap(chunk, size)); return (true); } @@ -623,11 +623,8 @@ static bool chunk_dalloc_default(void *chunk, size_t size, bool committed, unsigned arena_ind) { - tsdn_t *tsdn; - tsdn = tsdn_fetch(); - - return (chunk_dalloc_default_impl(tsdn, chunk, size)); + return (chunk_dalloc_default_impl(chunk, size)); } void @@ -645,7 +642,7 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* Try to deallocate. */ if (chunk_hooks->dalloc == chunk_dalloc_default) { /* Call directly to propagate tsdn. */ - err = chunk_dalloc_default_impl(tsdn, chunk, size); + err = chunk_dalloc_default_impl(chunk, size); } else err = chunk_hooks->dalloc(chunk, size, committed, arena->ind); @@ -718,13 +715,12 @@ chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b, } static bool -chunk_merge_default_impl(tsdn_t *tsdn, void *chunk_a, void *chunk_b) +chunk_merge_default_impl(void *chunk_a, void *chunk_b) { if (!maps_coalesce) return (true); - if (have_dss && chunk_in_dss(tsdn, chunk_a) != chunk_in_dss(tsdn, - chunk_b)) + if (have_dss && !chunk_dss_mergeable(chunk_a, chunk_b)) return (true); return (false); @@ -734,11 +730,8 @@ static bool chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, bool committed, unsigned arena_ind) { - tsdn_t *tsdn; - tsdn = tsdn_fetch(); - - return (chunk_merge_default_impl(tsdn, chunk_a, chunk_b)); + return (chunk_merge_default_impl(chunk_a, chunk_b)); } static rtree_node_elm_t * @@ -782,32 +775,11 @@ chunk_boot(void) chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> LG_PAGE); - if (have_dss && chunk_dss_boot()) - return (true); + if (have_dss) + chunk_dss_boot(); if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk), chunks_rtree_node_alloc, NULL)) return (true); return (false); } - -void -chunk_prefork(tsdn_t *tsdn) -{ - - chunk_dss_prefork(tsdn); -} - -void -chunk_postfork_parent(tsdn_t *tsdn) -{ - - chunk_dss_postfork_parent(tsdn); -} - -void -chunk_postfork_child(tsdn_t *tsdn) -{ - - chunk_dss_postfork_child(tsdn); -} diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 0b1f82bd..85a13548 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -10,20 +10,19 @@ const char *dss_prec_names[] = { "N/A" }; -/* Current dss precedence default, used when creating new arenas. */ -static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT; - /* - * Protects sbrk() calls. This avoids malloc races among threads, though it - * does not protect against races with threads that call sbrk() directly. + * Current dss precedence default, used when creating new arenas. NB: This is + * stored as unsigned rather than dss_prec_t because in principle there's no + * guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use + * atomic operations to synchronize the setting. */ -static malloc_mutex_t dss_mtx; +static unsigned dss_prec_default = (unsigned)DSS_PREC_DEFAULT; /* Base address of the DSS. */ static void *dss_base; -/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ -static void *dss_prev; -/* Current upper limit on DSS addresses. */ +/* Atomic boolean indicating whether the DSS is exhausted. */ +static unsigned dss_exhausted; +/* Atomic current upper limit on DSS addresses. */ static void *dss_max; /******************************************************************************/ @@ -41,30 +40,59 @@ chunk_dss_sbrk(intptr_t increment) } dss_prec_t -chunk_dss_prec_get(tsdn_t *tsdn) +chunk_dss_prec_get(void) { dss_prec_t ret; if (!have_dss) return (dss_prec_disabled); - malloc_mutex_lock(tsdn, &dss_mtx); - ret = dss_prec_default; - malloc_mutex_unlock(tsdn, &dss_mtx); + ret = (dss_prec_t)atomic_read_u(&dss_prec_default); return (ret); } bool -chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec) +chunk_dss_prec_set(dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(tsdn, &dss_mtx); - dss_prec_default = dss_prec; - malloc_mutex_unlock(tsdn, &dss_mtx); + atomic_write_u(&dss_prec_default, (unsigned)dss_prec); return (false); } +static void * +chunk_dss_max_update(void *new_addr) +{ + void *max_cur; + spin_t spinner; + + /* + * Get the current end of the DSS as max_cur and assure that dss_max is + * up to date. + */ + spin_init(&spinner); + while (true) { + void *max_prev = atomic_read_p(&dss_max); + + max_cur = chunk_dss_sbrk(0); + if ((uintptr_t)max_prev > (uintptr_t)max_cur) { + /* + * Another thread optimistically updated dss_max. Wait + * for it to finish. + */ + spin_adaptive(&spinner); + continue; + } + if (!atomic_cas_p(&dss_max, max_prev, max_cur)) + break; + } + /* Fixed new_addr can only be supported if it is at the edge of DSS. */ + if (new_addr != NULL && max_cur != new_addr) + return (NULL); + + return (max_cur); +} + void * chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) @@ -80,28 +108,20 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if ((intptr_t)size < 0) return (NULL); - malloc_mutex_lock(tsdn, &dss_mtx); - if (dss_prev != (void *)-1) { - + if (!atomic_read_u(&dss_exhausted)) { /* * The loop is necessary to recover from races with other * threads that are using the DSS for something other than * malloc. */ - do { - void *ret, *cpad, *dss_next; + while (true) { + void *ret, *cpad, *max_cur, *dss_next, *dss_prev; size_t gap_size, cpad_size; intptr_t incr; - /* Avoid an unnecessary system call. */ - if (new_addr != NULL && dss_max != new_addr) - break; - /* Get the current end of the DSS. */ - dss_max = chunk_dss_sbrk(0); - - /* Make sure the earlier condition still holds. */ - if (new_addr != NULL && dss_max != new_addr) - break; + max_cur = chunk_dss_max_update(new_addr); + if (max_cur == NULL) + goto label_oom; /* * Calculate how much padding is necessary to @@ -120,17 +140,23 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, cpad_size = (uintptr_t)ret - (uintptr_t)cpad; dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || - (uintptr_t)dss_next < (uintptr_t)dss_max) { - /* Wrap-around. */ - malloc_mutex_unlock(tsdn, &dss_mtx); - return (NULL); - } + (uintptr_t)dss_next < (uintptr_t)dss_max) + goto label_oom; /* Wrap-around. */ incr = gap_size + cpad_size + size; + + /* + * Optimistically update dss_max, and roll back below if + * sbrk() fails. No other thread will try to extend the + * DSS while dss_max is greater than the current DSS + * max reported by sbrk(0). + */ + if (atomic_cas_p(&dss_max, max_cur, dss_next)) + continue; + + /* Try to allocate. */ dss_prev = chunk_dss_sbrk(incr); - if (dss_prev == dss_max) { + if (dss_prev == max_cur) { /* Success. */ - dss_max = dss_next; - malloc_mutex_unlock(tsdn, &dss_mtx); if (cpad_size != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; @@ -147,68 +173,65 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, *commit = pages_decommit(ret, size); return (ret); } - } while (dss_prev != (void *)-1); - } - malloc_mutex_unlock(tsdn, &dss_mtx); + /* + * Failure, whether due to OOM or a race with a raw + * sbrk() call from outside the allocator. Try to roll + * back optimistic dss_max update; if rollback fails, + * it's due to another caller of this function having + * succeeded since this invocation started, in which + * case rollback is not necessary. + */ + atomic_cas_p(&dss_max, dss_next, max_cur); + if (dss_prev == (void *)-1) { + /* OOM. */ + atomic_write_u(&dss_exhausted, (unsigned)true); + goto label_oom; + } + } + } +label_oom: return (NULL); } -bool -chunk_in_dss(tsdn_t *tsdn, void *chunk) +static bool +chunk_in_dss_helper(void *chunk, void *max) { - bool ret; - cassert(have_dss); - - malloc_mutex_lock(tsdn, &dss_mtx); - if ((uintptr_t)chunk >= (uintptr_t)dss_base - && (uintptr_t)chunk < (uintptr_t)dss_max) - ret = true; - else - ret = false; - malloc_mutex_unlock(tsdn, &dss_mtx); - - return (ret); + return ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk < + (uintptr_t)max); } bool +chunk_in_dss(void *chunk) +{ + + cassert(have_dss); + + return (chunk_in_dss_helper(chunk, atomic_read_p(&dss_max))); +} + +bool +chunk_dss_mergeable(void *chunk_a, void *chunk_b) +{ + void *max; + + cassert(have_dss); + + max = atomic_read_p(&dss_max); + return (chunk_in_dss_helper(chunk_a, max) == + chunk_in_dss_helper(chunk_b, max)); +} + +void chunk_dss_boot(void) { cassert(have_dss); - if (malloc_mutex_init(&dss_mtx, "dss", WITNESS_RANK_DSS)) - return (true); dss_base = chunk_dss_sbrk(0); - dss_prev = dss_base; + dss_exhausted = (unsigned)(dss_base == (void *)-1); dss_max = dss_base; - - return (false); -} - -void -chunk_dss_prefork(tsdn_t *tsdn) -{ - - if (have_dss) - malloc_mutex_prefork(tsdn, &dss_mtx); -} - -void -chunk_dss_postfork_parent(tsdn_t *tsdn) -{ - - if (have_dss) - malloc_mutex_postfork_parent(tsdn, &dss_mtx); -} - -void -chunk_dss_postfork_child(tsdn_t *tsdn) -{ - - if (have_dss) - malloc_mutex_postfork_child(tsdn, &dss_mtx); } /******************************************************************************/ diff --git a/src/ctl.c b/src/ctl.c index dad80086..5d2c8db4 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1685,11 +1685,11 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena); } else { if (dss_prec != dss_prec_limit && - chunk_dss_prec_set(tsd_tsdn(tsd), dss_prec)) { + chunk_dss_prec_set(dss_prec)) { ret = EFAULT; goto label_return; } - dss_prec_old = chunk_dss_prec_get(tsd_tsdn(tsd)); + dss_prec_old = chunk_dss_prec_get(); } dss = dss_prec_names[dss_prec_old]; diff --git a/src/huge.c b/src/huge.c index 3a2877ca..19ca3f03 100644 --- a/src/huge.c +++ b/src/huge.c @@ -114,7 +114,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, #define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) #endif static void -huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize) +huge_dalloc_junk(void *ptr, size_t usize) { if (config_fill && have_dss && unlikely(opt_junk_free)) { @@ -122,7 +122,7 @@ huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize) * Only bother junk filling if the chunk isn't about to be * unmapped. */ - if (!config_munmap || (have_dss && chunk_in_dss(tsdn, ptr))) + if (!config_munmap || (have_dss && chunk_in_dss(ptr))) memset(ptr, JEMALLOC_FREE_JUNK, usize); } } @@ -221,7 +221,7 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, if (oldsize > usize) { size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { - huge_dalloc_junk(tsdn, (void *)((uintptr_t)ptr + usize), + huge_dalloc_junk((void *)((uintptr_t)ptr + usize), sdiff); post_zeroed = false; } else { @@ -402,7 +402,7 @@ huge_dalloc(tsdn_t *tsdn, void *ptr) ql_remove(&arena->huge, node, ql_link); malloc_mutex_unlock(tsdn, &arena->huge_mtx); - huge_dalloc_junk(tsdn, extent_node_addr_get(node), + huge_dalloc_junk(extent_node_addr_get(node), extent_node_size_get(node)); arena_chunk_dalloc_huge(tsdn, extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); diff --git a/src/jemalloc.c b/src/jemalloc.c index aec2a5eb..b0ebf810 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1115,8 +1115,7 @@ malloc_conf_init(void) for (i = 0; i < dss_prec_limit; i++) { if (strncmp(dss_prec_names[i], v, vlen) == 0) { - if (chunk_dss_prec_set(NULL, - i)) { + if (chunk_dss_prec_set(i)) { malloc_conf_error( "Error setting dss", k, klen, v, vlen); @@ -2783,7 +2782,6 @@ _malloc_prefork(void) } } base_prefork(tsd_tsdn(tsd)); - chunk_prefork(tsd_tsdn(tsd)); for (i = 0; i < narenas; i++) { if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) arena_prefork3(tsd_tsdn(tsd), arena); @@ -2812,7 +2810,6 @@ _malloc_postfork(void) witness_postfork_parent(tsd); /* Release all mutexes, now that fork() has completed. */ - chunk_postfork_parent(tsd_tsdn(tsd)); base_postfork_parent(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; @@ -2837,7 +2834,6 @@ jemalloc_postfork_child(void) witness_postfork_child(tsd); /* Release all mutexes, now that fork() has completed. */ - chunk_postfork_child(tsd_tsdn(tsd)); base_postfork_child(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; diff --git a/test/unit/junk.c b/test/unit/junk.c index acddc601..460bd524 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -53,10 +53,10 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize) } static void -huge_dalloc_junk_intercept(tsdn_t *tsdn, void *ptr, size_t usize) +huge_dalloc_junk_intercept(void *ptr, size_t usize) { - huge_dalloc_junk_orig(tsdn, ptr, usize); + huge_dalloc_junk_orig(ptr, usize); /* * The conditions under which junk filling actually occurs are nuanced * enough that it doesn't make sense to duplicate the decision logic in