From c2fcf9c2cfcbaba58db1941c91c7a8a4b6623401 Mon Sep 17 00:00:00 2001
From: Qi Wang
Date: Wed, 12 Apr 2017 16:16:27 -0700
Subject: [PATCH] Switch to fine-grained reentrancy support.

Previously we had a general detection and support of reentrancy, at the cost
of having branches and inc / dec operations on fast paths. To avoid taxing
fast paths, we move the reentrancy operations onto tsd slow state, and only
modify reentrancy level around external calls (that might trigger reentrancy).
---
 .../internal/jemalloc_internal_inlines_a.h |  23 ++++
 .../internal/jemalloc_internal_inlines_b.h |   2 +-
 .../internal/jemalloc_internal_inlines_c.h |   4 +-
 include/jemalloc/internal/tsd_inlines.h    |   8 +-
 include/jemalloc/internal/tsd_structs.h    |   2 +-
 src/arena.c                                |   6 +-
 src/jemalloc.c                             | 127 ++++++++----------
 src/tsd.c                                  |   4 +-
 8 files changed, 90 insertions(+), 86 deletions(-)

diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h
index 822b4d75..600d7226 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h
@@ -33,6 +33,8 @@ tcache_t *tcache_get(tsd_t *tsd);
 malloc_cpuid_t malloc_getcpu(void);
 unsigned percpu_arena_choose(void);
 unsigned percpu_arena_ind_limit(void);
+void pre_reentrancy(tsd_t *tsd);
+void post_reentrancy(tsd_t *tsd);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
@@ -445,6 +447,27 @@ tcache_get(tsd_t *tsd) {
 	return tsd_tcachep_get(tsd);
 }
 
+JEMALLOC_INLINE void
+pre_reentrancy(tsd_t *tsd) {
+	bool fast = tsd_fast(tsd);
+	++*tsd_reentrancy_levelp_get(tsd);
+	if (fast) {
+		/* Prepare slow path for reentrancy. */
+		tsd_slow_update(tsd);
+		assert(tsd->state == tsd_state_nominal_slow);
+	}
+}
+
+JEMALLOC_INLINE void
+post_reentrancy(tsd_t *tsd) {
+	int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsd);
+	assert(*reentrancy_level > 0);
+	if (--*reentrancy_level == 0) {
+		tsd_slow_update(tsd);
+	}
+}
+
 #endif
 
 #endif /* JEMALLOC_INTERNAL_INLINES_A_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h
index 52afb42d..e7d564ce 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h
@@ -16,7 +16,7 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) {
 	}
 
 	/* During reentrancy, arena 0 is the safest bet. */
-	if (*tsd_reentrancy_levelp_get(tsd) > 1) {
+	if (unlikely(tsd_reentrancy_level_get(tsd) > 0)) {
 		return arena_get(tsd_tsdn(tsd), 0, true);
 	}
 
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
index 7884a206..bb1f2deb 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -117,8 +117,8 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx,
 	if (config_stats && is_internal) {
 		arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr));
 	}
-	if (!is_internal && *tsd_reentrancy_levelp_get(tsdn_tsd(tsdn)) != 0) {
-		tcache = NULL;
+	if (!is_internal && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) {
+		assert(tcache == NULL);
 	}
 	arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path);
 }
diff --git a/include/jemalloc/internal/tsd_inlines.h b/include/jemalloc/internal/tsd_inlines.h
index 46eefb6e..93469bca 100644
--- a/include/jemalloc/internal/tsd_inlines.h
+++ b/include/jemalloc/internal/tsd_inlines.h
@@ -20,7 +20,7 @@ tsd_t *tsdn_tsd(tsdn_t *tsdn);
 rtree_ctx_t *tsd_rtree_ctx(tsd_t *tsd);
 rtree_ctx_t *tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback);
 bool tsd_fast(tsd_t *tsd);
-void tsd_assert_fast(tsd_t *tsd);
+bool tsd_assert_fast(tsd_t *tsd);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_))
@@ -52,9 +52,11 @@ MALLOC_TSD
 #undef MALLOC_TSD_getset_no
 #undef O
 
-JEMALLOC_ALWAYS_INLINE void
+JEMALLOC_ALWAYS_INLINE bool
 tsd_assert_fast(tsd_t *tsd) {
-	assert(!malloc_slow && tsd_tcache_enabled_get(tsd));
+	assert(!malloc_slow && tsd_tcache_enabled_get(tsd) &&
+	    tsd_reentrancy_level_get(tsd) == 0);
+	return true;
 }
 
 JEMALLOC_ALWAYS_INLINE bool
diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h
index c166fe6b..40fea97b 100644
--- a/include/jemalloc/internal/tsd_structs.h
+++ b/include/jemalloc/internal/tsd_structs.h
@@ -55,7 +55,7 @@ struct tsd_init_head_s {
 /* O(name, type, [gs]et, init, cleanup) */	\
 O(tcache_enabled,	bool,	yes,	yes,	no)	\
 O(arenas_tdata_bypass,	bool,	no,	no,	no)	\
-O(reentrancy_level,	int8_t,	no,	no,	no)	\
+O(reentrancy_level,	int8_t,	yes,	no,	no)	\
 O(narenas_tdata,	uint32_t, yes,	no,	no)	\
 O(thread_allocated,	uint64_t, yes,	no,	no)	\
 O(thread_deallocated,	uint64_t, yes,	no,	no)	\
diff --git a/src/arena.c b/src/arena.c
index 4f5dcf6e..5b540ce3 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1966,11 +1966,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
 		 * If we're here, then arena 0 already exists, so bootstrapping
 		 * is done enough that we should have tsd.
 		 */
-		int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsdn_tsd(
-		    tsdn));
-		++*reentrancy_level;
+		pre_reentrancy(tsdn_tsd(tsdn));
 		hooks_arena_new_hook();
-		--*reentrancy_level;
+		post_reentrancy(tsdn_tsd(tsdn));
 	}
 
 	return arena;
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 4bec2dea..4c38517b 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1663,13 +1663,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
 	szind_t ind = 0;
 	size_t usize = 0;
 
-	/*
-	 * For reentrancy checking, we get the old reentrancy level from tsd and
-	 * reset it once we're done. In case of early bailout though, we never
-	 * bother getting the old level, so we shouldn't try to reset it. This
-	 * is indicated by leaving the pointer as NULL.
-	 */
-	int8_t *reentrancy_level = NULL;
+	/* Reentrancy is only checked on slow path. */
+	int8_t reentrancy_level;
 
 	/* Compute the amount of memory the user wants. */
 	if (unlikely(compute_size_with_overflow(sopts->may_overflow, dopts,
@@ -1716,12 +1711,11 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
 	 * If we need to handle reentrancy, we can do it out of a
 	 * known-initialized arena (i.e. arena 0).
 	 */
-	reentrancy_level = tsd_reentrancy_levelp_get(tsd);
-	++*reentrancy_level;
-	if (*reentrancy_level == 1) {
+	reentrancy_level = tsd_reentrancy_level_get(tsd);
+	if (reentrancy_level == 0) {
 		witness_assert_lockless(tsd_tsdn(tsd));
 	}
-	if (unlikely(*reentrancy_level > 1)) {
+	if (sopts->slow && unlikely(reentrancy_level > 0)) {
 		/*
 		 * We should never specify particular arenas or tcaches from
 		 * within our internal allocations.
@@ -1795,14 +1789,9 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
 	}
 
 	/* Success! */
-	if (*reentrancy_level == 1) {
+	if (reentrancy_level == 0) {
 		witness_assert_lockless(tsd_tsdn(tsd));
 	}
-	/*
-	 * If we got here, we never bailed out on a failure path, so
-	 * reentrancy_level is non-null.
-	 */
-	--*reentrancy_level;
 
 	*dopts->result = allocation;
 	return 0;
@@ -1826,10 +1815,6 @@ label_oom:
 		*dopts->result = NULL;
 	}
 
-	if (reentrancy_level != NULL) {
-		--*reentrancy_level;
-	}
-
 	return ENOMEM;
 
 	/*
@@ -1857,10 +1842,6 @@ label_invalid_alignment:
 		*dopts->result = NULL;
 	}
 
-	if (reentrancy_level != NULL) {
-		--*reentrancy_level;
-	}
-
 	return EINVAL;
 }
 
@@ -2053,8 +2034,11 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize,
 
 JEMALLOC_ALWAYS_INLINE_C void
 ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) {
-	if (*tsd_reentrancy_levelp_get(tsd) == 0) {
+	assert(slow_path || tsd_assert_fast(tsd));
+	if (tsd_reentrancy_level_get(tsd) == 0) {
 		witness_assert_lockless(tsd_tsdn(tsd));
+	} else {
+		assert(slow_path);
 	}
 
 	assert(ptr != NULL);
@@ -2088,8 +2072,11 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) {
 
 JEMALLOC_ALWAYS_INLINE_C void
 isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) {
-	if (*tsd_reentrancy_levelp_get(tsd) == 0) {
+	assert(slow_path || tsd_assert_fast(tsd));
+	if (tsd_reentrancy_level_get(tsd) == 0) {
 		witness_assert_lockless(tsd_tsdn(tsd));
+	} else {
+		assert(slow_path);
 	}
 
 	assert(ptr != NULL);
@@ -2129,14 +2116,14 @@ je_realloc(void *ptr, size_t size) {
 
 	if (unlikely(size == 0)) {
 		if (ptr != NULL) {
-			tsd_t *tsd;
-
 			/* realloc(ptr, 0) is equivalent to free(ptr). */
 			UTRACE(ptr, 0, 0);
-			tsd = tsd_fetch();
-			tcache_t *tcache = NULL;
-			if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) {
+			tcache_t *tcache;
+			tsd_t *tsd = tsd_fetch();
+			if (tsd_reentrancy_level_get(tsd) == 0) {
 				tcache = tcache_get(tsd);
+			} else {
+				tcache = NULL;
 			}
 			ifree(tsd, ptr, tcache, true);
 			return NULL;
@@ -2200,29 +2187,25 @@ je_free(void *ptr) {
 	UTRACE(ptr, 0, 0);
 	if (likely(ptr != NULL)) {
 		tsd_t *tsd = tsd_fetch();
-		if (*tsd_reentrancy_levelp_get(tsd) == 0) {
+		if (tsd_reentrancy_level_get(tsd) == 0) {
 			witness_assert_lockless(tsd_tsdn(tsd));
 		}
 
+		tcache_t *tcache;
 		if (likely(tsd_fast(tsd))) {
 			tsd_assert_fast(tsd);
-			if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) {
-				/* Getting tcache ptr unconditionally. */
-				tcache = tsd_tcachep_get(tsd);
-				assert(tcache == tcache_get(tsd));
-			} else {
-				tcache = NULL;
-			}
+			/* Unconditionally get tcache ptr on fast path. */
+			tcache = tsd_tcachep_get(tsd);
 			ifree(tsd, ptr, tcache, false);
 		} else {
-			if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) {
+			if (likely(tsd_reentrancy_level_get(tsd) == 0)) {
 				tcache = tcache_get(tsd);
 			} else {
 				tcache = NULL;
 			}
 			ifree(tsd, ptr, tcache, true);
 		}
-		if (*tsd_reentrancy_levelp_get(tsd) == 0) {
+		if (tsd_reentrancy_level_get(tsd) == 0) {
 			witness_assert_lockless(tsd_tsdn(tsd));
 		}
 	}
@@ -2707,33 +2690,32 @@ je_sallocx(const void *ptr, int flags) {
 
 JEMALLOC_EXPORT void JEMALLOC_NOTHROW
 je_dallocx(void *ptr, int flags) {
-	tsd_t *tsd;
-	tcache_t *tcache;
-
 	assert(ptr != NULL);
 	assert(malloc_initialized() || IS_INITIALIZER);
 
-	tsd = tsd_fetch();
+	tsd_t *tsd = tsd_fetch();
 	bool fast = tsd_fast(tsd);
 	witness_assert_lockless(tsd_tsdn(tsd));
+
+	tcache_t *tcache;
 	if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
 		/* Not allowed to be reentrant and specify a custom tcache. */
-		assert(*tsd_reentrancy_levelp_get(tsd) == 0);
+		assert(tsd_reentrancy_level_get(tsd) == 0);
 		if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) {
 			tcache = NULL;
 		} else {
 			tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
 		}
 	} else {
-		if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) {
-			if (likely(fast)) {
-				tcache = tsd_tcachep_get(tsd);
-				assert(tcache == tcache_get(tsd));
-			} else {
-				tcache = tcache_get(tsd);
-			}
+		if (likely(fast)) {
+			tcache = tsd_tcachep_get(tsd);
+			assert(tcache == tcache_get(tsd));
 		} else {
-			tcache = NULL;
+			if (likely(tsd_reentrancy_level_get(tsd) == 0)) {
+				tcache = tcache_get(tsd);
+			} else {
+				tcache = NULL;
+			}
 		}
 	}
 
@@ -2749,10 +2731,9 @@ je_dallocx(void *ptr, int flags) {
 
 JEMALLOC_ALWAYS_INLINE_C size_t
 inallocx(tsdn_t *tsdn, size_t size, int flags) {
-	size_t usize;
-
 	witness_assert_lockless(tsdn);
 
+	size_t usize;
 	if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) {
 		usize = s2u(size);
 	} else {
@@ -2764,36 +2745,34 @@ inallocx(tsdn_t *tsdn, size_t size, int flags) {
 
 JEMALLOC_EXPORT void JEMALLOC_NOTHROW
 je_sdallocx(void *ptr, size_t size, int flags) {
-	tsd_t *tsd;
-	size_t usize;
-	tcache_t *tcache;
-
 	assert(ptr != NULL);
 	assert(malloc_initialized() || IS_INITIALIZER);
 
-	tsd = tsd_fetch();
-	bool fast = tsd_fast(tsd);
-	usize = inallocx(tsd_tsdn(tsd), size, flags);
-	assert(usize == isalloc(tsd_tsdn(tsd), ptr));
+	tsd_t *tsd = tsd_fetch();
+	bool fast = tsd_fast(tsd);
+	size_t usize = inallocx(tsd_tsdn(tsd), size, flags);
+	assert(usize == isalloc(tsd_tsdn(tsd), ptr));
 	witness_assert_lockless(tsd_tsdn(tsd));
+
+	tcache_t *tcache;
 	if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
 		/* Not allowed to be reentrant and specify a custom tcache. */
-		assert(*tsd_reentrancy_levelp_get(tsd) == 0);
+		assert(tsd_reentrancy_level_get(tsd) == 0);
 		if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) {
 			tcache = NULL;
 		} else {
 			tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
 		}
 	} else {
-		if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) {
-			if (likely(fast)) {
-				tcache = tsd_tcachep_get(tsd);
-				assert(tcache == tcache_get(tsd));
-			} else {
-				tcache = tcache_get(tsd);
-			}
+		if (likely(fast)) {
+			tcache = tsd_tcachep_get(tsd);
+			assert(tcache == tcache_get(tsd));
 		} else {
-			tcache = NULL;
+			if (likely(tsd_reentrancy_level_get(tsd) == 0)) {
+				tcache = tcache_get(tsd);
+			} else {
+				tcache = NULL;
+			}
 		}
 	}
 
diff --git a/src/tsd.c b/src/tsd.c
index bdd3f3c5..cb7dd3fb 100644
--- a/src/tsd.c
+++ b/src/tsd.c
@@ -15,7 +15,8 @@ malloc_tsd_data(, , tsd_t, TSD_INITIALIZER)
 void
 tsd_slow_update(tsd_t *tsd) {
 	if (tsd_nominal(tsd)) {
-		if (malloc_slow || !tsd->tcache_enabled) {
+		if (malloc_slow || !tsd->tcache_enabled ||
+		    tsd_reentrancy_level_get(tsd) > 0) {
 			tsd->state = tsd_state_nominal_slow;
 		} else {
 			tsd->state = tsd_state_nominal;
@@ -28,6 +29,7 @@ tsd_fetch_slow(tsd_t *tsd) {
 	if (tsd->state == tsd_state_nominal_slow) {
 		/* On slow path but no work needed. */
 		assert(malloc_slow || !tsd_tcache_enabled_get(tsd) ||
+		    tsd_reentrancy_level_get(tsd) > 0 ||
 		    *tsd_arenas_tdata_bypassp_get(tsd));
 	} else if (tsd->state == tsd_state_uninitialized) {
 		tsd->state = tsd_state_nominal;
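
Usage sketch (not part of the patch): under the new scheme, any external call
that might allocate is expected to be bracketed with pre_reentrancy() /
post_reentrancy(), the same way the arena_new() hunk above guards
hooks_arena_new_hook(). The wrapper and hook names below are hypothetical.

/*
 * Hypothetical helper illustrating the intended pattern: pre_reentrancy()
 * bumps the per-thread level and switches tsd to the slow state, so any
 * allocation made by the hook takes the slow path (arena 0, no tcache);
 * post_reentrancy() drops the level and restores the fast state once it
 * reaches zero.
 */
static void
call_external_hook(tsd_t *tsd, void (*hook)(void)) {
	pre_reentrancy(tsd);
	hook();			/* May re-enter the allocator. */
	post_reentrancy(tsd);
}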