From 6c3491ad3105994f8b804fc6ddb1aa88024a4d4b Mon Sep 17 00:00:00 2001
From: "David T. Goldblatt"
Date: Sun, 23 Feb 2020 20:33:04 -0800
Subject: [PATCH] Tcache: Unify bin flush logic.

The small and large pathways share most of their logic, even if some of
the individual operations are different. We pull out the common logic
into a force-inlined function, and then specialize twice, once for each
value of "small".
---
 .../internal/jemalloc_internal_decls.h |   9 +
 src/tcache.c                           | 310 +++++++++---------
 2 files changed, 172 insertions(+), 147 deletions(-)

diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h
index 042a1fa4..32058ced 100644
--- a/include/jemalloc/internal/jemalloc_internal_decls.h
+++ b/include/jemalloc/internal/jemalloc_internal_decls.h
@@ -92,4 +92,13 @@ isblank(int c) {
 #endif
 #include
 
+/*
+ * The Win32 midl compiler has #define small char; we don't use midl, but
+ * "small" is a nice identifier to have available when talking about size
+ * classes.
+ */
+#ifdef small
+# undef small
+#endif
+
 #endif /* JEMALLOC_INTERNAL_H */
diff --git a/src/tcache.c b/src/tcache.c
index 782d8833..7ffa6fc3 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -139,21 +139,44 @@ tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_t *tbin, szind_t binind,
 	}
 }
 
-void
-tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
-    szind_t binind, unsigned rem) {
-	assert(binind < SC_NBINS);
+JEMALLOC_ALWAYS_INLINE bool
+tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind,
+    unsigned cur_binshard, bool small) {
+	if (small) {
+		return edata_arena_ind_get(edata) == cur_arena_ind
+		    && edata_binshard_get(edata) == cur_binshard;
+	} else {
+		return edata_arena_ind_get(edata) == cur_arena_ind;
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
+    szind_t binind, unsigned rem, bool small) {
+	/*
+	 * A couple lookup calls take tsdn; declare it once for convenience
+	 * instead of calling tsd_tsdn(tsd) all the time.
+	 */
+	tsdn_t *tsdn = tsd_tsdn(tsd);
+
+	if (small) {
+		assert(binind < SC_NBINS);
+	} else {
+		assert(binind < nhbins);
+	}
 	cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind);
 	assert((cache_bin_sz_t)rem <= ncached);
+	arena_t *tcache_arena = tcache->arena;
+	assert(tcache_arena != NULL);
 
-	arena_t *arena = tcache->arena;
-	assert(arena != NULL);
 	unsigned nflush = ncached - rem;
-	/* Variable length array must have > 0 length. */
+	/*
+	 * Variable length array must have > 0 length; the last element is never
+	 * touched (it's just included to satisfy the no-zero-length rule).
+	 */
 	VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1);
 	void **bottom_item = cache_bin_bottom_item_get(tbin, binind);
 
-	tsdn_t *tsdn = tsd_tsdn(tsd);
 	/* Look up edata once per item. */
 	if (config_opt_safety_checks) {
 		tbin_edatas_lookup_size_check(tsd, tbin, binind, nflush,
@@ -165,71 +188,154 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
 		}
 	}
 
-	bool merged_stats = false;
+	/*
+	 * The slabs where we freed the last remaining object in the slab (and
+	 * so need to free the slab itself).
+	 * Used only if small == true.
+	 */
 	unsigned dalloc_count = 0;
 	VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1);
 
-	while (nflush > 0) {
-		/* Lock the arena bin associated with the first object. */
-		edata_t *edata = item_edata[0];
-		unsigned bin_arena_ind = edata_arena_ind_get(edata);
-		arena_t *bin_arena = arena_get(tsdn, bin_arena_ind, false);
-		unsigned binshard = edata_binshard_get(edata);
-		assert(binshard < bin_infos[binind].n_shards);
-		bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard];
-		malloc_mutex_lock(tsdn, &bin->lock);
-		if (config_stats && bin_arena == arena && !merged_stats) {
-			merged_stats = true;
-			bin->stats.nflushes++;
-			bin->stats.nrequests += tbin->tstats.nrequests;
-			tbin->tstats.nrequests = 0;
+	/*
+	 * We're about to grab a bunch of locks. If one of them happens to be
+	 * the one guarding the arena-level stats counters we flush our
+	 * thread-local ones to, we do so under one critical section.
+	 */
+	bool merged_stats = false;
+	while (nflush > 0) {
+		/* Lock the arena, or bin, associated with the first object. */
+		edata_t *edata = item_edata[0];
+		unsigned cur_arena_ind = edata_arena_ind_get(edata);
+		arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false);
+
+		/*
+		 * These assignments are always overwritten when small is true,
+		 * and their values are always ignored when small is false, but
+		 * to avoid the technical UB when we pass them as parameters, we
+		 * need to initialize them.
+		 */
+		unsigned cur_binshard = 0;
+		bin_t *cur_bin = NULL;
+		if (small) {
+			cur_binshard = edata_binshard_get(edata);
+			cur_bin = &cur_arena->bins[binind].bin_shards[
+			    cur_binshard];
+			assert(cur_binshard < bin_infos[binind].n_shards);
 		}
+
+		if (small) {
+			malloc_mutex_lock(tsdn, &cur_bin->lock);
+		}
+		if (!small && !arena_is_auto(cur_arena)) {
+			malloc_mutex_lock(tsdn, &cur_arena->large_mtx);
+		}
+
+		/*
+		 * If we acquired the right lock and have some stats to flush,
+		 * flush them.
+		 */
+		if (config_stats && tcache_arena == cur_arena
+		    && !merged_stats) {
+			merged_stats = true;
+			if (small) {
+				cur_bin->stats.nflushes++;
+				cur_bin->stats.nrequests +=
+				    tbin->tstats.nrequests;
+				tbin->tstats.nrequests = 0;
+			} else {
+				arena_stats_large_flush_nrequests_add(tsdn,
+				    &tcache_arena->stats, binind,
+				    tbin->tstats.nrequests);
+				tbin->tstats.nrequests = 0;
+			}
+		}
+
+		/*
+		 * Large allocations need special prep done. Afterwards, we can
+		 * drop the large lock.
+		 */
+		if (!small) {
+			for (unsigned i = 0; i < nflush; i++) {
+				void *ptr = *(bottom_item - i);
+				edata = item_edata[i];
+				assert(ptr != NULL && edata != NULL);
+
+				if (tcache_bin_flush_match(edata, cur_arena_ind,
+				    cur_binshard, small)) {
+					large_dalloc_prep_junked_locked(tsdn,
+					    edata);
+				}
+			}
+		}
+		if (!small && !arena_is_auto(cur_arena)) {
+			malloc_mutex_unlock(tsdn, &cur_arena->large_mtx);
+		}
+
+		/* Deallocate whatever we can. */
 		unsigned ndeferred = 0;
 		for (unsigned i = 0; i < nflush; i++) {
 			void *ptr = *(bottom_item - i);
 			edata = item_edata[i];
 			assert(ptr != NULL && edata != NULL);
-
-			if (edata_arena_ind_get(edata) == bin_arena_ind
-			    && edata_binshard_get(edata) == binshard) {
-				if (arena_dalloc_bin_junked_locked(tsdn,
-				    bin_arena, bin, binind, edata, ptr)) {
-					dalloc_slabs[dalloc_count++] = edata;
-				}
-			} else {
+			if (!tcache_bin_flush_match(edata, cur_arena_ind,
+			    cur_binshard, small)) {
 				/*
-				 * This object was allocated via a different
-				 * arena bin than the one that is currently
-				 * locked. Stash the object, so that it can be
-				 * handled in a future pass.
+				 * The object was allocated either via a
+				 * different arena, or a different bin in this
+				 * arena. Either way, stash the object so that
+				 * it can be handled in a future pass.
 				 */
 				*(bottom_item - ndeferred) = ptr;
 				item_edata[ndeferred] = edata;
 				ndeferred++;
+				continue;
+			}
+			if (small) {
+				if (arena_dalloc_bin_junked_locked(tsdn,
+				    cur_arena, cur_bin, binind, edata, ptr)) {
+					dalloc_slabs[dalloc_count] = edata;
+					dalloc_count++;
+				}
+			} else {
+				large_dalloc_finish(tsdn, edata);
 			}
 		}
-		malloc_mutex_unlock(tsdn, &bin->lock);
-		arena_decay_ticks(tsdn, bin_arena, nflush - ndeferred);
+
+		if (small) {
+			malloc_mutex_unlock(tsdn, &cur_bin->lock);
+		}
+		arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred);
 		nflush = ndeferred;
 	}
 
+	/* Handle all deferred slab dalloc. */
+	assert(small || dalloc_count == 0);
 	for (unsigned i = 0; i < dalloc_count; i++) {
 		edata_t *slab = dalloc_slabs[i];
 		arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab);
 	}
 
 	if (config_stats && !merged_stats) {
-		/*
-		 * The flush loop didn't happen to flush to this thread's
-		 * arena, so the stats didn't get merged. Manually do so now.
-		 */
-		unsigned binshard;
-		bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind,
-		    &binshard);
-		bin->stats.nflushes++;
-		bin->stats.nrequests += tbin->tstats.nrequests;
-		tbin->tstats.nrequests = 0;
-		malloc_mutex_unlock(tsdn, &bin->lock);
+		if (small) {
+			/*
+			 * The flush loop didn't happen to flush to this
+			 * thread's arena, so the stats didn't get merged.
+			 * Manually do so now.
+			 */
+			unsigned binshard;
+			bin_t *bin = arena_bin_choose_lock(tsdn, tcache_arena,
+			    binind, &binshard);
+			bin->stats.nflushes++;
+			bin->stats.nrequests += tbin->tstats.nrequests;
+			tbin->tstats.nrequests = 0;
+			malloc_mutex_unlock(tsdn, &bin->lock);
+		} else {
+			arena_stats_large_flush_nrequests_add(tsdn,
+			    &tcache_arena->stats, binind,
+			    tbin->tstats.nrequests);
+			tbin->tstats.nrequests = 0;
+		}
 	}
 
 	memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem *
@@ -241,105 +347,15 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
 }
 
 void
-tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind,
-    unsigned rem) {
-	bool merged_stats = false;
+tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
+    szind_t binind, unsigned rem) {
+	tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, true);
+}
 
-	assert(binind < nhbins);
-	cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind);
-	assert((cache_bin_sz_t)rem <= ncached);
-
-	arena_t *tcache_arena = tcache->arena;
-	assert(tcache_arena != NULL);
-	unsigned nflush = ncached - rem;
-	/* Variable length array must have > 0 length. */
-	VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1);
-
-	void **bottom_item = cache_bin_bottom_item_get(tbin, binind);
-#ifndef JEMALLOC_EXTRA_SIZE_CHECK
-	/* Look up edata once per item. */
-	for (unsigned i = 0 ; i < nflush; i++) {
-		item_edata[i] = emap_edata_lookup(tsd_tsdn(tsd), &emap_global,
-		    *(bottom_item - i));
-	}
-#else
-	tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush,
-	    item_edata);
-#endif
-	while (nflush > 0) {
-		/* Lock the arena associated with the first object. */
-		edata_t *edata = item_edata[0];
-		unsigned locked_arena_ind = edata_arena_ind_get(edata);
-		arena_t *locked_arena = arena_get(tsd_tsdn(tsd),
-		    locked_arena_ind, false);
-
-		bool lock_large = !arena_is_auto(locked_arena);
-		if (lock_large) {
-			malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx);
-		}
-		for (unsigned i = 0; i < nflush; i++) {
-			void *ptr = *(bottom_item - i);
-			assert(ptr != NULL);
-			edata = item_edata[i];
-			if (edata_arena_ind_get(edata) == locked_arena_ind) {
-				large_dalloc_prep_junked_locked(tsd_tsdn(tsd),
-				    edata);
-			}
-		}
-		if ((config_prof || config_stats) &&
-		    (locked_arena == tcache_arena)) {
-			if (config_stats) {
-				merged_stats = true;
-				arena_stats_large_flush_nrequests_add(
-				    tsd_tsdn(tsd), &tcache_arena->stats, binind,
-				    tbin->tstats.nrequests);
-				tbin->tstats.nrequests = 0;
-			}
-		}
-		if (lock_large) {
-			malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->large_mtx);
-		}
-
-		unsigned ndeferred = 0;
-		for (unsigned i = 0; i < nflush; i++) {
-			void *ptr = *(bottom_item - i);
-			edata = item_edata[i];
-			assert(ptr != NULL && edata != NULL);
-
-			if (edata_arena_ind_get(edata) == locked_arena_ind) {
-				large_dalloc_finish(tsd_tsdn(tsd), edata);
-			} else {
-				/*
-				 * This object was allocated via a different
-				 * arena than the one that is currently locked.
-				 * Stash the object, so that it can be handled
-				 * in a future pass.
-				 */
-				*(bottom_item - ndeferred) = ptr;
-				item_edata[ndeferred] = edata;
-				ndeferred++;
-			}
-		}
-		arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush -
-		    ndeferred);
-		nflush = ndeferred;
-	}
-	if (config_stats && !merged_stats) {
-		/*
-		 * The flush loop didn't happen to flush to this thread's
-		 * arena, so the stats didn't get merged. Manually do so now.
-		 */
-		arena_stats_large_flush_nrequests_add(tsd_tsdn(tsd),
-		    &tcache_arena->stats, binind, tbin->tstats.nrequests);
-		tbin->tstats.nrequests = 0;
-	}
-
-	memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem *
-	    sizeof(void *));
-	cache_bin_ncached_set(tbin, binind, rem);
-	if (tbin->cur_ptr.lowbits > tbin->low_water_position) {
-		tbin->low_water_position = tbin->cur_ptr.lowbits;
-	}
+void
+tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
+    szind_t binind, unsigned rem) {
+	tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, false);
 }
 
 void