2017-01-20 13:41:41 +08:00
|
|
|
#define JEMALLOC_TCACHE_C_
|
2017-04-11 09:17:55 +08:00
|
|
|
#include "jemalloc/internal/jemalloc_preamble.h"
|
|
|
|
#include "jemalloc/internal/jemalloc_internal_includes.h"
|
2012-02-14 04:29:49 +08:00
|
|
|
|
2017-04-12 05:43:12 +08:00
|
|
|
#include "jemalloc/internal/assert.h"
|
2017-05-24 03:28:19 +08:00
|
|
|
#include "jemalloc/internal/mutex.h"
|
2019-03-21 04:06:53 +08:00
|
|
|
#include "jemalloc/internal/safety_check.h"
|
2017-12-15 04:46:39 +08:00
|
|
|
#include "jemalloc/internal/sc.h"
|
2017-04-12 05:43:12 +08:00
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
/******************************************************************************/
|
|
|
|
/* Data. */
|
|
|
|
|
2017-04-21 08:21:37 +08:00
|
|
|
bool opt_tcache = true;
|
2010-10-24 09:37:06 +08:00
|
|
|
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_info_t *tcache_bin_info;
|
2019-08-10 13:12:47 +08:00
|
|
|
|
|
|
|
/* Total stack size required (per tcache). Include the padding above. */
|
2020-03-01 06:41:47 +08:00
|
|
|
static size_t tcache_bin_alloc_size;
|
|
|
|
static size_t tcache_bin_alloc_alignment;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
|
2016-02-25 03:02:14 +08:00
|
|
|
unsigned nhbins;
|
2012-03-22 09:33:03 +08:00
|
|
|
size_t tcache_maxclass;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2015-01-30 07:30:47 +08:00
|
|
|
tcaches_t *tcaches;
|
|
|
|
|
|
|
|
/* Index of first element within tcaches that has never been used. */
|
|
|
|
static unsigned tcaches_past;
|
|
|
|
|
|
|
|
/* Head of singly linked list tracking available tcaches elements. */
|
|
|
|
static tcaches_t *tcaches_avail;
|
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
/* Protects tcaches{,_past,_avail}. */
|
|
|
|
static malloc_mutex_t tcaches_mtx;
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
/******************************************************************************/
|
|
|
|
|
2016-03-24 06:32:07 +08:00
|
|
|
size_t
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_salloc(tsdn_t *tsdn, const void *ptr) {
|
2017-03-17 16:25:12 +08:00
|
|
|
return arena_salloc(tsdn, ptr);
|
2012-04-20 09:28:03 +08:00
|
|
|
}
|
|
|
|
|
2012-05-02 15:30:36 +08:00
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
|
2015-08-20 06:21:32 +08:00
|
|
|
szind_t binind = tcache->next_gc_bin;
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin;
|
2019-08-21 09:14:18 +08:00
|
|
|
bool is_small;
|
2017-12-15 04:46:39 +08:00
|
|
|
if (binind < SC_NBINS) {
|
2017-04-07 03:35:22 +08:00
|
|
|
tbin = tcache_small_bin_get(tcache, binind);
|
2019-08-21 09:14:18 +08:00
|
|
|
is_small = true;
|
2017-04-07 03:35:22 +08:00
|
|
|
} else {
|
|
|
|
tbin = tcache_large_bin_get(tcache, binind);
|
2019-08-21 09:14:18 +08:00
|
|
|
is_small = false;
|
2017-04-07 03:35:22 +08:00
|
|
|
}
|
2019-08-10 13:12:47 +08:00
|
|
|
|
2020-02-29 10:55:33 +08:00
|
|
|
cache_bin_sz_t low_water = cache_bin_low_water_get(tbin,
|
|
|
|
&tcache_bin_info[binind]);
|
|
|
|
cache_bin_sz_t ncached = cache_bin_ncached_get(tbin,
|
|
|
|
&tcache_bin_info[binind]);
|
2019-08-10 13:12:47 +08:00
|
|
|
if (low_water > 0) {
|
2012-05-02 15:30:36 +08:00
|
|
|
/*
|
|
|
|
* Flush (ceiling) 3/4 of the objects below the low water mark.
|
|
|
|
*/
|
2019-08-21 09:14:18 +08:00
|
|
|
if (is_small) {
|
|
|
|
assert(!tcache->bin_refilled[binind]);
|
2017-04-29 04:31:09 +08:00
|
|
|
tcache_bin_flush_small(tsd, tcache, tbin, binind,
|
2019-08-10 13:12:47 +08:00
|
|
|
ncached - low_water + (low_water >> 2));
|
2017-04-07 03:35:22 +08:00
|
|
|
/*
|
|
|
|
* Reduce fill count by 2X. Limit lg_fill_div such that
|
|
|
|
* the fill count is always at least 1.
|
|
|
|
*/
|
2020-02-27 09:23:47 +08:00
|
|
|
if ((cache_bin_info_ncached_max(
|
|
|
|
&tcache_bin_info[binind]) >>
|
|
|
|
(tcache->lg_fill_div[binind] + 1)) >= 1) {
|
2017-04-07 03:35:22 +08:00
|
|
|
tcache->lg_fill_div[binind]++;
|
|
|
|
}
|
2012-05-02 15:30:36 +08:00
|
|
|
} else {
|
2019-08-13 02:08:39 +08:00
|
|
|
tcache_bin_flush_large(tsd, tcache, tbin, binind,
|
2019-08-10 13:12:47 +08:00
|
|
|
ncached - low_water + (low_water >> 2));
|
2012-05-02 15:30:36 +08:00
|
|
|
}
|
2019-08-21 09:14:18 +08:00
|
|
|
} else if (is_small && tcache->bin_refilled[binind]) {
|
|
|
|
assert(low_water == 0);
|
2017-04-29 04:31:09 +08:00
|
|
|
/*
|
|
|
|
* Increase fill count by 2X for small bins. Make sure
|
|
|
|
* lg_fill_div stays greater than 0.
|
|
|
|
*/
|
2019-08-21 09:14:18 +08:00
|
|
|
if (tcache->lg_fill_div[binind] > 1) {
|
2017-04-29 04:31:09 +08:00
|
|
|
tcache->lg_fill_div[binind]--;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2019-08-21 09:14:18 +08:00
|
|
|
tcache->bin_refilled[binind] = false;
|
2012-05-02 15:30:36 +08:00
|
|
|
}
|
2020-03-01 07:07:38 +08:00
|
|
|
cache_bin_low_water_set(tbin);
|
2017-04-29 04:31:09 +08:00
|
|
|
|
|
|
|
tcache->next_gc_bin++;
|
|
|
|
if (tcache->next_gc_bin == nhbins) {
|
|
|
|
tcache->next_gc_bin = 0;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2012-05-02 15:30:36 +08:00
|
|
|
}
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
void *
|
2016-05-11 13:21:10 +08:00
|
|
|
tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin, szind_t binind, bool *tcache_success) {
|
2010-01-17 01:53:50 +08:00
|
|
|
void *ret;
|
|
|
|
|
2017-04-21 08:21:37 +08:00
|
|
|
assert(tcache->arena != NULL);
|
2019-10-15 00:35:51 +08:00
|
|
|
arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind);
|
2020-02-29 11:12:07 +08:00
|
|
|
ret = cache_bin_alloc_easy(tbin, &tcache_bin_info[binind],
|
|
|
|
tcache_success);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-01-20 10:15:45 +08:00
|
|
|
return ret;
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2019-01-23 05:59:23 +08:00
|
|
|
/* Enabled with --enable-extra-size-check. */
|
|
|
|
static void
|
2020-02-27 09:10:12 +08:00
|
|
|
tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_ptr_array_t *arr,
|
|
|
|
szind_t binind, size_t nflush, edata_t **edatas) {
|
2020-02-07 05:16:07 +08:00
|
|
|
/* Avoids null-checking tsdn in the loop below. */
|
|
|
|
util_assume(tsd != NULL);
|
2019-01-23 05:59:23 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Verify that the items in the tcache all have the correct size; this
|
|
|
|
* is useful for catching sized deallocation bugs, also to fail early
|
|
|
|
* instead of corrupting metadata. Since this can be turned on for opt
|
|
|
|
* builds, avoid the branch in the loop.
|
|
|
|
*/
|
2020-02-07 05:16:07 +08:00
|
|
|
size_t szind_sum = binind * nflush;
|
2020-02-27 09:10:12 +08:00
|
|
|
for (unsigned i = 0; i < nflush; i++) {
|
2020-02-07 05:16:07 +08:00
|
|
|
emap_full_alloc_ctx_t full_alloc_ctx;
|
2020-02-07 05:45:04 +08:00
|
|
|
emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global,
|
2020-02-27 09:10:12 +08:00
|
|
|
cache_bin_ptr_array_get(arr, i), &full_alloc_ctx);
|
2020-02-07 05:16:07 +08:00
|
|
|
edatas[i] = full_alloc_ctx.edata;
|
|
|
|
szind_sum -= full_alloc_ctx.szind;
|
2019-01-23 05:59:23 +08:00
|
|
|
}
|
2020-02-27 09:10:12 +08:00
|
|
|
|
2020-02-07 05:16:07 +08:00
|
|
|
if (szind_sum != 0) {
|
2020-01-31 06:35:54 +08:00
|
|
|
safety_check_fail_sized_dealloc(false);
|
2019-01-23 05:59:23 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-24 12:33:04 +08:00
|
|
|
JEMALLOC_ALWAYS_INLINE bool
|
|
|
|
tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind,
|
|
|
|
unsigned cur_binshard, bool small) {
|
|
|
|
if (small) {
|
|
|
|
return edata_arena_ind_get(edata) == cur_arena_ind
|
|
|
|
&& edata_binshard_get(edata) == cur_binshard;
|
|
|
|
} else {
|
|
|
|
return edata_arena_ind_get(edata) == cur_arena_ind;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
JEMALLOC_ALWAYS_INLINE void
|
|
|
|
tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
|
|
|
|
szind_t binind, unsigned rem, bool small) {
|
|
|
|
/*
|
|
|
|
* A couple lookup calls take tsdn; declare it once for convenience
|
|
|
|
* instead of calling tsd_tsdn(tsd) all the time.
|
|
|
|
*/
|
|
|
|
tsdn_t *tsdn = tsd_tsdn(tsd);
|
|
|
|
|
|
|
|
if (small) {
|
|
|
|
assert(binind < SC_NBINS);
|
|
|
|
} else {
|
|
|
|
assert(binind < nhbins);
|
|
|
|
}
|
2020-02-29 10:55:33 +08:00
|
|
|
cache_bin_sz_t ncached = cache_bin_ncached_get(tbin,
|
|
|
|
&tcache_bin_info[binind]);
|
2019-08-10 13:12:47 +08:00
|
|
|
assert((cache_bin_sz_t)rem <= ncached);
|
2020-02-24 12:33:04 +08:00
|
|
|
arena_t *tcache_arena = tcache->arena;
|
|
|
|
assert(tcache_arena != NULL);
|
2010-03-14 12:32:56 +08:00
|
|
|
|
2019-08-10 13:12:47 +08:00
|
|
|
unsigned nflush = ncached - rem;
|
2020-02-24 12:33:04 +08:00
|
|
|
/*
|
|
|
|
* Variable length array must have > 0 length; the last element is never
|
|
|
|
* touched (it's just included to satisfy the no-zero-length rule).
|
|
|
|
*/
|
2020-02-14 12:04:22 +08:00
|
|
|
VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1);
|
2020-02-27 09:10:12 +08:00
|
|
|
CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush);
|
|
|
|
|
2020-02-29 11:12:07 +08:00
|
|
|
cache_bin_init_ptr_array_for_flush(tbin, &tcache_bin_info[binind],
|
|
|
|
&ptrs, nflush);
|
2020-02-29 03:37:39 +08:00
|
|
|
|
2019-12-10 06:36:45 +08:00
|
|
|
/* Look up edata once per item. */
|
2019-03-20 07:04:35 +08:00
|
|
|
if (config_opt_safety_checks) {
|
2020-02-27 09:10:12 +08:00
|
|
|
tbin_edatas_lookup_size_check(tsd, &ptrs, binind, nflush,
|
2020-02-08 06:53:36 +08:00
|
|
|
item_edata);
|
2019-03-20 07:04:35 +08:00
|
|
|
} else {
|
|
|
|
for (unsigned i = 0 ; i < nflush; i++) {
|
2020-02-07 05:45:04 +08:00
|
|
|
item_edata[i] = emap_edata_lookup(tsd_tsdn(tsd),
|
2020-02-27 09:10:12 +08:00
|
|
|
&emap_global, cache_bin_ptr_array_get(&ptrs, i));
|
2019-03-20 07:04:35 +08:00
|
|
|
}
|
2017-03-28 08:22:01 +08:00
|
|
|
}
|
2020-02-08 06:53:36 +08:00
|
|
|
|
2020-02-24 12:33:04 +08:00
|
|
|
/*
|
|
|
|
* The slabs where we freed the last remaining object in the slab (and
|
|
|
|
* so need to free the slab itself).
|
|
|
|
* Used only if small == true.
|
|
|
|
*/
|
2020-02-08 06:53:36 +08:00
|
|
|
unsigned dalloc_count = 0;
|
|
|
|
VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1);
|
2020-02-24 12:33:04 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We're about to grab a bunch of locks. If one of them happens to be
|
|
|
|
* the one guarding the arena-level stats counters we flush our
|
|
|
|
* thread-local ones to, we do so under one critical section.
|
|
|
|
*/
|
|
|
|
bool merged_stats = false;
|
2017-03-28 08:22:01 +08:00
|
|
|
while (nflush > 0) {
|
2020-02-24 12:33:04 +08:00
|
|
|
/* Lock the arena, or bin, associated with the first object. */
|
2019-12-10 06:36:45 +08:00
|
|
|
edata_t *edata = item_edata[0];
|
2020-02-24 12:33:04 +08:00
|
|
|
unsigned cur_arena_ind = edata_arena_ind_get(edata);
|
|
|
|
arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false);
|
2016-03-24 11:29:33 +08:00
|
|
|
|
2011-03-15 03:56:51 +08:00
|
|
|
/*
|
2020-02-24 12:33:04 +08:00
|
|
|
* These assignments are always overwritten when small is true,
|
|
|
|
* and their values are always ignored when small is false, but
|
|
|
|
* to avoid the technical UB when we pass them as parameters, we
|
|
|
|
* need to intialize them.
|
2011-03-15 03:56:51 +08:00
|
|
|
*/
|
2020-02-24 12:33:04 +08:00
|
|
|
unsigned cur_binshard = 0;
|
|
|
|
bin_t *cur_bin = NULL;
|
|
|
|
if (small) {
|
|
|
|
cur_binshard = edata_binshard_get(edata);
|
|
|
|
cur_bin = &cur_arena->bins[binind].bin_shards[
|
|
|
|
cur_binshard];
|
|
|
|
assert(cur_binshard < bin_infos[binind].n_shards);
|
|
|
|
}
|
2017-01-30 13:57:14 +08:00
|
|
|
|
2020-02-24 12:33:04 +08:00
|
|
|
if (small) {
|
|
|
|
malloc_mutex_lock(tsdn, &cur_bin->lock);
|
2018-05-30 06:55:04 +08:00
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
if (!small && !arena_is_auto(cur_arena)) {
|
|
|
|
malloc_mutex_lock(tsdn, &cur_arena->large_mtx);
|
2017-01-30 13:57:14 +08:00
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If we acquired the right lock and have some stats to flush,
|
|
|
|
* flush them.
|
|
|
|
*/
|
|
|
|
if (config_stats && tcache_arena == cur_arena
|
|
|
|
&& !merged_stats) {
|
|
|
|
merged_stats = true;
|
|
|
|
if (small) {
|
|
|
|
cur_bin->stats.nflushes++;
|
|
|
|
cur_bin->stats.nrequests +=
|
|
|
|
tbin->tstats.nrequests;
|
|
|
|
tbin->tstats.nrequests = 0;
|
|
|
|
} else {
|
|
|
|
arena_stats_large_flush_nrequests_add(tsdn,
|
|
|
|
&tcache_arena->stats, binind,
|
2017-02-13 09:43:33 +08:00
|
|
|
tbin->tstats.nrequests);
|
2012-02-11 12:22:09 +08:00
|
|
|
tbin->tstats.nrequests = 0;
|
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Large allocations need special prep done. Afterwards, we can
|
|
|
|
* drop the large lock.
|
|
|
|
*/
|
|
|
|
if (!small) {
|
|
|
|
for (unsigned i = 0; i < nflush; i++) {
|
2020-02-27 09:10:12 +08:00
|
|
|
void *ptr = cache_bin_ptr_array_get(&ptrs, i);
|
2020-02-24 12:33:04 +08:00
|
|
|
edata = item_edata[i];
|
|
|
|
assert(ptr != NULL && edata != NULL);
|
|
|
|
|
|
|
|
if (tcache_bin_flush_match(edata, cur_arena_ind,
|
|
|
|
cur_binshard, small)) {
|
2020-02-29 03:37:39 +08:00
|
|
|
large_dalloc_prep_locked(tsdn,
|
2020-02-24 12:33:04 +08:00
|
|
|
edata);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!small && !arena_is_auto(cur_arena)) {
|
|
|
|
malloc_mutex_unlock(tsdn, &cur_arena->large_mtx);
|
2018-05-30 06:55:04 +08:00
|
|
|
}
|
2017-01-30 13:57:14 +08:00
|
|
|
|
2020-02-24 12:33:04 +08:00
|
|
|
/* Deallocate whatever we can. */
|
2017-01-30 13:57:14 +08:00
|
|
|
unsigned ndeferred = 0;
|
2017-04-29 04:31:09 +08:00
|
|
|
for (unsigned i = 0; i < nflush; i++) {
|
2020-02-27 09:10:12 +08:00
|
|
|
void *ptr = cache_bin_ptr_array_get(&ptrs, i);
|
2019-12-10 06:36:45 +08:00
|
|
|
edata = item_edata[i];
|
|
|
|
assert(ptr != NULL && edata != NULL);
|
2020-02-24 12:33:04 +08:00
|
|
|
if (!tcache_bin_flush_match(edata, cur_arena_ind,
|
|
|
|
cur_binshard, small)) {
|
2010-03-18 07:27:39 +08:00
|
|
|
/*
|
2020-02-24 12:33:04 +08:00
|
|
|
* The object was allocated either via a
|
|
|
|
* different arena, or a different bin in this
|
|
|
|
* arena. Either way, stash the object so that
|
|
|
|
* it can be handled in a future pass.
|
2010-03-18 07:27:39 +08:00
|
|
|
*/
|
2020-02-27 09:10:12 +08:00
|
|
|
cache_bin_ptr_array_set(&ptrs, ndeferred, ptr);
|
2019-12-10 06:36:45 +08:00
|
|
|
item_edata[ndeferred] = edata;
|
2010-03-18 07:27:39 +08:00
|
|
|
ndeferred++;
|
2020-02-24 12:33:04 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (small) {
|
2020-02-29 03:37:39 +08:00
|
|
|
if (arena_dalloc_bin_locked(tsdn, cur_arena,
|
|
|
|
cur_bin, binind, edata, ptr)) {
|
2020-02-24 12:33:04 +08:00
|
|
|
dalloc_slabs[dalloc_count] = edata;
|
|
|
|
dalloc_count++;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
large_dalloc_finish(tsdn, edata);
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
|
|
|
|
if (small) {
|
|
|
|
malloc_mutex_unlock(tsdn, &cur_bin->lock);
|
|
|
|
}
|
|
|
|
arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred);
|
2017-01-30 13:57:14 +08:00
|
|
|
nflush = ndeferred;
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
|
|
|
|
/* Handle all deferred slab dalloc. */
|
|
|
|
assert(small || dalloc_count == 0);
|
|
|
|
for (unsigned i = 0; i < dalloc_count; i++) {
|
|
|
|
edata_t *slab = dalloc_slabs[i];
|
|
|
|
arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2014-10-04 01:16:09 +08:00
|
|
|
if (config_stats && !merged_stats) {
|
2020-02-24 12:33:04 +08:00
|
|
|
if (small) {
|
|
|
|
/*
|
|
|
|
* The flush loop didn't happen to flush to this
|
|
|
|
* thread's arena, so the stats didn't get merged.
|
|
|
|
* Manually do so now.
|
|
|
|
*/
|
|
|
|
unsigned binshard;
|
|
|
|
bin_t *bin = arena_bin_choose_lock(tsdn, tcache_arena,
|
|
|
|
binind, &binshard);
|
|
|
|
bin->stats.nflushes++;
|
|
|
|
bin->stats.nrequests += tbin->tstats.nrequests;
|
|
|
|
tbin->tstats.nrequests = 0;
|
|
|
|
malloc_mutex_unlock(tsdn, &bin->lock);
|
|
|
|
} else {
|
|
|
|
arena_stats_large_flush_nrequests_add(tsdn,
|
|
|
|
&tcache_arena->stats, binind,
|
|
|
|
tbin->tstats.nrequests);
|
|
|
|
tbin->tstats.nrequests = 0;
|
|
|
|
}
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
|
2020-03-01 02:48:59 +08:00
|
|
|
cache_bin_finish_flush(tbin, &tcache_bin_info[binind], &ptrs,
|
|
|
|
ncached - rem);
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
|
|
|
|
2020-02-24 12:33:04 +08:00
|
|
|
void
|
|
|
|
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
|
|
|
|
szind_t binind, unsigned rem) {
|
|
|
|
tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
|
|
|
|
szind_t binind, unsigned rem) {
|
|
|
|
tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, false);
|
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
|
2017-03-28 12:50:38 +08:00
|
|
|
assert(tcache->arena == NULL);
|
2017-03-07 04:51:41 +08:00
|
|
|
tcache->arena = arena;
|
2017-03-28 12:50:38 +08:00
|
|
|
|
2012-03-22 09:33:03 +08:00
|
|
|
if (config_stats) {
|
|
|
|
/* Link into list of extant tcaches. */
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
|
2017-08-12 08:34:21 +08:00
|
|
|
|
2012-03-22 09:33:03 +08:00
|
|
|
ql_elm_new(tcache, link);
|
|
|
|
ql_tail_insert(&arena->tcache_ql, tcache, link);
|
2017-08-12 08:34:21 +08:00
|
|
|
cache_bin_array_descriptor_init(
|
|
|
|
&tcache->cache_bin_array_descriptor, tcache->bins_small,
|
|
|
|
tcache->bins_large);
|
|
|
|
ql_tail_insert(&arena->cache_bin_array_descriptor_ql,
|
|
|
|
&tcache->cache_bin_array_descriptor, link);
|
|
|
|
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-05-11 13:21:10 +08:00
|
|
|
static void
|
2017-03-07 04:51:41 +08:00
|
|
|
tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) {
|
|
|
|
arena_t *arena = tcache->arena;
|
2017-04-21 08:21:37 +08:00
|
|
|
assert(arena != NULL);
|
2012-03-22 09:33:03 +08:00
|
|
|
if (config_stats) {
|
|
|
|
/* Unlink from list of extant tcaches. */
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
if (config_debug) {
|
|
|
|
bool in_ql = false;
|
|
|
|
tcache_t *iter;
|
|
|
|
ql_foreach(iter, &arena->tcache_ql, link) {
|
|
|
|
if (iter == tcache) {
|
|
|
|
in_ql = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
assert(in_ql);
|
|
|
|
}
|
|
|
|
ql_remove(&arena->tcache_ql, tcache, link);
|
2017-08-12 08:34:21 +08:00
|
|
|
ql_remove(&arena->cache_bin_array_descriptor_ql,
|
|
|
|
&tcache->cache_bin_array_descriptor, link);
|
2016-05-11 13:21:10 +08:00
|
|
|
tcache_stats_merge(tsdn, tcache, arena);
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache->arena = NULL;
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
|
|
|
|
2016-05-11 13:21:10 +08:00
|
|
|
void
|
2017-03-07 04:51:41 +08:00
|
|
|
tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
|
|
|
|
tcache_arena_dissociate(tsdn, tcache);
|
|
|
|
tcache_arena_associate(tsdn, tcache, arena);
|
2016-05-11 13:21:10 +08:00
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
bool
|
|
|
|
tsd_tcache_enabled_data_init(tsd_t *tsd) {
|
|
|
|
/* Called upon tsd initialization. */
|
2017-04-06 10:23:41 +08:00
|
|
|
tsd_tcache_enabled_set(tsd, opt_tcache);
|
2017-04-12 14:13:45 +08:00
|
|
|
tsd_slow_update(tsd);
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
if (opt_tcache) {
|
|
|
|
/* Trigger tcache init. */
|
|
|
|
tsd_tcache_data_init(tsd);
|
|
|
|
}
|
2014-04-16 04:28:37 +08:00
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2020-03-03 10:28:17 +08:00
|
|
|
tcache_init(tsd_t *tsd, tcache_t *tcache, void *mem) {
|
2017-03-28 12:50:38 +08:00
|
|
|
memset(&tcache->link, 0, sizeof(ql_elm(tcache_t)));
|
|
|
|
tcache->next_gc_bin = 0;
|
|
|
|
tcache->arena = NULL;
|
2020-03-03 10:28:17 +08:00
|
|
|
tcache->dyn_alloc = mem;
|
2017-03-28 12:50:38 +08:00
|
|
|
|
|
|
|
assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
|
2017-12-15 04:46:39 +08:00
|
|
|
memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS);
|
|
|
|
memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS));
|
2019-08-10 13:12:47 +08:00
|
|
|
|
2017-04-07 03:35:22 +08:00
|
|
|
unsigned i = 0;
|
2020-03-01 06:41:47 +08:00
|
|
|
size_t cur_offset = 0;
|
2020-03-03 10:28:17 +08:00
|
|
|
cache_bin_preincrement(tcache_bin_info, nhbins, mem,
|
2020-03-01 06:41:47 +08:00
|
|
|
&cur_offset);
|
2017-12-15 04:46:39 +08:00
|
|
|
for (; i < SC_NBINS; i++) {
|
2017-04-07 03:35:22 +08:00
|
|
|
tcache->lg_fill_div[i] = 1;
|
2019-08-21 09:14:18 +08:00
|
|
|
tcache->bin_refilled[i] = false;
|
2019-08-10 13:12:47 +08:00
|
|
|
cache_bin_t *bin = tcache_small_bin_get(tcache, i);
|
2020-03-03 10:28:17 +08:00
|
|
|
cache_bin_init(bin, &tcache_bin_info[i], mem,
|
2020-03-01 06:41:47 +08:00
|
|
|
&cur_offset);
|
2017-04-07 03:35:22 +08:00
|
|
|
}
|
|
|
|
for (; i < nhbins; i++) {
|
2019-08-10 13:12:47 +08:00
|
|
|
cache_bin_t *bin = tcache_large_bin_get(tcache, i);
|
2020-03-03 10:28:17 +08:00
|
|
|
cache_bin_init(bin, &tcache_bin_info[i], mem,
|
2020-03-01 06:41:47 +08:00
|
|
|
&cur_offset);
|
2014-04-16 04:28:37 +08:00
|
|
|
}
|
2020-03-03 10:28:17 +08:00
|
|
|
cache_bin_postincrement(tcache_bin_info, nhbins, mem,
|
2020-03-01 06:41:47 +08:00
|
|
|
&cur_offset);
|
2019-08-10 13:12:47 +08:00
|
|
|
/* Sanity check that the whole stack is used. */
|
2020-03-01 06:41:47 +08:00
|
|
|
assert(cur_offset == tcache_bin_alloc_size);
|
2017-03-28 12:50:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Initialize auto tcache (embedded in TSD). */
|
|
|
|
bool
|
|
|
|
tsd_tcache_data_init(tsd_t *tsd) {
|
2017-04-27 09:37:44 +08:00
|
|
|
tcache_t *tcache = tsd_tcachep_get_unsafe(tsd);
|
2020-03-03 10:40:31 +08:00
|
|
|
assert(cache_bin_still_zero_initialized(
|
|
|
|
tcache_small_bin_get(tcache, 0)));
|
2020-03-01 06:41:47 +08:00
|
|
|
size_t alignment = tcache_bin_alloc_alignment;
|
|
|
|
size_t size = sz_sa2u(tcache_bin_alloc_size, alignment);
|
2019-08-24 07:06:50 +08:00
|
|
|
|
2020-03-03 10:28:17 +08:00
|
|
|
void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL,
|
|
|
|
true, arena_get(TSDN_NULL, 0, true));
|
|
|
|
if (mem == NULL) {
|
2017-03-28 12:50:38 +08:00
|
|
|
return true;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2017-03-28 12:50:38 +08:00
|
|
|
|
2020-03-03 10:28:17 +08:00
|
|
|
tcache_init(tsd, tcache, mem);
|
2017-03-28 12:50:38 +08:00
|
|
|
/*
|
|
|
|
* Initialization is a bit tricky here. After malloc init is done, all
|
|
|
|
* threads can rely on arena_choose and associate tcache accordingly.
|
|
|
|
* However, the thread that does actual malloc bootstrapping relies on
|
|
|
|
* functional tsd, and it can only rely on a0. In that case, we
|
|
|
|
* associate its tcache to a0 temporarily, and later on
|
|
|
|
* arena_choose_hard() will re-associate properly.
|
|
|
|
*/
|
|
|
|
tcache->arena = NULL;
|
|
|
|
arena_t *arena;
|
|
|
|
if (!malloc_initialized()) {
|
|
|
|
/* If in initialization, assign to a0. */
|
|
|
|
arena = arena_get(tsd_tsdn(tsd), 0, false);
|
|
|
|
tcache_arena_associate(tsd_tsdn(tsd), tcache, arena);
|
|
|
|
} else {
|
|
|
|
arena = arena_choose(tsd, NULL);
|
|
|
|
/* This may happen if thread.tcache.enabled is used. */
|
|
|
|
if (tcache->arena == NULL) {
|
|
|
|
tcache_arena_associate(tsd_tsdn(tsd), tcache, arena);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
assert(arena == tcache->arena);
|
|
|
|
|
|
|
|
return false;
|
2014-04-16 04:28:37 +08:00
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
/* Created manual tcache for tcache.create mallctl. */
|
2010-01-17 01:53:50 +08:00
|
|
|
tcache_t *
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_create_explicit(tsd_t *tsd) {
|
2020-03-01 06:41:47 +08:00
|
|
|
/*
|
|
|
|
* We place the cache bin stacks, then the tcache_t, then a pointer to
|
|
|
|
* the beginning of the whole allocation (for freeing). The makes sure
|
|
|
|
* the cache bins have the requested alignment.
|
|
|
|
*/
|
2020-03-03 10:28:17 +08:00
|
|
|
size_t size = tcache_bin_alloc_size + sizeof(tcache_t);
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
/* Naturally align the pointer stacks. */
|
|
|
|
size = PTR_CEILING(size);
|
2020-03-01 06:41:47 +08:00
|
|
|
size = sz_sa2u(size, tcache_bin_alloc_alignment);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2020-03-01 06:41:47 +08:00
|
|
|
void *mem = ipallocztm(tsd_tsdn(tsd), size, tcache_bin_alloc_alignment,
|
|
|
|
true, NULL, true, arena_get(TSDN_NULL, 0, true));
|
|
|
|
if (mem == NULL) {
|
2017-01-20 10:15:45 +08:00
|
|
|
return NULL;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2020-03-03 10:28:17 +08:00
|
|
|
tcache_t *tcache = (void *)((uintptr_t)mem + tcache_bin_alloc_size);
|
|
|
|
tcache_init(tsd, tcache, mem);
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL));
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-01-20 10:15:45 +08:00
|
|
|
return tcache;
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2014-09-23 12:09:23 +08:00
|
|
|
static void
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
|
2017-04-21 08:21:37 +08:00
|
|
|
assert(tcache->arena != NULL);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-12-15 04:46:39 +08:00
|
|
|
for (unsigned i = 0; i < SC_NBINS; i++) {
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
|
2015-02-14 07:28:56 +08:00
|
|
|
tcache_bin_flush_small(tsd, tcache, tbin, i, 0);
|
2010-03-08 07:34:14 +08:00
|
|
|
|
2017-01-14 07:22:16 +08:00
|
|
|
if (config_stats) {
|
|
|
|
assert(tbin->tstats.nrequests == 0);
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
}
|
2017-12-15 04:46:39 +08:00
|
|
|
for (unsigned i = SC_NBINS; i < nhbins; i++) {
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
|
2019-08-13 02:08:39 +08:00
|
|
|
tcache_bin_flush_large(tsd, tcache, tbin, i, 0);
|
2010-03-18 07:27:39 +08:00
|
|
|
|
2017-01-14 07:22:16 +08:00
|
|
|
if (config_stats) {
|
|
|
|
assert(tbin->tstats.nrequests == 0);
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
|
|
|
}
|
2017-03-28 12:50:38 +08:00
|
|
|
}
|
2010-02-12 05:19:21 +08:00
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
void
|
2017-06-16 06:16:18 +08:00
|
|
|
tcache_flush(tsd_t *tsd) {
|
2017-04-21 08:21:37 +08:00
|
|
|
assert(tcache_available(tsd));
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_flush_cache(tsd, tsd_tcachep_get(tsd));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
|
|
|
|
tcache_flush_cache(tsd, tcache);
|
2018-11-09 04:24:38 +08:00
|
|
|
arena_t *arena = tcache->arena;
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_arena_dissociate(tsd_tsdn(tsd), tcache);
|
|
|
|
|
|
|
|
if (tsd_tcache) {
|
2019-08-10 13:12:47 +08:00
|
|
|
cache_bin_t *bin = tcache_small_bin_get(tcache, 0);
|
2020-03-03 10:14:19 +08:00
|
|
|
cache_bin_assert_empty(bin, &tcache_bin_info[0]);
|
2017-03-28 12:50:38 +08:00
|
|
|
}
|
2020-03-03 10:28:17 +08:00
|
|
|
idalloctm(tsd_tsdn(tsd), tcache->dyn_alloc, NULL, NULL, true, true);
|
2018-11-09 04:24:38 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The deallocation and tcache flush above may not trigger decay since
|
|
|
|
* we are on the tcache shutdown path (potentially with non-nominal
|
|
|
|
* tsd). Manually trigger decay to avoid pathological cases. Also
|
|
|
|
* include arena 0 because the tcache array is allocated from it.
|
|
|
|
*/
|
|
|
|
arena_decay(tsd_tsdn(tsd), arena_get(tsd_tsdn(tsd), 0, false),
|
|
|
|
false, false);
|
|
|
|
|
2019-01-12 03:22:11 +08:00
|
|
|
if (arena_nthreads_get(arena, false) == 0 &&
|
|
|
|
!background_thread_enabled()) {
|
2018-11-09 04:24:38 +08:00
|
|
|
/* Force purging when no threads assigned to the arena anymore. */
|
|
|
|
arena_decay(tsd_tsdn(tsd), arena, false, true);
|
|
|
|
} else {
|
|
|
|
arena_decay(tsd_tsdn(tsd), arena, false, false);
|
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
/* For auto tcache (embedded in TSD) only. */
|
2012-03-22 09:33:03 +08:00
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_cleanup(tsd_t *tsd) {
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_t *tcache = tsd_tcachep_get(tsd);
|
|
|
|
if (!tcache_available(tsd)) {
|
2017-04-06 10:23:41 +08:00
|
|
|
assert(tsd_tcache_enabled_get(tsd) == false);
|
2020-03-03 10:40:31 +08:00
|
|
|
assert(cache_bin_still_zero_initialized(
|
|
|
|
tcache_small_bin_get(tcache, 0)));
|
2017-03-28 12:50:38 +08:00
|
|
|
return;
|
|
|
|
}
|
2017-04-06 10:23:41 +08:00
|
|
|
assert(tsd_tcache_enabled_get(tsd));
|
2020-03-03 10:40:31 +08:00
|
|
|
assert(!cache_bin_still_zero_initialized(
|
|
|
|
tcache_small_bin_get(tcache, 0)));
|
2017-03-28 12:50:38 +08:00
|
|
|
|
|
|
|
tcache_destroy(tsd, tcache, true);
|
|
|
|
if (config_debug) {
|
2020-03-03 10:40:31 +08:00
|
|
|
/*
|
|
|
|
* For debug testing only, we want to pretend we're still in the
|
|
|
|
* zero-initialized state.
|
|
|
|
*/
|
|
|
|
memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS);
|
|
|
|
memset(tcache->bins_large, 0,
|
|
|
|
sizeof(cache_bin_t) * (nhbins - SC_NBINS));
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
|
2010-01-17 01:53:50 +08:00
|
|
|
unsigned i;
|
|
|
|
|
2013-10-22 06:00:06 +08:00
|
|
|
cassert(config_stats);
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
/* Merge and reset tcache stats. */
|
2017-12-15 04:46:39 +08:00
|
|
|
for (i = 0; i < SC_NBINS; i++) {
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
|
2018-11-13 07:56:04 +08:00
|
|
|
unsigned binshard;
|
|
|
|
bin_t *bin = arena_bin_choose_lock(tsdn, arena, i, &binshard);
|
2010-03-08 07:34:14 +08:00
|
|
|
bin->stats.nrequests += tbin->tstats.nrequests;
|
2016-05-11 13:21:10 +08:00
|
|
|
malloc_mutex_unlock(tsdn, &bin->lock);
|
2010-03-08 07:34:14 +08:00
|
|
|
tbin->tstats.nrequests = 0;
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
|
|
|
|
for (; i < nhbins; i++) {
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
|
2019-05-07 07:36:55 +08:00
|
|
|
arena_stats_large_flush_nrequests_add(tsdn, &arena->stats, i,
|
2017-02-13 09:43:33 +08:00
|
|
|
tbin->tstats.nrequests);
|
2010-03-18 07:27:39 +08:00
|
|
|
tbin->tstats.nrequests = 0;
|
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
static bool
|
2020-02-18 06:09:29 +08:00
|
|
|
tcaches_create_prep(tsd_t *tsd, base_t *base) {
|
2017-01-30 13:32:39 +08:00
|
|
|
bool err;
|
|
|
|
|
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
|
|
|
|
if (tcaches == NULL) {
|
2020-02-18 06:09:29 +08:00
|
|
|
tcaches = base_alloc(tsd_tsdn(tsd), base,
|
|
|
|
sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1), CACHELINE);
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcaches == NULL) {
|
2017-01-30 13:32:39 +08:00
|
|
|
err = true;
|
|
|
|
goto label_return;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) {
|
2017-01-30 13:32:39 +08:00
|
|
|
err = true;
|
|
|
|
goto label_return;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2017-01-30 13:32:39 +08:00
|
|
|
|
|
|
|
err = false;
|
|
|
|
label_return:
|
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
2020-02-18 06:09:29 +08:00
|
|
|
tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind) {
|
2017-05-23 10:32:04 +08:00
|
|
|
witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);
|
2017-01-30 13:32:39 +08:00
|
|
|
|
|
|
|
bool err;
|
|
|
|
|
2020-02-18 06:09:29 +08:00
|
|
|
if (tcaches_create_prep(tsd, base)) {
|
2017-01-30 13:32:39 +08:00
|
|
|
err = true;
|
|
|
|
goto label_return;
|
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_t *tcache = tcache_create_explicit(tsd);
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcache == NULL) {
|
2017-01-30 13:32:39 +08:00
|
|
|
err = true;
|
|
|
|
goto label_return;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
tcaches_t *elm;
|
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
if (tcaches_avail != NULL) {
|
|
|
|
elm = tcaches_avail;
|
|
|
|
tcaches_avail = tcaches_avail->next;
|
|
|
|
elm->tcache = tcache;
|
2016-02-25 04:42:23 +08:00
|
|
|
*r_ind = (unsigned)(elm - tcaches);
|
2015-01-30 07:30:47 +08:00
|
|
|
} else {
|
|
|
|
elm = &tcaches[tcaches_past];
|
|
|
|
elm->tcache = tcache;
|
|
|
|
*r_ind = tcaches_past;
|
|
|
|
tcaches_past++;
|
|
|
|
}
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
err = false;
|
|
|
|
label_return:
|
2017-05-23 10:32:04 +08:00
|
|
|
witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);
|
2017-01-30 13:32:39 +08:00
|
|
|
return err;
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
2017-03-16 03:50:37 +08:00
|
|
|
static tcache_t *
|
2018-11-10 06:45:06 +08:00
|
|
|
tcaches_elm_remove(tsd_t *tsd, tcaches_t *elm, bool allow_reinit) {
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx);
|
|
|
|
|
2017-01-16 08:56:30 +08:00
|
|
|
if (elm->tcache == NULL) {
|
2017-03-16 03:50:37 +08:00
|
|
|
return NULL;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2017-03-16 03:50:37 +08:00
|
|
|
tcache_t *tcache = elm->tcache;
|
2018-11-10 06:45:06 +08:00
|
|
|
if (allow_reinit) {
|
|
|
|
elm->tcache = TCACHES_ELM_NEED_REINIT;
|
|
|
|
} else {
|
|
|
|
elm->tcache = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tcache == TCACHES_ELM_NEED_REINIT) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2017-03-16 03:50:37 +08:00
|
|
|
return tcache;
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcaches_flush(tsd_t *tsd, unsigned ind) {
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
|
2018-11-10 06:45:06 +08:00
|
|
|
tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind], true);
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
|
2017-03-16 03:50:37 +08:00
|
|
|
if (tcache != NULL) {
|
2018-11-10 06:45:06 +08:00
|
|
|
/* Destroy the tcache; recreate in tcaches_get() if needed. */
|
|
|
|
tcache_destroy(tsd, tcache, false);
|
2017-03-16 03:50:37 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcaches_destroy(tsd_t *tsd, unsigned ind) {
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
tcaches_t *elm = &tcaches[ind];
|
2018-11-10 06:45:06 +08:00
|
|
|
tcache_t *tcache = tcaches_elm_remove(tsd, elm, false);
|
2015-01-30 07:30:47 +08:00
|
|
|
elm->next = tcaches_avail;
|
|
|
|
tcaches_avail = elm;
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
|
2017-03-16 03:50:37 +08:00
|
|
|
if (tcache != NULL) {
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_destroy(tsd, tcache, false);
|
2017-03-16 03:50:37 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
bool
|
2020-02-18 06:09:29 +08:00
|
|
|
tcache_boot(tsdn_t *tsdn, base_t *base) {
|
2016-05-28 15:17:28 +08:00
|
|
|
/* If necessary, clamp opt_lg_tcache_max. */
|
2017-01-16 08:56:30 +08:00
|
|
|
if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) <
|
2018-07-12 07:05:58 +08:00
|
|
|
SC_SMALL_MAXCLASS) {
|
|
|
|
tcache_maxclass = SC_SMALL_MAXCLASS;
|
2017-01-16 08:56:30 +08:00
|
|
|
} else {
|
2016-10-28 12:31:25 +08:00
|
|
|
tcache_maxclass = (ZU(1) << opt_lg_tcache_max);
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
|
2017-05-16 06:38:15 +08:00
|
|
|
if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES,
|
|
|
|
malloc_mutex_rank_exclusive)) {
|
2017-01-30 13:32:39 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-05-31 01:45:37 +08:00
|
|
|
nhbins = sz_size2index(tcache_maxclass) + 1;
|
2012-04-07 03:41:55 +08:00
|
|
|
|
|
|
|
/* Initialize tcache_bin_info. */
|
2020-02-18 06:09:29 +08:00
|
|
|
tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base,
|
|
|
|
nhbins * sizeof(cache_bin_info_t), CACHELINE);
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcache_bin_info == NULL) {
|
2017-01-20 10:15:45 +08:00
|
|
|
return true;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2019-08-15 04:08:06 +08:00
|
|
|
unsigned i, ncached_max;
|
2017-12-15 04:46:39 +08:00
|
|
|
for (i = 0; i < SC_NBINS; i++) {
|
2017-10-02 08:22:06 +08:00
|
|
|
if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
|
2019-08-15 04:08:06 +08:00
|
|
|
ncached_max = TCACHE_NSLOTS_SMALL_MIN;
|
2017-10-02 08:22:06 +08:00
|
|
|
} else if ((bin_infos[i].nregs << 1) <=
|
2015-05-20 08:47:16 +08:00
|
|
|
TCACHE_NSLOTS_SMALL_MAX) {
|
2019-08-15 04:08:06 +08:00
|
|
|
ncached_max = bin_infos[i].nregs << 1;
|
2012-04-07 03:41:55 +08:00
|
|
|
} else {
|
2019-08-15 04:08:06 +08:00
|
|
|
ncached_max = TCACHE_NSLOTS_SMALL_MAX;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
}
|
2020-03-01 06:41:47 +08:00
|
|
|
cache_bin_info_init(&tcache_bin_info[i], ncached_max);
|
2012-04-07 03:41:55 +08:00
|
|
|
}
|
|
|
|
for (; i < nhbins; i++) {
|
2020-03-01 06:41:47 +08:00
|
|
|
cache_bin_info_init(&tcache_bin_info[i], TCACHE_NSLOTS_LARGE);
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
2020-03-01 06:41:47 +08:00
|
|
|
cache_bin_info_compute_alloc(tcache_bin_info, i, &tcache_bin_alloc_size,
|
|
|
|
&tcache_bin_alloc_alignment);
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
|
2017-01-20 10:15:45 +08:00
|
|
|
return false;
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
2017-01-30 13:32:39 +08:00
|
|
|
|
|
|
|
void
|
|
|
|
tcache_prefork(tsdn_t *tsdn) {
|
2019-08-08 11:12:25 +08:00
|
|
|
malloc_mutex_prefork(tsdn, &tcaches_mtx);
|
2017-01-30 13:32:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tcache_postfork_parent(tsdn_t *tsdn) {
|
2019-08-08 11:12:25 +08:00
|
|
|
malloc_mutex_postfork_parent(tsdn, &tcaches_mtx);
|
2017-01-30 13:32:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tcache_postfork_child(tsdn_t *tsdn) {
|
2019-08-08 11:12:25 +08:00
|
|
|
malloc_mutex_postfork_child(tsdn, &tcaches_mtx);
|
2017-01-30 13:32:39 +08:00
|
|
|
}
|