2017-01-20 13:41:41 +08:00
|
|
|
#define JEMALLOC_TCACHE_C_
|
2017-04-11 09:17:55 +08:00
|
|
|
#include "jemalloc/internal/jemalloc_preamble.h"
|
|
|
|
#include "jemalloc/internal/jemalloc_internal_includes.h"
|
2012-02-14 04:29:49 +08:00
|
|
|
|
2017-04-12 05:43:12 +08:00
|
|
|
#include "jemalloc/internal/assert.h"
|
2017-05-24 03:28:19 +08:00
|
|
|
#include "jemalloc/internal/mutex.h"
|
2019-03-21 04:06:53 +08:00
|
|
|
#include "jemalloc/internal/safety_check.h"
|
2017-12-15 04:46:39 +08:00
|
|
|
#include "jemalloc/internal/sc.h"
|
2017-04-12 05:43:12 +08:00
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
/******************************************************************************/
|
|
|
|
/* Data. */
|
|
|
|
|
2017-04-21 08:21:37 +08:00
|
|
|
bool opt_tcache = true;
|
2010-10-24 09:37:06 +08:00
|
|
|
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_info_t *tcache_bin_info;
|
2019-08-10 13:12:47 +08:00
|
|
|
/*
|
|
|
|
* For the total bin stack region (per tcache), reserve 2 more slots so that 1)
|
|
|
|
* the empty position can be safely read on the fast path before checking
|
2019-08-21 09:14:18 +08:00
|
|
|
* "is_empty"; and 2) the cur_ptr can go beyond the empty position by 1 step
|
|
|
|
* safely on the fast path (i.e. no overflow).
|
2019-08-10 13:12:47 +08:00
|
|
|
*/
|
|
|
|
static const unsigned total_stack_padding = sizeof(void *) * 2;
|
|
|
|
|
|
|
|
/* Total stack size required (per tcache). Include the padding above. */
|
|
|
|
static uint32_t total_stack_bytes;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
|
2016-02-25 03:02:14 +08:00
|
|
|
unsigned nhbins;
|
2012-03-22 09:33:03 +08:00
|
|
|
size_t tcache_maxclass;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2015-01-30 07:30:47 +08:00
|
|
|
tcaches_t *tcaches;
|
|
|
|
|
|
|
|
/* Index of first element within tcaches that has never been used. */
|
|
|
|
static unsigned tcaches_past;
|
|
|
|
|
|
|
|
/* Head of singly linked list tracking available tcaches elements. */
|
|
|
|
static tcaches_t *tcaches_avail;
|
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
/* Protects tcaches{,_past,_avail}. */
|
|
|
|
static malloc_mutex_t tcaches_mtx;
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
/******************************************************************************/
|
|
|
|
|
2016-03-24 06:32:07 +08:00
|
|
|
size_t
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_salloc(tsdn_t *tsdn, const void *ptr) {
|
2017-03-17 16:25:12 +08:00
|
|
|
return arena_salloc(tsdn, ptr);
|
2012-04-20 09:28:03 +08:00
|
|
|
}
|
|
|
|
|
2012-05-02 15:30:36 +08:00
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
|
2015-08-20 06:21:32 +08:00
|
|
|
szind_t binind = tcache->next_gc_bin;
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin;
|
2019-08-21 09:14:18 +08:00
|
|
|
bool is_small;
|
2017-12-15 04:46:39 +08:00
|
|
|
if (binind < SC_NBINS) {
|
2017-04-07 03:35:22 +08:00
|
|
|
tbin = tcache_small_bin_get(tcache, binind);
|
2019-08-21 09:14:18 +08:00
|
|
|
is_small = true;
|
2017-04-07 03:35:22 +08:00
|
|
|
} else {
|
|
|
|
tbin = tcache_large_bin_get(tcache, binind);
|
2019-08-21 09:14:18 +08:00
|
|
|
is_small = false;
|
2017-04-07 03:35:22 +08:00
|
|
|
}
|
2019-08-10 13:12:47 +08:00
|
|
|
|
|
|
|
cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, binind);
|
|
|
|
cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind);
|
|
|
|
if (low_water > 0) {
|
2012-05-02 15:30:36 +08:00
|
|
|
/*
|
|
|
|
* Flush (ceiling) 3/4 of the objects below the low water mark.
|
|
|
|
*/
|
2019-08-21 09:14:18 +08:00
|
|
|
if (is_small) {
|
|
|
|
assert(!tcache->bin_refilled[binind]);
|
2017-04-29 04:31:09 +08:00
|
|
|
tcache_bin_flush_small(tsd, tcache, tbin, binind,
|
2019-08-10 13:12:47 +08:00
|
|
|
ncached - low_water + (low_water >> 2));
|
2017-04-07 03:35:22 +08:00
|
|
|
/*
|
|
|
|
* Reduce fill count by 2X. Limit lg_fill_div such that
|
|
|
|
* the fill count is always at least 1.
|
|
|
|
*/
|
2019-08-15 04:08:06 +08:00
|
|
|
if ((cache_bin_ncached_max_get(binind) >>
|
2017-04-07 03:35:22 +08:00
|
|
|
(tcache->lg_fill_div[binind] + 1)) >= 1) {
|
|
|
|
tcache->lg_fill_div[binind]++;
|
|
|
|
}
|
2012-05-02 15:30:36 +08:00
|
|
|
} else {
|
2019-08-13 02:08:39 +08:00
|
|
|
tcache_bin_flush_large(tsd, tcache, tbin, binind,
|
2019-08-10 13:12:47 +08:00
|
|
|
ncached - low_water + (low_water >> 2));
|
2012-05-02 15:30:36 +08:00
|
|
|
}
|
2019-08-21 09:14:18 +08:00
|
|
|
} else if (is_small && tcache->bin_refilled[binind]) {
|
|
|
|
assert(low_water == 0);
|
2017-04-29 04:31:09 +08:00
|
|
|
/*
|
|
|
|
* Increase fill count by 2X for small bins. Make sure
|
|
|
|
* lg_fill_div stays greater than 0.
|
|
|
|
*/
|
2019-08-21 09:14:18 +08:00
|
|
|
if (tcache->lg_fill_div[binind] > 1) {
|
2017-04-29 04:31:09 +08:00
|
|
|
tcache->lg_fill_div[binind]--;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2019-08-21 09:14:18 +08:00
|
|
|
tcache->bin_refilled[binind] = false;
|
2012-05-02 15:30:36 +08:00
|
|
|
}
|
2019-08-10 13:12:47 +08:00
|
|
|
tbin->low_water_position = tbin->cur_ptr.lowbits;
|
2017-04-29 04:31:09 +08:00
|
|
|
|
|
|
|
tcache->next_gc_bin++;
|
|
|
|
if (tcache->next_gc_bin == nhbins) {
|
|
|
|
tcache->next_gc_bin = 0;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2012-05-02 15:30:36 +08:00
|
|
|
}
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
void *
|
2016-05-11 13:21:10 +08:00
|
|
|
tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin, szind_t binind, bool *tcache_success) {
|
2010-01-17 01:53:50 +08:00
|
|
|
void *ret;
|
|
|
|
|
2017-04-21 08:21:37 +08:00
|
|
|
assert(tcache->arena != NULL);
|
2019-10-15 00:35:51 +08:00
|
|
|
arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind);
|
2019-08-10 13:12:47 +08:00
|
|
|
ret = cache_bin_alloc_easy(tbin, tcache_success, binind);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-01-20 10:15:45 +08:00
|
|
|
return ret;
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2019-01-23 05:59:23 +08:00
|
|
|
/* Enabled with --enable-extra-size-check. */
|
|
|
|
static void
|
2020-02-07 05:16:07 +08:00
|
|
|
tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_t *tbin, szind_t binind,
|
|
|
|
size_t nflush, edata_t **edatas) {
|
|
|
|
/* Avoids null-checking tsdn in the loop below. */
|
|
|
|
util_assume(tsd != NULL);
|
2019-01-23 05:59:23 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Verify that the items in the tcache all have the correct size; this
|
|
|
|
* is useful for catching sized deallocation bugs, also to fail early
|
|
|
|
* instead of corrupting metadata. Since this can be turned on for opt
|
|
|
|
* builds, avoid the branch in the loop.
|
|
|
|
*/
|
2020-02-07 05:16:07 +08:00
|
|
|
size_t szind_sum = binind * nflush;
|
2019-08-10 13:12:47 +08:00
|
|
|
void **bottom_item = cache_bin_bottom_item_get(tbin, binind);
|
2019-01-23 05:59:23 +08:00
|
|
|
for (unsigned i = 0 ; i < nflush; i++) {
|
2020-02-07 05:16:07 +08:00
|
|
|
emap_full_alloc_ctx_t full_alloc_ctx;
|
2020-02-07 05:45:04 +08:00
|
|
|
emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global,
|
2020-02-07 05:16:07 +08:00
|
|
|
*(bottom_item - i), &full_alloc_ctx);
|
|
|
|
edatas[i] = full_alloc_ctx.edata;
|
|
|
|
szind_sum -= full_alloc_ctx.szind;
|
2019-01-23 05:59:23 +08:00
|
|
|
}
|
2020-02-07 05:16:07 +08:00
|
|
|
if (szind_sum != 0) {
|
2020-01-31 06:35:54 +08:00
|
|
|
safety_check_fail_sized_dealloc(false);
|
2019-01-23 05:59:23 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-24 12:33:04 +08:00
|
|
|
JEMALLOC_ALWAYS_INLINE bool
|
|
|
|
tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind,
|
|
|
|
unsigned cur_binshard, bool small) {
|
|
|
|
if (small) {
|
|
|
|
return edata_arena_ind_get(edata) == cur_arena_ind
|
|
|
|
&& edata_binshard_get(edata) == cur_binshard;
|
|
|
|
} else {
|
|
|
|
return edata_arena_ind_get(edata) == cur_arena_ind;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
JEMALLOC_ALWAYS_INLINE void
|
|
|
|
tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
|
|
|
|
szind_t binind, unsigned rem, bool small) {
|
|
|
|
/*
|
|
|
|
* A couple lookup calls take tsdn; declare it once for convenience
|
|
|
|
* instead of calling tsd_tsdn(tsd) all the time.
|
|
|
|
*/
|
|
|
|
tsdn_t *tsdn = tsd_tsdn(tsd);
|
|
|
|
|
|
|
|
if (small) {
|
|
|
|
assert(binind < SC_NBINS);
|
|
|
|
} else {
|
|
|
|
assert(binind < nhbins);
|
|
|
|
}
|
2019-08-10 13:12:47 +08:00
|
|
|
cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind);
|
|
|
|
assert((cache_bin_sz_t)rem <= ncached);
|
2020-02-24 12:33:04 +08:00
|
|
|
arena_t *tcache_arena = tcache->arena;
|
|
|
|
assert(tcache_arena != NULL);
|
2010-03-14 12:32:56 +08:00
|
|
|
|
2019-08-10 13:12:47 +08:00
|
|
|
unsigned nflush = ncached - rem;
|
2020-02-24 12:33:04 +08:00
|
|
|
/*
|
|
|
|
* Variable length array must have > 0 length; the last element is never
|
|
|
|
* touched (it's just included to satisfy the no-zero-length rule).
|
|
|
|
*/
|
2020-02-14 12:04:22 +08:00
|
|
|
VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1);
|
2019-08-10 13:12:47 +08:00
|
|
|
void **bottom_item = cache_bin_bottom_item_get(tbin, binind);
|
2020-02-29 03:37:39 +08:00
|
|
|
|
2019-12-10 06:36:45 +08:00
|
|
|
/* Look up edata once per item. */
|
2019-03-20 07:04:35 +08:00
|
|
|
if (config_opt_safety_checks) {
|
2020-02-07 05:16:07 +08:00
|
|
|
tbin_edatas_lookup_size_check(tsd, tbin, binind, nflush,
|
2020-02-08 06:53:36 +08:00
|
|
|
item_edata);
|
2019-03-20 07:04:35 +08:00
|
|
|
} else {
|
|
|
|
for (unsigned i = 0 ; i < nflush; i++) {
|
2020-02-07 05:45:04 +08:00
|
|
|
item_edata[i] = emap_edata_lookup(tsd_tsdn(tsd),
|
|
|
|
&emap_global, *(bottom_item - i));
|
2019-03-20 07:04:35 +08:00
|
|
|
}
|
2017-03-28 08:22:01 +08:00
|
|
|
}
|
2020-02-08 06:53:36 +08:00
|
|
|
|
2020-02-24 12:33:04 +08:00
|
|
|
/*
|
|
|
|
* The slabs where we freed the last remaining object in the slab (and
|
|
|
|
* so need to free the slab itself).
|
|
|
|
* Used only if small == true.
|
|
|
|
*/
|
2020-02-08 06:53:36 +08:00
|
|
|
unsigned dalloc_count = 0;
|
|
|
|
VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1);
|
2020-02-24 12:33:04 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We're about to grab a bunch of locks. If one of them happens to be
|
|
|
|
* the one guarding the arena-level stats counters we flush our
|
|
|
|
* thread-local ones to, we do so under one critical section.
|
|
|
|
*/
|
|
|
|
bool merged_stats = false;
|
2017-03-28 08:22:01 +08:00
|
|
|
while (nflush > 0) {
|
2020-02-24 12:33:04 +08:00
|
|
|
/* Lock the arena, or bin, associated with the first object. */
|
2019-12-10 06:36:45 +08:00
|
|
|
edata_t *edata = item_edata[0];
|
2020-02-24 12:33:04 +08:00
|
|
|
unsigned cur_arena_ind = edata_arena_ind_get(edata);
|
|
|
|
arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false);
|
2016-03-24 11:29:33 +08:00
|
|
|
|
2011-03-15 03:56:51 +08:00
|
|
|
/*
|
2020-02-24 12:33:04 +08:00
|
|
|
* These assignments are always overwritten when small is true,
|
|
|
|
* and their values are always ignored when small is false, but
|
|
|
|
* to avoid the technical UB when we pass them as parameters, we
|
|
|
|
* need to intialize them.
|
2011-03-15 03:56:51 +08:00
|
|
|
*/
|
2020-02-24 12:33:04 +08:00
|
|
|
unsigned cur_binshard = 0;
|
|
|
|
bin_t *cur_bin = NULL;
|
|
|
|
if (small) {
|
|
|
|
cur_binshard = edata_binshard_get(edata);
|
|
|
|
cur_bin = &cur_arena->bins[binind].bin_shards[
|
|
|
|
cur_binshard];
|
|
|
|
assert(cur_binshard < bin_infos[binind].n_shards);
|
|
|
|
}
|
2017-01-30 13:57:14 +08:00
|
|
|
|
2020-02-24 12:33:04 +08:00
|
|
|
if (small) {
|
|
|
|
malloc_mutex_lock(tsdn, &cur_bin->lock);
|
2018-05-30 06:55:04 +08:00
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
if (!small && !arena_is_auto(cur_arena)) {
|
|
|
|
malloc_mutex_lock(tsdn, &cur_arena->large_mtx);
|
2017-01-30 13:57:14 +08:00
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If we acquired the right lock and have some stats to flush,
|
|
|
|
* flush them.
|
|
|
|
*/
|
|
|
|
if (config_stats && tcache_arena == cur_arena
|
|
|
|
&& !merged_stats) {
|
|
|
|
merged_stats = true;
|
|
|
|
if (small) {
|
|
|
|
cur_bin->stats.nflushes++;
|
|
|
|
cur_bin->stats.nrequests +=
|
|
|
|
tbin->tstats.nrequests;
|
|
|
|
tbin->tstats.nrequests = 0;
|
|
|
|
} else {
|
|
|
|
arena_stats_large_flush_nrequests_add(tsdn,
|
|
|
|
&tcache_arena->stats, binind,
|
2017-02-13 09:43:33 +08:00
|
|
|
tbin->tstats.nrequests);
|
2012-02-11 12:22:09 +08:00
|
|
|
tbin->tstats.nrequests = 0;
|
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Large allocations need special prep done. Afterwards, we can
|
|
|
|
* drop the large lock.
|
|
|
|
*/
|
|
|
|
if (!small) {
|
|
|
|
for (unsigned i = 0; i < nflush; i++) {
|
|
|
|
void *ptr = *(bottom_item - i);
|
|
|
|
edata = item_edata[i];
|
|
|
|
assert(ptr != NULL && edata != NULL);
|
|
|
|
|
|
|
|
if (tcache_bin_flush_match(edata, cur_arena_ind,
|
|
|
|
cur_binshard, small)) {
|
2020-02-29 03:37:39 +08:00
|
|
|
large_dalloc_prep_locked(tsdn,
|
2020-02-24 12:33:04 +08:00
|
|
|
edata);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!small && !arena_is_auto(cur_arena)) {
|
|
|
|
malloc_mutex_unlock(tsdn, &cur_arena->large_mtx);
|
2018-05-30 06:55:04 +08:00
|
|
|
}
|
2017-01-30 13:57:14 +08:00
|
|
|
|
2020-02-24 12:33:04 +08:00
|
|
|
/* Deallocate whatever we can. */
|
2017-01-30 13:57:14 +08:00
|
|
|
unsigned ndeferred = 0;
|
2017-04-29 04:31:09 +08:00
|
|
|
for (unsigned i = 0; i < nflush; i++) {
|
2019-08-10 13:12:47 +08:00
|
|
|
void *ptr = *(bottom_item - i);
|
2019-12-10 06:36:45 +08:00
|
|
|
edata = item_edata[i];
|
|
|
|
assert(ptr != NULL && edata != NULL);
|
2020-02-24 12:33:04 +08:00
|
|
|
if (!tcache_bin_flush_match(edata, cur_arena_ind,
|
|
|
|
cur_binshard, small)) {
|
2010-03-18 07:27:39 +08:00
|
|
|
/*
|
2020-02-24 12:33:04 +08:00
|
|
|
* The object was allocated either via a
|
|
|
|
* different arena, or a different bin in this
|
|
|
|
* arena. Either way, stash the object so that
|
|
|
|
* it can be handled in a future pass.
|
2010-03-18 07:27:39 +08:00
|
|
|
*/
|
2019-08-10 13:12:47 +08:00
|
|
|
*(bottom_item - ndeferred) = ptr;
|
2019-12-10 06:36:45 +08:00
|
|
|
item_edata[ndeferred] = edata;
|
2010-03-18 07:27:39 +08:00
|
|
|
ndeferred++;
|
2020-02-24 12:33:04 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (small) {
|
2020-02-29 03:37:39 +08:00
|
|
|
if (arena_dalloc_bin_locked(tsdn, cur_arena,
|
|
|
|
cur_bin, binind, edata, ptr)) {
|
2020-02-24 12:33:04 +08:00
|
|
|
dalloc_slabs[dalloc_count] = edata;
|
|
|
|
dalloc_count++;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
large_dalloc_finish(tsdn, edata);
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
|
|
|
|
if (small) {
|
|
|
|
malloc_mutex_unlock(tsdn, &cur_bin->lock);
|
|
|
|
}
|
|
|
|
arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred);
|
2017-01-30 13:57:14 +08:00
|
|
|
nflush = ndeferred;
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
|
|
|
|
/* Handle all deferred slab dalloc. */
|
|
|
|
assert(small || dalloc_count == 0);
|
|
|
|
for (unsigned i = 0; i < dalloc_count; i++) {
|
|
|
|
edata_t *slab = dalloc_slabs[i];
|
|
|
|
arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2014-10-04 01:16:09 +08:00
|
|
|
if (config_stats && !merged_stats) {
|
2020-02-24 12:33:04 +08:00
|
|
|
if (small) {
|
|
|
|
/*
|
|
|
|
* The flush loop didn't happen to flush to this
|
|
|
|
* thread's arena, so the stats didn't get merged.
|
|
|
|
* Manually do so now.
|
|
|
|
*/
|
|
|
|
unsigned binshard;
|
|
|
|
bin_t *bin = arena_bin_choose_lock(tsdn, tcache_arena,
|
|
|
|
binind, &binshard);
|
|
|
|
bin->stats.nflushes++;
|
|
|
|
bin->stats.nrequests += tbin->tstats.nrequests;
|
|
|
|
tbin->tstats.nrequests = 0;
|
|
|
|
malloc_mutex_unlock(tsdn, &bin->lock);
|
|
|
|
} else {
|
|
|
|
arena_stats_large_flush_nrequests_add(tsdn,
|
|
|
|
&tcache_arena->stats, binind,
|
|
|
|
tbin->tstats.nrequests);
|
|
|
|
tbin->tstats.nrequests = 0;
|
|
|
|
}
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
|
2019-08-10 13:12:47 +08:00
|
|
|
memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem *
|
2017-04-29 04:31:09 +08:00
|
|
|
sizeof(void *));
|
2019-08-10 13:12:47 +08:00
|
|
|
cache_bin_ncached_set(tbin, binind, rem);
|
|
|
|
if (tbin->cur_ptr.lowbits > tbin->low_water_position) {
|
|
|
|
tbin->low_water_position = tbin->cur_ptr.lowbits;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
|
|
|
|
2020-02-24 12:33:04 +08:00
|
|
|
void
|
|
|
|
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
|
|
|
|
szind_t binind, unsigned rem) {
|
|
|
|
tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
|
|
|
|
szind_t binind, unsigned rem) {
|
|
|
|
tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, false);
|
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
|
2017-03-28 12:50:38 +08:00
|
|
|
assert(tcache->arena == NULL);
|
2017-03-07 04:51:41 +08:00
|
|
|
tcache->arena = arena;
|
2017-03-28 12:50:38 +08:00
|
|
|
|
2012-03-22 09:33:03 +08:00
|
|
|
if (config_stats) {
|
|
|
|
/* Link into list of extant tcaches. */
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
|
2017-08-12 08:34:21 +08:00
|
|
|
|
2012-03-22 09:33:03 +08:00
|
|
|
ql_elm_new(tcache, link);
|
|
|
|
ql_tail_insert(&arena->tcache_ql, tcache, link);
|
2017-08-12 08:34:21 +08:00
|
|
|
cache_bin_array_descriptor_init(
|
|
|
|
&tcache->cache_bin_array_descriptor, tcache->bins_small,
|
|
|
|
tcache->bins_large);
|
|
|
|
ql_tail_insert(&arena->cache_bin_array_descriptor_ql,
|
|
|
|
&tcache->cache_bin_array_descriptor, link);
|
|
|
|
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-05-11 13:21:10 +08:00
|
|
|
static void
|
2017-03-07 04:51:41 +08:00
|
|
|
tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) {
|
|
|
|
arena_t *arena = tcache->arena;
|
2017-04-21 08:21:37 +08:00
|
|
|
assert(arena != NULL);
|
2012-03-22 09:33:03 +08:00
|
|
|
if (config_stats) {
|
|
|
|
/* Unlink from list of extant tcaches. */
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
if (config_debug) {
|
|
|
|
bool in_ql = false;
|
|
|
|
tcache_t *iter;
|
|
|
|
ql_foreach(iter, &arena->tcache_ql, link) {
|
|
|
|
if (iter == tcache) {
|
|
|
|
in_ql = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
assert(in_ql);
|
|
|
|
}
|
|
|
|
ql_remove(&arena->tcache_ql, tcache, link);
|
2017-08-12 08:34:21 +08:00
|
|
|
ql_remove(&arena->cache_bin_array_descriptor_ql,
|
|
|
|
&tcache->cache_bin_array_descriptor, link);
|
2016-05-11 13:21:10 +08:00
|
|
|
tcache_stats_merge(tsdn, tcache, arena);
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache->arena = NULL;
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
|
|
|
|
2016-05-11 13:21:10 +08:00
|
|
|
void
|
2017-03-07 04:51:41 +08:00
|
|
|
tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
|
|
|
|
tcache_arena_dissociate(tsdn, tcache);
|
|
|
|
tcache_arena_associate(tsdn, tcache, arena);
|
2016-05-11 13:21:10 +08:00
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
bool
|
|
|
|
tsd_tcache_enabled_data_init(tsd_t *tsd) {
|
|
|
|
/* Called upon tsd initialization. */
|
2017-04-06 10:23:41 +08:00
|
|
|
tsd_tcache_enabled_set(tsd, opt_tcache);
|
2017-04-12 14:13:45 +08:00
|
|
|
tsd_slow_update(tsd);
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
if (opt_tcache) {
|
|
|
|
/* Trigger tcache init. */
|
|
|
|
tsd_tcache_data_init(tsd);
|
|
|
|
}
|
2014-04-16 04:28:37 +08:00
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-08-10 13:12:47 +08:00
|
|
|
static bool
|
|
|
|
tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) {
|
2019-11-22 06:10:03 +08:00
|
|
|
assert(sizeof(bin->cur_ptr) == sizeof(void *));
|
2019-08-10 13:12:47 +08:00
|
|
|
/*
|
|
|
|
* The full_position points to the lowest available space. Allocations
|
|
|
|
* will access the slots toward higher addresses (for the benefit of
|
|
|
|
* adjacent prefetch).
|
|
|
|
*/
|
|
|
|
void *full_position = (void *)*stack_cur;
|
2019-08-15 04:08:06 +08:00
|
|
|
uint32_t bin_stack_size = tcache_bin_info[ind].stack_size;
|
2019-08-10 13:12:47 +08:00
|
|
|
|
|
|
|
*stack_cur += bin_stack_size;
|
|
|
|
void *empty_position = (void *)*stack_cur;
|
|
|
|
|
|
|
|
/* Init to the empty position. */
|
|
|
|
bin->cur_ptr.ptr = empty_position;
|
|
|
|
bin->low_water_position = bin->cur_ptr.lowbits;
|
|
|
|
bin->full_position = (uint32_t)(uintptr_t)full_position;
|
|
|
|
assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size);
|
|
|
|
assert(cache_bin_ncached_get(bin, ind) == 0);
|
|
|
|
assert(cache_bin_empty_position_get(bin, ind) == empty_position);
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Sanity check only. */
|
|
|
|
static bool
|
|
|
|
tcache_bin_lowbits_overflowable(void *ptr) {
|
|
|
|
uint32_t lowbits = (uint32_t)((uintptr_t)ptr + total_stack_bytes);
|
|
|
|
return lowbits < (uint32_t)(uintptr_t)ptr;
|
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
static void
|
|
|
|
tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
|
2019-08-10 13:12:47 +08:00
|
|
|
assert(!tcache_bin_lowbits_overflowable(avail_stack));
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
memset(&tcache->link, 0, sizeof(ql_elm(tcache_t)));
|
|
|
|
tcache->next_gc_bin = 0;
|
|
|
|
tcache->arena = NULL;
|
|
|
|
|
|
|
|
assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
|
2017-12-15 04:46:39 +08:00
|
|
|
memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS);
|
|
|
|
memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS));
|
2019-08-10 13:12:47 +08:00
|
|
|
|
2017-04-07 03:35:22 +08:00
|
|
|
unsigned i = 0;
|
2019-08-10 13:12:47 +08:00
|
|
|
uintptr_t stack_cur = (uintptr_t)avail_stack;
|
2017-12-15 04:46:39 +08:00
|
|
|
for (; i < SC_NBINS; i++) {
|
2017-04-07 03:35:22 +08:00
|
|
|
tcache->lg_fill_div[i] = 1;
|
2019-08-21 09:14:18 +08:00
|
|
|
tcache->bin_refilled[i] = false;
|
2019-08-10 13:12:47 +08:00
|
|
|
cache_bin_t *bin = tcache_small_bin_get(tcache, i);
|
|
|
|
tcache_bin_init(bin, i, &stack_cur);
|
2017-04-07 03:35:22 +08:00
|
|
|
}
|
|
|
|
for (; i < nhbins; i++) {
|
2019-08-10 13:12:47 +08:00
|
|
|
cache_bin_t *bin = tcache_large_bin_get(tcache, i);
|
|
|
|
tcache_bin_init(bin, i, &stack_cur);
|
2014-04-16 04:28:37 +08:00
|
|
|
}
|
2019-08-10 13:12:47 +08:00
|
|
|
|
|
|
|
/* Sanity check that the whole stack is used. */
|
|
|
|
size_t stack_offset = stack_cur - (uintptr_t)avail_stack;
|
|
|
|
assert(stack_offset + total_stack_padding == total_stack_bytes);
|
|
|
|
}
|
|
|
|
|
|
|
|
static size_t
|
|
|
|
tcache_bin_stack_alignment (size_t size) {
|
2019-08-24 07:06:50 +08:00
|
|
|
/*
|
|
|
|
* 1) Align to at least PAGE, to minimize the # of TLBs needed by the
|
|
|
|
* smaller sizes; also helps if the larger sizes don't get used at all.
|
|
|
|
* 2) On 32-bit the pointers won't be compressed; use minimal alignment.
|
|
|
|
*/
|
|
|
|
if (LG_SIZEOF_PTR < 3 || size < PAGE) {
|
|
|
|
return PAGE;
|
|
|
|
}
|
2019-08-10 13:12:47 +08:00
|
|
|
/* Align pow2 to avoid overflow the cache bin compressed pointers. */
|
2019-08-24 07:06:50 +08:00
|
|
|
return pow2_ceil_zu(size);
|
2017-03-28 12:50:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Initialize auto tcache (embedded in TSD). */
|
|
|
|
bool
|
|
|
|
tsd_tcache_data_init(tsd_t *tsd) {
|
2017-04-27 09:37:44 +08:00
|
|
|
tcache_t *tcache = tsd_tcachep_get_unsafe(tsd);
|
2019-08-10 13:12:47 +08:00
|
|
|
assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr == NULL);
|
2019-08-24 07:06:50 +08:00
|
|
|
size_t alignment = tcache_bin_stack_alignment(total_stack_bytes);
|
|
|
|
size_t size = sz_sa2u(total_stack_bytes, alignment);
|
|
|
|
|
|
|
|
void *avail_array = ipallocztm(tsd_tsdn(tsd), size, alignment, true,
|
|
|
|
NULL, true, arena_get(TSDN_NULL, 0, true));
|
2017-03-28 12:50:38 +08:00
|
|
|
if (avail_array == NULL) {
|
|
|
|
return true;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2017-03-28 12:50:38 +08:00
|
|
|
|
|
|
|
tcache_init(tsd, tcache, avail_array);
|
|
|
|
/*
|
|
|
|
* Initialization is a bit tricky here. After malloc init is done, all
|
|
|
|
* threads can rely on arena_choose and associate tcache accordingly.
|
|
|
|
* However, the thread that does actual malloc bootstrapping relies on
|
|
|
|
* functional tsd, and it can only rely on a0. In that case, we
|
|
|
|
* associate its tcache to a0 temporarily, and later on
|
|
|
|
* arena_choose_hard() will re-associate properly.
|
|
|
|
*/
|
|
|
|
tcache->arena = NULL;
|
|
|
|
arena_t *arena;
|
|
|
|
if (!malloc_initialized()) {
|
|
|
|
/* If in initialization, assign to a0. */
|
|
|
|
arena = arena_get(tsd_tsdn(tsd), 0, false);
|
|
|
|
tcache_arena_associate(tsd_tsdn(tsd), tcache, arena);
|
|
|
|
} else {
|
|
|
|
arena = arena_choose(tsd, NULL);
|
|
|
|
/* This may happen if thread.tcache.enabled is used. */
|
|
|
|
if (tcache->arena == NULL) {
|
|
|
|
tcache_arena_associate(tsd_tsdn(tsd), tcache, arena);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
assert(arena == tcache->arena);
|
|
|
|
|
|
|
|
return false;
|
2014-04-16 04:28:37 +08:00
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
/* Created manual tcache for tcache.create mallctl. */
|
2010-01-17 01:53:50 +08:00
|
|
|
tcache_t *
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_create_explicit(tsd_t *tsd) {
|
2019-08-10 13:12:47 +08:00
|
|
|
size_t size = sizeof(tcache_t);
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
/* Naturally align the pointer stacks. */
|
|
|
|
size = PTR_CEILING(size);
|
2019-08-10 13:12:47 +08:00
|
|
|
size_t stack_offset = size;
|
|
|
|
size += total_stack_bytes;
|
2019-08-24 07:06:50 +08:00
|
|
|
size_t alignment = tcache_bin_stack_alignment(size);
|
|
|
|
size = sz_sa2u(size, alignment);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2019-08-24 07:06:50 +08:00
|
|
|
tcache_t *tcache = ipallocztm(tsd_tsdn(tsd), size, alignment, true,
|
|
|
|
NULL, true, arena_get(TSDN_NULL, 0, true));
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcache == NULL) {
|
2017-01-20 10:15:45 +08:00
|
|
|
return NULL;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2019-08-10 13:12:47 +08:00
|
|
|
void *avail_array = (void *)((uintptr_t)tcache +
|
|
|
|
(uintptr_t)stack_offset);
|
|
|
|
tcache_init(tsd, tcache, avail_array);
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL));
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-01-20 10:15:45 +08:00
|
|
|
return tcache;
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2014-09-23 12:09:23 +08:00
|
|
|
static void
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
|
2017-04-21 08:21:37 +08:00
|
|
|
assert(tcache->arena != NULL);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-12-15 04:46:39 +08:00
|
|
|
for (unsigned i = 0; i < SC_NBINS; i++) {
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
|
2015-02-14 07:28:56 +08:00
|
|
|
tcache_bin_flush_small(tsd, tcache, tbin, i, 0);
|
2010-03-08 07:34:14 +08:00
|
|
|
|
2017-01-14 07:22:16 +08:00
|
|
|
if (config_stats) {
|
|
|
|
assert(tbin->tstats.nrequests == 0);
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
}
|
2017-12-15 04:46:39 +08:00
|
|
|
for (unsigned i = SC_NBINS; i < nhbins; i++) {
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
|
2019-08-13 02:08:39 +08:00
|
|
|
tcache_bin_flush_large(tsd, tcache, tbin, i, 0);
|
2010-03-18 07:27:39 +08:00
|
|
|
|
2017-01-14 07:22:16 +08:00
|
|
|
if (config_stats) {
|
|
|
|
assert(tbin->tstats.nrequests == 0);
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
|
|
|
}
|
2017-03-28 12:50:38 +08:00
|
|
|
}
|
2010-02-12 05:19:21 +08:00
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
void
|
2017-06-16 06:16:18 +08:00
|
|
|
tcache_flush(tsd_t *tsd) {
|
2017-04-21 08:21:37 +08:00
|
|
|
assert(tcache_available(tsd));
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_flush_cache(tsd, tsd_tcachep_get(tsd));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
|
|
|
|
tcache_flush_cache(tsd, tcache);
|
2018-11-09 04:24:38 +08:00
|
|
|
arena_t *arena = tcache->arena;
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_arena_dissociate(tsd_tsdn(tsd), tcache);
|
|
|
|
|
|
|
|
if (tsd_tcache) {
|
|
|
|
/* Release the avail array for the TSD embedded auto tcache. */
|
2019-08-10 13:12:47 +08:00
|
|
|
cache_bin_t *bin = tcache_small_bin_get(tcache, 0);
|
|
|
|
assert(cache_bin_ncached_get(bin, 0) == 0);
|
|
|
|
assert(cache_bin_empty_position_get(bin, 0) ==
|
|
|
|
bin->cur_ptr.ptr);
|
2019-08-15 04:08:06 +08:00
|
|
|
void *avail_array = (void *)((uintptr_t)bin->cur_ptr.ptr -
|
|
|
|
tcache_bin_info[0].stack_size);
|
2017-04-08 05:12:30 +08:00
|
|
|
idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true);
|
2017-03-28 12:50:38 +08:00
|
|
|
} else {
|
|
|
|
/* Release both the tcache struct and avail array. */
|
2017-04-08 05:12:30 +08:00
|
|
|
idalloctm(tsd_tsdn(tsd), tcache, NULL, NULL, true, true);
|
2017-03-28 12:50:38 +08:00
|
|
|
}
|
2018-11-09 04:24:38 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The deallocation and tcache flush above may not trigger decay since
|
|
|
|
* we are on the tcache shutdown path (potentially with non-nominal
|
|
|
|
* tsd). Manually trigger decay to avoid pathological cases. Also
|
|
|
|
* include arena 0 because the tcache array is allocated from it.
|
|
|
|
*/
|
|
|
|
arena_decay(tsd_tsdn(tsd), arena_get(tsd_tsdn(tsd), 0, false),
|
|
|
|
false, false);
|
|
|
|
|
2019-01-12 03:22:11 +08:00
|
|
|
if (arena_nthreads_get(arena, false) == 0 &&
|
|
|
|
!background_thread_enabled()) {
|
2018-11-09 04:24:38 +08:00
|
|
|
/* Force purging when no threads assigned to the arena anymore. */
|
|
|
|
arena_decay(tsd_tsdn(tsd), arena, false, true);
|
|
|
|
} else {
|
|
|
|
arena_decay(tsd_tsdn(tsd), arena, false, false);
|
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
/* For auto tcache (embedded in TSD) only. */
|
2012-03-22 09:33:03 +08:00
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_cleanup(tsd_t *tsd) {
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_t *tcache = tsd_tcachep_get(tsd);
|
|
|
|
if (!tcache_available(tsd)) {
|
2017-04-06 10:23:41 +08:00
|
|
|
assert(tsd_tcache_enabled_get(tsd) == false);
|
2017-03-28 12:50:38 +08:00
|
|
|
if (config_debug) {
|
2019-08-10 13:12:47 +08:00
|
|
|
assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr
|
|
|
|
== NULL);
|
2017-03-28 12:50:38 +08:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
2017-04-06 10:23:41 +08:00
|
|
|
assert(tsd_tcache_enabled_get(tsd));
|
2019-08-10 13:12:47 +08:00
|
|
|
assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr != NULL);
|
2017-03-28 12:50:38 +08:00
|
|
|
|
|
|
|
tcache_destroy(tsd, tcache, true);
|
|
|
|
if (config_debug) {
|
2019-08-10 13:12:47 +08:00
|
|
|
tcache_small_bin_get(tcache, 0)->cur_ptr.ptr = NULL;
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
|
2010-01-17 01:53:50 +08:00
|
|
|
unsigned i;
|
|
|
|
|
2013-10-22 06:00:06 +08:00
|
|
|
cassert(config_stats);
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
/* Merge and reset tcache stats. */
|
2017-12-15 04:46:39 +08:00
|
|
|
for (i = 0; i < SC_NBINS; i++) {
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
|
2018-11-13 07:56:04 +08:00
|
|
|
unsigned binshard;
|
|
|
|
bin_t *bin = arena_bin_choose_lock(tsdn, arena, i, &binshard);
|
2010-03-08 07:34:14 +08:00
|
|
|
bin->stats.nrequests += tbin->tstats.nrequests;
|
2016-05-11 13:21:10 +08:00
|
|
|
malloc_mutex_unlock(tsdn, &bin->lock);
|
2010-03-08 07:34:14 +08:00
|
|
|
tbin->tstats.nrequests = 0;
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
|
|
|
|
for (; i < nhbins; i++) {
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
|
2019-05-07 07:36:55 +08:00
|
|
|
arena_stats_large_flush_nrequests_add(tsdn, &arena->stats, i,
|
2017-02-13 09:43:33 +08:00
|
|
|
tbin->tstats.nrequests);
|
2010-03-18 07:27:39 +08:00
|
|
|
tbin->tstats.nrequests = 0;
|
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
static bool
|
2020-02-18 06:09:29 +08:00
|
|
|
tcaches_create_prep(tsd_t *tsd, base_t *base) {
|
2017-01-30 13:32:39 +08:00
|
|
|
bool err;
|
|
|
|
|
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
|
|
|
|
if (tcaches == NULL) {
|
2020-02-18 06:09:29 +08:00
|
|
|
tcaches = base_alloc(tsd_tsdn(tsd), base,
|
|
|
|
sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1), CACHELINE);
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcaches == NULL) {
|
2017-01-30 13:32:39 +08:00
|
|
|
err = true;
|
|
|
|
goto label_return;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) {
|
2017-01-30 13:32:39 +08:00
|
|
|
err = true;
|
|
|
|
goto label_return;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2017-01-30 13:32:39 +08:00
|
|
|
|
|
|
|
err = false;
|
|
|
|
label_return:
|
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
2020-02-18 06:09:29 +08:00
|
|
|
tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind) {
|
2017-05-23 10:32:04 +08:00
|
|
|
witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);
|
2017-01-30 13:32:39 +08:00
|
|
|
|
|
|
|
bool err;
|
|
|
|
|
2020-02-18 06:09:29 +08:00
|
|
|
if (tcaches_create_prep(tsd, base)) {
|
2017-01-30 13:32:39 +08:00
|
|
|
err = true;
|
|
|
|
goto label_return;
|
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_t *tcache = tcache_create_explicit(tsd);
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcache == NULL) {
|
2017-01-30 13:32:39 +08:00
|
|
|
err = true;
|
|
|
|
goto label_return;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
tcaches_t *elm;
|
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
if (tcaches_avail != NULL) {
|
|
|
|
elm = tcaches_avail;
|
|
|
|
tcaches_avail = tcaches_avail->next;
|
|
|
|
elm->tcache = tcache;
|
2016-02-25 04:42:23 +08:00
|
|
|
*r_ind = (unsigned)(elm - tcaches);
|
2015-01-30 07:30:47 +08:00
|
|
|
} else {
|
|
|
|
elm = &tcaches[tcaches_past];
|
|
|
|
elm->tcache = tcache;
|
|
|
|
*r_ind = tcaches_past;
|
|
|
|
tcaches_past++;
|
|
|
|
}
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
err = false;
|
|
|
|
label_return:
|
2017-05-23 10:32:04 +08:00
|
|
|
witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);
|
2017-01-30 13:32:39 +08:00
|
|
|
return err;
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
2017-03-16 03:50:37 +08:00
|
|
|
static tcache_t *
|
2018-11-10 06:45:06 +08:00
|
|
|
tcaches_elm_remove(tsd_t *tsd, tcaches_t *elm, bool allow_reinit) {
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx);
|
|
|
|
|
2017-01-16 08:56:30 +08:00
|
|
|
if (elm->tcache == NULL) {
|
2017-03-16 03:50:37 +08:00
|
|
|
return NULL;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2017-03-16 03:50:37 +08:00
|
|
|
tcache_t *tcache = elm->tcache;
|
2018-11-10 06:45:06 +08:00
|
|
|
if (allow_reinit) {
|
|
|
|
elm->tcache = TCACHES_ELM_NEED_REINIT;
|
|
|
|
} else {
|
|
|
|
elm->tcache = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tcache == TCACHES_ELM_NEED_REINIT) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2017-03-16 03:50:37 +08:00
|
|
|
return tcache;
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcaches_flush(tsd_t *tsd, unsigned ind) {
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
|
2018-11-10 06:45:06 +08:00
|
|
|
tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind], true);
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
|
2017-03-16 03:50:37 +08:00
|
|
|
if (tcache != NULL) {
|
2018-11-10 06:45:06 +08:00
|
|
|
/* Destroy the tcache; recreate in tcaches_get() if needed. */
|
|
|
|
tcache_destroy(tsd, tcache, false);
|
2017-03-16 03:50:37 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcaches_destroy(tsd_t *tsd, unsigned ind) {
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
tcaches_t *elm = &tcaches[ind];
|
2018-11-10 06:45:06 +08:00
|
|
|
tcache_t *tcache = tcaches_elm_remove(tsd, elm, false);
|
2015-01-30 07:30:47 +08:00
|
|
|
elm->next = tcaches_avail;
|
|
|
|
tcaches_avail = elm;
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
|
2017-03-16 03:50:37 +08:00
|
|
|
if (tcache != NULL) {
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_destroy(tsd, tcache, false);
|
2017-03-16 03:50:37 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
bool
|
2020-02-18 06:09:29 +08:00
|
|
|
tcache_boot(tsdn_t *tsdn, base_t *base) {
|
2016-05-28 15:17:28 +08:00
|
|
|
/* If necessary, clamp opt_lg_tcache_max. */
|
2017-01-16 08:56:30 +08:00
|
|
|
if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) <
|
2018-07-12 07:05:58 +08:00
|
|
|
SC_SMALL_MAXCLASS) {
|
|
|
|
tcache_maxclass = SC_SMALL_MAXCLASS;
|
2017-01-16 08:56:30 +08:00
|
|
|
} else {
|
2016-10-28 12:31:25 +08:00
|
|
|
tcache_maxclass = (ZU(1) << opt_lg_tcache_max);
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
|
2017-05-16 06:38:15 +08:00
|
|
|
if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES,
|
|
|
|
malloc_mutex_rank_exclusive)) {
|
2017-01-30 13:32:39 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-05-31 01:45:37 +08:00
|
|
|
nhbins = sz_size2index(tcache_maxclass) + 1;
|
2012-04-07 03:41:55 +08:00
|
|
|
|
|
|
|
/* Initialize tcache_bin_info. */
|
2020-02-18 06:09:29 +08:00
|
|
|
tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base,
|
|
|
|
nhbins * sizeof(cache_bin_info_t), CACHELINE);
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcache_bin_info == NULL) {
|
2017-01-20 10:15:45 +08:00
|
|
|
return true;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2019-08-15 04:08:06 +08:00
|
|
|
unsigned i, ncached_max;
|
|
|
|
total_stack_bytes = 0;
|
2017-12-15 04:46:39 +08:00
|
|
|
for (i = 0; i < SC_NBINS; i++) {
|
2017-10-02 08:22:06 +08:00
|
|
|
if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
|
2019-08-15 04:08:06 +08:00
|
|
|
ncached_max = TCACHE_NSLOTS_SMALL_MIN;
|
2017-10-02 08:22:06 +08:00
|
|
|
} else if ((bin_infos[i].nregs << 1) <=
|
2015-05-20 08:47:16 +08:00
|
|
|
TCACHE_NSLOTS_SMALL_MAX) {
|
2019-08-15 04:08:06 +08:00
|
|
|
ncached_max = bin_infos[i].nregs << 1;
|
2012-04-07 03:41:55 +08:00
|
|
|
} else {
|
2019-08-15 04:08:06 +08:00
|
|
|
ncached_max = TCACHE_NSLOTS_SMALL_MAX;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
}
|
2019-08-15 04:08:06 +08:00
|
|
|
unsigned stack_size = ncached_max * sizeof(void *);
|
2019-08-31 02:52:15 +08:00
|
|
|
assert(stack_size < ((uint64_t)1 <<
|
|
|
|
(sizeof(cache_bin_sz_t) * 8)));
|
2019-08-15 04:08:06 +08:00
|
|
|
tcache_bin_info[i].stack_size = stack_size;
|
|
|
|
total_stack_bytes += stack_size;
|
2012-04-07 03:41:55 +08:00
|
|
|
}
|
|
|
|
for (; i < nhbins; i++) {
|
2019-08-15 04:08:06 +08:00
|
|
|
unsigned stack_size = TCACHE_NSLOTS_LARGE * sizeof(void *);
|
|
|
|
tcache_bin_info[i].stack_size = stack_size;
|
|
|
|
total_stack_bytes += stack_size;
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
2019-08-15 04:08:06 +08:00
|
|
|
total_stack_bytes += total_stack_padding;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
|
2017-01-20 10:15:45 +08:00
|
|
|
return false;
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
2017-01-30 13:32:39 +08:00
|
|
|
|
|
|
|
void
|
|
|
|
tcache_prefork(tsdn_t *tsdn) {
|
2019-08-08 11:12:25 +08:00
|
|
|
malloc_mutex_prefork(tsdn, &tcaches_mtx);
|
2017-01-30 13:32:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tcache_postfork_parent(tsdn_t *tsdn) {
|
2019-08-08 11:12:25 +08:00
|
|
|
malloc_mutex_postfork_parent(tsdn, &tcaches_mtx);
|
2017-01-30 13:32:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tcache_postfork_child(tsdn_t *tsdn) {
|
2019-08-08 11:12:25 +08:00
|
|
|
malloc_mutex_postfork_child(tsdn, &tcaches_mtx);
|
2017-01-30 13:32:39 +08:00
|
|
|
}
|