2017-04-11 09:17:55 +08:00
|
|
|
#include "jemalloc/internal/jemalloc_preamble.h"
|
|
|
|
#include "jemalloc/internal/jemalloc_internal_includes.h"
|
2012-02-14 04:29:49 +08:00
|
|
|
|
2017-04-12 05:43:12 +08:00
|
|
|
#include "jemalloc/internal/assert.h"
|
2017-05-24 03:28:19 +08:00
|
|
|
#include "jemalloc/internal/mutex.h"
|
2019-03-21 04:06:53 +08:00
|
|
|
#include "jemalloc/internal/safety_check.h"
|
2017-12-15 04:46:39 +08:00
|
|
|
#include "jemalloc/internal/sc.h"
|
2017-04-12 05:43:12 +08:00
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
/******************************************************************************/
|
|
|
|
/* Data. */
|
|
|
|
|
2020-05-12 06:03:06 +08:00
|
|
|
bool opt_tcache = true;
|
2020-05-12 06:33:23 +08:00
|
|
|
|
2020-10-20 13:48:26 +08:00
|
|
|
/* tcache_maxclass is set to 32KB by default. */
|
|
|
|
size_t opt_tcache_max = ((size_t)1) << 15;
|
2020-05-12 06:03:06 +08:00
|
|
|
|
|
|
|
/* Reasonable defaults for min and max values. */
|
|
|
|
unsigned opt_tcache_nslots_small_min = 20;
|
|
|
|
unsigned opt_tcache_nslots_small_max = 200;
|
|
|
|
unsigned opt_tcache_nslots_large = 20;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2020-05-12 03:08:19 +08:00
|
|
|
/*
|
|
|
|
* We attempt to make the number of slots in a tcache bin for a given size class
|
|
|
|
* equal to the number of objects in a slab times some multiplier. By default,
|
2020-07-09 06:50:23 +08:00
|
|
|
* the multiplier is 2 (i.e. we set the maximum number of objects in the tcache
|
|
|
|
* to twice the number of objects in a slab).
|
2020-05-12 03:08:19 +08:00
|
|
|
* This is bounded by some other constraints as well, like the fact that it
|
2020-05-12 06:03:06 +08:00
|
|
|
* must be even, must be less than opt_tcache_nslots_small_max, etc..
|
2020-05-12 03:08:19 +08:00
|
|
|
*/
|
2020-07-09 06:50:23 +08:00
|
|
|
ssize_t opt_lg_tcache_nslots_mul = 1;
|
2020-05-12 03:08:19 +08:00
|
|
|
|
2020-05-14 01:36:27 +08:00
|
|
|
/*
|
|
|
|
* Number of allocation bytes between tcache incremental GCs. Again, this
|
|
|
|
* default just seems to work well; more tuning is possible.
|
|
|
|
*/
|
|
|
|
size_t opt_tcache_gc_incr_bytes = 65536;
|
|
|
|
|
2020-05-12 07:24:17 +08:00
|
|
|
/*
|
|
|
|
* With default settings, we may end up flushing small bins frequently with
|
|
|
|
* small flush amounts. To limit this tendency, we can set a number of bytes to
|
|
|
|
* "delay" by. If we try to flush N M-byte items, we decrease that size-class's
|
|
|
|
* delay by N * M. So, if delay is 1024 and we're looking at the 64-byte size
|
|
|
|
* class, we won't do any flushing until we've been asked to flush 1024/64 == 16
|
|
|
|
* items. This can happen in any configuration (i.e. being asked to flush 16
|
|
|
|
* items once, or 4 items 4 times).
|
|
|
|
*
|
|
|
|
* Practically, this is stored as a count of items in a uint8_t, so the
|
|
|
|
* effective maximum value for a size class is 255 * sz.
|
|
|
|
*/
|
|
|
|
size_t opt_tcache_gc_delay_bytes = 0;
|
|
|
|
|
2020-05-14 06:32:18 +08:00
|
|
|
/*
|
|
|
|
* When a cache bin is flushed because it's full, how much of it do we flush?
|
|
|
|
* By default, we flush half the maximum number of items.
|
|
|
|
*/
|
|
|
|
unsigned opt_lg_tcache_flush_small_div = 1;
|
|
|
|
unsigned opt_lg_tcache_flush_large_div = 1;
|
|
|
|
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_info_t *tcache_bin_info;
|
2019-08-10 13:12:47 +08:00
|
|
|
|
|
|
|
/* Total stack size required (per tcache). Include the padding above. */
|
2020-03-01 06:41:47 +08:00
|
|
|
static size_t tcache_bin_alloc_size;
|
|
|
|
static size_t tcache_bin_alloc_alignment;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
|
2020-10-22 10:47:57 +08:00
|
|
|
/* Number of cache bins enabled, including both large and small. */
|
2016-02-25 03:02:14 +08:00
|
|
|
unsigned nhbins;
|
2020-10-22 10:47:57 +08:00
|
|
|
/* Max size class to be cached (can be small or large). */
|
2012-03-22 09:33:03 +08:00
|
|
|
size_t tcache_maxclass;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2015-01-30 07:30:47 +08:00
|
|
|
tcaches_t *tcaches;
|
|
|
|
|
|
|
|
/* Index of first element within tcaches that has never been used. */
|
|
|
|
static unsigned tcaches_past;
|
|
|
|
|
|
|
|
/* Head of singly linked list tracking available tcaches elements. */
|
|
|
|
static tcaches_t *tcaches_avail;
|
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
/* Protects tcaches{,_past,_avail}. */
|
|
|
|
static malloc_mutex_t tcaches_mtx;
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
/******************************************************************************/
|
|
|
|
|
2016-03-24 06:32:07 +08:00
|
|
|
size_t
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_salloc(tsdn_t *tsdn, const void *ptr) {
|
2017-03-17 16:25:12 +08:00
|
|
|
return arena_salloc(tsdn, ptr);
|
2012-04-20 09:28:03 +08:00
|
|
|
}
|
|
|
|
|
2020-04-16 01:49:08 +08:00
|
|
|
uint64_t
|
|
|
|
tcache_gc_new_event_wait(tsd_t *tsd) {
|
2020-05-12 06:53:30 +08:00
|
|
|
return opt_tcache_gc_incr_bytes;
|
2020-04-16 01:49:08 +08:00
|
|
|
}
|
|
|
|
|
2020-04-17 04:33:56 +08:00
|
|
|
uint64_t
|
|
|
|
tcache_gc_postponed_event_wait(tsd_t *tsd) {
|
|
|
|
return TE_MIN_START_WAIT;
|
|
|
|
}
|
|
|
|
|
2020-04-16 01:49:08 +08:00
|
|
|
uint64_t
|
|
|
|
tcache_gc_dalloc_new_event_wait(tsd_t *tsd) {
|
2020-05-12 06:53:30 +08:00
|
|
|
return opt_tcache_gc_incr_bytes;
|
2020-04-16 01:49:08 +08:00
|
|
|
}
|
|
|
|
|
2020-04-17 04:33:56 +08:00
|
|
|
uint64_t
|
|
|
|
tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd) {
|
|
|
|
return TE_MIN_START_WAIT;
|
|
|
|
}
|
|
|
|
|
2020-05-12 07:24:17 +08:00
|
|
|
static uint8_t
|
|
|
|
tcache_gc_item_delay_compute(szind_t szind) {
|
|
|
|
assert(szind < SC_NBINS);
|
|
|
|
size_t sz = sz_index2size(szind);
|
|
|
|
size_t item_delay = opt_tcache_gc_delay_bytes / sz;
|
|
|
|
size_t delay_max = ZU(1)
|
|
|
|
<< (sizeof(((tcache_slow_t *)NULL)->bin_flush_delay_items[0]) * 8);
|
|
|
|
if (item_delay >= delay_max) {
|
|
|
|
item_delay = delay_max - 1;
|
|
|
|
}
|
|
|
|
return item_delay;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
|
|
|
|
szind_t szind) {
|
|
|
|
/* Aim to flush 3/4 of items below low-water. */
|
|
|
|
assert(szind < SC_NBINS);
|
|
|
|
|
|
|
|
cache_bin_t *cache_bin = &tcache->bins[szind];
|
2021-01-08 05:22:08 +08:00
|
|
|
cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin,
|
2020-05-12 07:24:17 +08:00
|
|
|
&tcache_bin_info[szind]);
|
|
|
|
cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin,
|
|
|
|
&tcache_bin_info[szind]);
|
|
|
|
assert(!tcache_slow->bin_refilled[szind]);
|
|
|
|
|
|
|
|
size_t nflush = low_water - (low_water >> 2);
|
|
|
|
if (nflush < tcache_slow->bin_flush_delay_items[szind]) {
|
|
|
|
tcache_slow->bin_flush_delay_items[szind] -= nflush;
|
|
|
|
return;
|
|
|
|
} else {
|
|
|
|
tcache_slow->bin_flush_delay_items[szind]
|
|
|
|
= tcache_gc_item_delay_compute(szind);
|
|
|
|
}
|
|
|
|
|
2020-05-28 08:43:23 +08:00
|
|
|
tcache_bin_flush_small(tsd, tcache, cache_bin, szind,
|
|
|
|
(unsigned)(ncached - nflush));
|
2020-05-12 07:24:17 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Reduce fill count by 2X. Limit lg_fill_div such that
|
|
|
|
* the fill count is always at least 1.
|
|
|
|
*/
|
|
|
|
if ((cache_bin_info_ncached_max(&tcache_bin_info[szind])
|
|
|
|
>> (tcache_slow->lg_fill_div[szind] + 1)) >= 1) {
|
|
|
|
tcache_slow->lg_fill_div[szind]++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
|
|
|
|
szind_t szind) {
|
|
|
|
/* Like the small GC; flush 3/4 of untouched items. */
|
|
|
|
assert(szind >= SC_NBINS);
|
|
|
|
cache_bin_t *cache_bin = &tcache->bins[szind];
|
2021-01-08 05:22:08 +08:00
|
|
|
cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin,
|
2020-05-12 07:24:17 +08:00
|
|
|
&tcache_bin_info[szind]);
|
|
|
|
cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin,
|
|
|
|
&tcache_bin_info[szind]);
|
|
|
|
tcache_bin_flush_large(tsd, tcache, cache_bin, szind,
|
2020-05-28 08:43:23 +08:00
|
|
|
(unsigned)(ncached - low_water + (low_water >> 2)));
|
2020-05-12 07:24:17 +08:00
|
|
|
}
|
|
|
|
|
2020-04-18 01:38:06 +08:00
|
|
|
static void
|
|
|
|
tcache_event(tsd_t *tsd) {
|
|
|
|
tcache_t *tcache = tcache_get(tsd);
|
|
|
|
if (tcache == NULL) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd);
|
2020-05-12 07:24:17 +08:00
|
|
|
szind_t szind = tcache_slow->next_gc_bin;
|
|
|
|
bool is_small = (szind < SC_NBINS);
|
|
|
|
cache_bin_t *cache_bin = &tcache->bins[szind];
|
2019-08-10 13:12:47 +08:00
|
|
|
|
2020-04-08 11:04:46 +08:00
|
|
|
cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin,
|
2020-05-12 07:24:17 +08:00
|
|
|
&tcache_bin_info[szind]);
|
2019-08-10 13:12:47 +08:00
|
|
|
if (low_water > 0) {
|
2019-08-21 09:14:18 +08:00
|
|
|
if (is_small) {
|
2020-05-12 07:24:17 +08:00
|
|
|
tcache_gc_small(tsd, tcache_slow, tcache, szind);
|
2012-05-02 15:30:36 +08:00
|
|
|
} else {
|
2020-05-12 07:24:17 +08:00
|
|
|
tcache_gc_large(tsd, tcache_slow, tcache, szind);
|
2012-05-02 15:30:36 +08:00
|
|
|
}
|
2020-05-12 07:24:17 +08:00
|
|
|
} else if (is_small && tcache_slow->bin_refilled[szind]) {
|
2019-08-21 09:14:18 +08:00
|
|
|
assert(low_water == 0);
|
2017-04-29 04:31:09 +08:00
|
|
|
/*
|
|
|
|
* Increase fill count by 2X for small bins. Make sure
|
|
|
|
* lg_fill_div stays greater than 0.
|
|
|
|
*/
|
2020-05-12 07:24:17 +08:00
|
|
|
if (tcache_slow->lg_fill_div[szind] > 1) {
|
|
|
|
tcache_slow->lg_fill_div[szind]--;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2020-05-12 07:24:17 +08:00
|
|
|
tcache_slow->bin_refilled[szind] = false;
|
2012-05-02 15:30:36 +08:00
|
|
|
}
|
2020-04-08 11:04:46 +08:00
|
|
|
cache_bin_low_water_set(cache_bin);
|
2017-04-29 04:31:09 +08:00
|
|
|
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_slow->next_gc_bin++;
|
|
|
|
if (tcache_slow->next_gc_bin == nhbins) {
|
|
|
|
tcache_slow->next_gc_bin = 0;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2012-05-02 15:30:36 +08:00
|
|
|
}
|
|
|
|
|
2020-04-18 01:38:06 +08:00
|
|
|
void
|
|
|
|
tcache_gc_event_handler(tsd_t *tsd, uint64_t elapsed) {
|
|
|
|
assert(elapsed == TE_INVALID_ELAPSED);
|
|
|
|
tcache_event(tsd);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tcache_gc_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) {
|
|
|
|
assert(elapsed == TE_INVALID_ELAPSED);
|
|
|
|
tcache_event(tsd);
|
|
|
|
}
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
void *
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena,
|
2020-04-08 11:04:46 +08:00
|
|
|
tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind,
|
2020-04-08 08:48:35 +08:00
|
|
|
bool *tcache_success) {
|
|
|
|
tcache_slow_t *tcache_slow = tcache->tcache_slow;
|
2010-01-17 01:53:50 +08:00
|
|
|
void *ret;
|
|
|
|
|
2020-04-08 08:48:35 +08:00
|
|
|
assert(tcache_slow->arena != NULL);
|
2020-04-08 08:49:50 +08:00
|
|
|
unsigned nfill = cache_bin_info_ncached_max(&tcache_bin_info[binind])
|
2020-04-08 08:48:35 +08:00
|
|
|
>> tcache_slow->lg_fill_div[binind];
|
2020-04-08 11:04:46 +08:00
|
|
|
arena_cache_bin_fill_small(tsdn, arena, cache_bin,
|
|
|
|
&tcache_bin_info[binind], binind, nfill);
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_slow->bin_refilled[binind] = true;
|
2020-04-08 11:04:46 +08:00
|
|
|
ret = cache_bin_alloc(cache_bin, tcache_success);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-01-20 10:15:45 +08:00
|
|
|
return ret;
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2021-01-28 08:10:37 +08:00
|
|
|
static const void *
|
|
|
|
tcache_bin_flush_ptr_getter(void *arr_ctx, size_t ind) {
|
|
|
|
cache_bin_ptr_array_t *arr = (cache_bin_ptr_array_t *)arr_ctx;
|
2021-01-30 05:10:44 +08:00
|
|
|
return arr->ptr[ind];
|
2021-01-28 08:10:37 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
tcache_bin_flush_metadata_visitor(void *szind_sum_ctx,
|
|
|
|
emap_full_alloc_ctx_t *alloc_ctx) {
|
|
|
|
size_t *szind_sum = (size_t *)szind_sum_ctx;
|
|
|
|
*szind_sum -= alloc_ctx->szind;
|
2021-01-29 08:14:39 +08:00
|
|
|
util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t));
|
2021-01-28 08:10:37 +08:00
|
|
|
}
|
|
|
|
|
2019-01-23 05:59:23 +08:00
|
|
|
static void
|
2021-01-28 07:36:11 +08:00
|
|
|
tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr,
|
2021-01-28 09:14:38 +08:00
|
|
|
szind_t binind, size_t nflush, emap_batch_lookup_result_t *edatas) {
|
2019-01-23 05:59:23 +08:00
|
|
|
|
|
|
|
/*
|
2021-01-28 07:36:11 +08:00
|
|
|
* This gets compiled away when config_opt_safety_checks is false.
|
|
|
|
* Checks for sized deallocation bugs, failing early rather than
|
|
|
|
* corrupting metadata.
|
2019-01-23 05:59:23 +08:00
|
|
|
*/
|
2020-02-07 05:16:07 +08:00
|
|
|
size_t szind_sum = binind * nflush;
|
2021-01-28 08:10:37 +08:00
|
|
|
emap_edata_lookup_batch(tsd, &arena_emap_global, nflush,
|
|
|
|
&tcache_bin_flush_ptr_getter, (void *)arr,
|
|
|
|
&tcache_bin_flush_metadata_visitor, (void *)&szind_sum,
|
|
|
|
edatas);
|
2021-01-28 07:36:11 +08:00
|
|
|
if (config_opt_safety_checks && szind_sum != 0) {
|
2020-01-31 06:35:54 +08:00
|
|
|
safety_check_fail_sized_dealloc(false);
|
2019-01-23 05:59:23 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-24 12:33:04 +08:00
|
|
|
JEMALLOC_ALWAYS_INLINE bool
|
|
|
|
tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind,
|
|
|
|
unsigned cur_binshard, bool small) {
|
|
|
|
if (small) {
|
|
|
|
return edata_arena_ind_get(edata) == cur_arena_ind
|
|
|
|
&& edata_binshard_get(edata) == cur_binshard;
|
|
|
|
} else {
|
|
|
|
return edata_arena_ind_get(edata) == cur_arena_ind;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
JEMALLOC_ALWAYS_INLINE void
|
2020-04-08 11:04:46 +08:00
|
|
|
tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
|
2020-02-24 12:33:04 +08:00
|
|
|
szind_t binind, unsigned rem, bool small) {
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_slow_t *tcache_slow = tcache->tcache_slow;
|
2020-02-24 12:33:04 +08:00
|
|
|
/*
|
|
|
|
* A couple lookup calls take tsdn; declare it once for convenience
|
|
|
|
* instead of calling tsd_tsdn(tsd) all the time.
|
|
|
|
*/
|
|
|
|
tsdn_t *tsdn = tsd_tsdn(tsd);
|
|
|
|
|
|
|
|
if (small) {
|
|
|
|
assert(binind < SC_NBINS);
|
|
|
|
} else {
|
|
|
|
assert(binind < nhbins);
|
|
|
|
}
|
2021-01-08 05:22:08 +08:00
|
|
|
cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin,
|
2020-02-29 10:55:33 +08:00
|
|
|
&tcache_bin_info[binind]);
|
2019-08-10 13:12:47 +08:00
|
|
|
assert((cache_bin_sz_t)rem <= ncached);
|
2020-04-08 08:48:35 +08:00
|
|
|
arena_t *tcache_arena = tcache_slow->arena;
|
2020-02-24 12:33:04 +08:00
|
|
|
assert(tcache_arena != NULL);
|
2010-03-14 12:32:56 +08:00
|
|
|
|
2019-08-10 13:12:47 +08:00
|
|
|
unsigned nflush = ncached - rem;
|
2020-02-24 12:33:04 +08:00
|
|
|
/*
|
|
|
|
* Variable length array must have > 0 length; the last element is never
|
|
|
|
* touched (it's just included to satisfy the no-zero-length rule).
|
|
|
|
*/
|
2021-01-28 09:14:38 +08:00
|
|
|
VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1);
|
2020-02-27 09:10:12 +08:00
|
|
|
CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush);
|
|
|
|
|
2020-04-08 11:04:46 +08:00
|
|
|
cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind],
|
2020-02-29 11:12:07 +08:00
|
|
|
&ptrs, nflush);
|
2020-02-29 03:37:39 +08:00
|
|
|
|
2021-01-28 07:36:11 +08:00
|
|
|
tcache_bin_flush_edatas_lookup(tsd, &ptrs, binind, nflush, item_edata);
|
2020-02-08 06:53:36 +08:00
|
|
|
|
2020-02-24 12:33:04 +08:00
|
|
|
/*
|
|
|
|
* The slabs where we freed the last remaining object in the slab (and
|
|
|
|
* so need to free the slab itself).
|
|
|
|
* Used only if small == true.
|
|
|
|
*/
|
2020-02-08 06:53:36 +08:00
|
|
|
unsigned dalloc_count = 0;
|
|
|
|
VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1);
|
2020-02-24 12:33:04 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We're about to grab a bunch of locks. If one of them happens to be
|
|
|
|
* the one guarding the arena-level stats counters we flush our
|
|
|
|
* thread-local ones to, we do so under one critical section.
|
|
|
|
*/
|
|
|
|
bool merged_stats = false;
|
2017-03-28 08:22:01 +08:00
|
|
|
while (nflush > 0) {
|
2020-02-24 12:33:04 +08:00
|
|
|
/* Lock the arena, or bin, associated with the first object. */
|
2021-01-28 09:14:38 +08:00
|
|
|
edata_t *edata = item_edata[0].edata;
|
2020-02-24 12:33:04 +08:00
|
|
|
unsigned cur_arena_ind = edata_arena_ind_get(edata);
|
|
|
|
arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false);
|
2016-03-24 11:29:33 +08:00
|
|
|
|
2011-03-15 03:56:51 +08:00
|
|
|
/*
|
2020-02-24 12:33:04 +08:00
|
|
|
* These assignments are always overwritten when small is true,
|
|
|
|
* and their values are always ignored when small is false, but
|
|
|
|
* to avoid the technical UB when we pass them as parameters, we
|
|
|
|
* need to intialize them.
|
2011-03-15 03:56:51 +08:00
|
|
|
*/
|
2020-02-24 12:33:04 +08:00
|
|
|
unsigned cur_binshard = 0;
|
|
|
|
bin_t *cur_bin = NULL;
|
|
|
|
if (small) {
|
|
|
|
cur_binshard = edata_binshard_get(edata);
|
|
|
|
cur_bin = &cur_arena->bins[binind].bin_shards[
|
|
|
|
cur_binshard];
|
|
|
|
assert(cur_binshard < bin_infos[binind].n_shards);
|
|
|
|
}
|
2017-01-30 13:57:14 +08:00
|
|
|
|
2020-02-24 12:33:04 +08:00
|
|
|
if (small) {
|
|
|
|
malloc_mutex_lock(tsdn, &cur_bin->lock);
|
2018-05-30 06:55:04 +08:00
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
if (!small && !arena_is_auto(cur_arena)) {
|
|
|
|
malloc_mutex_lock(tsdn, &cur_arena->large_mtx);
|
2017-01-30 13:57:14 +08:00
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If we acquired the right lock and have some stats to flush,
|
|
|
|
* flush them.
|
|
|
|
*/
|
|
|
|
if (config_stats && tcache_arena == cur_arena
|
|
|
|
&& !merged_stats) {
|
|
|
|
merged_stats = true;
|
|
|
|
if (small) {
|
|
|
|
cur_bin->stats.nflushes++;
|
|
|
|
cur_bin->stats.nrequests +=
|
2020-04-08 11:04:46 +08:00
|
|
|
cache_bin->tstats.nrequests;
|
|
|
|
cache_bin->tstats.nrequests = 0;
|
2020-02-24 12:33:04 +08:00
|
|
|
} else {
|
|
|
|
arena_stats_large_flush_nrequests_add(tsdn,
|
|
|
|
&tcache_arena->stats, binind,
|
2020-04-08 11:04:46 +08:00
|
|
|
cache_bin->tstats.nrequests);
|
|
|
|
cache_bin->tstats.nrequests = 0;
|
2012-02-11 12:22:09 +08:00
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Large allocations need special prep done. Afterwards, we can
|
|
|
|
* drop the large lock.
|
|
|
|
*/
|
|
|
|
if (!small) {
|
|
|
|
for (unsigned i = 0; i < nflush; i++) {
|
2021-01-30 05:10:44 +08:00
|
|
|
void *ptr = ptrs.ptr[i];
|
2021-01-28 09:14:38 +08:00
|
|
|
edata = item_edata[i].edata;
|
2020-02-24 12:33:04 +08:00
|
|
|
assert(ptr != NULL && edata != NULL);
|
|
|
|
|
|
|
|
if (tcache_bin_flush_match(edata, cur_arena_ind,
|
|
|
|
cur_binshard, small)) {
|
2020-02-29 03:37:39 +08:00
|
|
|
large_dalloc_prep_locked(tsdn,
|
2020-02-24 12:33:04 +08:00
|
|
|
edata);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!small && !arena_is_auto(cur_arena)) {
|
|
|
|
malloc_mutex_unlock(tsdn, &cur_arena->large_mtx);
|
2018-05-30 06:55:04 +08:00
|
|
|
}
|
2017-01-30 13:57:14 +08:00
|
|
|
|
2020-02-24 12:33:04 +08:00
|
|
|
/* Deallocate whatever we can. */
|
2017-01-30 13:57:14 +08:00
|
|
|
unsigned ndeferred = 0;
|
2021-01-30 08:06:28 +08:00
|
|
|
arena_dalloc_bin_locked_info_t dalloc_bin_info;
|
|
|
|
if (small) {
|
|
|
|
arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind);
|
|
|
|
}
|
2017-04-29 04:31:09 +08:00
|
|
|
for (unsigned i = 0; i < nflush; i++) {
|
2021-01-30 05:10:44 +08:00
|
|
|
void *ptr = ptrs.ptr[i];
|
2021-01-28 09:14:38 +08:00
|
|
|
edata = item_edata[i].edata;
|
2019-12-10 06:36:45 +08:00
|
|
|
assert(ptr != NULL && edata != NULL);
|
2020-02-24 12:33:04 +08:00
|
|
|
if (!tcache_bin_flush_match(edata, cur_arena_ind,
|
|
|
|
cur_binshard, small)) {
|
2010-03-18 07:27:39 +08:00
|
|
|
/*
|
2020-02-24 12:33:04 +08:00
|
|
|
* The object was allocated either via a
|
|
|
|
* different arena, or a different bin in this
|
|
|
|
* arena. Either way, stash the object so that
|
|
|
|
* it can be handled in a future pass.
|
2010-03-18 07:27:39 +08:00
|
|
|
*/
|
2021-01-30 05:10:44 +08:00
|
|
|
ptrs.ptr[ndeferred] = ptr;
|
2021-01-28 09:14:38 +08:00
|
|
|
item_edata[ndeferred].edata = edata;
|
2010-03-18 07:27:39 +08:00
|
|
|
ndeferred++;
|
2020-02-24 12:33:04 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (small) {
|
2021-01-30 08:06:28 +08:00
|
|
|
if (arena_dalloc_bin_locked_step(tsdn,
|
|
|
|
cur_arena, cur_bin, &dalloc_bin_info,
|
|
|
|
binind, edata, ptr)) {
|
2020-02-24 12:33:04 +08:00
|
|
|
dalloc_slabs[dalloc_count] = edata;
|
|
|
|
dalloc_count++;
|
|
|
|
}
|
|
|
|
} else {
|
2020-10-15 07:45:19 +08:00
|
|
|
if (large_dalloc_safety_checks(edata, binind)) {
|
|
|
|
/* See the comment in isfree. */
|
|
|
|
continue;
|
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
large_dalloc_finish(tsdn, edata);
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
|
|
|
|
if (small) {
|
2021-01-30 08:06:28 +08:00
|
|
|
arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin,
|
|
|
|
&dalloc_bin_info);
|
2020-02-24 12:33:04 +08:00
|
|
|
malloc_mutex_unlock(tsdn, &cur_bin->lock);
|
|
|
|
}
|
|
|
|
arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred);
|
2017-01-30 13:57:14 +08:00
|
|
|
nflush = ndeferred;
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
2020-02-24 12:33:04 +08:00
|
|
|
|
|
|
|
/* Handle all deferred slab dalloc. */
|
|
|
|
assert(small || dalloc_count == 0);
|
|
|
|
for (unsigned i = 0; i < dalloc_count; i++) {
|
|
|
|
edata_t *slab = dalloc_slabs[i];
|
|
|
|
arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2014-10-04 01:16:09 +08:00
|
|
|
if (config_stats && !merged_stats) {
|
2020-02-24 12:33:04 +08:00
|
|
|
if (small) {
|
|
|
|
/*
|
|
|
|
* The flush loop didn't happen to flush to this
|
|
|
|
* thread's arena, so the stats didn't get merged.
|
|
|
|
* Manually do so now.
|
|
|
|
*/
|
2020-04-23 08:22:43 +08:00
|
|
|
bin_t *bin = arena_bin_choose(tsdn, tcache_arena,
|
|
|
|
binind, NULL);
|
|
|
|
malloc_mutex_lock(tsdn, &bin->lock);
|
2020-02-24 12:33:04 +08:00
|
|
|
bin->stats.nflushes++;
|
2020-04-08 11:04:46 +08:00
|
|
|
bin->stats.nrequests += cache_bin->tstats.nrequests;
|
|
|
|
cache_bin->tstats.nrequests = 0;
|
2020-02-24 12:33:04 +08:00
|
|
|
malloc_mutex_unlock(tsdn, &bin->lock);
|
|
|
|
} else {
|
|
|
|
arena_stats_large_flush_nrequests_add(tsdn,
|
|
|
|
&tcache_arena->stats, binind,
|
2020-04-08 11:04:46 +08:00
|
|
|
cache_bin->tstats.nrequests);
|
|
|
|
cache_bin->tstats.nrequests = 0;
|
2020-02-24 12:33:04 +08:00
|
|
|
}
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
|
2020-04-08 11:04:46 +08:00
|
|
|
cache_bin_finish_flush(cache_bin, &tcache_bin_info[binind], &ptrs,
|
2020-03-01 02:48:59 +08:00
|
|
|
ncached - rem);
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
|
|
|
|
2020-02-24 12:33:04 +08:00
|
|
|
void
|
2020-04-08 11:04:46 +08:00
|
|
|
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
|
2020-02-24 12:33:04 +08:00
|
|
|
szind_t binind, unsigned rem) {
|
2020-04-08 11:04:46 +08:00
|
|
|
tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, rem, true);
|
2020-02-24 12:33:04 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2020-04-08 11:04:46 +08:00
|
|
|
tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
|
2020-02-24 12:33:04 +08:00
|
|
|
szind_t binind, unsigned rem) {
|
2020-04-08 11:04:46 +08:00
|
|
|
tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, rem, false);
|
2020-02-24 12:33:04 +08:00
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
void
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
|
|
|
|
tcache_t *tcache, arena_t *arena) {
|
|
|
|
assert(tcache_slow->arena == NULL);
|
|
|
|
tcache_slow->arena = arena;
|
2017-03-28 12:50:38 +08:00
|
|
|
|
2012-03-22 09:33:03 +08:00
|
|
|
if (config_stats) {
|
|
|
|
/* Link into list of extant tcaches. */
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
|
2017-08-12 08:34:21 +08:00
|
|
|
|
2020-04-08 08:48:35 +08:00
|
|
|
ql_elm_new(tcache_slow, link);
|
|
|
|
ql_tail_insert(&arena->tcache_ql, tcache_slow, link);
|
2017-08-12 08:34:21 +08:00
|
|
|
cache_bin_array_descriptor_init(
|
2020-04-08 11:04:46 +08:00
|
|
|
&tcache_slow->cache_bin_array_descriptor, tcache->bins);
|
2017-08-12 08:34:21 +08:00
|
|
|
ql_tail_insert(&arena->cache_bin_array_descriptor_ql,
|
2020-04-08 08:48:35 +08:00
|
|
|
&tcache_slow->cache_bin_array_descriptor, link);
|
2017-08-12 08:34:21 +08:00
|
|
|
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-05-11 13:21:10 +08:00
|
|
|
static void
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_arena_dissociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
|
|
|
|
tcache_t *tcache) {
|
|
|
|
arena_t *arena = tcache_slow->arena;
|
2017-04-21 08:21:37 +08:00
|
|
|
assert(arena != NULL);
|
2012-03-22 09:33:03 +08:00
|
|
|
if (config_stats) {
|
|
|
|
/* Unlink from list of extant tcaches. */
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
if (config_debug) {
|
|
|
|
bool in_ql = false;
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_slow_t *iter;
|
2015-01-30 07:30:47 +08:00
|
|
|
ql_foreach(iter, &arena->tcache_ql, link) {
|
2020-04-08 08:48:35 +08:00
|
|
|
if (iter == tcache_slow) {
|
2015-01-30 07:30:47 +08:00
|
|
|
in_ql = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
assert(in_ql);
|
|
|
|
}
|
2020-04-08 08:48:35 +08:00
|
|
|
ql_remove(&arena->tcache_ql, tcache_slow, link);
|
2017-08-12 08:34:21 +08:00
|
|
|
ql_remove(&arena->cache_bin_array_descriptor_ql,
|
2020-04-08 08:48:35 +08:00
|
|
|
&tcache_slow->cache_bin_array_descriptor, link);
|
|
|
|
tcache_stats_merge(tsdn, tcache_slow->tcache, arena);
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_slow->arena = NULL;
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
|
|
|
|
2016-05-11 13:21:10 +08:00
|
|
|
void
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
|
|
|
|
tcache_t *tcache, arena_t *arena) {
|
|
|
|
tcache_arena_dissociate(tsdn, tcache_slow, tcache);
|
|
|
|
tcache_arena_associate(tsdn, tcache_slow, tcache, arena);
|
2016-05-11 13:21:10 +08:00
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
bool
|
|
|
|
tsd_tcache_enabled_data_init(tsd_t *tsd) {
|
|
|
|
/* Called upon tsd initialization. */
|
2017-04-06 10:23:41 +08:00
|
|
|
tsd_tcache_enabled_set(tsd, opt_tcache);
|
2017-04-12 14:13:45 +08:00
|
|
|
tsd_slow_update(tsd);
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
if (opt_tcache) {
|
|
|
|
/* Trigger tcache init. */
|
|
|
|
tsd_tcache_data_init(tsd);
|
|
|
|
}
|
2014-04-16 04:28:37 +08:00
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
|
|
|
|
void *mem) {
|
|
|
|
tcache->tcache_slow = tcache_slow;
|
|
|
|
tcache_slow->tcache = tcache;
|
|
|
|
|
|
|
|
memset(&tcache_slow->link, 0, sizeof(ql_elm(tcache_t)));
|
|
|
|
tcache_slow->next_gc_bin = 0;
|
|
|
|
tcache_slow->arena = NULL;
|
|
|
|
tcache_slow->dyn_alloc = mem;
|
2017-03-28 12:50:38 +08:00
|
|
|
|
2020-10-22 10:47:57 +08:00
|
|
|
/*
|
|
|
|
* We reserve cache bins for all small size classes, even if some may
|
|
|
|
* not get used (i.e. bins higher than nhbins). This allows the fast
|
|
|
|
* and common paths to access cache bin metadata safely w/o worrying
|
|
|
|
* about which ones are disabled.
|
|
|
|
*/
|
|
|
|
unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
|
|
|
|
memset(tcache->bins, 0, sizeof(cache_bin_t) * n_reserved_bins);
|
2019-08-10 13:12:47 +08:00
|
|
|
|
2020-03-01 06:41:47 +08:00
|
|
|
size_t cur_offset = 0;
|
2020-03-03 10:28:17 +08:00
|
|
|
cache_bin_preincrement(tcache_bin_info, nhbins, mem,
|
2020-03-01 06:41:47 +08:00
|
|
|
&cur_offset);
|
2020-04-08 11:04:46 +08:00
|
|
|
for (unsigned i = 0; i < nhbins; i++) {
|
|
|
|
if (i < SC_NBINS) {
|
|
|
|
tcache_slow->lg_fill_div[i] = 1;
|
|
|
|
tcache_slow->bin_refilled[i] = false;
|
2020-10-22 10:47:57 +08:00
|
|
|
tcache_slow->bin_flush_delay_items[i]
|
|
|
|
= tcache_gc_item_delay_compute(i);
|
2020-04-08 11:04:46 +08:00
|
|
|
}
|
|
|
|
cache_bin_t *cache_bin = &tcache->bins[i];
|
|
|
|
cache_bin_init(cache_bin, &tcache_bin_info[i], mem,
|
2020-03-01 06:41:47 +08:00
|
|
|
&cur_offset);
|
2014-04-16 04:28:37 +08:00
|
|
|
}
|
2020-10-22 10:47:57 +08:00
|
|
|
/*
|
|
|
|
* For small size classes beyond tcache_maxclass (i.e. nhbins < NBINS),
|
|
|
|
* their cache bins are initialized to a state to safely and efficiently
|
|
|
|
* fail all fastpath alloc / free, so that no additional check around
|
|
|
|
* nhbins is needed on fastpath.
|
|
|
|
*/
|
|
|
|
for (unsigned i = nhbins; i < SC_NBINS; i++) {
|
|
|
|
/* Disabled small bins. */
|
|
|
|
cache_bin_t *cache_bin = &tcache->bins[i];
|
|
|
|
void *fake_stack = mem;
|
|
|
|
size_t fake_offset = 0;
|
|
|
|
|
|
|
|
cache_bin_init(cache_bin, &tcache_bin_info[i], fake_stack,
|
|
|
|
&fake_offset);
|
|
|
|
assert(tcache_small_bin_disabled(i, cache_bin));
|
|
|
|
}
|
|
|
|
|
2020-03-03 10:28:17 +08:00
|
|
|
cache_bin_postincrement(tcache_bin_info, nhbins, mem,
|
2020-03-01 06:41:47 +08:00
|
|
|
&cur_offset);
|
2019-08-10 13:12:47 +08:00
|
|
|
/* Sanity check that the whole stack is used. */
|
2020-03-01 06:41:47 +08:00
|
|
|
assert(cur_offset == tcache_bin_alloc_size);
|
2017-03-28 12:50:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Initialize auto tcache (embedded in TSD). */
|
|
|
|
bool
|
|
|
|
tsd_tcache_data_init(tsd_t *tsd) {
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd);
|
2017-04-27 09:37:44 +08:00
|
|
|
tcache_t *tcache = tsd_tcachep_get_unsafe(tsd);
|
2020-04-08 08:48:35 +08:00
|
|
|
|
2020-04-08 11:04:46 +08:00
|
|
|
assert(cache_bin_still_zero_initialized(&tcache->bins[0]));
|
2020-03-01 06:41:47 +08:00
|
|
|
size_t alignment = tcache_bin_alloc_alignment;
|
|
|
|
size_t size = sz_sa2u(tcache_bin_alloc_size, alignment);
|
2019-08-24 07:06:50 +08:00
|
|
|
|
2020-03-03 10:28:17 +08:00
|
|
|
void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL,
|
|
|
|
true, arena_get(TSDN_NULL, 0, true));
|
|
|
|
if (mem == NULL) {
|
2017-03-28 12:50:38 +08:00
|
|
|
return true;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2017-03-28 12:50:38 +08:00
|
|
|
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_init(tsd, tcache_slow, tcache, mem);
|
2017-03-28 12:50:38 +08:00
|
|
|
/*
|
|
|
|
* Initialization is a bit tricky here. After malloc init is done, all
|
|
|
|
* threads can rely on arena_choose and associate tcache accordingly.
|
|
|
|
* However, the thread that does actual malloc bootstrapping relies on
|
|
|
|
* functional tsd, and it can only rely on a0. In that case, we
|
|
|
|
* associate its tcache to a0 temporarily, and later on
|
|
|
|
* arena_choose_hard() will re-associate properly.
|
|
|
|
*/
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_slow->arena = NULL;
|
2017-03-28 12:50:38 +08:00
|
|
|
arena_t *arena;
|
|
|
|
if (!malloc_initialized()) {
|
|
|
|
/* If in initialization, assign to a0. */
|
|
|
|
arena = arena_get(tsd_tsdn(tsd), 0, false);
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache,
|
|
|
|
arena);
|
2017-03-28 12:50:38 +08:00
|
|
|
} else {
|
|
|
|
arena = arena_choose(tsd, NULL);
|
|
|
|
/* This may happen if thread.tcache.enabled is used. */
|
2020-04-08 08:48:35 +08:00
|
|
|
if (tcache_slow->arena == NULL) {
|
|
|
|
tcache_arena_associate(tsd_tsdn(tsd), tcache_slow,
|
|
|
|
tcache, arena);
|
2017-03-28 12:50:38 +08:00
|
|
|
}
|
|
|
|
}
|
2020-04-08 08:48:35 +08:00
|
|
|
assert(arena == tcache_slow->arena);
|
2017-03-28 12:50:38 +08:00
|
|
|
|
|
|
|
return false;
|
2014-04-16 04:28:37 +08:00
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
/* Created manual tcache for tcache.create mallctl. */
|
2010-01-17 01:53:50 +08:00
|
|
|
tcache_t *
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_create_explicit(tsd_t *tsd) {
|
2020-03-01 06:41:47 +08:00
|
|
|
/*
|
|
|
|
* We place the cache bin stacks, then the tcache_t, then a pointer to
|
|
|
|
* the beginning of the whole allocation (for freeing). The makes sure
|
|
|
|
* the cache bins have the requested alignment.
|
|
|
|
*/
|
2020-04-08 08:48:35 +08:00
|
|
|
size_t size = tcache_bin_alloc_size + sizeof(tcache_t)
|
|
|
|
+ sizeof(tcache_slow_t);
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
/* Naturally align the pointer stacks. */
|
|
|
|
size = PTR_CEILING(size);
|
2020-03-01 06:41:47 +08:00
|
|
|
size = sz_sa2u(size, tcache_bin_alloc_alignment);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2020-03-01 06:41:47 +08:00
|
|
|
void *mem = ipallocztm(tsd_tsdn(tsd), size, tcache_bin_alloc_alignment,
|
|
|
|
true, NULL, true, arena_get(TSDN_NULL, 0, true));
|
|
|
|
if (mem == NULL) {
|
2017-01-20 10:15:45 +08:00
|
|
|
return NULL;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2020-03-03 10:28:17 +08:00
|
|
|
tcache_t *tcache = (void *)((uintptr_t)mem + tcache_bin_alloc_size);
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_slow_t *tcache_slow =
|
|
|
|
(void *)((uintptr_t)mem + tcache_bin_alloc_size + sizeof(tcache_t));
|
|
|
|
tcache_init(tsd, tcache_slow, tcache, mem);
|
2020-03-03 10:28:17 +08:00
|
|
|
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache,
|
|
|
|
arena_ichoose(tsd, NULL));
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-01-20 10:15:45 +08:00
|
|
|
return tcache;
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2014-09-23 12:09:23 +08:00
|
|
|
static void
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_slow_t *tcache_slow = tcache->tcache_slow;
|
|
|
|
assert(tcache_slow->arena != NULL);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2020-04-08 11:04:46 +08:00
|
|
|
for (unsigned i = 0; i < nhbins; i++) {
|
|
|
|
cache_bin_t *cache_bin = &tcache->bins[i];
|
|
|
|
if (i < SC_NBINS) {
|
|
|
|
tcache_bin_flush_small(tsd, tcache, cache_bin, i, 0);
|
|
|
|
} else {
|
|
|
|
tcache_bin_flush_large(tsd, tcache, cache_bin, i, 0);
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
2017-01-14 07:22:16 +08:00
|
|
|
if (config_stats) {
|
2020-04-08 11:04:46 +08:00
|
|
|
assert(cache_bin->tstats.nrequests == 0);
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
|
|
|
}
|
2017-03-28 12:50:38 +08:00
|
|
|
}
|
2010-02-12 05:19:21 +08:00
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
void
|
2017-06-16 06:16:18 +08:00
|
|
|
tcache_flush(tsd_t *tsd) {
|
2017-04-21 08:21:37 +08:00
|
|
|
assert(tcache_available(tsd));
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_flush_cache(tsd, tsd_tcachep_get(tsd));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
|
2020-04-08 08:48:35 +08:00
|
|
|
tcache_slow_t *tcache_slow = tcache->tcache_slow;
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_flush_cache(tsd, tcache);
|
2020-04-08 08:48:35 +08:00
|
|
|
arena_t *arena = tcache_slow->arena;
|
|
|
|
tcache_arena_dissociate(tsd_tsdn(tsd), tcache_slow, tcache);
|
2017-03-28 12:50:38 +08:00
|
|
|
|
|
|
|
if (tsd_tcache) {
|
2020-04-08 11:04:46 +08:00
|
|
|
cache_bin_t *cache_bin = &tcache->bins[0];
|
|
|
|
cache_bin_assert_empty(cache_bin, &tcache_bin_info[0]);
|
2017-03-28 12:50:38 +08:00
|
|
|
}
|
2020-04-08 08:48:35 +08:00
|
|
|
idalloctm(tsd_tsdn(tsd), tcache_slow->dyn_alloc, NULL, NULL, true,
|
|
|
|
true);
|
2018-11-09 04:24:38 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The deallocation and tcache flush above may not trigger decay since
|
|
|
|
* we are on the tcache shutdown path (potentially with non-nominal
|
|
|
|
* tsd). Manually trigger decay to avoid pathological cases. Also
|
|
|
|
* include arena 0 because the tcache array is allocated from it.
|
|
|
|
*/
|
|
|
|
arena_decay(tsd_tsdn(tsd), arena_get(tsd_tsdn(tsd), 0, false),
|
|
|
|
false, false);
|
|
|
|
|
2019-01-12 03:22:11 +08:00
|
|
|
if (arena_nthreads_get(arena, false) == 0 &&
|
|
|
|
!background_thread_enabled()) {
|
2018-11-09 04:24:38 +08:00
|
|
|
/* Force purging when no threads assigned to the arena anymore. */
|
2020-10-17 04:14:59 +08:00
|
|
|
arena_decay(tsd_tsdn(tsd), arena,
|
|
|
|
/* is_background_thread */ false, /* all */ true);
|
2018-11-09 04:24:38 +08:00
|
|
|
} else {
|
2020-10-17 04:14:59 +08:00
|
|
|
arena_decay(tsd_tsdn(tsd), arena,
|
|
|
|
/* is_background_thread */ false, /* all */ false);
|
2018-11-09 04:24:38 +08:00
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
/* For auto tcache (embedded in TSD) only. */
|
2012-03-22 09:33:03 +08:00
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_cleanup(tsd_t *tsd) {
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_t *tcache = tsd_tcachep_get(tsd);
|
|
|
|
if (!tcache_available(tsd)) {
|
2017-04-06 10:23:41 +08:00
|
|
|
assert(tsd_tcache_enabled_get(tsd) == false);
|
2020-04-08 11:04:46 +08:00
|
|
|
assert(cache_bin_still_zero_initialized(&tcache->bins[0]));
|
2017-03-28 12:50:38 +08:00
|
|
|
return;
|
|
|
|
}
|
2017-04-06 10:23:41 +08:00
|
|
|
assert(tsd_tcache_enabled_get(tsd));
|
2020-04-08 11:04:46 +08:00
|
|
|
assert(!cache_bin_still_zero_initialized(&tcache->bins[0]));
|
2017-03-28 12:50:38 +08:00
|
|
|
|
|
|
|
tcache_destroy(tsd, tcache, true);
|
|
|
|
if (config_debug) {
|
2020-03-03 10:40:31 +08:00
|
|
|
/*
|
|
|
|
* For debug testing only, we want to pretend we're still in the
|
|
|
|
* zero-initialized state.
|
|
|
|
*/
|
2020-04-08 11:04:46 +08:00
|
|
|
memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins);
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
|
2013-10-22 06:00:06 +08:00
|
|
|
cassert(config_stats);
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
/* Merge and reset tcache stats. */
|
2020-04-08 11:04:46 +08:00
|
|
|
for (unsigned i = 0; i < nhbins; i++) {
|
|
|
|
cache_bin_t *cache_bin = &tcache->bins[i];
|
|
|
|
if (i < SC_NBINS) {
|
2020-04-23 08:22:43 +08:00
|
|
|
bin_t *bin = arena_bin_choose(tsdn, arena, i, NULL);
|
|
|
|
malloc_mutex_lock(tsdn, &bin->lock);
|
2020-04-08 11:04:46 +08:00
|
|
|
bin->stats.nrequests += cache_bin->tstats.nrequests;
|
|
|
|
malloc_mutex_unlock(tsdn, &bin->lock);
|
|
|
|
} else {
|
|
|
|
arena_stats_large_flush_nrequests_add(tsdn,
|
|
|
|
&arena->stats, i, cache_bin->tstats.nrequests);
|
|
|
|
}
|
|
|
|
cache_bin->tstats.nrequests = 0;
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
static bool
|
2020-02-18 06:09:29 +08:00
|
|
|
tcaches_create_prep(tsd_t *tsd, base_t *base) {
|
2017-01-30 13:32:39 +08:00
|
|
|
bool err;
|
|
|
|
|
2020-10-14 03:40:34 +08:00
|
|
|
malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
|
|
|
|
if (tcaches == NULL) {
|
2020-02-18 06:09:29 +08:00
|
|
|
tcaches = base_alloc(tsd_tsdn(tsd), base,
|
|
|
|
sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1), CACHELINE);
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcaches == NULL) {
|
2017-01-30 13:32:39 +08:00
|
|
|
err = true;
|
|
|
|
goto label_return;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) {
|
2017-01-30 13:32:39 +08:00
|
|
|
err = true;
|
|
|
|
goto label_return;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2017-01-30 13:32:39 +08:00
|
|
|
|
|
|
|
err = false;
|
|
|
|
label_return:
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
2020-02-18 06:09:29 +08:00
|
|
|
tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind) {
|
2017-05-23 10:32:04 +08:00
|
|
|
witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);
|
2017-01-30 13:32:39 +08:00
|
|
|
|
|
|
|
bool err;
|
|
|
|
|
2020-10-14 03:40:34 +08:00
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
|
|
|
|
|
2020-02-18 06:09:29 +08:00
|
|
|
if (tcaches_create_prep(tsd, base)) {
|
2017-01-30 13:32:39 +08:00
|
|
|
err = true;
|
|
|
|
goto label_return;
|
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_t *tcache = tcache_create_explicit(tsd);
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcache == NULL) {
|
2017-01-30 13:32:39 +08:00
|
|
|
err = true;
|
|
|
|
goto label_return;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
tcaches_t *elm;
|
2015-01-30 07:30:47 +08:00
|
|
|
if (tcaches_avail != NULL) {
|
|
|
|
elm = tcaches_avail;
|
|
|
|
tcaches_avail = tcaches_avail->next;
|
|
|
|
elm->tcache = tcache;
|
2016-02-25 04:42:23 +08:00
|
|
|
*r_ind = (unsigned)(elm - tcaches);
|
2015-01-30 07:30:47 +08:00
|
|
|
} else {
|
|
|
|
elm = &tcaches[tcaches_past];
|
|
|
|
elm->tcache = tcache;
|
|
|
|
*r_ind = tcaches_past;
|
|
|
|
tcaches_past++;
|
|
|
|
}
|
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
err = false;
|
|
|
|
label_return:
|
2020-10-14 03:40:34 +08:00
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
|
2017-05-23 10:32:04 +08:00
|
|
|
witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);
|
2017-01-30 13:32:39 +08:00
|
|
|
return err;
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
2017-03-16 03:50:37 +08:00
|
|
|
static tcache_t *
|
2018-11-10 06:45:06 +08:00
|
|
|
tcaches_elm_remove(tsd_t *tsd, tcaches_t *elm, bool allow_reinit) {
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx);
|
|
|
|
|
2017-01-16 08:56:30 +08:00
|
|
|
if (elm->tcache == NULL) {
|
2017-03-16 03:50:37 +08:00
|
|
|
return NULL;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2017-03-16 03:50:37 +08:00
|
|
|
tcache_t *tcache = elm->tcache;
|
2018-11-10 06:45:06 +08:00
|
|
|
if (allow_reinit) {
|
|
|
|
elm->tcache = TCACHES_ELM_NEED_REINIT;
|
|
|
|
} else {
|
|
|
|
elm->tcache = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tcache == TCACHES_ELM_NEED_REINIT) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2017-03-16 03:50:37 +08:00
|
|
|
return tcache;
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcaches_flush(tsd_t *tsd, unsigned ind) {
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
|
2018-11-10 06:45:06 +08:00
|
|
|
tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind], true);
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
|
2017-03-16 03:50:37 +08:00
|
|
|
if (tcache != NULL) {
|
2018-11-10 06:45:06 +08:00
|
|
|
/* Destroy the tcache; recreate in tcaches_get() if needed. */
|
|
|
|
tcache_destroy(tsd, tcache, false);
|
2017-03-16 03:50:37 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcaches_destroy(tsd_t *tsd, unsigned ind) {
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
tcaches_t *elm = &tcaches[ind];
|
2018-11-10 06:45:06 +08:00
|
|
|
tcache_t *tcache = tcaches_elm_remove(tsd, elm, false);
|
2015-01-30 07:30:47 +08:00
|
|
|
elm->next = tcaches_avail;
|
|
|
|
tcaches_avail = elm;
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
|
2017-03-16 03:50:37 +08:00
|
|
|
if (tcache != NULL) {
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_destroy(tsd, tcache, false);
|
2017-03-16 03:50:37 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
2020-05-12 03:08:19 +08:00
|
|
|
static unsigned
|
|
|
|
tcache_ncached_max_compute(szind_t szind) {
|
|
|
|
if (szind >= SC_NBINS) {
|
|
|
|
assert(szind < nhbins);
|
2020-05-12 06:03:06 +08:00
|
|
|
return opt_tcache_nslots_large;
|
2020-05-12 03:08:19 +08:00
|
|
|
}
|
|
|
|
unsigned slab_nregs = bin_infos[szind].nregs;
|
|
|
|
|
2020-05-12 06:03:06 +08:00
|
|
|
/* We may modify these values; start with the opt versions. */
|
|
|
|
unsigned nslots_small_min = opt_tcache_nslots_small_min;
|
|
|
|
unsigned nslots_small_max = opt_tcache_nslots_small_max;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Clamp values to meet our constraints -- even, nonzero, min < max, and
|
|
|
|
* suitable for a cache bin size.
|
|
|
|
*/
|
|
|
|
if (opt_tcache_nslots_small_max > CACHE_BIN_NCACHED_MAX) {
|
|
|
|
nslots_small_max = CACHE_BIN_NCACHED_MAX;
|
|
|
|
}
|
|
|
|
if (nslots_small_min % 2 != 0) {
|
|
|
|
nslots_small_min++;
|
|
|
|
}
|
|
|
|
if (nslots_small_max % 2 != 0) {
|
|
|
|
nslots_small_max--;
|
|
|
|
}
|
|
|
|
if (nslots_small_min < 2) {
|
|
|
|
nslots_small_min = 2;
|
|
|
|
}
|
|
|
|
if (nslots_small_max < 2) {
|
|
|
|
nslots_small_max = 2;
|
|
|
|
}
|
|
|
|
if (nslots_small_min > nslots_small_max) {
|
|
|
|
nslots_small_min = nslots_small_max;
|
|
|
|
}
|
|
|
|
|
2020-05-12 03:08:19 +08:00
|
|
|
unsigned candidate;
|
|
|
|
if (opt_lg_tcache_nslots_mul < 0) {
|
|
|
|
candidate = slab_nregs >> (-opt_lg_tcache_nslots_mul);
|
|
|
|
} else {
|
|
|
|
candidate = slab_nregs << opt_lg_tcache_nslots_mul;
|
|
|
|
}
|
|
|
|
if (candidate % 2 != 0) {
|
|
|
|
/*
|
|
|
|
* We need the candidate size to be even -- we assume that we
|
|
|
|
* can divide by two and get a positive number (e.g. when
|
|
|
|
* flushing).
|
|
|
|
*/
|
|
|
|
++candidate;
|
|
|
|
}
|
2020-05-12 06:03:06 +08:00
|
|
|
if (candidate <= nslots_small_min) {
|
|
|
|
return nslots_small_min;
|
|
|
|
} else if (candidate <= nslots_small_max) {
|
2020-05-12 03:08:19 +08:00
|
|
|
return candidate;
|
|
|
|
} else {
|
2020-05-12 06:03:06 +08:00
|
|
|
return nslots_small_max;
|
2020-05-12 03:08:19 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
bool
|
2020-02-18 06:09:29 +08:00
|
|
|
tcache_boot(tsdn_t *tsdn, base_t *base) {
|
2020-10-20 13:48:26 +08:00
|
|
|
tcache_maxclass = sz_s2u(opt_tcache_max);
|
|
|
|
assert(tcache_maxclass <= TCACHE_MAXCLASS_LIMIT);
|
2020-10-16 07:37:16 +08:00
|
|
|
nhbins = sz_size2index(tcache_maxclass) + 1;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
|
2017-05-16 06:38:15 +08:00
|
|
|
if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES,
|
|
|
|
malloc_mutex_rank_exclusive)) {
|
2017-01-30 13:32:39 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-10-22 10:47:57 +08:00
|
|
|
/* Initialize tcache_bin_info. See comments in tcache_init(). */
|
|
|
|
unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
|
|
|
|
size_t size = n_reserved_bins * sizeof(cache_bin_info_t);
|
|
|
|
tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, size,
|
|
|
|
CACHELINE);
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcache_bin_info == NULL) {
|
2017-01-20 10:15:45 +08:00
|
|
|
return true;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2020-10-22 10:47:57 +08:00
|
|
|
|
2020-05-12 03:08:19 +08:00
|
|
|
for (szind_t i = 0; i < nhbins; i++) {
|
|
|
|
unsigned ncached_max = tcache_ncached_max_compute(i);
|
2020-03-01 06:41:47 +08:00
|
|
|
cache_bin_info_init(&tcache_bin_info[i], ncached_max);
|
2012-04-07 03:41:55 +08:00
|
|
|
}
|
2020-10-22 10:47:57 +08:00
|
|
|
for (szind_t i = nhbins; i < SC_NBINS; i++) {
|
|
|
|
/* Disabled small bins. */
|
|
|
|
cache_bin_info_init(&tcache_bin_info[i], 0);
|
|
|
|
assert(tcache_small_bin_disabled(i, NULL));
|
|
|
|
}
|
|
|
|
|
2020-05-12 03:08:19 +08:00
|
|
|
cache_bin_info_compute_alloc(tcache_bin_info, nhbins,
|
|
|
|
&tcache_bin_alloc_size, &tcache_bin_alloc_alignment);
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
|
2017-01-20 10:15:45 +08:00
|
|
|
return false;
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
2017-01-30 13:32:39 +08:00
|
|
|
|
|
|
|
void
|
|
|
|
tcache_prefork(tsdn_t *tsdn) {
|
2019-08-08 11:12:25 +08:00
|
|
|
malloc_mutex_prefork(tsdn, &tcaches_mtx);
|
2017-01-30 13:32:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tcache_postfork_parent(tsdn_t *tsdn) {
|
2019-08-08 11:12:25 +08:00
|
|
|
malloc_mutex_postfork_parent(tsdn, &tcaches_mtx);
|
2017-01-30 13:32:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tcache_postfork_child(tsdn_t *tsdn) {
|
2019-08-08 11:12:25 +08:00
|
|
|
malloc_mutex_postfork_child(tsdn, &tcaches_mtx);
|
2017-01-30 13:32:39 +08:00
|
|
|
}
|
2020-04-08 11:04:46 +08:00
|
|
|
|
|
|
|
/*
 * Debug helper: assert that the tcache has been initialized, i.e. that its
 * first cache bin is no longer in the zero-initialized state.
 */
void
tcache_assert_initialized(tcache_t *tcache) {
	cache_bin_t *first_bin = &tcache->bins[0];

	assert(!cache_bin_still_zero_initialized(first_bin));
}
|