2017-01-20 13:41:41 +08:00
|
|
|
#define JEMALLOC_TCACHE_C_
|
2017-04-11 09:17:55 +08:00
|
|
|
#include "jemalloc/internal/jemalloc_preamble.h"
|
|
|
|
#include "jemalloc/internal/jemalloc_internal_includes.h"
|
2012-02-14 04:29:49 +08:00
|
|
|
|
2017-04-12 05:43:12 +08:00
|
|
|
#include "jemalloc/internal/assert.h"
|
2017-05-24 03:28:19 +08:00
|
|
|
#include "jemalloc/internal/mutex.h"
|
2017-12-15 04:46:39 +08:00
|
|
|
#include "jemalloc/internal/sc.h"
|
2017-04-12 05:43:12 +08:00
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
/******************************************************************************/
|
|
|
|
/* Data. */
|
|
|
|
|
2017-04-21 08:21:37 +08:00
|
|
|
bool opt_tcache = true;
|
2010-10-24 09:37:06 +08:00
|
|
|
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_info_t *tcache_bin_info;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
static unsigned stack_nelms; /* Total stack elms per tcache. */
|
|
|
|
|
2016-02-25 03:02:14 +08:00
|
|
|
unsigned nhbins;
|
2012-03-22 09:33:03 +08:00
|
|
|
size_t tcache_maxclass;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2015-01-30 07:30:47 +08:00
|
|
|
tcaches_t *tcaches;
|
|
|
|
|
|
|
|
/* Index of first element within tcaches that has never been used. */
|
|
|
|
static unsigned tcaches_past;
|
|
|
|
|
|
|
|
/* Head of singly linked list tracking available tcaches elements. */
|
|
|
|
static tcaches_t *tcaches_avail;
|
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
/* Protects tcaches{,_past,_avail}. */
|
|
|
|
static malloc_mutex_t tcaches_mtx;
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
/******************************************************************************/
|
|
|
|
|
2016-03-24 06:32:07 +08:00
|
|
|
size_t
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_salloc(tsdn_t *tsdn, const void *ptr) {
|
2017-03-17 16:25:12 +08:00
|
|
|
return arena_salloc(tsdn, ptr);
|
2012-04-20 09:28:03 +08:00
|
|
|
}
|
|
|
|
|
2012-05-02 15:30:36 +08:00
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
|
2015-08-20 06:21:32 +08:00
|
|
|
szind_t binind = tcache->next_gc_bin;
|
2012-05-02 15:30:36 +08:00
|
|
|
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin;
|
2017-12-15 04:46:39 +08:00
|
|
|
if (binind < SC_NBINS) {
|
2017-04-07 03:35:22 +08:00
|
|
|
tbin = tcache_small_bin_get(tcache, binind);
|
|
|
|
} else {
|
|
|
|
tbin = tcache_large_bin_get(tcache, binind);
|
|
|
|
}
|
2012-05-02 15:30:36 +08:00
|
|
|
if (tbin->low_water > 0) {
|
|
|
|
/*
|
|
|
|
* Flush (ceiling) 3/4 of the objects below the low water mark.
|
|
|
|
*/
|
2017-12-15 04:46:39 +08:00
|
|
|
if (binind < SC_NBINS) {
|
2017-04-29 04:31:09 +08:00
|
|
|
tcache_bin_flush_small(tsd, tcache, tbin, binind,
|
|
|
|
tbin->ncached - tbin->low_water + (tbin->low_water
|
|
|
|
>> 2));
|
2017-04-07 03:35:22 +08:00
|
|
|
/*
|
|
|
|
* Reduce fill count by 2X. Limit lg_fill_div such that
|
|
|
|
* the fill count is always at least 1.
|
|
|
|
*/
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_info_t *tbin_info = &tcache_bin_info[binind];
|
2017-04-07 03:35:22 +08:00
|
|
|
if ((tbin_info->ncached_max >>
|
|
|
|
(tcache->lg_fill_div[binind] + 1)) >= 1) {
|
|
|
|
tcache->lg_fill_div[binind]++;
|
|
|
|
}
|
2012-05-02 15:30:36 +08:00
|
|
|
} else {
|
2017-04-29 04:31:09 +08:00
|
|
|
tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached
|
|
|
|
- tbin->low_water + (tbin->low_water >> 2), tcache);
|
2012-05-02 15:30:36 +08:00
|
|
|
}
|
2017-04-29 04:31:09 +08:00
|
|
|
} else if (tbin->low_water < 0) {
|
|
|
|
/*
|
|
|
|
* Increase fill count by 2X for small bins. Make sure
|
|
|
|
* lg_fill_div stays greater than 0.
|
|
|
|
*/
|
2017-12-15 04:46:39 +08:00
|
|
|
if (binind < SC_NBINS && tcache->lg_fill_div[binind] > 1) {
|
2017-04-29 04:31:09 +08:00
|
|
|
tcache->lg_fill_div[binind]--;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2012-05-02 15:30:36 +08:00
|
|
|
}
|
2017-04-29 04:31:09 +08:00
|
|
|
tbin->low_water = tbin->ncached;
|
|
|
|
|
|
|
|
tcache->next_gc_bin++;
|
|
|
|
if (tcache->next_gc_bin == nhbins) {
|
|
|
|
tcache->next_gc_bin = 0;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2012-05-02 15:30:36 +08:00
|
|
|
}
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
void *
|
2016-05-11 13:21:10 +08:00
|
|
|
tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin, szind_t binind, bool *tcache_success) {
|
2010-01-17 01:53:50 +08:00
|
|
|
void *ret;
|
|
|
|
|
2017-04-21 08:21:37 +08:00
|
|
|
assert(tcache->arena != NULL);
|
2017-04-07 03:35:22 +08:00
|
|
|
arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind,
|
|
|
|
config_prof ? tcache->prof_accumbytes : 0);
|
2017-01-16 08:56:30 +08:00
|
|
|
if (config_prof) {
|
2012-02-11 12:22:09 +08:00
|
|
|
tcache->prof_accumbytes = 0;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2017-10-02 09:54:25 +08:00
|
|
|
ret = cache_bin_alloc_easy(tbin, tcache_success);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-01-20 10:15:45 +08:00
|
|
|
return ret;
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2019-01-23 05:59:23 +08:00
|
|
|
/* Enabled with --enable-extra-size-check. */
#ifdef JEMALLOC_EXTRA_SIZE_CHECK
/*
 * Look up the extent and size class index of each of the nflush objects
 * nearest tbin->avail, storing the extents into extents[0..nflush).
 *
 * As a sanity check, the looked-up size class indices are summed and compared
 * against binind * nflush; any mismatch aborts the process.  This is useful
 * for catching sized deallocation bugs and for failing early instead of
 * corrupting metadata.  Since this can be turned on for opt builds, the
 * comparison happens once after the loop rather than per item.
 */
static void
tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind,
    size_t nflush, extent_t **extents) {
	rtree_ctx_t ctx_fallback;
	rtree_ctx_t *ctx = tsdn_rtree_ctx(tsdn, &ctx_fallback);

	const size_t expected_sum = binind * nflush;
	size_t actual_sum = 0;
	szind_t item_szind;

	for (unsigned i = 0; i < nflush; i++) {
		void **slot = tbin->avail - 1 - i;

		rtree_extent_szind_read(tsdn, &extents_rtree, ctx,
		    (uintptr_t)*slot, true, &extents[i], &item_szind);
		actual_sum += item_szind;
	}

	if (actual_sum != expected_sum) {
		abort();
	}
}
#endif
|
|
|
|
|
2017-04-29 04:31:09 +08:00
|
|
|
void
|
2017-08-11 05:27:58 +08:00
|
|
|
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
|
2017-04-29 04:31:09 +08:00
|
|
|
szind_t binind, unsigned rem) {
|
|
|
|
bool merged_stats = false;
|
|
|
|
|
2017-12-15 04:46:39 +08:00
|
|
|
assert(binind < SC_NBINS);
|
2017-08-11 05:27:58 +08:00
|
|
|
assert((cache_bin_sz_t)rem <= tbin->ncached);
|
2010-03-14 12:32:56 +08:00
|
|
|
|
2017-03-28 08:22:01 +08:00
|
|
|
arena_t *arena = tcache->arena;
|
2015-01-30 07:30:47 +08:00
|
|
|
assert(arena != NULL);
|
2017-03-28 08:22:01 +08:00
|
|
|
unsigned nflush = tbin->ncached - rem;
|
|
|
|
VARIABLE_ARRAY(extent_t *, item_extent, nflush);
|
2019-01-23 05:59:23 +08:00
|
|
|
|
|
|
|
#ifndef JEMALLOC_EXTRA_SIZE_CHECK
|
2017-03-28 08:22:01 +08:00
|
|
|
/* Look up extent once per item. */
|
|
|
|
for (unsigned i = 0 ; i < nflush; i++) {
|
|
|
|
item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i));
|
|
|
|
}
|
2019-01-23 05:59:23 +08:00
|
|
|
#else
|
|
|
|
tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush,
|
|
|
|
item_extent);
|
|
|
|
#endif
|
2017-03-28 08:22:01 +08:00
|
|
|
while (nflush > 0) {
|
2010-03-14 12:32:56 +08:00
|
|
|
/* Lock the arena bin associated with the first object. */
|
2017-04-29 04:31:09 +08:00
|
|
|
extent_t *extent = item_extent[0];
|
2018-12-04 10:30:58 +08:00
|
|
|
unsigned bin_arena_ind = extent_arena_ind_get(extent);
|
|
|
|
arena_t *bin_arena = arena_get(tsd_tsdn(tsd), bin_arena_ind,
|
|
|
|
false);
|
2018-11-13 07:56:04 +08:00
|
|
|
unsigned binshard = extent_binshard_get(extent);
|
|
|
|
assert(binshard < bin_infos[binind].n_shards);
|
|
|
|
bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard];
|
2010-03-14 12:32:56 +08:00
|
|
|
|
2015-01-30 07:30:47 +08:00
|
|
|
if (config_prof && bin_arena == arena) {
|
2016-05-11 13:21:10 +08:00
|
|
|
if (arena_prof_accum(tsd_tsdn(tsd), arena,
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache->prof_accumbytes)) {
|
2016-05-11 13:21:10 +08:00
|
|
|
prof_idump(tsd_tsdn(tsd));
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2010-02-12 05:19:21 +08:00
|
|
|
tcache->prof_accumbytes = 0;
|
2010-03-16 13:25:23 +08:00
|
|
|
}
|
|
|
|
|
2017-04-29 04:31:09 +08:00
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
|
2018-12-08 10:06:04 +08:00
|
|
|
if (config_stats && bin_arena == arena && !merged_stats) {
|
2011-03-15 03:56:51 +08:00
|
|
|
merged_stats = true;
|
2010-03-14 12:32:56 +08:00
|
|
|
bin->stats.nflushes++;
|
|
|
|
bin->stats.nrequests += tbin->tstats.nrequests;
|
|
|
|
tbin->tstats.nrequests = 0;
|
2010-02-12 05:19:21 +08:00
|
|
|
}
|
2017-03-28 08:22:01 +08:00
|
|
|
unsigned ndeferred = 0;
|
2017-04-29 04:31:09 +08:00
|
|
|
for (unsigned i = 0; i < nflush; i++) {
|
2017-03-28 08:22:01 +08:00
|
|
|
void *ptr = *(tbin->avail - 1 - i);
|
|
|
|
extent = item_extent[i];
|
|
|
|
assert(ptr != NULL && extent != NULL);
|
2016-03-24 11:29:33 +08:00
|
|
|
|
2018-12-04 10:30:58 +08:00
|
|
|
if (extent_arena_ind_get(extent) == bin_arena_ind
|
2018-11-13 07:56:04 +08:00
|
|
|
&& extent_binshard_get(extent) == binshard) {
|
2016-05-11 13:21:10 +08:00
|
|
|
arena_dalloc_bin_junked_locked(tsd_tsdn(tsd),
|
2018-11-13 07:56:04 +08:00
|
|
|
bin_arena, bin, binind, extent, ptr);
|
2010-01-17 01:53:50 +08:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* This object was allocated via a different
|
2010-03-14 12:32:56 +08:00
|
|
|
* arena bin than the one that is currently
|
|
|
|
* locked. Stash the object, so that it can be
|
|
|
|
* handled in a future pass.
|
2010-01-17 01:53:50 +08:00
|
|
|
*/
|
2017-04-29 04:31:09 +08:00
|
|
|
*(tbin->avail - 1 - ndeferred) = ptr;
|
|
|
|
item_extent[ndeferred] = extent;
|
2010-01-17 01:53:50 +08:00
|
|
|
ndeferred++;
|
|
|
|
}
|
|
|
|
}
|
2016-05-11 13:21:10 +08:00
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
|
|
|
|
arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred);
|
2017-03-28 08:22:01 +08:00
|
|
|
nflush = ndeferred;
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
2014-10-04 01:16:09 +08:00
|
|
|
if (config_stats && !merged_stats) {
|
2011-03-15 03:56:51 +08:00
|
|
|
/*
|
|
|
|
* The flush loop didn't happen to flush to this thread's
|
|
|
|
* arena, so the stats didn't get merged. Manually do so now.
|
|
|
|
*/
|
2018-11-13 07:56:04 +08:00
|
|
|
unsigned binshard;
|
|
|
|
bin_t *bin = arena_bin_choose_lock(tsd_tsdn(tsd), arena, binind,
|
|
|
|
&binshard);
|
2017-04-29 04:31:09 +08:00
|
|
|
bin->stats.nflushes++;
|
|
|
|
bin->stats.nrequests += tbin->tstats.nrequests;
|
|
|
|
tbin->tstats.nrequests = 0;
|
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
|
2011-03-15 03:56:51 +08:00
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-04-29 04:31:09 +08:00
|
|
|
memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
|
|
|
|
sizeof(void *));
|
|
|
|
tbin->ncached = rem;
|
2017-08-11 05:27:58 +08:00
|
|
|
if (tbin->ncached < tbin->low_water) {
|
2010-03-14 12:32:56 +08:00
|
|
|
tbin->low_water = tbin->ncached;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2010-03-18 07:27:39 +08:00
|
|
|
void
|
2017-08-11 05:27:58 +08:00
|
|
|
tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind,
|
2017-04-29 04:31:09 +08:00
|
|
|
unsigned rem, tcache_t *tcache) {
|
|
|
|
bool merged_stats = false;
|
2010-03-18 07:27:39 +08:00
|
|
|
|
|
|
|
assert(binind < nhbins);
|
2017-08-11 05:27:58 +08:00
|
|
|
assert((cache_bin_sz_t)rem <= tbin->ncached);
|
2010-03-18 07:27:39 +08:00
|
|
|
|
2018-06-27 02:40:53 +08:00
|
|
|
arena_t *tcache_arena = tcache->arena;
|
|
|
|
assert(tcache_arena != NULL);
|
2017-01-30 13:57:14 +08:00
|
|
|
unsigned nflush = tbin->ncached - rem;
|
2017-03-28 08:22:01 +08:00
|
|
|
VARIABLE_ARRAY(extent_t *, item_extent, nflush);
|
2019-01-23 05:59:23 +08:00
|
|
|
|
|
|
|
#ifndef JEMALLOC_EXTRA_SIZE_CHECK
|
2017-03-28 08:22:01 +08:00
|
|
|
/* Look up extent once per item. */
|
|
|
|
for (unsigned i = 0 ; i < nflush; i++) {
|
|
|
|
item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i));
|
|
|
|
}
|
2019-01-23 05:59:23 +08:00
|
|
|
#else
|
|
|
|
tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush,
|
|
|
|
item_extent);
|
|
|
|
#endif
|
2017-01-30 13:57:14 +08:00
|
|
|
while (nflush > 0) {
|
2010-03-18 07:27:39 +08:00
|
|
|
/* Lock the arena associated with the first object. */
|
2017-04-29 04:31:09 +08:00
|
|
|
extent_t *extent = item_extent[0];
|
2018-12-04 10:30:58 +08:00
|
|
|
unsigned locked_arena_ind = extent_arena_ind_get(extent);
|
|
|
|
arena_t *locked_arena = arena_get(tsd_tsdn(tsd),
|
|
|
|
locked_arena_ind, false);
|
2018-05-03 17:40:53 +08:00
|
|
|
bool idump;
|
2010-03-18 07:27:39 +08:00
|
|
|
|
2017-01-16 08:56:30 +08:00
|
|
|
if (config_prof) {
|
2013-02-07 03:59:30 +08:00
|
|
|
idump = false;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2017-01-30 13:57:14 +08:00
|
|
|
|
2018-06-27 02:40:53 +08:00
|
|
|
bool lock_large = !arena_is_auto(locked_arena);
|
2018-05-30 06:55:04 +08:00
|
|
|
if (lock_large) {
|
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx);
|
|
|
|
}
|
2017-04-29 04:31:09 +08:00
|
|
|
for (unsigned i = 0; i < nflush; i++) {
|
2017-01-30 13:57:14 +08:00
|
|
|
void *ptr = *(tbin->avail - 1 - i);
|
|
|
|
assert(ptr != NULL);
|
2017-04-26 04:33:22 +08:00
|
|
|
extent = item_extent[i];
|
2018-12-04 10:30:58 +08:00
|
|
|
if (extent_arena_ind_get(extent) == locked_arena_ind) {
|
2017-01-30 13:57:14 +08:00
|
|
|
large_dalloc_prep_junked_locked(tsd_tsdn(tsd),
|
|
|
|
extent);
|
|
|
|
}
|
|
|
|
}
|
2018-06-27 02:40:53 +08:00
|
|
|
if ((config_prof || config_stats) &&
|
|
|
|
(locked_arena == tcache_arena)) {
|
2012-02-11 12:22:09 +08:00
|
|
|
if (config_prof) {
|
2018-06-27 02:40:53 +08:00
|
|
|
idump = arena_prof_accum(tsd_tsdn(tsd),
|
|
|
|
tcache_arena, tcache->prof_accumbytes);
|
2012-02-11 12:22:09 +08:00
|
|
|
tcache->prof_accumbytes = 0;
|
|
|
|
}
|
|
|
|
if (config_stats) {
|
|
|
|
merged_stats = true;
|
2017-02-13 09:43:33 +08:00
|
|
|
arena_stats_large_nrequests_add(tsd_tsdn(tsd),
|
2018-06-27 02:40:53 +08:00
|
|
|
&tcache_arena->stats, binind,
|
2017-02-13 09:43:33 +08:00
|
|
|
tbin->tstats.nrequests);
|
2012-02-11 12:22:09 +08:00
|
|
|
tbin->tstats.nrequests = 0;
|
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
2018-05-30 06:55:04 +08:00
|
|
|
if (lock_large) {
|
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->large_mtx);
|
|
|
|
}
|
2017-01-30 13:57:14 +08:00
|
|
|
|
|
|
|
unsigned ndeferred = 0;
|
2017-04-29 04:31:09 +08:00
|
|
|
for (unsigned i = 0; i < nflush; i++) {
|
2017-01-30 13:57:14 +08:00
|
|
|
void *ptr = *(tbin->avail - 1 - i);
|
2017-03-28 08:22:01 +08:00
|
|
|
extent = item_extent[i];
|
|
|
|
assert(ptr != NULL && extent != NULL);
|
|
|
|
|
2018-12-04 10:30:58 +08:00
|
|
|
if (extent_arena_ind_get(extent) == locked_arena_ind) {
|
2017-01-30 13:57:14 +08:00
|
|
|
large_dalloc_finish(tsd_tsdn(tsd), extent);
|
2014-10-10 08:54:06 +08:00
|
|
|
} else {
|
2010-03-18 07:27:39 +08:00
|
|
|
/*
|
|
|
|
* This object was allocated via a different
|
|
|
|
* arena than the one that is currently locked.
|
|
|
|
* Stash the object, so that it can be handled
|
|
|
|
* in a future pass.
|
|
|
|
*/
|
2017-04-29 04:31:09 +08:00
|
|
|
*(tbin->avail - 1 - ndeferred) = ptr;
|
|
|
|
item_extent[ndeferred] = extent;
|
2010-03-18 07:27:39 +08:00
|
|
|
ndeferred++;
|
|
|
|
}
|
|
|
|
}
|
2017-01-16 08:56:30 +08:00
|
|
|
if (config_prof && idump) {
|
2016-05-11 13:21:10 +08:00
|
|
|
prof_idump(tsd_tsdn(tsd));
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2016-05-11 13:21:10 +08:00
|
|
|
arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush -
|
|
|
|
ndeferred);
|
2017-01-30 13:57:14 +08:00
|
|
|
nflush = ndeferred;
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
2014-10-04 01:16:09 +08:00
|
|
|
if (config_stats && !merged_stats) {
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
/*
|
|
|
|
* The flush loop didn't happen to flush to this thread's
|
|
|
|
* arena, so the stats didn't get merged. Manually do so now.
|
|
|
|
*/
|
2018-06-27 02:40:53 +08:00
|
|
|
arena_stats_large_nrequests_add(tsd_tsdn(tsd),
|
|
|
|
&tcache_arena->stats, binind, tbin->tstats.nrequests);
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
tbin->tstats.nrequests = 0;
|
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
|
2017-04-29 04:31:09 +08:00
|
|
|
memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
|
|
|
|
sizeof(void *));
|
|
|
|
tbin->ncached = rem;
|
2017-08-11 05:27:58 +08:00
|
|
|
if (tbin->ncached < tbin->low_water) {
|
2010-03-18 07:27:39 +08:00
|
|
|
tbin->low_water = tbin->ncached;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
|
2017-03-28 12:50:38 +08:00
|
|
|
assert(tcache->arena == NULL);
|
2017-03-07 04:51:41 +08:00
|
|
|
tcache->arena = arena;
|
2017-03-28 12:50:38 +08:00
|
|
|
|
2012-03-22 09:33:03 +08:00
|
|
|
if (config_stats) {
|
|
|
|
/* Link into list of extant tcaches. */
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
|
2017-08-12 08:34:21 +08:00
|
|
|
|
2012-03-22 09:33:03 +08:00
|
|
|
ql_elm_new(tcache, link);
|
|
|
|
ql_tail_insert(&arena->tcache_ql, tcache, link);
|
2017-08-12 08:34:21 +08:00
|
|
|
cache_bin_array_descriptor_init(
|
|
|
|
&tcache->cache_bin_array_descriptor, tcache->bins_small,
|
|
|
|
tcache->bins_large);
|
|
|
|
ql_tail_insert(&arena->cache_bin_array_descriptor_ql,
|
|
|
|
&tcache->cache_bin_array_descriptor, link);
|
|
|
|
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-05-11 13:21:10 +08:00
|
|
|
static void
|
2017-03-07 04:51:41 +08:00
|
|
|
tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) {
|
|
|
|
arena_t *arena = tcache->arena;
|
2017-04-21 08:21:37 +08:00
|
|
|
assert(arena != NULL);
|
2012-03-22 09:33:03 +08:00
|
|
|
if (config_stats) {
|
|
|
|
/* Unlink from list of extant tcaches. */
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
if (config_debug) {
|
|
|
|
bool in_ql = false;
|
|
|
|
tcache_t *iter;
|
|
|
|
ql_foreach(iter, &arena->tcache_ql, link) {
|
|
|
|
if (iter == tcache) {
|
|
|
|
in_ql = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
assert(in_ql);
|
|
|
|
}
|
|
|
|
ql_remove(&arena->tcache_ql, tcache, link);
|
2017-08-12 08:34:21 +08:00
|
|
|
ql_remove(&arena->cache_bin_array_descriptor_ql,
|
|
|
|
&tcache->cache_bin_array_descriptor, link);
|
2016-05-11 13:21:10 +08:00
|
|
|
tcache_stats_merge(tsdn, tcache, arena);
|
2017-02-13 10:50:53 +08:00
|
|
|
malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache->arena = NULL;
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
|
|
|
|
2016-05-11 13:21:10 +08:00
|
|
|
void
|
2017-03-07 04:51:41 +08:00
|
|
|
tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
|
|
|
|
tcache_arena_dissociate(tsdn, tcache);
|
|
|
|
tcache_arena_associate(tsdn, tcache, arena);
|
2016-05-11 13:21:10 +08:00
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
bool
|
|
|
|
tsd_tcache_enabled_data_init(tsd_t *tsd) {
|
|
|
|
/* Called upon tsd initialization. */
|
2017-04-06 10:23:41 +08:00
|
|
|
tsd_tcache_enabled_set(tsd, opt_tcache);
|
2017-04-12 14:13:45 +08:00
|
|
|
tsd_slow_update(tsd);
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
if (opt_tcache) {
|
|
|
|
/* Trigger tcache init. */
|
|
|
|
tsd_tcache_data_init(tsd);
|
|
|
|
}
|
2014-04-16 04:28:37 +08:00
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Initialize auto tcache (embedded in TSD). */
|
|
|
|
static void
|
|
|
|
tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
|
|
|
|
memset(&tcache->link, 0, sizeof(ql_elm(tcache_t)));
|
|
|
|
tcache->prof_accumbytes = 0;
|
|
|
|
tcache->next_gc_bin = 0;
|
|
|
|
tcache->arena = NULL;
|
|
|
|
|
|
|
|
ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR);
|
|
|
|
|
|
|
|
size_t stack_offset = 0;
|
|
|
|
assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
|
2017-12-15 04:46:39 +08:00
|
|
|
memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS);
|
|
|
|
memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS));
|
2017-04-07 03:35:22 +08:00
|
|
|
unsigned i = 0;
|
2017-12-15 04:46:39 +08:00
|
|
|
for (; i < SC_NBINS; i++) {
|
2017-04-07 03:35:22 +08:00
|
|
|
tcache->lg_fill_div[i] = 1;
|
2017-03-28 12:50:38 +08:00
|
|
|
stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
|
|
|
|
/*
|
|
|
|
* avail points past the available space. Allocations will
|
|
|
|
* access the slots toward higher addresses (for the benefit of
|
|
|
|
* prefetch).
|
|
|
|
*/
|
2017-04-07 03:35:22 +08:00
|
|
|
tcache_small_bin_get(tcache, i)->avail =
|
|
|
|
(void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset);
|
|
|
|
}
|
|
|
|
for (; i < nhbins; i++) {
|
|
|
|
stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
|
|
|
|
tcache_large_bin_get(tcache, i)->avail =
|
|
|
|
(void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset);
|
2014-04-16 04:28:37 +08:00
|
|
|
}
|
2017-03-28 12:50:38 +08:00
|
|
|
assert(stack_offset == stack_nelms * sizeof(void *));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Initialize auto tcache (embedded in TSD). */
|
|
|
|
bool
|
|
|
|
tsd_tcache_data_init(tsd_t *tsd) {
|
2017-04-27 09:37:44 +08:00
|
|
|
tcache_t *tcache = tsd_tcachep_get_unsafe(tsd);
|
2017-04-07 03:35:22 +08:00
|
|
|
assert(tcache_small_bin_get(tcache, 0)->avail == NULL);
|
2017-03-28 12:50:38 +08:00
|
|
|
size_t size = stack_nelms * sizeof(void *);
|
|
|
|
/* Avoid false cacheline sharing. */
|
2017-05-31 01:45:37 +08:00
|
|
|
size = sz_sa2u(size, CACHELINE);
|
2017-03-28 12:50:38 +08:00
|
|
|
|
|
|
|
void *avail_array = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true,
|
|
|
|
NULL, true, arena_get(TSDN_NULL, 0, true));
|
|
|
|
if (avail_array == NULL) {
|
|
|
|
return true;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2017-03-28 12:50:38 +08:00
|
|
|
|
|
|
|
tcache_init(tsd, tcache, avail_array);
|
|
|
|
/*
|
|
|
|
* Initialization is a bit tricky here. After malloc init is done, all
|
|
|
|
* threads can rely on arena_choose and associate tcache accordingly.
|
|
|
|
* However, the thread that does actual malloc bootstrapping relies on
|
|
|
|
* functional tsd, and it can only rely on a0. In that case, we
|
|
|
|
* associate its tcache to a0 temporarily, and later on
|
|
|
|
* arena_choose_hard() will re-associate properly.
|
|
|
|
*/
|
|
|
|
tcache->arena = NULL;
|
|
|
|
arena_t *arena;
|
|
|
|
if (!malloc_initialized()) {
|
|
|
|
/* If in initialization, assign to a0. */
|
|
|
|
arena = arena_get(tsd_tsdn(tsd), 0, false);
|
|
|
|
tcache_arena_associate(tsd_tsdn(tsd), tcache, arena);
|
|
|
|
} else {
|
|
|
|
arena = arena_choose(tsd, NULL);
|
|
|
|
/* This may happen if thread.tcache.enabled is used. */
|
|
|
|
if (tcache->arena == NULL) {
|
|
|
|
tcache_arena_associate(tsd_tsdn(tsd), tcache, arena);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
assert(arena == tcache->arena);
|
|
|
|
|
|
|
|
return false;
|
2014-04-16 04:28:37 +08:00
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
/* Created manual tcache for tcache.create mallctl. */
|
2010-01-17 01:53:50 +08:00
|
|
|
tcache_t *
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_create_explicit(tsd_t *tsd) {
|
2010-01-17 01:53:50 +08:00
|
|
|
tcache_t *tcache;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
size_t size, stack_offset;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
size = sizeof(tcache_t);
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
/* Naturally align the pointer stacks. */
|
|
|
|
size = PTR_CEILING(size);
|
|
|
|
stack_offset = size;
|
|
|
|
size += stack_nelms * sizeof(void *);
|
2014-10-10 08:54:06 +08:00
|
|
|
/* Avoid false cacheline sharing. */
|
2017-05-31 01:45:37 +08:00
|
|
|
size = sz_sa2u(size, CACHELINE);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, NULL, true,
|
2016-05-11 13:21:10 +08:00
|
|
|
arena_get(TSDN_NULL, 0, true));
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcache == NULL) {
|
2017-01-20 10:15:45 +08:00
|
|
|
return NULL;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_init(tsd, tcache,
|
|
|
|
(void *)((uintptr_t)tcache + (uintptr_t)stack_offset));
|
|
|
|
tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL));
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-01-20 10:15:45 +08:00
|
|
|
return tcache;
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2014-09-23 12:09:23 +08:00
|
|
|
static void
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
|
2017-04-21 08:21:37 +08:00
|
|
|
assert(tcache->arena != NULL);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2017-12-15 04:46:39 +08:00
|
|
|
for (unsigned i = 0; i < SC_NBINS; i++) {
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
|
2015-02-14 07:28:56 +08:00
|
|
|
tcache_bin_flush_small(tsd, tcache, tbin, i, 0);
|
2010-03-08 07:34:14 +08:00
|
|
|
|
2017-01-14 07:22:16 +08:00
|
|
|
if (config_stats) {
|
|
|
|
assert(tbin->tstats.nrequests == 0);
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
}
|
2017-12-15 04:46:39 +08:00
|
|
|
for (unsigned i = SC_NBINS; i < nhbins; i++) {
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
|
2017-04-29 04:31:09 +08:00
|
|
|
tcache_bin_flush_large(tsd, tbin, i, 0, tcache);
|
2010-03-18 07:27:39 +08:00
|
|
|
|
2017-01-14 07:22:16 +08:00
|
|
|
if (config_stats) {
|
|
|
|
assert(tbin->tstats.nrequests == 0);
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-04-21 08:21:37 +08:00
|
|
|
if (config_prof && tcache->prof_accumbytes > 0 &&
|
|
|
|
arena_prof_accum(tsd_tsdn(tsd), tcache->arena,
|
|
|
|
tcache->prof_accumbytes)) {
|
2016-05-11 13:21:10 +08:00
|
|
|
prof_idump(tsd_tsdn(tsd));
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2017-03-28 12:50:38 +08:00
|
|
|
}
|
2010-02-12 05:19:21 +08:00
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
void
|
2017-06-16 06:16:18 +08:00
|
|
|
tcache_flush(tsd_t *tsd) {
|
2017-04-21 08:21:37 +08:00
|
|
|
assert(tcache_available(tsd));
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_flush_cache(tsd, tsd_tcachep_get(tsd));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
|
|
|
|
tcache_flush_cache(tsd, tcache);
|
2018-11-09 04:24:38 +08:00
|
|
|
arena_t *arena = tcache->arena;
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_arena_dissociate(tsd_tsdn(tsd), tcache);
|
|
|
|
|
|
|
|
if (tsd_tcache) {
|
|
|
|
/* Release the avail array for the TSD embedded auto tcache. */
|
2017-04-07 03:35:22 +08:00
|
|
|
void *avail_array =
|
|
|
|
(void *)((uintptr_t)tcache_small_bin_get(tcache, 0)->avail -
|
2017-03-28 12:50:38 +08:00
|
|
|
(uintptr_t)tcache_bin_info[0].ncached_max * sizeof(void *));
|
2017-04-08 05:12:30 +08:00
|
|
|
idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true);
|
2017-03-28 12:50:38 +08:00
|
|
|
} else {
|
|
|
|
/* Release both the tcache struct and avail array. */
|
2017-04-08 05:12:30 +08:00
|
|
|
idalloctm(tsd_tsdn(tsd), tcache, NULL, NULL, true, true);
|
2017-03-28 12:50:38 +08:00
|
|
|
}
|
2018-11-09 04:24:38 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The deallocation and tcache flush above may not trigger decay since
|
|
|
|
* we are on the tcache shutdown path (potentially with non-nominal
|
|
|
|
* tsd). Manually trigger decay to avoid pathological cases. Also
|
|
|
|
* include arena 0 because the tcache array is allocated from it.
|
|
|
|
*/
|
|
|
|
arena_decay(tsd_tsdn(tsd), arena_get(tsd_tsdn(tsd), 0, false),
|
|
|
|
false, false);
|
|
|
|
|
2019-01-12 03:22:11 +08:00
|
|
|
if (arena_nthreads_get(arena, false) == 0 &&
|
|
|
|
!background_thread_enabled()) {
|
2018-11-09 04:24:38 +08:00
|
|
|
/* Force purging when no threads assigned to the arena anymore. */
|
|
|
|
arena_decay(tsd_tsdn(tsd), arena, false, true);
|
|
|
|
} else {
|
|
|
|
arena_decay(tsd_tsdn(tsd), arena, false, false);
|
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
/* For auto tcache (embedded in TSD) only. */
|
2012-03-22 09:33:03 +08:00
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_cleanup(tsd_t *tsd) {
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_t *tcache = tsd_tcachep_get(tsd);
|
|
|
|
if (!tcache_available(tsd)) {
|
2017-04-06 10:23:41 +08:00
|
|
|
assert(tsd_tcache_enabled_get(tsd) == false);
|
2017-03-28 12:50:38 +08:00
|
|
|
if (config_debug) {
|
2017-04-07 03:35:22 +08:00
|
|
|
assert(tcache_small_bin_get(tcache, 0)->avail == NULL);
|
2017-03-28 12:50:38 +08:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
2017-04-06 10:23:41 +08:00
|
|
|
assert(tsd_tcache_enabled_get(tsd));
|
2017-04-07 03:35:22 +08:00
|
|
|
assert(tcache_small_bin_get(tcache, 0)->avail != NULL);
|
2017-03-28 12:50:38 +08:00
|
|
|
|
|
|
|
tcache_destroy(tsd, tcache, true);
|
|
|
|
if (config_debug) {
|
2017-04-07 03:35:22 +08:00
|
|
|
tcache_small_bin_get(tcache, 0)->avail = NULL;
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
|
2010-01-17 01:53:50 +08:00
|
|
|
unsigned i;
|
|
|
|
|
2013-10-22 06:00:06 +08:00
|
|
|
cassert(config_stats);
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
/* Merge and reset tcache stats. */
|
2017-12-15 04:46:39 +08:00
|
|
|
for (i = 0; i < SC_NBINS; i++) {
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
|
2018-11-13 07:56:04 +08:00
|
|
|
unsigned binshard;
|
|
|
|
bin_t *bin = arena_bin_choose_lock(tsdn, arena, i, &binshard);
|
2010-03-08 07:34:14 +08:00
|
|
|
bin->stats.nrequests += tbin->tstats.nrequests;
|
2016-05-11 13:21:10 +08:00
|
|
|
malloc_mutex_unlock(tsdn, &bin->lock);
|
2010-03-08 07:34:14 +08:00
|
|
|
tbin->tstats.nrequests = 0;
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
|
|
|
|
for (; i < nhbins; i++) {
|
2017-08-11 05:27:58 +08:00
|
|
|
cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
|
2017-02-13 09:43:33 +08:00
|
|
|
arena_stats_large_nrequests_add(tsdn, &arena->stats, i,
|
|
|
|
tbin->tstats.nrequests);
|
2010-03-18 07:27:39 +08:00
|
|
|
tbin->tstats.nrequests = 0;
|
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
static bool
|
|
|
|
tcaches_create_prep(tsd_t *tsd) {
|
|
|
|
bool err;
|
|
|
|
|
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
|
|
|
|
if (tcaches == NULL) {
|
2016-12-23 06:39:10 +08:00
|
|
|
tcaches = base_alloc(tsd_tsdn(tsd), b0get(), sizeof(tcache_t *)
|
|
|
|
* (MALLOCX_TCACHE_MAX+1), CACHELINE);
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcaches == NULL) {
|
2017-01-30 13:32:39 +08:00
|
|
|
err = true;
|
|
|
|
goto label_return;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) {
|
2017-01-30 13:32:39 +08:00
|
|
|
err = true;
|
|
|
|
goto label_return;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2017-01-30 13:32:39 +08:00
|
|
|
|
|
|
|
err = false;
|
|
|
|
label_return:
|
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
tcaches_create(tsd_t *tsd, unsigned *r_ind) {
|
2017-05-23 10:32:04 +08:00
|
|
|
witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);
|
2017-01-30 13:32:39 +08:00
|
|
|
|
|
|
|
bool err;
|
|
|
|
|
|
|
|
if (tcaches_create_prep(tsd)) {
|
|
|
|
err = true;
|
|
|
|
goto label_return;
|
|
|
|
}
|
|
|
|
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_t *tcache = tcache_create_explicit(tsd);
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcache == NULL) {
|
2017-01-30 13:32:39 +08:00
|
|
|
err = true;
|
|
|
|
goto label_return;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
tcaches_t *elm;
|
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
if (tcaches_avail != NULL) {
|
|
|
|
elm = tcaches_avail;
|
|
|
|
tcaches_avail = tcaches_avail->next;
|
|
|
|
elm->tcache = tcache;
|
2016-02-25 04:42:23 +08:00
|
|
|
*r_ind = (unsigned)(elm - tcaches);
|
2015-01-30 07:30:47 +08:00
|
|
|
} else {
|
|
|
|
elm = &tcaches[tcaches_past];
|
|
|
|
elm->tcache = tcache;
|
|
|
|
*r_ind = tcaches_past;
|
|
|
|
tcaches_past++;
|
|
|
|
}
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
|
2017-01-30 13:32:39 +08:00
|
|
|
err = false;
|
|
|
|
label_return:
|
2017-05-23 10:32:04 +08:00
|
|
|
witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);
|
2017-01-30 13:32:39 +08:00
|
|
|
return err;
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
2017-03-16 03:50:37 +08:00
|
|
|
static tcache_t *
|
2018-11-10 06:45:06 +08:00
|
|
|
tcaches_elm_remove(tsd_t *tsd, tcaches_t *elm, bool allow_reinit) {
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx);
|
|
|
|
|
2017-01-16 08:56:30 +08:00
|
|
|
if (elm->tcache == NULL) {
|
2017-03-16 03:50:37 +08:00
|
|
|
return NULL;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2017-03-16 03:50:37 +08:00
|
|
|
tcache_t *tcache = elm->tcache;
|
2018-11-10 06:45:06 +08:00
|
|
|
if (allow_reinit) {
|
|
|
|
elm->tcache = TCACHES_ELM_NEED_REINIT;
|
|
|
|
} else {
|
|
|
|
elm->tcache = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tcache == TCACHES_ELM_NEED_REINIT) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2017-03-16 03:50:37 +08:00
|
|
|
return tcache;
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcaches_flush(tsd_t *tsd, unsigned ind) {
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
|
2018-11-10 06:45:06 +08:00
|
|
|
tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind], true);
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
|
2017-03-16 03:50:37 +08:00
|
|
|
if (tcache != NULL) {
|
2018-11-10 06:45:06 +08:00
|
|
|
/* Destroy the tcache; recreate in tcaches_get() if needed. */
|
|
|
|
tcache_destroy(tsd, tcache, false);
|
2017-03-16 03:50:37 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2017-01-16 08:56:30 +08:00
|
|
|
tcaches_destroy(tsd_t *tsd, unsigned ind) {
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
|
2015-01-30 07:30:47 +08:00
|
|
|
tcaches_t *elm = &tcaches[ind];
|
2018-11-10 06:45:06 +08:00
|
|
|
tcache_t *tcache = tcaches_elm_remove(tsd, elm, false);
|
2015-01-30 07:30:47 +08:00
|
|
|
elm->next = tcaches_avail;
|
|
|
|
tcaches_avail = elm;
|
2017-01-30 13:32:39 +08:00
|
|
|
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
|
2017-03-16 03:50:37 +08:00
|
|
|
if (tcache != NULL) {
|
2017-03-28 12:50:38 +08:00
|
|
|
tcache_destroy(tsd, tcache, false);
|
2017-03-16 03:50:37 +08:00
|
|
|
}
|
2015-01-30 07:30:47 +08:00
|
|
|
}
|
|
|
|
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
bool
|
2017-01-16 08:56:30 +08:00
|
|
|
tcache_boot(tsdn_t *tsdn) {
|
2016-05-28 15:17:28 +08:00
|
|
|
/* If necessary, clamp opt_lg_tcache_max. */
|
2017-01-16 08:56:30 +08:00
|
|
|
if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) <
|
2018-07-12 07:05:58 +08:00
|
|
|
SC_SMALL_MAXCLASS) {
|
|
|
|
tcache_maxclass = SC_SMALL_MAXCLASS;
|
2017-01-16 08:56:30 +08:00
|
|
|
} else {
|
2016-10-28 12:31:25 +08:00
|
|
|
tcache_maxclass = (ZU(1) << opt_lg_tcache_max);
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
|
2017-05-16 06:38:15 +08:00
|
|
|
if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES,
|
|
|
|
malloc_mutex_rank_exclusive)) {
|
2017-01-30 13:32:39 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-05-31 01:45:37 +08:00
|
|
|
nhbins = sz_size2index(tcache_maxclass) + 1;
|
2012-04-07 03:41:55 +08:00
|
|
|
|
|
|
|
/* Initialize tcache_bin_info. */
|
2017-08-11 05:27:58 +08:00
|
|
|
tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins
|
|
|
|
* sizeof(cache_bin_info_t), CACHELINE);
|
2017-01-16 08:56:30 +08:00
|
|
|
if (tcache_bin_info == NULL) {
|
2017-01-20 10:15:45 +08:00
|
|
|
return true;
|
2017-01-16 08:56:30 +08:00
|
|
|
}
|
2012-04-07 03:41:55 +08:00
|
|
|
stack_nelms = 0;
|
2017-04-21 08:21:37 +08:00
|
|
|
unsigned i;
|
2017-12-15 04:46:39 +08:00
|
|
|
for (i = 0; i < SC_NBINS; i++) {
|
2017-10-02 08:22:06 +08:00
|
|
|
if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
|
2015-05-20 08:47:16 +08:00
|
|
|
tcache_bin_info[i].ncached_max =
|
|
|
|
TCACHE_NSLOTS_SMALL_MIN;
|
2017-10-02 08:22:06 +08:00
|
|
|
} else if ((bin_infos[i].nregs << 1) <=
|
2015-05-20 08:47:16 +08:00
|
|
|
TCACHE_NSLOTS_SMALL_MAX) {
|
2012-04-07 03:41:55 +08:00
|
|
|
tcache_bin_info[i].ncached_max =
|
2017-10-02 08:22:06 +08:00
|
|
|
(bin_infos[i].nregs << 1);
|
2012-04-07 03:41:55 +08:00
|
|
|
} else {
|
|
|
|
tcache_bin_info[i].ncached_max =
|
|
|
|
TCACHE_NSLOTS_SMALL_MAX;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
}
|
2012-04-07 03:41:55 +08:00
|
|
|
stack_nelms += tcache_bin_info[i].ncached_max;
|
|
|
|
}
|
|
|
|
for (; i < nhbins; i++) {
|
2016-06-01 05:50:21 +08:00
|
|
|
tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
|
2012-04-07 03:41:55 +08:00
|
|
|
stack_nelms += tcache_bin_info[i].ncached_max;
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
|
2017-01-20 10:15:45 +08:00
|
|
|
return false;
|
2012-03-22 09:33:03 +08:00
|
|
|
}
|
2017-01-30 13:32:39 +08:00
|
|
|
|
|
|
|
void
|
|
|
|
tcache_prefork(tsdn_t *tsdn) {
|
|
|
|
if (!config_prof && opt_tcache) {
|
|
|
|
malloc_mutex_prefork(tsdn, &tcaches_mtx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tcache_postfork_parent(tsdn_t *tsdn) {
|
|
|
|
if (!config_prof && opt_tcache) {
|
|
|
|
malloc_mutex_postfork_parent(tsdn, &tcaches_mtx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tcache_postfork_child(tsdn_t *tsdn) {
|
|
|
|
if (!config_prof && opt_tcache) {
|
|
|
|
malloc_mutex_postfork_child(tsdn, &tcaches_mtx);
|
|
|
|
}
|
|
|
|
}
|