/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
|
|
|
|
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
typedef struct tcache_bin_info_s tcache_bin_info_t;
|
2010-01-17 01:53:50 +08:00
|
|
|
typedef struct tcache_bin_s tcache_bin_t;
|
|
|
|
typedef struct tcache_s tcache_t;
|
2015-01-30 07:30:47 +08:00
|
|
|
typedef struct tcaches_s tcaches_t;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2012-03-27 09:54:44 +08:00
|
|
|
/*
|
|
|
|
* tcache pointers close to NULL are used to encode state information that is
|
|
|
|
* used for two purposes: preventing thread caching on a per thread basis and
|
|
|
|
* cleaning up during thread shutdown.
|
|
|
|
*/
|
|
|
|
#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1)
|
|
|
|
#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2)
|
|
|
|
#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3)
|
|
|
|
#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY
|
|
|
|
|
2015-05-20 08:47:16 +08:00
|
|
|
/*
|
|
|
|
* Absolute minimum number of cache slots for each small bin.
|
|
|
|
*/
|
|
|
|
#define TCACHE_NSLOTS_SMALL_MIN 20
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
/*
|
2010-03-18 07:27:39 +08:00
|
|
|
* Absolute maximum number of cache slots for each small bin in the thread
|
|
|
|
* cache. This is an additional constraint beyond that imposed as: twice the
|
|
|
|
* number of regions per run for this size class.
|
2010-03-08 07:34:14 +08:00
|
|
|
*
|
|
|
|
* This constant must be an even number.
|
2010-01-17 01:53:50 +08:00
|
|
|
*/
|
2010-03-18 07:27:39 +08:00
|
|
|
#define TCACHE_NSLOTS_SMALL_MAX 200
|
|
|
|
|
|
|
|
/* Number of cache slots for large size classes. */
|
|
|
|
#define TCACHE_NSLOTS_LARGE 20
|
|
|
|
|
2010-10-24 09:37:06 +08:00
|
|
|
/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */
|
2010-03-18 07:27:39 +08:00
|
|
|
#define LG_TCACHE_MAXCLASS_DEFAULT 15
|
|
|
|
|
|
|
|
/*
|
2012-03-06 06:34:37 +08:00
|
|
|
* TCACHE_GC_SWEEP is the approximate number of allocation events between
|
|
|
|
* full GC sweeps. Integer rounding may cause the actual number to be
|
|
|
|
* slightly higher, since GC is performed incrementally.
|
2010-03-18 07:27:39 +08:00
|
|
|
*/
|
2012-03-06 06:34:37 +08:00
|
|
|
#define TCACHE_GC_SWEEP 8192
|
|
|
|
|
|
|
|
/* Number of tcache allocation/deallocation events between incremental GCs. */
|
|
|
|
#define TCACHE_GC_INCR \
|
|
|
|
((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1))
|
2010-01-17 01:53:50 +08:00
|
|
|
|
|
|
|
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

/* Per thread tcache enablement state, stored in TSD. */
typedef enum {
	tcache_enabled_false = 0, /* Enable cast to/from bool. */
	tcache_enabled_true = 1,
	tcache_enabled_default = 2
} tcache_enabled_t;
/*
 * Read-only information associated with each element of tcache_t's tbins array
 * is stored separately, mainly to reduce memory usage.
 */
struct tcache_bin_info_s {
	unsigned	ncached_max;	/* Upper limit on ncached. */
};
/* One cache bin: a stack of cached objects for a single size class. */
struct tcache_bin_s {
	tcache_bin_stats_t tstats;
	int		low_water;	/* Min # cached since last GC. */
	unsigned	lg_fill_div;	/* Fill (ncached_max >> lg_fill_div). */
	unsigned	ncached;	/* # of cached objects. */
	/*
	 * To make use of adjacent cacheline prefetch, the items in the avail
	 * stack goes to higher address for newer allocations.  avail points
	 * just above the available space, which means that
	 * avail[-ncached, ... -1] are available items and the lowest item will
	 * be allocated first.
	 */
	void		**avail;	/* Stack of available objects. */
};
/* Per thread cache; one tcache_bin_t per cached size class. */
struct tcache_s {
	ql_elm(tcache_t) link;		/* Used for aggregating stats. */
	uint64_t	prof_accumbytes;/* Cleared after arena_prof_accum(). */
	ticker_t	gc_ticker;	/* Drives incremental GC. */
	szind_t		next_gc_bin;	/* Next bin to GC. */
	tcache_bin_t	tbins[1];	/* Dynamically sized. */
	/*
	 * The pointer stacks associated with tbins follow as a contiguous
	 * array.  During tcache initialization, the avail pointer in each
	 * element of tbins is initialized to point to the proper offset within
	 * this array.
	 */
};
/* Linkage for list of available (previously used) explicit tcache IDs. */
struct tcaches_s {
	union {
		tcache_t	*tcache;	/* Slot in use. */
		tcaches_t	*next;		/* Slot on the free list. */
	};
};

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_tcache;
extern ssize_t	opt_lg_tcache_max;

extern tcache_bin_info_t	*tcache_bin_info;

/*
 * Number of tcache bins.  There are NBINS small-object bins, plus 0 or more
 * large-object bins.
 */
extern unsigned	nhbins;

/* Maximum cached size class. */
extern size_t	tcache_maxclass;

/*
 * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and
 * usable via the MALLOCX_TCACHE() flag.  The automatic per thread tcaches are
 * completely disjoint from this data structure.  tcaches starts off as a sparse
 * array, so it has no physical memory footprint until individual pages are
 * touched.  This allows the entire array to be allocated the first time an
 * explicit tcache is created without a disproportionate impact on memory usage.
 */
extern tcaches_t	*tcaches;

size_t	tcache_salloc(const void *ptr);
void	tcache_event_hard(tsd_t *tsd, tcache_t *tcache);
void	*tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
    tcache_bin_t *tbin, szind_t binind, bool *tcache_success);
void	tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
    szind_t binind, unsigned rem);
void	tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
    unsigned rem, tcache_t *tcache);
void	tcache_arena_associate(tcache_t *tcache, arena_t *arena);
void	tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena,
    arena_t *newarena);
void	tcache_arena_dissociate(tcache_t *tcache, arena_t *arena);
tcache_t *tcache_get_hard(tsd_t *tsd);
tcache_t *tcache_create(tsd_t *tsd, arena_t *arena);
void	tcache_cleanup(tsd_t *tsd);
void	tcache_enabled_cleanup(tsd_t *tsd);
void	tcache_stats_merge(tcache_t *tcache, arena_t *arena);
bool	tcaches_create(tsd_t *tsd, unsigned *r_ind);
void	tcaches_flush(tsd_t *tsd, unsigned ind);
void	tcaches_destroy(tsd_t *tsd, unsigned ind);
bool	tcache_boot(void);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
void	tcache_event(tsd_t *tsd, tcache_t *tcache);
void	tcache_flush(void);
bool	tcache_enabled_get(void);
tcache_t *tcache_get(tsd_t *tsd, bool create);
void	tcache_enabled_set(bool enabled);
void	*tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success);
void	*tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
    size_t size, szind_t ind, bool zero, bool slow_path);
void	*tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
    size_t size, szind_t ind, bool zero, bool slow_path);
void	tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr,
    szind_t binind, bool slow_path);
void	tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr,
    size_t size, bool slow_path);
tcache_t	*tcaches_get(tsd_t *tsd, unsigned ind);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_))
JEMALLOC_INLINE void
|
|
|
|
tcache_flush(void)
|
|
|
|
{
|
2014-09-23 12:09:23 +08:00
|
|
|
tsd_t *tsd;
|
2012-03-27 09:54:44 +08:00
|
|
|
|
|
|
|
cassert(config_tcache);
|
|
|
|
|
2014-10-05 02:12:53 +08:00
|
|
|
tsd = tsd_fetch();
|
|
|
|
tcache_cleanup(tsd);
|
2012-03-27 09:54:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
JEMALLOC_INLINE bool
|
|
|
|
tcache_enabled_get(void)
|
|
|
|
{
|
2014-09-23 12:09:23 +08:00
|
|
|
tsd_t *tsd;
|
2012-03-27 09:54:44 +08:00
|
|
|
tcache_enabled_t tcache_enabled;
|
|
|
|
|
|
|
|
cassert(config_tcache);
|
|
|
|
|
2014-10-05 02:12:53 +08:00
|
|
|
tsd = tsd_fetch();
|
2014-09-23 12:09:23 +08:00
|
|
|
tcache_enabled = tsd_tcache_enabled_get(tsd);
|
2012-03-27 09:54:44 +08:00
|
|
|
if (tcache_enabled == tcache_enabled_default) {
|
|
|
|
tcache_enabled = (tcache_enabled_t)opt_tcache;
|
2014-09-23 12:09:23 +08:00
|
|
|
tsd_tcache_enabled_set(tsd, tcache_enabled);
|
2012-03-27 09:54:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return ((bool)tcache_enabled);
|
|
|
|
}
|
|
|
|
|
|
|
|
JEMALLOC_INLINE void
|
|
|
|
tcache_enabled_set(bool enabled)
|
|
|
|
{
|
2014-09-23 12:09:23 +08:00
|
|
|
tsd_t *tsd;
|
2012-03-27 09:54:44 +08:00
|
|
|
tcache_enabled_t tcache_enabled;
|
|
|
|
|
|
|
|
cassert(config_tcache);
|
|
|
|
|
2014-10-05 02:12:53 +08:00
|
|
|
tsd = tsd_fetch();
|
2014-09-23 12:09:23 +08:00
|
|
|
|
2012-03-27 09:54:44 +08:00
|
|
|
tcache_enabled = (tcache_enabled_t)enabled;
|
2014-09-23 12:09:23 +08:00
|
|
|
tsd_tcache_enabled_set(tsd, tcache_enabled);
|
|
|
|
|
|
|
|
if (!enabled)
|
|
|
|
tcache_cleanup(tsd);
|
2012-03-27 09:54:44 +08:00
|
|
|
}
|
2012-03-22 09:33:03 +08:00
|
|
|
|
2013-01-23 00:45:43 +08:00
|
|
|
/*
 * Fetch the calling thread's tcache, optionally creating it on first
 * use.  Returns NULL when tcaching is compiled out, when no tcache
 * exists and create is false, or when creation is not possible.
 */
JEMALLOC_ALWAYS_INLINE tcache_t *
tcache_get(tsd_t *tsd, bool create)
{
	tcache_t *tcache;

	if (!config_tcache)
		return (NULL);

	tcache = tsd_tcache_get(tsd);
	if (create && unlikely(tcache == NULL) && tsd_nominal(tsd)) {
		/* Lazily allocate, but only while tsd is in a usable state. */
		tcache = tcache_get_hard(tsd);
		tsd_tcache_set(tsd, tcache);
	}
	return (tcache);
}
JEMALLOC_ALWAYS_INLINE void
|
2015-01-30 07:30:47 +08:00
|
|
|
tcache_event(tsd_t *tsd, tcache_t *tcache)
|
2010-01-17 01:53:50 +08:00
|
|
|
{
|
|
|
|
|
2012-03-06 06:34:37 +08:00
|
|
|
if (TCACHE_GC_INCR == 0)
|
2010-01-17 01:53:50 +08:00
|
|
|
return;
|
|
|
|
|
2016-02-03 12:37:24 +08:00
|
|
|
if (unlikely(ticker_tick(&tcache->gc_ticker)))
|
2015-01-30 07:30:47 +08:00
|
|
|
tcache_event_hard(tsd, tcache);
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2013-01-23 00:45:43 +08:00
|
|
|
/*
 * Pop the lowest-addressed cached object from tbin, or fail (setting
 * *tcache_success to false) when the bin is empty.  Also maintains the
 * bin's low_water mark for incremental GC.
 */
JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success)
{
	void *ret;

	if (unlikely(tbin->ncached == 0)) {
		tbin->low_water = -1;
		*tcache_success = false;
		return (NULL);
	}
	/*
	 * tcache_success (instead of ret) should be checked upon the return of
	 * this function.  We avoid checking (ret == NULL) because there is
	 * never a null stored on the avail stack (which is unknown to the
	 * compiler), and eagerly checking ret would cause pipeline stall
	 * (waiting for the cacheline).
	 */
	*tcache_success = true;
	ret = *(tbin->avail - tbin->ncached);
	tbin->ncached--;

	if (unlikely((int)tbin->ncached < tbin->low_water))
		tbin->low_water = tbin->ncached;

	return (ret);
}
JEMALLOC_ALWAYS_INLINE void *
|
2015-02-14 07:28:56 +08:00
|
|
|
tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
|
2015-10-28 06:12:10 +08:00
|
|
|
szind_t binind, bool zero, bool slow_path)
|
2010-01-17 01:53:50 +08:00
|
|
|
{
|
|
|
|
void *ret;
|
2010-03-08 07:34:14 +08:00
|
|
|
tcache_bin_t *tbin;
|
2015-10-28 06:12:10 +08:00
|
|
|
bool tcache_success;
|
|
|
|
size_t usize JEMALLOC_CC_SILENCE_INIT(0);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2012-02-29 08:50:47 +08:00
|
|
|
assert(binind < NBINS);
|
2010-03-08 07:34:14 +08:00
|
|
|
tbin = &tcache->tbins[binind];
|
2015-10-28 06:12:10 +08:00
|
|
|
ret = tcache_alloc_easy(tbin, &tcache_success);
|
|
|
|
assert(tcache_success == (ret != NULL));
|
|
|
|
if (unlikely(!tcache_success)) {
|
|
|
|
bool tcache_hard_success;
|
|
|
|
arena = arena_choose(tsd, arena);
|
|
|
|
if (unlikely(arena == NULL))
|
|
|
|
return (NULL);
|
|
|
|
|
|
|
|
ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind,
|
|
|
|
&tcache_hard_success);
|
|
|
|
if (tcache_hard_success == false)
|
2010-01-17 01:53:50 +08:00
|
|
|
return (NULL);
|
|
|
|
}
|
2015-10-28 06:12:10 +08:00
|
|
|
|
|
|
|
assert(ret);
|
|
|
|
/*
|
|
|
|
* Only compute usize if required. The checks in the following if
|
|
|
|
* statement are all static.
|
|
|
|
*/
|
|
|
|
if (config_prof || (slow_path && config_fill) || unlikely(zero)) {
|
|
|
|
usize = index2size(binind);
|
|
|
|
assert(tcache_salloc(ret) == usize);
|
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2014-10-04 01:16:09 +08:00
|
|
|
if (likely(!zero)) {
|
2015-10-28 06:12:10 +08:00
|
|
|
if (slow_path && config_fill) {
|
2014-12-09 05:12:41 +08:00
|
|
|
if (unlikely(opt_junk_alloc)) {
|
2012-04-06 15:35:09 +08:00
|
|
|
arena_alloc_junk_small(ret,
|
|
|
|
&arena_bin_info[binind], false);
|
2014-09-12 07:20:44 +08:00
|
|
|
} else if (unlikely(opt_zero))
|
2014-10-06 08:54:10 +08:00
|
|
|
memset(ret, 0, usize);
|
2012-02-11 12:22:09 +08:00
|
|
|
}
|
2012-04-06 15:35:09 +08:00
|
|
|
} else {
|
2015-10-28 06:12:10 +08:00
|
|
|
if (slow_path && config_fill && unlikely(opt_junk_alloc)) {
|
2012-04-06 15:35:09 +08:00
|
|
|
arena_alloc_junk_small(ret, &arena_bin_info[binind],
|
|
|
|
true);
|
|
|
|
}
|
2014-10-06 08:54:10 +08:00
|
|
|
memset(ret, 0, usize);
|
2012-04-06 15:35:09 +08:00
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2012-02-11 12:22:09 +08:00
|
|
|
if (config_stats)
|
|
|
|
tbin->tstats.nrequests++;
|
|
|
|
if (config_prof)
|
2014-10-06 08:54:10 +08:00
|
|
|
tcache->prof_accumbytes += usize;
|
2015-01-30 07:30:47 +08:00
|
|
|
tcache_event(tsd, tcache);
|
2010-01-17 01:53:50 +08:00
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2013-01-23 00:45:43 +08:00
|
|
|
/*
 * Allocate from a large size class via the tcache.  On a cache miss the
 * object comes straight from the arena (no cache fill, since large
 * objects are expensive to create speculatively).
 */
JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
    szind_t binind, bool zero, bool slow_path)
{
	void *ret;
	tcache_bin_t *tbin;
	bool tcache_success;

	assert(binind < nhbins);
	tbin = &tcache->tbins[binind];
	ret = tcache_alloc_easy(tbin, &tcache_success);
	assert(tcache_success == (ret != NULL));
	if (unlikely(!tcache_success)) {
		/*
		 * Only allocate one large object at a time, because it's quite
		 * expensive to create one and not use it.
		 */
		arena = arena_choose(tsd, arena);
		if (unlikely(arena == NULL))
			return (NULL);

		ret = arena_malloc_large(tsd, arena, binind, zero);
		if (ret == NULL)
			return (NULL);
	} else {
		size_t usize JEMALLOC_CC_SILENCE_INIT(0);

		/* Only compute usize on demand */
		if (config_prof || (slow_path && config_fill) ||
		    unlikely(zero)) {
			usize = index2size(binind);
			assert(usize <= tcache_maxclass);
		}

		if (config_prof && usize == LARGE_MINCLASS) {
			arena_chunk_t *chunk =
			    (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
			size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
			    LG_PAGE);
			arena_mapbits_large_binind_set(chunk, pageind,
			    BININD_INVALID);
		}
		if (likely(!zero)) {
			if (slow_path && config_fill) {
				if (unlikely(opt_junk_alloc)) {
					memset(ret, JEMALLOC_ALLOC_JUNK,
					    usize);
				} else if (unlikely(opt_zero))
					memset(ret, 0, usize);
			}
		} else
			memset(ret, 0, usize);

		if (config_stats)
			tbin->tstats.nrequests++;
		if (config_prof)
			tcache->prof_accumbytes += usize;
	}

	tcache_event(tsd, tcache);
	return (ret);
}
/*
 * Return a small object to its tcache bin, flushing half the bin first
 * when it is full.
 */
JEMALLOC_ALWAYS_INLINE void
tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
    bool slow_path)
{
	tcache_bin_t *tbin;
	tcache_bin_info_t *tbin_info;

	assert(tcache_salloc(ptr) <= SMALL_MAXCLASS);

	if (slow_path && config_fill && unlikely(opt_junk_free))
		arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);

	tbin = &tcache->tbins[binind];
	tbin_info = &tcache_bin_info[binind];
	if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
		/* Bin full: flush the older half back to the arena. */
		tcache_bin_flush_small(tsd, tcache, tbin, binind,
		    (tbin_info->ncached_max >> 1));
	}
	assert(tbin->ncached < tbin_info->ncached_max);
	tbin->ncached++;
	/* Push onto the downward-growing avail stack (see tcache_bin_s). */
	*(tbin->avail - tbin->ncached) = ptr;

	tcache_event(tsd, tcache);
}
JEMALLOC_ALWAYS_INLINE void
|
2015-10-28 06:12:10 +08:00
|
|
|
tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size,
|
|
|
|
bool slow_path)
|
2010-03-18 07:27:39 +08:00
|
|
|
{
|
2015-08-20 06:21:32 +08:00
|
|
|
szind_t binind;
|
2010-03-18 07:27:39 +08:00
|
|
|
tcache_bin_t *tbin;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
tcache_bin_info_t *tbin_info;
|
2010-03-18 07:27:39 +08:00
|
|
|
|
|
|
|
assert((size & PAGE_MASK) == 0);
|
2012-04-20 09:28:03 +08:00
|
|
|
assert(tcache_salloc(ptr) > SMALL_MAXCLASS);
|
|
|
|
assert(tcache_salloc(ptr) <= tcache_maxclass);
|
2010-03-18 07:27:39 +08:00
|
|
|
|
2014-10-06 08:54:10 +08:00
|
|
|
binind = size2index(size);
|
2010-03-18 07:27:39 +08:00
|
|
|
|
2015-10-28 06:12:10 +08:00
|
|
|
if (slow_path && config_fill && unlikely(opt_junk_free))
|
2014-10-10 08:54:06 +08:00
|
|
|
arena_dalloc_junk_large(ptr, size);
|
2010-03-18 07:27:39 +08:00
|
|
|
|
|
|
|
tbin = &tcache->tbins[binind];
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
tbin_info = &tcache_bin_info[binind];
|
2014-09-12 07:20:44 +08:00
|
|
|
if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
|
2015-01-30 07:30:47 +08:00
|
|
|
tcache_bin_flush_large(tsd, tbin, binind,
|
|
|
|
(tbin_info->ncached_max >> 1), tcache);
|
2010-03-08 07:34:14 +08:00
|
|
|
}
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
assert(tbin->ncached < tbin_info->ncached_max);
|
2010-01-17 01:53:50 +08:00
|
|
|
tbin->ncached++;
|
2015-10-28 06:12:10 +08:00
|
|
|
*(tbin->avail - tbin->ncached) = ptr;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2015-01-30 07:30:47 +08:00
|
|
|
tcache_event(tsd, tcache);
|
|
|
|
}
|
|
|
|
|
|
|
|
JEMALLOC_ALWAYS_INLINE tcache_t *
tcaches_get(tsd_t *tsd, unsigned ind)
{
	tcaches_t *slot;

	/*
	 * Look up the explicit tcache at index ind, lazily creating it on
	 * first use.  The backing arena is whichever arena_choose() picks
	 * for this thread.
	 */
	slot = &tcaches[ind];
	if (unlikely(slot->tcache == NULL)) {
		slot->tcache = tcache_create(tsd, arena_choose(tsd, NULL));
	}
	return (slot->tcache);
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif /* JEMALLOC_H_INLINES */
|
|
|
|
/******************************************************************************/
|