2010-01-17 01:53:50 +08:00
|
|
|
/******************************************************************************/
|
|
|
|
#ifdef JEMALLOC_H_TYPES
|
|
|
|
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
/* Forward declarations of the thread cache types defined further below. */
typedef struct tcache_bin_info_s tcache_bin_info_t;
typedef struct tcache_bin_s tcache_bin_t;
typedef struct tcache_s tcache_t;

/*
 * A thread's tcache pointer doubles as a tiny state machine: values close to
 * NULL are never real tcache addresses, and are instead used to encode state
 * information.  That state serves two purposes: preventing thread caching on
 * a per thread basis, and cleaning up during thread shutdown.
 *
 * Any pointer value <= TCACHE_STATE_MAX (including NULL) is therefore a state
 * rather than a usable tcache.
 */
#define	TCACHE_STATE_DISABLED		((tcache_t *)(uintptr_t)1)
#define	TCACHE_STATE_REINCARNATED	((tcache_t *)(uintptr_t)2)
#define	TCACHE_STATE_PURGATORY		((tcache_t *)(uintptr_t)3)
#define	TCACHE_STATE_MAX		TCACHE_STATE_PURGATORY
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
/*
 * Absolute maximum number of cache slots for each small bin in the thread
 * cache.  This is an additional constraint beyond that imposed as: twice the
 * number of regions per run for this size class.
 *
 * This constant must be an even number.
 */
#define	TCACHE_NSLOTS_SMALL_MAX		200

/* Number of cache slots for large size classes. */
#define	TCACHE_NSLOTS_LARGE		20

/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */
#define	LG_TCACHE_MAXCLASS_DEFAULT	15

/*
 * TCACHE_GC_SWEEP is the approximate number of allocation events between
 * full GC sweeps.  Integer rounding may cause the actual number to be
 * slightly higher, since GC is performed incrementally.
 */
#define	TCACHE_GC_SWEEP			8192

/*
 * Number of tcache allocation/deallocation events between incremental GCs.
 *
 * Evaluates to (TCACHE_GC_SWEEP / NBINS) + 1, except that a zero quotient
 * yields 0; tcache_event() does nothing when this is 0.
 */
#define	TCACHE_GC_INCR							\
    ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS != 0) ? 1 : 0))
|
2010-01-17 01:53:50 +08:00
|
|
|
|
|
|
|
#endif /* JEMALLOC_H_TYPES */
|
|
|
|
/******************************************************************************/
|
|
|
|
#ifdef JEMALLOC_H_STRUCTS
|
|
|
|
|
2012-03-27 09:54:44 +08:00
|
|
|
/*
 * Per-thread tcache enable state.  The first two values deliberately match
 * false/true so that the enum can be cast to and from bool; the third marks
 * a thread that has not yet resolved its setting.
 */
typedef enum {
	tcache_enabled_false   = 0, /* Enable cast to/from bool. */
	tcache_enabled_true    = 1,
	tcache_enabled_default = 2
} tcache_enabled_t;
|
|
|
|
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
/*
 * Read-only information associated with each element of tcache_t's tbins array
 * is stored separately, mainly to reduce memory usage.
 */
struct tcache_bin_info_s {
	/* Upper limit on the corresponding tcache_bin_t's ncached. */
	unsigned	ncached_max;
};
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
struct tcache_bin_s {
|
|
|
|
tcache_bin_stats_t tstats;
|
2011-03-21 15:18:17 +08:00
|
|
|
int low_water; /* Min # cached since last GC. */
|
|
|
|
unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */
|
2010-01-17 01:53:50 +08:00
|
|
|
unsigned ncached; /* # of cached objects. */
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
void **avail; /* Stack of available objects. */
|
2010-01-17 01:53:50 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct tcache_s {
|
|
|
|
ql_elm(tcache_t) link; /* Used for aggregating stats. */
|
2010-02-12 05:19:21 +08:00
|
|
|
uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */
|
2010-01-17 01:53:50 +08:00
|
|
|
arena_t *arena; /* This thread's arena. */
|
|
|
|
unsigned ev_cnt; /* Event count since incremental GC. */
|
|
|
|
unsigned next_gc_bin; /* Next bin to GC. */
|
2010-03-08 07:34:14 +08:00
|
|
|
tcache_bin_t tbins[1]; /* Dynamically sized. */
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
/*
|
|
|
|
* The pointer stacks associated with tbins follow as a contiguous
|
|
|
|
* array. During tcache initialization, the avail pointer in each
|
|
|
|
* element of tbins is initialized to point to the proper offset within
|
|
|
|
* this array.
|
|
|
|
*/
|
2010-01-17 01:53:50 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
#endif /* JEMALLOC_H_STRUCTS */
|
|
|
|
/******************************************************************************/
|
|
|
|
#ifdef JEMALLOC_H_EXTERNS
|
|
|
|
|
2010-03-08 07:34:14 +08:00
|
|
|
extern bool opt_tcache;
|
2010-10-24 09:37:06 +08:00
|
|
|
extern ssize_t opt_lg_tcache_max;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
extern tcache_bin_info_t *tcache_bin_info;
|
|
|
|
|
2010-03-18 07:27:39 +08:00
|
|
|
/*
|
2012-02-29 08:50:47 +08:00
|
|
|
* Number of tcache bins. There are NBINS small-object bins, plus 0 or more
|
2010-03-18 07:27:39 +08:00
|
|
|
* large-object bins.
|
|
|
|
*/
|
|
|
|
extern size_t nhbins;
|
|
|
|
|
|
|
|
/* Maximum cached size class. */
|
|
|
|
extern size_t tcache_maxclass;
|
|
|
|
|
2012-04-20 09:28:03 +08:00
|
|
|
size_t tcache_salloc(const void *ptr);
|
2012-05-02 15:30:36 +08:00
|
|
|
void tcache_event_hard(tcache_t *tcache);
|
2012-04-20 09:28:03 +08:00
|
|
|
void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin,
|
|
|
|
size_t binind);
|
2012-02-11 12:22:09 +08:00
|
|
|
void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
|
|
|
|
tcache_t *tcache);
|
|
|
|
void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
|
|
|
|
tcache_t *tcache);
|
2012-03-22 09:33:03 +08:00
|
|
|
void tcache_arena_associate(tcache_t *tcache, arena_t *arena);
|
|
|
|
void tcache_arena_dissociate(tcache_t *tcache);
|
2010-01-17 01:53:50 +08:00
|
|
|
tcache_t *tcache_create(arena_t *arena);
|
|
|
|
void tcache_destroy(tcache_t *tcache);
|
2012-03-22 09:33:03 +08:00
|
|
|
void tcache_thread_cleanup(void *arg);
|
2010-01-17 01:53:50 +08:00
|
|
|
void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
|
2012-03-22 09:33:03 +08:00
|
|
|
bool tcache_boot0(void);
|
|
|
|
bool tcache_boot1(void);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
|
|
|
#endif /* JEMALLOC_H_EXTERNS */
|
|
|
|
/******************************************************************************/
|
|
|
|
#ifdef JEMALLOC_H_INLINES
|
|
|
|
|
|
|
|
#ifndef JEMALLOC_ENABLE_INLINE
|
2012-03-22 09:33:03 +08:00
|
|
|
malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *)
|
2012-03-27 09:54:44 +08:00
|
|
|
malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t)
|
2012-03-22 09:33:03 +08:00
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
void tcache_event(tcache_t *tcache);
|
2012-03-27 09:54:44 +08:00
|
|
|
void tcache_flush(void);
|
|
|
|
bool tcache_enabled_get(void);
|
2012-03-31 03:11:03 +08:00
|
|
|
tcache_t *tcache_get(bool create);
|
2012-03-27 09:54:44 +08:00
|
|
|
void tcache_enabled_set(bool enabled);
|
2010-03-18 07:27:39 +08:00
|
|
|
void *tcache_alloc_easy(tcache_bin_t *tbin);
|
|
|
|
void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero);
|
|
|
|
void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero);
|
2012-05-02 15:30:36 +08:00
|
|
|
void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind);
|
2010-03-18 07:27:39 +08:00
|
|
|
void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size);
|
2010-01-17 01:53:50 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_))
|
2012-03-22 09:33:03 +08:00
|
|
|
/* Map of thread-specific caches. */
|
|
|
|
malloc_tsd_externs(tcache, tcache_t *)
|
2013-01-23 00:45:43 +08:00
|
|
|
malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache, tcache_t *, NULL,
|
2012-03-22 09:33:03 +08:00
|
|
|
tcache_thread_cleanup)
|
2012-03-27 09:54:44 +08:00
|
|
|
/* Per thread flag that allows thread caches to be disabled. */
|
|
|
|
malloc_tsd_externs(tcache_enabled, tcache_enabled_t)
|
2013-01-23 00:45:43 +08:00
|
|
|
malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache_enabled, tcache_enabled_t,
|
2012-03-27 09:54:44 +08:00
|
|
|
tcache_enabled_default, malloc_tsd_no_cleanup)
|
|
|
|
|
|
|
|
JEMALLOC_INLINE void
|
|
|
|
tcache_flush(void)
|
|
|
|
{
|
|
|
|
tcache_t *tcache;
|
|
|
|
|
|
|
|
cassert(config_tcache);
|
|
|
|
|
|
|
|
tcache = *tcache_tsd_get();
|
|
|
|
if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX)
|
|
|
|
return;
|
|
|
|
tcache_destroy(tcache);
|
|
|
|
tcache = NULL;
|
|
|
|
tcache_tsd_set(&tcache);
|
|
|
|
}
|
|
|
|
|
|
|
|
JEMALLOC_INLINE bool
|
|
|
|
tcache_enabled_get(void)
|
|
|
|
{
|
|
|
|
tcache_enabled_t tcache_enabled;
|
|
|
|
|
|
|
|
cassert(config_tcache);
|
|
|
|
|
|
|
|
tcache_enabled = *tcache_enabled_tsd_get();
|
|
|
|
if (tcache_enabled == tcache_enabled_default) {
|
|
|
|
tcache_enabled = (tcache_enabled_t)opt_tcache;
|
|
|
|
tcache_enabled_tsd_set(&tcache_enabled);
|
|
|
|
}
|
|
|
|
|
|
|
|
return ((bool)tcache_enabled);
|
|
|
|
}
|
|
|
|
|
|
|
|
JEMALLOC_INLINE void
|
|
|
|
tcache_enabled_set(bool enabled)
|
|
|
|
{
|
|
|
|
tcache_enabled_t tcache_enabled;
|
|
|
|
tcache_t *tcache;
|
|
|
|
|
|
|
|
cassert(config_tcache);
|
|
|
|
|
|
|
|
tcache_enabled = (tcache_enabled_t)enabled;
|
|
|
|
tcache_enabled_tsd_set(&tcache_enabled);
|
|
|
|
tcache = *tcache_tsd_get();
|
|
|
|
if (enabled) {
|
|
|
|
if (tcache == TCACHE_STATE_DISABLED) {
|
|
|
|
tcache = NULL;
|
|
|
|
tcache_tsd_set(&tcache);
|
|
|
|
}
|
|
|
|
} else /* disabled */ {
|
|
|
|
if (tcache > TCACHE_STATE_MAX) {
|
|
|
|
tcache_destroy(tcache);
|
|
|
|
tcache = NULL;
|
|
|
|
}
|
|
|
|
if (tcache == NULL) {
|
|
|
|
tcache = TCACHE_STATE_DISABLED;
|
|
|
|
tcache_tsd_set(&tcache);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2012-03-22 09:33:03 +08:00
|
|
|
|
2013-01-23 00:45:43 +08:00
|
|
|
/*
 * Return the calling thread's tcache, or NULL if none is available.
 *
 * NULL is returned when tcache support is compiled out, when lazy locking is
 * configured and the process is not yet threaded, and whenever the thread's
 * slot holds a sentinel state.  If the slot is NULL, a tcache is created only
 * when create is true and caching is enabled for the thread; the result of
 * tcache_create() is returned as is.
 */
JEMALLOC_ALWAYS_INLINE tcache_t *
tcache_get(bool create)
{
	tcache_t *tcache;

	if (!config_tcache || (config_lazy_lock && !isthreaded))
		return (NULL);

	tcache = *tcache_tsd_get();

	/* Common case: the slot holds a real tcache. */
	if ((uintptr_t)tcache > (uintptr_t)TCACHE_STATE_MAX)
		return (tcache);

	if (tcache == NULL) {
		if (!create) {
			/*
			 * Creating a tcache here would cause allocation as a
			 * side effect of free().  Ordinarily that would be
			 * okay since tcache_create() failure is a soft failure
			 * that doesn't propagate.  However, if TLS data are
			 * freed via free() as in glibc, subtle corruption
			 * could result from setting a TLS variable after its
			 * backing memory is freed.
			 */
			return (NULL);
		}
		if (!tcache_enabled_get()) {
			tcache_enabled_set(false); /* Memoize. */
			return (NULL);
		}
		return (tcache_create(choose_arena(NULL)));
	} else if (tcache == TCACHE_STATE_DISABLED ||
	    tcache == TCACHE_STATE_REINCARNATED) {
		return (NULL);
	} else if (tcache == TCACHE_STATE_PURGATORY) {
		/*
		 * Make a note that an allocator function was called after
		 * tcache_thread_cleanup() was called.
		 */
		tcache = TCACHE_STATE_REINCARNATED;
		tcache_tsd_set(&tcache);
		return (NULL);
	}

	/* Every value <= TCACHE_STATE_MAX was handled above. */
	not_reached();
	return (tcache);
}
|
|
|
|
|
2013-01-23 00:45:43 +08:00
|
|
|
JEMALLOC_ALWAYS_INLINE void
|
2010-01-17 01:53:50 +08:00
|
|
|
tcache_event(tcache_t *tcache)
|
|
|
|
{
|
|
|
|
|
2012-03-06 06:34:37 +08:00
|
|
|
if (TCACHE_GC_INCR == 0)
|
2010-01-17 01:53:50 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
tcache->ev_cnt++;
|
2012-03-06 06:34:37 +08:00
|
|
|
assert(tcache->ev_cnt <= TCACHE_GC_INCR);
|
2012-05-02 15:30:36 +08:00
|
|
|
if (tcache->ev_cnt == TCACHE_GC_INCR)
|
|
|
|
tcache_event_hard(tcache);
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2013-01-23 00:45:43 +08:00
|
|
|
JEMALLOC_ALWAYS_INLINE void *
|
2010-03-18 07:27:39 +08:00
|
|
|
tcache_alloc_easy(tcache_bin_t *tbin)
|
2010-01-17 01:53:50 +08:00
|
|
|
{
|
2010-03-08 07:34:14 +08:00
|
|
|
void *ret;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2011-03-21 15:18:17 +08:00
|
|
|
if (tbin->ncached == 0) {
|
|
|
|
tbin->low_water = -1;
|
2010-01-17 01:53:50 +08:00
|
|
|
return (NULL);
|
2011-03-21 15:18:17 +08:00
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
tbin->ncached--;
|
2011-03-21 15:18:17 +08:00
|
|
|
if ((int)tbin->ncached < tbin->low_water)
|
2010-01-17 01:53:50 +08:00
|
|
|
tbin->low_water = tbin->ncached;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
ret = tbin->avail[tbin->ncached];
|
2010-03-08 07:34:14 +08:00
|
|
|
return (ret);
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
2013-01-23 00:45:43 +08:00
|
|
|
JEMALLOC_ALWAYS_INLINE void *
|
2010-03-18 07:27:39 +08:00
|
|
|
tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
|
2010-01-17 01:53:50 +08:00
|
|
|
{
|
|
|
|
void *ret;
|
|
|
|
size_t binind;
|
2010-03-08 07:34:14 +08:00
|
|
|
tcache_bin_t *tbin;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2011-03-07 14:56:36 +08:00
|
|
|
binind = SMALL_SIZE2BIN(size);
|
2012-02-29 08:50:47 +08:00
|
|
|
assert(binind < NBINS);
|
2010-03-08 07:34:14 +08:00
|
|
|
tbin = &tcache->tbins[binind];
|
2013-12-16 13:49:40 +08:00
|
|
|
size = arena_bin_info[binind].reg_size;
|
2010-03-18 07:27:39 +08:00
|
|
|
ret = tcache_alloc_easy(tbin);
|
2010-01-17 01:53:50 +08:00
|
|
|
if (ret == NULL) {
|
2010-03-18 07:27:39 +08:00
|
|
|
ret = tcache_alloc_small_hard(tcache, tbin, binind);
|
2010-01-17 01:53:50 +08:00
|
|
|
if (ret == NULL)
|
|
|
|
return (NULL);
|
|
|
|
}
|
2012-04-20 09:28:03 +08:00
|
|
|
assert(tcache_salloc(ret) == arena_bin_info[binind].reg_size);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
|
|
|
if (zero == false) {
|
2012-02-11 12:22:09 +08:00
|
|
|
if (config_fill) {
|
2012-04-06 15:35:09 +08:00
|
|
|
if (opt_junk) {
|
|
|
|
arena_alloc_junk_small(ret,
|
|
|
|
&arena_bin_info[binind], false);
|
|
|
|
} else if (opt_zero)
|
2012-02-11 12:22:09 +08:00
|
|
|
memset(ret, 0, size);
|
|
|
|
}
|
2013-10-20 14:48:40 +08:00
|
|
|
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
|
2012-04-06 15:35:09 +08:00
|
|
|
} else {
|
|
|
|
if (config_fill && opt_junk) {
|
|
|
|
arena_alloc_junk_small(ret, &arena_bin_info[binind],
|
|
|
|
true);
|
|
|
|
}
|
|
|
|
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
|
2010-01-17 01:53:50 +08:00
|
|
|
memset(ret, 0, size);
|
2012-04-06 15:35:09 +08:00
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2012-02-11 12:22:09 +08:00
|
|
|
if (config_stats)
|
|
|
|
tbin->tstats.nrequests++;
|
|
|
|
if (config_prof)
|
|
|
|
tcache->prof_accumbytes += arena_bin_info[binind].reg_size;
|
2010-01-17 01:53:50 +08:00
|
|
|
tcache_event(tcache);
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2013-01-23 00:45:43 +08:00
|
|
|
JEMALLOC_ALWAYS_INLINE void *
|
2010-03-18 07:27:39 +08:00
|
|
|
tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
|
|
|
|
{
|
|
|
|
void *ret;
|
|
|
|
size_t binind;
|
|
|
|
tcache_bin_t *tbin;
|
|
|
|
|
|
|
|
size = PAGE_CEILING(size);
|
|
|
|
assert(size <= tcache_maxclass);
|
2012-04-02 22:04:34 +08:00
|
|
|
binind = NBINS + (size >> LG_PAGE) - 1;
|
2010-03-18 07:27:39 +08:00
|
|
|
assert(binind < nhbins);
|
|
|
|
tbin = &tcache->tbins[binind];
|
|
|
|
ret = tcache_alloc_easy(tbin);
|
|
|
|
if (ret == NULL) {
|
|
|
|
/*
|
|
|
|
* Only allocate one large object at a time, because it's quite
|
|
|
|
* expensive to create one and not use it.
|
|
|
|
*/
|
|
|
|
ret = arena_malloc_large(tcache->arena, size, zero);
|
|
|
|
if (ret == NULL)
|
|
|
|
return (NULL);
|
|
|
|
} else {
|
2012-05-02 15:30:36 +08:00
|
|
|
if (config_prof && prof_promote && size == PAGE) {
|
2012-02-11 12:22:09 +08:00
|
|
|
arena_chunk_t *chunk =
|
|
|
|
(arena_chunk_t *)CHUNK_ADDR2BASE(ret);
|
|
|
|
size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
|
2012-04-02 22:04:34 +08:00
|
|
|
LG_PAGE);
|
2012-05-02 15:30:36 +08:00
|
|
|
arena_mapbits_large_binind_set(chunk, pageind,
|
|
|
|
BININD_INVALID);
|
2012-02-11 12:22:09 +08:00
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
if (zero == false) {
|
2012-02-11 12:22:09 +08:00
|
|
|
if (config_fill) {
|
|
|
|
if (opt_junk)
|
|
|
|
memset(ret, 0xa5, size);
|
|
|
|
else if (opt_zero)
|
|
|
|
memset(ret, 0, size);
|
|
|
|
}
|
2013-10-20 14:48:40 +08:00
|
|
|
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
|
2012-04-06 15:35:09 +08:00
|
|
|
} else {
|
|
|
|
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
|
2010-03-18 07:27:39 +08:00
|
|
|
memset(ret, 0, size);
|
2012-04-06 15:35:09 +08:00
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
|
2012-02-11 12:22:09 +08:00
|
|
|
if (config_stats)
|
|
|
|
tbin->tstats.nrequests++;
|
|
|
|
if (config_prof)
|
|
|
|
tcache->prof_accumbytes += size;
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
tcache_event(tcache);
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2013-01-23 00:45:43 +08:00
|
|
|
JEMALLOC_ALWAYS_INLINE void
|
2012-05-02 15:30:36 +08:00
|
|
|
tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind)
|
2010-01-17 01:53:50 +08:00
|
|
|
{
|
|
|
|
tcache_bin_t *tbin;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
tcache_bin_info_t *tbin_info;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2012-04-20 09:28:03 +08:00
|
|
|
assert(tcache_salloc(ptr) <= SMALL_MAXCLASS);
|
2010-04-01 07:45:04 +08:00
|
|
|
|
2012-02-11 12:22:09 +08:00
|
|
|
if (config_fill && opt_junk)
|
2012-04-06 15:35:09 +08:00
|
|
|
arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2010-03-08 07:34:14 +08:00
|
|
|
tbin = &tcache->tbins[binind];
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
tbin_info = &tcache_bin_info[binind];
|
|
|
|
if (tbin->ncached == tbin_info->ncached_max) {
|
|
|
|
tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >>
|
2012-02-11 12:22:09 +08:00
|
|
|
1), tcache);
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
assert(tbin->ncached < tbin_info->ncached_max);
|
|
|
|
tbin->avail[tbin->ncached] = ptr;
|
2010-03-18 07:27:39 +08:00
|
|
|
tbin->ncached++;
|
|
|
|
|
|
|
|
tcache_event(tcache);
|
|
|
|
}
|
|
|
|
|
2013-01-23 00:45:43 +08:00
|
|
|
/*
 * Store a large object in the matching bin of the given tcache.
 *
 * tcache: cache whose tbins[] array receives the pointer.
 * ptr:    object to cache; its tcache_salloc() size is asserted to lie in
 *         (SMALL_MAXCLASS, tcache_maxclass].
 * size:   object size in bytes; asserted to have no bits set in PAGE_MASK.
 *
 * If the target bin already holds ncached_max entries,
 * tcache_bin_flush_large() is called first.  ptr is then written to
 * avail[ncached], ncached is incremented, and tcache_event() is called.
 */
JEMALLOC_ALWAYS_INLINE void
|
2010-03-18 07:27:39 +08:00
|
|
|
tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
|
|
|
|
{
|
2012-02-29 13:08:19 +08:00
|
|
|
size_t binind;
|
2010-03-18 07:27:39 +08:00
|
|
|
tcache_bin_t *tbin;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
tcache_bin_info_t *tbin_info;
|
2010-03-18 07:27:39 +08:00
|
|
|
|
|
|
|
/* Sanity checks: size is a whole number of pages ... */
assert((size & PAGE_MASK) == 0);
|
2012-04-20 09:28:03 +08:00
|
|
|
/* ... and the object's recorded size is above the small classes ... */
assert(tcache_salloc(ptr) > SMALL_MAXCLASS);
|
|
|
|
/* ... but no larger than tcache_maxclass. */
assert(tcache_salloc(ptr) <= tcache_maxclass);
|
2010-03-18 07:27:39 +08:00
|
|
|
|
|
2012-04-02 22:04:34 +08:00
|
|
|
/*
 * Bin index: offset past the first NBINS entries, one bin per page count
 * (a one-page object maps to index NBINS).
 */
binind = NBINS + (size >> LG_PAGE) - 1;
|
2010-03-18 07:27:39 +08:00
|
|
|
|
|
2012-02-11 12:22:09 +08:00
|
|
|
/*
 * When config_fill and opt_junk are both set, overwrite the whole object
 * with 0x5a bytes (presumably to expose use-after-free -- confirm).
 */
if (config_fill && opt_junk)
|
2010-04-29 03:00:59 +08:00
|
|
|
memset(ptr, 0x5a, size);
|
2010-03-18 07:27:39 +08:00
|
|
|
|
|
|
|
/* Per-tcache bin state for this index. */
tbin = &tcache->tbins[binind];
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
/* Matching entry in the tcache_bin_info[] table (supplies ncached_max). */
tbin_info = &tcache_bin_info[binind];
|
|
|
|
/*
 * Bin is full: flush before inserting.  The third argument is half of
 * ncached_max (presumably the number of entries to keep -- confirm
 * against tcache_bin_flush_large()).
 */
if (tbin->ncached == tbin_info->ncached_max) {
|
|
|
|
tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >>
|
2012-02-11 12:22:09 +08:00
|
|
|
1), tcache);
|
2010-03-08 07:34:14 +08:00
|
|
|
}
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
/* There must now be room for at least one more entry. */
assert(tbin->ncached < tbin_info->ncached_max);
|
|
|
|
/* Append ptr at the first unused slot of avail[]. */
tbin->avail[tbin->ncached] = ptr;
|
2010-01-17 01:53:50 +08:00
|
|
|
tbin->ncached++;
|
|
|
|
|
|
|
|
/* NOTE(review): tcache_event() is defined outside this chunk -- confirm its role. */
tcache_event(tcache);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif /* JEMALLOC_H_INLINES */
|
|
|
|
/******************************************************************************/
|