2010-01-17 01:53:50 +08:00
|
|
|
/******************************************************************************/
|
|
|
|
#ifdef JEMALLOC_H_TYPES
|
|
|
|
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
typedef struct tcache_bin_info_s tcache_bin_info_t;
|
2010-01-17 01:53:50 +08:00
|
|
|
typedef struct tcache_bin_s tcache_bin_t;
|
|
|
|
typedef struct tcache_s tcache_t;
|
|
|
|
|
|
|
|
/*
 * Absolute maximum number of cache slots for each small bin in the thread
 * cache.  This is an additional constraint on top of the per-size-class
 * limit of twice the number of regions per run.
 *
 * This constant must be an even number.
 */
|
2010-03-18 07:27:39 +08:00
|
|
|
#define TCACHE_NSLOTS_SMALL_MAX 200
|
|
|
|
|
|
|
|
/* Number of cache slots for large size classes. */
|
|
|
|
#define TCACHE_NSLOTS_LARGE 20
|
|
|
|
|
2010-10-24 09:37:06 +08:00
|
|
|
/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */
|
2010-03-18 07:27:39 +08:00
|
|
|
#define LG_TCACHE_MAXCLASS_DEFAULT 15
|
|
|
|
|
|
|
|
/*
 * TCACHE_GC_SWEEP is the approximate number of allocation events between
 * full GC sweeps.  Integer rounding may cause the actual number to be
 * slightly higher, since GC is performed incrementally.
 */
|
2012-03-06 06:34:37 +08:00
|
|
|
#define TCACHE_GC_SWEEP 8192
|
|
|
|
|
|
|
|
/*
 * Number of tcache allocation/deallocation events between incremental GCs.
 *
 * Evaluates to 0 when TCACHE_GC_SWEEP / NBINS is 0 (tcache_event() returns
 * early, without counting events, when this macro is 0); otherwise evaluates
 * to that quotient plus one.
 */
#define	TCACHE_GC_INCR							\
    ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1))
|
2010-01-17 01:53:50 +08:00
|
|
|
|
|
|
|
#endif /* JEMALLOC_H_TYPES */
|
|
|
|
/******************************************************************************/
|
|
|
|
#ifdef JEMALLOC_H_STRUCTS
|
|
|
|
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
/*
 * Read-only information associated with each element of tcache_t's tbins array
 * is stored separately, mainly to reduce memory usage.
 */
struct tcache_bin_info_s {
	unsigned	ncached_max;	/* Upper limit on ncached. */
};
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
struct tcache_bin_s {
|
|
|
|
tcache_bin_stats_t tstats;
|
2011-03-21 15:18:17 +08:00
|
|
|
int low_water; /* Min # cached since last GC. */
|
|
|
|
unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */
|
2010-01-17 01:53:50 +08:00
|
|
|
unsigned ncached; /* # of cached objects. */
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
void **avail; /* Stack of available objects. */
|
2010-01-17 01:53:50 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct tcache_s {
|
|
|
|
ql_elm(tcache_t) link; /* Used for aggregating stats. */
|
2010-02-12 05:19:21 +08:00
|
|
|
uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */
|
2010-01-17 01:53:50 +08:00
|
|
|
arena_t *arena; /* This thread's arena. */
|
|
|
|
unsigned ev_cnt; /* Event count since incremental GC. */
|
|
|
|
unsigned next_gc_bin; /* Next bin to GC. */
|
2010-03-08 07:34:14 +08:00
|
|
|
tcache_bin_t tbins[1]; /* Dynamically sized. */
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
/*
|
|
|
|
* The pointer stacks associated with tbins follow as a contiguous
|
|
|
|
* array. During tcache initialization, the avail pointer in each
|
|
|
|
* element of tbins is initialized to point to the proper offset within
|
|
|
|
* this array.
|
|
|
|
*/
|
2010-01-17 01:53:50 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
#endif /* JEMALLOC_H_STRUCTS */
|
|
|
|
/******************************************************************************/
|
|
|
|
#ifdef JEMALLOC_H_EXTERNS
|
|
|
|
|
2010-03-08 07:34:14 +08:00
|
|
|
extern bool opt_tcache;
|
2010-10-24 09:37:06 +08:00
|
|
|
extern ssize_t opt_lg_tcache_max;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
extern tcache_bin_info_t *tcache_bin_info;
|
|
|
|
|
2010-03-18 07:27:39 +08:00
|
|
|
/*
 * Number of tcache bins.  There are NBINS small-object bins, plus 0 or more
 * large-object bins.
 */
|
|
|
|
extern size_t nhbins;
|
|
|
|
|
|
|
|
/* Maximum cached size class. */
|
|
|
|
extern size_t tcache_maxclass;
|
|
|
|
|
2012-02-11 12:22:09 +08:00
|
|
|
void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
|
|
|
|
tcache_t *tcache);
|
|
|
|
void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
|
|
|
|
tcache_t *tcache);
|
2012-03-22 09:33:03 +08:00
|
|
|
void tcache_arena_associate(tcache_t *tcache, arena_t *arena);
|
|
|
|
void tcache_arena_dissociate(tcache_t *tcache);
|
2010-01-17 01:53:50 +08:00
|
|
|
tcache_t *tcache_create(arena_t *arena);
|
2010-03-18 07:27:39 +08:00
|
|
|
void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin,
|
|
|
|
size_t binind);
|
2010-01-17 01:53:50 +08:00
|
|
|
void tcache_destroy(tcache_t *tcache);
|
2012-03-22 09:33:03 +08:00
|
|
|
void tcache_thread_cleanup(void *arg);
|
2010-01-17 01:53:50 +08:00
|
|
|
void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
|
2012-03-22 09:33:03 +08:00
|
|
|
bool tcache_boot0(void);
|
|
|
|
bool tcache_boot1(void);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
|
|
|
#endif /* JEMALLOC_H_EXTERNS */
|
|
|
|
/******************************************************************************/
|
|
|
|
#ifdef JEMALLOC_H_INLINES
|
|
|
|
|
|
|
|
#ifndef JEMALLOC_ENABLE_INLINE
|
2012-03-22 09:33:03 +08:00
|
|
|
malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *)
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
void tcache_event(tcache_t *tcache);
|
|
|
|
tcache_t *tcache_get(void);
|
2010-03-18 07:27:39 +08:00
|
|
|
void *tcache_alloc_easy(tcache_bin_t *tbin);
|
|
|
|
void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero);
|
|
|
|
void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero);
|
|
|
|
void tcache_dalloc_small(tcache_t *tcache, void *ptr);
|
|
|
|
void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size);
|
2010-01-17 01:53:50 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_))
|
2012-03-22 09:33:03 +08:00
|
|
|
/* Map of thread-specific caches. */
|
|
|
|
malloc_tsd_externs(tcache, tcache_t *)
|
|
|
|
malloc_tsd_funcs(JEMALLOC_INLINE, tcache, tcache_t *, NULL,
|
|
|
|
tcache_thread_cleanup)
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
/*
 * Return the calling thread's tcache, or NULL if none is usable.
 *
 * NULL is returned when:
 *   - tcache support is compiled out (config_tcache) or disabled
 *     (opt_tcache), or lazy locking is configured and the process is not yet
 *     threaded;
 *   - the thread-specific slot is NULL and tcache_create() fails;
 *   - the thread-specific slot holds one of the small sentinel values 1 or 2
 *     instead of a real pointer.
 *
 * When the slot is NULL a new tcache is requested from tcache_create() for
 * the arena chosen by choose_arena().
 */
JEMALLOC_INLINE tcache_t *
tcache_get(void)
{
	tcache_t *tcache;

	if (config_tcache == false)
		return (NULL);
	if (config_lazy_lock && (isthreaded & opt_tcache) == false)
		return (NULL);
	else if (opt_tcache == false)
		return (NULL);

	tcache = *tcache_tsd_get();
	/* Values 0 (NULL), 1 and 2 are states, not usable pointers. */
	if ((uintptr_t)tcache <= (uintptr_t)2) {
		if (tcache == NULL) {
			tcache = tcache_create(choose_arena());
			if (tcache == NULL)
				return (NULL);
		} else {
			if (tcache == (void *)(uintptr_t)1) {
				/*
				 * Make a note that an allocator function was
				 * called after the tcache_thread_cleanup() was
				 * called.
				 */
				tcache = (tcache_t *)(uintptr_t)2;
				tcache_tsd_set(&tcache);
			}
			return (NULL);
		}
	}

	return (tcache);
}
|
|
|
|
|
|
|
|
JEMALLOC_INLINE void
|
|
|
|
tcache_event(tcache_t *tcache)
|
|
|
|
{
|
|
|
|
|
2012-03-06 06:34:37 +08:00
|
|
|
if (TCACHE_GC_INCR == 0)
|
2010-01-17 01:53:50 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
tcache->ev_cnt++;
|
2012-03-06 06:34:37 +08:00
|
|
|
assert(tcache->ev_cnt <= TCACHE_GC_INCR);
|
|
|
|
if (tcache->ev_cnt == TCACHE_GC_INCR) {
|
2010-01-17 01:53:50 +08:00
|
|
|
size_t binind = tcache->next_gc_bin;
|
2010-03-08 07:34:14 +08:00
|
|
|
tcache_bin_t *tbin = &tcache->tbins[binind];
|
2011-03-21 15:18:17 +08:00
|
|
|
tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
|
2010-03-08 07:34:14 +08:00
|
|
|
|
|
|
|
if (tbin->low_water > 0) {
|
|
|
|
/*
|
|
|
|
* Flush (ceiling) 3/4 of the objects below the low
|
|
|
|
* water mark.
|
|
|
|
*/
|
2012-02-29 08:50:47 +08:00
|
|
|
if (binind < NBINS) {
|
2010-03-18 07:27:39 +08:00
|
|
|
tcache_bin_flush_small(tbin, binind,
|
|
|
|
tbin->ncached - tbin->low_water +
|
2012-02-11 12:22:09 +08:00
|
|
|
(tbin->low_water >> 2), tcache);
|
2010-03-18 07:27:39 +08:00
|
|
|
} else {
|
|
|
|
tcache_bin_flush_large(tbin, binind,
|
|
|
|
tbin->ncached - tbin->low_water +
|
2012-02-11 12:22:09 +08:00
|
|
|
(tbin->low_water >> 2), tcache);
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
2011-03-21 15:18:17 +08:00
|
|
|
/*
|
|
|
|
* Reduce fill count by 2X. Limit lg_fill_div such that
|
|
|
|
* the fill count is always at least 1.
|
|
|
|
*/
|
|
|
|
if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1))
|
|
|
|
>= 1)
|
|
|
|
tbin->lg_fill_div++;
|
|
|
|
} else if (tbin->low_water < 0) {
|
|
|
|
/*
|
|
|
|
* Increase fill count by 2X. Make sure lg_fill_div
|
|
|
|
* stays greater than 0.
|
|
|
|
*/
|
|
|
|
if (tbin->lg_fill_div > 1)
|
|
|
|
tbin->lg_fill_div--;
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
2010-03-08 07:34:14 +08:00
|
|
|
tbin->low_water = tbin->ncached;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
|
|
|
tcache->next_gc_bin++;
|
2010-03-18 07:27:39 +08:00
|
|
|
if (tcache->next_gc_bin == nhbins)
|
2010-01-17 01:53:50 +08:00
|
|
|
tcache->next_gc_bin = 0;
|
|
|
|
tcache->ev_cnt = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
JEMALLOC_INLINE void *
|
2010-03-18 07:27:39 +08:00
|
|
|
tcache_alloc_easy(tcache_bin_t *tbin)
|
2010-01-17 01:53:50 +08:00
|
|
|
{
|
2010-03-08 07:34:14 +08:00
|
|
|
void *ret;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2011-03-21 15:18:17 +08:00
|
|
|
if (tbin->ncached == 0) {
|
|
|
|
tbin->low_water = -1;
|
2010-01-17 01:53:50 +08:00
|
|
|
return (NULL);
|
2011-03-21 15:18:17 +08:00
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
tbin->ncached--;
|
2011-03-21 15:18:17 +08:00
|
|
|
if ((int)tbin->ncached < tbin->low_water)
|
2010-01-17 01:53:50 +08:00
|
|
|
tbin->low_water = tbin->ncached;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
ret = tbin->avail[tbin->ncached];
|
2010-03-08 07:34:14 +08:00
|
|
|
return (ret);
|
2010-01-17 01:53:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
JEMALLOC_INLINE void *
|
2010-03-18 07:27:39 +08:00
|
|
|
tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
|
2010-01-17 01:53:50 +08:00
|
|
|
{
|
|
|
|
void *ret;
|
|
|
|
size_t binind;
|
2010-03-08 07:34:14 +08:00
|
|
|
tcache_bin_t *tbin;
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2011-03-07 14:56:36 +08:00
|
|
|
binind = SMALL_SIZE2BIN(size);
|
2012-02-29 08:50:47 +08:00
|
|
|
assert(binind < NBINS);
|
2010-03-08 07:34:14 +08:00
|
|
|
tbin = &tcache->tbins[binind];
|
2010-03-18 07:27:39 +08:00
|
|
|
ret = tcache_alloc_easy(tbin);
|
2010-01-17 01:53:50 +08:00
|
|
|
if (ret == NULL) {
|
2010-03-18 07:27:39 +08:00
|
|
|
ret = tcache_alloc_small_hard(tcache, tbin, binind);
|
2010-01-17 01:53:50 +08:00
|
|
|
if (ret == NULL)
|
|
|
|
return (NULL);
|
|
|
|
}
|
2011-03-16 04:59:15 +08:00
|
|
|
assert(arena_salloc(ret) == arena_bin_info[binind].reg_size);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
|
|
|
if (zero == false) {
|
2012-02-11 12:22:09 +08:00
|
|
|
if (config_fill) {
|
|
|
|
if (opt_junk)
|
|
|
|
memset(ret, 0xa5, size);
|
|
|
|
else if (opt_zero)
|
|
|
|
memset(ret, 0, size);
|
|
|
|
}
|
2010-01-17 01:53:50 +08:00
|
|
|
} else
|
|
|
|
memset(ret, 0, size);
|
|
|
|
|
2012-02-11 12:22:09 +08:00
|
|
|
if (config_stats)
|
|
|
|
tbin->tstats.nrequests++;
|
|
|
|
if (config_prof)
|
|
|
|
tcache->prof_accumbytes += arena_bin_info[binind].reg_size;
|
2010-01-17 01:53:50 +08:00
|
|
|
tcache_event(tcache);
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2010-03-18 07:27:39 +08:00
|
|
|
JEMALLOC_INLINE void *
|
|
|
|
tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
|
|
|
|
{
|
|
|
|
void *ret;
|
|
|
|
size_t binind;
|
|
|
|
tcache_bin_t *tbin;
|
|
|
|
|
|
|
|
size = PAGE_CEILING(size);
|
|
|
|
assert(size <= tcache_maxclass);
|
2012-02-29 08:50:47 +08:00
|
|
|
binind = NBINS + (size >> PAGE_SHIFT) - 1;
|
2010-03-18 07:27:39 +08:00
|
|
|
assert(binind < nhbins);
|
|
|
|
tbin = &tcache->tbins[binind];
|
|
|
|
ret = tcache_alloc_easy(tbin);
|
|
|
|
if (ret == NULL) {
|
|
|
|
/*
|
|
|
|
* Only allocate one large object at a time, because it's quite
|
|
|
|
* expensive to create one and not use it.
|
|
|
|
*/
|
|
|
|
ret = arena_malloc_large(tcache->arena, size, zero);
|
|
|
|
if (ret == NULL)
|
|
|
|
return (NULL);
|
|
|
|
} else {
|
2012-02-11 12:22:09 +08:00
|
|
|
if (config_prof) {
|
|
|
|
arena_chunk_t *chunk =
|
|
|
|
(arena_chunk_t *)CHUNK_ADDR2BASE(ret);
|
|
|
|
size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
|
|
|
|
PAGE_SHIFT);
|
|
|
|
chunk->map[pageind-map_bias].bits &=
|
|
|
|
~CHUNK_MAP_CLASS_MASK;
|
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
if (zero == false) {
|
2012-02-11 12:22:09 +08:00
|
|
|
if (config_fill) {
|
|
|
|
if (opt_junk)
|
|
|
|
memset(ret, 0xa5, size);
|
|
|
|
else if (opt_zero)
|
|
|
|
memset(ret, 0, size);
|
|
|
|
}
|
2010-03-18 07:27:39 +08:00
|
|
|
} else
|
|
|
|
memset(ret, 0, size);
|
|
|
|
|
2012-02-11 12:22:09 +08:00
|
|
|
if (config_stats)
|
|
|
|
tbin->tstats.nrequests++;
|
|
|
|
if (config_prof)
|
|
|
|
tcache->prof_accumbytes += size;
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
tcache_event(tcache);
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
JEMALLOC_INLINE void
|
2010-03-18 07:27:39 +08:00
|
|
|
tcache_dalloc_small(tcache_t *tcache, void *ptr)
|
2010-01-17 01:53:50 +08:00
|
|
|
{
|
|
|
|
arena_t *arena;
|
|
|
|
arena_chunk_t *chunk;
|
|
|
|
arena_run_t *run;
|
|
|
|
arena_bin_t *bin;
|
|
|
|
tcache_bin_t *tbin;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
tcache_bin_info_t *tbin_info;
|
2010-01-17 01:53:50 +08:00
|
|
|
size_t pageind, binind;
|
|
|
|
arena_chunk_map_t *mapelm;
|
|
|
|
|
2012-02-29 08:50:47 +08:00
|
|
|
assert(arena_salloc(ptr) <= SMALL_MAXCLASS);
|
2010-04-01 07:45:04 +08:00
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
|
|
|
|
arena = chunk->arena;
|
2010-10-02 08:35:43 +08:00
|
|
|
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
|
|
|
|
mapelm = &chunk->map[pageind-map_bias];
|
2010-01-17 01:53:50 +08:00
|
|
|
run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
|
2010-03-19 11:36:40 +08:00
|
|
|
(mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
|
2010-01-17 01:53:50 +08:00
|
|
|
bin = run->bin;
|
|
|
|
binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
|
|
|
|
sizeof(arena_bin_t);
|
2012-02-29 08:50:47 +08:00
|
|
|
assert(binind < NBINS);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2012-02-11 12:22:09 +08:00
|
|
|
if (config_fill && opt_junk)
|
2011-03-16 04:59:15 +08:00
|
|
|
memset(ptr, 0x5a, arena_bin_info[binind].reg_size);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
2010-03-08 07:34:14 +08:00
|
|
|
tbin = &tcache->tbins[binind];
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
tbin_info = &tcache_bin_info[binind];
|
|
|
|
if (tbin->ncached == tbin_info->ncached_max) {
|
|
|
|
tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >>
|
2012-02-11 12:22:09 +08:00
|
|
|
1), tcache);
|
2010-03-18 07:27:39 +08:00
|
|
|
}
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
assert(tbin->ncached < tbin_info->ncached_max);
|
|
|
|
tbin->avail[tbin->ncached] = ptr;
|
2010-03-18 07:27:39 +08:00
|
|
|
tbin->ncached++;
|
|
|
|
|
|
|
|
tcache_event(tcache);
|
|
|
|
}
|
|
|
|
|
|
|
|
JEMALLOC_INLINE void
|
|
|
|
tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
|
|
|
|
{
|
2012-02-29 13:08:19 +08:00
|
|
|
size_t binind;
|
2010-03-18 07:27:39 +08:00
|
|
|
tcache_bin_t *tbin;
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
tcache_bin_info_t *tbin_info;
|
2010-03-18 07:27:39 +08:00
|
|
|
|
|
|
|
assert((size & PAGE_MASK) == 0);
|
2012-02-29 08:50:47 +08:00
|
|
|
assert(arena_salloc(ptr) > SMALL_MAXCLASS);
|
2010-04-01 07:45:04 +08:00
|
|
|
assert(arena_salloc(ptr) <= tcache_maxclass);
|
2010-03-18 07:27:39 +08:00
|
|
|
|
2012-02-29 08:50:47 +08:00
|
|
|
binind = NBINS + (size >> PAGE_SHIFT) - 1;
|
2010-03-18 07:27:39 +08:00
|
|
|
|
2012-02-11 12:22:09 +08:00
|
|
|
if (config_fill && opt_junk)
|
2010-04-29 03:00:59 +08:00
|
|
|
memset(ptr, 0x5a, size);
|
2010-03-18 07:27:39 +08:00
|
|
|
|
|
|
|
tbin = &tcache->tbins[binind];
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
tbin_info = &tcache_bin_info[binind];
|
|
|
|
if (tbin->ncached == tbin_info->ncached_max) {
|
|
|
|
tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >>
|
2012-02-11 12:22:09 +08:00
|
|
|
1), tcache);
|
2010-03-08 07:34:14 +08:00
|
|
|
}
|
Use bitmaps to track small regions.
The previous free list implementation, which embedded singly linked
lists in available regions, had the unfortunate side effect of causing
many cache misses during thread cache fills. Fix this in two places:
- arena_run_t: Use a new bitmap implementation to track which regions
are available. Furthermore, revert to preferring the
lowest available region (as jemalloc did with its old
bitmap-based approach).
- tcache_t: Move read-only tcache_bin_t metadata into
tcache_bin_info_t, and add a contiguous array of pointers
to tcache_t in order to track cached objects. This
substantially increases the size of tcache_t, but results
in much higher data locality for common tcache operations.
As a side benefit, it is again possible to efficiently
flush the least recently used cached objects, so this
change changes flushing from MRU to LRU.
The new bitmap implementation uses a multi-level summary approach to
make finding the lowest available region very fast. In practice,
bitmaps only have one or two levels, though the implementation is
general enough to handle extremely large bitmaps, mainly so that large
page sizes can still be entertained.
Fix tcache_bin_flush_large() to always flush statistics, in the same way
that tcache_bin_flush_small() was recently fixed.
Use JEMALLOC_DEBUG rather than NDEBUG.
Add dassert(), and use it for debug-only asserts.
2011-03-17 01:30:13 +08:00
|
|
|
assert(tbin->ncached < tbin_info->ncached_max);
|
|
|
|
tbin->avail[tbin->ncached] = ptr;
|
2010-01-17 01:53:50 +08:00
|
|
|
tbin->ncached++;
|
|
|
|
|
|
|
|
tcache_event(tcache);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif /* JEMALLOC_H_INLINES */
|
|
|
|
/******************************************************************************/
|