Purge unused dirty pages in a fragmentation-reducing order.

Purge unused dirty pages in an order that first performs clean/dirty run
defragmentation, in order to mitigate available run fragmentation.

Remove the limitation that prevented purging unless at least one chunk
worth of dirty pages had accumulated in an arena.  This limitation was
intended to avoid excessive purging for small applications, but the
threshold was arbitrary, and the effect of questionable utility.

Relax opt_lg_dirty_mult from 5 to 3.  This compensates for increased
likelihood of allocating clean runs, given the same ratio of clean:dirty
runs, and reduces the potential for repeated purging in pathological
large malloc/free loops that push the active:dirty page ratio just over
the purge threshold.
This commit is contained in:
Jason Evans 2012-10-30 15:42:37 -07:00
parent 34457f5144
commit e3d13060c8
4 changed files with 336 additions and 220 deletions

View File

@ -8,9 +8,14 @@ found in the git revision history:
* 3.x.x (XXX Not released) * 3.x.x (XXX Not released)
Incompatible changes:
- Change the "opt.lg_dirty_mult" default from 5 to 3 (32:1 to 8:1).
Bug fixes: Bug fixes:
- Fix dss/mmap allocation precedence code to use recyclable mmap memory only - Fix dss/mmap allocation precedence code to use recyclable mmap memory only
after primary dss allocation fails. after primary dss allocation fails.
- Fix deadlock in the "arenas.purge" mallctl. This regression was introduced
in 3.1.0 by the addition of the "arena.<i>.purge" mallctl.
* 3.1.0 (October 16, 2012) * 3.1.0 (October 16, 2012)

View File

@ -833,7 +833,7 @@ for (i = 0; i < nbins; i++) {
<manvolnum>2</manvolnum></citerefentry> or a similar system call. This <manvolnum>2</manvolnum></citerefentry> or a similar system call. This
provides the kernel with sufficient information to recycle dirty pages provides the kernel with sufficient information to recycle dirty pages
if physical memory becomes scarce and the pages remain unused. The if physical memory becomes scarce and the pages remain unused. The
default minimum ratio is 32:1 (2^5:1); an option value of -1 will default minimum ratio is 8:1 (2^3:1); an option value of -1 will
disable dirty page purging.</para></listitem> disable dirty page purging.</para></listitem>
</varlistentry> </varlistentry>

View File

@ -38,10 +38,10 @@
* *
* (nactive >> opt_lg_dirty_mult) >= ndirty * (nactive >> opt_lg_dirty_mult) >= ndirty
* *
* So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32 * So, supposing that opt_lg_dirty_mult is 3, there can be no less than 8 times
* times as many active pages as dirty pages. * as many active pages as dirty pages.
*/ */
#define LG_DIRTY_MULT_DEFAULT 5 #define LG_DIRTY_MULT_DEFAULT 3
typedef struct arena_chunk_map_s arena_chunk_map_t; typedef struct arena_chunk_map_s arena_chunk_map_t;
typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_chunk_s arena_chunk_t;
@ -69,7 +69,7 @@ struct arena_chunk_map_s {
/* /*
* Linkage for run trees. There are two disjoint uses: * Linkage for run trees. There are two disjoint uses:
* *
* 1) arena_t's runs_avail_{clean,dirty} trees. * 1) arena_t's runs_avail tree.
* 2) arena_run_t conceptually uses this linkage for in-use * 2) arena_run_t conceptually uses this linkage for in-use
* non-full runs, rather than directly embedding linkage. * non-full runs, rather than directly embedding linkage.
*/ */
@ -162,20 +162,24 @@ typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
/* Arena chunk header. */ /* Arena chunk header. */
struct arena_chunk_s { struct arena_chunk_s {
/* Arena that owns the chunk. */ /* Arena that owns the chunk. */
arena_t *arena; arena_t *arena;
/* Linkage for the arena's chunks_dirty list. */ /* Linkage for tree of arena chunks that contain dirty runs. */
ql_elm(arena_chunk_t) link_dirty; rb_node(arena_chunk_t) dirty_link;
/*
* True if the chunk is currently in the chunks_dirty list, due to
* having at some point contained one or more dirty pages. Removal
* from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
*/
bool dirtied;
/* Number of dirty pages. */ /* Number of dirty pages. */
size_t ndirty; size_t ndirty;
/* Number of available runs. */
size_t nruns_avail;
/*
* Number of available run adjacencies. Clean and dirty available runs
* are not coalesced, which causes virtual memory fragmentation. The
* ratio of (nruns_avail-nruns_adjac):nruns_avail is used for tracking
* this fragmentation.
*/
size_t nruns_adjac;
/* /*
* Map of pages within chunk that keeps track of free/large/small. The * Map of pages within chunk that keeps track of free/large/small. The
@ -183,7 +187,7 @@ struct arena_chunk_s {
* need to be tracked in the map. This omission saves a header page * need to be tracked in the map. This omission saves a header page
* for common chunk sizes (e.g. 4 MiB). * for common chunk sizes (e.g. 4 MiB).
*/ */
arena_chunk_map_t map[1]; /* Dynamically sized. */ arena_chunk_map_t map[1]; /* Dynamically sized. */
}; };
typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
@ -333,8 +337,8 @@ struct arena_s {
dss_prec_t dss_prec; dss_prec_t dss_prec;
/* List of dirty-page-containing chunks this arena manages. */ /* Tree of dirty-page-containing chunks this arena manages. */
ql_head(arena_chunk_t) chunks_dirty; arena_chunk_tree_t chunks_dirty;
/* /*
* In order to avoid rapid chunk allocation/deallocation when an arena * In order to avoid rapid chunk allocation/deallocation when an arena
@ -369,18 +373,9 @@ struct arena_s {
/* /*
* Size/address-ordered trees of this arena's available runs. The trees * Size/address-ordered trees of this arena's available runs. The trees
* are used for first-best-fit run allocation. The dirty tree contains * are used for first-best-fit run allocation.
* runs with dirty pages (i.e. very likely to have been touched and
* therefore have associated physical pages), whereas the clean tree
* contains runs with pages that either have no associated physical
* pages, or have pages that the kernel may recycle at any time due to
* previous madvise(2) calls. The dirty tree is used in preference to
* the clean tree for allocations, because using dirty pages reduces
* the amount of dirty purging necessary to keep the active:dirty page
* ratio below the purge threshold.
*/ */
arena_avail_tree_t runs_avail_clean; arena_avail_tree_t runs_avail;
arena_avail_tree_t runs_avail_dirty;
/* bins is used to store trees of free regions. */ /* bins is used to store trees of free regions. */
arena_bin_t bins[NBINS]; arena_bin_t bins[NBINS];

View File

@ -40,6 +40,12 @@ const uint8_t small_size2bin[] = {
/******************************************************************************/ /******************************************************************************/
/* Function prototypes for non-inline static functions. */ /* Function prototypes for non-inline static functions. */
static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk,
size_t pageind, size_t npages, bool maybe_adjac_pred,
bool maybe_adjac_succ);
static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk,
size_t pageind, size_t npages, bool maybe_adjac_pred,
bool maybe_adjac_succ);
static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
bool large, size_t binind, bool zero); bool large, size_t binind, bool zero);
static arena_chunk_t *arena_chunk_alloc(arena_t *arena); static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
@ -48,8 +54,11 @@ static arena_run_t *arena_run_alloc_helper(arena_t *arena, size_t size,
bool large, size_t binind, bool zero); bool large, size_t binind, bool zero);
static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
size_t binind, bool zero); size_t binind, bool zero);
static arena_chunk_t *chunks_dirty_iter_cb(arena_chunk_tree_t *tree,
arena_chunk_t *chunk, void *arg);
static void arena_purge(arena_t *arena, bool all); static void arena_purge(arena_t *arena, bool all);
static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty,
bool cleaned);
static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
arena_run_t *run, size_t oldsize, size_t newsize); arena_run_t *run, size_t oldsize, size_t newsize);
static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
@ -101,9 +110,6 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
size_t a_size = a->bits & ~PAGE_MASK; size_t a_size = a->bits & ~PAGE_MASK;
size_t b_size = b->bits & ~PAGE_MASK; size_t b_size = b->bits & ~PAGE_MASK;
assert((a->bits & CHUNK_MAP_KEY) == CHUNK_MAP_KEY || (a->bits &
CHUNK_MAP_DIRTY) == (b->bits & CHUNK_MAP_DIRTY));
ret = (a_size > b_size) - (a_size < b_size); ret = (a_size > b_size) - (a_size < b_size);
if (ret == 0) { if (ret == 0) {
uintptr_t a_mapelm, b_mapelm; uintptr_t a_mapelm, b_mapelm;
@ -129,6 +135,159 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t,
u.rb_link, arena_avail_comp) u.rb_link, arena_avail_comp)
/*
 * Three-way comparator for the tree of chunks that contain dirty runs.
 *
 * Returns a negative value if a orders before b, a positive value if a
 * orders after b, and 0 only when a and b are the same address.
 */
static inline int
arena_chunk_dirty_comp(arena_chunk_t *a, arena_chunk_t *b)
{
	size_t lhs, rhs;
	uintptr_t a_addr, b_addr;

	assert(a != NULL);
	assert(b != NULL);

	/*
	 * Primary key.  Each chunk is scored by the ratio
	 *
	 *     mean current avail run size        nruns_avail - nruns_adjac
	 *   -------------------------------- = -------------------------
	 *   mean defragmented avail run size          nruns_avail
	 *
	 * and the chunk with the larger ratio compares as "less than" the
	 * other.  Both sides are cross-multiplied by the other chunk's
	 * nruns_avail so that the ratios can be compared without dividing.
	 *
	 * NOTE(review): this ratio shrinks as nruns_adjac grows, so chunks
	 * with more clean/dirty adjacencies sort later -- confirm that this
	 * is the intended purge order.
	 */
	lhs = (a->nruns_avail - a->nruns_adjac) * b->nruns_avail;
	rhs = (b->nruns_avail - b->nruns_adjac) * a->nruns_avail;
	if (lhs != rhs)
		return ((lhs < rhs) ? 1 : -1);

	/* Secondary key: equal ratios are ordered by chunk address. */
	a_addr = (uintptr_t)a;
	b_addr = (uintptr_t)b;
	if (a_addr < b_addr)
		return (-1);
	return ((a_addr > b_addr) ? 1 : 0);
}
/*
 * Generate red-black tree functions for arena_chunk_tree_t, linked through
 * arena_chunk_t's dirty_link field and ordered by arena_chunk_dirty_comp().
 * The generated names carry the arena_chunk_dirty_ prefix; this file uses
 * arena_chunk_dirty_insert(), arena_chunk_dirty_remove(),
 * arena_chunk_dirty_first() and arena_chunk_dirty_iter().
 */
rb_gen(static UNUSED, arena_chunk_dirty_, arena_chunk_tree_t, arena_chunk_t,
dirty_link, arena_chunk_dirty_comp)
/*
 * Report whether the page immediately before pageind is unallocated, i.e.
 * whether the available run starting at pageind has an available
 * predecessor run.
 *
 * Returns false when pageind-1 falls below map_bias, or when that page is
 * allocated.
 */
static inline bool
arena_avail_adjac_pred(arena_chunk_t *chunk, size_t pageind)
{
	size_t pred = pageind - 1;

	/* Nothing precedes the first page at or above map_bias. */
	if (pred < map_bias)
		return (false);
	if (arena_mapbits_allocated_get(chunk, pred) != 0)
		return (false);

	/*
	 * Two neighboring available runs are expected to differ in their
	 * dirty flag.
	 */
	assert(arena_mapbits_dirty_get(chunk, pred) !=
	    arena_mapbits_dirty_get(chunk, pageind));
	return (true);
}
/*
 * Report whether the page immediately after the npages-long run starting at
 * pageind is unallocated, i.e. whether the run has an available successor
 * run.
 *
 * Returns false when the run ends exactly at chunk_npages, or when the
 * following page is allocated.
 */
static inline bool
arena_avail_adjac_succ(arena_chunk_t *chunk, size_t pageind, size_t npages)
{
	size_t succ = pageind + npages;

	/* A run that reaches the end of the chunk has no successor. */
	if (succ == chunk_npages)
		return (false);
	assert(succ < chunk_npages);
	if (arena_mapbits_allocated_get(chunk, succ) != 0)
		return (false);

	/*
	 * Two neighboring available runs are expected to differ in their
	 * dirty flag.
	 */
	assert(arena_mapbits_dirty_get(chunk, pageind) !=
	    arena_mapbits_dirty_get(chunk, succ));
	return (true);
}
/*
 * Report whether the npages-long available run starting at pageind has an
 * available neighbor on either side.  The predecessor is checked first; the
 * successor is only examined when there is no adjacent predecessor.
 */
static inline bool
arena_avail_adjac(arena_chunk_t *chunk, size_t pageind, size_t npages)
{

	if (arena_avail_adjac_pred(chunk, pageind))
		return (true);
	return (arena_avail_adjac_succ(chunk, pageind, npages));
}
/*
 * Register the npages-long unallocated run starting at pageind as available.
 *
 * Updates the chunk's nruns_avail/nruns_adjac counters, adds the run's pages
 * to the arena and chunk dirty counts when the run is dirty, repositions the
 * chunk within arena->chunks_dirty, and inserts the run's map element into
 * arena->runs_avail.
 *
 * maybe_adjac_pred / maybe_adjac_succ: when false, the corresponding
 * neighbor is not inspected and does not contribute to nruns_adjac.
 */
static void
arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
    size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ)
{
	size_t nadjac = 0;

	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
	    LG_PAGE));

	/*
	 * The chunk's position in chunks_dirty depends on
	 * nruns_{avail,adjac}, so take it out of the tree before touching
	 * those counters, whether or not the new run is dirty.
	 */
	if (chunk->ndirty != 0)
		arena_chunk_dirty_remove(&arena->chunks_dirty, chunk);

	/* Count the newly created adjacencies, then the run itself. */
	if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind))
		nadjac++;
	if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages))
		nadjac++;
	chunk->nruns_adjac += nadjac;
	chunk->nruns_avail += 1;
	assert(chunk->nruns_avail > chunk->nruns_adjac);

	/* A dirty run contributes its pages to both dirty counters. */
	if (arena_mapbits_dirty_get(chunk, pageind) != 0) {
		arena->ndirty += npages;
		chunk->ndirty += npages;
	}

	/* Only chunks that hold dirty pages live in chunks_dirty. */
	if (chunk->ndirty != 0)
		arena_chunk_dirty_insert(&arena->chunks_dirty, chunk);

	arena_avail_tree_insert(&arena->runs_avail, arena_mapp_get(chunk,
	    pageind));
}
/*
 * Unregister the npages-long unallocated run starting at pageind from the
 * set of available runs.
 *
 * Updates the chunk's nruns_avail/nruns_adjac counters, subtracts the run's
 * pages from the arena and chunk dirty counts when the run is dirty,
 * repositions the chunk within arena->chunks_dirty, and removes the run's
 * map element from arena->runs_avail.
 *
 * maybe_adjac_pred / maybe_adjac_succ: when false, the corresponding
 * neighbor is not inspected and nruns_adjac is not adjusted for it.
 */
static void
arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
    size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ)
{
	size_t nadjac = 0;

	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
	    LG_PAGE));

	/*
	 * The chunk's position in chunks_dirty depends on
	 * nruns_{avail,adjac}, so take it out of the tree before touching
	 * those counters, whether or not the departing run is dirty.
	 */
	if (chunk->ndirty != 0)
		arena_chunk_dirty_remove(&arena->chunks_dirty, chunk);

	/* Drop the adjacencies this run took part in, then the run itself. */
	if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind))
		nadjac++;
	if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages))
		nadjac++;
	chunk->nruns_adjac -= nadjac;
	chunk->nruns_avail -= 1;
	assert(chunk->nruns_avail > chunk->nruns_adjac || (chunk->nruns_avail
	    == 0 && chunk->nruns_adjac == 0));

	/* A dirty run takes its pages out of both dirty counters. */
	if (arena_mapbits_dirty_get(chunk, pageind) != 0) {
		arena->ndirty -= npages;
		chunk->ndirty -= npages;
	}

	/* Only chunks that still hold dirty pages go back into chunks_dirty. */
	if (chunk->ndirty != 0)
		arena_chunk_dirty_insert(&arena->chunks_dirty, chunk);

	arena_avail_tree_remove(&arena->runs_avail, arena_mapp_get(chunk,
	    pageind));
}
static inline void * static inline void *
arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info)
{ {
@ -193,7 +352,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
arena_chunk_t *chunk; arena_chunk_t *chunk;
size_t run_ind, total_pages, need_pages, rem_pages, i; size_t run_ind, total_pages, need_pages, rem_pages, i;
size_t flag_dirty; size_t flag_dirty;
arena_avail_tree_t *runs_avail;
assert((large && binind == BININD_INVALID) || (large == false && binind assert((large && binind == BININD_INVALID) || (large == false && binind
!= BININD_INVALID)); != BININD_INVALID));
@ -201,8 +359,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); flag_dirty = arena_mapbits_dirty_get(chunk, run_ind);
runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty :
&arena->runs_avail_clean;
total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >>
LG_PAGE; LG_PAGE;
assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) ==
@ -212,7 +368,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
assert(need_pages <= total_pages); assert(need_pages <= total_pages);
rem_pages = total_pages - need_pages; rem_pages = total_pages - need_pages;
arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, run_ind)); arena_avail_remove(arena, chunk, run_ind, total_pages, true, true);
if (config_stats) { if (config_stats) {
/* /*
* Update stats_cactive if nactive is crossing a chunk * Update stats_cactive if nactive is crossing a chunk
@ -244,14 +400,8 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
arena_mapbits_unzeroed_get(chunk, arena_mapbits_unzeroed_get(chunk,
run_ind+total_pages-1)); run_ind+total_pages-1));
} }
arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages,
run_ind+need_pages)); false, true);
}
/* Update dirty page accounting. */
if (flag_dirty != 0) {
chunk->ndirty -= need_pages;
arena->ndirty -= need_pages;
} }
/* /*
@ -344,8 +494,6 @@ arena_chunk_alloc(arena_t *arena)
size_t i; size_t i;
if (arena->spare != NULL) { if (arena->spare != NULL) {
arena_avail_tree_t *runs_avail;
chunk = arena->spare; chunk = arena->spare;
arena->spare = NULL; arena->spare = NULL;
@ -357,14 +505,6 @@ arena_chunk_alloc(arena_t *arena)
chunk_npages-1) == arena_maxclass); chunk_npages-1) == arena_maxclass);
assert(arena_mapbits_dirty_get(chunk, map_bias) == assert(arena_mapbits_dirty_get(chunk, map_bias) ==
arena_mapbits_dirty_get(chunk, chunk_npages-1)); arena_mapbits_dirty_get(chunk, chunk_npages-1));
/* Insert the run into the appropriate runs_avail_* tree. */
if (arena_mapbits_dirty_get(chunk, map_bias) == 0)
runs_avail = &arena->runs_avail_clean;
else
runs_avail = &arena->runs_avail_dirty;
arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk,
map_bias));
} else { } else {
bool zero; bool zero;
size_t unzeroed; size_t unzeroed;
@ -380,8 +520,6 @@ arena_chunk_alloc(arena_t *arena)
arena->stats.mapped += chunksize; arena->stats.mapped += chunksize;
chunk->arena = arena; chunk->arena = arena;
ql_elm_new(chunk, link_dirty);
chunk->dirtied = false;
/* /*
* Claim that no pages are in use, since the header is merely * Claim that no pages are in use, since the header is merely
@ -389,6 +527,9 @@ arena_chunk_alloc(arena_t *arena)
*/ */
chunk->ndirty = 0; chunk->ndirty = 0;
chunk->nruns_avail = 0;
chunk->nruns_adjac = 0;
/* /*
* Initialize the map to contain one maximal free untouched run. * Initialize the map to contain one maximal free untouched run.
* Mark the pages as zeroed iff chunk_alloc() returned a zeroed * Mark the pages as zeroed iff chunk_alloc() returned a zeroed
@ -412,20 +553,18 @@ arena_chunk_alloc(arena_t *arena)
} }
arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_mapbits_unallocated_set(chunk, chunk_npages-1,
arena_maxclass, unzeroed); arena_maxclass, unzeroed);
/* Insert the run into the runs_avail_clean tree. */
arena_avail_tree_insert(&arena->runs_avail_clean,
arena_mapp_get(chunk, map_bias));
} }
/* Insert the run into the runs_avail tree. */
arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias,
false, false);
return (chunk); return (chunk);
} }
static void static void
arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
{ {
arena_avail_tree_t *runs_avail;
assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, map_bias) == 0);
assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0);
assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == assert(arena_mapbits_unallocated_size_get(chunk, map_bias) ==
@ -436,24 +575,16 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
arena_mapbits_dirty_get(chunk, chunk_npages-1)); arena_mapbits_dirty_get(chunk, chunk_npages-1));
/* /*
* Remove run from the appropriate runs_avail_* tree, so that the arena * Remove run from the runs_avail tree, so that the arena does not use
* does not use it. * it.
*/ */
if (arena_mapbits_dirty_get(chunk, map_bias) == 0) arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias,
runs_avail = &arena->runs_avail_clean; false, false);
else
runs_avail = &arena->runs_avail_dirty;
arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, map_bias));
if (arena->spare != NULL) { if (arena->spare != NULL) {
arena_chunk_t *spare = arena->spare; arena_chunk_t *spare = arena->spare;
arena->spare = chunk; arena->spare = chunk;
if (spare->dirtied) {
ql_remove(&chunk->arena->chunks_dirty, spare,
link_dirty);
arena->ndirty -= spare->ndirty;
}
malloc_mutex_unlock(&arena->lock); malloc_mutex_unlock(&arena->lock);
chunk_dealloc((void *)spare, chunksize, true); chunk_dealloc((void *)spare, chunksize, true);
malloc_mutex_lock(&arena->lock); malloc_mutex_lock(&arena->lock);
@ -471,19 +602,7 @@ arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind,
arena_chunk_map_t *mapelm, key; arena_chunk_map_t *mapelm, key;
key.bits = size | CHUNK_MAP_KEY; key.bits = size | CHUNK_MAP_KEY;
mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
if (mapelm != NULL) {
arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
size_t pageind = (((uintptr_t)mapelm -
(uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
+ map_bias;
run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
LG_PAGE));
arena_run_split(arena, run, size, large, binind, zero);
return (run);
}
mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key);
if (mapelm != NULL) { if (mapelm != NULL) {
arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
size_t pageind = (((uintptr_t)mapelm - size_t pageind = (((uintptr_t)mapelm -
@ -537,29 +656,40 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind,
static inline void static inline void
arena_maybe_purge(arena_t *arena) arena_maybe_purge(arena_t *arena)
{ {
size_t npurgeable, threshold;
/* Enforce opt_lg_dirty_mult. */ /* Don't purge if the option is disabled. */
if (opt_lg_dirty_mult >= 0 && arena->ndirty > arena->npurgatory && if (opt_lg_dirty_mult < 0)
(arena->ndirty - arena->npurgatory) > chunk_npages && return;
(arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - /* Don't purge if all dirty pages are already being purged. */
arena->npurgatory)) if (arena->ndirty <= arena->npurgatory)
arena_purge(arena, false); return;
npurgeable = arena->ndirty - arena->npurgatory;
threshold = (arena->nactive >> opt_lg_dirty_mult);
/*
* Don't purge unless the number of purgeable pages exceeds the
* threshold.
*/
if (npurgeable <= threshold)
return;
arena_purge(arena, false);
} }
static inline void static inline size_t
arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk, bool all)
{ {
size_t npurged;
ql_head(arena_chunk_map_t) mapelms; ql_head(arena_chunk_map_t) mapelms;
arena_chunk_map_t *mapelm; arena_chunk_map_t *mapelm;
size_t pageind; size_t pageind, npages;
size_t ndirty;
size_t nmadvise; size_t nmadvise;
ql_new(&mapelms); ql_new(&mapelms);
/* /*
* If chunk is the spare, temporarily re-allocate it, 1) so that its * If chunk is the spare, temporarily re-allocate it, 1) so that its
* run is reinserted into runs_avail_dirty, and 2) so that it cannot be * run is reinserted into runs_avail, and 2) so that it cannot be
* completely discarded by another thread while arena->lock is dropped * completely discarded by another thread while arena->lock is dropped
* by this thread. Note that the arena_run_dalloc() call will * by this thread. Note that the arena_run_dalloc() call will
* implicitly deallocate the chunk, so no explicit action is required * implicitly deallocate the chunk, so no explicit action is required
@ -579,54 +709,50 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
arena_chunk_alloc(arena); arena_chunk_alloc(arena);
} }
/* Temporarily allocate all free dirty runs within chunk. */ if (config_stats)
for (pageind = map_bias; pageind < chunk_npages;) { arena->stats.purged += chunk->ndirty;
/*
* Operate on all dirty runs if there is no clean/dirty run
* fragmentation.
*/
if (chunk->nruns_adjac == 0)
all = true;
/*
* Temporarily allocate free dirty runs within chunk. If all is false,
* only operate on dirty runs that are fragments; otherwise operate on
* all dirty runs.
*/
for (pageind = map_bias; pageind < chunk_npages; pageind += npages) {
mapelm = arena_mapp_get(chunk, pageind); mapelm = arena_mapp_get(chunk, pageind);
if (arena_mapbits_allocated_get(chunk, pageind) == 0) { if (arena_mapbits_allocated_get(chunk, pageind) == 0) {
size_t npages; size_t run_size =
arena_mapbits_unallocated_size_get(chunk, pageind);
npages = arena_mapbits_unallocated_size_get(chunk, npages = run_size >> LG_PAGE;
pageind) >> LG_PAGE;
assert(pageind + npages <= chunk_npages); assert(pageind + npages <= chunk_npages);
assert(arena_mapbits_dirty_get(chunk, pageind) == assert(arena_mapbits_dirty_get(chunk, pageind) ==
arena_mapbits_dirty_get(chunk, pageind+npages-1)); arena_mapbits_dirty_get(chunk, pageind+npages-1));
if (arena_mapbits_dirty_get(chunk, pageind) != 0) {
arena_avail_tree_remove(
&arena->runs_avail_dirty, mapelm);
arena_mapbits_large_set(chunk, pageind, if (arena_mapbits_dirty_get(chunk, pageind) != 0 &&
(npages << LG_PAGE), 0); (all || arena_avail_adjac(chunk, pageind,
if (npages > 1) { npages))) {
arena_mapbits_large_set(chunk, arena_run_t *run = (arena_run_t *)((uintptr_t)
pageind+npages-1, 0, 0); chunk + (uintptr_t)(pageind << LG_PAGE));
}
if (config_stats) { arena_run_split(arena, run, run_size, true,
/* BININD_INVALID, false);
* Update stats_cactive if nactive is
* crossing a chunk multiple.
*/
size_t cactive_diff =
CHUNK_CEILING((arena->nactive +
npages) << LG_PAGE) -
CHUNK_CEILING(arena->nactive <<
LG_PAGE);
if (cactive_diff != 0)
stats_cactive_add(cactive_diff);
}
arena->nactive += npages;
/* Append to list for later processing. */ /* Append to list for later processing. */
ql_elm_new(mapelm, u.ql_link); ql_elm_new(mapelm, u.ql_link);
ql_tail_insert(&mapelms, mapelm, u.ql_link); ql_tail_insert(&mapelms, mapelm, u.ql_link);
} }
pageind += npages;
} else { } else {
/* Skip allocated run. */ /* Skip run. */
if (arena_mapbits_large_get(chunk, pageind)) if (arena_mapbits_large_get(chunk, pageind) != 0) {
pageind += arena_mapbits_large_size_get(chunk, npages = arena_mapbits_large_size_get(chunk,
pageind) >> LG_PAGE; pageind) >> LG_PAGE;
else { } else {
size_t binind; size_t binind;
arena_bin_info_t *bin_info; arena_bin_info_t *bin_info;
arena_run_t *run = (arena_run_t *)((uintptr_t) arena_run_t *run = (arena_run_t *)((uintptr_t)
@ -636,36 +762,27 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
pageind) == 0); pageind) == 0);
binind = arena_bin_index(arena, run->bin); binind = arena_bin_index(arena, run->bin);
bin_info = &arena_bin_info[binind]; bin_info = &arena_bin_info[binind];
pageind += bin_info->run_size >> LG_PAGE; npages = bin_info->run_size >> LG_PAGE;
} }
} }
} }
assert(pageind == chunk_npages); assert(pageind == chunk_npages);
assert(chunk->ndirty == 0 || all == false);
if (config_debug) assert(chunk->nruns_adjac == 0);
ndirty = chunk->ndirty;
if (config_stats)
arena->stats.purged += chunk->ndirty;
arena->ndirty -= chunk->ndirty;
chunk->ndirty = 0;
ql_remove(&arena->chunks_dirty, chunk, link_dirty);
chunk->dirtied = false;
malloc_mutex_unlock(&arena->lock); malloc_mutex_unlock(&arena->lock);
if (config_stats) if (config_stats)
nmadvise = 0; nmadvise = 0;
npurged = 0;
ql_foreach(mapelm, &mapelms, u.ql_link) { ql_foreach(mapelm, &mapelms, u.ql_link) {
size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
sizeof(arena_chunk_map_t)) + map_bias;
size_t npages = arena_mapbits_large_size_get(chunk, pageind) >>
LG_PAGE;
bool unzeroed; bool unzeroed;
size_t flag_unzeroed, i; size_t flag_unzeroed, i;
pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
sizeof(arena_chunk_map_t)) + map_bias;
npages = arena_mapbits_large_size_get(chunk, pageind) >>
LG_PAGE;
assert(pageind + npages <= chunk_npages); assert(pageind + npages <= chunk_npages);
assert(ndirty >= npages);
if (config_debug)
ndirty -= npages;
unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind << unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind <<
LG_PAGE)), (npages << LG_PAGE)); LG_PAGE)), (npages << LG_PAGE));
flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0; flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0;
@ -683,10 +800,10 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
arena_mapbits_unzeroed_set(chunk, pageind+i, arena_mapbits_unzeroed_set(chunk, pageind+i,
flag_unzeroed); flag_unzeroed);
} }
npurged += npages;
if (config_stats) if (config_stats)
nmadvise++; nmadvise++;
} }
assert(ndirty == 0);
malloc_mutex_lock(&arena->lock); malloc_mutex_lock(&arena->lock);
if (config_stats) if (config_stats)
arena->stats.nmadvise += nmadvise; arena->stats.nmadvise += nmadvise;
@ -694,14 +811,27 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
/* Deallocate runs. */ /* Deallocate runs. */
for (mapelm = ql_first(&mapelms); mapelm != NULL; for (mapelm = ql_first(&mapelms); mapelm != NULL;
mapelm = ql_first(&mapelms)) { mapelm = ql_first(&mapelms)) {
size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / arena_run_t *run;
sizeof(arena_chunk_map_t)) + map_bias;
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)(pageind << LG_PAGE));
pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
sizeof(arena_chunk_map_t)) + map_bias;
run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind <<
LG_PAGE));
ql_remove(&mapelms, mapelm, u.ql_link); ql_remove(&mapelms, mapelm, u.ql_link);
arena_run_dalloc(arena, run, false); arena_run_dalloc(arena, run, false, true);
} }
return (npurged);
}
/*
 * Tree iteration callback that accumulates chunk->ndirty into the size_t
 * pointed to by arg.  The tree argument is unused.
 *
 * Always returns NULL.  NOTE(review): presumably a NULL return tells the
 * generated iterator to keep walking, since the caller expects the total
 * to cover every chunk in the tree -- confirm against the rb_gen macros.
 */
static arena_chunk_t *
chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg)
{
	size_t *total = (size_t *)arg;

	/* Every chunk visited here is expected to have dirty pages. */
	assert(chunk->ndirty != 0);
	*total = *total + chunk->ndirty;

	return (NULL);
}
static void static void
@ -712,14 +842,11 @@ arena_purge(arena_t *arena, bool all)
if (config_debug) { if (config_debug) {
size_t ndirty = 0; size_t ndirty = 0;
ql_foreach(chunk, &arena->chunks_dirty, link_dirty) { arena_chunk_dirty_iter(&arena->chunks_dirty, NULL,
assert(chunk->dirtied); chunks_dirty_iter_cb, (void *)&ndirty);
ndirty += chunk->ndirty;
}
assert(ndirty == arena->ndirty); assert(ndirty == arena->ndirty);
} }
assert(arena->ndirty > arena->npurgatory || all); assert(arena->ndirty > arena->npurgatory || all);
assert(arena->ndirty - arena->npurgatory > chunk_npages || all);
assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
arena->npurgatory) || all); arena->npurgatory) || all);
@ -731,16 +858,24 @@ arena_purge(arena_t *arena, bool all)
* purge, and add the result to arena->npurgatory. This will keep * purge, and add the result to arena->npurgatory. This will keep
* multiple threads from racing to reduce ndirty below the threshold. * multiple threads from racing to reduce ndirty below the threshold.
*/ */
npurgatory = arena->ndirty - arena->npurgatory; {
if (all == false) { size_t npurgeable = arena->ndirty - arena->npurgatory;
assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult);
npurgatory -= arena->nactive >> opt_lg_dirty_mult; if (all == false) {
size_t threshold = (arena->nactive >>
opt_lg_dirty_mult);
npurgatory = npurgeable - threshold;
} else
npurgatory = npurgeable;
} }
arena->npurgatory += npurgatory; arena->npurgatory += npurgatory;
while (npurgatory > 0) { while (npurgatory > 0) {
size_t npurgeable, npurged, nunpurged;
/* Get next chunk with dirty pages. */ /* Get next chunk with dirty pages. */
chunk = ql_first(&arena->chunks_dirty); chunk = arena_chunk_dirty_first(&arena->chunks_dirty);
if (chunk == NULL) { if (chunk == NULL) {
/* /*
* This thread was unable to purge as many pages as * This thread was unable to purge as many pages as
@ -751,23 +886,15 @@ arena_purge(arena_t *arena, bool all)
arena->npurgatory -= npurgatory; arena->npurgatory -= npurgatory;
return; return;
} }
while (chunk->ndirty == 0) { npurgeable = chunk->ndirty;
ql_remove(&arena->chunks_dirty, chunk, link_dirty); assert(npurgeable != 0);
chunk->dirtied = false;
chunk = ql_first(&arena->chunks_dirty);
if (chunk == NULL) {
/* Same logic as for above. */
arena->npurgatory -= npurgatory;
return;
}
}
if (chunk->ndirty > npurgatory) { if (npurgeable > npurgatory && chunk->nruns_adjac == 0) {
/* /*
* This thread will, at a minimum, purge all the dirty * This thread will purge all the dirty pages in chunk,
* pages in chunk, so set npurgatory to reflect this * so set npurgatory to reflect this thread's intent to
* thread's commitment to purge the pages. This tends * purge the pages. This tends to reduce the chances
* to reduce the chances of the following scenario: * of the following scenario:
* *
* 1) This thread sets arena->npurgatory such that * 1) This thread sets arena->npurgatory such that
* (arena->ndirty - arena->npurgatory) is at the * (arena->ndirty - arena->npurgatory) is at the
@ -781,13 +908,20 @@ arena_purge(arena_t *arena, bool all)
* because all of the purging work being done really * because all of the purging work being done really
* needs to happen. * needs to happen.
*/ */
arena->npurgatory += chunk->ndirty - npurgatory; arena->npurgatory += npurgeable - npurgatory;
npurgatory = chunk->ndirty; npurgatory = npurgeable;
} }
arena->npurgatory -= chunk->ndirty; /*
npurgatory -= chunk->ndirty; * Keep track of how many pages are purgeable, versus how many
arena_chunk_purge(arena, chunk); * actually get purged, and adjust counters accordingly.
*/
arena->npurgatory -= npurgeable;
npurgatory -= npurgeable;
npurged = arena_chunk_purge(arena, chunk, all);
nunpurged = npurgeable - npurged;
arena->npurgatory += nunpurged;
npurgatory += nunpurged;
} }
} }
@ -801,11 +935,10 @@ arena_purge_all(arena_t *arena)
} }
static void static void
arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned)
{ {
arena_chunk_t *chunk; arena_chunk_t *chunk;
size_t size, run_ind, run_pages, flag_dirty; size_t size, run_ind, run_pages, flag_dirty;
arena_avail_tree_t *runs_avail;
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
@ -836,15 +969,14 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
/* /*
* The run is dirty if the caller claims to have dirtied it, as well as * The run is dirty if the caller claims to have dirtied it, as well as
* if it was already dirty before being allocated. * if it was already dirty before being allocated and the caller
* doesn't claim to have cleaned it.
*/ */
assert(arena_mapbits_dirty_get(chunk, run_ind) == assert(arena_mapbits_dirty_get(chunk, run_ind) ==
arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); arena_mapbits_dirty_get(chunk, run_ind+run_pages-1));
if (arena_mapbits_dirty_get(chunk, run_ind) != 0) if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0)
dirty = true; dirty = true;
flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0;
runs_avail = dirty ? &arena->runs_avail_dirty :
&arena->runs_avail_clean;
/* Mark pages as unallocated in the chunk map. */ /* Mark pages as unallocated in the chunk map. */
if (dirty) { if (dirty) {
@ -852,9 +984,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
CHUNK_MAP_DIRTY); CHUNK_MAP_DIRTY);
arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size,
CHUNK_MAP_DIRTY); CHUNK_MAP_DIRTY);
chunk->ndirty += run_pages;
arena->ndirty += run_pages;
} else { } else {
arena_mapbits_unallocated_set(chunk, run_ind, size, arena_mapbits_unallocated_set(chunk, run_ind, size,
arena_mapbits_unzeroed_get(chunk, run_ind)); arena_mapbits_unzeroed_get(chunk, run_ind));
@ -878,8 +1007,8 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
run_ind+run_pages+nrun_pages-1) == nrun_size); run_ind+run_pages+nrun_pages-1) == nrun_size);
assert(arena_mapbits_dirty_get(chunk, assert(arena_mapbits_dirty_get(chunk,
run_ind+run_pages+nrun_pages-1) == flag_dirty); run_ind+run_pages+nrun_pages-1) == flag_dirty);
arena_avail_tree_remove(runs_avail, arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages,
arena_mapp_get(chunk, run_ind+run_pages)); false, true);
size += nrun_size; size += nrun_size;
run_pages += nrun_pages; run_pages += nrun_pages;
@ -905,8 +1034,8 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == assert(arena_mapbits_unallocated_size_get(chunk, run_ind) ==
prun_size); prun_size);
assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty); assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty);
arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, arena_avail_remove(arena, chunk, run_ind, prun_pages, true,
run_ind)); false);
size += prun_size; size += prun_size;
run_pages += prun_pages; run_pages += prun_pages;
@ -921,19 +1050,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1)); arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1));
assert(arena_mapbits_dirty_get(chunk, run_ind) == assert(arena_mapbits_dirty_get(chunk, run_ind) ==
arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); arena_mapbits_dirty_get(chunk, run_ind+run_pages-1));
arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, run_ind)); arena_avail_insert(arena, chunk, run_ind, run_pages, true, true);
if (dirty) {
/*
* Insert into chunks_dirty before potentially calling
* arena_chunk_dealloc(), so that chunks_dirty and
* arena->ndirty are consistent.
*/
if (chunk->dirtied == false) {
ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty);
chunk->dirtied = true;
}
}
/* Deallocate chunk if it is now completely unused. */ /* Deallocate chunk if it is now completely unused. */
if (size == arena_maxclass) { if (size == arena_maxclass) {
@ -982,7 +1099,7 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
arena_mapbits_large_set(chunk, pageind+head_npages, newsize, arena_mapbits_large_set(chunk, pageind+head_npages, newsize,
flag_dirty); flag_dirty);
arena_run_dalloc(arena, run, false); arena_run_dalloc(arena, run, false, false);
} }
static void static void
@ -1015,7 +1132,7 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
flag_dirty); flag_dirty);
arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
dirty); dirty, false);
} }
static arena_run_t * static arena_run_t *
@ -1526,7 +1643,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
((past - run_ind) << LG_PAGE), false); ((past - run_ind) << LG_PAGE), false);
/* npages = past - run_ind; */ /* npages = past - run_ind; */
} }
arena_run_dalloc(arena, run, true); arena_run_dalloc(arena, run, true, false);
malloc_mutex_unlock(&arena->lock); malloc_mutex_unlock(&arena->lock);
/****************************/ /****************************/
malloc_mutex_lock(&bin->lock); malloc_mutex_lock(&bin->lock);
@ -1638,7 +1755,7 @@ arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr)
} }
} }
arena_run_dalloc(arena, (arena_run_t *)ptr, true); arena_run_dalloc(arena, (arena_run_t *)ptr, true, false);
} }
void void
@ -1985,15 +2102,14 @@ arena_new(arena_t *arena, unsigned ind)
arena->dss_prec = chunk_dss_prec_get(); arena->dss_prec = chunk_dss_prec_get();
/* Initialize chunks. */ /* Initialize chunks. */
ql_new(&arena->chunks_dirty); arena_chunk_dirty_new(&arena->chunks_dirty);
arena->spare = NULL; arena->spare = NULL;
arena->nactive = 0; arena->nactive = 0;
arena->ndirty = 0; arena->ndirty = 0;
arena->npurgatory = 0; arena->npurgatory = 0;
arena_avail_tree_new(&arena->runs_avail_clean); arena_avail_tree_new(&arena->runs_avail);
arena_avail_tree_new(&arena->runs_avail_dirty);
/* Initialize bins. */ /* Initialize bins. */
for (i = 0; i < NBINS; i++) { for (i = 0; i < NBINS; i++) {