Modify dirty page purging algorithm.

Convert chunks_dirty from a red-black tree to a doubly linked list,
and use it to purge dirty pages from chunks in FIFO order.

Add a lock around the code that purges dirty pages via madvise(2), in
order to avoid kernel contention.  If lock acquisition fails,
indefinitely postpone purging dirty pages.

Add a lower limit of one chunk worth of dirty pages per arena for
purging, in addition to the active:dirty ratio.

When purging, purge all dirty pages from at least one chunk, but rather
than purging enough pages to drop to half the purging threshold, merely
drop to the threshold.
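
To make the combined effect of these rules concrete, here is a small standalone sketch (not jemalloc code; the 32:1 ratio and the 1024-page chunk are illustrative assumptions, not values taken from this commit) of the new trigger and stopping conditions:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define LG_DIRTY_MULT 5   /* assumed active:dirty ratio of 32:1 */
#define CHUNK_NPAGES 1024 /* assumed pages per chunk */

/* Purge only once dirty pages exceed both the ratio-derived threshold and
 * one chunk worth of pages. */
static bool
should_purge(size_t nactive, size_t ndirty)
{
    return (ndirty > CHUNK_NPAGES && (nactive >> LG_DIRTY_MULT) < ndirty);
}

/* Purging now stops at the threshold itself, not at half of it. */
static size_t
purge_stop_at(size_t nactive)
{
    return (nactive >> LG_DIRTY_MULT);
}

int
main(void)
{
    size_t nactive = 100000;

    printf("%d\n", should_purge(nactive, 900));  /* 0: under one chunk worth */
    printf("%d\n", should_purge(nactive, 4000)); /* 1: over both limits */
    printf("%zu\n", purge_stop_at(nactive));     /* purge down to 3125 */
    return (0);
}

In the actual patch the purge loop additionally finishes at least one whole chunk before honoring the stop condition (the i > 0 guard in arena_purge()).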
Jason Evans 2010-03-04 21:35:07 -08:00
parent 3c2d9c899c
commit 2caa4715ed
4 changed files with 80 additions and 75 deletions


@@ -38,7 +38,7 @@
 .\" @(#)malloc.3 8.1 (Berkeley) 6/4/93
 .\" $FreeBSD: head/lib/libc/stdlib/malloc.3 182225 2008-08-27 02:00:53Z jasone $
 .\"
-.Dd March 1, 2010
+.Dd March 4, 2010
 .Dt JEMALLOC 3
 .Os
 .Sh NAME
@@ -333,7 +333,8 @@ The default value is 512 bytes.
 .It D
 Halve/double the per-arena minimum ratio of active to dirty pages.
 Some dirty unused pages may be allowed to accumulate, within the limit set by
-the ratio, before informing the kernel about at least half of those pages via
+the ratio (or one chunk worth of dirty pages, whichever is greater), before
+informing the kernel about some of those pages via
 .Xr madvise 2 .
 This provides the kernel with sufficient information to recycle dirty pages if
 physical memory becomes scarce and the pages remain unused.
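
For readers unfamiliar with the hint being described, the standalone sketch below (not part of this change; the mapping size and constants are illustrative) shows what the madvise(2) call jemalloc issues does to a dirtied region: the mapping stays valid, but the kernel is free to reclaim the physical pages, and on Linux they read back as zero-filled if touched again.

#include <string.h>
#include <sys/mman.h>

int
main(void)
{
    size_t len = 16 * 4096;                 /* 16 pages, assuming 4 KiB pages */
    char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (p == MAP_FAILED)
        return (1);
    memset(p, 0xa5, len);                   /* dirty the pages */
    madvise(p, len, MADV_DONTNEED);         /* the hint described above */
    munmap(p, len);
    return (0);
}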


@@ -178,11 +178,11 @@ struct arena_chunk_s {
     /* Arena that owns the chunk. */
     arena_t *arena;
-    /* Linkage for the arena's chunks_dirty tree. */
-    rb_node(arena_chunk_t) link_dirty;
+    /* Linkage for the arena's chunks_dirty list. */
+    ql_elm(arena_chunk_t) link_dirty;
     /*
-     * True if the chunk is currently in the chunks_dirty tree, due to
+     * True if the chunk is currently in the chunks_dirty list, due to
      * having at some point contained one or more dirty pages. Removal
      * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
      */
@@ -287,8 +287,8 @@ struct arena_s {
     uint64_t prof_accumbytes;
 #endif
-    /* Tree of dirty-page-containing chunks this arena manages. */
-    arena_chunk_tree_t chunks_dirty;
+    /* List of dirty-page-containing chunks this arena manages. */
+    ql_head(arena_chunk_t) chunks_dirty;
     /*
      * In order to avoid rapid chunk allocation/deallocation when an arena


@@ -25,6 +25,7 @@ bool malloc_mutex_init(malloc_mutex_t *mutex);
 #ifndef JEMALLOC_ENABLE_INLINE
 void malloc_mutex_lock(malloc_mutex_t *mutex);
+bool malloc_mutex_trylock(malloc_mutex_t *mutex);
 void malloc_mutex_unlock(malloc_mutex_t *mutex);
 #endif
@@ -37,6 +38,16 @@ malloc_mutex_lock(malloc_mutex_t *mutex)
     pthread_mutex_lock(mutex);
 }
+JEMALLOC_INLINE bool
+malloc_mutex_trylock(malloc_mutex_t *mutex)
+{
+    if (isthreaded)
+        return (pthread_mutex_trylock(mutex) != 0);
+    else
+        return (false);
+}
 JEMALLOC_INLINE void
 malloc_mutex_unlock(malloc_mutex_t *mutex)
 {
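
Note that the new wrapper inverts the usual sense of a lock call: it returns true when the lock could not be taken (a nonzero pthread_mutex_trylock(3) result) and false when it was acquired, or when the process is single-threaded. A standalone sketch of the resulting try-then-skip pattern, using plain pthreads and illustrative names rather than jemalloc's internals (compile with -pthread):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t purge_lock = PTHREAD_MUTEX_INITIALIZER;

static bool
purge_trylock(void)
{
    /* Mirrors the wrapper above: nonzero pthread result means failure. */
    return (pthread_mutex_trylock(&purge_lock) != 0);
}

static void
maybe_purge(void)
{
    if (purge_trylock()) {
        /* Another thread is purging; skip rather than contend. */
        return;
    }
    puts("purging dirty pages");
    pthread_mutex_unlock(&purge_lock);
}

int
main(void)
{
    maybe_purge();
    return (0);
}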


@@ -27,6 +27,9 @@ size_t medium_max;
 size_t lg_mspace;
 size_t mspace_mask;
+/* Used to prevent threads from concurrently calling madvise(2). */
+static malloc_mutex_t purge_lock;
 /*
  * const_small_size2bin is a static constant lookup table that in the common
  * case can be used as-is for small_size2bin. For dynamically linked programs,
@@ -188,22 +191,6 @@ static bool small_size2bin_init_hard(void);
 /******************************************************************************/
-static inline int
-arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
-{
-    uintptr_t a_chunk = (uintptr_t)a;
-    uintptr_t b_chunk = (uintptr_t)b;
-    assert(a != NULL);
-    assert(b != NULL);
-    return ((a_chunk > b_chunk) - (a_chunk < b_chunk));
-}
-/* Generate red-black tree functions. */
-rb_gen(static JEMALLOC_ATTR(unused), arena_chunk_tree_dirty_,
-    arena_chunk_tree_t, arena_chunk_t, link_dirty, arena_chunk_comp)
 static inline int
 arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
 {
@@ -339,14 +326,13 @@ arena_run_rc_decr(arena_run_t *run, arena_bin_t *bin, const void *ptr)
     if (dirtier) {
         if (chunk->dirtied == false) {
-            arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
-                chunk);
+            ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty);
             chunk->dirtied = true;
         }
         /* Enforce opt_lg_dirty_mult. */
-        if (opt_lg_dirty_mult >= 0 && (arena->nactive >>
-            opt_lg_dirty_mult) < arena->ndirty)
+        if (opt_lg_dirty_mult >= 0 && arena->ndirty > chunk_npages &&
+            (arena->nactive >> opt_lg_dirty_mult) < arena->ndirty)
             arena_purge(arena);
     }
 }
@@ -595,6 +581,7 @@ arena_chunk_alloc(arena_t *arena)
 #endif
         chunk->arena = arena;
+        ql_elm_new(chunk, link_dirty);
         chunk->dirtied = false;
         /*
@@ -630,8 +617,8 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
     if (arena->spare != NULL) {
         if (arena->spare->dirtied) {
-            arena_chunk_tree_dirty_remove(
-                &chunk->arena->chunks_dirty, arena->spare);
+            ql_remove(&chunk->arena->chunks_dirty, arena->spare,
+                link_dirty);
             arena->ndirty -= arena->spare->ndirty;
         }
         chunk_dealloc((void *)arena->spare, chunksize);
@@ -641,10 +628,8 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
     }
     /*
-     * Remove run from runs_avail, regardless of whether this chunk
-     * will be cached, so that the arena does not use it. Dirty page
-     * flushing only uses the chunks_dirty tree, so leaving this chunk in
-     * the chunks_* trees is sufficient for that purpose.
+     * Remove run from runs_avail, regardless of whether this chunk will be
+     * cached, so that the arena does not use it.
      */
     arena_avail_tree_remove(&arena->runs_avail,
         &chunk->map[arena_chunk_header_npages]);
@@ -689,30 +674,21 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
     return (run);
 }
-#ifdef JEMALLOC_DEBUG
-static arena_chunk_t *
-chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg)
-{
-    size_t *ndirty = (size_t *)arg;
-    assert(chunk->dirtied);
-    *ndirty += chunk->ndirty;
-    return (NULL);
-}
-#endif
 static void
 arena_purge(arena_t *arena)
 {
     arena_chunk_t *chunk;
-    size_t i, npages;
+    size_t i, j, npages;
 #ifdef JEMALLOC_DEBUG
     size_t ndirty = 0;
-    arena_chunk_tree_dirty_iter(&arena->chunks_dirty, NULL,
-        chunks_dirty_iter_cb, (void *)&ndirty);
+    ql_foreach(chunk, &arena->chunks_dirty, link_dirty) {
+        assert(chunk->dirtied);
+        ndirty += chunk->ndirty;
+    }
     assert(ndirty == arena->ndirty);
 #endif
+    assert(arena->ndirty > chunk_npages);
     assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty);
 #ifdef JEMALLOC_STATS
@@ -720,49 +696,64 @@ arena_purge(arena_t *arena)
 #endif
     /*
-     * Iterate downward through chunks until enough dirty memory has been
-     * purged. Terminate as soon as possible in order to minimize the
-     * number of system calls, even if a chunk has only been partially
-     * purged.
+     * Only allow one thread at a time to purge dirty pages. madvise(2)
+     * causes the kernel to modify virtual memory data structures that are
+     * typically protected by a lock, and purging isn't important enough to
+     * suffer lock contention in the kernel. The result of failing to
+     * acquire purge_lock here is that this arena will operate with ndirty
+     * above the threshold until some dirty pages are re-used, or the
+     * creation of more dirty pages causes this function to be called
+     * again.
      */
+    if (malloc_mutex_trylock(&purge_lock))
+        return;
-    while ((arena->nactive >> (opt_lg_dirty_mult + 1)) < arena->ndirty) {
-        chunk = arena_chunk_tree_dirty_last(&arena->chunks_dirty);
+    /*
+     * Iterate through chunks until enough dirty memory has been
+     * purged (all dirty pages in one chunk, or enough pages to drop to
+     * threshold, whichever is greater). Terminate as soon as possible in
+     * order to minimize the number of system calls, even if a chunk has
+     * only been partially purged.
+     */
+    for (i = 0; (arena->nactive >> opt_lg_dirty_mult) < arena->ndirty;
+        i++) {
+        chunk = ql_first(&arena->chunks_dirty);
         assert(chunk != NULL);
-        for (i = chunk_npages - 1; chunk->ndirty > 0; i--) {
-            assert(i >= arena_chunk_header_npages);
-            if (chunk->map[i].bits & CHUNK_MAP_DIRTY) {
-                chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
+        /* Purge pages from high to low within each chunk. */
+        for (j = chunk_npages - 1; chunk->ndirty > 0; j--) {
+            assert(j >= arena_chunk_header_npages);
+            if (chunk->map[j].bits & CHUNK_MAP_DIRTY) {
+                chunk->map[j].bits ^= CHUNK_MAP_DIRTY;
                 /* Find adjacent dirty run(s). */
-                for (npages = 1; i > arena_chunk_header_npages
-                    && (chunk->map[i - 1].bits &
+                for (npages = 1; j > arena_chunk_header_npages
+                    && (chunk->map[j - 1].bits &
                     CHUNK_MAP_DIRTY); npages++) {
-                    i--;
-                    chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
+                    j--;
+                    chunk->map[j].bits ^= CHUNK_MAP_DIRTY;
                 }
                 chunk->ndirty -= npages;
                 arena->ndirty -= npages;
-                madvise((void *)((uintptr_t)chunk + (i <<
+                madvise((void *)((uintptr_t)chunk + (j <<
                     PAGE_SHIFT)), (npages << PAGE_SHIFT),
                     MADV_DONTNEED);
 #ifdef JEMALLOC_STATS
                 arena->stats.nmadvise++;
                 arena->stats.purged += npages;
 #endif
-                if ((arena->nactive >> (opt_lg_dirty_mult + 1))
-                    >= arena->ndirty)
+                if ((arena->nactive >> opt_lg_dirty_mult) >=
+                    arena->ndirty && i > 0)
                     break;
             }
         }
        if (chunk->ndirty == 0) {
-            arena_chunk_tree_dirty_remove(&arena->chunks_dirty,
-                chunk);
+            ql_remove(&arena->chunks_dirty, chunk, link_dirty);
            chunk->dirtied = false;
        }
     }
+    malloc_mutex_unlock(&purge_lock);
 }
 static void
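
Because chunks are appended to the tail of chunks_dirty when they first become dirty and arena_purge() always takes its victim from the head, purging now visits chunks in FIFO order. The standalone sketch below (illustrative types and helpers, not jemalloc's ql_* macros) shows that discipline in isolation:

#include <stdio.h>

typedef struct chunk_s {
    int id;
    struct chunk_s *prev, *next;   /* doubly linked, as in the patch */
} chunk_t;

typedef struct {
    chunk_t *head, *tail;
} chunk_list_t;

static void
list_append(chunk_list_t *l, chunk_t *c)   /* chunk just became dirty */
{
    c->next = NULL;
    c->prev = l->tail;
    if (l->tail != NULL)
        l->tail->next = c;
    else
        l->head = c;
    l->tail = c;
}

static chunk_t *
list_pop_head(chunk_list_t *l)             /* next chunk to purge */
{
    chunk_t *c = l->head;

    if (c != NULL) {
        l->head = c->next;
        if (l->head != NULL)
            l->head->prev = NULL;
        else
            l->tail = NULL;
    }
    return (c);
}

int
main(void)
{
    chunk_list_t dirty = {NULL, NULL};
    chunk_t a = {1}, b = {2}, c = {3};

    list_append(&dirty, &a);
    list_append(&dirty, &b);
    list_append(&dirty, &c);
    /* Purged in dirtying order: 1, 2, 3. */
    for (chunk_t *p; (p = list_pop_head(&dirty)) != NULL;)
        printf("%d\n", p->id);
    return (0);
}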
@@ -885,14 +876,13 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
      */
     if (dirty) {
         if (chunk->dirtied == false) {
-            arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
-                chunk);
+            ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty);
             chunk->dirtied = true;
         }
         /* Enforce opt_lg_dirty_mult. */
-        if (opt_lg_dirty_mult >= 0 && (arena->nactive >>
-            opt_lg_dirty_mult) < arena->ndirty)
+        if (opt_lg_dirty_mult >= 0 && arena->ndirty > chunk_npages &&
+            (arena->nactive >> opt_lg_dirty_mult) < arena->ndirty)
             arena_purge(arena);
     }
 }
@@ -1629,12 +1619,12 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
 #endif
     if (chunk->dirtied == false) {
-        arena_chunk_tree_dirty_insert(&arena->chunks_dirty, chunk);
+        ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty);
         chunk->dirtied = true;
     }
     /* Enforce opt_lg_dirty_mult. */
-    if (opt_lg_dirty_mult >= 0 && (arena->nactive >> opt_lg_dirty_mult) <
-        arena->ndirty)
+    if (opt_lg_dirty_mult >= 0 && arena->ndirty > chunk_npages &&
+        (arena->nactive >> opt_lg_dirty_mult) < arena->ndirty)
         arena_purge(arena);
 }
@@ -2037,7 +2027,7 @@ arena_new(arena_t *arena, unsigned ind)
 #endif
     /* Initialize chunks. */
-    arena_chunk_tree_dirty_new(&arena->chunks_dirty);
+    ql_new(&arena->chunks_dirty);
     arena->spare = NULL;
     arena->nactive = 0;
@@ -2345,5 +2335,8 @@ arena_boot(void)
         ((header_size & PAGE_MASK) != 0);
     arena_maxclass = chunksize - (arena_chunk_header_npages << PAGE_SHIFT);
+    if (malloc_mutex_init(&purge_lock))
+        return (true);
     return (false);
 }