Purge dirty pages without arena->lock.
parent 86815df9dc
commit 05b21be347
@@ -95,14 +95,23 @@ typedef struct arena_s arena_t;
 /* Each element of the chunk map corresponds to one page within the chunk. */
 struct arena_chunk_map_s {
-	/*
-	 * Linkage for run trees.  There are two disjoint uses:
-	 *
-	 * 1) arena_t's runs_avail tree.
-	 * 2) arena_run_t conceptually uses this linkage for in-use non-full
-	 *    runs, rather than directly embedding linkage.
-	 */
-	rb_node(arena_chunk_map_t)	link;
+	union {
+		/*
+		 * Linkage for run trees.  There are two disjoint uses:
+		 *
+		 * 1) arena_t's runs_avail tree.
+		 * 2) arena_run_t conceptually uses this linkage for in-use
+		 *    non-full runs, rather than directly embedding linkage.
+		 */
+		rb_node(arena_chunk_map_t)	rb_link;
+
+		/*
+		 * List of runs currently in purgatory.  arena_chunk_purge()
+		 * temporarily allocates runs that contain dirty pages while
+		 * purging, so that other threads cannot use the runs while the
+		 * purging thread is operating without the arena lock held.
+		 */
+		ql_elm(arena_chunk_map_t)	ql_link;
+	} u;
 
 #ifdef JEMALLOC_PROF
 	/* Profile counters, used for large object runs. */
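The new union is worth a second look: a map element needs the red-black tree linkage only while its run is linked into runs_avail (or stands in for an in-use run's linkage), and it needs the list linkage only while the run is parked in purgatory, so the two can share storage. The sketch below is illustrative only; node_t and its fields are hypothetical stand-ins, not jemalloc's rb_node()/ql_elm() macros.

typedef struct node_s node_t;
struct node_s {
	union {
		struct {		/* stand-in for rb_node() linkage */
			node_t	*left;
			node_t	*right;
		} tree;			/* used while on the available-runs tree */
		struct {		/* stand-in for ql_elm() linkage */
			node_t	*next;
			node_t	*prev;
		} list;			/* used only while in purgatory */
	} u;				/* an element is never in both at once */
	unsigned long	bits;		/* per-page flags, as in map->bits */
};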
@@ -311,6 +320,14 @@ struct arena_s {
 	 */
 	size_t			ndirty;
 
+	/*
+	 * Approximate number of pages being purged.  It is possible for
+	 * multiple threads to purge dirty pages concurrently, and they use
+	 * npurgatory to indicate the total number of pages all threads are
+	 * attempting to purge.
+	 */
+	size_t			npurgatory;
+
 	/*
 	 * Size/address-ordered tree of this arena's available runs.  This tree
 	 * is used for first-best-fit run allocation.
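To make the accounting concrete, here is a small standalone sketch of the threshold that ndirty and npurgatory feed into. The numbers, the lg_dirty_mult value, and main() are illustrative assumptions, and the real check in arena_maybe_purge() later in this diff additionally requires the backlog to exceed chunk_npages.

#include <stdio.h>

int
main(void)
{
	int lg_dirty_mult = 5;		/* assumed dirty:active target of 1:32 */
	size_t nactive = 6400;		/* pages backing live allocations */
	size_t ndirty = 300;		/* unused pages not yet returned to the OS */
	size_t npurgatory = 80;		/* pages other threads have already claimed */

	size_t threshold = nactive >> lg_dirty_mult;	/* 200 */
	size_t backlog = ndirty - npurgatory;		/* 220 */

	if (backlog > threshold)
		printf("purge at least %zu more pages\n", backlog - threshold);
	return (0);
}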
@@ -27,9 +27,6 @@ size_t medium_max;
 size_t		lg_mspace;
 size_t		mspace_mask;
 
-/* Used to prevent threads from concurrently calling madvise(2). */
-static malloc_mutex_t	purge_lock;
-
 /*
  * const_small_size2bin is a static constant lookup table that in the common
  * case can be used as-is for small_size2bin.  For dynamically linked programs,
@@ -215,7 +212,7 @@ arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
 
 /* Generate red-black tree functions. */
 rb_gen(static JEMALLOC_ATTR(unused), arena_run_tree_, arena_run_tree_t,
-    arena_chunk_map_t, link, arena_run_comp)
+    arena_chunk_map_t, u.rb_link, arena_run_comp)
 
 static inline int
 arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
@@ -247,7 +244,7 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
 
 /* Generate red-black tree functions. */
 rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t,
-    arena_chunk_map_t, link, arena_avail_comp)
+    arena_chunk_map_t, u.rb_link, arena_avail_comp)
 
 static inline void *
 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
@@ -480,11 +477,183 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
 	return (run);
 }
 
+static inline void
+arena_maybe_purge(arena_t *arena)
+{
+
+	/* Enforce opt_lg_dirty_mult. */
+	if (opt_lg_dirty_mult >= 0 && arena->ndirty > arena->npurgatory &&
+	    (arena->ndirty - arena->npurgatory) > chunk_npages &&
+	    (arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
+	    arena->npurgatory))
+		arena_purge(arena);
+}
+
+static inline void
+arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
+{
+	ql_head(arena_chunk_map_t) mapelms;
+	arena_chunk_map_t *mapelm;
+	size_t pageind;
+#ifdef JEMALLOC_DEBUG
+	size_t ndirty;
+#endif
+#ifdef JEMALLOC_STATS
+	size_t nmadvise;
+#endif
+
+	ql_new(&mapelms);
+
+	/*
+	 * If chunk is the spare, temporarily re-allocate it, 1) so that its
+	 * run is reinserted into runs_avail, and 2) so that it cannot be
+	 * completely discarded by another thread while arena->lock is dropped
+	 * by this thread.  Note that the arena_run_dalloc() call will
+	 * implicitly deallocate the chunk, so no explicit action is required
+	 * in this function to deallocate the chunk.
+	 */
+	if (chunk == arena->spare)
+		arena_chunk_alloc(arena);
+
+	/* Temporarily allocate all free runs that contain dirty pages. */
+	for (pageind = arena_chunk_header_npages; pageind < chunk_npages;) {
+		mapelm = &chunk->map[pageind];
+		if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) {
+			size_t npages, i;
+
+			npages = (mapelm->bits & CHUNK_MAP_PG_MASK) >>
+			    CHUNK_MAP_PG_SHIFT;
+			for (i = 0; i < npages; i++) {
+				if (chunk->map[pageind + i].bits &
+				    CHUNK_MAP_DIRTY) {
+					/*
+					 * This run contains at least one dirty
+					 * page.  Temporarily allocate it.
+					 * Don't bother setting the
+					 * CHUNK_MAP_ALLOCATED bit for interior
+					 * pages yet.  The bit will be set
+					 * during the loop that actually does
+					 * the madvise() calls, so that the run
+					 * looks like a normal large run by the
+					 * time it is passed to
+					 * arena_run_dalloc().
+					 */
+					arena_avail_tree_remove(
+					    &arena->runs_avail, mapelm);
+					mapelm->bits |= (CHUNK_MAP_LARGE |
+					    CHUNK_MAP_ALLOCATED);
+					chunk->map[pageind + npages - 1].bits |=
+					    (CHUNK_MAP_LARGE |
+					    CHUNK_MAP_ALLOCATED);
+					/*
+					 * Append to list for later processing.
+					 */
+					ql_elm_new(mapelm, u.ql_link);
+					ql_tail_insert(&mapelms, mapelm,
+					    u.ql_link);
+					break;
+				}
+			}
+			pageind += npages;
+		} else {
+			/* Skip allocated run. */
+			if (mapelm->bits & CHUNK_MAP_LARGE) {
+				pageind += (mapelm->bits & CHUNK_MAP_PG_MASK)
+				    >> CHUNK_MAP_PG_SHIFT;
+			} else {
+				arena_run_t *run = (arena_run_t *)((uintptr_t)
+				    chunk + (uintptr_t)((pageind -
+				    ((mapelm->bits & CHUNK_MAP_PG_MASK) >>
+				    CHUNK_MAP_PG_SHIFT)) << PAGE_SHIFT));
+				pageind += run->bin->run_size >> PAGE_SHIFT;
+			}
+		}
+	}
+
+#ifdef JEMALLOC_DEBUG
+	ndirty = chunk->ndirty;
+#endif
+#ifdef JEMALLOC_STATS
+	arena->stats.purged += chunk->ndirty;
+#endif
+	arena->ndirty -= chunk->ndirty;
+	chunk->ndirty = 0;
+	ql_remove(&arena->chunks_dirty, chunk, link_dirty);
+	chunk->dirtied = false;
+
+	malloc_mutex_unlock(&arena->lock);
+#ifdef JEMALLOC_STATS
+	nmadvise = 0;
+#endif
+	ql_foreach(mapelm, &mapelms, u.ql_link) {
+		size_t i, j;
+		size_t pageind = ((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+		    sizeof(arena_chunk_map_t);
+		size_t npages = (mapelm->bits & CHUNK_MAP_PG_MASK) >>
+		    CHUNK_MAP_PG_SHIFT;
+
+		for (i = 0; i < npages;) {
+			if (chunk->map[pageind + i].bits & CHUNK_MAP_DIRTY) {
+				chunk->map[pageind + i].bits ^= CHUNK_MAP_DIRTY;
+				chunk->map[pageind + i].bits |=
+				    (CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED);
+
+				/* Find adjacent dirty page(s). */
+				for (j = 1; i + j < npages; j++) {
+					if ((chunk->map[pageind + i + j].bits &
+					    CHUNK_MAP_DIRTY) == 0)
+						break;
+					chunk->map[pageind + i + j].bits ^=
+					    CHUNK_MAP_DIRTY;
+					chunk->map[pageind + i + j].bits |=
+					    (CHUNK_MAP_LARGE |
+					    CHUNK_MAP_ALLOCATED);
+				}
+#ifdef JEMALLOC_DEBUG
+				assert(ndirty >= j);
+				ndirty -= j;
+#endif
+
+				madvise((void *)((uintptr_t)chunk + ((pageind +
+				    i) << PAGE_SHIFT)), (j << PAGE_SHIFT),
+				    MADV_DONTNEED);
+#ifdef JEMALLOC_STATS
+				nmadvise++;
+#endif
+				i += j;
+			} else {
+				chunk->map[pageind + i].bits |=
+				    (CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED);
+				i++;
+			}
+		}
+	}
+#ifdef JEMALLOC_DEBUG
+	assert(ndirty == 0);
+#endif
+	malloc_mutex_lock(&arena->lock);
+#ifdef JEMALLOC_STATS
+	arena->stats.nmadvise += nmadvise;
+#endif
+
+	/* Deallocate runs. */
+	for (mapelm = ql_first(&mapelms); mapelm != NULL;
+	    mapelm = ql_first(&mapelms)) {
+		size_t pageind = ((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+		    sizeof(arena_chunk_map_t);
+		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+		    (uintptr_t)(pageind << PAGE_SHIFT));
+
+		ql_remove(&mapelms, mapelm, u.ql_link);
+		arena_run_dalloc(arena, run, false);
+	}
+}
+
 static void
 arena_purge(arena_t *arena)
 {
 	arena_chunk_t *chunk;
-	size_t i, j, npages;
+	size_t npurgatory;
 #ifdef JEMALLOC_DEBUG
 	size_t ndirty = 0;
 
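The heart of arena_chunk_purge() above is a hand-off: dirty runs are pulled out of the arena's data structures while arena->lock is still held, madvise(2) runs without the lock, and the runs are returned through arena_run_dalloc() once the lock is re-taken. Below is a minimal sketch of that shape under assumed types; pool_t, item_t, and pool_purge() are hypothetical stand-ins, not jemalloc's structures.

#include <pthread.h>
#include <stddef.h>
#include <sys/mman.h>

typedef struct item_s item_t;
struct item_s {
	item_t	*next;
	void	*addr;	/* start of the run's pages */
	size_t	len;	/* length in bytes */
};

typedef struct {
	pthread_mutex_t	lock;		/* analogous to arena->lock */
	item_t		*dirty_runs;	/* runs holding dirty pages */
	item_t		*clean_runs;	/* runs available for reuse */
} pool_t;

static void
pool_purge(pool_t *pool)
{
	item_t *batch, *it;

	/* Detach the dirty runs while locked so no one else can reuse them. */
	pthread_mutex_lock(&pool->lock);
	batch = pool->dirty_runs;
	pool->dirty_runs = NULL;
	pthread_mutex_unlock(&pool->lock);

	/* Issue the system calls without holding the lock. */
	for (it = batch; it != NULL; it = it->next)
		madvise(it->addr, it->len, MADV_DONTNEED);

	/* Re-take the lock and hand the runs back (arena_run_dalloc()'s role). */
	pthread_mutex_lock(&pool->lock);
	while (batch != NULL) {
		it = batch;
		batch = it->next;
		it->next = pool->clean_runs;
		pool->clean_runs = it;
	}
	pthread_mutex_unlock(&pool->lock);
}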
@@ -494,6 +663,7 @@ arena_purge(arena_t *arena)
 	}
 	assert(ndirty == arena->ndirty);
 #endif
+	assert(arena->ndirty > arena->npurgatory);
 	assert(arena->ndirty > chunk_npages);
 	assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty);
 
@@ -502,64 +672,65 @@ arena_purge(arena_t *arena)
 #endif
 
 	/*
-	 * Only allow one thread at a time to purge dirty pages.  madvise(2)
-	 * causes the kernel to modify virtual memory data structures that are
-	 * typically protected by a lock, and purging isn't important enough to
-	 * suffer lock contention in the kernel.  The result of failing to
-	 * acquire purge_lock here is that this arena will operate with ndirty
-	 * above the threshold until some dirty pages are re-used, or the
-	 * creation of more dirty pages causes this function to be called
-	 * again.
+	 * Compute the minimum number of pages that this thread should try to
+	 * purge, and add the result to arena->npurgatory.  This will keep
+	 * multiple threads from racing to reduce ndirty below the threshold.
 	 */
-	if (malloc_mutex_trylock(&purge_lock))
-		return;
+	npurgatory = (arena->ndirty - arena->npurgatory) - (arena->nactive >>
+	    opt_lg_dirty_mult);
+	arena->npurgatory += npurgatory;
 
-	/*
-	 * Iterate through chunks until enough dirty memory has been
-	 * purged (all dirty pages in one chunk, or enough pages to drop to
-	 * threshold, whichever is greater).  Terminate as soon as possible in
-	 * order to minimize the number of system calls, even if a chunk has
-	 * only been partially purged.
-	 */
-	for (i = 0; (arena->nactive >> opt_lg_dirty_mult) < arena->ndirty;
-	    i++) {
+	while (npurgatory > 0) {
+		/* Get next chunk with dirty pages. */
 		chunk = ql_first(&arena->chunks_dirty);
-		assert(chunk != NULL);
-
-		/* Purge pages from high to low within each chunk. */
-		for (j = chunk_npages - 1; chunk->ndirty > 0; j--) {
-			assert(j >= arena_chunk_header_npages);
-			if (chunk->map[j].bits & CHUNK_MAP_DIRTY) {
-				chunk->map[j].bits ^= CHUNK_MAP_DIRTY;
-				/* Find adjacent dirty run(s). */
-				for (npages = 1; j > arena_chunk_header_npages
-				    && (chunk->map[j - 1].bits &
-				    CHUNK_MAP_DIRTY); npages++) {
-					j--;
-					chunk->map[j].bits ^= CHUNK_MAP_DIRTY;
-				}
-				chunk->ndirty -= npages;
-				arena->ndirty -= npages;
-
-				madvise((void *)((uintptr_t)chunk + (j <<
-				    PAGE_SHIFT)), (npages << PAGE_SHIFT),
-				    MADV_DONTNEED);
-#ifdef JEMALLOC_STATS
-				arena->stats.nmadvise++;
-				arena->stats.purged += npages;
-#endif
-				if ((arena->nactive >> opt_lg_dirty_mult) >=
-				    arena->ndirty && i > 0)
-					break;
+		if (chunk == NULL) {
+			/*
+			 * This thread was unable to purge as many pages as
+			 * originally intended, due to races with other threads
+			 * that either did some of the purging work, or re-used
+			 * dirty pages.
+			 */
+			arena->npurgatory -= npurgatory;
+			return;
+		}
+		while (chunk->ndirty == 0) {
+			ql_remove(&arena->chunks_dirty, chunk, link_dirty);
+			chunk->dirtied = false;
+			chunk = ql_first(&arena->chunks_dirty);
+			if (chunk == NULL) {
+				/* Same logic as for above. */
+				arena->npurgatory -= npurgatory;
+				return;
 			}
 		}
 
-		if (chunk->ndirty == 0) {
-			ql_remove(&arena->chunks_dirty, chunk, link_dirty);
-			chunk->dirtied = false;
+		if (chunk->ndirty > npurgatory) {
+			/*
+			 * This thread will, at a minimum, purge all the dirty
+			 * pages in chunk, so set npurgatory to reflect this
+			 * thread's commitment to purge the pages.  This tends
+			 * to reduce the chances of the following scenario:
+			 *
+			 * 1) This thread sets arena->npurgatory such that
+			 *    (arena->ndirty - arena->npurgatory) is at the
+			 *    threshold.
+			 * 2) This thread drops arena->lock.
+			 * 3) Another thread causes one or more pages to be
+			 *    dirtied, and immediately determines that it must
+			 *    purge dirty pages.
+			 *
+			 * If this scenario *does* play out, that's okay,
+			 * because all of the purging work being done really
+			 * needs to happen.
+			 */
+			arena->npurgatory += chunk->ndirty - npurgatory;
+			npurgatory = chunk->ndirty;
 		}
+
+		arena->npurgatory -= chunk->ndirty;
+		npurgatory -= chunk->ndirty;
+		arena_chunk_purge(arena, chunk);
 	}
-	malloc_mutex_unlock(&purge_lock);
 }
 
 static void
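The npurgatory bookkeeping in the loop above is easier to follow with concrete numbers. The trace below uses made-up values (300 dirty pages, 6400 active pages, a lg_dirty_mult of 5) and plain local variables instead of jemalloc's structures; it mirrors the arithmetic of the new arena_purge() only, not the per-chunk work done by arena_chunk_purge().

#include <stdio.h>

int
main(void)
{
	size_t arena_ndirty = 300, arena_npurgatory = 0;
	size_t nactive = 6400;
	int lg_dirty_mult = 5;

	/* arena_purge(): claim just enough pages to get back to the threshold. */
	size_t quota = (arena_ndirty - arena_npurgatory) -
	    (nactive >> lg_dirty_mult);			/* 300 - 200 = 100 */
	arena_npurgatory += quota;

	/* Suppose the first dirty chunk holds 130 dirty pages, more than quota... */
	size_t chunk_ndirty = 130;
	if (chunk_ndirty > quota) {
		/* ...so commit to the whole chunk before dropping the lock. */
		arena_npurgatory += chunk_ndirty - quota;	/* +30 -> 130 */
		quota = chunk_ndirty;				/* 130 */
	}
	arena_npurgatory -= chunk_ndirty;	/* arena_chunk_purge() takes over */
	quota -= chunk_ndirty;			/* 0: this thread is done */

	printf("remaining quota %zu, arena npurgatory %zu\n",
	    quota, arena_npurgatory);
	return (0);
}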
@@ -685,11 +856,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
 			ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty);
 			chunk->dirtied = true;
 		}
-
-		/* Enforce opt_lg_dirty_mult. */
-		if (opt_lg_dirty_mult >= 0 && arena->ndirty > chunk_npages &&
-		    (arena->nactive >> opt_lg_dirty_mult) < arena->ndirty)
-			arena_purge(arena);
+		arena_maybe_purge(arena);
 	}
 }
 
@@ -1426,10 +1593,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
 		ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty);
 		chunk->dirtied = true;
 	}
-	/* Enforce opt_lg_dirty_mult. */
-	if (opt_lg_dirty_mult >= 0 && arena->ndirty > chunk_npages &&
-	    (arena->nactive >> opt_lg_dirty_mult) < arena->ndirty)
-		arena_purge(arena);
+	arena_maybe_purge(arena);
 
 	malloc_mutex_unlock(&arena->lock);
 #ifdef JEMALLOC_STATS
@@ -1842,6 +2006,7 @@ arena_new(arena_t *arena, unsigned ind)
 
 	arena->nactive = 0;
 	arena->ndirty = 0;
+	arena->npurgatory = 0;
 
 	arena_avail_tree_new(&arena->runs_avail);
 
@@ -2155,8 +2320,5 @@ arena_boot(void)
 	    ((header_size & PAGE_MASK) != 0);
 	arena_maxclass = chunksize - (arena_chunk_header_npages << PAGE_SHIFT);
 
-	if (malloc_mutex_init(&purge_lock))
-		return (true);
-
 	return (false);
 }