diff --git a/ChangeLog b/ChangeLog index 9a618724..edba5269 100644 --- a/ChangeLog +++ b/ChangeLog @@ -8,9 +8,14 @@ found in the git revision history: * 3.x.x (XXX Not released) + Incompatible changes: + - Change the "opt.lg_dirty_mult" default from 5 to 3 (32:1 to 8:1). + Bug fixes: - Fix dss/mmap allocation precedence code to use recyclable mmap memory only after primary dss allocation fails. + - Fix deadlock in the "arenas.purge" mallctl. This regression was introduced + in 3.1.0 by the addition of the "arena.<i>.purge" mallctl. * 3.1.0 (October 16, 2012) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 441c1a4a..754fdcc7 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -833,7 +833,7 @@ for (i = 0; i < nbins; i++) { 2 or a similar system call. This provides the kernel with sufficient information to recycle dirty pages if physical memory becomes scarce and the pages remain unused. The - default minimum ratio is 32:1 (2^5:1); an option value of -1 will + default minimum ratio is 8:1 (2^3:1); an option value of -1 will disable dirty page purging. diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 49213e32..561c9b6f 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -38,10 +38,10 @@ * * (nactive >> opt_lg_dirty_mult) >= ndirty * - * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32 - * times as many active pages as dirty pages. + * So, supposing that opt_lg_dirty_mult is 3, there can be no less than 8 times + * as many active pages as dirty pages. */ -#define LG_DIRTY_MULT_DEFAULT 5 +#define LG_DIRTY_MULT_DEFAULT 3 typedef struct arena_chunk_map_s arena_chunk_map_t; typedef struct arena_chunk_s arena_chunk_t; @@ -69,7 +69,7 @@ struct arena_chunk_map_s { /* * Linkage for run trees. There are two disjoint uses: * - * 1) arena_t's runs_avail_{clean,dirty} trees. + * 1) arena_t's runs_avail tree. * 2) arena_run_t conceptually uses this linkage for in-use * non-full runs, rather than directly embedding linkage. */ @@ -162,20 +162,24 @@ typedef rb_tree(arena_chunk_map_t) arena_run_tree_t; /* Arena chunk header. */ struct arena_chunk_s { /* Arena that owns the chunk. */ - arena_t *arena; + arena_t *arena; - /* Linkage for the arena's chunks_dirty list. */ - ql_elm(arena_chunk_t) link_dirty; - - /* - * True if the chunk is currently in the chunks_dirty list, due to - * having at some point contained one or more dirty pages. Removal - * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible. - */ - bool dirtied; + /* Linkage for tree of arena chunks that contain dirty runs. */ + rb_node(arena_chunk_t) dirty_link; /* Number of dirty pages. */ - size_t ndirty; + size_t ndirty; + + /* Number of available runs. */ + size_t nruns_avail; + + /* + * Number of available run adjacencies. Clean and dirty available runs + * are not coalesced, which causes virtual memory fragmentation. The + * ratio of (nruns_avail-nruns_adjac):nruns_adjac is used for tracking + * this fragmentation. + */ + size_t nruns_adjac; /* * Map of pages within chunk that keeps track of free/large/small. The @@ -183,7 +187,7 @@ struct arena_chunk_s { * need to be tracked in the map. This omission saves a header page * for common chunk sizes (e.g. 4 MiB). */ - arena_chunk_map_t map[1]; /* Dynamically sized. */ + arena_chunk_map_t map[1]; /* Dynamically sized.
*/ }; typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; @@ -333,8 +337,8 @@ struct arena_s { dss_prec_t dss_prec; - /* List of dirty-page-containing chunks this arena manages. */ - ql_head(arena_chunk_t) chunks_dirty; + /* Tree of dirty-page-containing chunks this arena manages. */ + arena_chunk_tree_t chunks_dirty; /* * In order to avoid rapid chunk allocation/deallocation when an arena @@ -369,18 +373,9 @@ struct arena_s { /* * Size/address-ordered trees of this arena's available runs. The trees - * are used for first-best-fit run allocation. The dirty tree contains - * runs with dirty pages (i.e. very likely to have been touched and - * therefore have associated physical pages), whereas the clean tree - * contains runs with pages that either have no associated physical - * pages, or have pages that the kernel may recycle at any time due to - * previous madvise(2) calls. The dirty tree is used in preference to - * the clean tree for allocations, because using dirty pages reduces - * the amount of dirty purging necessary to keep the active:dirty page - * ratio below the purge threshold. + * are used for first-best-fit run allocation. */ - arena_avail_tree_t runs_avail_clean; - arena_avail_tree_t runs_avail_dirty; + arena_avail_tree_t runs_avail; /* bins is used to store trees of free regions. */ arena_bin_t bins[NBINS]; diff --git a/src/arena.c b/src/arena.c index 1e6964a8..b93a6799 100644 --- a/src/arena.c +++ b/src/arena.c @@ -40,6 +40,12 @@ const uint8_t small_size2bin[] = { /******************************************************************************/ /* Function prototypes for non-inline static functions. */ +static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, + size_t pageind, size_t npages, bool maybe_adjac_pred, + bool maybe_adjac_succ); +static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, + size_t pageind, size_t npages, bool maybe_adjac_pred, + bool maybe_adjac_succ); static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, size_t binind, bool zero); static arena_chunk_t *arena_chunk_alloc(arena_t *arena); @@ -48,8 +54,11 @@ static arena_run_t *arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind, bool zero); static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, bool zero); +static arena_chunk_t *chunks_dirty_iter_cb(arena_chunk_tree_t *tree, + arena_chunk_t *chunk, void *arg); static void arena_purge(arena_t *arena, bool all); -static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty); +static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, + bool cleaned); static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize); static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, @@ -101,9 +110,6 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) size_t a_size = a->bits & ~PAGE_MASK; size_t b_size = b->bits & ~PAGE_MASK; - assert((a->bits & CHUNK_MAP_KEY) == CHUNK_MAP_KEY || (a->bits & - CHUNK_MAP_DIRTY) == (b->bits & CHUNK_MAP_DIRTY)); - ret = (a_size > b_size) - (a_size < b_size); if (ret == 0) { uintptr_t a_mapelm, b_mapelm; @@ -129,6 +135,159 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, u.rb_link, arena_avail_comp) +static inline int +arena_chunk_dirty_comp(arena_chunk_t *a, arena_chunk_t *b) +{ + size_t a_val, b_val; + + assert(a != 
NULL); + assert(b != NULL); + + /* + * Order such that chunks with higher fragmentation are "less than" + * those with lower fragmentation. Fragmentation is measured as: + * + * mean current avail run size + * -------------------------------- + * mean defragmented avail run size + * + * navail + * ----------- + * nruns_avail nruns_avail-nruns_adjac + * = ========================= = ----------------------- + * navail nruns_avail + * ----------------------- + * nruns_avail-nruns_adjac + * + * The following code multiplies away the denominator prior to + * comparison, in order to avoid division. + * + */ + a_val = (a->nruns_avail - a->nruns_adjac) * b->nruns_avail; + b_val = (b->nruns_avail - b->nruns_adjac) * a->nruns_avail; + if (a_val < b_val) + return (1); + if (a_val > b_val) + return (-1); + /* Break ties by chunk address. */ + { + uintptr_t a_chunk = (uintptr_t)a; + uintptr_t b_chunk = (uintptr_t)b; + + return ((a_chunk > b_chunk) - (a_chunk < b_chunk)); + } +} + +/* Generate red-black tree functions. */ +rb_gen(static UNUSED, arena_chunk_dirty_, arena_chunk_tree_t, arena_chunk_t, + dirty_link, arena_chunk_dirty_comp) + +static inline bool +arena_avail_adjac_pred(arena_chunk_t *chunk, size_t pageind) +{ + bool ret; + + if (pageind-1 < map_bias) + ret = false; + else { + ret = (arena_mapbits_allocated_get(chunk, pageind-1) == 0); + assert(ret == false || arena_mapbits_dirty_get(chunk, + pageind-1) != arena_mapbits_dirty_get(chunk, pageind)); + } + return (ret); +} + +static inline bool +arena_avail_adjac_succ(arena_chunk_t *chunk, size_t pageind, size_t npages) +{ + bool ret; + + if (pageind+npages == chunk_npages) + ret = false; + else { + assert(pageind+npages < chunk_npages); + ret = (arena_mapbits_allocated_get(chunk, pageind+npages) == 0); + assert(ret == false || arena_mapbits_dirty_get(chunk, pageind) + != arena_mapbits_dirty_get(chunk, pageind+npages)); + } + return (ret); +} + +static inline bool +arena_avail_adjac(arena_chunk_t *chunk, size_t pageind, size_t npages) +{ + + return (arena_avail_adjac_pred(chunk, pageind) || + arena_avail_adjac_succ(chunk, pageind, npages)); +} + +static void +arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, + size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ) +{ + + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> + LG_PAGE)); + + /* + * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be + * removed and reinserted even if the run to be inserted is clean. + */ + if (chunk->ndirty != 0) + arena_chunk_dirty_remove(&arena->chunks_dirty, chunk); + + if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind)) + chunk->nruns_adjac++; + if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages)) + chunk->nruns_adjac++; + chunk->nruns_avail++; + assert(chunk->nruns_avail > chunk->nruns_adjac); + + if (arena_mapbits_dirty_get(chunk, pageind) != 0) { + arena->ndirty += npages; + chunk->ndirty += npages; + } + if (chunk->ndirty != 0) + arena_chunk_dirty_insert(&arena->chunks_dirty, chunk); + + arena_avail_tree_insert(&arena->runs_avail, arena_mapp_get(chunk, + pageind)); +} + +static void +arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, + size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ) +{ + + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> + LG_PAGE)); + + /* + * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be + * removed and reinserted even if the run to be removed is clean. 
+ */ + if (chunk->ndirty != 0) + arena_chunk_dirty_remove(&arena->chunks_dirty, chunk); + + if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind)) + chunk->nruns_adjac--; + if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages)) + chunk->nruns_adjac--; + chunk->nruns_avail--; + assert(chunk->nruns_avail > chunk->nruns_adjac || (chunk->nruns_avail + == 0 && chunk->nruns_adjac == 0)); + + if (arena_mapbits_dirty_get(chunk, pageind) != 0) { + arena->ndirty -= npages; + chunk->ndirty -= npages; + } + if (chunk->ndirty != 0) + arena_chunk_dirty_insert(&arena->chunks_dirty, chunk); + + arena_avail_tree_remove(&arena->runs_avail, arena_mapp_get(chunk, + pageind)); +} + static inline void * arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { @@ -193,7 +352,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, arena_chunk_t *chunk; size_t run_ind, total_pages, need_pages, rem_pages, i; size_t flag_dirty; - arena_avail_tree_t *runs_avail; assert((large && binind == BININD_INVALID) || (large == false && binind != BININD_INVALID)); @@ -201,8 +359,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); - runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty : - &arena->runs_avail_clean; total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> LG_PAGE; assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == @@ -212,7 +368,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; - arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, run_ind)); + arena_avail_remove(arena, chunk, run_ind, total_pages, true, true); if (config_stats) { /* * Update stats_cactive if nactive is crossing a chunk @@ -244,14 +400,8 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, arena_mapbits_unzeroed_get(chunk, run_ind+total_pages-1)); } - arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, - run_ind+need_pages)); - } - - /* Update dirty page accounting. */ - if (flag_dirty != 0) { - chunk->ndirty -= need_pages; - arena->ndirty -= need_pages; + arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages, + false, true); } /* @@ -344,8 +494,6 @@ arena_chunk_alloc(arena_t *arena) size_t i; if (arena->spare != NULL) { - arena_avail_tree_t *runs_avail; - chunk = arena->spare; arena->spare = NULL; @@ -357,14 +505,6 @@ arena_chunk_alloc(arena_t *arena) chunk_npages-1) == arena_maxclass); assert(arena_mapbits_dirty_get(chunk, map_bias) == arena_mapbits_dirty_get(chunk, chunk_npages-1)); - - /* Insert the run into the appropriate runs_avail_* tree. */ - if (arena_mapbits_dirty_get(chunk, map_bias) == 0) - runs_avail = &arena->runs_avail_clean; - else - runs_avail = &arena->runs_avail_dirty; - arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, - map_bias)); } else { bool zero; size_t unzeroed; @@ -380,8 +520,6 @@ arena_chunk_alloc(arena_t *arena) arena->stats.mapped += chunksize; chunk->arena = arena; - ql_elm_new(chunk, link_dirty); - chunk->dirtied = false; /* * Claim that no pages are in use, since the header is merely @@ -389,6 +527,9 @@ arena_chunk_alloc(arena_t *arena) */ chunk->ndirty = 0; + chunk->nruns_avail = 0; + chunk->nruns_adjac = 0; + /* * Initialize the map to contain one maximal free untouched run. 
* Mark the pages as zeroed iff chunk_alloc() returned a zeroed @@ -412,20 +553,18 @@ arena_chunk_alloc(arena_t *arena) } arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxclass, unzeroed); - - /* Insert the run into the runs_avail_clean tree. */ - arena_avail_tree_insert(&arena->runs_avail_clean, - arena_mapp_get(chunk, map_bias)); } + /* Insert the run into the runs_avail tree. */ + arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias, + false, false); + return (chunk); } static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) { - arena_avail_tree_t *runs_avail; - assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == @@ -436,24 +575,16 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) arena_mapbits_dirty_get(chunk, chunk_npages-1)); /* - * Remove run from the appropriate runs_avail_* tree, so that the arena - * does not use it. + * Remove run from the runs_avail tree, so that the arena does not use + * it. */ - if (arena_mapbits_dirty_get(chunk, map_bias) == 0) - runs_avail = &arena->runs_avail_clean; - else - runs_avail = &arena->runs_avail_dirty; - arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, map_bias)); + arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias, + false, false); if (arena->spare != NULL) { arena_chunk_t *spare = arena->spare; arena->spare = chunk; - if (spare->dirtied) { - ql_remove(&chunk->arena->chunks_dirty, spare, - link_dirty); - arena->ndirty -= spare->ndirty; - } malloc_mutex_unlock(&arena->lock); chunk_dealloc((void *)spare, chunksize, true); malloc_mutex_lock(&arena->lock); @@ -471,19 +602,7 @@ arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind, arena_chunk_map_t *mapelm, key; key.bits = size | CHUNK_MAP_KEY; - mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) - + map_bias; - - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - LG_PAGE)); - arena_run_split(arena, run, size, large, binind, zero); - return (run); - } - mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); + mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key); if (mapelm != NULL) { arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); size_t pageind = (((uintptr_t)mapelm - @@ -537,29 +656,40 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, static inline void arena_maybe_purge(arena_t *arena) { + size_t npurgeable, threshold; - /* Enforce opt_lg_dirty_mult. */ - if (opt_lg_dirty_mult >= 0 && arena->ndirty > arena->npurgatory && - (arena->ndirty - arena->npurgatory) > chunk_npages && - (arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - - arena->npurgatory)) - arena_purge(arena, false); + /* Don't purge if the option is disabled. */ + if (opt_lg_dirty_mult < 0) + return; + /* Don't purge if all dirty pages are already being purged. */ + if (arena->ndirty <= arena->npurgatory) + return; + npurgeable = arena->ndirty - arena->npurgatory; + threshold = (arena->nactive >> opt_lg_dirty_mult); + /* + * Don't purge unless the number of purgeable pages exceeds the + * threshold. 
+ */ + if (npurgeable <= threshold) + return; + + arena_purge(arena, false); } -static inline void -arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) +static inline size_t +arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk, bool all) { + size_t npurged; ql_head(arena_chunk_map_t) mapelms; arena_chunk_map_t *mapelm; - size_t pageind; - size_t ndirty; + size_t pageind, npages; size_t nmadvise; ql_new(&mapelms); /* * If chunk is the spare, temporarily re-allocate it, 1) so that its - * run is reinserted into runs_avail_dirty, and 2) so that it cannot be + * run is reinserted into runs_avail, and 2) so that it cannot be * completely discarded by another thread while arena->lock is dropped * by this thread. Note that the arena_run_dalloc() call will * implicitly deallocate the chunk, so no explicit action is required @@ -579,54 +709,50 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) arena_chunk_alloc(arena); } - /* Temporarily allocate all free dirty runs within chunk. */ - for (pageind = map_bias; pageind < chunk_npages;) { + if (config_stats) + arena->stats.purged += chunk->ndirty; + + /* + * Operate on all dirty runs if there is no clean/dirty run + * fragmentation. + */ + if (chunk->nruns_adjac == 0) + all = true; + + /* + * Temporarily allocate free dirty runs within chunk. If all is false, + * only operate on dirty runs that are fragments; otherwise operate on + * all dirty runs. + */ + for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { mapelm = arena_mapp_get(chunk, pageind); if (arena_mapbits_allocated_get(chunk, pageind) == 0) { - size_t npages; + size_t run_size = + arena_mapbits_unallocated_size_get(chunk, pageind); - npages = arena_mapbits_unallocated_size_get(chunk, - pageind) >> LG_PAGE; + npages = run_size >> LG_PAGE; assert(pageind + npages <= chunk_npages); assert(arena_mapbits_dirty_get(chunk, pageind) == arena_mapbits_dirty_get(chunk, pageind+npages-1)); - if (arena_mapbits_dirty_get(chunk, pageind) != 0) { - arena_avail_tree_remove( - &arena->runs_avail_dirty, mapelm); - arena_mapbits_large_set(chunk, pageind, - (npages << LG_PAGE), 0); - if (npages > 1) { - arena_mapbits_large_set(chunk, - pageind+npages-1, 0, 0); - } + if (arena_mapbits_dirty_get(chunk, pageind) != 0 && + (all || arena_avail_adjac(chunk, pageind, + npages))) { + arena_run_t *run = (arena_run_t *)((uintptr_t) + chunk + (uintptr_t)(pageind << LG_PAGE)); - if (config_stats) { - /* - * Update stats_cactive if nactive is - * crossing a chunk multiple. - */ - size_t cactive_diff = - CHUNK_CEILING((arena->nactive + - npages) << LG_PAGE) - - CHUNK_CEILING(arena->nactive << - LG_PAGE); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); - } - arena->nactive += npages; + arena_run_split(arena, run, run_size, true, + BININD_INVALID, false); /* Append to list for later processing. */ ql_elm_new(mapelm, u.ql_link); ql_tail_insert(&mapelms, mapelm, u.ql_link); } - - pageind += npages; } else { - /* Skip allocated run. */ - if (arena_mapbits_large_get(chunk, pageind)) - pageind += arena_mapbits_large_size_get(chunk, + /* Skip run. 
*/ + if (arena_mapbits_large_get(chunk, pageind) != 0) { + npages = arena_mapbits_large_size_get(chunk, pageind) >> LG_PAGE; - else { + } else { size_t binind; arena_bin_info_t *bin_info; arena_run_t *run = (arena_run_t *)((uintptr_t) @@ -636,36 +762,27 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) pageind) == 0); binind = arena_bin_index(arena, run->bin); bin_info = &arena_bin_info[binind]; - pageind += bin_info->run_size >> LG_PAGE; + npages = bin_info->run_size >> LG_PAGE; } } } assert(pageind == chunk_npages); - - if (config_debug) - ndirty = chunk->ndirty; - if (config_stats) - arena->stats.purged += chunk->ndirty; - arena->ndirty -= chunk->ndirty; - chunk->ndirty = 0; - ql_remove(&arena->chunks_dirty, chunk, link_dirty); - chunk->dirtied = false; + assert(chunk->ndirty == 0 || all == false); + assert(chunk->nruns_adjac == 0); malloc_mutex_unlock(&arena->lock); if (config_stats) nmadvise = 0; + npurged = 0; ql_foreach(mapelm, &mapelms, u.ql_link) { - size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; - size_t npages = arena_mapbits_large_size_get(chunk, pageind) >> - LG_PAGE; bool unzeroed; size_t flag_unzeroed, i; + pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; + npages = arena_mapbits_large_size_get(chunk, pageind) >> + LG_PAGE; assert(pageind + npages <= chunk_npages); - assert(ndirty >= npages); - if (config_debug) - ndirty -= npages; unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind << LG_PAGE)), (npages << LG_PAGE)); flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0; @@ -683,10 +800,10 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) arena_mapbits_unzeroed_set(chunk, pageind+i, flag_unzeroed); } + npurged += npages; if (config_stats) nmadvise++; } - assert(ndirty == 0); malloc_mutex_lock(&arena->lock); if (config_stats) arena->stats.nmadvise += nmadvise; @@ -694,14 +811,27 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) /* Deallocate runs. 
*/ for (mapelm = ql_first(&mapelms); mapelm != NULL; mapelm = ql_first(&mapelms)) { - size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)(pageind << LG_PAGE)); + arena_run_t *run; + pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << + LG_PAGE)); ql_remove(&mapelms, mapelm, u.ql_link); - arena_run_dalloc(arena, run, false); + arena_run_dalloc(arena, run, false, true); } + + return (npurged); +} + +static arena_chunk_t * +chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg) +{ + size_t *ndirty = (size_t *)arg; + + assert(chunk->ndirty != 0); + *ndirty += chunk->ndirty; + return (NULL); } static void @@ -712,14 +842,11 @@ arena_purge(arena_t *arena, bool all) if (config_debug) { size_t ndirty = 0; - ql_foreach(chunk, &arena->chunks_dirty, link_dirty) { - assert(chunk->dirtied); - ndirty += chunk->ndirty; - } + arena_chunk_dirty_iter(&arena->chunks_dirty, NULL, + chunks_dirty_iter_cb, (void *)&ndirty); assert(ndirty == arena->ndirty); } assert(arena->ndirty > arena->npurgatory || all); - assert(arena->ndirty - arena->npurgatory > chunk_npages || all); assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - arena->npurgatory) || all); @@ -731,16 +858,24 @@ arena_purge(arena_t *arena, bool all) * purge, and add the result to arena->npurgatory. This will keep * multiple threads from racing to reduce ndirty below the threshold. */ - npurgatory = arena->ndirty - arena->npurgatory; - if (all == false) { - assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult); - npurgatory -= arena->nactive >> opt_lg_dirty_mult; + { + size_t npurgeable = arena->ndirty - arena->npurgatory; + + if (all == false) { + size_t threshold = (arena->nactive >> + opt_lg_dirty_mult); + + npurgatory = npurgeable - threshold; + } else + npurgatory = npurgeable; } arena->npurgatory += npurgatory; while (npurgatory > 0) { + size_t npurgeable, npurged, nunpurged; + /* Get next chunk with dirty pages. */ - chunk = ql_first(&arena->chunks_dirty); + chunk = arena_chunk_dirty_first(&arena->chunks_dirty); if (chunk == NULL) { /* * This thread was unable to purge as many pages as @@ -751,23 +886,15 @@ arena_purge(arena_t *arena, bool all) arena->npurgatory -= npurgatory; return; } - while (chunk->ndirty == 0) { - ql_remove(&arena->chunks_dirty, chunk, link_dirty); - chunk->dirtied = false; - chunk = ql_first(&arena->chunks_dirty); - if (chunk == NULL) { - /* Same logic as for above. */ - arena->npurgatory -= npurgatory; - return; - } - } + npurgeable = chunk->ndirty; + assert(npurgeable != 0); - if (chunk->ndirty > npurgatory) { + if (npurgeable > npurgatory && chunk->nruns_adjac == 0) { /* - * This thread will, at a minimum, purge all the dirty - * pages in chunk, so set npurgatory to reflect this - * thread's commitment to purge the pages. This tends - * to reduce the chances of the following scenario: + * This thread will purge all the dirty pages in chunk, + * so set npurgatory to reflect this thread's intent to + * purge the pages. This tends to reduce the chances + * of the following scenario: * * 1) This thread sets arena->npurgatory such that * (arena->ndirty - arena->npurgatory) is at the @@ -781,13 +908,20 @@ arena_purge(arena_t *arena, bool all) * because all of the purging work being done really * needs to happen. 
*/ - arena->npurgatory += chunk->ndirty - npurgatory; - npurgatory = chunk->ndirty; + arena->npurgatory += npurgeable - npurgatory; + npurgatory = npurgeable; } - arena->npurgatory -= chunk->ndirty; - npurgatory -= chunk->ndirty; - arena_chunk_purge(arena, chunk); + /* + * Keep track of how many pages are purgeable, versus how many + * actually get purged, and adjust counters accordingly. + */ + arena->npurgatory -= npurgeable; + npurgatory -= npurgeable; + npurged = arena_chunk_purge(arena, chunk, all); + nunpurged = npurgeable - npurged; + arena->npurgatory += nunpurged; + npurgatory += nunpurged; } } @@ -801,11 +935,10 @@ arena_purge_all(arena_t *arena) } static void -arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) +arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) { arena_chunk_t *chunk; size_t size, run_ind, run_pages, flag_dirty; - arena_avail_tree_t *runs_avail; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); @@ -836,15 +969,14 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) /* * The run is dirty if the caller claims to have dirtied it, as well as - * if it was already dirty before being allocated. + * if it was already dirty before being allocated and the caller + * doesn't claim to have cleaned it. */ assert(arena_mapbits_dirty_get(chunk, run_ind) == arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - if (arena_mapbits_dirty_get(chunk, run_ind) != 0) + if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0) dirty = true; flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; - runs_avail = dirty ? &arena->runs_avail_dirty : - &arena->runs_avail_clean; /* Mark pages as unallocated in the chunk map. */ if (dirty) { @@ -852,9 +984,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) CHUNK_MAP_DIRTY); arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, CHUNK_MAP_DIRTY); - - chunk->ndirty += run_pages; - arena->ndirty += run_pages; } else { arena_mapbits_unallocated_set(chunk, run_ind, size, arena_mapbits_unzeroed_get(chunk, run_ind)); @@ -878,8 +1007,8 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) run_ind+run_pages+nrun_pages-1) == nrun_size); assert(arena_mapbits_dirty_get(chunk, run_ind+run_pages+nrun_pages-1) == flag_dirty); - arena_avail_tree_remove(runs_avail, - arena_mapp_get(chunk, run_ind+run_pages)); + arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages, + false, true); size += nrun_size; run_pages += nrun_pages; @@ -905,8 +1034,8 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == prun_size); assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty); - arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, - run_ind)); + arena_avail_remove(arena, chunk, run_ind, prun_pages, true, + false); size += prun_size; run_pages += prun_pages; @@ -921,19 +1050,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1)); assert(arena_mapbits_dirty_get(chunk, run_ind) == arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, run_ind)); - - if (dirty) { - /* - * Insert into chunks_dirty before potentially calling - * arena_chunk_dealloc(), so that chunks_dirty and - * arena->ndirty are consistent. 
- */ - if (chunk->dirtied == false) { - ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty); - chunk->dirtied = true; - } - } + arena_avail_insert(arena, chunk, run_ind, run_pages, true, true); /* Deallocate chunk if it is now completely unused. */ if (size == arena_maxclass) { @@ -982,7 +1099,7 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_mapbits_large_set(chunk, pageind+head_npages, newsize, flag_dirty); - arena_run_dalloc(arena, run, false); + arena_run_dalloc(arena, run, false, false); } static void @@ -1015,7 +1132,7 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, flag_dirty); arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), - dirty); + dirty, false); } static arena_run_t * @@ -1526,7 +1643,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, ((past - run_ind) << LG_PAGE), false); /* npages = past - run_ind; */ } - arena_run_dalloc(arena, run, true); + arena_run_dalloc(arena, run, true, false); malloc_mutex_unlock(&arena->lock); /****************************/ malloc_mutex_lock(&bin->lock); @@ -1638,7 +1755,7 @@ arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) } } - arena_run_dalloc(arena, (arena_run_t *)ptr, true); + arena_run_dalloc(arena, (arena_run_t *)ptr, true, false); } void @@ -1985,15 +2102,14 @@ arena_new(arena_t *arena, unsigned ind) arena->dss_prec = chunk_dss_prec_get(); /* Initialize chunks. */ - ql_new(&arena->chunks_dirty); + arena_chunk_dirty_new(&arena->chunks_dirty); arena->spare = NULL; arena->nactive = 0; arena->ndirty = 0; arena->npurgatory = 0; - arena_avail_tree_new(&arena->runs_avail_clean); - arena_avail_tree_new(&arena->runs_avail_dirty); + arena_avail_tree_new(&arena->runs_avail); /* Initialize bins. */ for (i = 0; i < NBINS; i++) {
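
The ratio change above (LG_DIRTY_MULT_DEFAULT 5 -> 3, i.e. 32:1 -> 8:1) is easiest to see with concrete numbers. The following is a minimal standalone sketch of the threshold arithmetic that arena_maybe_purge() performs after this patch; should_purge() and the page counts in main() are invented for illustration and are not jemalloc code.

/*
 * Hypothetical standalone model of the purge threshold; not jemalloc code.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/types.h>

#define LG_DIRTY_MULT_DEFAULT 3 /* 8:1 active:dirty; the old default of 5 meant 32:1. */

static bool
should_purge(ssize_t lg_dirty_mult, size_t nactive, size_t ndirty,
    size_t npurgatory)
{
	size_t npurgeable, threshold;

	if (lg_dirty_mult < 0)
		return (false); /* Purging disabled. */
	if (ndirty <= npurgatory)
		return (false); /* All dirty pages are already being purged. */
	npurgeable = ndirty - npurgatory;
	threshold = nactive >> lg_dirty_mult;
	/* Purge only once the purgeable pages exceed the threshold. */
	return (npurgeable > threshold);
}

int
main(void)
{
	/* With 1024 active pages, the new 8:1 default tolerates 128 dirty pages. */
	printf("%d\n", should_purge(LG_DIRTY_MULT_DEFAULT, 1024, 128, 0)); /* 0 */
	printf("%d\n", should_purge(LG_DIRTY_MULT_DEFAULT, 1024, 129, 0)); /* 1 */
	/* The old 32:1 default would already purge at 33 dirty pages. */
	printf("%d\n", should_purge(5, 1024, 33, 0)); /* 1 */
	return (0);
}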
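nruns_adjac, maintained by arena_avail_insert()/arena_avail_remove(), counts borders between available runs that differ in dirty state, since such runs are deliberately left uncoalesced. The toy model below mirrors the boundary tests of arena_avail_adjac_pred() and arena_avail_adjac_succ() under an assumed simplified page map; page_t, MAP_BIAS, and CHUNK_NPAGES are illustrative stand-ins, not jemalloc definitions.

/*
 * Toy model of the clean/dirty adjacency test; not jemalloc code.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MAP_BIAS     1 /* First usable page; header pages precede it. */
#define CHUNK_NPAGES 8 /* Pages per chunk in this toy model. */

typedef struct {
	bool allocated;
	bool dirty;
} page_t;

/*
 * The free run occupying [pageind, pageind+npages) has a predecessor/successor
 * adjacency if the neighboring page is inside the usable range and is itself
 * unallocated.  Free neighbors with the same dirty state would already have
 * been coalesced, so an unallocated neighbor implies a clean/dirty border.
 * Callers pass pageind >= MAP_BIAS, as in the patch.
 */
static bool
adjac_pred(const page_t *pages, size_t pageind)
{
	if (pageind - 1 < MAP_BIAS)
		return (false);
	return (pages[pageind - 1].allocated == false);
}

static bool
adjac_succ(const page_t *pages, size_t pageind, size_t npages)
{
	if (pageind + npages == CHUNK_NPAGES)
		return (false);
	return (pages[pageind + npages].allocated == false);
}

int
main(void)
{
	/*
	 * Page 0 is the header; pages 1-2 are a dirty free run; pages 3-4 are
	 * a clean free run; pages 5-7 are allocated.  The two free runs touch,
	 * so each contributes one adjacency to nruns_adjac.
	 */
	page_t pages[CHUNK_NPAGES] = {
		{true, false},
		{false, true}, {false, true},
		{false, false}, {false, false},
		{true, false}, {true, false}, {true, false},
	};

	printf("dirty run: pred=%d succ=%d\n",
	    adjac_pred(pages, 1), adjac_succ(pages, 1, 2)); /* 0 1 */
	printf("clean run: pred=%d succ=%d\n",
	    adjac_pred(pages, 3), adjac_succ(pages, 3, 2)); /* 1 0 */
	return (0);
}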
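arena_chunk_dirty_comp() keys the chunks_dirty tree on the ratio (nruns_avail - nruns_adjac) / nruns_avail, i.e. mean current available run size over mean defragmented available run size. A chunk with no clean/dirty adjacencies has ratio 1 and sorts first, so arena_purge() prefers chunks whose dirty runs can be purged without creating new fragmentation; division is avoided by cross-multiplying. The sketch below reimplements just that comparison under the same return convention; chunk_frag_t is a stand-in struct and the address-based tie break is omitted.

/*
 * Illustrative reimplementation of the fragmentation comparison; not the
 * jemalloc arena_chunk_dirty_comp() itself.
 */
#include <stddef.h>
#include <stdio.h>

typedef struct {
	size_t nruns_avail; /* Available runs in the chunk. */
	size_t nruns_adjac; /* Clean/dirty adjacencies among those runs. */
} chunk_frag_t;

static int
chunk_dirty_comp(const chunk_frag_t *a, const chunk_frag_t *b)
{
	/*
	 * Compare (nruns_avail-nruns_adjac)/nruns_avail for a and b by
	 * cross-multiplying away the denominators.  A larger ratio means the
	 * chunk's available runs are already well coalesced; such chunks
	 * compare "less than" others and are therefore purged first.
	 */
	size_t a_val = (a->nruns_avail - a->nruns_adjac) * b->nruns_avail;
	size_t b_val = (b->nruns_avail - b->nruns_adjac) * a->nruns_avail;

	if (a_val < b_val)
		return (1);
	if (a_val > b_val)
		return (-1);
	return (0); /* The patch breaks ties by chunk address instead. */
}

int
main(void)
{
	chunk_frag_t a = {4, 0}; /* No adjacencies: ratio 4/4 = 1. */
	chunk_frag_t b = {4, 3}; /* Heavily fragmented: ratio 1/4. */

	/* a_val = 4*4 = 16, b_val = 1*4 = 4, so a sorts ahead of b. */
	printf("%d\n", chunk_dirty_comp(&a, &b)); /* -1 */
	return (0);
}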