Fix mlockall()/madvise() interaction.

mlockall(2) can cause purging via madvise(2) to fail.  Fix purging code
to check whether madvise() succeeded, and base zeroed page metadata on
the result.

Reported by Olivier Lecomte.
Jason Evans, 2012-10-08 17:56:11 -07:00
commit 7de92767c2 (parent f4c3f8545b)
5 changed files with 48 additions and 41 deletions
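
For context, a minimal standalone sketch of the failure mode this commit addresses (illustrative only, not jemalloc code): once a process has called mlockall(2), madvise(MADV_DONTNEED) on a locked anonymous mapping fails (EINVAL on Linux), so the pages keep their old contents instead of reading back as zeros. The old purging code ignored the madvise() result and assumed the pages were zeroed.

/*
 * Illustration only; mlockall() may require privileges or a raised
 * RLIMIT_MEMLOCK, and the exact errno is platform-dependent.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int
main(void)
{
    size_t len = 1 << 20;
    unsigned char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (p == MAP_FAILED)
        return (1);
    memset(p, 0xa5, len);                /* Dirty the pages. */
    mlockall(MCL_CURRENT | MCL_FUTURE);  /* Lock current and future mappings. */
    if (madvise(p, len, MADV_DONTNEED) != 0) {
        /* Purge failed; the pages still hold 0xa5, not zeros. */
        printf("madvise: %s, first byte: %#x\n", strerror(errno), p[0]);
    }
    return (0);
}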


@@ -9,7 +9,7 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
-void pages_purge(void *addr, size_t length);
+bool pages_purge(void *addr, size_t length);
 void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero);
 bool chunk_dealloc_mmap(void *chunk, size_t size);


@@ -23,6 +23,9 @@ struct extent_node_s {
     /* Total region size. */
     size_t size;
+    /* True if zero-filled; used by chunk recycling code. */
+    bool zeroed;
 };
 typedef rb_tree(extent_node_t) extent_tree_t;


@@ -551,24 +551,12 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
 {
     ql_head(arena_chunk_map_t) mapelms;
     arena_chunk_map_t *mapelm;
-    size_t pageind, flag_unzeroed;
+    size_t pageind;
     size_t ndirty;
     size_t nmadvise;
     ql_new(&mapelms);
-    flag_unzeroed =
-#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
-    /*
-     * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous
-     * mappings.
-     */
-        0
-#else
-        CHUNK_MAP_UNZEROED
-#endif
-        ;
     /*
      * If chunk is the spare, temporarily re-allocate it, 1) so that its
      * run is reinserted into runs_avail_dirty, and 2) so that it cannot be
@@ -603,26 +591,12 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
         assert(arena_mapbits_dirty_get(chunk, pageind) ==
             arena_mapbits_dirty_get(chunk, pageind+npages-1));
         if (arena_mapbits_dirty_get(chunk, pageind) != 0) {
-            size_t i;
             arena_avail_tree_remove(
                 &arena->runs_avail_dirty, mapelm);
-            arena_mapbits_unzeroed_set(chunk, pageind,
-                flag_unzeroed);
             arena_mapbits_large_set(chunk, pageind,
                 (npages << LG_PAGE), 0);
-            /*
-             * Update internal elements in the page map, so
-             * that CHUNK_MAP_UNZEROED is properly set.
-             */
-            for (i = 1; i < npages - 1; i++) {
-                arena_mapbits_unzeroed_set(chunk,
-                    pageind+i, flag_unzeroed);
-            }
             if (npages > 1) {
-                arena_mapbits_unzeroed_set(chunk,
-                    pageind+npages-1, flag_unzeroed);
                 arena_mapbits_large_set(chunk,
                     pageind+npages-1, 0, 0);
             }
@@ -685,14 +659,30 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
             sizeof(arena_chunk_map_t)) + map_bias;
         size_t npages = arena_mapbits_large_size_get(chunk, pageind) >>
             LG_PAGE;
+        bool unzeroed;
+        size_t flag_unzeroed, i;
         assert(pageind + npages <= chunk_npages);
         assert(ndirty >= npages);
         if (config_debug)
             ndirty -= npages;
-        pages_purge((void *)((uintptr_t)chunk + (pageind << LG_PAGE)),
-            (npages << LG_PAGE));
+        unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind <<
+            LG_PAGE)), (npages << LG_PAGE));
+        flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0;
+        /*
+         * Set the unzeroed flag for all pages, now that pages_purge()
+         * has returned whether the pages were zeroed as a side effect
+         * of purging.  This chunk map modification is safe even though
+         * the arena mutex isn't currently owned by this thread,
+         * because the run is marked as allocated, thus protecting it
+         * from being modified by any other thread.  As long as these
+         * writes don't perturb the first and last elements'
+         * CHUNK_MAP_ALLOCATED bits, behavior is well defined.
+         */
+        for (i = 0; i < npages; i++) {
+            arena_mapbits_unzeroed_set(chunk, pageind+i,
+                flag_unzeroed);
+        }
         if (config_stats)
             nmadvise++;
     }


@@ -43,6 +43,7 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero)
     extent_node_t *node;
     extent_node_t key;
     size_t alloc_size, leadsize, trailsize;
+    bool zeroed;
     if (base) {
         /*
@@ -107,17 +108,18 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero)
     }
     malloc_mutex_unlock(&chunks_mtx);
-    if (node != NULL)
+    zeroed = false;
+    if (node != NULL) {
+        if (node->zeroed) {
+            zeroed = true;
+            *zero = true;
+        }
         base_node_dealloc(node);
-#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
-    /* Pages are zeroed as a side effect of pages_purge(). */
-    *zero = true;
-#else
-    if (*zero) {
+    }
+    if (zeroed == false && *zero) {
         VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
         memset(ret, 0, size);
     }
-#endif
     return (ret);
 }
@@ -191,9 +193,10 @@ label_return:
 static void
 chunk_record(void *chunk, size_t size)
 {
+    bool unzeroed;
     extent_node_t *xnode, *node, *prev, key;
-    pages_purge(chunk, size);
+    unzeroed = pages_purge(chunk, size);
     /*
      * Allocate a node before acquiring chunks_mtx even though it might not
@@ -216,6 +219,7 @@ chunk_record(void *chunk, size_t size)
         extent_tree_szad_remove(&chunks_szad, node);
         node->addr = chunk;
         node->size += size;
+        node->zeroed = (node->zeroed && (unzeroed == false));
         extent_tree_szad_insert(&chunks_szad, node);
         if (xnode != NULL)
             base_node_dealloc(xnode);
@@ -234,6 +238,7 @@ chunk_record(void *chunk, size_t size)
         node = xnode;
         node->addr = chunk;
         node->size = size;
+        node->zeroed = (unzeroed == false);
         extent_tree_ad_insert(&chunks_ad, node);
         extent_tree_szad_insert(&chunks_szad, node);
     }
@@ -253,6 +258,7 @@ chunk_record(void *chunk, size_t size)
         extent_tree_szad_remove(&chunks_szad, node);
         node->addr = prev->addr;
         node->size += prev->size;
+        node->zeroed = (node->zeroed && prev->zeroed);
         extent_tree_szad_insert(&chunks_szad, node);
         base_node_dealloc(prev);
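
The chunk.c changes above reduce to three rules for the new zeroed flag. A hypothetical distillation of those rules (illustrative names, not jemalloc code):

#include <assert.h>
#include <stdbool.h>

/* A freshly recorded chunk is known-zeroed only if purging zeroed it. */
static bool
recorded_zeroed(bool unzeroed)
{
    return (unzeroed == false);
}

/* Coalescing keeps the flag only if every merged extent was zeroed. */
static bool
coalesced_zeroed(bool a_zeroed, bool b_zeroed)
{
    return (a_zeroed && b_zeroed);
}

/*
 * chunk_recycle() needs memset() only when the caller wants zeroed
 * memory and the recycled extent is not already known to be zeroed.
 */
static bool
needs_memset(bool node_zeroed, bool caller_wants_zero)
{
    return (caller_wants_zero && node_zeroed == false);
}

int
main(void)
{
    assert(coalesced_zeroed(recorded_zeroed(false), true));
    assert(needs_memset(false, true));
    assert(needs_memset(true, true) == false);
    return (0);
}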


@@ -113,22 +113,30 @@ pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size)
 #endif
 }
-void
+bool
 pages_purge(void *addr, size_t length)
 {
+    bool unzeroed;
 #ifdef _WIN32
     VirtualAlloc(addr, length, MEM_RESET, PAGE_READWRITE);
+    unzeroed = true;
 #else
 #  ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
 #    define JEMALLOC_MADV_PURGE MADV_DONTNEED
+#    define JEMALLOC_MADV_ZEROS true
 #  elif defined(JEMALLOC_PURGE_MADVISE_FREE)
 #    define JEMALLOC_MADV_PURGE MADV_FREE
+#    define JEMALLOC_MADV_ZEROS false
 #  else
 #    error "No method defined for purging unused dirty pages."
 #  endif
-    madvise(addr, length, JEMALLOC_MADV_PURGE);
+    int err = madvise(addr, length, JEMALLOC_MADV_PURGE);
+    unzeroed = (JEMALLOC_MADV_ZEROS == false || err != 0);
 #  undef JEMALLOC_MADV_PURGE
+#  undef JEMALLOC_MADV_ZEROS
 #endif
+    return (unzeroed);
 }
 static void *
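
Taken together with the caller-side memset() in chunk_recycle(), the policy can be sketched in isolation as follows (runnable illustration, not jemalloc code; purge_and_zero() is a hypothetical name): pages only count as zeroed when the purge advice guarantees zero-filled anonymous pages (MADV_DONTNEED) and madvise() actually succeeded; otherwise the caller must zero explicitly when it needs zeroed memory.

#include <stdbool.h>
#include <string.h>
#include <sys/mman.h>

static void
purge_and_zero(void *addr, size_t length, bool want_zeroed)
{
    bool unzeroed;

#if defined(MADV_DONTNEED)
    /* Zeroes anonymous pages on success, but only on success. */
    unzeroed = (madvise(addr, length, MADV_DONTNEED) != 0);
#elif defined(MADV_FREE)
    /* Never guarantees zeroed pages, even on success. */
    madvise(addr, length, MADV_FREE);
    unzeroed = true;
#else
    unzeroed = true;
#endif
    if (want_zeroed && unzeroed)
        memset(addr, 0, length);
}

int
main(void)
{
    size_t len = 1 << 20;
    void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (p == MAP_FAILED)
        return (1);
    memset(p, 0xa5, len);
    purge_and_zero(p, len, true);   /* Range reads back as zeros afterward. */
    return (0);
}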