Disable munmap() if it causes VM map holes.

Add a configure test to determine whether common mmap()/munmap()
patterns cause VM map holes, and only use munmap() to discard unused
chunks if the problem does not exist.

Unify the chunk caching for mmap and dss.

Fix options processing to limit lg_chunk to be large enough that
redzones will always fit.
Author: Jason Evans
Date:   2012-04-12 20:20:58 -07:00
Commit: 7ca0fdfb85 (parent: d6abcbb14b)
11 changed files with 277 additions and 244 deletions

View File

@ -817,6 +817,73 @@ else
AC_MSG_ERROR([cannot determine value for STATIC_PAGE_SHIFT])
fi
dnl Determine whether common sequences of mmap()/munmap() calls will leave
dnl semi-permanent VM map holes. If so, disable munmap.
AC_CACHE_CHECK([whether munmap() leaves semi-permanent VM map holes],
[je_cv_vmmap_hole],
AC_RUN_IFELSE([AC_LANG_PROGRAM(
[[#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#define NPTRS 11
#define MMAP_SIZE ((size_t)(1U << 22))
static void *
do_mmap(size_t size)
{
void *ret;
ret = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1,
0);
if (ret == MAP_FAILED) {
fprintf(stderr, "mmap() error\n");
exit(1);
}
return (ret);
}
static void
do_munmap(void *ptr, size_t size)
{
if (munmap(ptr, size) == -1) {
fprintf(stderr, "munmap() error\n");
exit(1);
}
}
]],
[[
void *p0, *p1, *p2, *p3, *p4;
FILE *f;
f = fopen("conftest.out", "w");
if (f == NULL)
exit(1);
p0 = do_mmap(MMAP_SIZE);
p1 = do_mmap(MMAP_SIZE);
p2 = do_mmap(MMAP_SIZE);
do_munmap(p1, MMAP_SIZE);
p3 = do_mmap(MMAP_SIZE * 2);
do_munmap(p3, MMAP_SIZE * 2);
p4 = do_mmap(MMAP_SIZE);
if (p4 != p1) {
fprintf(stderr, "Hoped for %p, got %p\n", p1, p4);
fprintf(stderr, "%p..%p..%p..%p..%p\n", p0, p1, p2, p3, p4);
fprintf(f, "yes\n");
} else
fprintf(f, "no\n");
fclose(f);
return (0);
]])],
[je_cv_vmmap_hole=`cat conftest.out`],
[je_cv_vmmap_hole=unknown]))
if test "x$je_cv_vmmap_hole" = "xno" ; then
AC_DEFINE([JEMALLOC_MUNMAP], [ ])
fi
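
For a quick manual check outside of configure, the same probe can be built as a standalone program. This is a sketch assuming a POSIX system where MAP_ANON is available; the conftest.out bookkeeping is replaced by a direct message.

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

#define MMAP_SIZE ((size_t)(1U << 22))

/* Same helpers as the configure probe: abort on mmap()/munmap() failure. */
static void *
do_mmap(size_t size)
{
    void *ret = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON,
        -1, 0);
    if (ret == MAP_FAILED) {
        perror("mmap");
        exit(1);
    }
    return (ret);
}

static void
do_munmap(void *ptr, size_t size)
{
    if (munmap(ptr, size) == -1) {
        perror("munmap");
        exit(1);
    }
}

int
main(void)
{
    void *p0, *p1, *p2, *p3, *p4;

    /* Map three regions (typically adjacent), then unmap the middle one. */
    p0 = do_mmap(MMAP_SIZE);
    p1 = do_mmap(MMAP_SIZE);
    p2 = do_mmap(MMAP_SIZE);
    do_munmap(p1, MMAP_SIZE);

    /* Map and unmap a double-size region, then re-request the hole's size. */
    p3 = do_mmap(MMAP_SIZE * 2);
    do_munmap(p3, MMAP_SIZE * 2);
    p4 = do_mmap(MMAP_SIZE);

    if (p4 != p1)
        printf("munmap() leaves VM map holes (hoped for %p, got %p)\n", p1, p4);
    else
        printf("no VM map hole observed\n");
    (void)p0; (void)p2;
    return (0);
}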
dnl ============================================================================
dnl jemalloc configuration.
dnl

View File

@ -11,7 +11,6 @@
void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero);
bool chunk_in_dss(void *chunk);
bool chunk_dealloc_dss(void *chunk, size_t size);
bool chunk_dss_boot(void);
void chunk_dss_prefork(void);
void chunk_dss_postfork_parent(void);

View File

@ -10,7 +10,7 @@
#ifdef JEMALLOC_H_EXTERNS
void *chunk_alloc_mmap(size_t size, size_t alignment);
void chunk_dealloc_mmap(void *chunk, size_t size);
bool chunk_dealloc_mmap(void *chunk, size_t size);
bool chunk_mmap_boot(void);

View File

@ -102,6 +102,13 @@ static const bool config_prof_libunwind =
false
#endif
;
static const bool config_munmap =
#ifdef JEMALLOC_MUNMAP
true
#else
false
#endif
;
static const bool config_stats =
#ifdef JEMALLOC_STATS
true

View File

@ -57,7 +57,6 @@
#define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap)
#define chunk_boot JEMALLOC_N(chunk_boot)
#define chunk_dealloc JEMALLOC_N(chunk_dealloc)
#define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss)
#define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap)
#define chunk_dss_boot JEMALLOC_N(chunk_dss_boot)
#define chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child)

View File

@ -172,6 +172,14 @@
/* One page is 2^STATIC_PAGE_SHIFT bytes. */
#undef STATIC_PAGE_SHIFT
/*
* If defined, use munmap() to unmap freed chunks, rather than storing them for
* later reuse. This is automatically disabled if configuration determines
* that common sequences of mmap()/munmap() calls will cause virtual memory map
* holes.
*/
#undef JEMALLOC_MUNMAP
/* TLS is used to map arenas and magazine caches to threads. */
#undef JEMALLOC_TLS
@ -209,6 +217,13 @@
*/
#undef JEMALLOC_PURGE_MADVISE_DONTNEED
#undef JEMALLOC_PURGE_MADVISE_FREE
#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
# define JEMALLOC_MADV_PURGE MADV_DONTNEED
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
# define JEMALLOC_MADV_PURGE MADV_FREE
#else
# error "No method defined for purging unused dirty pages."
#endif
/* sizeof(void *) == 2^LG_SIZEOF_PTR. */
#undef LG_SIZEOF_PTR

View File

@ -676,16 +676,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
if (config_debug)
ndirty -= npages;
#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
# define MADV_PURGE MADV_DONTNEED
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
# define MADV_PURGE MADV_FREE
#else
# error "No method defined for purging unused dirty pages."
#endif
madvise((void *)((uintptr_t)chunk + (pageind << LG_PAGE)),
(npages << LG_PAGE), MADV_PURGE);
#undef MADV_PURGE
(npages << LG_PAGE), JEMALLOC_MADV_PURGE);
if (config_stats)
nmadvise++;
}
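
The purge itself is just an madvise(2) call over a page-aligned dirty run; JEMALLOC_MADV_PURGE now resolves to whichever flag the platform offers. A minimal standalone illustration, assuming Linux-style MADV_DONTNEED and an anonymous private mapping:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int
main(void)
{
    size_t len = (size_t)1 << 22;    /* 4 MiB, page aligned */
    void *p = mmap(NULL, len, PROT_READ|PROT_WRITE,
        MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED)
        return (1);

    /* Touch the pages so they become dirty and consume physical memory. */
    memset(p, 0xa5, len);

    /*
     * Tell the kernel the contents are no longer needed; the mapping stays
     * valid, but the pages can be reclaimed and read back as zero on the
     * next touch (MADV_DONTNEED semantics for anonymous memory).
     */
    if (madvise(p, len, MADV_DONTNEED) != 0)
        perror("madvise");

    printf("first byte after purge: %d\n", ((unsigned char *)p)[0]);
    munmap(p, len);
    return (0);
}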

View File

@ -9,6 +9,15 @@ size_t opt_lg_chunk = LG_CHUNK_DEFAULT;
malloc_mutex_t chunks_mtx;
chunk_stats_t stats_chunks;
/*
* Trees of chunks that were previously allocated (trees differ only in node
* ordering). These are used when allocating chunks, in an attempt to re-use
* address space. Depending on function, different tree orderings are needed,
* which is why there are two trees with the same contents.
*/
static extent_tree_t chunks_szad;
static extent_tree_t chunks_ad;
rtree_t *chunks_rtree;
/* Various chunk-related settings. */
@ -19,6 +28,84 @@ size_t map_bias;
size_t arena_maxclass; /* Max size class for arenas. */
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static void *chunk_recycle(size_t size, size_t alignment, bool *zero);
static void chunk_record(void *chunk, size_t size);
/******************************************************************************/
static void *
chunk_recycle(size_t size, size_t alignment, bool *zero)
{
void *ret;
extent_node_t *node;
extent_node_t key;
size_t alloc_size, leadsize, trailsize;
alloc_size = size + alignment - chunksize;
/* Beware size_t wrap-around. */
if (alloc_size < size)
return (NULL);
key.addr = NULL;
key.size = alloc_size;
malloc_mutex_lock(&chunks_mtx);
node = extent_tree_szad_nsearch(&chunks_szad, &key);
if (node == NULL) {
malloc_mutex_unlock(&chunks_mtx);
return (NULL);
}
leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) -
(uintptr_t)node->addr;
assert(alloc_size >= leadsize + size);
trailsize = alloc_size - leadsize - size;
ret = (void *)((uintptr_t)node->addr + leadsize);
/* Remove node from the tree. */
extent_tree_szad_remove(&chunks_szad, node);
extent_tree_ad_remove(&chunks_ad, node);
if (leadsize != 0) {
/* Insert the leading space as a smaller chunk. */
node->size = leadsize;
extent_tree_szad_insert(&chunks_szad, node);
extent_tree_ad_insert(&chunks_ad, node);
node = NULL;
}
if (trailsize != 0) {
/* Insert the trailing space as a smaller chunk. */
if (node == NULL) {
/*
* An additional node is required, but
* base_node_alloc() can cause a new base chunk to be
* allocated. Drop chunks_mtx in order to avoid
* deadlock, and if node allocation fails, deallocate
* the result before returning an error.
*/
malloc_mutex_unlock(&chunks_mtx);
node = base_node_alloc();
if (node == NULL) {
chunk_dealloc(ret, size, true);
return (NULL);
}
malloc_mutex_lock(&chunks_mtx);
}
node->addr = (void *)((uintptr_t)(ret) + size);
node->size = trailsize;
extent_tree_szad_insert(&chunks_szad, node);
extent_tree_ad_insert(&chunks_ad, node);
node = NULL;
}
malloc_mutex_unlock(&chunks_mtx);
if (node != NULL)
base_node_dealloc(node);
#ifdef JEMALLOC_PURGE_MADVISE_FREE
if (*zero) {
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
memset(ret, 0, size);
}
#endif
return (ret);
}
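
To make the split arithmetic concrete, here is a worked sketch of the leadsize/trailsize computation. The alignment_ceiling() helper and all of the numbers are hypothetical stand-ins for ALIGNMENT_CEILING() and real extent-node contents; chunksize is assumed to be 4 MiB.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for ALIGNMENT_CEILING(): round s up to a multiple of alignment. */
static uintptr_t
alignment_ceiling(uintptr_t s, uintptr_t alignment)
{
    return ((s + alignment - 1) & ~(alignment - 1));
}

int
main(void)
{
    size_t chunksize = (size_t)4 << 20;    /* 4 MiB */
    size_t size = (size_t)8 << 20;         /* request: 8 MiB */
    size_t alignment = (size_t)8 << 20;    /* aligned to 8 MiB */
    /* Worst case: enough room to realign within the cached extent. */
    size_t alloc_size = size + alignment - chunksize;    /* 12 MiB */

    /* Suppose the szad tree returned a cached extent at this address. */
    uintptr_t node_addr = (uintptr_t)0x40400000;    /* 4 MiB aligned only */
    uintptr_t ret = alignment_ceiling(node_addr, alignment);
    size_t leadsize = ret - node_addr;                  /* 4 MiB */
    size_t trailsize = alloc_size - leadsize - size;    /* 0 */

    printf("lead %zu MiB, use %zu MiB, trail %zu MiB\n",
        leadsize >> 20, size >> 20, trailsize >> 20);
    return (0);
}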
/*
* If the caller specifies (*zero == false), it is still possible to receive
@ -35,6 +122,9 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero)
assert((size & chunksize_mask) == 0);
assert((alignment & chunksize_mask) == 0);
ret = chunk_recycle(size, alignment, zero);
if (ret != NULL)
goto label_return;
if (config_dss) {
ret = chunk_alloc_dss(size, alignment, zero);
if (ret != NULL)
@ -76,6 +166,80 @@ label_return:
return (ret);
}
static void
chunk_record(void *chunk, size_t size)
{
extent_node_t *xnode, *node, *prev, key;
madvise(chunk, size, JEMALLOC_MADV_PURGE);
xnode = NULL;
malloc_mutex_lock(&chunks_mtx);
while (true) {
key.addr = (void *)((uintptr_t)chunk + size);
node = extent_tree_ad_nsearch(&chunks_ad, &key);
/* Try to coalesce forward. */
if (node != NULL && node->addr == key.addr) {
/*
* Coalesce chunk with the following address range.
* This does not change the position within chunks_ad,
* so only remove/insert from/into chunks_szad.
*/
extent_tree_szad_remove(&chunks_szad, node);
node->addr = chunk;
node->size += size;
extent_tree_szad_insert(&chunks_szad, node);
break;
} else if (xnode == NULL) {
/*
* It is possible that base_node_alloc() will cause a
* new base chunk to be allocated, so take care not to
* deadlock on chunks_mtx, and recover if another thread
* deallocates an adjacent chunk while this one is busy
* allocating xnode.
*/
malloc_mutex_unlock(&chunks_mtx);
xnode = base_node_alloc();
if (xnode == NULL)
return;
malloc_mutex_lock(&chunks_mtx);
} else {
/* Coalescing forward failed, so insert a new node. */
node = xnode;
xnode = NULL;
node->addr = chunk;
node->size = size;
extent_tree_ad_insert(&chunks_ad, node);
extent_tree_szad_insert(&chunks_szad, node);
break;
}
}
/* Discard xnode if it ended up unused due to a race. */
if (xnode != NULL)
base_node_dealloc(xnode);
/* Try to coalesce backward. */
prev = extent_tree_ad_prev(&chunks_ad, node);
if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
chunk) {
/*
* Coalesce chunk with the previous address range. This does
* not change the position within chunks_ad, so only
* remove/insert node from/into chunks_szad.
*/
extent_tree_szad_remove(&chunks_szad, prev);
extent_tree_ad_remove(&chunks_ad, prev);
extent_tree_szad_remove(&chunks_szad, node);
node->addr = prev->addr;
node->size += prev->size;
extent_tree_szad_insert(&chunks_szad, node);
base_node_dealloc(prev);
}
malloc_mutex_unlock(&chunks_mtx);
}
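
The coalescing rules reduce to simple address arithmetic: a freed chunk merges forward with a cached extent that starts at chunk + size, and backward with one that ends exactly at chunk. A toy model using a fixed pair of cached extents in place of jemalloc's extent trees (addresses and sizes hypothetical):

#include <stdint.h>
#include <stdio.h>

typedef struct { uintptr_t addr; size_t size; } extent;

int
main(void)
{
    size_t mib4 = (size_t)4 << 20;
    /* Two extents already recorded in the cache. */
    extent next = { 0x40800000, 0 };    /* starts at chunk + size */
    extent prev = { 0x40000000, 0 };    /* ends exactly at chunk  */
    next.size = mib4;
    prev.size = mib4;

    /* The chunk being recorded. */
    uintptr_t chunk = 0x40400000;
    size_t size = mib4;

    /* Forward coalesce: a cached extent begins where this chunk ends. */
    if (next.addr == chunk + size) {
        next.addr = chunk;
        next.size += size;
    }
    /* Backward coalesce: a cached extent ends where the merged run begins. */
    if (prev.addr + prev.size == next.addr) {
        next.addr = prev.addr;
        next.size += prev.size;
        /* prev would now be removed from both trees and its node freed. */
    }
    printf("coalesced extent: %#lx, %zu MiB\n",
        (unsigned long)next.addr, next.size >> 20);
    return (0);
}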
void
chunk_dealloc(void *chunk, size_t size, bool unmap)
{
@ -94,9 +258,9 @@ chunk_dealloc(void *chunk, size_t size, bool unmap)
}
if (unmap) {
if (config_dss && chunk_dealloc_dss(chunk, size) == false)
if (chunk_dealloc_mmap(chunk, size) == false)
return;
chunk_dealloc_mmap(chunk, size);
chunk_record(chunk, size);
}
}
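
Under the unified scheme, the unmap path above tries the DSS first, then chunk_dealloc_mmap(); when the latter reports that it left the mapping intact (config_munmap is false), the chunk falls through to chunk_record() and is cached for reuse. A stubbed sketch of that boolean contract follows; config_munmap is fixed to false here, and pages_unmap_stub()/chunk_record_stub() are illustrative stand-ins rather than jemalloc's internals.

#include <stdbool.h>
#include <stdio.h>

/* Pretend JEMALLOC_MUNMAP is undefined, i.e. munmap() causes VM map holes. */
static const bool config_munmap = false;

static void
pages_unmap_stub(void *chunk, size_t size)
{
    (void)chunk; (void)size;    /* would munmap() the region */
}

static void
chunk_record_stub(void *chunk, size_t size)
{
    (void)chunk;
    printf("cached %zu bytes for later reuse\n", size);
}

/* Returns false if the memory was unmapped, true if the caller must keep it. */
static bool
dealloc_mmap_sketch(void *chunk, size_t size)
{
    if (config_munmap)
        pages_unmap_stub(chunk, size);
    return (config_munmap == false);
}

int
main(void)
{
    static char fake_chunk[4096];    /* stands in for a real chunk */

    if (dealloc_mmap_sketch(fake_chunk, sizeof(fake_chunk)))
        chunk_record_stub(fake_chunk, sizeof(fake_chunk));
    return (0);
}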
@ -117,6 +281,8 @@ chunk_boot0(void)
}
if (config_dss && chunk_dss_boot())
return (true);
extent_tree_szad_new(&chunks_szad);
extent_tree_ad_new(&chunks_ad);
if (config_ivsalloc) {
chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) -
opt_lg_chunk);

View File

@ -3,17 +3,6 @@
/******************************************************************************/
/* Data. */
#ifndef JEMALLOC_HAVE_SBRK
void *
sbrk(intptr_t increment)
{
not_implemented();
return (NULL);
}
#endif
/*
* Protects sbrk() calls. This avoids malloc races among threads, though it
* does not protect against races with threads that call sbrk() directly.
@ -27,92 +16,18 @@ static void *dss_prev;
/* Current upper limit on DSS addresses. */
static void *dss_max;
/*
* Trees of chunks that were previously allocated (trees differ only in node
* ordering). These are used when allocating chunks, in an attempt to re-use
* address space. Depending on function, different tree orderings are needed,
* which is why there are two trees with the same contents.
*/
static extent_tree_t dss_chunks_szad;
static extent_tree_t dss_chunks_ad;
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static void *chunk_recycle_dss(size_t size, size_t alignment, bool *zero);
static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
/******************************************************************************/
#ifndef JEMALLOC_HAVE_SBRK
static void *
chunk_recycle_dss(size_t size, size_t alignment, bool *zero)
sbrk(intptr_t increment)
{
void *ret;
extent_node_t *node;
extent_node_t key;
size_t alloc_size, leadsize, trailsize;
cassert(config_dss);
not_implemented();
alloc_size = size + alignment - chunksize;
/* Beware size_t wrap-around. */
if (alloc_size < size)
return (NULL);
key.addr = NULL;
key.size = alloc_size;
malloc_mutex_lock(&dss_mtx);
node = extent_tree_szad_nsearch(&dss_chunks_szad, &key);
if (node == NULL) {
malloc_mutex_unlock(&dss_mtx);
return (NULL);
}
leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) -
(uintptr_t)node->addr;
assert(alloc_size >= leadsize + size);
trailsize = alloc_size - leadsize - size;
ret = (void *)((uintptr_t)node->addr + leadsize);
/* Remove node from the tree. */
extent_tree_szad_remove(&dss_chunks_szad, node);
extent_tree_ad_remove(&dss_chunks_ad, node);
if (leadsize != 0) {
/* Insert the leading space as a smaller chunk. */
node->size = leadsize;
extent_tree_szad_insert(&dss_chunks_szad, node);
extent_tree_ad_insert(&dss_chunks_ad, node);
node = NULL;
}
if (trailsize != 0) {
/* Insert the trailing space as a smaller chunk. */
if (node == NULL) {
/*
* An additional node is required, but
* base_node_alloc() can cause a new base chunk to be
* allocated. Drop dss_mtx in order to avoid deadlock,
* and if node allocation fails, deallocate the result
* before returning an error.
*/
malloc_mutex_unlock(&dss_mtx);
node = base_node_alloc();
if (node == NULL) {
chunk_dealloc_dss(ret, size);
return (NULL);
}
malloc_mutex_lock(&dss_mtx);
}
node->addr = (void *)((uintptr_t)(ret) + size);
node->size = trailsize;
extent_tree_szad_insert(&dss_chunks_szad, node);
extent_tree_ad_insert(&dss_chunks_ad, node);
node = NULL;
}
malloc_mutex_unlock(&dss_mtx);
if (node != NULL)
base_node_dealloc(node);
if (*zero)
memset(ret, 0, size);
return (ret);
return (NULL);
}
#endif
void *
chunk_alloc_dss(size_t size, size_t alignment, bool *zero)
@ -123,10 +38,6 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero)
assert(size > 0 && (size & chunksize_mask) == 0);
assert(alignment > 0 && (alignment & chunksize_mask) == 0);
ret = chunk_recycle_dss(size, alignment, zero);
if (ret != NULL)
return (ret);
/*
* sbrk() uses a signed increment argument, so take care not to
* interpret a huge allocation request as a negative increment.
@ -177,7 +88,7 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero)
dss_max = dss_next;
malloc_mutex_unlock(&dss_mtx);
if (cpad_size != 0)
chunk_dealloc_dss(cpad, cpad_size);
chunk_dealloc(cpad, cpad_size, true);
*zero = true;
return (ret);
}
@ -188,81 +99,6 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero)
return (NULL);
}
static extent_node_t *
chunk_dealloc_dss_record(void *chunk, size_t size)
{
extent_node_t *xnode, *node, *prev, key;
cassert(config_dss);
xnode = NULL;
while (true) {
key.addr = (void *)((uintptr_t)chunk + size);
node = extent_tree_ad_nsearch(&dss_chunks_ad, &key);
/* Try to coalesce forward. */
if (node != NULL && node->addr == key.addr) {
/*
* Coalesce chunk with the following address range.
* This does not change the position within
* dss_chunks_ad, so only remove/insert from/into
* dss_chunks_szad.
*/
extent_tree_szad_remove(&dss_chunks_szad, node);
node->addr = chunk;
node->size += size;
extent_tree_szad_insert(&dss_chunks_szad, node);
break;
} else if (xnode == NULL) {
/*
* It is possible that base_node_alloc() will cause a
* new base chunk to be allocated, so take care not to
* deadlock on dss_mtx, and recover if another thread
* deallocates an adjacent chunk while this one is busy
* allocating xnode.
*/
malloc_mutex_unlock(&dss_mtx);
xnode = base_node_alloc();
malloc_mutex_lock(&dss_mtx);
if (xnode == NULL)
return (NULL);
} else {
/* Coalescing forward failed, so insert a new node. */
node = xnode;
xnode = NULL;
node->addr = chunk;
node->size = size;
extent_tree_ad_insert(&dss_chunks_ad, node);
extent_tree_szad_insert(&dss_chunks_szad, node);
break;
}
}
/* Discard xnode if it ended up unused due to a race. */
if (xnode != NULL)
base_node_dealloc(xnode);
/* Try to coalesce backward. */
prev = extent_tree_ad_prev(&dss_chunks_ad, node);
if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
chunk) {
/*
* Coalesce chunk with the previous address range. This does
* not change the position within dss_chunks_ad, so only
* remove/insert node from/into dss_chunks_szad.
*/
extent_tree_szad_remove(&dss_chunks_szad, prev);
extent_tree_ad_remove(&dss_chunks_ad, prev);
extent_tree_szad_remove(&dss_chunks_szad, node);
node->addr = prev->addr;
node->size += prev->size;
extent_tree_szad_insert(&dss_chunks_szad, node);
base_node_dealloc(prev);
}
return (node);
}
bool
chunk_in_dss(void *chunk)
{
@ -281,58 +117,6 @@ chunk_in_dss(void *chunk)
return (ret);
}
bool
chunk_dealloc_dss(void *chunk, size_t size)
{
bool ret;
cassert(config_dss);
malloc_mutex_lock(&dss_mtx);
if ((uintptr_t)chunk >= (uintptr_t)dss_base
&& (uintptr_t)chunk < (uintptr_t)dss_max) {
extent_node_t *node;
/* Try to coalesce with other unused chunks. */
node = chunk_dealloc_dss_record(chunk, size);
if (node != NULL) {
chunk = node->addr;
size = node->size;
}
/* Get the current end of the DSS. */
dss_max = sbrk(0);
/*
* Try to shrink the DSS if this chunk is at the end of the
* DSS. The sbrk() call here is subject to a race condition
* with threads that use brk(2) or sbrk(2) directly, but the
* alternative would be to leak memory for the sake of poorly
* designed multi-threaded programs.
*/
if ((void *)((uintptr_t)chunk + size) == dss_max
&& (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
/* Success. */
dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);
if (node != NULL) {
extent_tree_szad_remove(&dss_chunks_szad, node);
extent_tree_ad_remove(&dss_chunks_ad, node);
base_node_dealloc(node);
}
} else
madvise(chunk, size, MADV_DONTNEED);
ret = false;
goto label_return;
}
ret = true;
label_return:
malloc_mutex_unlock(&dss_mtx);
return (ret);
}
bool
chunk_dss_boot(void)
{
@ -344,8 +128,6 @@ chunk_dss_boot(void)
dss_base = sbrk(0);
dss_prev = dss_base;
dss_max = dss_base;
extent_tree_szad_new(&dss_chunks_szad);
extent_tree_ad_new(&dss_chunks_ad);
return (false);
}

View File

@ -178,11 +178,14 @@ chunk_alloc_mmap(size_t size, size_t alignment)
return (ret);
}
void
bool
chunk_dealloc_mmap(void *chunk, size_t size)
{
pages_unmap(chunk, size);
if (config_munmap)
pages_unmap(chunk, size);
return (config_munmap == false);
}
bool

View File

@ -501,11 +501,14 @@ malloc_conf_init(void)
CONF_HANDLE_BOOL(opt_abort, abort)
/*
* Chunks always require at least one header page,
* plus one data page.
* Chunks always require at least one header page, plus
* one data page in the absence of redzones, or three
* pages in the presence of redzones. In order to
* simplify options processing, fix the limit based on
* config_fill.
*/
CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, LG_PAGE+1,
(sizeof(size_t) << 3) - 1)
CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, LG_PAGE +
(config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1)
CONF_HANDLE_SIZE_T(opt_narenas, narenas, 1, SIZE_T_MAX)
CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, lg_dirty_mult,
-1, (sizeof(size_t) << 3) - 1)
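
As a quick check of the new lower bound, assuming LG_PAGE = 12 (4 KiB pages): without fill/redzone support a chunk needs a header page plus one data page, so lg_chunk must be at least 13; with config_fill, a data page may be bracketed by redzone pages (three pages beyond the header), pushing the minimum to 14, i.e. 16 KiB chunks. A sketch of the arithmetic:

#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
    unsigned lg_page = 12;      /* 4 KiB pages assumed */
    bool config_fill = true;    /* redzones possible */

    /* Header page + 1 data page, or + 3 pages when redzones may bracket it. */
    unsigned min_lg_chunk = lg_page + (config_fill ? 2 : 1);

    printf("minimum lg_chunk = %u (chunk >= %zu KiB)\n",
        min_lg_chunk, ((size_t)1 << min_lg_chunk) >> 10);
    return (0);
}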