Refactor base_alloc() to guarantee demand-zeroed memory.

Refactor base_alloc() to guarantee that allocations are carved from
demand-zeroed virtual memory.  This supports sparse data structures such
as multi-page radix tree nodes.
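
To see why demand-zeroing matters here, consider a sketch that is not part of the commit (the 4-page node size and 4 KiB page size are made-up illustrations): an anonymous private mmap() region is demand-zeroed by the kernel, so a sparsely written multi-page node reads as zeros without an explicit memset(), and untouched pages never acquire dedicated physical memory.

#include <stdio.h>
#include <sys/mman.h>
/* Hypothetical sparse node: 4 pages' worth of child pointers. */
#define NODE_BYTES	(4 * 4096)
int
main(void)
{
	/* Anonymous private mappings are demand-zeroed. */
	void **node = mmap(NULL, NODE_BYTES, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (node == MAP_FAILED)
		return (1);
	/*
	 * Every slot already reads as NULL; pages that are never written stay
	 * backed by the shared zero page rather than dedicated memory.
	 */
	printf("slot 100 = %p\n", node[100]);
	/* Writing one slot dirties only the page that contains it. */
	node[100] = (void *)node;
	munmap(node, NODE_BYTES);
	return (0);
}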

Enhance base_alloc() to keep track of fragments which were too small to
support previous allocation requests, and try to consume them during
subsequent requests.  This becomes important when request sizes commonly
approach or exceed the chunk size (as could radix tree node
allocations).
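
The recycling scheme appears concretely in the base_alloc_locked() hunk below; what follows is only a simplified model of the same idea, with a first-fit linked list and a fixed 2 MiB mapping standing in for jemalloc's size/address-ordered extent tree and chunk layer, and with invented names (frag_t, frag_pool, sketch_base_alloc()):

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#define CACHELINE		64
#define CHUNK_SIZE		(2u << 20)	/* Stand-in for the real chunk size. */
#define CACHELINE_CEILING(s)	(((s) + CACHELINE - 1) & ~(size_t)(CACHELINE - 1))
#define CHUNK_CEILING(s)	(((s) + CHUNK_SIZE - 1) & ~(size_t)(CHUNK_SIZE - 1))
/* Leftover fragment; jemalloc records these as extent_node_t in a szad tree. */
typedef struct frag_s {
	void		*addr;
	size_t		size;
	struct frag_s	*next;
} frag_t;
static frag_t	frag_pool[64];	/* Toy node storage; jemalloc carves nodes instead. */
static size_t	frag_pool_used;
static frag_t	*frags;		/* Fragments available for reuse. */
static void *
sketch_base_alloc(size_t size)
{
	size_t csize = CACHELINE_CEILING(size);
	frag_t **link, *f = NULL;
	void *ret;
	/* First try to satisfy the request from a leftover fragment. */
	for (link = &frags; *link != NULL; link = &(*link)->next) {
		if ((*link)->size >= csize) {
			f = *link;
			*link = f->next;
			break;
		}
	}
	if (f == NULL) {
		/* Nothing fits: map a fresh demand-zeroed chunk. */
		void *addr;
		if (frag_pool_used == sizeof(frag_pool) / sizeof(frag_pool[0]))
			return (NULL);
		addr = mmap(NULL, CHUNK_CEILING(csize), PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (addr == MAP_FAILED)
			return (NULL);
		f = &frag_pool[frag_pool_used++];
		f->addr = addr;
		f->size = CHUNK_CEILING(csize);
	}
	/* Carve the request; record any remainder for later, smaller requests. */
	ret = f->addr;
	if (f->size > csize) {
		f->addr = (void *)((uintptr_t)ret + csize);
		f->size -= csize;
		f->next = frags;
		frags = f;
	}
	/* (On an exact fit jemalloc recycles the tracking node; this sketch drops it.) */
	return (ret);
}

A subsequent smaller request is then carved from the recorded tail of the first mapping instead of triggering a new one, which is the behavior described above for request sizes that approach the chunk size. jemalloc's szad tree refines this by choosing the smallest adequate fragment at the lowest address rather than the first fit.
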
Jason Evans 2015-01-30 21:49:19 -08:00
parent 918a1a5b3f
commit f500a10b2e
5 changed files with 114 additions and 78 deletions


@@ -10,7 +10,6 @@
 #ifdef JEMALLOC_H_EXTERNS
 void *base_alloc(size_t size);
-void *base_calloc(size_t number, size_t size);
 extent_node_t *base_node_alloc(void);
 void base_node_dalloc(extent_node_t *node);
 size_t base_allocated_get(void);


@@ -103,7 +103,6 @@ atomic_sub_z
 base_alloc
 base_allocated_get
 base_boot
-base_calloc
 base_node_alloc
 base_node_dalloc
 base_postfork_child


@@ -5,73 +5,117 @@
 /* Data. */
 static malloc_mutex_t base_mtx;
-/*
- * Current pages that are being used for internal memory allocations. These
- * pages are carved up in cacheline-size quanta, so that there is no chance of
- * false cache line sharing.
- */
-static void *base_pages;
-static void *base_next_addr;
-static void *base_past_addr; /* Addr immediately past base_pages. */
+static extent_tree_t base_avail_szad;
 static extent_node_t *base_nodes;
 static size_t base_allocated;
 /******************************************************************************/
-static bool
-base_pages_alloc(size_t minsize)
+static extent_node_t *
+base_node_try_alloc_locked(void)
 {
-	size_t csize;
+	extent_node_t *node;
-	assert(minsize != 0);
-	csize = CHUNK_CEILING(minsize);
-	base_pages = chunk_alloc_base(csize);
-	if (base_pages == NULL)
-		return (true);
-	base_next_addr = base_pages;
-	base_past_addr = (void *)((uintptr_t)base_pages + csize);
-	return (false);
+	if (base_nodes == NULL)
+		return (NULL);
+	node = base_nodes;
+	base_nodes = *(extent_node_t **)node;
+	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t));
+	return (node);
 }
+static void
+base_node_dalloc_locked(extent_node_t *node)
+{
+	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t));
+	*(extent_node_t **)node = base_nodes;
+	base_nodes = node;
+}
+/* base_mtx must be held. */
+static extent_node_t *
+base_chunk_alloc(size_t minsize)
+{
+	extent_node_t *node;
+	size_t csize, nsize;
+	void *addr;
+	assert(minsize != 0);
+	node = base_node_try_alloc_locked();
+	/* Allocate enough space to also carve a node out if necessary. */
+	nsize = (node == NULL) ? CACHELINE_CEILING(sizeof(extent_node_t)) : 0;
+	csize = CHUNK_CEILING(minsize + nsize);
+	addr = chunk_alloc_base(csize);
+	if (addr == NULL) {
+		if (node != NULL)
+			base_node_dalloc_locked(node);
+		return (NULL);
+	}
+	if (node == NULL) {
+		csize -= nsize;
+		node = (extent_node_t *)((uintptr_t)addr + csize);
+		if (config_stats)
+			base_allocated += nsize;
+	}
+	node->addr = addr;
+	node->size = csize;
+	return (node);
+}
+static void *
+base_alloc_locked(size_t size)
+{
+	void *ret;
+	size_t csize;
+	extent_node_t *node;
+	extent_node_t key;
+	/*
+	 * Round size up to nearest multiple of the cacheline size, so that
+	 * there is no chance of false cache line sharing.
+	 */
+	csize = CACHELINE_CEILING(size);
+	key.addr = NULL;
+	key.size = csize;
+	node = extent_tree_szad_nsearch(&base_avail_szad, &key);
+	if (node != NULL) {
+		/* Use existing space. */
+		extent_tree_szad_remove(&base_avail_szad, node);
+	} else {
+		/* Try to allocate more space. */
+		node = base_chunk_alloc(csize);
+	}
+	if (node == NULL)
+		return (NULL);
+	ret = node->addr;
+	if (node->size > csize) {
+		node->addr = (void *)((uintptr_t)ret + csize);
+		node->size -= csize;
+		extent_tree_szad_insert(&base_avail_szad, node);
+	} else
+		base_node_dalloc_locked(node);
+	if (config_stats)
+		base_allocated += csize;
+	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, csize);
+	return (ret);
+}
+/*
+ * base_alloc() guarantees demand-zeroed memory, in order to make multi-page
+ * sparse data structures such as radix tree nodes efficient with respect to
+ * physical memory usage.
+ */
 void *
 base_alloc(size_t size)
 {
 	void *ret;
-	size_t csize;
-	/* Round size up to nearest multiple of the cacheline size. */
-	csize = CACHELINE_CEILING(size);
 	malloc_mutex_lock(&base_mtx);
-	/* Make sure there's enough space for the allocation. */
-	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
-		if (base_pages_alloc(csize)) {
+	ret = base_alloc_locked(size);
 	malloc_mutex_unlock(&base_mtx);
-			return (NULL);
-		}
-	}
-	/* Allocate. */
-	ret = base_next_addr;
-	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
-	if (config_stats)
-		base_allocated += csize;
-	malloc_mutex_unlock(&base_mtx);
-	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, csize);
 	return (ret);
 }
-void *
-base_calloc(size_t number, size_t size)
-{
-	void *ret = base_alloc(number * size);
-	if (ret != NULL)
-		memset(ret, 0, number * size);
-	return (ret);
-}
@@ -81,17 +125,9 @@ base_node_alloc(void)
 	extent_node_t *ret;
 	malloc_mutex_lock(&base_mtx);
-	if (base_nodes != NULL) {
-		ret = base_nodes;
-		base_nodes = *(extent_node_t **)ret;
+	if ((ret = base_node_try_alloc_locked()) == NULL)
+		ret = (extent_node_t *)base_alloc_locked(sizeof(extent_node_t));
 	malloc_mutex_unlock(&base_mtx);
-		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret,
-		    sizeof(extent_node_t));
-	} else {
-		malloc_mutex_unlock(&base_mtx);
-		ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
-	}
 	return (ret);
 }
@@ -99,10 +135,8 @@ void
 base_node_dalloc(extent_node_t *node)
 {
-	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t));
 	malloc_mutex_lock(&base_mtx);
-	*(extent_node_t **)node = base_nodes;
-	base_nodes = node;
+	base_node_dalloc_locked(node);
 	malloc_mutex_unlock(&base_mtx);
 }
@@ -121,9 +155,10 @@ bool
 base_boot(void)
 {
-	base_nodes = NULL;
 	if (malloc_mutex_init(&base_mtx))
 		return (true);
+	extent_tree_szad_new(&base_avail_szad);
+	base_nodes = NULL;
 	return (false);
 }


@@ -232,15 +232,18 @@ chunk_alloc_base(size_t size)
 	void *ret;
 	bool zero;
-	zero = false;
-	ret = chunk_alloc_core(NULL, size, chunksize, true, &zero,
-	    chunk_dss_prec_get());
-	if (ret == NULL)
-		return (NULL);
-	if (chunk_register(ret, size, true)) {
+	/*
+	 * Directly call chunk_alloc_mmap() rather than chunk_alloc_core()
+	 * because it's critical that chunk_alloc_base() return untouched
+	 * demand-zeroed virtual memory.
+	 */
+	zero = true;
+	ret = chunk_alloc_mmap(size, chunksize, &zero);
+	if (ret != NULL && chunk_register(ret, size, true)) {
 		chunk_dalloc_core(ret, size);
-		return (NULL);
+		ret = NULL;
 	}
 	return (ret);
 }


@@ -83,8 +83,8 @@ malloc_mutex_init(malloc_mutex_t *mutex)
 		mutex->postponed_next = postponed_mutexes;
 		postponed_mutexes = mutex;
 	} else {
-		if (_pthread_mutex_init_calloc_cb(&mutex->lock, base_calloc) !=
-		    0)
+		if (_pthread_mutex_init_calloc_cb(&mutex->lock,
+		    bootstrap_calloc) != 0)
 			return (true);
 	}
 #else
@@ -140,7 +140,7 @@ mutex_boot(void)
 	postpone_init = false;
 	while (postponed_mutexes != NULL) {
 		if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock,
-		    base_calloc) != 0)
+		    bootstrap_calloc) != 0)
 			return (true);
 		postponed_mutexes = postponed_mutexes->postponed_next;
 	}