Refactor rtree to be lock-free.
Recent huge allocation refactoring associates huge allocations with arenas, but it remains necessary to quickly look up huge allocation metadata during reallocation/deallocation. A global radix tree remains a good solution to this problem, but locking would have become the primary bottleneck after the (upcoming) migration of chunk management from global to per-arena data structures.

This lock-free implementation uses double-checked reads to traverse the tree, so that in the steady state, each read or write requires only a single atomic operation.

This implementation also assures that no more than two tree levels actually exist, through a combination of careful virtual memory allocation which makes large sparse nodes cheap, and skipping the root node on x64 (possible because the top 16 bits are all 0 in practice).
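The double-checked read protocol at the core of this change can be illustrated in isolation. The sketch below is not jemalloc code: the names slot, slot_read(), and slot_init_hard() are invented for illustration, C11 atomics stand in for jemalloc's atomic_read_p()/atomic_cas_p()/atomic_write_p() wrappers, sched_yield() stands in for CPU_SPINWAIT, and calloc() stands in for node allocation. It does, however, follow the same protocol as rtree_node_init() in the src/rtree.c diff below.

#include <sched.h>
#include <stdatomic.h>
#include <stdlib.h>

#define NODE_INITIALIZING ((void *)0x1) /* sentinel; never a valid pointer */

static _Atomic(void *) slot; /* e.g. a child pointer in a tree node */

static void *slot_init_hard(void);

/* Fast path: in the steady state this is the only atomic operation. */
static void *
slot_read(void)
{
    void *node = atomic_load_explicit(&slot, memory_order_acquire);

    if (node == NULL || node == NODE_INITIALIZING)
        return (slot_init_hard());
    return (node);
}

/* Cold path: one thread allocates; racing threads spin until it finishes. */
static void *
slot_init_hard(void)
{
    void *expected = NULL;
    void *node;

    if (atomic_compare_exchange_strong(&slot, &expected,
        NODE_INITIALIZING)) {
        /* Won the race: allocate, then publish with a release store. */
        node = calloc(1, 4096); /* stand-in; error handling elided */
        atomic_store_explicit(&slot, node, memory_order_release);
        return (node);
    }
    /* Lost the race: wait for the winner to publish the node. */
    while ((node = atomic_load_explicit(&slot, memory_order_acquire)) ==
        NODE_INITIALIZING)
        sched_yield(); /* stand-in for CPU_SPINWAIT */
    return (node);
}

The sentinel value lets latecomers distinguish "not yet allocated" from "allocation in progress", so a node is never allocated twice and readers never observe a partially initialized node.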
src/chunk.c (25 changed lines)
@@ -21,7 +21,7 @@ static extent_tree_t chunks_ad_mmap;
 static extent_tree_t chunks_szad_dss;
 static extent_tree_t chunks_ad_dss;
 
-rtree_t *chunks_rtree;
+rtree_t chunks_rtree;
 
 /* Various chunk-related settings. */
 size_t chunksize;
@@ -200,7 +200,7 @@ chunk_register(void *chunk, size_t size, bool base)
     assert(CHUNK_ADDR2BASE(chunk) == chunk);
 
     if (config_ivsalloc && !base) {
-        if (rtree_set(chunks_rtree, (uintptr_t)chunk, 1))
+        if (rtree_set(&chunks_rtree, (uintptr_t)chunk, chunk))
             return (true);
     }
     if (config_stats || config_prof) {
@@ -395,7 +395,7 @@ chunk_dalloc_core(void *chunk, size_t size)
     assert((size & chunksize_mask) == 0);
 
     if (config_ivsalloc)
-        rtree_set(chunks_rtree, (uintptr_t)chunk, 0);
+        rtree_set(&chunks_rtree, (uintptr_t)chunk, NULL);
     if (config_stats || config_prof) {
         malloc_mutex_lock(&chunks_mtx);
         assert(stats_chunks.curchunks >= (size / chunksize));
@@ -415,6 +415,14 @@ chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind)
     return (false);
 }
 
+static rtree_node_elm_t *
+chunks_rtree_node_alloc(size_t nelms)
+{
+
+    return ((rtree_node_elm_t *)base_alloc(nelms *
+        sizeof(rtree_node_elm_t)));
+}
+
 bool
 chunk_boot(void)
 {
@@ -436,9 +444,8 @@ chunk_boot(void)
     extent_tree_szad_new(&chunks_szad_dss);
     extent_tree_ad_new(&chunks_ad_dss);
     if (config_ivsalloc) {
-        chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) -
-            opt_lg_chunk, base_alloc, NULL);
-        if (chunks_rtree == NULL)
+        if (rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) -
+            opt_lg_chunk, chunks_rtree_node_alloc, NULL))
             return (true);
     }
 
@@ -450,8 +457,6 @@ chunk_prefork(void)
 {
 
     malloc_mutex_prefork(&chunks_mtx);
-    if (config_ivsalloc)
-        rtree_prefork(chunks_rtree);
     chunk_dss_prefork();
 }
 
@@ -460,8 +465,6 @@ chunk_postfork_parent(void)
 {
 
     chunk_dss_postfork_parent();
-    if (config_ivsalloc)
-        rtree_postfork_parent(chunks_rtree);
     malloc_mutex_postfork_parent(&chunks_mtx);
 }
 
@@ -470,7 +473,5 @@ chunk_postfork_child(void)
 {
 
     chunk_dss_postfork_child();
-    if (config_ivsalloc)
-        rtree_postfork_child(chunks_rtree);
     malloc_mutex_postfork_child(&chunks_mtx);
 }
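A worked instance of the key-width arithmetic passed to rtree_new() in chunk_boot() above: (ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk is the pointer width in bits minus the low-order bits that are always zero in a chunk base address. The standalone computation below mirrors the height logic of rtree_new() from the src/rtree.c diff that follows; the concrete values (64-bit pointers, 2 MiB chunks, 16 bits consumed per tree level) are illustrative assumptions, not jemalloc's definitions.

#include <stdio.h>

int
main(void)
{
    unsigned lg_sizeof_ptr = 3;   /* assumption: sizeof(void *) == 8 */
    unsigned opt_lg_chunk = 21;   /* assumption: 2 MiB chunks */
    unsigned bits_per_level = 16; /* assumption: RTREE_BITS_PER_LEVEL */

    /* Pointer bits minus the bits that are zero for any chunk base. */
    unsigned bits = (1U << (lg_sizeof_ptr + 3)) - opt_lg_chunk; /* 43 */

    /* Height computation, mirroring rtree_new(). */
    unsigned bits_in_leaf = (bits % bits_per_level) == 0 ?
        bits_per_level : (bits % bits_per_level);               /* 11 */
    unsigned height = 1 + (bits - bits_in_leaf) / bits_per_level;
    if ((height - 1) * bits_per_level + bits_in_leaf != bits)
        height++;

    /* Prints "bits=43 bits_in_leaf=11 height=3". */
    printf("bits=%u bits_in_leaf=%u height=%u\n", bits, bits_in_leaf,
        height);
    return (0);
}

Under these assumptions the root level indexes address bits 63..48, which are all zero on current x86-64 hardware, so the start_level table computed at the end of rtree_new() lets lookups begin below the root; only two nodes are touched per lookup, which is how the commit message's "no more than two tree levels" claim plays out.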
src/rtree.c (150 changed lines)
@@ -1,75 +1,74 @@
 #define JEMALLOC_RTREE_C_
 #include "jemalloc/internal/jemalloc_internal.h"
 
-rtree_t *
-rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc)
+static unsigned
+hmin(unsigned ha, unsigned hb)
 {
-    rtree_t *ret;
-    unsigned bits_per_level, bits_in_leaf, height, i;
+
+    return (ha < hb ? ha : hb);
+}
+
+/* Only the most significant bits of keys passed to rtree_[gs]et() are used. */
+bool
+rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc,
+    rtree_node_dalloc_t *dalloc)
+{
+    unsigned bits_in_leaf, height, i;
 
     assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3));
 
-    bits_per_level = jemalloc_ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void
-        *)))) - 1;
-    bits_in_leaf = jemalloc_ffs(pow2_ceil((RTREE_NODESIZE /
-        sizeof(uint8_t)))) - 1;
+    bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL
+        : (bits % RTREE_BITS_PER_LEVEL);
     if (bits > bits_in_leaf) {
-        height = 1 + (bits - bits_in_leaf) / bits_per_level;
-        if ((height-1) * bits_per_level + bits_in_leaf != bits)
+        height = 1 + (bits - bits_in_leaf) / RTREE_BITS_PER_LEVEL;
+        if ((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf != bits)
             height++;
-    } else {
+    } else
         height = 1;
-    }
-    assert((height-1) * bits_per_level + bits_in_leaf >= bits);
-
-    ret = (rtree_t*)alloc(offsetof(rtree_t, level2bits) +
-        (sizeof(unsigned) * height));
-    if (ret == NULL)
-        return (NULL);
-    memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) *
-        height));
-
-    ret->alloc = alloc;
-    ret->dalloc = dalloc;
-    if (malloc_mutex_init(&ret->mutex)) {
-        if (dalloc != NULL)
-            dalloc(ret);
-        return (NULL);
-    }
-    ret->height = height;
-    if (height > 1) {
-        if ((height-1) * bits_per_level + bits_in_leaf > bits) {
-            ret->level2bits[0] = (bits - bits_in_leaf) %
-                bits_per_level;
-        } else
-            ret->level2bits[0] = bits_per_level;
-        for (i = 1; i < height-1; i++)
-            ret->level2bits[i] = bits_per_level;
-        ret->level2bits[height-1] = bits_in_leaf;
-    } else
-        ret->level2bits[0] = bits;
+    assert((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf == bits);
 
-    ret->root = (void**)alloc(sizeof(void *) << ret->level2bits[0]);
-    if (ret->root == NULL) {
-        if (dalloc != NULL)
-            dalloc(ret);
-        return (NULL);
-    }
-    memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]);
+    rtree->alloc = alloc;
+    rtree->dalloc = dalloc;
+    rtree->height = height;
+
+    /* Root level. */
+    rtree->levels[0].subtree = NULL;
+    rtree->levels[0].bits = (height > 1) ? RTREE_BITS_PER_LEVEL :
+        bits_in_leaf;
+    rtree->levels[0].cumbits = rtree->levels[0].bits;
+    /* Interior levels. */
+    for (i = 1; i < height-1; i++) {
+        rtree->levels[i].subtree = NULL;
+        rtree->levels[i].bits = RTREE_BITS_PER_LEVEL;
+        rtree->levels[i].cumbits = rtree->levels[i-1].cumbits +
+            RTREE_BITS_PER_LEVEL;
+    }
+    /* Leaf level. */
+    if (height > 1) {
+        rtree->levels[height-1].subtree = NULL;
+        rtree->levels[height-1].bits = bits_in_leaf;
+        rtree->levels[height-1].cumbits = bits;
+    }
 
-    return (ret);
+    /* Compute lookup table to be used by rtree_start_level(). */
+    for (i = 0; i < RTREE_HEIGHT_MAX; i++) {
+        rtree->start_level[i] = hmin(RTREE_HEIGHT_MAX - 1 - i, height -
+            1);
+    }
+
+    return (false);
 }
 
 static void
-rtree_delete_subtree(rtree_t *rtree, void **node, unsigned level)
+rtree_delete_subtree(rtree_t *rtree, rtree_node_elm_t *node, unsigned level)
 {
 
     if (level < rtree->height - 1) {
         size_t nchildren, i;
 
-        nchildren = ZU(1) << rtree->level2bits[level];
+        nchildren = ZU(1) << rtree->levels[level].bits;
         for (i = 0; i < nchildren; i++) {
-            void **child = (void **)node[i];
+            rtree_node_elm_t *child = node[i].child;
             if (child != NULL)
                 rtree_delete_subtree(rtree, child, level + 1);
         }
@@ -80,28 +79,49 @@ rtree_delete_subtree(rtree_t *rtree, void **node, unsigned level)
 void
 rtree_delete(rtree_t *rtree)
 {
+    unsigned i;
 
-    rtree_delete_subtree(rtree, rtree->root, 0);
-    rtree->dalloc(rtree);
+    for (i = 0; i < rtree->height; i++) {
+        rtree_node_elm_t *subtree = rtree->levels[i].subtree;
+        if (subtree != NULL)
+            rtree_delete_subtree(rtree, subtree, i);
+    }
 }
 
-void
-rtree_prefork(rtree_t *rtree)
+static rtree_node_elm_t *
+rtree_node_init(rtree_t *rtree, unsigned level, rtree_node_elm_t **elmp)
 {
+    rtree_node_elm_t *node;
+
+    if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) {
+        /*
+         * Another thread is already in the process of initializing.
+         * Spin-wait until initialization is complete.
+         */
+        do {
+            CPU_SPINWAIT;
+            node = atomic_read_p((void **)elmp);
+        } while (node == RTREE_NODE_INITIALIZING);
+    } else {
+        node = rtree->alloc(ZU(1) << rtree->levels[level].bits);
+        if (node == NULL)
+            return (NULL);
+        atomic_write_p((void **)elmp, node);
+    }
+
+    return (node);
+}
+
+rtree_node_elm_t *
+rtree_subtree_read_hard(rtree_t *rtree, unsigned level)
+{
 
-    malloc_mutex_prefork(&rtree->mutex);
+    return (rtree_node_init(rtree, level, &rtree->levels[level].subtree));
 }
 
-void
-rtree_postfork_parent(rtree_t *rtree)
+rtree_node_elm_t *
+rtree_child_read_hard(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
 {
 
-    malloc_mutex_postfork_parent(&rtree->mutex);
-}
-
-void
-rtree_postfork_child(rtree_t *rtree)
-{
-
-    malloc_mutex_postfork_child(&rtree->mutex);
+    return (rtree_node_init(rtree, level, &elm->child));
 }
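rtree_subtree_read_hard() and rtree_child_read_hard() above are only the cold paths; the inline fast paths live in jemalloc's rtree header, which is not part of this excerpt. A rough sketch of how such a fast path pairs with its cold path, assuming it reuses the diff's atomic_read_p() and RTREE_NODE_INITIALIZING, might look like this (illustrative, not a quote from the tree):

/* Illustrative fast path; the real one lives in the rtree header. */
JEMALLOC_INLINE rtree_node_elm_t *
rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
{
    rtree_node_elm_t *child;

    /* Steady state: one atomic read resolves the child pointer. */
    child = atomic_read_p((void **)&elm->child);
    if (child == NULL || child == RTREE_NODE_INITIALIZING) {
        /* Cold path: allocate and publish, or spin-wait for the winner. */
        child = rtree_child_read_hard(rtree, elm, level);
    }
    return (child);
}

Because the mutex is gone entirely, the rtree_{prefork,postfork_parent,postfork_child}() hooks could be deleted along with their callers in src/chunk.c above.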