Add element acquire/release capabilities to rtree.
This makes it possible to acquire short-term "ownership" of an rtree element, so that an extent pointer can be read *and* the extent's contents inspected with a guarantee that the element will not be modified until the ownership is released. This is intended as a mechanism for resolving rtree read/write races rather than as a way to lock extents.
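For orientation, a minimal usage sketch (not part of the diff below) showing how a caller might pair the new primitives to read an extent's contents race-free. extent_size_lookup() and the zero-return convention are hypothetical, and extent_size_get() is assumed to be the usual extent accessor:

/*
 * Hypothetical usage sketch, not part of this commit: read an extent
 * pointer and then the extent's contents without racing against a
 * concurrent rtree_write()/rtree_clear() of the same element.
 */
static size_t
extent_size_lookup(rtree_t *rtree, uintptr_t key)
{
	rtree_elm_t *elm;
	extent_t *extent;
	size_t size;

	/* Spins until the element's lock bit is acquired. */
	elm = rtree_elm_acquire(rtree, key, false, false);
	if (elm == NULL)
		return (0);
	extent = rtree_elm_read_acquired(elm);
	/* The element cannot change until rtree_elm_release(). */
	size = (extent != NULL) ? extent_size_get(extent) : 0;
	rtree_elm_release(elm);

	return (size);
}

Note that rtree_elm_acquire() spins on a compare-and-swap that tags the element's pointer word with its least significant bit, so the critical section between acquire and release should stay short.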
@@ -87,7 +87,7 @@ JEMALLOC_INLINE extent_t *
 chunk_lookup(const void *ptr, bool dependent)
 {
 
-	return (rtree_get(&chunks_rtree, (uintptr_t)ptr, dependent));
+	return (rtree_read(&chunks_rtree, (uintptr_t)ptr, dependent));
 }
 #endif
 
@@ -457,18 +457,24 @@ register_zone
 rtree_child_read
 rtree_child_read_hard
 rtree_child_tryread
+rtree_clear
 rtree_delete
-rtree_get
 rtree_new
 rtree_node_valid
-rtree_set
+rtree_elm_acquire
+rtree_elm_lookup
+rtree_elm_read
+rtree_elm_read_acquired
+rtree_elm_release
+rtree_elm_write
+rtree_elm_write_acquired
+rtree_read
 rtree_start_level
 rtree_subkey
 rtree_subtree_read
 rtree_subtree_read_hard
 rtree_subtree_tryread
-rtree_val_read
-rtree_val_write
+rtree_write
 run_quantize_ceil
 run_quantize_floor
 s2u
@@ -6,7 +6,7 @@
  */
 #ifdef JEMALLOC_H_TYPES
 
-typedef struct rtree_node_elm_s rtree_node_elm_t;
+typedef struct rtree_elm_s rtree_elm_t;
 typedef struct rtree_level_s rtree_level_t;
 typedef struct rtree_s rtree_t;
 
@@ -21,25 +21,24 @@ typedef struct rtree_s rtree_t;
 	((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
 
 /* Used for two-stage lock-free node initialization. */
-#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1)
+#define RTREE_NODE_INITIALIZING ((rtree_elm_t *)0x1)
 
 /*
  * The node allocation callback function's argument is the number of contiguous
- * rtree_node_elm_t structures to allocate, and the resulting memory must be
- * zeroed.
+ * rtree_elm_t structures to allocate, and the resulting memory must be zeroed.
  */
-typedef rtree_node_elm_t *(rtree_node_alloc_t)(size_t);
-typedef void (rtree_node_dalloc_t)(rtree_node_elm_t *);
+typedef rtree_elm_t *(rtree_node_alloc_t)(size_t);
+typedef void (rtree_node_dalloc_t)(rtree_elm_t *);
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
-struct rtree_node_elm_s {
+struct rtree_elm_s {
 	union {
-		void			*pun;
-		rtree_node_elm_t	*child;
-		extent_t		*val;
+		void		*pun;
+		rtree_elm_t	*child;
+		extent_t	*extent;
 	};
 };
 
@@ -60,15 +59,15 @@ struct rtree_level_s {
 	 *
 	 *   levels[1] : [<unused> | 0x00000001**** | 0x00000002**** | ... ]
 	 *
-	 *   levels[2] : [val(0x000000000000) | val(0x000000000001) | ...]
+	 *   levels[2] : [extent(0x000000000000) | extent(0x000000000001) | ...]
 	 *
 	 * This has practical implications on x64, which currently uses only the
 	 * lower 47 bits of virtual address space in userland, thus leaving
 	 * subtrees[0] unused and avoiding a level of tree traversal.
 	 */
 	union {
-		void			*subtree_pun;
-		rtree_node_elm_t	*subtree;
+		void		*subtree_pun;
+		rtree_elm_t	*subtree;
 	};
 	/* Number of key bits distinguished by this level. */
 	unsigned bits;
@@ -98,10 +97,9 @@ struct rtree_s {
 bool rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc,
     rtree_node_dalloc_t *dalloc);
 void rtree_delete(rtree_t *rtree);
-rtree_node_elm_t *rtree_subtree_read_hard(rtree_t *rtree,
-    unsigned level);
-rtree_node_elm_t *rtree_child_read_hard(rtree_t *rtree,
-    rtree_node_elm_t *elm, unsigned level);
+rtree_elm_t *rtree_subtree_read_hard(rtree_t *rtree, unsigned level);
+rtree_elm_t *rtree_child_read_hard(rtree_t *rtree, rtree_elm_t *elm,
+    unsigned level);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
@@ -111,22 +109,27 @@ rtree_node_elm_t *rtree_child_read_hard(rtree_t *rtree,
 unsigned rtree_start_level(rtree_t *rtree, uintptr_t key);
 uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);
 
-bool rtree_node_valid(rtree_node_elm_t *node);
-rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm,
-    bool dependent);
-rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
+bool rtree_node_valid(rtree_elm_t *node);
+rtree_elm_t *rtree_child_tryread(rtree_elm_t *elm, bool dependent);
+rtree_elm_t *rtree_child_read(rtree_t *rtree, rtree_elm_t *elm,
     unsigned level, bool dependent);
-extent_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm,
-    bool dependent);
-void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm,
-    const extent_t *val);
-rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level,
+extent_t *rtree_elm_read(rtree_elm_t *elm, bool dependent);
+void rtree_elm_write(rtree_elm_t *elm, const extent_t *extent);
+rtree_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level,
     bool dependent);
-rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level,
+rtree_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level,
     bool dependent);
+rtree_elm_t *rtree_elm_lookup(rtree_t *rtree, uintptr_t key,
+    bool dependent, bool init_missing);
 
-extent_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent);
-bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_t *val);
+bool rtree_write(rtree_t *rtree, uintptr_t key, const extent_t *extent);
+extent_t *rtree_read(rtree_t *rtree, uintptr_t key, bool dependent);
+rtree_elm_t *rtree_elm_acquire(rtree_t *rtree, uintptr_t key,
+    bool dependent, bool init_missing);
+extent_t *rtree_elm_read_acquired(rtree_elm_t *elm);
+void rtree_elm_write_acquired(rtree_elm_t *elm, const extent_t *extent);
+void rtree_elm_release(rtree_elm_t *elm);
+void rtree_clear(rtree_t *rtree, uintptr_t key);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
@@ -154,18 +157,18 @@ rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level)
 }
 
 JEMALLOC_ALWAYS_INLINE bool
-rtree_node_valid(rtree_node_elm_t *node)
+rtree_node_valid(rtree_elm_t *node)
 {
 
 	return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING);
 }
 
-JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
-rtree_child_tryread(rtree_node_elm_t *elm, bool dependent)
+JEMALLOC_ALWAYS_INLINE rtree_elm_t *
+rtree_child_tryread(rtree_elm_t *elm, bool dependent)
 {
-	rtree_node_elm_t *child;
+	rtree_elm_t *child;
 
-	/* Double-checked read (first read may be stale. */
+	/* Double-checked read (first read may be stale). */
 	child = elm->child;
 	if (!dependent && !rtree_node_valid(child))
 		child = atomic_read_p(&elm->pun);
@@ -173,11 +176,11 @@ rtree_child_tryread(rtree_node_elm_t *elm, bool dependent)
 	return (child);
 }
 
-JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
-rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level,
+JEMALLOC_ALWAYS_INLINE rtree_elm_t *
+rtree_child_read(rtree_t *rtree, rtree_elm_t *elm, unsigned level,
     bool dependent)
 {
-	rtree_node_elm_t *child;
+	rtree_elm_t *child;
 
 	child = rtree_child_tryread(elm, dependent);
 	if (!dependent && unlikely(!rtree_node_valid(child)))
@@ -187,40 +190,46 @@ rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level,
 }
 
 JEMALLOC_ALWAYS_INLINE extent_t *
-rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent)
+rtree_elm_read(rtree_elm_t *elm, bool dependent)
 {
+	extent_t *extent;
 
 	if (dependent) {
 		/*
-		 * Reading a val on behalf of a pointer to a valid allocation is
-		 * guaranteed to be a clean read even without synchronization,
-		 * because the rtree update became visible in memory before the
-		 * pointer came into existence.
+		 * Reading a value on behalf of a pointer to a valid allocation
+		 * is guaranteed to be a clean read even without
+		 * synchronization, because the rtree update became visible in
+		 * memory before the pointer came into existence.
 		 */
-		return (elm->val);
+		extent = elm->extent;
 	} else {
 		/*
 		 * An arbitrary read, e.g. on behalf of ivsalloc(), may not be
 		 * dependent on a previous rtree write, which means a stale read
 		 * could result if synchronization were omitted here.
 		 */
-		return (atomic_read_p(&elm->pun));
+		extent = (extent_t *)atomic_read_p(&elm->pun);
 	}
+
+	/* Mask the lock bit. */
+	extent = (extent_t *)((uintptr_t)extent & ~((uintptr_t)0x1));
+
+	return (extent);
 }
 
 JEMALLOC_INLINE void
-rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_t *val)
+rtree_elm_write(rtree_elm_t *elm, const extent_t *extent)
 {
 
-	atomic_write_p(&elm->pun, val);
+	atomic_write_p(&elm->pun, extent);
 }
 
-JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+JEMALLOC_ALWAYS_INLINE rtree_elm_t *
 rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent)
 {
-	rtree_node_elm_t *subtree;
+	rtree_elm_t *subtree;
 
-	/* Double-checked read (first read may be stale. */
+	/* Double-checked read (first read may be stale). */
 	subtree = rtree->levels[level].subtree;
 	if (!dependent && unlikely(!rtree_node_valid(subtree)))
 		subtree = atomic_read_p(&rtree->levels[level].subtree_pun);
@@ -228,10 +237,10 @@ rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent)
 	return (subtree);
 }
 
-JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+JEMALLOC_ALWAYS_INLINE rtree_elm_t *
 rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent)
 {
-	rtree_node_elm_t *subtree;
+	rtree_elm_t *subtree;
 
 	subtree = rtree_subtree_tryread(rtree, level, dependent);
 	if (!dependent && unlikely(!rtree_node_valid(subtree)))
@@ -240,16 +249,20 @@ rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent)
 	return (subtree);
 }
 
-JEMALLOC_ALWAYS_INLINE extent_t *
-rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
+JEMALLOC_ALWAYS_INLINE rtree_elm_t *
+rtree_elm_lookup(rtree_t *rtree, uintptr_t key, bool dependent,
+    bool init_missing)
 {
 	uintptr_t subkey;
 	unsigned start_level;
-	rtree_node_elm_t *node;
+	rtree_elm_t *node;
+
+	assert(!dependent || !init_missing);
 
 	start_level = rtree_start_level(rtree, key);
 
-	node = rtree_subtree_tryread(rtree, start_level, dependent);
+	node = init_missing ? rtree_subtree_read(rtree, start_level, dependent)
+	    : rtree_subtree_tryread(rtree, start_level, dependent);
 #define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height)
 	switch (start_level + RTREE_GET_BIAS) {
 #define RTREE_GET_SUBTREE(level)					\
@@ -259,7 +272,9 @@ rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
 			return (NULL);					\
 		subkey = rtree_subkey(rtree, key, level -		\
 		    RTREE_GET_BIAS);					\
-		node = rtree_child_tryread(&node[subkey], dependent);	\
+		node = init_missing ? rtree_child_read(rtree,		\
+		    &node[subkey], level - RTREE_GET_BIAS, dependent) :	\
+		    rtree_child_tryread(&node[subkey], dependent);	\
 		/* Fall through. */					\
 #define RTREE_GET_LEAF(level)						\
 	case level:							\
@@ -272,8 +287,7 @@ rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
 			 * node is a leaf, so it contains values rather than	\
 			 * child pointers.					\
 			 */							\
-			return (rtree_val_read(rtree, &node[subkey],	\
-			    dependent));
+			return (&node[subkey]);
 #if RTREE_HEIGHT_MAX > 1
 	RTREE_GET_SUBTREE(0)
 #endif
@@ -332,33 +346,94 @@ rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
 	not_reached();
 }
 
 JEMALLOC_INLINE bool
-rtree_set(rtree_t *rtree, uintptr_t key, const extent_t *val)
+rtree_write(rtree_t *rtree, uintptr_t key, const extent_t *extent)
 {
-	uintptr_t subkey;
-	unsigned i, start_level;
-	rtree_node_elm_t *node, *child;
+	rtree_elm_t *elm;
 
-	start_level = rtree_start_level(rtree, key);
+	assert(extent != NULL); /* Use rtree_clear() for this case. */
+	assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0);
 
-	node = rtree_subtree_read(rtree, start_level, false);
-	if (node == NULL)
+	elm = rtree_elm_lookup(rtree, key, false, true);
+	if (elm == NULL)
 		return (true);
-	for (i = start_level; /**/; i++, node = child) {
-		subkey = rtree_subkey(rtree, key, i);
-		if (i == rtree->height - 1) {
-			/*
-			 * node is a leaf, so it contains values rather than
-			 * child pointers.
-			 */
-			rtree_val_write(rtree, &node[subkey], val);
-			return (false);
-		}
-		assert(i + 1 < rtree->height);
-		child = rtree_child_read(rtree, &node[subkey], i, false);
-		if (child == NULL)
-			return (true);
-	}
+	assert(rtree_elm_read(elm, false) == NULL);
+	rtree_elm_write(elm, extent);
+
+	return (false);
 }
 
+JEMALLOC_ALWAYS_INLINE extent_t *
+rtree_read(rtree_t *rtree, uintptr_t key, bool dependent)
+{
+	rtree_elm_t *elm;
+
+	elm = rtree_elm_lookup(rtree, key, dependent, false);
+	if (elm == NULL)
+		return (NULL);
+
+	return (rtree_elm_read(elm, dependent));
+}
+
+JEMALLOC_INLINE rtree_elm_t *
+rtree_elm_acquire(rtree_t *rtree, uintptr_t key, bool dependent,
+    bool init_missing)
+{
+	rtree_elm_t *elm;
+
+	elm = rtree_elm_lookup(rtree, key, dependent, init_missing);
+	if (!dependent && elm == NULL)
+		return (NULL);
+	{
+		extent_t *extent;
+		void *s;
+
+		do {
+			extent = rtree_elm_read(elm, false);
+			/* The least significant bit serves as a lock. */
+			s = (void *)((uintptr_t)extent | (uintptr_t)0x1);
+		} while (atomic_cas_p(&elm->pun, (void *)extent, s));
+	}
+
+	return (elm);
+}
+
+JEMALLOC_INLINE extent_t *
+rtree_elm_read_acquired(rtree_elm_t *elm)
+{
+	extent_t *extent;
+
+	assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1);
+	extent = (extent_t *)((uintptr_t)elm->pun & ~((uintptr_t)0x1));
+	assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0);
+
+	return (extent);
+}
+
+JEMALLOC_INLINE void
+rtree_elm_write_acquired(rtree_elm_t *elm, const extent_t *extent)
+{
+
+	assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0);
+	assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1);
+	elm->pun = (void *)((uintptr_t)extent | (uintptr_t)0x1);
+	assert(rtree_elm_read_acquired(elm) == extent);
+}
+
+JEMALLOC_INLINE void
+rtree_elm_release(rtree_elm_t *elm)
+{
+
+	rtree_elm_write(elm, rtree_elm_read_acquired(elm));
+}
+
+JEMALLOC_INLINE void
+rtree_clear(rtree_t *rtree, uintptr_t key)
+{
+	rtree_elm_t *elm;
+
+	elm = rtree_elm_acquire(rtree, key, true, false);
+	rtree_elm_write_acquired(elm, NULL);
+	rtree_elm_release(elm);
+}
 #endif
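The acquire path works because extents are at least pointer-aligned, so the low bit of every valid extent pointer is clear and can serve as a spinlock that readers mask off. Below is a self-contained C11 sketch of the same lock-bit idiom, with standard atomics standing in for jemalloc's atomic_cas_p()/atomic_write_p() wrappers; all names here are illustrative, not from this commit. Note that C11 compare-exchange returns true on success, the opposite of atomic_cas_p(), hence the negation in the loop:

#include <stdatomic.h>
#include <stdint.h>

typedef struct {
	_Atomic uintptr_t pun;	/* Tagged pointer word. */
} elm_t;

#define LOCK_BIT ((uintptr_t)0x1)

/* Spin until the CAS installs the tagged (locked) value. */
static void *
elm_acquire(elm_t *elm)
{
	uintptr_t raw;

	do {
		/* Expect the unlocked value; fail (and retry) if locked. */
		raw = atomic_load_explicit(&elm->pun,
		    memory_order_relaxed) & ~LOCK_BIT;
	} while (!atomic_compare_exchange_weak_explicit(&elm->pun, &raw,
	    raw | LOCK_BIT, memory_order_acquire, memory_order_relaxed));

	return ((void *)raw);	/* Untagged payload pointer. */
}

/* Store an untagged value, releasing the lock in the same store. */
static void
elm_release(elm_t *elm, void *ptr)
{

	/* ptr is assumed untagged (bit 0 clear), as the diff asserts. */
	atomic_store_explicit(&elm->pun, (uintptr_t)ptr,
	    memory_order_release);
}

This is why rtree_elm_read() masks the lock bit before returning: a plain read may observe an element that some other thread currently holds, and the payload pointer must still be recoverable from the tagged word.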