Use linear scan for small bitmaps

For small bitmaps, a linear scan of the bitmap is slightly faster than
a tree search - bitmap_t is more compact, and there are fewer writes
since we don't have to propogate state transitions up the tree.
On x86_64 with the current settings, I'm seeing ~.5%-1% CPU improvement
in production canaries with this change.

The old tree code is left since 32bit sizes are much larger (and ffsl
smaller), and maybe the run sizes will change in the future.

This resolves #339.
This commit is contained in:
Dave Watson 2016-02-24 08:04:43 -08:00 committed by Jason Evans
parent 01ecdf32d6
commit b8823ab026
2 changed files with 88 additions and 3 deletions

View File

@ -15,6 +15,15 @@ typedef unsigned long bitmap_t;
#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS)
#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1)
/*
* Do some analysis on how big the bitmap is before we use a tree. For a brute
* force linear search, if we would have to call ffsl more than 2^3 times, use a
* tree instead.
*/
#if LG_RUN_MAXREGS - LG_BITMAP_GROUP_NBITS > 3
# define USE_TREE
#endif
/* Number of groups required to store a given number of bits. */
#define BITMAP_BITS2GROUPS(nbits) \
((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS)
@ -48,6 +57,8 @@ typedef unsigned long bitmap_t;
/*
* Maximum number of groups required to support LG_BITMAP_MAXBITS.
*/
#ifdef USE_TREE
#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS
# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS)
#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2
@ -65,6 +76,13 @@ typedef unsigned long bitmap_t;
(LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \
+ !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP)
#else /* USE_TREE */
#define BITMAP_GROUPS_MAX \
(ZU(1) << (LG_RUN_MAXREGS - LG_SIZEOF_BITMAP - LG_SIZEOF_BITMAP))
#endif /* USE_TREE */
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
@ -78,6 +96,7 @@ struct bitmap_info_s {
/* Logical number of bits in bitmap (stored at bottom level). */
size_t nbits;
#ifdef USE_TREE
/* Number of levels necessary for nbits. */
unsigned nlevels;
@ -86,6 +105,10 @@ struct bitmap_info_s {
* bottom to top (e.g. the bottom level is stored in levels[0]).
*/
bitmap_level_t levels[BITMAP_MAX_LEVELS+1];
#else /* USE_TREE */
/* Number of groups necessary for nbits. */
size_t ngroups;
#endif /* USE_TREE */
};
#endif /* JEMALLOC_H_STRUCTS */
@ -112,10 +135,20 @@ void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
JEMALLOC_INLINE bool
bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo)
{
#ifdef USE_TREE
size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1;
bitmap_t rg = bitmap[rgoff];
/* The bitmap is full iff the root group is 0. */
return (rg == 0);
#else
size_t i;
for (i = 0; i < binfo->ngroups; i++) {
if (bitmap[i] != 0)
return (false);
}
return (true);
#endif
}
JEMALLOC_INLINE bool
@ -146,6 +179,7 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
assert(bitmap_get(bitmap, binfo, bit));
#ifdef USE_TREE
/* Propagate group state transitions up the tree. */
if (g == 0) {
unsigned i;
@ -161,6 +195,7 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
break;
}
}
#endif
}
/* sfu: set first unset. */
@ -173,6 +208,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo)
assert(!bitmap_full(bitmap, binfo));
#ifdef USE_TREE
i = binfo->nlevels - 1;
g = bitmap[binfo->levels[i].group_offset];
bit = ffs_lu(g) - 1;
@ -181,7 +217,15 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo)
g = bitmap[binfo->levels[i].group_offset + bit];
bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1);
}
#else
i = 0;
g = bitmap[0];
while ((bit = ffs_lu(g)) == 0) {
i++;
g = bitmap[i];
}
bit = (bit - 1) + (i << 6);
#endif
bitmap_set(bitmap, binfo, bit);
return (bit);
}
@ -192,7 +236,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
size_t goff;
bitmap_t *gp;
bitmap_t g;
bool propagate;
UNUSED bool propagate;
assert(bit < binfo->nbits);
assert(bitmap_get(bitmap, binfo, bit));
@ -204,6 +248,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
assert(!bitmap_get(bitmap, binfo, bit));
#ifdef USE_TREE
/* Propagate group state transitions up the tree. */
if (propagate) {
unsigned i;
@ -221,6 +266,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
break;
}
}
#endif /* USE_TREE */
}
#endif

View File

@ -3,6 +3,8 @@
/******************************************************************************/
#ifdef USE_TREE
void
bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
{
@ -32,6 +34,13 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
binfo->nbits = nbits;
}
static size_t
bitmap_info_ngroups(const bitmap_info_t *binfo)
{
return (binfo->levels[binfo->nlevels].group_offset);
}
void
bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo)
{
@ -60,13 +69,43 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo)
}
}
#else /* USE_TREE */
void
bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
{
size_t i;
assert(nbits > 0);
assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS));
i = nbits >> LG_BITMAP_GROUP_NBITS;
if (nbits % BITMAP_GROUP_NBITS != 0)
i++;
binfo->ngroups = i;
binfo->nbits = nbits;
}
static size_t
bitmap_info_ngroups(const bitmap_info_t *binfo)
{
return (binfo->levels[binfo->nlevels].group_offset);
return (binfo->ngroups);
}
void
bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo)
{
size_t extra;
memset(bitmap, 0xffU, bitmap_size(binfo));
extra = (binfo->nbits % (binfo->ngroups * BITMAP_GROUP_NBITS));
if (extra != 0)
bitmap[binfo->ngroups - 1] >>= (BITMAP_GROUP_NBITS - extra);
}
#endif /* USE_TREE */
size_t
bitmap_size(const bitmap_info_t *binfo)
{