Update hash from MurmurHash2 to MurmurHash3.

Update hash from MurmurHash2 to MurmurHash3, primarily because the
latter generates 128 bits in a single call for no extra cost, which
simplifies integration with cuckoo hashing.
This commit is contained in:
Jason Evans
2013-01-22 12:02:08 -08:00
parent 7329a4f038
commit ae03bf6a57
6 changed files with 337 additions and 129 deletions

View File

@@ -70,20 +70,20 @@ ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key)
JEMALLOC_INLINE size_t
ckh_isearch(ckh_t *ckh, const void *key)
{
size_t hash1, hash2, bucket, cell;
size_t hashes[2], bucket, cell;
assert(ckh != NULL);
ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
ckh->hash(key, hashes);
/* Search primary bucket. */
bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
cell = ckh_bucket_search(ckh, bucket, key);
if (cell != SIZE_T_MAX)
return (cell);
/* Search secondary bucket. */
bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
cell = ckh_bucket_search(ckh, bucket, key);
return (cell);
}
@@ -126,7 +126,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
{
const void *key, *data, *tkey, *tdata;
ckhc_t *cell;
size_t hash1, hash2, bucket, tbucket;
size_t hashes[2], bucket, tbucket;
unsigned i;
bucket = argbucket;
@@ -155,10 +155,11 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
#endif
/* Find the alternate bucket for the evicted item. */
ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
tbucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
ckh->hash(key, hashes);
tbucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
if (tbucket == bucket) {
tbucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
tbucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets)
- 1);
/*
* It may be that (tbucket == bucket) still, if the
* item's hashes both indicate this bucket. However,
@@ -192,19 +193,19 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
JEMALLOC_INLINE bool
ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)
{
size_t hash1, hash2, bucket;
size_t hashes[2], bucket;
const void *key = *argkey;
const void *data = *argdata;
ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
ckh->hash(key, hashes);
/* Try to insert in primary bucket. */
bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
return (false);
/* Try to insert in secondary bucket. */
bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
return (false);
@@ -526,31 +527,10 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
}
void
ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
ckh_string_hash(const void *key, size_t r_hash[2])
{
size_t ret1, ret2;
uint64_t h;
assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
assert(hash1 != NULL);
assert(hash2 != NULL);
h = hash(key, strlen((const char *)key), UINT64_C(0x94122f335b332aea));
if (minbits <= 32) {
/*
* Avoid doing multiple hashes, since a single hash provides
* enough bits.
*/
ret1 = h & ZU(0xffffffffU);
ret2 = h >> 32;
} else {
ret1 = h;
ret2 = hash(key, strlen((const char *)key),
UINT64_C(0x8432a476666bbc13));
}
*hash1 = ret1;
*hash2 = ret2;
hash(key, strlen((const char *)key), 0x94122f33U, r_hash);
}
bool
@@ -564,41 +544,16 @@ ckh_string_keycomp(const void *k1, const void *k2)
}
void
ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
size_t *hash2)
ckh_pointer_hash(const void *key, size_t r_hash[2])
{
size_t ret1, ret2;
uint64_t h;
union {
const void *v;
uint64_t i;
size_t i;
} u;
assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
assert(hash1 != NULL);
assert(hash2 != NULL);
assert(sizeof(u.v) == sizeof(u.i));
#if (LG_SIZEOF_PTR != LG_SIZEOF_INT)
u.i = 0;
#endif
u.v = key;
h = hash(&u.i, sizeof(u.i), UINT64_C(0xd983396e68886082));
if (minbits <= 32) {
/*
* Avoid doing multiple hashes, since a single hash provides
* enough bits.
*/
ret1 = h & ZU(0xffffffffU);
ret2 = h >> 32;
} else {
assert(SIZEOF_PTR == 8);
ret1 = h;
ret2 = hash(&u.i, sizeof(u.i), UINT64_C(0x5e2be9aff8709a5d));
}
*hash1 = ret1;
*hash2 = ret2;
hash(&u.i, sizeof(u.i), 0xd983396eU, r_hash);
}
bool

View File

@@ -90,8 +90,7 @@ static bool prof_dump(bool propagate_err, const char *filename,
bool leakcheck);
static void prof_dump_filename(char *filename, char v, int64_t vseq);
static void prof_fdump(void);
static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
size_t *hash2);
static void prof_bt_hash(const void *key, size_t r_hash[2]);
static bool prof_bt_keycomp(const void *k1, const void *k2);
static malloc_mutex_t *prof_ctx_mutex_choose(void);
@@ -1043,34 +1042,13 @@ prof_gdump(void)
}
static void
prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
prof_bt_hash(const void *key, size_t r_hash[2])
{
size_t ret1, ret2;
uint64_t h;
prof_bt_t *bt = (prof_bt_t *)key;
cassert(config_prof);
assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
assert(hash1 != NULL);
assert(hash2 != NULL);
h = hash(bt->vec, bt->len * sizeof(void *),
UINT64_C(0x94122f335b332aea));
if (minbits <= 32) {
/*
* Avoid doing multiple hashes, since a single hash provides
* enough bits.
*/
ret1 = h & ZU(0xffffffffU);
ret2 = h >> 32;
} else {
ret1 = h;
ret2 = hash(bt->vec, bt->len * sizeof(void *),
UINT64_C(0x8432a476666bbc13));
}
*hash1 = ret1;
*hash2 = ret2;
hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
}
static bool