Update hash from MurmurHash2 to MurmurHash3.

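Update hash from MurmurHash2 to MurmurHash3, primarily because the latter generates 128 bits in a single call for no extra cost, which simplifies integration with cuckoo hashing: each key needs two hash values (one for its primary bucket and one for its secondary bucket), and a single call now supplies both.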
2013-01-22 12:02:08 -08:00
parent 7329a4f038
commit ae03bf6a57
6 changed files with 337 additions and 129 deletions
--- a/src/ckh.c
+++ b/src/ckh.c
@@ -70,20 +70,20 @@ ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key)
 JEMALLOC_INLINE size_t
 ckh_isearch(ckh_t *ckh, const void *key)
 {
-	size_t hash1, hash2, bucket, cell;
+	size_t hashes[2], bucket, cell;

 	assert(ckh != NULL);

-	ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
+	ckh->hash(key, hashes);

 	/* Search primary bucket. */
-	bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 	cell = ckh_bucket_search(ckh, bucket, key);
 	if (cell != SIZE_T_MAX)
 		return (cell);

 	/* Search secondary bucket. */
-	bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 	cell = ckh_bucket_search(ckh, bucket, key);
 	return (cell);
 }
@@ -126,7 +126,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 {
 	const void *key, *data, *tkey, *tdata;
 	ckhc_t *cell;
-	size_t hash1, hash2, bucket, tbucket;
+	size_t hashes[2], bucket, tbucket;
 	unsigned i;

 	bucket = argbucket;
@@ -155,10 +155,11 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 #endif

 		/* Find the alternate bucket for the evicted item. */
-		ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
-		tbucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+		ckh->hash(key, hashes);
+		tbucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 		if (tbucket == bucket) {
-			tbucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+			tbucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets)
+			    - 1);
 			/*
 			 * It may be that (tbucket == bucket) still, if the
 			 * item's hashes both indicate this bucket.  However,
@@ -192,19 +193,19 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 JEMALLOC_INLINE bool
 ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)
 {
-	size_t hash1, hash2, bucket;
+	size_t hashes[2], bucket;
 	const void *key = *argkey;
 	const void *data = *argdata;

-	ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
+	ckh->hash(key, hashes);

 	/* Try to insert in primary bucket. */
-	bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 	if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
 		return (false);

 	/* Try to insert in secondary bucket. */
-	bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 	if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
 		return (false);

@@ -526,31 +527,10 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
 }

 void
-ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
+ckh_string_hash(const void *key, size_t r_hash[2])
 {
-	size_t ret1, ret2;
-	uint64_t h;

-	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
-	assert(hash1 != NULL);
-	assert(hash2 != NULL);
-
-	h = hash(key, strlen((const char *)key), UINT64_C(0x94122f335b332aea));
-	if (minbits <= 32) {
-		/*
-		 * Avoid doing multiple hashes, since a single hash provides
-		 * enough bits.
-		 */
-		ret1 = h & ZU(0xffffffffU);
-		ret2 = h >> 32;
-	} else {
-		ret1 = h;
-		ret2 = hash(key, strlen((const char *)key),
-		    UINT64_C(0x8432a476666bbc13));
-	}
-
-	*hash1 = ret1;
-	*hash2 = ret2;
+	hash(key, strlen((const char *)key), 0x94122f33U, r_hash);
 }

 bool
@@ -564,41 +544,16 @@ ckh_string_keycomp(const void *k1, const void *k2)
 }

 void
-ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
-    size_t *hash2)
+ckh_pointer_hash(const void *key, size_t r_hash[2])
 {
-	size_t ret1, ret2;
-	uint64_t h;
 	union {
 		const void	*v;
-		uint64_t	i;
+		size_t		i;
 	} u;

-	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
-	assert(hash1 != NULL);
-	assert(hash2 != NULL);
-
 	assert(sizeof(u.v) == sizeof(u.i));
-#if (LG_SIZEOF_PTR != LG_SIZEOF_INT)
-	u.i = 0;
-#endif
 	u.v = key;
-	h = hash(&u.i, sizeof(u.i), UINT64_C(0xd983396e68886082));
-	if (minbits <= 32) {
-		/*
-		 * Avoid doing multiple hashes, since a single hash provides
-		 * enough bits.
-		 */
-		ret1 = h & ZU(0xffffffffU);
-		ret2 = h >> 32;
-	} else {
-		assert(SIZEOF_PTR == 8);
-		ret1 = h;
-		ret2 = hash(&u.i, sizeof(u.i), UINT64_C(0x5e2be9aff8709a5d));
-	}
-
-	*hash1 = ret1;
-	*hash2 = ret2;
+	hash(&u.i, sizeof(u.i), 0xd983396eU, r_hash);
 }

 bool
--- a/src/prof.c
+++ b/src/prof.c
@@ -90,8 +90,7 @@ static bool	prof_dump(bool propagate_err, const char *filename,
    bool leakcheck);
 static void	prof_dump_filename(char *filename, char v, int64_t vseq);
 static void	prof_fdump(void);
-static void	prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
-    size_t *hash2);
+static void	prof_bt_hash(const void *key, size_t r_hash[2]);
 static bool	prof_bt_keycomp(const void *k1, const void *k2);
 static malloc_mutex_t	*prof_ctx_mutex_choose(void);

@@ -1043,34 +1042,13 @@ prof_gdump(void)
 }

 static void
-prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
+prof_bt_hash(const void *key, size_t r_hash[2])
 {
-	size_t ret1, ret2;
-	uint64_t h;
 	prof_bt_t *bt = (prof_bt_t *)key;

 	cassert(config_prof);
-	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
-	assert(hash1 != NULL);
-	assert(hash2 != NULL);

-	h = hash(bt->vec, bt->len * sizeof(void *),
-	    UINT64_C(0x94122f335b332aea));
-	if (minbits <= 32) {
-		/*
-		 * Avoid doing multiple hashes, since a single hash provides
-		 * enough bits.
-		 */
-		ret1 = h & ZU(0xffffffffU);
-		ret2 = h >> 32;
-	} else {
-		ret1 = h;
-		ret2 = hash(bt->vec, bt->len * sizeof(void *),
-		    UINT64_C(0x8432a476666bbc13));
-	}
-
-	*hash1 = ret1;
-	*hash2 = ret2;
+	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
 }

 static bool