mirror of
http://github.com/valkey-io/valkey
synced 2024-11-23 20:00:00 +00:00
Use endian neutral hash function for HyperLogLog.
We need to be sure that you can save a dataset in a Redis instance, reload it in a different architecture, and continue to count in the same HyperLogLog structure. So 32 and 64 bit, little or bit endian, must all guarantee to output the same hash for the same element.
This commit is contained in:
parent
4628ac0065
commit
aaf6db459b
@ -213,33 +213,48 @@
|
||||
|
||||
/* ========================= HyperLogLog algorithm ========================= */
|
||||
|
||||
/* Our hahs function is MurmurHash2, 64 bit version. */
|
||||
/* Our hash function is MurmurHash2, 64 bit version.
|
||||
* It was modified for Redis in order to provide the same result in
|
||||
* big and little endian archs (endian neutral). */
|
||||
uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
|
||||
const uint64_t m = 0xc6a4a7935bd1e995;
|
||||
const int r = 47;
|
||||
uint64_t h = seed ^ (len * m);
|
||||
const uint64_t *data = (const uint64_t *)key;
|
||||
const uint64_t *end = data + (len/8);
|
||||
const uint8_t *data = (const uint8_t *)key;
|
||||
const uint8_t *end = data + (len-(len&7));
|
||||
|
||||
while(data != end) {
|
||||
uint64_t k = *data++;
|
||||
uint64_t k;
|
||||
|
||||
#if (BYTE_ORDER == LITTLE_ENDIAN)
|
||||
k = *((uint64_t*)data);
|
||||
#else
|
||||
k = (uint64_t) data[0];
|
||||
k |= (uint64_t) data[1] << 8;
|
||||
k |= (uint64_t) data[2] << 16;
|
||||
k |= (uint64_t) data[3] << 24;
|
||||
k |= (uint64_t) data[4] << 32;
|
||||
k |= (uint64_t) data[5] << 40;
|
||||
k |= (uint64_t) data[6] << 48;
|
||||
k |= (uint64_t) data[7] << 56;
|
||||
#endif
|
||||
|
||||
k *= m;
|
||||
k ^= k >> r;
|
||||
k *= m;
|
||||
h ^= k;
|
||||
h *= m;
|
||||
data += 8;
|
||||
}
|
||||
|
||||
const unsigned char *data2 = (const unsigned char*)data;
|
||||
|
||||
switch(len & 7) {
|
||||
case 7: h ^= (uint64_t)data2[6] << 48;
|
||||
case 6: h ^= (uint64_t)data2[5] << 40;
|
||||
case 5: h ^= (uint64_t)data2[4] << 32;
|
||||
case 4: h ^= (uint64_t)data2[3] << 24;
|
||||
case 3: h ^= (uint64_t)data2[2] << 16;
|
||||
case 2: h ^= (uint64_t)data2[1] << 8;
|
||||
case 1: h ^= (uint64_t)data2[0];
|
||||
case 7: h ^= (uint64_t)data[6] << 48;
|
||||
case 6: h ^= (uint64_t)data[5] << 40;
|
||||
case 5: h ^= (uint64_t)data[4] << 32;
|
||||
case 4: h ^= (uint64_t)data[3] << 24;
|
||||
case 3: h ^= (uint64_t)data[2] << 16;
|
||||
case 2: h ^= (uint64_t)data[1] << 8;
|
||||
case 1: h ^= (uint64_t)data[0];
|
||||
h *= m;
|
||||
};
|
||||
|
||||
@ -462,7 +477,6 @@ void hllCountCommand(redisClient *c) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* This command performs a self-test of the HLL registers implementation.
|
||||
* Something that is not easy to test from within the outside.
|
||||
*
|
||||
|
Loading…
Reference in New Issue
Block a user