mirror of
http://github.com/valkey-io/valkey
synced 2024-11-23 20:00:00 +00:00
Better distribution for set get-random-element operations.
This commit is contained in:
parent
e6948b8f28
commit
61a01793ed
24
src/dict.c
24
src/dict.c
@ -739,6 +739,30 @@ unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
|
|||||||
return stored;
|
return stored;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* This is like dictGetRandomKey() from the POV of the API, but will do more
|
||||||
|
* work to ensure a better distribution of the returned element.
|
||||||
|
*
|
||||||
|
* This function improves the distribution because the dictGetRandomKey()
|
||||||
|
* problem is that it selects a random bucket, then it selects a random
|
||||||
|
* element from the chain in the bucket. However elements being in different
|
||||||
|
* chain lengths will have different probabilities of being reported. With
|
||||||
|
* this function instead what we do is to consider a "linear" range of the table
|
||||||
|
* that may be constituted of N buckets with chains of different lengths
|
||||||
|
* appearing one after the other. Then we report a random element in the range.
|
||||||
|
* In this way we smooth away the problem of different chain lenghts. */
|
||||||
|
#define GETFAIR_NUM_ENTRIES 20
|
||||||
|
dictEntry *dictGetFairRandomKey(dict *d) {
|
||||||
|
dictEntry *entries[GETFAIR_NUM_ENTRIES];
|
||||||
|
unsigned int count = dictGetSomeKeys(d,entries,GETFAIR_NUM_ENTRIES);
|
||||||
|
/* Note that dictGetSomeKeys() may return zero elements in an unlucky
|
||||||
|
* run() even if there are actually elements inside the hash table. So
|
||||||
|
* when we get zero, we call the true dictGetRandomKey() that will always
|
||||||
|
* yeld the element if the hash table has at least one. */
|
||||||
|
if (count == 0) return dictGetRandomKey(d);
|
||||||
|
unsigned int idx = rand() % count;
|
||||||
|
return entries[idx];
|
||||||
|
}
|
||||||
|
|
||||||
/* Function to reverse bits. Algorithm from:
|
/* Function to reverse bits. Algorithm from:
|
||||||
* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
|
* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
|
||||||
static unsigned long rev(unsigned long v) {
|
static unsigned long rev(unsigned long v) {
|
||||||
|
@ -166,6 +166,7 @@ dictIterator *dictGetSafeIterator(dict *d);
|
|||||||
dictEntry *dictNext(dictIterator *iter);
|
dictEntry *dictNext(dictIterator *iter);
|
||||||
void dictReleaseIterator(dictIterator *iter);
|
void dictReleaseIterator(dictIterator *iter);
|
||||||
dictEntry *dictGetRandomKey(dict *d);
|
dictEntry *dictGetRandomKey(dict *d);
|
||||||
|
dictEntry *dictGetFairRandomKey(dict *d);
|
||||||
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
|
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
|
||||||
void dictGetStats(char *buf, size_t bufsize, dict *d);
|
void dictGetStats(char *buf, size_t bufsize, dict *d);
|
||||||
uint64_t dictGenHashFunction(const void *key, int len);
|
uint64_t dictGenHashFunction(const void *key, int len);
|
||||||
|
@ -207,7 +207,7 @@ sds setTypeNextObject(setTypeIterator *si) {
|
|||||||
* used field with values which are easy to trap if misused. */
|
* used field with values which are easy to trap if misused. */
|
||||||
int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele) {
|
int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele) {
|
||||||
if (setobj->encoding == OBJ_ENCODING_HT) {
|
if (setobj->encoding == OBJ_ENCODING_HT) {
|
||||||
dictEntry *de = dictGetRandomKey(setobj->ptr);
|
dictEntry *de = dictGetFairRandomKey(setobj->ptr);
|
||||||
*sdsele = dictGetKey(de);
|
*sdsele = dictGetKey(de);
|
||||||
*llele = -123456789; /* Not needed. Defensive. */
|
*llele = -123456789; /* Not needed. Defensive. */
|
||||||
} else if (setobj->encoding == OBJ_ENCODING_INTSET) {
|
} else if (setobj->encoding == OBJ_ENCODING_INTSET) {
|
||||||
|
Loading…
Reference in New Issue
Block a user