valkey/utils/hashtable/rehashing.c
Wang Yuan 75f9dec644
Limit the main db and expires dictionaries to expand (#7954)
As we know, redis may reject users' requests or evict some keys if
used memory is over maxmemory. Expanding dictionaries may make
things worse: some big dictionaries, such as the main db and expires dicts,
may consume a huge amount of memory at once when allocating a new big hash
table, leaving used memory far above maxmemory after expanding.
Related issues: #4213 #4583

In more detail: when redis expands a dict, it allocates a new big
ht[1] that is generally double the size of ht[0]. The size of ht[1] will be
very big if ht[0] is already big. For the db dict, if we have more than
64 million keys, ht[1] costs 1GB when the dict expands.

If the sum of used memory and the memory needed for the dict's new hash
table exceeds maxmemory, we shouldn't allow the dict to expand. If key
eviction is enabled, we still couldn't add many more keys after
eviction and rehashing; what's worse, redis would hold fewer keys, because
the little memory that remains would go to the new hash table instead
of users' data. Moreover, users can't write data to redis at all if key
eviction is disabled.

What does this commit change?

Add a new member function, expandAllowed, to the dict type; it provides a
way for the caller to allow or refuse expansion. We expose two parameters
to this function: the additional memory needed for expanding and the dict's
current load factor, so users can implement a function that decides based
on them.
For the main db dict and expires dict types, these dictionaries may be very
big and cost huge memory when expanding, so we implement a judgement
function: we temporarily stop the dict from expanding if used memory would
be over maxmemory after the dict expands, but to guarantee the performance
of redis, we still allow the dict to expand if its load factor exceeds the
safe load factor.
Add test cases to verify that we don't allow the main db to expand when the
remaining memory is not enough, thereby avoiding key eviction.

Other changes:

The new hash table size on expand. Before this commit, the size was
double the dict's used count, then passed through _dictNextPower. Actually
we aim to keep a dict's load factor between 0.5 and 1.0. Now we replace *2
with +1: since the first check is that used >= size, the previous outcome
will usually be the same as _dictNextPower(used+1). The only case where
it differs is when dict_can_resize is false during fork, so that later
the _dictNextPower(used*2) will cause the dict to jump to *4 (i.e.
_dictNextPower(1025*2) will return 4096).
Fix the rehash test cases affected by the changed algorithm for the new
hash table size on expand.
2020-12-06 11:53:04 +02:00

144 lines
3.5 KiB
C

#include "redis.h"
#include "dict.h"
#include <stdint.h>
#include <string.h>
/* Assertion-failure hook: writes the two strings and the integer it is
 * given on a single "ASSERT:" line to stdout, then terminates the process
 * with exit status 1. This function does not return. */
void _redisAssert(char *x, char *y, int l) {
    fprintf(stdout, "ASSERT: %s %s %d\n", x, y, l);
    exit(1);
}
/* Hash callback used by dictTypeTest.
 *
 * The key is the pointer value itself; nothing is read through keyp. The
 * value is first fed to dictGenHashFunction() and the result is then
 * scrambled by a fixed sequence of shift/add/xor steps. The final
 * unsigned long is narrowed to unsigned int on return. */
unsigned int dictKeyHash(const void *keyp) {
    unsigned long h = (unsigned long)keyp;

    h = dictGenHashFunction(&h, sizeof(h));
    h = h + ~(h << 15);
    h = h ^ (h >> 10);
    h = h + (h << 3);
    h = h ^ (h >> 6);
    h = h + ~(h << 11);
    h = h ^ (h >> 16);
    return (unsigned int)h;
}
/* Key-compare callback used by dictTypeTest.
 *
 * Keys are compared by their pointer value converted to unsigned long; the
 * pointers are never dereferenced. Returns 1 when the two values are equal
 * and 0 otherwise. privdata is not used. */
int dictKeyCompare(void *privdata, const void *key1, const void *key2) {
    (void)privdata;
    return (unsigned long)key1 == (unsigned long)key2;
}
/* Callback table passed to dictCreate() for the dictionaries built by this
 * test program. Only the hash and key-compare slots are populated (with
 * dictKeyHash and dictKeyCompare); every other slot is NULL.
 * NOTE(review): the initializer is positional, so it must stay in step with
 * the field order of dictType in dict.h -- confirm after any change there. */
dictType dictTypeTest = {
dictKeyHash, /* hash function */
NULL, /* key dup */
NULL, /* val dup */
dictKeyCompare, /* key compare */
NULL, /* key destructor */
NULL, /* val destructor */
NULL /* allow to expand */
};
/* Prints one line describing a hash table: the text "NULL" when the table
 * has no bucket array, otherwise one character per bucket ('1' for a
 * non-empty bucket, '0' for an empty one) followed by a newline. */
void showBuckets(dictht ht) {
    int idx;

    if (ht.table == NULL) {
        printf("NULL\n");
        return;
    }

    for (idx = 0; idx < ht.size; idx++) {
        char mark = '0';

        if (ht.table[idx]) mark = '1';
        printf("%c", mark);
    }
    printf("\n");
}
/* Dumps the state of a dict to stdout.
 *
 * When rehashidx is not -1, a "rhidx:" ruler is printed first: one dot per
 * index below rehashidx, closed by '|'. Then the bucket maps of ht[0] and
 * ht[1] are printed via showBuckets(), followed by an empty line. */
void show(dict *d) {
    if (d->rehashidx != -1) {
        int dots = 0;

        printf("rhidx: ");
        while (dots < d->rehashidx) {
            printf(".");
            dots++;
        }
        printf("|\n");
    }

    printf("ht[0]: ");
    showBuckets(d->ht[0]);

    printf("ht[1]: ");
    showBuckets(d->ht[1]);

    printf("\n");
}
/* qsort() comparator that orders an array of pointers by address.
 *
 * a and b each point at one array element, and every element is itself a
 * pointer. Returns a negative value, zero, or a positive value when the
 * first stored pointer is respectively lower than, equal to, or higher
 * than the second.
 *
 * The result comes from two comparisons rather than from a subtraction:
 * narrowing the difference of two addresses to int can produce 0 or the
 * wrong sign once they are 2^31 or more apart, which gives qsort() an
 * inconsistent ordering. */
int sortPointers(const void *a, const void *b) {
    const void *pa, *pb;
    uintptr_t la, lb;

    /* Copy the stored pointers out bytewise so the comparison does not
     * depend on what type the array elements point to. */
    memcpy(&pa, a, sizeof(pa));
    memcpy(&pb, b, sizeof(pb));

    la = (uintptr_t)pa;
    lb = (uintptr_t)pb;
    return (la > lb) - (la < lb);
}
/* Exercises dictGetSomeKeys() on d for `times` rounds.
 *
 * Each round asks for a random number of entries in [0, dictSize(d)] and
 * classifies the outcome:
 *  - perfect: exactly the requested number of entries came back and none
 *    was returned twice; *perfect_run is incremented.
 *  - approximated: anything else; *approx_run is incremented and the
 *    requested / returned / duplicate counts are printed.
 *
 * Duplicates are found by sorting the returned entry pointers with
 * sortPointers() and counting equal neighbours. The scratch array is sized
 * once, from dictSize(d) at entry, and freed before returning. */
void stressGetKeys(dict *d, int times, int *perfect_run, int *approx_run) {
    dictEntry **des = zmalloc(sizeof(dictEntry*)*dictSize(d));
    int round = 0;

    while (round < times) {
        int requested = rand() % (dictSize(d)+1);
        int returned = dictGetSomeKeys(d, des, requested);
        int dup = 0;
        int pos;

        qsort(des, returned, sizeof(dictEntry*), sortPointers);

        /* After sorting, identical pointers sit next to each other. */
        for (pos = 1; pos < returned; pos++) {
            if (des[pos-1] == des[pos]) dup++;
        }

        if (requested != returned || dup != 0) {
            (*approx_run)++;
            printf("Requested, returned, duplicated: %d %d %d\n",
                   requested, returned, dup);
        } else {
            (*perfect_run)++;
        }
        round++;
    }

    zfree(des);
}
/* Number of keys added and then deleted in the bucket-map walkthrough. */
#define MAX1 120
/* Number of keys added and then deleted in the dictGetSomeKeys stress test. */
#define MAX2 1000

/* Test driver with two phases.
 *
 * Phase 1 adds MAX1 keys (the integers 0..MAX1-1 cast to pointers, with
 * NULL values) to a fresh dict, printing the bucket maps after every
 * insertion, then deletes them one by one, calling dictResize() and
 * printing the maps after every deletion.
 *
 * Phase 2 repeats the add/delete cycle with MAX2 keys on a new dict and,
 * after every single add or delete, runs 100 rounds of stressGetKeys(),
 * finally printing the perfect/approximated totals.
 *
 * The program performs no checks of its own: "TEST PASSED!" is printed
 * whenever execution reaches the end, and the exit status is 0. */
int main(void) {
    dict *d = dictCreate(&dictTypeTest,NULL);
    unsigned long i;
    int perfect_run = 0;
    int approx_run = 0;

    srand(time(NULL));

    /* Phase 1: grow the table step by step and show each state. */
    i = 0;
    while (i < MAX1) {
        dictAdd(d,(void*)i,NULL);
        show(d);
        i++;
    }
    printf("Size: %d\n", (int)dictSize(d));

    /* Phase 1, continued: shrink it again, resizing after each delete. */
    i = 0;
    while (i < MAX1) {
        dictDelete(d,(void*)i);
        dictResize(d);
        show(d);
        i++;
    }
    dictRelease(d);

    /* Phase 2: stress dictGetSomeKeys() at every dict size on the way up
     * and on the way down. */
    d = dictCreate(&dictTypeTest,NULL);
    printf("Stress testing dictGetSomeKeys\n");

    i = 0;
    while (i < MAX2) {
        dictAdd(d,(void*)i,NULL);
        stressGetKeys(d,100,&perfect_run,&approx_run);
        i++;
    }

    i = 0;
    while (i < MAX2) {
        dictDelete(d,(void*)i);
        dictResize(d);
        stressGetKeys(d,100,&perfect_run,&approx_run);
        i++;
    }

    printf("dictGetSomeKey, %d perfect runs, %d approximated runs\n",
           perfect_run, approx_run);
    dictRelease(d);

    printf("TEST PASSED!\n");
    return 0;
}