mirror of
http://github.com/valkey-io/valkey
synced 2024-11-22 00:52:38 +00:00
basic VM mostly working!
This commit is contained in:
parent
d894161b85
commit
4ef8de8ad7
157
redis.c
157
redis.c
@ -361,7 +361,7 @@ struct redisServer {
|
||||
redisClient *master; /* client that is master for this slave */
|
||||
int replstate;
|
||||
unsigned int maxclients;
|
||||
unsigned long maxmemory;
|
||||
unsigned long long maxmemory;
|
||||
unsigned int blockedclients;
|
||||
/* Sort parameters - qsort_r() is only available under BSD so we
|
||||
* have to take this state global, in order to pass it to sortCompare() */
|
||||
@ -372,7 +372,7 @@ struct redisServer {
|
||||
int vm_enabled;
|
||||
off_t vm_page_size;
|
||||
off_t vm_pages;
|
||||
long vm_max_memory;
|
||||
unsigned long long vm_max_memory;
|
||||
/* Virtual memory state */
|
||||
FILE *vm_fp;
|
||||
int vm_fd;
|
||||
@ -459,6 +459,7 @@ static void addReplySds(redisClient *c, sds s);
|
||||
static void incrRefCount(robj *o);
|
||||
static int rdbSaveBackground(char *filename);
|
||||
static robj *createStringObject(char *ptr, size_t len);
|
||||
static robj *dupStringObject(robj *o);
|
||||
static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc);
|
||||
static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
|
||||
static int syncWithMaster(void);
|
||||
@ -491,6 +492,7 @@ static int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele);
|
||||
static void vmInit(void);
|
||||
static void vmMarkPagesFree(off_t page, off_t count);
|
||||
static robj *vmLoadObject(robj *key);
|
||||
static int vmSwapOneObject(void);
|
||||
|
||||
static void authCommand(redisClient *c);
|
||||
static void pingCommand(redisClient *c);
|
||||
@ -1153,7 +1155,7 @@ static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientD
|
||||
/* Continue to expire if at the end of the cycle more than 25%
|
||||
* of the keys were expired. */
|
||||
do {
|
||||
int num = dictSize(db->expires);
|
||||
long num = dictSize(db->expires);
|
||||
time_t now = time(NULL);
|
||||
|
||||
expired = 0;
|
||||
@ -1173,6 +1175,15 @@ static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientD
|
||||
} while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4);
|
||||
}
|
||||
|
||||
/* Swap a few keys on disk if we are over the memory limit and VM
|
||||
* is enabled. */
|
||||
while (server.vm_enabled && zmalloc_used_memory() > server.vm_max_memory) {
|
||||
if (vmSwapOneObject() == REDIS_ERR) {
|
||||
redisLog(REDIS_WARNING,"WARNING: vm-max-memory limit reached but unable to swap more objects out!");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check if we should connect to a MASTER */
|
||||
if (server.replstate == REDIS_REPL_CONNECT) {
|
||||
redisLog(REDIS_NOTICE,"Connecting to MASTER...");
|
||||
@ -1502,6 +1513,12 @@ static void loadServerConfig(char *filename) {
|
||||
if ((server.vm_enabled = yesnotoi(argv[1])) == -1) {
|
||||
err = "argument must be 'yes' or 'no'"; goto loaderr;
|
||||
}
|
||||
} else if (!strcasecmp(argv[0],"vm-max-memory") && argc == 2) {
|
||||
server.vm_max_memory = strtoll(argv[1], NULL, 10);
|
||||
} else if (!strcasecmp(argv[0],"vm-page-size") && argc == 2) {
|
||||
server.vm_page_size = strtoll(argv[1], NULL, 10);
|
||||
} else if (!strcasecmp(argv[0],"vm-pages") && argc == 2) {
|
||||
server.vm_pages = strtoll(argv[1], NULL, 10);
|
||||
} else {
|
||||
err = "Bad directive or wrong number of arguments"; goto loaderr;
|
||||
}
|
||||
@ -2278,6 +2295,10 @@ static robj *createStringObject(char *ptr, size_t len) {
|
||||
return createObject(REDIS_STRING,sdsnewlen(ptr,len));
|
||||
}
|
||||
|
||||
/* Build a brand new string object from the content of 'o'.
 *
 * The new object is obtained from createStringObject() using o->ptr and
 * its sds length, so the caller gets an object that is independent from
 * 'o'. NOTE(review): sdslen() is applied to o->ptr, so this presumably
 * expects 'o' to be a raw (sds) encoded string - confirm at call sites. */
static robj *dupStringObject(robj *o) {
    char *src = o->ptr;
    size_t srclen = sdslen(o->ptr);

    return createStringObject(src,srclen);
}
|
||||
|
||||
static robj *createListObject(void) {
|
||||
list *l = listCreate();
|
||||
|
||||
@ -6656,6 +6677,8 @@ static void vmInit(void) {
|
||||
redisLog(REDIS_NOTICE,"Swap file allocated with success");
|
||||
}
|
||||
server.vm_bitmap = zmalloc((server.vm_pages+7)/8);
|
||||
redisLog(REDIS_DEBUG,"Allocated %lld bytes page table for %lld pages",
|
||||
(long long) (server.vm_pages+7)/8, server.vm_pages);
|
||||
memset(server.vm_bitmap,0,(server.vm_pages+7)/8);
|
||||
/* Try to remove the swap file, so the OS will really delete it from the
|
||||
* file system when Redis exits. */
|
||||
@ -6667,7 +6690,8 @@ static void vmMarkPageUsed(off_t page) {
|
||||
off_t byte = page/8;
|
||||
int bit = page&7;
|
||||
server.vm_bitmap[byte] |= 1<<bit;
|
||||
printf("Mark used: %lld (byte:%d bit:%d)\n", (long long)page, byte, bit);
|
||||
printf("Mark used: %lld (byte:%lld bit:%d)\n", (long long)page,
|
||||
(long long)byte, bit);
|
||||
}
|
||||
|
||||
/* Mark N contiguous pages as used, with 'page' being the first. */
|
||||
@ -6782,6 +6806,7 @@ static int vmSwapObject(robj *key, robj *val) {
|
||||
off_t page;
|
||||
|
||||
assert(key->storage == REDIS_VM_MEMORY);
|
||||
assert(key->refcount == 1);
|
||||
if (vmFindContiguousPages(&page,pages) == REDIS_ERR) return REDIS_ERR;
|
||||
if (fseeko(server.vm_fp,page*server.vm_page_size,SEEK_SET) == -1) {
|
||||
redisLog(REDIS_WARNING,
|
||||
@ -6827,6 +6852,123 @@ static robj *vmLoadObject(robj *key) {
|
||||
return val;
|
||||
}
|
||||
|
||||
/* How a good candidate is this object for swapping?
|
||||
* The better candidate it is, the greater the returned value.
|
||||
*
|
||||
* Currently we try to perform a fast estimation of the object size in
|
||||
* memory, and combine it with aging informations.
|
||||
*
|
||||
* Basically swappability = idle-time * log(estimated size)
|
||||
*
|
||||
* Bigger objects are preferred over smaller objects, but not
|
||||
* proportionally, this is why we use the logarithm. This algorithm is
|
||||
* just a first try and will probably be tuned later. */
|
||||
static double computeObjectSwappability(robj *o) {
|
||||
time_t age = server.unixtime - o->vm.atime;
|
||||
long asize = 0;
|
||||
list *l;
|
||||
dict *d;
|
||||
struct dictEntry *de;
|
||||
int z;
|
||||
|
||||
if (age <= 0) return 0;
|
||||
switch(o->type) {
|
||||
case REDIS_STRING:
|
||||
if (o->encoding != REDIS_ENCODING_RAW) {
|
||||
asize = sizeof(*o);
|
||||
} else {
|
||||
asize = sdslen(o->ptr)+sizeof(*o)+sizeof(long)*2;
|
||||
}
|
||||
break;
|
||||
case REDIS_LIST:
|
||||
l = o->ptr;
|
||||
listNode *ln = listFirst(l);
|
||||
|
||||
asize = sizeof(list);
|
||||
if (ln) {
|
||||
robj *ele = ln->value;
|
||||
long elesize;
|
||||
|
||||
elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
|
||||
(sizeof(*o)+sdslen(ele->ptr)) :
|
||||
sizeof(*o);
|
||||
asize += (sizeof(listNode)+elesize)*listLength(l);
|
||||
}
|
||||
break;
|
||||
case REDIS_SET:
|
||||
case REDIS_ZSET:
|
||||
z = (o->type == REDIS_ZSET);
|
||||
d = z ? ((zset*)o->ptr)->dict : o->ptr;
|
||||
|
||||
asize = sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
|
||||
if (z) asize += sizeof(zset)-sizeof(dict);
|
||||
if (dictSize(d)) {
|
||||
long elesize;
|
||||
robj *ele;
|
||||
|
||||
de = dictGetRandomKey(d);
|
||||
ele = dictGetEntryKey(de);
|
||||
elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
|
||||
(sizeof(*o)+sdslen(ele->ptr)) :
|
||||
sizeof(*o);
|
||||
asize += (sizeof(struct dictEntry)+elesize)*dictSize(d);
|
||||
if (z) asize += sizeof(zskiplistNode)*dictSize(d);
|
||||
}
|
||||
break;
|
||||
}
|
||||
return (double)asize*log(1+asize);
|
||||
}
|
||||
|
||||
/* Try to swap an object that's a good candidate for swapping.
|
||||
* Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
|
||||
* to swap any object at all. */
|
||||
static int vmSwapOneObject(void) {
|
||||
int j, i;
|
||||
struct dictEntry *best = NULL;
|
||||
double best_swappability = 0;
|
||||
robj *key, *val;
|
||||
|
||||
for (j = 0; j < server.dbnum; j++) {
|
||||
redisDb *db = server.db+j;
|
||||
|
||||
if (dictSize(db->dict) == 0) continue;
|
||||
for (i = 0; i < 5; i++) {
|
||||
dictEntry *de;
|
||||
double swappability;
|
||||
|
||||
de = dictGetRandomKey(db->dict);
|
||||
key = dictGetEntryKey(de);
|
||||
val = dictGetEntryVal(de);
|
||||
if (key->storage != REDIS_VM_MEMORY) continue;
|
||||
swappability = computeObjectSwappability(val);
|
||||
if (!best || swappability > best_swappability) {
|
||||
best = de;
|
||||
best_swappability = swappability;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (best == NULL) return REDIS_ERR;
|
||||
key = dictGetEntryKey(best);
|
||||
val = dictGetEntryVal(best);
|
||||
|
||||
redisLog(REDIS_DEBUG,"Key with best swappability: %s, %f\n",
|
||||
key->ptr, best_swappability);
|
||||
|
||||
/* Unshare the key if needed */
|
||||
if (key->refcount > 1) {
|
||||
robj *newkey = dupStringObject(key);
|
||||
decrRefCount(key);
|
||||
key = dictGetEntryKey(best) = newkey;
|
||||
}
|
||||
/* Swap it */
|
||||
if (vmSwapObject(key,val) == REDIS_OK) {
|
||||
dictGetEntryVal(best) = NULL;
|
||||
return REDIS_OK;
|
||||
} else {
|
||||
return REDIS_ERR;
|
||||
}
|
||||
}
|
||||
|
||||
/* ================================= Debugging ============================== */
|
||||
|
||||
static void debugCommand(redisClient *c) {
|
||||
@ -6880,6 +7022,13 @@ static void debugCommand(redisClient *c) {
|
||||
}
|
||||
key = dictGetEntryKey(de);
|
||||
val = dictGetEntryVal(de);
|
||||
/* If the key is shared we want to create a copy */
|
||||
if (key->refcount > 1) {
|
||||
robj *newkey = dupStringObject(key);
|
||||
decrRefCount(key);
|
||||
key = dictGetEntryKey(de) = newkey;
|
||||
}
|
||||
/* Swap it */
|
||||
if (key->storage != REDIS_VM_MEMORY) {
|
||||
addReplySds(c,sdsnew("-ERR This key is not in memory\r\n"));
|
||||
} else if (vmSwapObject(key,val) == REDIS_OK) {
|
||||
|
36
redis.conf
36
redis.conf
@ -166,7 +166,43 @@ appendfsync always
|
||||
|
||||
################################ VIRTUAL MEMORY ###############################
|
||||
|
||||
# Virtual Memory allows Redis to work with datasets bigger than the actual
|
||||
# amount of RAM available to hold the whole dataset in memory.
|
||||
# In order to do so, frequently used keys are kept in memory while the other keys
|
||||
# are swapped into a swap file, similarly to what operating systems do
|
||||
# with memory pages.
|
||||
#
|
||||
# To enable VM just set 'vm-enabled' to yes, and set the following three
|
||||
# VM parameters according to your needs.
|
||||
|
||||
vm-enabled yes
|
||||
# vm-enabled no
|
||||
|
||||
# vm-max-memory configures the VM to use at most the specified amount of
|
||||
# RAM. Everything that does not fit will be swapped on disk *if* possible, that
|
||||
# is, if there is still enough contiguous space in the swap file.
|
||||
vm-max-memory 10000000
|
||||
|
||||
# The Redis swap file is split into pages. An object can be saved using multiple
|
||||
# contiguous pages, but pages can't be shared between different objects.
|
||||
# So if your page is too big, small objects swapped out on disk will waste
|
||||
# a lot of space. If your page is too small, there is less space in the swap
|
||||
# file (assuming you configured the same number of total swap file pages).
|
||||
#
|
||||
# If you use a lot of small objects, use a page size of 64 or 32 bytes.
|
||||
# If you use a lot of big objects, use a bigger page size.
|
||||
# If unsure, use the default :)
|
||||
vm-page-size 256
|
||||
|
||||
# Number of total memory pages in the swap file.
|
||||
# Given that the page table (a bitmap of free/used pages) is kept in memory,
|
||||
# every 8 pages on disk will consume 1 byte of RAM.
|
||||
#
|
||||
# The total swap size is vm-page-size * vm-pages
|
||||
#
|
||||
# With the default of 256-bytes memory pages and 104857600 pages Redis will
|
||||
# use a 25 GB swap file, that will use roughly 13 MB of RAM for the page table.
|
||||
vm-pages 104857600
|
||||
|
||||
############################### ADVANCED CONFIG ###############################
|
||||
|
||||
|
@ -17,6 +17,7 @@ static struct redisFunctionSym symsTable[] = {
|
||||
{"call",(unsigned long)call},
|
||||
{"closeTimedoutClients",(unsigned long)closeTimedoutClients},
|
||||
{"compareStringObjects",(unsigned long)compareStringObjects},
|
||||
{"computeObjectSwappability",(unsigned long)computeObjectSwappability},
|
||||
{"createClient",(unsigned long)createClient},
|
||||
{"createListObject",(unsigned long)createListObject},
|
||||
{"createObject",(unsigned long)createObject},
|
||||
@ -40,6 +41,7 @@ static struct redisFunctionSym symsTable[] = {
|
||||
{"dictRedisObjectDestructor",(unsigned long)dictRedisObjectDestructor},
|
||||
{"dictVanillaFree",(unsigned long)dictVanillaFree},
|
||||
{"dupClientReplyValue",(unsigned long)dupClientReplyValue},
|
||||
{"dupStringObject",(unsigned long)dupStringObject},
|
||||
{"echoCommand",(unsigned long)echoCommand},
|
||||
{"execCommand",(unsigned long)execCommand},
|
||||
{"existsCommand",(unsigned long)existsCommand},
|
||||
@ -119,6 +121,7 @@ static struct redisFunctionSym symsTable[] = {
|
||||
{"rdbLoadIntegerObject",(unsigned long)rdbLoadIntegerObject},
|
||||
{"rdbLoadLen",(unsigned long)rdbLoadLen},
|
||||
{"rdbLoadLzfStringObject",(unsigned long)rdbLoadLzfStringObject},
|
||||
{"rdbLoadObject",(unsigned long)rdbLoadObject},
|
||||
{"rdbLoadStringObject",(unsigned long)rdbLoadStringObject},
|
||||
{"rdbLoadTime",(unsigned long)rdbLoadTime},
|
||||
{"rdbLoadType",(unsigned long)rdbLoadType},
|
||||
@ -128,10 +131,13 @@ static struct redisFunctionSym symsTable[] = {
|
||||
{"rdbSaveDoubleValue",(unsigned long)rdbSaveDoubleValue},
|
||||
{"rdbSaveLen",(unsigned long)rdbSaveLen},
|
||||
{"rdbSaveLzfStringObject",(unsigned long)rdbSaveLzfStringObject},
|
||||
{"rdbSaveObject",(unsigned long)rdbSaveObject},
|
||||
{"rdbSaveStringObject",(unsigned long)rdbSaveStringObject},
|
||||
{"rdbSaveStringObjectRaw",(unsigned long)rdbSaveStringObjectRaw},
|
||||
{"rdbSaveTime",(unsigned long)rdbSaveTime},
|
||||
{"rdbSaveType",(unsigned long)rdbSaveType},
|
||||
{"rdbSavedObjectLen",(unsigned long)rdbSavedObjectLen},
|
||||
{"rdbSavedObjectPages",(unsigned long)rdbSavedObjectPages},
|
||||
{"rdbTryIntegerEncoding",(unsigned long)rdbTryIntegerEncoding},
|
||||
{"readQueryFromClient",(unsigned long)readQueryFromClient},
|
||||
{"redisLog",(unsigned long)redisLog},
|
||||
@ -193,6 +199,16 @@ static struct redisFunctionSym symsTable[] = {
|
||||
{"typeCommand",(unsigned long)typeCommand},
|
||||
{"unblockClient",(unsigned long)unblockClient},
|
||||
{"updateSlavesWaitingBgsave",(unsigned long)updateSlavesWaitingBgsave},
|
||||
{"vmFindContiguousPages",(unsigned long)vmFindContiguousPages},
|
||||
{"vmFreePage",(unsigned long)vmFreePage},
|
||||
{"vmInit",(unsigned long)vmInit},
|
||||
{"vmLoadObject",(unsigned long)vmLoadObject},
|
||||
{"vmMarkPageFree",(unsigned long)vmMarkPageFree},
|
||||
{"vmMarkPageUsed",(unsigned long)vmMarkPageUsed},
|
||||
{"vmMarkPagesFree",(unsigned long)vmMarkPagesFree},
|
||||
{"vmMarkPagesUsed",(unsigned long)vmMarkPagesUsed},
|
||||
{"vmSwapObject",(unsigned long)vmSwapObject},
|
||||
{"vmSwapOneObject",(unsigned long)vmSwapOneObject},
|
||||
{"yesnotoi",(unsigned long)yesnotoi},
|
||||
{"zaddCommand",(unsigned long)zaddCommand},
|
||||
{"zaddGenericCommand",(unsigned long)zaddGenericCommand},
|
||||
|
Loading…
Reference in New Issue
Block a user