mirror of
http://github.com/valkey-io/valkey
synced 2024-11-22 00:52:38 +00:00
Remove tmp rdb file in background thread (#7762)
We're already using bg_unlink in several places to delete the rdb file in the background,
and avoid paying the cost of the deletion from our main thread.
This commit uses bg_unlink to remove the temporary rdb file in the background too.
However, in case we delete that rdb file just before exiting, we don't actually wait for the
background thread or the main thread to delete it, and just let the OS clean up after us.
i.e. we open the file, unlink it and exit with the fd still open.
Furthermore, rdbRemoveTempFile can be called from a thread and was using snprintf which is
not async-signal-safe, we now use ll2string instead.
(cherry picked from commit b002d2b4f1
)
This commit is contained in:
parent
7bc0726016
commit
0bdddd3c89
29
src/rdb.c
29
src/rdb.c
@ -34,6 +34,7 @@
|
||||
#include "stream.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
@ -1413,11 +1414,29 @@ int rdbSaveBackground(char *filename, rdbSaveInfo *rsi) {
|
||||
return C_OK; /* unreached */
|
||||
}
|
||||
|
||||
void rdbRemoveTempFile(pid_t childpid) {
|
||||
/* Note that we may call this function in signal handle 'sigShutdownHandler',
|
||||
* so we need guarantee all functions we call are async-signal-safe.
|
||||
* If we call this function from signal handle, we won't call bg_unlik that
|
||||
* is not async-signal-safe. */
|
||||
void rdbRemoveTempFile(pid_t childpid, int from_signal) {
|
||||
char tmpfile[256];
|
||||
char pid[32];
|
||||
|
||||
snprintf(tmpfile,sizeof(tmpfile),"temp-%d.rdb", (int) childpid);
|
||||
unlink(tmpfile);
|
||||
/* Generate temp rdb file name using aync-signal safe functions. */
|
||||
int pid_len = ll2string(pid, sizeof(pid), childpid);
|
||||
strcpy(tmpfile, "temp-");
|
||||
strncpy(tmpfile+5, pid, pid_len);
|
||||
strcpy(tmpfile+5+pid_len, ".rdb");
|
||||
|
||||
if (from_signal) {
|
||||
/* bg_unlink is not async-signal-safe, but in this case we don't really
|
||||
* need to close the fd, it'll be released when the process exists. */
|
||||
int fd = open(tmpfile, O_RDONLY|O_NONBLOCK);
|
||||
UNUSED(fd);
|
||||
unlink(tmpfile);
|
||||
} else {
|
||||
bg_unlink(tmpfile);
|
||||
}
|
||||
}
|
||||
|
||||
/* This function is called by rdbLoadObject() when the code is in RDB-check
|
||||
@ -2420,7 +2439,7 @@ void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal) {
|
||||
serverLog(LL_WARNING,
|
||||
"Background saving terminated by signal %d", bysignal);
|
||||
latencyStartMonitor(latency);
|
||||
rdbRemoveTempFile(server.rdb_child_pid);
|
||||
rdbRemoveTempFile(server.rdb_child_pid, 0);
|
||||
latencyEndMonitor(latency);
|
||||
latencyAddSampleIfNeeded("rdb-unlink-temp-file",latency);
|
||||
/* SIGUSR1 is whitelisted, so we have a way to kill a child without
|
||||
@ -2477,7 +2496,7 @@ void backgroundSaveDoneHandler(int exitcode, int bysignal) {
|
||||
* the cleanup needed. */
|
||||
void killRDBChild(void) {
|
||||
kill(server.rdb_child_pid,SIGUSR1);
|
||||
rdbRemoveTempFile(server.rdb_child_pid);
|
||||
rdbRemoveTempFile(server.rdb_child_pid, 0);
|
||||
closeChildInfoPipe();
|
||||
updateDictResizePolicy();
|
||||
}
|
||||
|
@ -141,7 +141,7 @@ int rdbLoadObjectType(rio *rdb);
|
||||
int rdbLoad(char *filename, rdbSaveInfo *rsi, int rdbflags);
|
||||
int rdbSaveBackground(char *filename, rdbSaveInfo *rsi);
|
||||
int rdbSaveToSlavesSockets(rdbSaveInfo *rsi);
|
||||
void rdbRemoveTempFile(pid_t childpid);
|
||||
void rdbRemoveTempFile(pid_t childpid, int from_signal);
|
||||
int rdbSave(char *filename, rdbSaveInfo *rsi);
|
||||
ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key);
|
||||
size_t rdbSavedObjectLen(robj *o, robj *key);
|
||||
|
@ -3797,6 +3797,10 @@ int prepareForShutdown(int flags) {
|
||||
overwrite the synchronous saving did by SHUTDOWN. */
|
||||
if (server.rdb_child_pid != -1) {
|
||||
serverLog(LL_WARNING,"There is a child saving an .rdb. Killing it!");
|
||||
/* Note that, in killRDBChild, we call rdbRemoveTempFile that will
|
||||
* do close fd(in order to unlink file actully) in background thread.
|
||||
* The temp rdb file fd may won't be closed when redis exits quickly,
|
||||
* but OS will close this fd when process exits. */
|
||||
killRDBChild();
|
||||
}
|
||||
|
||||
@ -4846,7 +4850,7 @@ static void sigShutdownHandler(int sig) {
|
||||
* on disk. */
|
||||
if (server.shutdown_asap && sig == SIGINT) {
|
||||
serverLogFromHandler(LL_WARNING, "You insist... exiting now.");
|
||||
rdbRemoveTempFile(getpid());
|
||||
rdbRemoveTempFile(getpid(), 1);
|
||||
exit(1); /* Exit with an error since this was not a clean shutdown. */
|
||||
} else if (server.loading) {
|
||||
serverLogFromHandler(LL_WARNING, "Received shutdown signal during loading, exiting now.");
|
||||
|
@ -1865,6 +1865,7 @@ int writeCommandsDeniedByDiskError(void);
|
||||
/* RDB persistence */
|
||||
#include "rdb.h"
|
||||
void killRDBChild(void);
|
||||
int bg_unlink(const char *filename);
|
||||
|
||||
/* AOF persistence */
|
||||
void flushAppendOnlyFile(int force);
|
||||
|
@ -69,6 +69,7 @@ set ::all_tests {
|
||||
unit/tls
|
||||
unit/tracking
|
||||
unit/oom-score-adj
|
||||
unit/shutdown
|
||||
}
|
||||
# Index to the next test to run in the ::all_tests list.
|
||||
set ::next_test 0
|
||||
|
49
tests/unit/shutdown.tcl
Normal file
49
tests/unit/shutdown.tcl
Normal file
@ -0,0 +1,49 @@
|
||||
start_server {tags {"shutdown"}} {
|
||||
test {Temp rdb will be deleted if we use bg_unlink when shutdown} {
|
||||
for {set i 0} {$i < 20} {incr i} {
|
||||
r set $i $i
|
||||
}
|
||||
# It will cost 2s(20 * 100ms) to dump rdb
|
||||
r config set rdb-key-save-delay 100000
|
||||
|
||||
# Child is dumping rdb
|
||||
r bgsave
|
||||
after 100
|
||||
set dir [lindex [r config get dir] 1]
|
||||
set child_pid [get_child_pid 0]
|
||||
set temp_rdb [file join [lindex [r config get dir] 1] temp-${child_pid}.rdb]
|
||||
# Temp rdb must be existed
|
||||
assert {[file exists $temp_rdb]}
|
||||
|
||||
catch {r shutdown nosave}
|
||||
# Make sure the server was killed
|
||||
catch {set rd [redis_deferring_client]} e
|
||||
assert_match {*connection refused*} $e
|
||||
|
||||
# Temp rdb file must be deleted
|
||||
assert {![file exists $temp_rdb]}
|
||||
}
|
||||
}
|
||||
|
||||
start_server {tags {"shutdown"}} {
|
||||
test {Temp rdb will be deleted in signal handle} {
|
||||
for {set i 0} {$i < 20} {incr i} {
|
||||
r set $i $i
|
||||
}
|
||||
# It will cost 2s(20 * 100ms) to dump rdb
|
||||
r config set rdb-key-save-delay 100000
|
||||
|
||||
set pid [s process_id]
|
||||
set temp_rdb [file join [lindex [r config get dir] 1] temp-${pid}.rdb]
|
||||
|
||||
exec kill -SIGINT $pid
|
||||
after 100
|
||||
# Temp rdb must be existed
|
||||
assert {[file exists $temp_rdb]}
|
||||
|
||||
# Temp rdb file must be deleted
|
||||
exec kill -SIGINT $pid
|
||||
after 100
|
||||
assert {![file exists $temp_rdb]}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user