Remove tmp rdb file in background thread (#7762)

We're already using bg_unlink in several places to delete the rdb file in the background,
and avoid paying the cost of the deletion from our main thread.
This commit uses bg_unlink to remove the temporary rdb file in the background too.

However, in case we delete that rdb file just before exiting, we don't actually wait for the
background thread or the main thread to delete it, and just let the OS clean up after us.
i.e. we open the file, unlink it and exit with the fd still open.

Furthermore, rdbRemoveTempFile can be called from a thread and was using snprintf which is
not async-signal-safe, we now use ll2string instead.

(cherry picked from commit b002d2b4f1)
This commit is contained in:
Wang Yuan 2020-09-17 23:20:10 +08:00 committed by Oran Agra
parent 7bc0726016
commit 0bdddd3c89
6 changed files with 81 additions and 7 deletions

View File

@ -34,6 +34,7 @@
#include "stream.h"
#include <math.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
@ -1413,11 +1414,29 @@ int rdbSaveBackground(char *filename, rdbSaveInfo *rsi) {
return C_OK; /* unreached */
}
void rdbRemoveTempFile(pid_t childpid) {
/* Note that we may call this function in signal handle 'sigShutdownHandler',
* so we need guarantee all functions we call are async-signal-safe.
* If we call this function from signal handle, we won't call bg_unlik that
* is not async-signal-safe. */
void rdbRemoveTempFile(pid_t childpid, int from_signal) {
char tmpfile[256];
char pid[32];
snprintf(tmpfile,sizeof(tmpfile),"temp-%d.rdb", (int) childpid);
unlink(tmpfile);
/* Generate temp rdb file name using aync-signal safe functions. */
int pid_len = ll2string(pid, sizeof(pid), childpid);
strcpy(tmpfile, "temp-");
strncpy(tmpfile+5, pid, pid_len);
strcpy(tmpfile+5+pid_len, ".rdb");
if (from_signal) {
/* bg_unlink is not async-signal-safe, but in this case we don't really
* need to close the fd, it'll be released when the process exists. */
int fd = open(tmpfile, O_RDONLY|O_NONBLOCK);
UNUSED(fd);
unlink(tmpfile);
} else {
bg_unlink(tmpfile);
}
}
/* This function is called by rdbLoadObject() when the code is in RDB-check
@ -2420,7 +2439,7 @@ void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal) {
serverLog(LL_WARNING,
"Background saving terminated by signal %d", bysignal);
latencyStartMonitor(latency);
rdbRemoveTempFile(server.rdb_child_pid);
rdbRemoveTempFile(server.rdb_child_pid, 0);
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("rdb-unlink-temp-file",latency);
/* SIGUSR1 is whitelisted, so we have a way to kill a child without
@ -2477,7 +2496,7 @@ void backgroundSaveDoneHandler(int exitcode, int bysignal) {
* the cleanup needed. */
void killRDBChild(void) {
kill(server.rdb_child_pid,SIGUSR1);
rdbRemoveTempFile(server.rdb_child_pid);
rdbRemoveTempFile(server.rdb_child_pid, 0);
closeChildInfoPipe();
updateDictResizePolicy();
}

View File

@ -141,7 +141,7 @@ int rdbLoadObjectType(rio *rdb);
int rdbLoad(char *filename, rdbSaveInfo *rsi, int rdbflags);
int rdbSaveBackground(char *filename, rdbSaveInfo *rsi);
int rdbSaveToSlavesSockets(rdbSaveInfo *rsi);
void rdbRemoveTempFile(pid_t childpid);
void rdbRemoveTempFile(pid_t childpid, int from_signal);
int rdbSave(char *filename, rdbSaveInfo *rsi);
ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key);
size_t rdbSavedObjectLen(robj *o, robj *key);

View File

@ -3797,6 +3797,10 @@ int prepareForShutdown(int flags) {
overwrite the synchronous saving did by SHUTDOWN. */
if (server.rdb_child_pid != -1) {
serverLog(LL_WARNING,"There is a child saving an .rdb. Killing it!");
/* Note that, in killRDBChild, we call rdbRemoveTempFile that will
* do close fd(in order to unlink file actully) in background thread.
* The temp rdb file fd may won't be closed when redis exits quickly,
* but OS will close this fd when process exits. */
killRDBChild();
}
@ -4846,7 +4850,7 @@ static void sigShutdownHandler(int sig) {
* on disk. */
if (server.shutdown_asap && sig == SIGINT) {
serverLogFromHandler(LL_WARNING, "You insist... exiting now.");
rdbRemoveTempFile(getpid());
rdbRemoveTempFile(getpid(), 1);
exit(1); /* Exit with an error since this was not a clean shutdown. */
} else if (server.loading) {
serverLogFromHandler(LL_WARNING, "Received shutdown signal during loading, exiting now.");

View File

@ -1865,6 +1865,7 @@ int writeCommandsDeniedByDiskError(void);
/* RDB persistence */
#include "rdb.h"
void killRDBChild(void);
int bg_unlink(const char *filename);
/* AOF persistence */
void flushAppendOnlyFile(int force);

View File

@ -69,6 +69,7 @@ set ::all_tests {
unit/tls
unit/tracking
unit/oom-score-adj
unit/shutdown
}
# Index to the next test to run in the ::all_tests list.
set ::next_test 0

49
tests/unit/shutdown.tcl Normal file
View File

@ -0,0 +1,49 @@
start_server {tags {"shutdown"}} {
test {Temp rdb will be deleted if we use bg_unlink when shutdown} {
for {set i 0} {$i < 20} {incr i} {
r set $i $i
}
# It will cost 2s(20 * 100ms) to dump rdb
r config set rdb-key-save-delay 100000
# Child is dumping rdb
r bgsave
after 100
set dir [lindex [r config get dir] 1]
set child_pid [get_child_pid 0]
set temp_rdb [file join [lindex [r config get dir] 1] temp-${child_pid}.rdb]
# Temp rdb must be existed
assert {[file exists $temp_rdb]}
catch {r shutdown nosave}
# Make sure the server was killed
catch {set rd [redis_deferring_client]} e
assert_match {*connection refused*} $e
# Temp rdb file must be deleted
assert {![file exists $temp_rdb]}
}
}
start_server {tags {"shutdown"}} {
test {Temp rdb will be deleted in signal handle} {
for {set i 0} {$i < 20} {incr i} {
r set $i $i
}
# It will cost 2s(20 * 100ms) to dump rdb
r config set rdb-key-save-delay 100000
set pid [s process_id]
set temp_rdb [file join [lindex [r config get dir] 1] temp-${pid}.rdb]
exec kill -SIGINT $pid
after 100
# Temp rdb must be existed
assert {[file exists $temp_rdb]}
# Temp rdb file must be deleted
exec kill -SIGINT $pid
after 100
assert {![file exists $temp_rdb]}
}
}