diskless master, avoid bgsave child hung when fork parent crashes (#11463)

During a diskless sync, if the master main process crashes, the child would
have hung in `write`. This fix closes the read fd on the child side, so that if the
parent crashes, the child will get a write error and exit.

This change also fixes disk-based replication, BGSAVE and AOFRW.
In those cases the child wouldn't have hung; it would have just kept
running until done, which may be pointless.

There is a certain degree of risk here. If there's a BGSAVE child that could
maybe succeed and the parent dies for some reason, the old code would have let
the child keep running, maybe succeed, and avoid data loss.
On the other hand, if the parent is restarted, it would have loaded an old rdb file
(or none), and then the child could reach the end and rename the rdb file (data
conflicting with what the parent has), or also have a race with another BGSAVE
child that the new parent started.

Note that I removed a comment saying a write error will be ignored in the child
and handled by the parent (this comment was very old and I don't think it is relevant).
This commit is contained in:
Oran Agra 2022-11-09 10:02:18 +02:00 committed by GitHub
parent f928991853
commit ccaef5c923
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 59 additions and 2 deletions

View File

@ -112,7 +112,9 @@ void sendChildInfoGeneric(childInfoType info_type, size_t keys, double progress,
ssize_t wlen = sizeof(data);
if (write(server.child_info_pipe[1], &data, wlen) != wlen) {
/* Nothing to do on error, this will be detected by the other side. */
/* Failed writing to parent, it could have been killed, exit. */
serverLog(LL_WARNING,"Child failed reporting info to parent, exiting. %s", strerror(errno));
exit(1);
}
}

View File

@ -3399,6 +3399,10 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
rioInitWithFd(&rdb,rdb_pipe_write);
/* Close the reading part, so that if the parent crashes, the child will
* get a write error and exit. */
close(server.rdb_pipe_read);
redisSetProcTitle("redis-rdb-to-slaves");
redisSetCpuAffinity(server.bgsave_cpulist);

View File

@ -6400,6 +6400,10 @@ int redisFork(int purpose) {
setOOMScoreAdj(CONFIG_OOM_BGCHILD);
dismissMemoryInChild();
closeChildUnusedResourceAfterFork();
/* Close the reading part, so that if the parent crashes, the child will
* get a write error and exit. */
if (server.child_info_pipe[0] != -1)
close(server.child_info_pipe[0]);
} else {
/* Parent */
if (childpid == -1) {

View File

@ -992,7 +992,7 @@ test "diskless replication child being killed is collected" {
# wait for the replicas to start reading the rdb
wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 800 10
# wait to be sure the eplica is hung and the master is blocked on write
# wait to be sure the replica is hung and the master is blocked on write
after 500
# simulate the OOM killer or anyone else kills the child
@ -1012,6 +1012,45 @@ test "diskless replication child being killed is collected" {
}
} {} {external:skip}
# Run the same scenario twice, once with repl-diskless-sync set to "yes" and
# once with "no": start a replication-triggered RDB child on the master, kill
# the master with SIGKILL, and verify that the fork child terminates too.
foreach mdl {yes no} {
test "replication dies when parent is killed - diskless: $mdl" {
# when master is killed, make sure the fork child can detect that and exit
start_server {tags {"repl"}} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
set master_pid [srv 0 pid]
$master config set repl-diskless-sync $mdl
$master config set repl-diskless-sync-delay 0
# create keys that will take 10 seconds to save
# (10000 keys, each slowed down by rdb-key-save-delay 1000), so the
# child is still busy saving when the master gets killed below
$master config set rdb-key-save-delay 1000
$master debug populate 10000
start_server {} {
set replica [srv 0 client]
$replica replicaof $master_host $master_port
# wait for rdb child to start
# NOTE(review): index -1 presumably addresses the outer server (the
# master) from inside the nested start_server - confirm against the
# test framework
wait_for_condition 5000 10 {
[s -1 rdb_bgsave_in_progress] == 1
} else {
fail "rdb child didn't start"
}
# remember the child's pid before the master goes away
set fork_child_pid [get_child_pid -1]
# simulate the OOM killer or anyone else killing the parent
exec kill -9 $master_pid
# wait for the child to notice that the parent died and exit
wait_for_condition 500 10 {
[process_is_alive $fork_child_pid] == 0
} else {
fail "rdb child didn't terminate"
}
}
}
} {} {external:skip}
}
test "diskless replication read pipe cleanup" {
# In diskless replication, we create a read pipe for the RDB, between the child and the parent.
# When we close this pipe (fd), the read handler also needs to be removed from the event loop (if it still registered).

View File

@ -627,6 +627,14 @@ proc get_child_pid {idx} {
return $child_pid
}
# Return 1 if the process identified by pid is still running, 0 otherwise.
#
# The check shells out to `ps -p <pid> -f`. When ps finds no matching process
# it exits with a non-zero status, which makes `exec` raise an error that is
# caught here and reported as "not alive".
#
# A process that has already exited but has not been reaped yet (a zombie) is
# still listed by ps; in the full-format listing such an entry is marked
# "<defunct>" (e.g. by procps on Linux). It has terminated, so it is reported
# as not alive. Otherwise callers waiting for a child to exit could wait
# forever for an orphan that nobody reaps.
proc process_is_alive pid {
    if {[catch {exec ps -p $pid -f} output]} {
        # ps failed: no such process (or ps could not be run at all).
        return 0
    }
    if {[string match {*<defunct>*} $output]} {
        # Listed, but only as a zombie entry awaiting reaping.
        return 0
    }
    return 1
}
proc cmdrstat {cmd r} {
if {[regexp "\r\ncmdstat_$cmd:(.*?)\r\n" [$r info commandstats] _ value]} {
set _ $value