diff --git a/src/childinfo.c b/src/childinfo.c index 015987c84..e5184ff8b 100644 --- a/src/childinfo.c +++ b/src/childinfo.c @@ -112,7 +112,9 @@ void sendChildInfoGeneric(childInfoType info_type, size_t keys, double progress, ssize_t wlen = sizeof(data); if (write(server.child_info_pipe[1], &data, wlen) != wlen) { - /* Nothing to do on error, this will be detected by the other side. */ + /* Failed writing to parent, it could have been killed, exit. */ + serverLog(LL_WARNING,"Child failed reporting info to parent, exiting. %s", strerror(errno)); + exit(1); } } diff --git a/src/rdb.c b/src/rdb.c index ff8a1f53d..083e5cf89 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -3399,6 +3399,10 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) { rioInitWithFd(&rdb,rdb_pipe_write); + /* Close the reading part, so that if the parent crashes, the child will + * get a write error and exit. */ + close(server.rdb_pipe_read); + redisSetProcTitle("redis-rdb-to-slaves"); redisSetCpuAffinity(server.bgsave_cpulist); diff --git a/src/server.c b/src/server.c index 47886ba71..149b675eb 100644 --- a/src/server.c +++ b/src/server.c @@ -6400,6 +6400,10 @@ int redisFork(int purpose) { setOOMScoreAdj(CONFIG_OOM_BGCHILD); dismissMemoryInChild(); closeChildUnusedResourceAfterFork(); + /* Close the reading part, so that if the parent crashes, the child will + * get a write error and exit. 
*/ + if (server.child_info_pipe[0] != -1) + close(server.child_info_pipe[0]); } else { /* Parent */ if (childpid == -1) { diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index 153aa8620..617b9f78e 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -992,7 +992,7 @@ test "diskless replication child being killed is collected" { # wait for the replicas to start reading the rdb wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 800 10 - # wait to be sure the eplica is hung and the master is blocked on write + # wait to be sure the replica is hung and the master is blocked on write after 500 # simulate the OOM killer or anyone else kills the child @@ -1012,6 +1012,45 @@ test "diskless replication child being killed is collected" { } } {} {external:skip} +foreach mdl {yes no} { + test "replication dies when parent is killed - diskless: $mdl" { + # when master is killed, make sure the fork child can detect that and exit + start_server {tags {"repl"}} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + set master_pid [srv 0 pid] + $master config set repl-diskless-sync $mdl + $master config set repl-diskless-sync-delay 0 + # create keys that will take 10 seconds to save + $master config set rdb-key-save-delay 1000 + $master debug populate 10000 + start_server {} { + set replica [srv 0 client] + $replica replicaof $master_host $master_port + + # wait for rdb child to start + wait_for_condition 5000 10 { + [s -1 rdb_bgsave_in_progress] == 1 + } else { + fail "rdb child didn't start" + } + set fork_child_pid [get_child_pid -1] + + # simulate the OOM killer or anyone else kills the parent + exec kill -9 $master_pid + + # wait for the child to notice the parent died and exit + wait_for_condition 500 10 { + [process_is_alive $fork_child_pid] == 0 + } else { + fail "rdb child didn't terminate" + } + } + } + } {} {external:skip} +} + test "diskless
replication read pipe cleanup" { # In diskless replication, we create a read pipe for the RDB, between the child and the parent. # When we close this pipe (fd), the read handler also needs to be removed from the event loop (if it still registered). diff --git a/tests/support/util.tcl b/tests/support/util.tcl index 51a8420a3..a7332d29c 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -627,6 +627,14 @@ proc get_child_pid {idx} { return $child_pid } +proc process_is_alive pid { + if {[catch {exec ps -p $pid} err]} { + return 0 + } else { + return 1 + } +} + proc cmdrstat {cmd r} { if {[regexp "\r\ncmdstat_$cmd:(.*?)\r\n" [$r info commandstats] _ value]} { set _ $value