Trigger the election immediately when doing a manual failover (#1081)

Currently, when a manual failover is triggered, we set
CLUSTER_TODO_HANDLE_FAILOVER to start the election as soon as
possible in the next beforeSleep. But in fact, we don't delay
the election in a manual failover, so waiting for the next beforeSleep
to kick in delays the election by some milliseconds.

We can trigger the election immediately in this case, in the
same function call, without waiting for beforeSleep, which
saves us some milliseconds.

Signed-off-by: Binbin <binloveplay1314@qq.com>
This commit is contained in:
Binbin 2024-11-11 21:43:46 +08:00 committed by GitHub
parent 4aacffa32d
commit 167e8ab8de
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -4519,8 +4519,9 @@ void clusterFailoverReplaceYourPrimary(void) {
* 3) Perform the failover informing all the other nodes.
*/
void clusterHandleReplicaFailover(void) {
mstime_t now = mstime();
mstime_t data_age;
mstime_t auth_age = mstime() - server.cluster->failover_auth_time;
mstime_t auth_age = now - server.cluster->failover_auth_time;
int needed_quorum = (server.cluster->size / 2) + 1;
int manual_failover = server.cluster->mf_end != 0 && server.cluster->mf_can_start;
mstime_t auth_timeout, auth_retry_time;
@ -4582,7 +4583,7 @@ void clusterHandleReplicaFailover(void) {
/* If the previous failover attempt timeout and the retry time has
* elapsed, we can setup a new one. */
if (auth_age > auth_retry_time) {
server.cluster->failover_auth_time = mstime() +
server.cluster->failover_auth_time = now +
500 + /* Fixed delay of 500 milliseconds, let FAIL msg propagate. */
random() % 500; /* Random delay between 0 and 500 milliseconds. */
server.cluster->failover_auth_count = 0;
@ -4594,20 +4595,26 @@ void clusterHandleReplicaFailover(void) {
server.cluster->failover_auth_time += server.cluster->failover_auth_rank * 1000;
/* However if this is a manual failover, no delay is needed. */
if (server.cluster->mf_end) {
server.cluster->failover_auth_time = mstime();
server.cluster->failover_auth_time = now;
server.cluster->failover_auth_rank = 0;
clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER);
/* Reset auth_age since it is outdated now and we can bypass the auth_timeout
* check in the next state and start the election ASAP. */
auth_age = 0;
}
serverLog(LL_NOTICE,
"Start of election delayed for %lld milliseconds "
"(rank #%d, offset %lld).",
server.cluster->failover_auth_time - mstime(), server.cluster->failover_auth_rank,
server.cluster->failover_auth_time - now, server.cluster->failover_auth_rank,
replicationGetReplicaOffset());
/* Now that we have a scheduled election, broadcast our offset
* to all the other replicas so that they'll updated their offsets
* if our offset is better. */
clusterBroadcastPong(CLUSTER_BROADCAST_LOCAL_REPLICAS);
return;
/* Return ASAP if we can't start the election now. In a manual failover,
* we can start the election immediately, so in this case we continue to
* the next state without waiting for the next beforeSleep. */
if (now < server.cluster->failover_auth_time) return;
}
/* It is possible that we received more updated offsets from other
@ -4627,7 +4634,7 @@ void clusterHandleReplicaFailover(void) {
}
/* Return ASAP if we can't still start the election. */
if (mstime() < server.cluster->failover_auth_time) {
if (now < server.cluster->failover_auth_time) {
clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_DELAY);
return;
}