Fix empty primary may have dirty slots data due to bad migration (#1285)

If we become an empty primary for some reason, we still need to
check if we need to delete dirty slots, because we may have dirty
slots data left over from a bad migration — for example, when the
target node forcibly executes CLUSTER SETSLOT NODE to take over the
slot without performing key migration.

Signed-off-by: Binbin <binloveplay1314@qq.com>
This commit is contained in:
Binbin 2024-11-11 22:13:47 +08:00 committed by GitHub
parent a2d22c63c0
commit 2df56d87c0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 32 additions and 1 deletions

View File

@ -2451,6 +2451,7 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
* need to delete all the keys in the slots we lost ownership. */
uint16_t dirty_slots[CLUSTER_SLOTS];
int dirty_slots_count = 0;
int delete_dirty_slots = 0;
/* We should detect if sender is new primary of our shard.
* We will know it if all our slots were migrated to sender, and sender
@ -2677,6 +2678,12 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
serverLog(LL_NOTICE,
"My last slot was migrated to node %.40s (%s) in shard %.40s. I am now an empty primary.",
sender->name, sender->human_nodename, sender->shard_id);
/* We may still have dirty slots when we become an empty primary due to
 * a bad migration.
*
* In order to maintain a consistent state between keys and slots
* we need to remove all the keys from the slots we lost. */
delete_dirty_slots = 1;
}
} else if (dirty_slots_count) {
/* If we are here, we received an update message which removed
@ -2686,6 +2693,10 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
*
* In order to maintain a consistent state between keys and slots
* we need to remove all the keys from the slots we lost. */
delete_dirty_slots = 1;
}
if (delete_dirty_slots) {
for (int j = 0; j < dirty_slots_count; j++) {
serverLog(LL_NOTICE, "Deleting keys in dirty slot %d on node %.40s (%s) in shard %.40s", dirty_slots[j],
myself->name, myself->human_nodename, myself->shard_id);
@ -6069,7 +6080,7 @@ void removeChannelsInSlot(unsigned int slot) {
/* Remove all the keys in the specified hash slot.
* The number of removed items is returned. */
unsigned int delKeysInSlot(unsigned int hashslot) {
if (!kvstoreDictSize(server.db->keys, hashslot)) return 0;
if (!countKeysInSlot(hashslot)) return 0;
unsigned int j = 0;

View File

@ -400,3 +400,23 @@ start_cluster 4 4 {tags {external:skip cluster} overrides {cluster-node-timeout
start_cluster 4 4 {tags {external:skip cluster} overrides {cluster-node-timeout 1000 cluster-migration-barrier 999}} {
test_cluster_setslot "setslot"
} my_slot_allocation cluster_allocate_replicas ;# start_cluster
start_cluster 3 0 {tags {external:skip cluster} overrides {cluster-node-timeout 1000 cluster-migration-barrier 999}} {
test "Empty primary will check and delete the dirty slots" {
# Disable replica migration so R 2 stays an (empty) primary after losing its last slot.
R 2 config set cluster-allow-replica-migration no
# Write a key to slot 0 (key_977613 hashes to slot 0), so the slot has data before the move.
R 2 incr key_977613
# Move slot 0 from primary 2 to primary 0 without migrating the key,
# simulating a bad migration: bump the epoch so R 0's claim wins.
R 0 cluster bumpepoch
R 0 cluster setslot 0 node [R 0 cluster myid]
# Wait for R 2 to report that it is an empty primary (cluster-allow-replica-migration no)
wait_for_log_messages -2 {"*I am now an empty primary*"} 0 1000 50
# Make sure primary 2 deleted the dirty slot 0 left over from the bad migration
# (srv index -2 is R 2's log, per the wait above).
verify_log_message -2 "*Deleting keys in dirty slot 0*" 0
assert_equal [R 2 dbsize] 0
}
} my_slot_allocation cluster_allocate_replicas ;# start_cluster