Stabilize tests that involve load handlers (#8967)

When a test stops a 'load handler' by killing the process that generates the load,
some commands that are already in the input buffer might still be processed by the server.
This may cause instability in tests that rely on no more commands being
processed after the 'load handler' is stopped.

This commit adds a new proc, 'wait_load_handlers_disconnected', which verifies that no more
commands from any 'load handler' are processed, by checking that the clients that
generate the load are disconnected.

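Also, replace the dbsize check with wait_for_ofs_sync before comparing debug digests, since
comparing dbsize is not sufficient when the last key the workload wrote was an overridden key
(not a new one): the key counts can already match while the replica has not yet applied that
last write, and the digest comparison would then fail.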

Affected tests
Race fix:
- failover command to specific replica works
- Connect multiple replicas at the same time (issue #141), master diskless=$mdl, replica diskless=$sdl
- AOF rewrite during write load: RDB preamble=$rdbpre

Cleanup and speedup:
- Test replication with blocking lists and sorted sets operations
- Test replication with parallel clients writing in different DBs
- Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect)
This commit is contained in:
YaacovHazan 2021-05-20 15:29:43 +03:00 committed by GitHub
parent 8627751ec6
commit 32a2584e07
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 37 additions and 53 deletions

View File

@ -12,6 +12,7 @@ set ::tlsdir "tests/tls"
# blocking.
proc bg_block_op {host port db ops tls} {
set r [redis $host $port 0 $tls]
$r client setname LOAD_HANDLER
$r select $db
for {set j 0} {$j < $ops} {incr j} {

View File

@ -5,6 +5,7 @@ set ::tlsdir "tests/tls"
# Background load generator: open a client to host:port, label the
# connection LOAD_HANDLER (the name wait_load_handlers_disconnected looks
# for in CLIENT LIST), select the requested DB and hand the client to
# createComplexDataset together with the number of operations.
#
#   host, port - address of the server to load
#   db         - database index to SELECT before writing
#   ops        - operation count forwarded to createComplexDataset
#   tls        - forwarded to the redis client constructor
#                (presumably a TLS on/off flag; confirm against `redis`)
proc bg_complex_data {host port db ops tls} {
    set client [redis $host $port 0 $tls]
    # Tag the connection so tests can tell when this worker has gone away.
    $client client setname LOAD_HANDLER
    $client select $db
    createComplexDataset $client $ops
}

View File

@ -5,6 +5,7 @@ set ::tlsdir "tests/tls"
proc gen_write_load {host port seconds tls} {
set start_time [clock seconds]
set r [redis $host $port 1 $tls]
$r client setname LOAD_HANDLER
$r select 9
while 1 {
$r set [expr rand()] [expr rand()]

View File

@ -33,14 +33,9 @@ start_server {tags {"repl"}} {
stop_bg_block_op $load_handle0
stop_bg_block_op $load_handle1
stop_bg_block_op $load_handle2
set retry 10
while {$retry && ([$master debug digest] ne [$slave debug digest])}\
{
after 1000
incr retry -1
}
if {[$master debug digest] ne [$slave debug digest]} {
wait_for_condition 100 100 {
[$master debug digest] == [$slave debug digest]
} else {
set csv1 [csvdump r]
set csv2 [csvdump {r -1}]
set fd [open /tmp/repldump1.txt w]
@ -49,10 +44,8 @@ start_server {tags {"repl"}} {
set fd [open /tmp/repldump2.txt w]
puts -nonewline $fd $csv2
close $fd
puts "Master - Replica inconsistency"
puts "Run diff -u against /tmp/repldump*.txt for more info"
fail "Master - Replica inconsistency, Run diff -u against /tmp/repldump*.txt for more info"
}
assert_equal [r debug digest] [r -1 debug digest]
}
}
}

View File

@ -83,7 +83,11 @@ start_server {} {
} else {
fail "Failover from node 0 to node 1 did not finish"
}
# stop the write load and make sure no more commands processed
stop_write_load $load_handler
wait_load_handlers_disconnected
$node_2 replicaof $node_1_host $node_1_port
wait_for_sync $node_0
wait_for_sync $node_2

View File

@ -21,15 +21,9 @@ start_server {tags {"repl network"}} {
stop_bg_complex_data $load_handle0
stop_bg_complex_data $load_handle1
stop_bg_complex_data $load_handle2
set retry 10
while {$retry && ([$master debug digest] ne [$slave debug digest])}\
{
after 1000
incr retry -1
}
assert {[$master dbsize] > 0}
if {[$master debug digest] ne [$slave debug digest]} {
wait_for_condition 100 100 {
[$master debug digest] == [$slave debug digest]
} else {
set csv1 [csvdump r]
set csv2 [csvdump {r -1}]
set fd [open /tmp/repldump1.txt w]
@ -38,10 +32,9 @@ start_server {tags {"repl network"}} {
set fd [open /tmp/repldump2.txt w]
puts -nonewline $fd $csv2
close $fd
puts "Master - Replica inconsistency"
puts "Run diff -u against /tmp/repldump*.txt for more info"
fail "Master - Replica inconsistency, Run diff -u against /tmp/repldump*.txt for more info"
}
assert_equal [r debug digest] [r -1 debug digest]
assert {[$master dbsize] > 0}
}
}
}

View File

@ -97,15 +97,9 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reco
fail "Slave still not connected after some time"
}
set retry 10
while {$retry && ([$master debug digest] ne [$slave debug digest])}\
{
after 1000
incr retry -1
}
assert {[$master dbsize] > 0}
if {[$master debug digest] ne [$slave debug digest]} {
wait_for_condition 100 100 {
[$master debug digest] == [$slave debug digest]
} else {
set csv1 [csvdump r]
set csv2 [csvdump {r -1}]
set fd [open /tmp/repldump1.txt w]
@ -114,10 +108,9 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reco
set fd [open /tmp/repldump2.txt w]
puts -nonewline $fd $csv2
close $fd
puts "Master - Replica inconsistency"
puts "Run diff -u against /tmp/repldump*.txt for more info"
fail "Master - Replica inconsistency, Run diff -u against /tmp/repldump*.txt for more info"
}
assert_equal [r debug digest] [r -1 debug digest]
assert {[$master dbsize] > 0}
eval $cond
}
}

View File

@ -316,15 +316,12 @@ foreach mdl {no yes} {
stop_write_load $load_handle3
stop_write_load $load_handle4
# Make sure that slaves and master have same
# number of keys
wait_for_condition 500 100 {
[$master dbsize] == [[lindex $slaves 0] dbsize] &&
[$master dbsize] == [[lindex $slaves 1] dbsize] &&
[$master dbsize] == [[lindex $slaves 2] dbsize]
} else {
fail "Different number of keys between master and replica after too long time."
}
# Make sure no more commands processed
wait_load_handlers_disconnected
wait_for_ofs_sync $master [lindex $slaves 0]
wait_for_ofs_sync $master [lindex $slaves 1]
wait_for_ofs_sync $master [lindex $slaves 2]
# Check digests
set digest [$master debug digest]

View File

@ -504,6 +504,14 @@ proc stop_write_load {handle} {
catch {exec /bin/kill -9 $handle}
}
# Block until no client named LOAD_HANDLER appears in the CLIENT LIST output
# of the target server, i.e. until every load-generating client has
# disconnected and can no longer have commands processed.
#
#   level - forwarded as the first argument to `r` to pick the server
#           (defaults to 0; presumably the server-stack offset, as in
#           `r -1 ...` -- confirm against the `r` helper)
#
# Calls `fail` if a LOAD_HANDLER client is still listed once
# wait_for_condition (called with 50 and 100; presumably retries and delay
# in milliseconds) gives up.
proc wait_load_handlers_disconnected {{level 0}} {
    wait_for_condition 50 100 {
        [string first {name=LOAD_HANDLER} [r $level client list]] == -1
    } else {
        fail "load_handler(s) still connected after too long time."
    }
}
# K combinator: evaluate both arguments, return the first and discard the
# second.
proc K {x y} {
    return $x
}
# Shuffle a list with Fisher-Yates algorithm.

View File

@ -41,15 +41,8 @@ start_server {tags {"aofrw"}} {
stop_write_load $load_handle3
stop_write_load $load_handle4
# Make sure that we remain the only connected client.
# This step is needed to make sure there are no pending writes
# that will be processed between the two "debug digest" calls.
wait_for_condition 50 100 {
[llength [split [string trim [r client list]] "\n"]] == 1
} else {
puts [r client list]
fail "Clients generating loads are not disconnecting"
}
# Make sure no more commands processed, before taking debug digest
wait_load_handlers_disconnected
# Get the data set digest
set d1 [r debug digest]