mirror of
http://github.com/valkey-io/valkey
synced 2024-11-22 18:54:58 +00:00
make replication tests more stable on slow machines
Fix a few race conditions in replication-related tests that cause failures on slow machines. Bugfix in the slave buffers test: since the test is executed twice, each time with a different command count, the threshold for the delta can't be a constant.
This commit is contained in:
parent
0a6090bfd8
commit
ba809f26d4
@ -166,12 +166,15 @@ start_server {} {
|
||||
# Pick a random slave
|
||||
set slave_id [expr {($master_id+1)%5}]
|
||||
set sync_count [status $R($master_id) sync_full]
|
||||
set sync_partial [status $R($master_id) sync_partial_ok]
|
||||
catch {
|
||||
$R($slave_id) config rewrite
|
||||
$R($slave_id) debug restart
|
||||
}
|
||||
# note: just waiting for connected_slaves==4 has a race condition since
|
||||
# we might do the check before the master realized that the slave disconnected
|
||||
wait_for_condition 50 1000 {
|
||||
[status $R($master_id) connected_slaves] == 4
|
||||
[status $R($master_id) sync_partial_ok] == $sync_partial + 1
|
||||
} else {
|
||||
fail "Replica not reconnecting"
|
||||
}
|
||||
|
@ -79,6 +79,32 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec
|
||||
stop_bg_complex_data $load_handle0
|
||||
stop_bg_complex_data $load_handle1
|
||||
stop_bg_complex_data $load_handle2
|
||||
|
||||
# Wait for the slave to reach the "online"
|
||||
# state from the POV of the master.
|
||||
set retry 5000
|
||||
while {$retry} {
|
||||
set info [$master info]
|
||||
if {[string match {*slave0:*state=online*} $info]} {
|
||||
break
|
||||
} else {
|
||||
incr retry -1
|
||||
after 100
|
||||
}
|
||||
}
|
||||
if {$retry == 0} {
|
||||
error "assertion:Slave not correctly synchronized"
|
||||
}
|
||||
|
||||
# Wait for the slave to acknowledge that it is online so
|
||||
# we are sure that DBSIZE and DEBUG DIGEST will not
|
||||
# fail because of timing issues. (-LOADING error)
|
||||
wait_for_condition 5000 100 {
|
||||
[lindex [$slave role] 3] eq {connected}
|
||||
} else {
|
||||
fail "Slave still not connected after some time"
|
||||
}
|
||||
|
||||
set retry 10
|
||||
while {$retry && ([$master debug digest] ne [$slave debug digest])}\
|
||||
{
|
||||
|
@ -161,7 +161,7 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline}
|
||||
}
|
||||
|
||||
# make sure master doesn't disconnect slave because of timeout
|
||||
$master config set repl-timeout 300 ;# 5 minutes
|
||||
$master config set repl-timeout 1200 ;# 20 minutes (for valgrind and slow machines)
|
||||
$master config set maxmemory-policy allkeys-random
|
||||
$master config set client-output-buffer-limit "replica 100000000 100000000 300"
|
||||
$master config set repl-backlog-size [expr {10*1024}]
|
||||
@ -212,7 +212,8 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline}
|
||||
|
||||
assert {[$master dbsize] == 100}
|
||||
assert {$slave_buf > 2*1024*1024} ;# some of the data may have been pushed to the OS buffers
|
||||
assert {$delta < 50*1024 && $delta > -50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB
|
||||
set delta_max [expr {$cmd_count / 2}] ;# 1 byte unaccounted for, with 1M commands will consume some 1MB
|
||||
assert {$delta < $delta_max && $delta > -$delta_max}
|
||||
|
||||
$master client kill type slave
|
||||
set killed_used [s -1 used_memory]
|
||||
@ -221,7 +222,7 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline}
|
||||
set killed_used_no_repl [expr {$killed_used - $killed_mem_not_counted_for_evict}]
|
||||
set delta_no_repl [expr {$killed_used_no_repl - $used_no_repl}]
|
||||
assert {$killed_slave_buf == 0}
|
||||
assert {$delta_no_repl > -50*1024 && $delta_no_repl < 50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB
|
||||
assert {$delta_no_repl > -$delta_max && $delta_no_repl < $delta_max}
|
||||
|
||||
}
|
||||
# unfreeze slave process (after the 'test' succeeded or failed, but before we attempt to terminate the server)
|
||||
|
Loading…
Reference in New Issue
Block a user