valkey/tests/sentinel.tcl
antirez 630fb3539f Sentinel test: restart_instance should refresh pid attrib.
Also kill_instance was modified to warn when a test will try to kill the
same instance multiple times for error.
2014-02-25 08:23:48 +01:00

324 lines
9.5 KiB
Tcl

# Sentinel test suite. Copyright (C) 2014 Salvatore Sanfilippo antirez@gmail.com
# This software is released under the BSD License. See the COPYING file for
# more information.
# Tcl 8.5 or later is required (this file uses 8.5 features such as the
# {*} expansion syntax and the dict command).
package require Tcl 8.5
# NOTE(review): presumably raises float-to-string precision so numeric
# replies round-trip exactly -- confirm against the support libraries.
set tcl_precision 17
# Support libraries. They are sourced relative to the current directory,
# i.e. before the "cd" into the temporary directory performed below.
source tests/support/redis.tcl
source tests/support/util.tcl
source tests/support/server.tcl
source tests/support/test.tcl
# Global state of the test run.
# ::verbose is not read by the code visible in this file.
set ::verbose 0
# Set to 1 by the --pause-on-error command line option.
set ::pause_on_error 0
# One dict per spawned instance with the keys: pid, host, port, link.
set ::sentinel_instances {}
set ::redis_instances {}
# First port probed when looking for a free port for each instance type.
set ::sentinel_base_port 20000
set ::redis_base_port 30000
set ::instances_count 5 ; # How many Sentinels / Instances we use at max
set ::pids {} ; # We kill everything at exit
set ::dirs {} ; # We remove all the temp dirs at exit
set ::run_matching {} ; # If non empty, only tests matching pattern are run.
# Everything below works with paths relative to tests/sentinel-tmp
# (for instance ../../src/<program> and ../sentinel-tests/*), so give up
# immediately when that directory cannot be entered.
if {[catch {cd tests/sentinel-tmp}]} {
puts "tests/sentinel-tmp directory not found."
puts "Please run this test from the Redis source root."
exit 1
}
# Start 'count' instances of the given 'type': "redis" runs redis-server,
# any other value runs redis-sentinel.
#
# For every instance a free port is looked up starting from 'base_port',
# a private directory "<type>_<n>" holding a minimal configuration file is
# created, the program is executed in background, and a dict with the keys
# pid, host, port and link is appended to the global ::<type>_instances
# list. Directories and pids are also recorded in ::dirs and ::pids so that
# cleanup can get rid of them.
proc spawn_instance {type base_port count} {
    # The executable only depends on the type, so select it up front.
    if {$type eq "redis"} {
        set binary redis-server
    } else {
        set binary redis-sentinel
    }

    set idx 0
    while {$idx < $count} {
        set port [find_available_port $base_port]
        incr base_port
        puts "Starting $type #$idx at port $port"

        # Start from an empty working directory for this instance.
        set workdir "${type}_${idx}"
        lappend ::dirs $workdir
        catch {exec rm -rf $workdir}
        file mkdir $workdir

        # Minimal configuration: port, working directory and log file.
        set conf_path [file join $workdir $type.conf]
        set fd [open $conf_path w]
        puts $fd "port $port\ndir ./$workdir\nlogfile log.txt"
        close $fd

        # Run the instance in background, remembering the pid for cleanup.
        set pid [exec ../../src/${binary} $conf_path &]
        lappend ::pids $pid

        # Give up on the whole test run if the instance does not reply.
        if {[server_is_up 127.0.0.1 $port 100] == 0} {
            abort_sentinel_test "Problems starting $type #$idx: ping timeout"
        }

        # Register the instance in the list matching its type.
        set instance [list pid $pid host 127.0.0.1 port $port \
            link [redis 127.0.0.1 $port]]
        lappend ::${type}_instances $instance

        incr idx
    }
}
# Kill (SIGKILL) every process listed in ::pids and remove every directory
# listed in ::dirs. Failures of the single commands are ignored.
proc cleanup {} {
    puts "Cleaning up..."
    foreach victim $::pids { catch {exec kill -9 $victim} }
    foreach tmpdir $::dirs { catch {exec rm -rf $tmpdir} }
}
# Print 'msg' as the reason of the abort, clean up instances and temporary
# directories, then terminate the script with exit status 1.
proc abort_sentinel_test msg {
    puts "WARNING: Aborting the test.\n>>>>>>>> $msg"
    cleanup
    exit 1
}
# Parse the command line arguments found in ::argv.
#
# Recognized options:
#   --single <pattern>  sets ::run_matching to "*<pattern>*".
#   --pause-on-error    sets ::pause_on_error to 1.
#   --help              prints the usage text and exits with status 0.
#
# An unknown option, or --single given without its pattern argument,
# prints a message and exits with status 1.
proc parse_options {} {
    set argc [llength $::argv]
    for {set j 0} {$j < $argc} {incr j} {
        set opt [lindex $::argv $j]
        if {$opt eq "--single"} {
            # Consume the option argument. Without this check a trailing
            # "--single" would silently produce the pattern "**", which
            # matches (and thus runs) every test unit.
            incr j
            if {$j >= $argc} {
                puts "Option --single requires a pattern argument"
                exit 1
            }
            set ::run_matching "*[lindex $::argv $j]*"
        } elseif {$opt eq "--pause-on-error"} {
            set ::pause_on_error 1
        } elseif {$opt eq "--help"} {
            puts "Hello, I'm sentinel.tcl and I run Sentinel unit tests."
            puts "\nOptions:"
            puts "--single <pattern> Only runs tests specified by pattern."
            puts "--pause-on-error Pause for manual inspection on error."
            puts "--help Shows this help."
            exit 0
        } else {
            puts "Unknown option $opt"
            exit 1
        }
    }
}
# Entry point: parse the command line, start the Sentinel instances and
# then the Redis instances, run the test units and finally clean up.
proc main {} {
    parse_options
    foreach {type base_port} [list \
        sentinel $::sentinel_base_port \
        redis    $::redis_base_port] {
        spawn_instance $type $base_port $::instances_count
    }
    run_tests
    cleanup
}
# If the --pause-on-error option was passed at startup this function is
# called on error in order to give the developer a chance to understand
# more about the error condition while the instances are still running.
#
# Prompts on stdout and reads lines from stdin until the line "continue"
# is entered. The test is also resumed when stdin reaches end of file
# (for instance when it is closed or redirected from /dev/null): in that
# case nobody can ever type "continue" and the loop would spin forever.
proc pause_on_error {} {
    puts ""
    puts [colorstr yellow "*** Please inspect the error now ***"]
    puts "\nType \"continue\" to resume the test.\n"
    while 1 {
        puts -nonewline "> "
        flush stdout
        # With a variable name, gets returns -1 when no line can be read.
        if {[gets stdin line] < 0} break
        if {$line eq {continue}} break
    }
}
# Sentinel specific version of 'test': there is no server-client
# architecture here, every test is simply executed sequentially.
#
# 'code' is evaluated in the caller's scope. On success a green OK is
# printed. If it fails with a message starting with "assertion:" the rest
# of the message is printed in red (pausing if --pause-on-error was given).
# Any other failure is raised again for a handler up the stack.
proc test {descr code} {
    puts -nonewline "> $descr: "
    flush stdout

    if {![catch {set retval [uplevel 1 $code]} failure]} {
        puts [colorstr green OK]
        return
    }

    if {![string match "assertion:*" $failure]} {
        # Not an assertion: let somebody else take care of it.
        error $failure $::errorInfo
    }

    # Drop the 10 characters long "assertion:" prefix from the report.
    puts [colorstr red [string range $failure 10 end]]
    if {$::pause_on_error} pause_on_error
}
# Source, in sorted order, every file found in ../sentinel-tests.
# Directories are skipped, and so are the files not matching
# ::run_matching when that pattern is not empty.
#
# The unit files are sourced in the scope of this procedure, so its local
# variables (tests, test) are visible to them.
proc run_tests {} {
    set tests [lsort [glob ../sentinel-tests/*]]
    foreach test $tests {
        if {$::run_matching eq {} || [string match $::run_matching $test]} {
            if {![file isdirectory $test]} {
                puts [colorstr yellow "Testing unit: [lindex [file split $test] end]"]
                source $test
            }
        }
    }
}
# "S" sends a command to the N-th Sentinel instance (zero based) using the
# link stored for it in ::sentinel_instances. General form:
#
# S <sentinel-id> command arg arg arg ...
#
# For example "S 0 PING" pings Sentinel 0, the first instance.
proc S {n args} {
    set link [dict get [lindex $::sentinel_instances $n] link]
    $link {*}$args
}
# Like S, but talks with the N-th Redis instance (zero based) using the
# link stored for it in ::redis_instances.
proc R {n args} {
    set link [dict get [lindex $::redis_instances $n] link]
    $link {*}$args
}
# Return the value of 'field' from 'info', a text made of "name:value"
# lines. Lines are trimmed of surrounding spaces, CR and LF before being
# examined. The value of the first line starting with "<field>:" is
# returned, or an empty string if there is no such line.
proc get_info_field {info field} {
    set prefix "${field}:"
    set plen [string length $prefix]
    foreach raw [split $info "\n"] {
        set entry [string trim $raw "\r\n "]
        if {[string equal -length $plen $prefix $entry]} {
            return [string range $entry $plen end]
        }
    }
    return {}
}
# Return the value of 'field' from the INFO output of Sentinel 'n'.
proc SI {n field} {
    set info [S $n info]
    return [get_info_field $info $field]
}
# Return the value of 'field' from the INFO output of Redis instance 'n'.
proc RI {n field} {
    set info [R $n info]
    return [get_info_field $info $field]
}
# Iterate over the IDs (0 .. N-1) of the given list of sentinel or redis
# instances, running 'code' in the caller's scope with the variable named
# by 'idvar' set to the current ID.
#
# The body behaves like the body of a regular loop:
#   - "continue" skips to the next ID;
#   - "break" stops the iteration;
#   - "return" (and any other non standard code) is propagated to the caller;
#   - errors are raised again, preserving errorInfo and errorCode.
proc foreach_instance_id {instances idvar code} {
    upvar 1 $idvar id
    for {set id 0} {$id < [llength $instances]} {incr id} {
        set errcode [catch {uplevel 1 $code} result]
        if {$errcode == 1} {
            error $result $::errorInfo $::errorCode
        } elseif {$errcode == 4} {
            # TCL_CONTINUE: must act on this loop instead of leaking out to
            # whatever loop encloses the caller (or raising an "invoked
            # continue outside of a loop" error if there is none).
            continue
        } elseif {$errcode == 3} {
            # TCL_BREAK: same reasoning, just terminate this loop.
            break
        } elseif {$errcode != 0} {
            return -code $errcode $result
        }
    }
}
# Run foreach_instance_id over the Sentinel instances on behalf of the
# caller: the loop is executed in the caller's scope and its completion
# code and result are handed back unchanged.
proc foreach_sentinel_id {idvar code} {
    set loop_cmd [list foreach_instance_id $::sentinel_instances $idvar $code]
    set status [catch {uplevel 1 $loop_cmd} outcome]
    return -code $status $outcome
}
# Run foreach_instance_id over the Redis instances on behalf of the
# caller: the loop is executed in the caller's scope and its completion
# code and result are handed back unchanged.
proc foreach_redis_id {idvar code} {
    set loop_cmd [list foreach_instance_id $::redis_instances $idvar $code]
    set status [catch {uplevel 1 $loop_cmd} outcome]
    return -code $status $outcome
}
# Return the attribute 'attrib' of the instance number 'id' taken from
# the global list ::<type>_instances.
proc get_instance_attrib {type id attrib} {
    set instances [set ::${type}_instances]
    set instance [lindex $instances $id]
    return [dict get $instance $attrib]
}
# Store 'newval' as the attribute 'attrib' of the instance number 'id'
# inside the global list ::<type>_instances.
proc set_instance_attrib {type id attrib newval} {
    set varname ::${type}_instances
    set updated [dict replace [lindex [set $varname] $id] $attrib $newval]
    lset $varname $id $updated
}
# Build a master-slave cluster made of 'n' instances in total.
# Instance 0 becomes the master (and is flushed), instances 1 .. n-1 are
# configured as its slaves, and all the remaining instances are turned
# into stand-alone masters that are not part of the cluster.
proc create_redis_master_slave_cluster n {
    foreach_redis_id id {
        if {$id > 0 && $id < $n} {
            # A slave of instance 0.
            R $id slaveof [get_instance_attrib redis 0 host] \
                          [get_instance_attrib redis 0 port]
        } else {
            # Either our master or an instance outside of the cluster.
            R $id slaveof no one
            if {$id == 0} {
                R $id flushall
            }
        }
    }

    # Wait until the master reports all the expected slaves as connected.
    wait_for_condition 100 50 {
        [RI 0 connected_slaves] == ($n-1)
    } else {
        fail "Unable to create a master-slaves cluster."
    }
}
# Return the ID of the instance of the given 'type' whose port attribute
# equals 'port'. If there is no such instance 'fail' is called.
proc get_instance_id_by_port {type port} {
    # Select foreach_sentinel_id or foreach_redis_id according to the type.
    set iterator foreach_${type}_id
    $iterator id {
        if {[get_instance_attrib $type $id port] == $port} {
            return $id
        }
    }
    fail "Instance $type port $port not found."
}
# Terminate with SIGKILL the instance of the given type / id.
#
# The pid attribute of the instance is set to -1 to remember that it is
# no longer running, its link attribute is replaced with a placeholder
# name, and its old pid is removed from the list of pids killed at
# cleanup. Killing an instance that is already marked as killed is an
# error.
#
# The instance can be restarted with restart_instance.
proc kill_instance {type id} {
    set victim [get_instance_attrib $type $id pid]
    if {$victim == -1} {
        error "You tried to kill $type $id twice."
    }
    exec kill -9 $victim

    # Mark the instance as dead and make its link unusable.
    foreach {attrib value} {
        pid  -1
        link you_tried_to_talk_with_killed_instance
    } {
        set_instance_attrib $type $id $attrib $value
    }

    # The process is gone: no need to kill it again at exit.
    set ::pids [lsearch -all -inline -not -exact $::pids $victim]
}
# Restart an instance previously killed by kill_instance.
#
# The program is executed again with the configuration file created for
# the instance ("<type>_<id>/<type>.conf") and is expected to listen on
# the port stored in the instance attributes. The pid attribute is updated
# and the new pid is appended to ::pids for cleanup. Once the instance
# replies, its link attribute is replaced with a fresh connection. If the
# instance does not come up the whole test run is aborted.
proc restart_instance {type id} {
    set dirname "${type}_${id}"
    set cfgfile [file join $dirname $type.conf]
    set port [get_instance_attrib $type $id port]

    # Execute the instance with its old setup and append the new pid
    # for cleanup.
    if {$type eq "redis"} {
        set prgname redis-server
    } else {
        set prgname redis-sentinel
    }
    set pid [exec ../../src/${prgname} $cfgfile &]
    set_instance_attrib $type $id pid $pid
    lappend ::pids $pid

    # Check that the instance is running. The message must use $id: there
    # is no variable "j" in this procedure, and referencing it would turn
    # a clean abort into a "can't read j" error.
    if {[server_is_up 127.0.0.1 $port 100] == 0} {
        abort_sentinel_test "Problems starting $type #$id: ping timeout"
    }

    # Connect with it with a fresh link.
    set_instance_attrib $type $id link [redis 127.0.0.1 $port]
}
# Run the test suite. If main fails with an error, print the stack trace,
# clean up the spawned instances and temporary directories, and exit with
# a non-zero status so that the caller (a shell, make, a CI job) can tell
# the run failed: falling off the end of the script would report success.
if {[catch main e]} {
    puts $::errorInfo
    cleanup
    exit 1
}