2022-11-20 10:12:15 +00:00
|
|
|
# tests of corrupt listpack payload with valid CRC
|
2020-08-14 13:05:34 +00:00
|
|
|
|
2024-02-20 10:21:10 +00:00
|
|
|
# The fuzzer can corrupt the state in many places, which could
|
|
|
|
# mess up the reply, so we decided to skip logreqres.
|
|
|
|
tags {"dump" "corruption" "external:skip" "logreqres:skip"} {
|
2020-08-14 13:05:34 +00:00
|
|
|
|
2021-04-25 10:08:46 +00:00
|
|
|
# catch sigterm so that in case one of the random command hangs the test,
|
2024-04-09 08:24:03 +00:00
|
|
|
# usually due to the server not putting a response in the output buffers,
|
2021-04-25 10:08:46 +00:00
|
|
|
# we'll know which command it was
|
|
|
|
# Tclx is optional: only when the package loads successfully do we ask it to
# raise SIGTERM as a Tcl error, so the surrounding test code can catch it.
if {[catch {package require Tclx} err] == 0} {
    signal error SIGTERM
}
|
|
|
|
|
2020-08-14 13:05:34 +00:00
|
|
|
proc generate_collections {suffix elements} {
    # Fill one key of each collection type (hash, list, zset, set, stream),
    # each named <type>$suffix, with $elements members.
    #
    # suffix   - string appended to every key name
    # elements - number of members to add to each collection
    #
    # Commands are pipelined through a deferring client; all replies are
    # read and discarded afterwards, then the client is closed.
    set client [valkey_deferring_client]
    set pending 0
    for {set i 0} {$i < $elements} {incr i} {
        # alternate between integer members (even i) and string members (odd i)
        set member $i
        if {$i % 2 != 0} {
            set member "_$i"
        }
        $client hset hash$suffix $i $member
        $client lpush list$suffix $member
        $client zadd zset$suffix $i $member
        $client sadd set$suffix $member
        $client xadd stream$suffix * item 1 value $member
        incr pending 5 ;# one reply is owed for each of the five commands above
    }
    # Drain (and discard) every outstanding reply
    while {$pending > 0} {
        $client read
        incr pending -1
    }
    $client close
}
|
|
|
|
|
|
|
|
# generate keys with various types and encodings
|
|
|
|
proc generate_types {} {
    # Populate the server with keys of several types: small and big variants
    # of every collection type, a stream carrying consumer-group metadata,
    # an integer key and a plain string key.

    # Cap the compact-encoding thresholds at 5 for every collection type
    foreach {option limit} {
        list-max-ziplist-size 5
        hash-max-ziplist-entries 5
        set-max-listpack-entries 5
        zset-max-ziplist-entries 5
        stream-node-max-entries 5
    } {
        r config set $option $limit
    }

    # small (ziplist / listpack encoded) objects holding 3 items each
    generate_collections "" 3

    # give the small stream some metadata: a consumer group that was
    # delivered two entries, the second of which is deleted and the first
    # of which is acknowledged
    r xgroup create stream mygroup 0
    set delivered [r xreadgroup GROUP mygroup Alice COUNT 2 STREAMS stream >]
    set entries [lindex $delivered 0 1]
    r xdel stream [lindex $entries 1 0]
    r xack stream mygroup [lindex $entries 0 0]

    # non-collection types
    r incr int
    r set string str

    # bigger objects holding 10 items each (more than a single
    # ziplist / listpack)
    generate_collections big 10

    # also give the big stream a record whose field names differ from the
    # ones used by the earlier entries
    r xadd streambig * item 1 value 1
    r xadd streambig * item 1 unique value
}
|
|
|
|
|
|
|
|
proc corrupt_payload {payload} {
    # Return a copy of $payload in which one randomly chosen position
    # (or, roughly 10% of the time, two positions) has been overwritten
    # with a random byte value. The length of the payload is preserved.
    #
    # Note that the replacement byte may happen to equal the original one,
    # and that the two positions may coincide, so the result is not
    # guaranteed to differ from the input.
    set len [string length $payload]
    set count 1 ;# usually corrupt only one byte
    if {rand() > 0.9} { set count 2 }
    while {$count > 0} {
        set idx [expr {int(rand() * $len)}]
        # rand() is in the range [0,1), so it must be scaled by 256 (not 255)
        # for every byte value 0..255, including 0xFF, to be reachable.
        set ch [binary format c [expr {int(rand() * 256)}]]
        set payload [string replace $payload $idx $idx $ch]
        incr count -1
    }
    return $payload
}
|
|
|
|
|
|
|
|
# Print every command in $commands on its own line, with each argument
# passed through string2printable so that binary data is readable.
proc print_fuzzer_commands {commands} {
    foreach cmd $commands {
        foreach arg $cmd {
            puts -nonewline "[string2printable $arg] "
        }
        puts ""
    }
}

# fuzzy tester for corrupt RESTORE payloads
# valgrind will make sure there were no leaks in the rdb loader error handling code
#
# The test is run twice: once with sanitize-dump-payload disabled and once
# with it enabled. Each cycle DUMPs a random key, corrupts the payload,
# tries to RESTORE it into a new key and, when that succeeds, runs random
# traffic against the restored key.
foreach sanitize_dump {no yes} {
    if {$::accurate} {
        set min_duration [expr {60 * 10}] ;# run at least 10 minutes
        set min_cycles 1000 ;# run at least 1k cycles (max 16 minutes)
    } else {
        set min_duration 10 ;# run at least 10 seconds
        set min_cycles 10 ;# run at least 10 cycles
    }

    # Don't execute this on FreeBSD due to a yet-undiscovered memory issue
    # which causes tclsh to bloat.
    # The OS name is taken from the built-in tcl_platform array instead of
    # spawning an external `uname` process.
    if {$::tcl_platform(os) eq "FreeBSD"} {
        set min_cycles 1
        set min_duration 1
    }

    test "Fuzzer corrupt restore payloads - sanitize_dump: $sanitize_dump" {
        if {$min_duration * 2 > $::timeout} {
            fail "insufficient timeout"
        }
        # start a server, fill with data and save an RDB file once (avoid re-save)
        start_server [list overrides [list "save" "" use-exit-on-panic yes crash-memcheck-enabled no loglevel verbose] ] {
            r config set sanitize-dump-payload $sanitize_dump
            r debug set-skip-checksum-validation 1
            set start_time [clock seconds]
            generate_types
            # remembered so that keys left behind by the traffic can be detected
            set dbsize [r dbsize]
            r save

            set cycle 0
            set stat_terminated_in_restore 0
            set stat_terminated_in_traffic 0
            set stat_terminated_by_signal 0
            set stat_successful_restore 0
            set stat_rejected_restore 0
            set stat_traffic_commands_sent 0

            # repeatedly DUMP a random key, corrupt it and try RESTORE into a new key
            while true {
                set k [r randomkey]
                set dump [r dump $k]
                set dump [corrupt_payload $dump]
                set printable_dump [string2printable $dump]
                set restore_failed false
                set report_and_restart false
                set sent {}

                # RESTORE can fail, but hopefully not terminate
                if {[catch { r restore "_$k" 0 $dump REPLACE } err]} {
                    set restore_failed true
                    if {[string match "ERR*" $err]} {
                        # the server answered with an error reply: the
                        # payload was rejected, nothing else to do
                        incr stat_rejected_restore
                    } else {
                        # any other failure is treated as a server termination
                        set report_and_restart true
                        incr stat_terminated_in_restore
                        write_log_line 0 "corrupt payload: $printable_dump"
                        if {$sanitize_dump eq "yes"} {
                            puts "Server crashed in RESTORE with payload: $printable_dump"
                        }
                    }
                } else {
                    r ping ;# an attempt to check if the server didn't terminate (this will throw an error that will terminate the tests)
                }

                set print_commands false
                if {!$restore_failed} {
                    # if RESTORE didn't fail or terminate, run some random traffic on the new key
                    incr stat_successful_restore
                    if {[catch {
                        set sent [generate_fuzzy_traffic_on_key "_$k" 1] ;# traffic for 1 second
                        incr stat_traffic_commands_sent [llength $sent]
                        r del "_$k" ;# in case the server terminated, here's where we'll detect it.
                        if {$dbsize != [r dbsize]} {
                            # the traffic left extra (or removed existing) keys behind
                            puts "unexpected keys"
                            puts "keys: [r keys *]"
                            puts "commands leading to it:"
                            print_fuzzer_commands $sent
                            exit 1
                        }
                    } err]} {
                        set err [format "%s" $err] ;# convert to string for pattern matching
                        if {[string match "*SIGTERM*" $err]} {
                            # the test itself was terminated (see the SIGTERM
                            # handling at the top of the file): report and bail out
                            puts "payload that caused test to hang: $printable_dump"
                            if {$::dump_logs} {
                                dump_server_log [get_srv 0]
                            }
                            exit 1
                        }

                        # if the server terminated update stats and restart it
                        set report_and_restart true
                        incr stat_terminated_in_traffic
                        set by_signal [count_log_message 0 "crashed by signal"]
                        incr stat_terminated_by_signal $by_signal

                        if {$by_signal != 0 || $sanitize_dump eq "yes"} {
                            if {$::dump_logs} {
                                dump_server_log [get_srv 0]
                            }
                            puts "Server crashed (by signal: $by_signal), with payload: $printable_dump"
                            set print_commands true
                        }
                    }
                }

                # check valgrind report for invalid reads after each RESTORE
                # payload so that we have a report that is easier to reproduce
                set valgrind_errors [find_valgrind_errors [srv 0 stderr] false]
                set asan_errors [sanitizer_errors_from_file [srv 0 stderr]]
                if {$valgrind_errors ne "" || $asan_errors ne ""} {
                    puts "valgrind or asan found an issue for payload: $printable_dump"
                    set report_and_restart true
                    set print_commands true
                }

                if {$report_and_restart} {
                    if {$print_commands} {
                        puts "violating commands:"
                        print_fuzzer_commands $sent
                    }

                    # restart the server and re-apply debug configuration
                    write_log_line 0 "corrupt payload: $printable_dump"
                    restart_server 0 true true
                    r config set sanitize-dump-payload $sanitize_dump
                    r debug set-skip-checksum-validation 1
                }

                # stop only once both the minimum duration and the minimum
                # number of cycles have been reached
                incr cycle
                if {([clock seconds]-$start_time) >= $min_duration && $cycle >= $min_cycles} {
                    break
                }
            }
            if {$::verbose} {
                puts "Done $cycle cycles in [expr {[clock seconds]-$start_time}] seconds."
                puts "RESTORE: successful: $stat_successful_restore, rejected: $stat_rejected_restore, crashes during restore: $stat_terminated_in_restore"
                puts "Total commands sent in traffic: $stat_traffic_commands_sent, crashes during traffic: $stat_terminated_in_traffic ($stat_terminated_by_signal by signal)."
            }
        }
        # if we run sanitization we never expect the server to crash at runtime
        if {$sanitize_dump eq "yes"} {
            assert_equal $stat_terminated_in_restore 0
            assert_equal $stat_terminated_in_traffic 0
        }
        # make sure all terminations were due to assertion and not a SIGSEGV
        assert_equal $stat_terminated_by_signal 0
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} ;# tags
|
|
|
|
|