123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115 |
- # Failover stress test.
- # In this test a different node is killed in a loop for N
- # iterations. The test checks that certain properties
- # are preserved across iterations.
- source "../tests/includes/init-tests.tcl"
- test "Create a 5 nodes cluster" {
- create_cluster 5 5
- }
- test "Cluster is up" {
- assert_cluster_state ok
- }
- set iterations 20
- set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
- while {[incr iterations -1]} {
- set tokill [randomInt 10]
- set other [expr {($tokill+1)%10}] ; # Some other instance.
- set key [randstring 20 20 alpha]
- set val [randstring 20 20 alpha]
- set role [RI $tokill role]
- if {$role eq {master}} {
- set slave {}
- set myid [dict get [get_myself $tokill] id]
- foreach_redis_id id {
- if {$id == $tokill} continue
- if {[dict get [get_myself $id] slaveof] eq $myid} {
- set slave $id
- }
- }
- if {$slave eq {}} {
- fail "Unable to retrieve slave's ID for master #$tokill"
- }
- }
- puts "--- Iteration $iterations ---"
- if {$role eq {master}} {
- test "Wait for slave of #$tokill to sync" {
- wait_for_condition 1000 50 {
- [string match {*state=online*} [RI $tokill slave0]]
- } else {
- fail "Slave of node #$tokill is not ok"
- }
- }
- set slave_config_epoch [CI $slave cluster_my_epoch]
- }
- test "Cluster is writable before failover" {
- for {set i 0} {$i < 100} {incr i} {
- catch {$cluster set $key:$i $val:$i} err
- assert {$err eq {OK}}
- }
- # Wait for the write to propagate to the slave if we
- # are going to kill a master.
- if {$role eq {master}} {
- R $tokill wait 1 20000
- }
- }
- test "Killing node #$tokill" {
- kill_instance redis $tokill
- }
- if {$role eq {master}} {
- test "Wait failover by #$slave with old epoch $slave_config_epoch" {
- wait_for_condition 1000 50 {
- [CI $slave cluster_my_epoch] > $slave_config_epoch
- } else {
- fail "No failover detected, epoch is still [CI $slave cluster_my_epoch]"
- }
- }
- }
- test "Cluster should eventually be up again" {
- assert_cluster_state ok
- }
- test "Cluster is writable again" {
- for {set i 0} {$i < 100} {incr i} {
- catch {$cluster set $key:$i:2 $val:$i:2} err
- assert {$err eq {OK}}
- }
- }
- test "Restarting node #$tokill" {
- restart_instance redis $tokill
- }
- test "Instance #$tokill is now a slave" {
- wait_for_condition 1000 50 {
- [RI $tokill role] eq {slave}
- } else {
- fail "Restarted instance is not a slave"
- }
- }
- test "We can read back the value we set before" {
- for {set i 0} {$i < 100} {incr i} {
- catch {$cluster get $key:$i} err
- assert {$err eq "$val:$i"}
- catch {$cluster get $key:$i:2} err
- assert {$err eq "$val:$i:2"}
- }
- }
- }
- test "Post condition: current_epoch >= my_epoch everywhere" {
- foreach_redis_id id {
- assert {[CI $id cluster_current_epoch] >= [CI $id cluster_my_epoch]}
- }
- }
|