# 03-failover-loop.tcl

  # Failover stress test.
  #
  # A node is killed on every pass of a loop that runs for N iterations.
  # The test checks that certain properties are preserved across
  # iterations.

  source "../tests/includes/init-tests.tcl"

  test "Create a 5 nodes cluster" {
      create_cluster 5 5
  }

  test "Cluster is up" {
      assert_cluster_state ok
  }

  # Counter driving the kill/restart loop; the loop decrements it before
  # every pass and stops once it reaches zero.
  set iterations 20

  # Cluster client used for all the writes and reads of the test, created
  # from the port of instance #0.
  set seed_port [get_instance_attrib redis 0 port]
  set cluster [redis_cluster 127.0.0.1:$seed_port]
  # One pass = pick a random instance, write some keys, kill the instance,
  # wait for the cluster to recover, write again, restart the instance and
  # read everything back.
  #
  # incr returns the already decremented counter, so the body is entered
  # with $iterations counting down and the loop ends when it hits zero.
  while {[incr iterations -1]} {
      # NOTE(review): 10 presumably matches the number of instances set up
      # by "create_cluster 5 5" -- confirm against the test harness.
      set tokill [randomInt 10]
      set other [expr {($tokill+1)%10}] ; # Some other instance.
      set key [randstring 20 20 alpha]
      set val [randstring 20 20 alpha]
      set role [RI $tokill role]

      # The role is sampled once, before the kill; every master-only step
      # below is gated on this flag.
      set killing_master [expr {$role eq {master}}]

      # For a master victim, look for an instance whose "slaveof" field is
      # the victim's node id. If several match, the last one visited wins.
      if {$killing_master} {
          set slave {}
          set myid [dict get [get_myself $tokill] id]
          foreach_redis_id id {
              if {$id != $tokill
                  && [dict get [get_myself $id] slaveof] eq $myid} {
                  set slave $id
              }
          }
          if {$slave eq {}} {
              fail "Unable to retrieve slave's ID for master #$tokill"
          }
      }

      puts "--- Iteration $iterations ---"

      if {$killing_master} {
          test "Wait for slave of #$tokill to sync" {
              wait_for_condition 1000 50 {
                  [string match {*state=online*} [RI $tokill slave0]]
              } else {
                  fail "Slave of node #$tokill is not ok"
              }
          }
          # Baseline epoch of the replica, compared again after the kill.
          set slave_config_epoch [CI $slave cluster_my_epoch]
      }

      test "Cluster is writable before failover" {
          for {set i 0} {$i < 100} {incr i} {
              catch {$cluster set $key:$i $val:$i} err
              assert {$err eq {OK}}
          }
          # Wait for the write to propagate to the slave if we
          # are going to kill a master.
          if {$killing_master} {
              R $tokill wait 1 20000
          }
      }

      test "Killing node #$tokill" {
          kill_instance redis $tokill
      }

      # A failover is detected by the replica's epoch moving past the
      # value recorded before the kill.
      if {$killing_master} {
          test "Wait failover by #$slave with old epoch $slave_config_epoch" {
              wait_for_condition 1000 50 {
                  [CI $slave cluster_my_epoch] > $slave_config_epoch
              } else {
                  fail "No failover detected, epoch is still [CI $slave cluster_my_epoch]"
              }
          }
      }

      test "Cluster should eventually be up again" {
          assert_cluster_state ok
      }

      # Second batch of writes, under distinct ":2" keys, while the killed
      # instance is still down.
      test "Cluster is writable again" {
          for {set i 0} {$i < 100} {incr i} {
              catch {$cluster set $key:$i:2 $val:$i:2} err
              assert {$err eq {OK}}
          }
      }

      test "Restarting node #$tokill" {
          restart_instance redis $tokill
      }

      test "Instance #$tokill is now a slave" {
          wait_for_condition 1000 50 {
              [RI $tokill role] eq {slave}
          } else {
              fail "Restarted instance is not a slave"
          }
      }

      # Both batches written during this pass must still be readable.
      test "We can read back the value we set before" {
          for {set i 0} {$i < 100} {incr i} {
              catch {$cluster get $key:$i} err
              assert {$err eq "$val:$i"}
              catch {$cluster get $key:$i:2} err
              assert {$err eq "$val:$i:2"}
          }
      }
  }
  # Final check, run once after the loop: on every instance the reported
  # cluster_current_epoch must not be lower than its cluster_my_epoch.
  test "Post condition: current_epoch >= my_epoch everywhere" {
      foreach_redis_id id {
          assert {[CI $id cluster_current_epoch] >= [CI $id cluster_my_epoch]}
      }
  }