123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194 |
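# Resharding stress test.
# In this test random lists are written to the cluster while hash slots are
# resharded in the background. The test checks that the stored data stays
# consistent during the resharding, after a crash and restart of every
# instance, and between masters and their replicas.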
- source "../tests/includes/init-tests.tcl"
- source "../../../tests/support/cli.tcl"
# Bootstrap: build the cluster with the create_cluster helper, called with
# the arguments 5 and 5 (presumably the number of masters and of replicas;
# confirm against the helper's definition).
test "Create a 5 nodes cluster" {
    create_cluster 5 5
}

# Nothing else runs until the cluster state is asserted to be "ok".
test "Cluster is up" {
    assert_cluster_state ok
}
# Switch the append only file on for every instance, then wait until each
# instance reports it as active.
test "Enable AOF in all the instances" {
    # First pass: send the configuration changes to every instance without
    # waiting in between.
    foreach_redis_id id {
        R $id config set appendonly yes
        # "appendfsync no" is the fast setting, and write(2) is still
        # performed before the client receives its reply.
        R $id config set appendfsync no
    }

    # Second pass: poll each instance until it reports that no AOF rewrite
    # is in progress and that the AOF is enabled.
    foreach_redis_id id {
        wait_for_condition 1000 500 {
            [RI $id aof_rewrite_in_progress] == 0 &&
            [RI $id aof_enabled] == 1
        } else {
            fail "Failed to enable AOF on instance #$id"
        }
    }
}
# Tell whether a process with the given PID is still in execution.
#
# Arguments:
#   pid - the process ID to look up with "ps -p".
#
# Returns 1 when the ps invocation succeeds and 0 when it fails.
proc process_is_running {pid} {
    # exec raises a Tcl error when ps fails, which is what happens for a
    # PID that does not exist: a caught error means "not running".
    if {[catch {exec ps -p $pid}]} {
        return 0
    }
    return 1
}
# Outline of the resharding test:
#
# - numops commands are sent to the cluster in the course of the test.
# - Each command picks a random key between key:0 and key:(numkeys-1).
# - A value is appended to the list stored at that key with RPUSH.
# - The Tcl array "content" remembers every value pushed to each list.
# - Once half of the commands were sent, a resharding process is started in
#   background; a new one is started whenever the previous one is found to
#   have exited.
# - Writes continue while the resharding is in progress.
# - After the last write, the test waits for the resharding process to stop.
# - Finally every key is compared with the values remembered in "content".
set numops 200000
set numkeys 50000

# Cluster client connected to the regular port of instance 0.
set start_node_port [get_instance_attrib redis 0 port]
set cluster [redis_cluster 127.0.0.1:$start_node_port]

if {$::tls} {
    # The cluster runs with TLS: create a second client on the plaintext
    # port of instance 0. The trailing 0 passed to redis_cluster presumably
    # disables TLS for that client (confirm against redis_cluster).
    set plaintext_port [get_instance_attrib redis 0 plaintext-port]
    set cluster_plaintext [redis_cluster 127.0.0.1:$plaintext_port 0]
    puts "Testing TLS cluster on start node 127.0.0.1:$start_node_port, plaintext port $plaintext_port"
} else {
    # No TLS: both handles refer to the very same client.
    set cluster_plaintext $cluster
    puts "Testing using non-TLS cluster"
}

# PID of the background resharding process, empty while none is tracked.
set tribpid {}

# Start from an empty "content" array, dropping any previous variable with
# the same name.
unset -nocomplain content
array set content {}
# Main stress loop: push numops values to random lists, mirroring each push
# into the "content" array, while a resharding process runs in background
# during the second half of the loop.
test "Cluster consistency during live resharding" {
    set seq 0
    set half_ops [expr {$numops/2}]
    for {set op 0} {$op < $numops} {incr op} {
        # Every 10000 operations check the tracked resharding process:
        # when it is gone, forget its PID so that a new one is started by
        # the next condition.
        if {$tribpid ne {} && ($op % 10000) == 0} {
            if {![process_is_running $tribpid]} {
                set tribpid {}
            }
        }

        # Trigger the resharding once half of the ops were executed, and
        # again every time no resharding process is tracked.
        if {$op >= $half_ops && $tribpid eq {}} {
            puts -nonewline "...Starting resharding..."
            flush stdout
            set target [dict get [get_myself [randomInt 5]] id]
            # redis-cli moves 100 slots from all the nodes to the target;
            # its output is piped into the onlydots.tcl helper script.
            set reshard_cmd [list \
                ../../../src/redis-cli --cluster reshard \
                127.0.0.1:[get_instance_attrib redis 0 port] \
                --cluster-from all \
                --cluster-to $target \
                --cluster-slots 100 \
                --cluster-yes \
                {*}[rediscli_tls_config "../../../tests"] \
                | [info nameofexecutable] \
                ../tests/helpers/onlydots.tcl]
            # A background exec returns the PIDs of the pipeline; the
            # first one, belonging to redis-cli, is the one tracked.
            set tribpid [lindex [exec {*}$reshard_cmd &] 0]
        }

        # Write random data to random list.
        set listid [randomInt $numkeys]
        set key "key:$listid"
        incr seq

        # Writes are spread over plain commands and Lua scripts, so that
        # the Lua -> Redis command invocation, which has checks against
        # writing into wrong hash slots, is stressed as well. Both the
        # regular and the plaintext client are used.
        switch -- [expr {$listid % 3}] {
            0 { $cluster rpush $key $seq }
            1 { $cluster_plaintext rpush $key $seq }
            default { $cluster eval {redis.call("rpush",KEYS[1],ARGV[1])} 1 $key $seq }
        }
        lappend content($key) $seq

        # Progress marker every 1000 operations.
        if {($op % 1000) == 0} {
            puts -nonewline W
            flush stdout
        }
    }

    # Wait for the resharding process to end
    wait_for_condition 1000 500 {
        ![process_is_running $tribpid]
    } else {
        fail "Resharding is not terminating after some time."
    }
}
# Check that every list stored in the cluster matches the values recorded
# in the "content" array while writing.
test "Verify $numkeys keys for consistency with logical content" {
    foreach {key value} [array get content] {
        # Read the list a single time, so that the content reported on
        # failure is exactly the content that was compared.
        set actual [$cluster lrange $key 0 -1]
        if {$actual ne $value} {
            fail "Key $key expected to hold '$value' but actual content is $actual"
        }
    }
}
# Kill and immediately restart the instances, one instance at a time.
test "Crash and restart all the instances" {
    foreach_redis_id id {
        kill_instance redis $id
        restart_instance redis $id
    }
}

# After the restarts the cluster state is asserted to be "ok" again.
test "Cluster should eventually be up again" {
    assert_cluster_state ok
}
# Check again, once the instances were crashed and restarted, that every
# list stored in the cluster matches the values recorded in "content".
test "Verify $numkeys keys after the crash & restart" {
    foreach {key value} [array get content] {
        # Read the list a single time, so that the content reported on
        # failure is exactly the content that was compared.
        set actual [$cluster lrange $key 0 -1]
        if {$actual ne $value} {
            fail "Key $key expected to hold '$value' but actual content is $actual"
        }
    }
}
# Turn the append only file off again on every instance.
test "Disable AOF in all the instances" {
    foreach_redis_id id {
        R $id config set appendonly no
    }
}
# For every master, wait until each replica attached to it exposes the same
# DEBUG DIGEST value as the master.
test "Verify slaves consistency" {
    # Incremented once per replica whose digest matched the digest of its
    # master, that is, once per verified master/replica pair.
    set verified_masters 0
    foreach_redis_id id {
        # Only masters act as the reference side of the comparison.
        if {[lindex [R $id role] 0] eq {slave}} continue
        set masterport [get_instance_attrib redis $id port]
        set masterdigest [R $id debug digest]
        foreach_redis_id sid {
            set srole [R $sid role]
            if {[lindex $srole 0] eq {master}} continue
            # The third element of a replica's ROLE reply is the port of
            # its master: skip the replicas of other masters.
            if {[lindex $srole 2] != $masterport} continue
            wait_for_condition 1000 500 {
                [R $sid debug digest] eq $masterdigest
            } else {
                fail "Master and slave data digest are different"
            }
            incr verified_masters
        }
    }
    assert {$verified_masters >= 5}
}
# Every instance must report a dump_payload_sanitizations value of 0.
test "Dump sanitization was skipped for migrations" {
    foreach_redis_id id {
        assert {[RI $id dump_payload_sanitizations] == 0}
    }
}
|