1
0

hyperloglog.tcl 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. start_server {tags {"hll"}} {
  2. test {HyperLogLog self test passes} {
  3. catch {r pfselftest} e
  4. set e
  5. } {OK} {needs:pfdebug}
  6. test {PFADD without arguments creates an HLL value} {
  7. r pfadd hll
  8. r exists hll
  9. } {1}
  10. test {Approximated cardinality after creation is zero} {
  11. r pfcount hll
  12. } {0}
  13. test {PFADD returns 1 when at least 1 reg was modified} {
  14. r pfadd hll a b c
  15. } {1}
  16. test {PFADD returns 0 when no reg was modified} {
  17. r pfadd hll a b c
  18. } {0}
  19. test {PFADD works with empty string (regression)} {
  20. r pfadd hll ""
  21. }
  22. # Note that the self test stresses much better the
  23. # cardinality estimation error. We are testing just the
  24. # command implementation itself here.
  25. test {PFCOUNT returns approximated cardinality of set} {
  26. r del hll
  27. set res {}
  28. r pfadd hll 1 2 3 4 5
  29. lappend res [r pfcount hll]
  30. # Call it again to test cached value invalidation.
  31. r pfadd hll 6 7 8 8 9 10
  32. lappend res [r pfcount hll]
  33. set res
  34. } {5 10}
  35. test {HyperLogLogs are promote from sparse to dense} {
  36. r del hll
  37. r config set hll-sparse-max-bytes 3000
  38. set n 0
  39. while {$n < 100000} {
  40. set elements {}
  41. for {set j 0} {$j < 100} {incr j} {lappend elements [expr rand()]}
  42. incr n 100
  43. r pfadd hll {*}$elements
  44. set card [r pfcount hll]
  45. set err [expr {abs($card-$n)}]
  46. assert {$err < (double($card)/100)*5}
  47. if {$n < 1000} {
  48. assert {[r pfdebug encoding hll] eq {sparse}}
  49. } elseif {$n > 10000} {
  50. assert {[r pfdebug encoding hll] eq {dense}}
  51. }
  52. }
  53. } {} {needs:pfdebug}
  54. test {HyperLogLog sparse encoding stress test} {
  55. for {set x 0} {$x < 1000} {incr x} {
  56. r del hll1
  57. r del hll2
  58. set numele [randomInt 100]
  59. set elements {}
  60. for {set j 0} {$j < $numele} {incr j} {
  61. lappend elements [expr rand()]
  62. }
  63. # Force dense representation of hll2
  64. r pfadd hll2
  65. r pfdebug todense hll2
  66. r pfadd hll1 {*}$elements
  67. r pfadd hll2 {*}$elements
  68. assert {[r pfdebug encoding hll1] eq {sparse}}
  69. assert {[r pfdebug encoding hll2] eq {dense}}
  70. # Cardinality estimated should match exactly.
  71. assert {[r pfcount hll1] eq [r pfcount hll2]}
  72. }
  73. } {} {needs:pfdebug}
  74. test {Corrupted sparse HyperLogLogs are detected: Additional at tail} {
  75. r del hll
  76. r pfadd hll a b c
  77. r append hll "hello"
  78. set e {}
  79. catch {r pfcount hll} e
  80. set e
  81. } {*INVALIDOBJ*}
  82. test {Corrupted sparse HyperLogLogs are detected: Broken magic} {
  83. r del hll
  84. r pfadd hll a b c
  85. r setrange hll 0 "0123"
  86. set e {}
  87. catch {r pfcount hll} e
  88. set e
  89. } {*WRONGTYPE*}
  90. test {Corrupted sparse HyperLogLogs are detected: Invalid encoding} {
  91. r del hll
  92. r pfadd hll a b c
  93. r setrange hll 4 "x"
  94. set e {}
  95. catch {r pfcount hll} e
  96. set e
  97. } {*WRONGTYPE*}
  98. test {Corrupted dense HyperLogLogs are detected: Wrong length} {
  99. r del hll
  100. r pfadd hll a b c
  101. r setrange hll 4 "\x00"
  102. set e {}
  103. catch {r pfcount hll} e
  104. set e
  105. } {*WRONGTYPE*}
  106. test {Fuzzing dense/sparse encoding: Redis should always detect errors} {
  107. for {set j 0} {$j < 1000} {incr j} {
  108. r del hll
  109. set items {}
  110. set numitems [randomInt 3000]
  111. for {set i 0} {$i < $numitems} {incr i} {
  112. lappend items [expr {rand()}]
  113. }
  114. r pfadd hll {*}$items
  115. # Corrupt it in some random way.
  116. for {set i 0} {$i < 5} {incr i} {
  117. set len [r strlen hll]
  118. set pos [randomInt $len]
  119. set byte [randstring 1 1 binary]
  120. r setrange hll $pos $byte
  121. # Don't modify more bytes 50% of times
  122. if {rand() < 0.5} break
  123. }
  124. # Use the hyperloglog to check if it crashes
  125. # Redis in some way.
  126. catch {
  127. r pfcount hll
  128. }
  129. }
  130. }
  131. test {PFADD, PFCOUNT, PFMERGE type checking works} {
  132. r set foo{t} bar
  133. catch {r pfadd foo{t} 1} e
  134. assert_match {*WRONGTYPE*} $e
  135. catch {r pfcount foo{t}} e
  136. assert_match {*WRONGTYPE*} $e
  137. catch {r pfmerge bar{t} foo{t}} e
  138. assert_match {*WRONGTYPE*} $e
  139. catch {r pfmerge foo{t} bar{t}} e
  140. assert_match {*WRONGTYPE*} $e
  141. }
  142. test {PFMERGE results on the cardinality of union of sets} {
  143. r del hll{t} hll1{t} hll2{t} hll3{t}
  144. r pfadd hll1{t} a b c
  145. r pfadd hll2{t} b c d
  146. r pfadd hll3{t} c d e
  147. r pfmerge hll{t} hll1{t} hll2{t} hll3{t}
  148. r pfcount hll{t}
  149. } {5}
  150. test {PFCOUNT multiple-keys merge returns cardinality of union #1} {
  151. r del hll1{t} hll2{t} hll3{t}
  152. for {set x 1} {$x < 10000} {incr x} {
  153. r pfadd hll1{t} "foo-$x"
  154. r pfadd hll2{t} "bar-$x"
  155. r pfadd hll3{t} "zap-$x"
  156. set card [r pfcount hll1{t} hll2{t} hll3{t}]
  157. set realcard [expr {$x*3}]
  158. set err [expr {abs($card-$realcard)}]
  159. assert {$err < (double($card)/100)*5}
  160. }
  161. }
  162. test {PFCOUNT multiple-keys merge returns cardinality of union #2} {
  163. r del hll1{t} hll2{t} hll3{t}
  164. set elements {}
  165. for {set x 1} {$x < 10000} {incr x} {
  166. for {set j 1} {$j <= 3} {incr j} {
  167. set rint [randomInt 20000]
  168. r pfadd hll$j{t} $rint
  169. lappend elements $rint
  170. }
  171. }
  172. set realcard [llength [lsort -unique $elements]]
  173. set card [r pfcount hll1{t} hll2{t} hll3{t}]
  174. set err [expr {abs($card-$realcard)}]
  175. assert {$err < (double($card)/100)*5}
  176. }
  177. test {PFDEBUG GETREG returns the HyperLogLog raw registers} {
  178. r del hll
  179. r pfadd hll 1 2 3
  180. llength [r pfdebug getreg hll]
  181. } {16384} {needs:pfdebug}
  182. test {PFADD / PFCOUNT cache invalidation works} {
  183. r del hll
  184. r pfadd hll a b c
  185. r pfcount hll
  186. assert {[r getrange hll 15 15] eq "\x00"}
  187. r pfadd hll a b c
  188. assert {[r getrange hll 15 15] eq "\x00"}
  189. r pfadd hll 1 2 3
  190. assert {[r getrange hll 15 15] eq "\x80"}
  191. }
  192. }