memefficiency.tcl 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496
  # Measure how compactly the server stores a batch of small string keys.
  #
  # Flushes the dataset, pipelines 10000 SET commands whose values are runs of
  # the letter A with a random length in the interval [0, range), and compares
  # the logical payload size with the growth of used_memory.
  #
  # Arguments:
  #   range - exclusive upper bound for the random value length.
  #
  # Returns the ratio (as a double) between the bytes logically written
  # (key length + value length + 2 per pair) and the increase in used_memory.
  proc test_memory_efficiency {range} {
      r flushall
      # NOTE(review): the deferring client is created before the baseline is
      # sampled, presumably so its own footprint is part of the baseline.
      set rd [redis_deferring_client]
      set base_mem [s used_memory]
      set written 0
      for {set j 0} {$j < 10000} {incr j} {
          set key key:$j
          set val [string repeat A [expr {int(rand()*$range)}]]
          $rd set $key $val
          incr written [string length $key]
          incr written [string length $val]
          incr written 2 ;# A separator is the minimum to store key-value data.
      }
      for {set j 0} {$j < 10000} {incr j} {
          $rd read ; # Discard replies
      }

      set current_mem [s used_memory]
      # Release the pipelining connection instead of leaking one per call.
      # This is done only after used_memory has been sampled, so tearing the
      # client down cannot influence the measurement.
      $rd close

      set used [expr {$current_mem-$base_mem}]
      set efficiency [expr {double($written)/$used}]
      return $efficiency
  }
  # Drive test_memory_efficiency over several value-size ranges. Each range is
  # paired with the lowest efficiency ratio that is still accepted for it.
  start_server {tags {"memefficiency external:skip"}} {
      foreach {max_len floor} {
          32      0.15
          64      0.25
          128     0.35
          1024    0.75
          16384   0.82
      } {
          test "Memory efficiency with values in range $max_len" {
              set ratio [test_memory_efficiency $max_len]
              assert {$ratio >= $floor}
          }
      }
  }
  36. run_solo {defrag} {
  37. start_server {tags {"defrag external:skip"} overrides {appendonly yes auto-aof-rewrite-percentage 0 save ""}} {
  38. if {[string match {*jemalloc*} [s mem_allocator]] && [r debug mallctl arenas.page] <= 8192} {
  # End-to-end check of active defragmentation on a fragmented keyspace of
  # plain string keys, followed (when defrag is supported) by a check that
  # defragmentation also makes progress while an AOF file is being loaded.
  test "Active defrag" {
      # Server tuning for the test, applied in this exact order.
      foreach {option value} {
          hz                            100
          activedefrag                  no
          active-defrag-threshold-lower 5
          active-defrag-cycle-min       65
          active-defrag-cycle-max       75
          active-defrag-ignore-bytes    2mb
          maxmemory                     100mb
          maxmemory-policy              allkeys-lru
      } {
          r config set $option $value
      }

      # NOTE(review): populate is a suite helper defined elsewhere; presumably
      # the arguments are key count, key prefix and value size, and filling
      # past maxmemory triggers the evictions that fragment the allocator.
      populate 700000 asdf1 150
      populate 170000 asdf2 300

      after 120 ;# serverCron only updates the info once in 100ms
      set frag [s allocator_frag_ratio]
      if {$::verbose} {
          puts "frag $frag"
      }
      # The keyspace must be fragmented before defrag is switched on.
      assert {$frag >= 1.4}

      r config set latency-monitor-threshold 5
      r latency reset
      r config set maxmemory 110mb ;# prevent further eviction (not to fail the digest test)
      set digest [r debug digest]

      # Enabling defrag may be rejected by the server, so the error is
      # swallowed and the effective setting is checked instead.
      catch {r config set activedefrag yes} err
      if {[r config get activedefrag] eq "activedefrag yes"} {
          # Wait for the active defrag to start working (decision once a
          # second).
          wait_for_condition 50 100 {
              [s active_defrag_running] ne 0
          } else {
              fail "defrag not started."
          }

          # Wait for the active defrag to stop working.
          wait_for_condition 2000 100 {
              [s active_defrag_running] eq 0
          } else {
              after 120 ;# serverCron only updates the info once in 100ms
              puts [r info memory]
              puts [r memory malloc-stats]
              fail "defrag didn't stop."
          }

          # Test that the fragmentation is lower.
          after 120 ;# serverCron only updates the info once in 100ms
          set frag [s allocator_frag_ratio]

          # Pick the max latency (fourth field) of the defrag cycle event.
          set max_latency 0
          foreach entry [r latency latest] {
              if {[lindex $entry 0] == "active-defrag-cycle"} {
                  set max_latency [lindex $entry 3]
              }
          }

          if {$::verbose} {
              puts "frag $frag"
              set misses [s active_defrag_misses]
              set hits [s active_defrag_hits]
              puts "hits: $hits"
              puts "misses: $misses"
              puts "max latency $max_latency"
              puts [r latency latest]
              puts [r latency history active-defrag-cycle]
          }
          assert {$frag < 1.1}

          # due to high fragmentation, 100hz, and active-defrag-cycle-max set to 75,
          # we expect max latency to be not much higher than 7.5ms but due to rare
          # slowness threshold is set higher
          if {!$::no_latency} {
              assert {$max_latency <= 30}
          }
      }

      # verify the data isn't corrupted or changed
      set newdigest [r debug digest]
      assert {$digest eq $newdigest}
      r save ;# saving an rdb iterates over all the data / pointers

      # if defrag is supported, test AOF loading too
      if {[r config get activedefrag] eq "activedefrag yes"} {
          # reset stats and load the AOF file
          r config resetstat
          r config set key-load-delay -50 ;# sleep on average 1/50 usec
          r debug loadaof
          r config set activedefrag no

          # measure hits and misses right after aof loading
          set misses [s active_defrag_misses]
          set hits [s active_defrag_hits]

          after 120 ;# serverCron only updates the info once in 100ms
          set frag [s allocator_frag_ratio]

          # Same scan as above, this time for the cron that runs while loading.
          set max_latency 0
          foreach entry [r latency latest] {
              if {[lindex $entry 0] == "loading-cron"} {
                  set max_latency [lindex $entry 3]
              }
          }

          if {$::verbose} {
              puts "AOF loading:"
              puts "frag $frag"
              puts "hits: $hits"
              puts "misses: $misses"
              puts "max latency $max_latency"
              puts [r latency latest]
              puts [r latency history loading-cron]
          }

          # make sure we had defrag hits during AOF loading
          assert {$hits > 100000}

          # make sure the defragger did enough work to keep the fragmentation low
          # during loading. we cannot check that it went all the way down, since
          # we don't wait for a full defrag cycle to complete.
          assert {$frag < 1.4}

          # since the AOF contains simple (fast) SET commands (and the cron during
          # loading runs every 1000 commands), it'll still not block the loading
          # for long periods of time.
          if {!$::no_latency} {
              assert {$max_latency <= 30}
          }
      }
  }

  # The AOF and the artificial key-load delay are only used by the AOF loading
  # part of the test above; switch both off for the tests that follow.
  r config set appendonly no
  r config set key-load-delay 0
  # Active defragmentation with large aggregate keys (hash, list, sorted set,
  # set, stream) mixed with a fragmented mass of string keys. The test body
  # must end with the save command: its reply is matched against the OK
  # pattern that follows the body.
  test "Active defrag big keys" {
      r flushdb
      r config resetstat
      r config set hz 100
      r config set activedefrag no
      r config set active-defrag-max-scan-fields 1000
      r config set active-defrag-threshold-lower 5
      r config set active-defrag-cycle-min 65
      r config set active-defrag-cycle-max 75
      r config set active-defrag-ignore-bytes 2mb
      r config set maxmemory 0
      r config set list-max-ziplist-size 5 ;# list of 10k items will have 2000 quicklist nodes
      r config set stream-node-max-entries 5

      # A few small keys of every type, including a stream with a consumer
      # group that has one pending entry.
      r hmset hash h1 v1 h2 v2 h3 v3
      r lpush list a b c d
      r zadd zset 0 a 1 b 2 c 3 d
      r sadd set a b c d
      r xadd stream * item 1 value a
      r xadd stream * item 2 value b
      r xgroup create stream mygroup 0
      r xreadgroup GROUP mygroup Alice COUNT 1 STREAMS stream >

      # create big keys with 10k items
      set rd [redis_deferring_client]
      for {set j 0} {$j < 10000} {incr j} {
          $rd hset bighash $j [concat "asdfasdfasdf" $j]
          $rd lpush biglist [concat "asdfasdfasdf" $j]
          $rd zadd bigzset $j [concat "asdfasdfasdf" $j]
          $rd sadd bigset [concat "asdfasdfasdf" $j]
          $rd xadd bigstream * item 1 value a
      }
      # Five commands were pipelined per iteration above: 50000 replies.
      for {set j 0} {$j < 50000} {incr j} {
          $rd read ; # Discard replies
      }

      set expected_frag 1.7
      if {$::accurate} {
          # scale the hash to 1m fields in order to have a measurable latency
          for {set j 10000} {$j < 1000000} {incr j} {
              $rd hset bighash $j [concat "asdfasdfasdf" $j]
          }
          for {set j 10000} {$j < 1000000} {incr j} {
              $rd read ; # Discard replies
          }
          # creating that big hash, increased used_memory, so the relative frag goes down
          set expected_frag 1.3
      }

      # add a mass of string keys
      for {set j 0} {$j < 500000} {incr j} {
          $rd setrange $j 150 a
      }
      for {set j 0} {$j < 500000} {incr j} {
          $rd read ; # Discard replies
      }
      # 500000 string keys plus the 10 aggregate keys created above.
      assert_equal [r dbsize] 500010

      # create some fragmentation by deleting every other string key
      for {set j 0} {$j < 500000} {incr j 2} {
          $rd del $j
      }
      for {set j 0} {$j < 500000} {incr j 2} {
          $rd read ; # Discard replies
      }
      assert_equal [r dbsize] 250010

      # start defrag
      after 120 ;# serverCron only updates the info once in 100ms
      set frag [s allocator_frag_ratio]
      if {$::verbose} {
          puts "frag $frag"
      }
      assert {$frag >= $expected_frag}
      r config set latency-monitor-threshold 5
      r latency reset

      set digest [r debug digest]
      # Enabling defrag may be rejected by the server, so the error is
      # swallowed and the effective setting is checked instead.
      catch {r config set activedefrag yes} e
      if {[r config get activedefrag] eq "activedefrag yes"} {
          # wait for the active defrag to start working (decision once a second)
          wait_for_condition 50 100 {
              [s active_defrag_running] ne 0
          } else {
              fail "defrag not started."
          }

          # wait for the active defrag to stop working
          wait_for_condition 500 100 {
              [s active_defrag_running] eq 0
          } else {
              after 120 ;# serverCron only updates the info once in 100ms
              puts [r info memory]
              puts [r memory malloc-stats]
              fail "defrag didn't stop."
          }

          # test that the fragmentation is lower
          after 120 ;# serverCron only updates the info once in 100ms
          set frag [s allocator_frag_ratio]
          set max_latency 0
          foreach event [r latency latest] {
              lassign $event eventname time latency max
              if {$eventname == "active-defrag-cycle"} {
                  set max_latency $max
              }
          }
          if {$::verbose} {
              puts "frag $frag"
              set misses [s active_defrag_misses]
              set hits [s active_defrag_hits]
              puts "hits: $hits"
              puts "misses: $misses"
              puts "max latency $max_latency"
              puts [r latency latest]
              puts [r latency history active-defrag-cycle]
          }
          assert {$frag < 1.1}
          # due to high fragmentation, 100hz, and active-defrag-cycle-max set to 75,
          # we expect max latency to be not much higher than 7.5ms but due to rare
          # slowness threshold is set higher
          if {!$::no_latency} {
              assert {$max_latency <= 30}
          }
      }

      # The pipelining connection is no longer needed; close it instead of
      # leaking it. This must happen before the final save so that the value
      # returned by the test body is still the reply of that command.
      $rd close

      # verify the data isn't corrupted or changed
      set newdigest [r debug digest]
      assert {$digest eq $newdigest}
      r save ;# saving an rdb iterates over all the data / pointers
  } {OK}
  # Active defragmentation of one very large list whose quicklist nodes were
  # allocated interleaved with those of a second list that is then deleted.
  # The test body must end with the deletion of biglist1: its reply is matched
  # against the pattern 1 that follows the body.
  test "Active defrag big list" {
      r flushdb
      r config resetstat
      r config set hz 100
      r config set activedefrag no
      r config set active-defrag-max-scan-fields 1000
      r config set active-defrag-threshold-lower 5
      r config set active-defrag-cycle-min 65
      r config set active-defrag-cycle-max 75
      r config set active-defrag-ignore-bytes 2mb
      r config set maxmemory 0
      r config set list-max-ziplist-size 5 ;# list of 500k items will have 100k quicklist nodes

      # create two big lists of 500k items each
      set rd [redis_deferring_client]

      set expected_frag 1.7
      # add a mass of list nodes to two lists (allocations are interlaced)
      set val [string repeat A 100] ;# 5 items of 100 bytes puts us in the 640 bytes bin, which has 32 regs, so high potential for fragmentation
      set elements 500000
      for {set j 0} {$j < $elements} {incr j} {
          $rd lpush biglist1 $val
          $rd lpush biglist2 $val
      }
      # Two commands were pipelined per iteration, so read two replies each.
      for {set j 0} {$j < $elements} {incr j} {
          $rd read ; # Discard replies
          $rd read ; # Discard replies
      }

      # create some fragmentation
      r del biglist2

      # start defrag
      after 120 ;# serverCron only updates the info once in 100ms
      set frag [s allocator_frag_ratio]
      if {$::verbose} {
          puts "frag $frag"
      }

      assert {$frag >= $expected_frag}
      r config set latency-monitor-threshold 5
      r latency reset

      set digest [r debug digest]
      # Enabling defrag may be rejected by the server, so the error is
      # swallowed and the effective setting is checked instead.
      catch {r config set activedefrag yes} e
      if {[r config get activedefrag] eq "activedefrag yes"} {
          # wait for the active defrag to start working (decision once a second)
          wait_for_condition 50 100 {
              [s active_defrag_running] ne 0
          } else {
              fail "defrag not started."
          }

          # wait for the active defrag to stop working
          wait_for_condition 500 100 {
              [s active_defrag_running] eq 0
          } else {
              after 120 ;# serverCron only updates the info once in 100ms
              puts [r info memory]
              puts [r info stats]
              puts [r memory malloc-stats]
              fail "defrag didn't stop."
          }

          # test that the fragmentation is lower
          after 120 ;# serverCron only updates the info once in 100ms
          set misses [s active_defrag_misses]
          set hits [s active_defrag_hits]
          set frag [s allocator_frag_ratio]
          set max_latency 0
          foreach event [r latency latest] {
              lassign $event eventname time latency max
              if {$eventname == "active-defrag-cycle"} {
                  set max_latency $max
              }
          }
          if {$::verbose} {
              puts "frag $frag"
              puts "misses: $misses"
              puts "hits: $hits"
              puts "max latency $max_latency"
              puts [r latency latest]
              puts [r latency history active-defrag-cycle]
          }
          assert {$frag < 1.1}
          # due to high fragmentation, 100hz, and active-defrag-cycle-max set to 75,
          # we expect max latency to be not much higher than 7.5ms but due to rare
          # slowness threshold is set higher
          if {!$::no_latency} {
              assert {$max_latency <= 30}
          }

          # in extreme cases of stagnation, we see over 20m misses before the test
          # aborts with "defrag didn't stop", in normal cases we only see 100k
          # misses out of 500k elements
          assert {$misses < $elements}
      }

      # The pipelining connection is no longer needed; close it instead of
      # leaking it. This must happen before the final command so that the
      # value returned by the test body is still the reply of that deletion.
      $rd close

      # verify the data isn't corrupted or changed
      set newdigest [r debug digest]
      assert {$digest eq $newdigest}
      r save ;# saving an rdb iterates over all the data / pointers
      r del biglist1 ;# coverage for quicklistBookmarksClear
  } {1}
  # Regression test for a defragmenter stagnation case, run on its own
  # freshly started server.
  test "Active defrag edge case" {
      # there was an edge case in defrag where all the slabs of a certain bin are
      # at exactly the same % utilization, with the exception of the current slab
      # from which new allocations are made. if the current slab is lower in
      # utilization the defragger would have ended up in stagnation, kept running
      # and not moved any allocation.
      # this test is more consistent on a fresh server with no history
      start_server {tags {"defrag"} overrides {save ""}} {
          r flushdb
          r config resetstat
          r config set hz 100
          r config set activedefrag no
          r config set active-defrag-max-scan-fields 1000
          r config set active-defrag-threshold-lower 5
          r config set active-defrag-cycle-min 65
          r config set active-defrag-cycle-max 75
          r config set active-defrag-ignore-bytes 1mb
          r config set maxmemory 0
          set expected_frag 1.3

          r debug mallctl-str thread.tcache.flush VOID
          # fill the first slab containing 32 regs of 640 bytes.
          for {set j 0} {$j < 32} {incr j} {
              r setrange "_$j" 600 x
              r debug mallctl-str thread.tcache.flush VOID
          }

          # add a mass of keys with 600 bytes values, fill the bin of 640 bytes
          # which has 32 regs per slab.
          set rd [redis_deferring_client]
          set keys 640000
          for {set j 0} {$j < $keys} {incr j} {
              $rd setrange $j 600 x
          }
          for {set j 0} {$j < $keys} {incr j} {
              $rd read ; # Discard replies
          }

          # create some fragmentation of 50% by deleting every other key; the
          # number of pipelined commands is counted so that exactly that many
          # replies are read back below.
          set sent 0
          for {set j 0} {$j < $keys} {incr j 2} {
              $rd del $j
              incr sent
          }
          for {set j 0} {$j < $sent} {incr j} {
              $rd read ; # Discard replies
          }

          # create higher fragmentation in the first slab
          for {set j 10} {$j < 32} {incr j} {
              r del "_$j"
          }

          # start defrag
          after 120 ;# serverCron only updates the info once in 100ms
          set frag [s allocator_frag_ratio]
          if {$::verbose} {
              puts "frag $frag"
          }

          assert {$frag >= $expected_frag}

          set digest [r debug digest]
          # Enabling defrag may be rejected by the server, so the error is
          # swallowed and the effective setting is checked instead.
          catch {r config set activedefrag yes} e
          if {[r config get activedefrag] eq "activedefrag yes"} {
              # wait for the active defrag to start working (decision once a second)
              wait_for_condition 50 100 {
                  [s active_defrag_running] ne 0
              } else {
                  fail "defrag not started."
              }

              # wait for the active defrag to stop working
              wait_for_condition 500 100 {
                  [s active_defrag_running] eq 0
              } else {
                  after 120 ;# serverCron only updates the info once in 100ms
                  puts [r info memory]
                  puts [r info stats]
                  puts [r memory malloc-stats]
                  fail "defrag didn't stop."
              }

              # test that the fragmentation is lower
              after 120 ;# serverCron only updates the info once in 100ms
              set misses [s active_defrag_misses]
              set hits [s active_defrag_hits]
              set frag [s allocator_frag_ratio]
              if {$::verbose} {
                  puts "frag $frag"
                  puts "hits: $hits"
                  puts "misses: $misses"
              }
              assert {$frag < 1.1}
              assert {$misses < 10000000} ;# when defrag doesn't stop, we have some 30m misses, when it does, we have 2m misses
          }

          # The pipelining connection to this nested server is no longer
          # needed; close it instead of leaking it.
          $rd close

          # verify the data isn't corrupted or changed
          set newdigest [r debug digest]
          assert {$digest eq $newdigest}
          r save ;# saving an rdb iterates over all the data / pointers
      }
  }
  454. }
  455. }
  456. }
  457. } ;# run_solo