// SPDX-License-Identifier: GPL-2.0
/*
 * background writeback - scan btree for dirty data and write it to the backing
 * device
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "writeback.h"

#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/sched/clock.h>
#include <trace/events/bcache.h>

static void update_gc_after_writeback(struct cache_set *c)
{
	if (c->gc_after_writeback != (BCH_ENABLE_AUTO_GC) ||
	    c->gc_stats.in_use < BCH_AUTO_GC_DIRTY_THRESHOLD)
		return;

	c->gc_after_writeback |= BCH_DO_AUTO_GC;
}

/* Rate limiting */
static uint64_t __calc_target_rate(struct cached_dev *dc)
{
	struct cache_set *c = dc->disk.c;

	/*
	 * This is the size of the cache, minus the amount used for
	 * flash-only devices
	 */
	uint64_t cache_sectors = c->nbuckets * c->cache->sb.bucket_size -
				atomic_long_read(&c->flash_dev_dirty_sectors);

	/*
	 * Unfortunately there is no control of global dirty data.  If the
	 * user states that they want 10% dirty data in the cache, and has,
	 * e.g., 5 backing volumes of equal size, we try and ensure each
	 * backing volume uses about 2% of the cache for dirty data.
	 */
	uint32_t bdev_share =
		div64_u64(bdev_sectors(dc->bdev) << WRITEBACK_SHARE_SHIFT,
				c->cached_dev_sectors);

	uint64_t cache_dirty_target =
		div_u64(cache_sectors * dc->writeback_percent, 100);

	/* Ensure each backing dev gets at least one dirty share */
	if (bdev_share < 1)
		bdev_share = 1;

	return (cache_dirty_target * bdev_share) >> WRITEBACK_SHARE_SHIFT;
}
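/*
 * Worked example for __calc_target_rate() (illustrative numbers only):
 * with writeback_percent = 10, cache_dirty_target is 10% of
 * cache_sectors.  bdev_share is this backing device's fraction of
 * c->cached_dev_sectors in fixed point (scaled by
 * 1 << WRITEBACK_SHARE_SHIFT, see writeback.h); a volume holding 25%
 * of all cached sectors thus gets a dirty target of 2.5% of the cache.
 */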
static void __update_writeback_rate(struct cached_dev *dc)
{
	/*
	 * PI controller:
	 * Figures out the amount that should be written per second.
	 *
	 * First, the error (number of sectors that are dirty beyond our
	 * target) is calculated.  The error is accumulated (numerically
	 * integrated).
	 *
	 * Then, the proportional value and integral value are scaled
	 * based on configured values.  These are stored as inverses to
	 * avoid fixed point math and to make configuration easy -- e.g.
	 * the default value of 40 for writeback_rate_p_term_inverse
	 * attempts to write at a rate that would retire all the dirty
	 * blocks in 40 seconds.
	 *
	 * The writeback_rate_i_term_inverse value of 10000 means that
	 * 1/10000th of the error is accumulated in the integral term per
	 * second.  This acts as a slow, long-term average that is not
	 * subject to variations in usage like the p term.
	 */
	int64_t target = __calc_target_rate(dc);
	int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
	int64_t error = dirty - target;
	int64_t proportional_scaled =
		div_s64(error, dc->writeback_rate_p_term_inverse);
	int64_t integral_scaled;
	uint32_t new_rate;

	if ((error < 0 && dc->writeback_rate_integral > 0) ||
	    (error > 0 && time_before64(local_clock(),
			 dc->writeback_rate.next + NSEC_PER_MSEC))) {
		/*
		 * Only decrease the integral term if it's more than
		 * zero.  Only increase the integral term if the device
		 * is keeping up.  (Don't wind up the integral
		 * ineffectively in either case).
		 *
		 * It's necessary to scale this by
		 * writeback_rate_update_seconds to keep the integral
		 * term dimensioned properly.
		 */
		dc->writeback_rate_integral += error *
			dc->writeback_rate_update_seconds;
	}

	integral_scaled = div_s64(dc->writeback_rate_integral,
			dc->writeback_rate_i_term_inverse);

	new_rate = clamp_t(int32_t, (proportional_scaled + integral_scaled),
			dc->writeback_rate_minimum, NSEC_PER_SEC);

	dc->writeback_rate_proportional = proportional_scaled;
	dc->writeback_rate_integral_scaled = integral_scaled;
	dc->writeback_rate_change = new_rate -
			atomic_long_read(&dc->writeback_rate.rate);
	atomic_long_set(&dc->writeback_rate.rate, new_rate);
	dc->writeback_rate_target = target;
}
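/*
 * Illustrative numbers for the controller above, using the defaults
 * from bch_cached_dev_writeback_init() (p_term_inverse = 40,
 * i_term_inverse = 10000): with target = 1,000,000 and
 * dirty = 1,400,000 sectors, error = 400,000 and
 * proportional_scaled = 400,000 / 40 = 10,000 sectors/sec -- a rate
 * that would retire the excess in about 40 seconds.  While the error
 * persists, each update adds error * writeback_rate_update_seconds to
 * the integral, and integral_scaled = integral / 10000 slowly biases
 * the rate.  The sum is clamped to
 * [writeback_rate_minimum, NSEC_PER_SEC].
 */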
static bool idle_counter_exceeded(struct cache_set *c)
{
	int counter, dev_nr;

	/*
	 * If c->idle_counter overflows (idle for a really long time),
	 * reset it to 0 and do not set the maximum rate this time, for
	 * code simplicity.
	 */
	counter = atomic_inc_return(&c->idle_counter);
	if (counter <= 0) {
		atomic_set(&c->idle_counter, 0);
		return false;
	}

	dev_nr = atomic_read(&c->attached_dev_nr);
	if (dev_nr == 0)
		return false;

	/*
	 * c->idle_counter is increased by the writeback threads of all
	 * attached backing devices.  To represent a rough time period,
	 * the counter should be divided by dev_nr; otherwise attaching
	 * more backing devices would inflate the counter and
	 * misrepresent the idle time.
	 * The following calculation is equal to checking
	 *	(counter / dev_nr) < (dev_nr * 6)
	 */
	if (counter < (dev_nr * dev_nr * 6))
		return false;

	return true;
}
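/*
 * Example of the threshold above: with dev_nr == 4 attached devices,
 * the shared counter must reach 4 * 4 * 6 = 96 increments, i.e.
 * 96 / 4 = 24 increments as seen per device, before the cache set is
 * considered idle.
 */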
/*
 * Idle_counter is increased every time update_writeback_rate() is
 * called.  If all backing devices attached to the same cache set have
 * identical dc->writeback_rate_update_seconds values, it is about 6
 * rounds of update_writeback_rate() on each backing device before
 * c->at_max_writeback_rate is set to 1, and then the maximum writeback
 * rate is set for each dc->writeback_rate.rate.
 * In order to avoid the extra locking cost of counting the exact
 * number of dirty cached devices, c->attached_dev_nr is used to
 * calculate the idle threshold.  It might be bigger if not all cached
 * devices are in writeback mode, but it still works well with a
 * limited number of extra rounds of update_writeback_rate().
 */
static bool set_at_max_writeback_rate(struct cache_set *c,
				      struct cached_dev *dc)
{
	/* Don't set max writeback rate if it is disabled */
	if (!c->idle_max_writeback_rate_enabled)
		return false;

	/* Don't set max writeback rate if gc is running */
	if (!c->gc_mark_valid)
		return false;

	if (!idle_counter_exceeded(c))
		return false;

	if (atomic_read(&c->at_max_writeback_rate) != 1)
		atomic_set(&c->at_max_writeback_rate, 1);

	atomic_long_set(&dc->writeback_rate.rate, INT_MAX);

	/* keep writeback_rate_target as existing value */
	dc->writeback_rate_proportional = 0;
	dc->writeback_rate_integral_scaled = 0;
	dc->writeback_rate_change = 0;

	/*
	 * In case new I/O arrives before set_at_max_writeback_rate()
	 * returns.
	 */
	if (!idle_counter_exceeded(c) ||
	    !atomic_read(&c->at_max_writeback_rate))
		return false;

	return true;
}
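/*
 * Note on the re-check at the end of set_at_max_writeback_rate(): new
 * I/O may reset c->idle_counter or clear c->at_max_writeback_rate
 * after the rate was raised to INT_MAX above.  Returning false in that
 * case sends the caller back to __update_writeback_rate(), so a cache
 * set that just became busy does not stay stuck at the maximum rate.
 */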
static void update_writeback_rate(struct work_struct *work)
{
	struct cached_dev *dc = container_of(to_delayed_work(work),
					     struct cached_dev,
					     writeback_rate_update);
	struct cache_set *c = dc->disk.c;

	/*
	 * should check BCACHE_DEV_RATE_DW_RUNNING before calling
	 * cancel_delayed_work_sync().
	 */
	set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
	/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
	smp_mb__after_atomic();

	/*
	 * CACHE_SET_IO_DISABLE might be set via sysfs interface,
	 * check it here too.
	 */
	if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
	    test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
		clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
		/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
		smp_mb__after_atomic();
		return;
	}

	if (atomic_read(&dc->has_dirty) && dc->writeback_percent) {
		/*
		 * If the whole cache set is idle, set_at_max_writeback_rate()
		 * will set the writeback rate to a maximum number.  Then it
		 * is unnecessary to update the writeback rate again while
		 * the idle cache set stays at the maximum writeback rate.
		 */
		if (!set_at_max_writeback_rate(c, dc)) {
			down_read(&dc->writeback_lock);
			__update_writeback_rate(dc);
			update_gc_after_writeback(c);
			up_read(&dc->writeback_lock);
		}
	}

	/*
	 * CACHE_SET_IO_DISABLE might be set via sysfs interface,
	 * check it here too.
	 */
	if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
	    !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
		schedule_delayed_work(&dc->writeback_rate_update,
			      dc->writeback_rate_update_seconds * HZ);
	}

	/*
	 * should check BCACHE_DEV_RATE_DW_RUNNING before calling
	 * cancel_delayed_work_sync().
	 */
	clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
	/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
	smp_mb__after_atomic();
}
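/*
 * A note on the BCACHE_DEV_RATE_DW_RUNNING handshake above: the bit is
 * set while the work item runs and cleared (with paired barriers) when
 * it exits, so code that wants to cancel the delayed work (e.g. the
 * teardown paths calling cancel_delayed_work_sync()) can first check
 * whether an instance is still executing, as the comments in
 * update_writeback_rate() require.
 */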
static unsigned int writeback_delay(struct cached_dev *dc,
				    unsigned int sectors)
{
	if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
	    !dc->writeback_percent)
		return 0;

	return bch_next_delay(&dc->writeback_rate, sectors);
}

struct dirty_io {
	struct closure		cl;
	struct cached_dev	*dc;
	uint16_t		sequence;
	struct bio		bio;
};

static void dirty_init(struct keybuf_key *w)
{
	struct dirty_io *io = w->private;
	struct bio *bio = &io->bio;

	bio_init(bio, bio->bi_inline_vecs,
		 DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS));
	if (!io->dc->writeback_percent)
		bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));

	bio->bi_iter.bi_size	= KEY_SIZE(&w->key) << 9;
	bio->bi_private		= w;
	bch_bio_map(bio, NULL);
}

static void dirty_io_destructor(struct closure *cl)
{
	struct dirty_io *io = container_of(cl, struct dirty_io, cl);

	kfree(io);
}
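/*
 * Lifecycle of a struct dirty_io, all stages running under the
 * dc->in_flight semaphore taken in read_dirty():
 *
 *   read_dirty_submit()  - read the dirty data from the cache device
 *   write_dirty()        - write it to the backing device, in order
 *   write_dirty_finish() - clear the dirty bit in the btree, release
 *                          the keybuf slot and free the dirty_io
 */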
static void write_dirty_finish(struct closure *cl)
{
	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
	struct keybuf_key *w = io->bio.bi_private;
	struct cached_dev *dc = io->dc;

	bio_free_pages(&io->bio);

	/* This is kind of a dumb way of signalling errors. */
	if (KEY_DIRTY(&w->key)) {
		int ret;
		unsigned int i;
		struct keylist keys;

		bch_keylist_init(&keys);

		bkey_copy(keys.top, &w->key);
		SET_KEY_DIRTY(keys.top, false);
		bch_keylist_push(&keys);

		for (i = 0; i < KEY_PTRS(&w->key); i++)
			atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin);

		ret = bch_btree_insert(dc->disk.c, &keys, NULL, &w->key);

		if (ret)
			trace_bcache_writeback_collision(&w->key);

		atomic_long_inc(ret
				? &dc->disk.c->writeback_keys_failed
				: &dc->disk.c->writeback_keys_done);
	}

	bch_keybuf_del(&dc->writeback_keys, w);
	up(&dc->in_flight);

	closure_return_with_destructor(cl, dirty_io_destructor);
}

static void dirty_endio(struct bio *bio)
{
	struct keybuf_key *w = bio->bi_private;
	struct dirty_io *io = w->private;

	if (bio->bi_status) {
		SET_KEY_DIRTY(&w->key, false);
		bch_count_backing_io_errors(io->dc, bio);
	}

	closure_put(&io->cl);
}
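/*
 * Writes to the backing device are kept in key order: each dirty_io
 * carries a sequence number assigned in read_dirty(), and
 * dc->writeback_sequence_next is the sequence currently allowed to
 * proceed.  write_dirty() below parks out-of-turn closures on
 * dc->writeback_ordering_wait and wakes the waiters once its own write
 * has been submitted and the next sequence published.
 */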
static void write_dirty(struct closure *cl)
{
	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
	struct keybuf_key *w = io->bio.bi_private;
	struct cached_dev *dc = io->dc;

	uint16_t next_sequence;

	if (atomic_read(&dc->writeback_sequence_next) != io->sequence) {
		/* Not our turn to write; wait for a write to complete */
		closure_wait(&dc->writeback_ordering_wait, cl);

		if (atomic_read(&dc->writeback_sequence_next) == io->sequence) {
			/*
			 * Edge case: the wakeup happened in indeterminate
			 * order relative to when we were added to the wait
			 * list.
			 */
			closure_wake_up(&dc->writeback_ordering_wait);
		}

		continue_at(cl, write_dirty, io->dc->writeback_write_wq);
		return;
	}

	next_sequence = io->sequence + 1;

	/*
	 * IO errors are signalled using the dirty bit on the key.
	 * If we failed to read, we should not attempt to write to the
	 * backing device.  Instead, immediately go to write_dirty_finish
	 * to clean up.
	 */
	if (KEY_DIRTY(&w->key)) {
		dirty_init(w);
		bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0);
		io->bio.bi_iter.bi_sector = KEY_START(&w->key);
		bio_set_dev(&io->bio, io->dc->bdev);
		io->bio.bi_end_io = dirty_endio;

		/* I/O request sent to backing device */
		closure_bio_submit(io->dc->disk.c, &io->bio, cl);
	}

	atomic_set(&dc->writeback_sequence_next, next_sequence);
	closure_wake_up(&dc->writeback_ordering_wait);

	continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq);
}

static void read_dirty_endio(struct bio *bio)
{
	struct keybuf_key *w = bio->bi_private;
	struct dirty_io *io = w->private;

	/* is_read = 1 */
	bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
			    bio->bi_status, 1,
			    "reading dirty data from cache");

	dirty_endio(bio);
}

static void read_dirty_submit(struct closure *cl)
{
	struct dirty_io *io = container_of(cl, struct dirty_io, cl);

	closure_bio_submit(io->dc->disk.c, &io->bio, cl);

	continue_at(cl, write_dirty, io->dc->writeback_write_wq);
}
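/*
 * read_dirty() below drives the pipeline above: it walks the refilled
 * keybuf, batches contiguous dirty keys, reads their data from the
 * cache device and hands each batch to the closure chain
 * (read_dirty_submit -> write_dirty -> write_dirty_finish), pacing
 * itself with writeback_delay().
 */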
static void read_dirty(struct cached_dev *dc)
{
	unsigned int delay = 0;
	struct keybuf_key *next, *keys[MAX_WRITEBACKS_IN_PASS], *w;
	size_t size;
	int nk, i;
	struct dirty_io *io;
	struct closure cl;
	uint16_t sequence = 0;

	BUG_ON(!llist_empty(&dc->writeback_ordering_wait.list));
	atomic_set(&dc->writeback_sequence_next, sequence);
	closure_init_stack(&cl);

	/*
	 * XXX: if we error, background writeback just spins. Should use some
	 * mempools.
	 */

	next = bch_keybuf_next(&dc->writeback_keys);

	while (!kthread_should_stop() &&
	       !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
	       next) {
		size = 0;
		nk = 0;

		do {
			BUG_ON(ptr_stale(dc->disk.c, &next->key, 0));

			/*
			 * Don't combine too many operations, even if they
			 * are all small.
			 */
			if (nk >= MAX_WRITEBACKS_IN_PASS)
				break;

			/*
			 * If the current operation is very large, don't
			 * further combine operations.
			 */
			if (size >= MAX_WRITESIZE_IN_PASS)
				break;

			/*
			 * Operations are only eligible to be combined
			 * if they are contiguous.
			 *
			 * TODO: add a heuristic willing to fire a
			 * certain amount of non-contiguous IO per pass,
			 * so that we can benefit from backing device
			 * command queueing.
			 */
			if ((nk != 0) && bkey_cmp(&keys[nk-1]->key,
						&START_KEY(&next->key)))
				break;

			size += KEY_SIZE(&next->key);
			keys[nk++] = next;
		} while ((next = bch_keybuf_next(&dc->writeback_keys)));

		/* Now we have gathered a set of 1..5 keys to write back. */
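		/*
		 * Illustrative bound: the gathering loop above stops at
		 * MAX_WRITEBACKS_IN_PASS keys (5, per the comment above)
		 * or MAX_WRITESIZE_IN_PASS sectors (see writeback.h), so
		 * a single pass neither pins too many keybuf slots nor
		 * issues an overly large batch of writes.
		 */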
		for (i = 0; i < nk; i++) {
			w = keys[i];

			io = kzalloc(struct_size(io, bio.bi_inline_vecs,
						DIV_ROUND_UP(KEY_SIZE(&w->key),
							     PAGE_SECTORS)),
				     GFP_KERNEL);
			if (!io)
				goto err;

			w->private	= io;
			io->dc		= dc;
			io->sequence	= sequence++;

			dirty_init(w);
			bio_set_op_attrs(&io->bio, REQ_OP_READ, 0);
			io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
			bio_set_dev(&io->bio,
				    PTR_CACHE(dc->disk.c, &w->key, 0)->bdev);
			io->bio.bi_end_io	= read_dirty_endio;

			if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
				goto err_free;

			trace_bcache_writeback(&w->key);

			down(&dc->in_flight);

			/*
			 * We've acquired a semaphore for the maximum
			 * simultaneous number of writebacks; from here
			 * everything happens asynchronously.
			 */
			closure_call(&io->cl, read_dirty_submit, NULL, &cl);
		}

		delay = writeback_delay(dc, size);

		while (!kthread_should_stop() &&
		       !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
		       delay) {
			schedule_timeout_interruptible(delay);
			delay = writeback_delay(dc, 0);
		}
	}

	if (0) {
err_free:
		kfree(w->private);
err:
		bch_keybuf_del(&dc->writeback_keys, w);
	}

	/*
	 * Wait for outstanding writeback IOs to finish (and keybuf slots to be
	 * freed) before refilling again
	 */
	closure_sync(&cl);
}

/* Scan for dirty data */

void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
				  uint64_t offset, int nr_sectors)
{
	struct bcache_device *d = c->devices[inode];
	unsigned int stripe_offset, sectors_dirty;
	int stripe;

	if (!d)
		return;

	stripe = offset_to_stripe(d, offset);
	if (stripe < 0)
		return;

	if (UUID_FLASH_ONLY(&c->uuids[inode]))
		atomic_long_add(nr_sectors, &c->flash_dev_dirty_sectors);

	stripe_offset = offset & (d->stripe_size - 1);

	while (nr_sectors) {
		int s = min_t(unsigned int, abs(nr_sectors),
			      d->stripe_size - stripe_offset);

		if (nr_sectors < 0)
			s = -s;

		if (stripe >= d->nr_stripes)
			return;

		sectors_dirty = atomic_add_return(s,
					d->stripe_sectors_dirty + stripe);
		if (sectors_dirty == d->stripe_size)
			set_bit(stripe, d->full_dirty_stripes);
		else
			clear_bit(stripe, d->full_dirty_stripes);

		nr_sectors -= s;
		stripe_offset = 0;
		stripe++;
	}
}
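/*
 * Illustrative walk of bcache_dev_sectors_dirty_add(): with
 * d->stripe_size == 1024 sectors (the mask above relies on stripe_size
 * being a power of two), marking offset 1000, nr_sectors 100 dirty
 * adds 24 sectors to the first stripe (offsets 1000..1023) and 76 to
 * the next; a stripe whose dirty count reaches stripe_size is flagged
 * in d->full_dirty_stripes for refill_full_stripes() below.
 */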
static bool dirty_pred(struct keybuf *buf, struct bkey *k)
{
	struct cached_dev *dc = container_of(buf,
					     struct cached_dev,
					     writeback_keys);

	BUG_ON(KEY_INODE(k) != dc->disk.id);

	return KEY_DIRTY(k);
}

static void refill_full_stripes(struct cached_dev *dc)
{
	struct keybuf *buf = &dc->writeback_keys;
	unsigned int start_stripe, next_stripe;
	int stripe;
	bool wrapped = false;

	stripe = offset_to_stripe(&dc->disk, KEY_OFFSET(&buf->last_scanned));
	if (stripe < 0)
		stripe = 0;

	start_stripe = stripe;

	while (1) {
		stripe = find_next_bit(dc->disk.full_dirty_stripes,
				       dc->disk.nr_stripes, stripe);

		if (stripe == dc->disk.nr_stripes)
			goto next;

		next_stripe = find_next_zero_bit(dc->disk.full_dirty_stripes,
						 dc->disk.nr_stripes, stripe);

		buf->last_scanned = KEY(dc->disk.id,
					stripe * dc->disk.stripe_size, 0);

		bch_refill_keybuf(dc->disk.c, buf,
				  &KEY(dc->disk.id,
				       next_stripe * dc->disk.stripe_size, 0),
				  dirty_pred);

		if (array_freelist_empty(&buf->freelist))
			return;

		stripe = next_stripe;
next:
		if (wrapped && stripe > start_stripe)
			return;

		if (stripe == dc->disk.nr_stripes) {
			stripe = 0;
			wrapped = true;
		}
	}
}
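/*
 * refill_full_stripes() above only queues keys from completely dirty
 * stripes.  On backing devices where partial-stripe writes are
 * expensive (dc->partial_stripes_expensive, e.g. RAID-style setups),
 * writing whole stripes back first tends to avoid read-modify-write
 * cycles on the backing device.
 */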
/*
 * Returns true if we scanned the entire disk
 */
static bool refill_dirty(struct cached_dev *dc)
{
	struct keybuf *buf = &dc->writeback_keys;
	struct bkey start = KEY(dc->disk.id, 0, 0);
	struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0);
	struct bkey start_pos;

	/*
	 * make sure keybuf pos is inside the range for this disk - at bringup
	 * we might not be attached yet so this disk's inode nr isn't
	 * initialized then
	 */
	if (bkey_cmp(&buf->last_scanned, &start) < 0 ||
	    bkey_cmp(&buf->last_scanned, &end) > 0)
		buf->last_scanned = start;

	if (dc->partial_stripes_expensive) {
		refill_full_stripes(dc);
		if (array_freelist_empty(&buf->freelist))
			return false;
	}

	start_pos = buf->last_scanned;
	bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);

	if (bkey_cmp(&buf->last_scanned, &end) < 0)
		return false;

	/*
	 * If we get to the end start scanning again from the beginning, and
	 * only scan up to where we initially started scanning from:
	 */
	buf->last_scanned = start;
	bch_refill_keybuf(dc->disk.c, buf, &start_pos, dirty_pred);

	return bkey_cmp(&buf->last_scanned, &start_pos) >= 0;
}
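/*
 * refill_dirty() above performs at most two refills per call: one from
 * last_scanned to the end of the device and, if that reached the end
 * with freelist space left, a second from the start of the device up
 * to the original position, so a full scan covers the whole key range
 * exactly once.
 */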
static int bch_writeback_thread(void *arg)
{
	struct cached_dev *dc = arg;
	struct cache_set *c = dc->disk.c;
	bool searched_full_index;

	bch_ratelimit_reset(&dc->writeback_rate);

	while (!kthread_should_stop() &&
	       !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
		down_write(&dc->writeback_lock);
		set_current_state(TASK_INTERRUPTIBLE);
		/*
		 * If the bcache device is detaching, skip the sleep below
		 * and continue to perform writeback.  Otherwise, if there
		 * is no dirty data in the cache, or there is dirty data
		 * but writeback is disabled, the writeback thread should
		 * sleep here and wait for others to wake it up.
		 */
		if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
		    (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
			up_write(&dc->writeback_lock);

			if (kthread_should_stop() ||
			    test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
				set_current_state(TASK_RUNNING);
				break;
			}

			schedule();
			continue;
		}
		set_current_state(TASK_RUNNING);

		searched_full_index = refill_dirty(dc);

		if (searched_full_index &&
		    RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
			atomic_set(&dc->has_dirty, 0);
			SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
			bch_write_bdev_super(dc, NULL);
			/*
			 * If the bcache device is detaching via the sysfs
			 * interface, the writeback thread should stop once
			 * there is no dirty data in the cache.  The
			 * BCACHE_DEV_DETACHING flag is set in
			 * bch_cached_dev_detach().
			 */
			if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) {
				up_write(&dc->writeback_lock);
				break;
			}

			/*
			 * When the dirty data ratio is high (e.g. 50%+),
			 * there might be heavy bucket fragmentation after
			 * writeback finishes, which hurts the following
			 * write performance.  If users really care about
			 * write performance they may set BCH_ENABLE_AUTO_GC
			 * via sysfs; then, when BCH_DO_AUTO_GC is set, the
			 * garbage collection thread will be woken up here.
			 * After the moving GC, the shrunk btree and the
			 * discarded free bucket space on the SSD may help
			 * the following write requests.
			 */
			if (c->gc_after_writeback ==
			    (BCH_ENABLE_AUTO_GC|BCH_DO_AUTO_GC)) {
				c->gc_after_writeback &= ~BCH_DO_AUTO_GC;
				force_wake_up_gc(c);
			}
		}

		up_write(&dc->writeback_lock);

		read_dirty(dc);

		if (searched_full_index) {
			unsigned int delay = dc->writeback_delay * HZ;

			while (delay &&
			       !kthread_should_stop() &&
			       !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
			       !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
				delay = schedule_timeout_interruptible(delay);

			bch_ratelimit_reset(&dc->writeback_rate);
		}
	}

	if (dc->writeback_write_wq) {
		flush_workqueue(dc->writeback_write_wq);
		destroy_workqueue(dc->writeback_write_wq);
	}
	cached_dev_put(dc);
	wait_for_kthread_stop();

	return 0;
}
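/*
 * After a full index sweep, the thread above sleeps for
 * dc->writeback_delay seconds (30 by default, see
 * bch_cached_dev_writeback_init()) before rescanning, unless it is
 * stopped, the cache set's I/O is disabled, or the device starts
 * detaching.
 */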
/* Init */
#define INIT_KEYS_EACH_TIME	500000

struct sectors_dirty_init {
	struct btree_op	op;
	unsigned int	inode;
	size_t		count;
};

static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
				 struct bkey *k)
{
	struct sectors_dirty_init *op = container_of(_op,
						struct sectors_dirty_init, op);
	if (KEY_INODE(k) > op->inode)
		return MAP_DONE;

	if (KEY_DIRTY(k))
		bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
					     KEY_START(k), KEY_SIZE(k));

	op->count++;
	if (!(op->count % INIT_KEYS_EACH_TIME))
		cond_resched();

	return MAP_CONTINUE;
}
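/*
 * sectors_dirty_init_fn() above runs for every key visited while
 * mapping the btree; the op->count % INIT_KEYS_EACH_TIME check calls
 * cond_resched() every 500000 keys so that scanning a huge btree does
 * not hog the CPU.
 */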
static int bch_root_node_dirty_init(struct cache_set *c,
				    struct bcache_device *d,
				    struct bkey *k)
{
	struct sectors_dirty_init op;
	int ret;

	bch_btree_op_init(&op.op, -1);
	op.inode = d->id;
	op.count = 0;

	ret = bcache_btree(map_keys_recurse,
			   k,
			   c->root,
			   &op.op,
			   &KEY(op.inode, 0, 0),
			   sectors_dirty_init_fn,
			   0);
	if (ret < 0)
		pr_warn("sectors dirty init failed, ret=%d!\n", ret);

	/*
	 * The op may be added to the cache_set's btree_cache_wait list
	 * in mca_cannibalize(); we must ensure it is removed from the
	 * list and that btree_cache_alloc_lock is released before the
	 * op memory is freed.
	 * Otherwise, the btree_cache_wait list would be corrupted.
	 */
	bch_cannibalize_unlock(c);
	finish_wait(&c->btree_cache_wait, &(&op.op)->wait);

	return ret;
}

static int bch_dirty_init_thread(void *arg)
{
	struct dirty_init_thrd_info *info = arg;
	struct bch_dirty_init_state *state = info->state;
	struct cache_set *c = state->c;
	struct btree_iter iter;
	struct bkey *k, *p;
	int cur_idx, prev_idx, skip_nr;

	k = p = NULL;
	prev_idx = 0;

	bch_btree_iter_init(&c->root->keys, &iter, NULL);
	k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
	BUG_ON(!k);

	p = k;

	while (k) {
		spin_lock(&state->idx_lock);
		cur_idx = state->key_idx;
		state->key_idx++;
		spin_unlock(&state->idx_lock);

		skip_nr = cur_idx - prev_idx;

		while (skip_nr) {
			k = bch_btree_iter_next_filter(&iter,
						       &c->root->keys,
						       bch_ptr_bad);
			if (k)
				p = k;
			else {
				atomic_set(&state->enough, 1);
				/* Update state->enough earlier */
				smp_mb__after_atomic();
				goto out;
			}
			skip_nr--;
		}

		if (p) {
			if (bch_root_node_dirty_init(c, state->d, p) < 0)
				goto out;
		}

		p = NULL;
		prev_idx = cur_idx;
	}

out:
	/* In order to wake up state->wait in time */
	smp_mb__before_atomic();
	if (atomic_dec_and_test(&state->started))
		wake_up(&state->wait);

	return 0;
}
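/*
 * Work distribution in bch_dirty_init_thread() above: each thread
 * atomically claims the next index from state->key_idx, then advances
 * its private iterator over the root node by (cur_idx - prev_idx)
 * keys, so the root-level keys are partitioned across threads without
 * further locking; each claimed key's subtree is then counted by
 * bch_root_node_dirty_init().
 */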
static int bch_btre_dirty_init_thread_nr(void)
{
	int n = num_online_cpus()/2;

	if (n == 0)
		n = 1;
	else if (n > BCH_DIRTY_INIT_THRD_MAX)
		n = BCH_DIRTY_INIT_THRD_MAX;

	return n;
}
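/*
 * Example: with 16 online CPUs bch_btre_dirty_init_thread_nr() returns
 * 8 threads; a single-CPU machine gets 1, and the count is capped at
 * BCH_DIRTY_INIT_THRD_MAX (defined in writeback.h).
 */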
void bch_sectors_dirty_init(struct bcache_device *d)
{
	int i;
	struct btree *b = NULL;
	struct bkey *k = NULL;
	struct btree_iter iter;
	struct sectors_dirty_init op;
	struct cache_set *c = d->c;
	struct bch_dirty_init_state state;

retry_lock:
	b = c->root;
	rw_lock(0, b, b->level);
	if (b != c->root) {
		rw_unlock(0, b);
		goto retry_lock;
	}

	/* Just count root keys if no leaf node */
	if (c->root->level == 0) {
		bch_btree_op_init(&op.op, -1);
		op.inode = d->id;
		op.count = 0;

		for_each_key_filter(&c->root->keys,
				    k, &iter, bch_ptr_invalid) {
			if (KEY_INODE(k) != op.inode)
				continue;
			sectors_dirty_init_fn(&op.op, c->root, k);
		}

		rw_unlock(0, b);
		return;
	}

	memset(&state, 0, sizeof(struct bch_dirty_init_state));
	state.c = c;
	state.d = d;
	state.total_threads = bch_btre_dirty_init_thread_nr();
	state.key_idx = 0;
	spin_lock_init(&state.idx_lock);
	atomic_set(&state.started, 0);
	atomic_set(&state.enough, 0);
	init_waitqueue_head(&state.wait);

	for (i = 0; i < state.total_threads; i++) {
		/* Fetch latest state.enough earlier */
		smp_mb__before_atomic();
		if (atomic_read(&state.enough))
			break;

		atomic_inc(&state.started);
		state.infos[i].state = &state;
		state.infos[i].thread =
			kthread_run(bch_dirty_init_thread, &state.infos[i],
				    "bch_dirtcnt[%d]", i);
		if (IS_ERR(state.infos[i].thread)) {
			pr_err("fails to run thread bch_dirty_init[%d]\n", i);
			atomic_dec(&state.started);
			for (--i; i >= 0; i--)
				kthread_stop(state.infos[i].thread);
			goto out;
		}
	}

out:
	/* Must wait for all threads to stop. */
	wait_event(state.wait, atomic_read(&state.started) == 0);
	rw_unlock(0, b);
}

void bch_cached_dev_writeback_init(struct cached_dev *dc)
{
	sema_init(&dc->in_flight, 64);
	init_rwsem(&dc->writeback_lock);
	bch_keybuf_init(&dc->writeback_keys);

	dc->writeback_metadata		= true;
	dc->writeback_running		= false;
	dc->writeback_percent		= 10;
	dc->writeback_delay		= 30;
	atomic_long_set(&dc->writeback_rate.rate, 1024);
	dc->writeback_rate_minimum	= 8;

	dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
	dc->writeback_rate_p_term_inverse = 40;
	dc->writeback_rate_i_term_inverse = 10000;

	WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
	INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
}

int bch_cached_dev_writeback_start(struct cached_dev *dc)
{
	dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq",
						 WQ_MEM_RECLAIM, 0);
	if (!dc->writeback_write_wq)
		return -ENOMEM;

	cached_dev_get(dc);
	dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
					      "bcache_writeback");
	if (IS_ERR(dc->writeback_thread)) {
		cached_dev_put(dc);
		destroy_workqueue(dc->writeback_write_wq);
		return PTR_ERR(dc->writeback_thread);
	}
	dc->writeback_running = true;

	WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
	schedule_delayed_work(&dc->writeback_rate_update,
			      dc->writeback_rate_update_seconds * HZ);

	bch_writeback_queue(dc);

	return 0;
}