18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Some low level IO code, and hacks for various block layer limitations 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> 68c2ecf20Sopenharmony_ci * Copyright 2012 Google, Inc. 78c2ecf20Sopenharmony_ci */ 88c2ecf20Sopenharmony_ci 98c2ecf20Sopenharmony_ci#include "bcache.h" 108c2ecf20Sopenharmony_ci#include "bset.h" 118c2ecf20Sopenharmony_ci#include "debug.h" 128c2ecf20Sopenharmony_ci 138c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_ci/* Bios with headers */ 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_civoid bch_bbio_free(struct bio *bio, struct cache_set *c) 188c2ecf20Sopenharmony_ci{ 198c2ecf20Sopenharmony_ci struct bbio *b = container_of(bio, struct bbio, bio); 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_ci mempool_free(b, &c->bio_meta); 228c2ecf20Sopenharmony_ci} 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_cistruct bio *bch_bbio_alloc(struct cache_set *c) 258c2ecf20Sopenharmony_ci{ 268c2ecf20Sopenharmony_ci struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO); 278c2ecf20Sopenharmony_ci struct bio *bio = &b->bio; 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_ci bio_init(bio, bio->bi_inline_vecs, meta_bucket_pages(&c->cache->sb)); 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_ci return bio; 328c2ecf20Sopenharmony_ci} 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_civoid __bch_submit_bbio(struct bio *bio, struct cache_set *c) 358c2ecf20Sopenharmony_ci{ 368c2ecf20Sopenharmony_ci struct bbio *b = container_of(bio, struct bbio, bio); 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0); 398c2ecf20Sopenharmony_ci bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev); 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_ci b->submit_time_us = local_clock_us(); 428c2ecf20Sopenharmony_ci closure_bio_submit(c, bio, bio->bi_private); 438c2ecf20Sopenharmony_ci} 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_civoid bch_submit_bbio(struct bio *bio, struct cache_set *c, 468c2ecf20Sopenharmony_ci struct bkey *k, unsigned int ptr) 478c2ecf20Sopenharmony_ci{ 488c2ecf20Sopenharmony_ci struct bbio *b = container_of(bio, struct bbio, bio); 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci bch_bkey_copy_single_ptr(&b->key, k, ptr); 518c2ecf20Sopenharmony_ci __bch_submit_bbio(bio, c); 528c2ecf20Sopenharmony_ci} 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci/* IO errors */ 558c2ecf20Sopenharmony_civoid bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio) 568c2ecf20Sopenharmony_ci{ 578c2ecf20Sopenharmony_ci unsigned int errors; 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci WARN_ONCE(!dc, "NULL pointer of struct cached_dev"); 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci /* 628c2ecf20Sopenharmony_ci * Read-ahead requests on a degrading and recovering md raid 638c2ecf20Sopenharmony_ci * (e.g. raid6) device might be failured immediately by md 648c2ecf20Sopenharmony_ci * raid code, which is not a real hardware media failure. So 658c2ecf20Sopenharmony_ci * we shouldn't count failed REQ_RAHEAD bio to dc->io_errors. 668c2ecf20Sopenharmony_ci */ 678c2ecf20Sopenharmony_ci if (bio->bi_opf & REQ_RAHEAD) { 688c2ecf20Sopenharmony_ci pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore\n", 698c2ecf20Sopenharmony_ci dc->backing_dev_name); 708c2ecf20Sopenharmony_ci return; 718c2ecf20Sopenharmony_ci } 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci errors = atomic_add_return(1, &dc->io_errors); 748c2ecf20Sopenharmony_ci if (errors < dc->error_limit) 758c2ecf20Sopenharmony_ci pr_err("%s: IO error on backing device, unrecoverable\n", 768c2ecf20Sopenharmony_ci dc->backing_dev_name); 778c2ecf20Sopenharmony_ci else 788c2ecf20Sopenharmony_ci bch_cached_dev_error(dc); 798c2ecf20Sopenharmony_ci} 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_civoid bch_count_io_errors(struct cache *ca, 828c2ecf20Sopenharmony_ci blk_status_t error, 838c2ecf20Sopenharmony_ci int is_read, 848c2ecf20Sopenharmony_ci const char *m) 858c2ecf20Sopenharmony_ci{ 868c2ecf20Sopenharmony_ci /* 878c2ecf20Sopenharmony_ci * The halflife of an error is: 888c2ecf20Sopenharmony_ci * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh 898c2ecf20Sopenharmony_ci */ 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_ci if (ca->set->error_decay) { 928c2ecf20Sopenharmony_ci unsigned int count = atomic_inc_return(&ca->io_count); 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci while (count > ca->set->error_decay) { 958c2ecf20Sopenharmony_ci unsigned int errors; 968c2ecf20Sopenharmony_ci unsigned int old = count; 978c2ecf20Sopenharmony_ci unsigned int new = count - ca->set->error_decay; 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci /* 1008c2ecf20Sopenharmony_ci * First we subtract refresh from count; each time we 1018c2ecf20Sopenharmony_ci * successfully do so, we rescale the errors once: 1028c2ecf20Sopenharmony_ci */ 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_ci count = atomic_cmpxchg(&ca->io_count, old, new); 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_ci if (count == old) { 1078c2ecf20Sopenharmony_ci count = new; 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci errors = atomic_read(&ca->io_errors); 1108c2ecf20Sopenharmony_ci do { 1118c2ecf20Sopenharmony_ci old = errors; 1128c2ecf20Sopenharmony_ci new = ((uint64_t) errors * 127) / 128; 1138c2ecf20Sopenharmony_ci errors = atomic_cmpxchg(&ca->io_errors, 1148c2ecf20Sopenharmony_ci old, new); 1158c2ecf20Sopenharmony_ci } while (old != errors); 1168c2ecf20Sopenharmony_ci } 1178c2ecf20Sopenharmony_ci } 1188c2ecf20Sopenharmony_ci } 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci if (error) { 1218c2ecf20Sopenharmony_ci unsigned int errors = atomic_add_return(1 << IO_ERROR_SHIFT, 1228c2ecf20Sopenharmony_ci &ca->io_errors); 1238c2ecf20Sopenharmony_ci errors >>= IO_ERROR_SHIFT; 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci if (errors < ca->set->error_limit) 1268c2ecf20Sopenharmony_ci pr_err("%s: IO error on %s%s\n", 1278c2ecf20Sopenharmony_ci ca->cache_dev_name, m, 1288c2ecf20Sopenharmony_ci is_read ? ", recovering." : "."); 1298c2ecf20Sopenharmony_ci else 1308c2ecf20Sopenharmony_ci bch_cache_set_error(ca->set, 1318c2ecf20Sopenharmony_ci "%s: too many IO errors %s\n", 1328c2ecf20Sopenharmony_ci ca->cache_dev_name, m); 1338c2ecf20Sopenharmony_ci } 1348c2ecf20Sopenharmony_ci} 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_civoid bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio, 1378c2ecf20Sopenharmony_ci blk_status_t error, const char *m) 1388c2ecf20Sopenharmony_ci{ 1398c2ecf20Sopenharmony_ci struct bbio *b = container_of(bio, struct bbio, bio); 1408c2ecf20Sopenharmony_ci struct cache *ca = PTR_CACHE(c, &b->key, 0); 1418c2ecf20Sopenharmony_ci int is_read = (bio_data_dir(bio) == READ ? 1 : 0); 1428c2ecf20Sopenharmony_ci 1438c2ecf20Sopenharmony_ci unsigned int threshold = op_is_write(bio_op(bio)) 1448c2ecf20Sopenharmony_ci ? c->congested_write_threshold_us 1458c2ecf20Sopenharmony_ci : c->congested_read_threshold_us; 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci if (threshold) { 1488c2ecf20Sopenharmony_ci unsigned int t = local_clock_us(); 1498c2ecf20Sopenharmony_ci int us = t - b->submit_time_us; 1508c2ecf20Sopenharmony_ci int congested = atomic_read(&c->congested); 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci if (us > (int) threshold) { 1538c2ecf20Sopenharmony_ci int ms = us / 1024; 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_ci c->congested_last_us = t; 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci ms = min(ms, CONGESTED_MAX + congested); 1588c2ecf20Sopenharmony_ci atomic_sub(ms, &c->congested); 1598c2ecf20Sopenharmony_ci } else if (congested < 0) 1608c2ecf20Sopenharmony_ci atomic_inc(&c->congested); 1618c2ecf20Sopenharmony_ci } 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_ci bch_count_io_errors(ca, error, is_read, m); 1648c2ecf20Sopenharmony_ci} 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_civoid bch_bbio_endio(struct cache_set *c, struct bio *bio, 1678c2ecf20Sopenharmony_ci blk_status_t error, const char *m) 1688c2ecf20Sopenharmony_ci{ 1698c2ecf20Sopenharmony_ci struct closure *cl = bio->bi_private; 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci bch_bbio_count_io_errors(c, bio, error, m); 1728c2ecf20Sopenharmony_ci bio_put(bio); 1738c2ecf20Sopenharmony_ci closure_put(cl); 1748c2ecf20Sopenharmony_ci} 175