18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Some low level IO code, and hacks for various block layer limitations
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
68c2ecf20Sopenharmony_ci * Copyright 2012 Google, Inc.
78c2ecf20Sopenharmony_ci */
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci#include "bcache.h"
108c2ecf20Sopenharmony_ci#include "bset.h"
118c2ecf20Sopenharmony_ci#include "debug.h"
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
148c2ecf20Sopenharmony_ci
158c2ecf20Sopenharmony_ci/* Bios with headers */
168c2ecf20Sopenharmony_ci
178c2ecf20Sopenharmony_civoid bch_bbio_free(struct bio *bio, struct cache_set *c)
188c2ecf20Sopenharmony_ci{
198c2ecf20Sopenharmony_ci	struct bbio *b = container_of(bio, struct bbio, bio);
208c2ecf20Sopenharmony_ci
218c2ecf20Sopenharmony_ci	mempool_free(b, &c->bio_meta);
228c2ecf20Sopenharmony_ci}
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_cistruct bio *bch_bbio_alloc(struct cache_set *c)
258c2ecf20Sopenharmony_ci{
268c2ecf20Sopenharmony_ci	struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO);
278c2ecf20Sopenharmony_ci	struct bio *bio = &b->bio;
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_ci	bio_init(bio, bio->bi_inline_vecs, meta_bucket_pages(&c->cache->sb));
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ci	return bio;
328c2ecf20Sopenharmony_ci}
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_civoid __bch_submit_bbio(struct bio *bio, struct cache_set *c)
358c2ecf20Sopenharmony_ci{
368c2ecf20Sopenharmony_ci	struct bbio *b = container_of(bio, struct bbio, bio);
378c2ecf20Sopenharmony_ci
388c2ecf20Sopenharmony_ci	bio->bi_iter.bi_sector	= PTR_OFFSET(&b->key, 0);
398c2ecf20Sopenharmony_ci	bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_ci	b->submit_time_us = local_clock_us();
428c2ecf20Sopenharmony_ci	closure_bio_submit(c, bio, bio->bi_private);
438c2ecf20Sopenharmony_ci}
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_civoid bch_submit_bbio(struct bio *bio, struct cache_set *c,
468c2ecf20Sopenharmony_ci		     struct bkey *k, unsigned int ptr)
478c2ecf20Sopenharmony_ci{
488c2ecf20Sopenharmony_ci	struct bbio *b = container_of(bio, struct bbio, bio);
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci	bch_bkey_copy_single_ptr(&b->key, k, ptr);
518c2ecf20Sopenharmony_ci	__bch_submit_bbio(bio, c);
528c2ecf20Sopenharmony_ci}
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ci/* IO errors */
558c2ecf20Sopenharmony_civoid bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
568c2ecf20Sopenharmony_ci{
578c2ecf20Sopenharmony_ci	unsigned int errors;
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci	WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ci	/*
628c2ecf20Sopenharmony_ci	 * Read-ahead requests on a degrading and recovering md raid
638c2ecf20Sopenharmony_ci	 * (e.g. raid6) device might be failured immediately by md
648c2ecf20Sopenharmony_ci	 * raid code, which is not a real hardware media failure. So
658c2ecf20Sopenharmony_ci	 * we shouldn't count failed REQ_RAHEAD bio to dc->io_errors.
668c2ecf20Sopenharmony_ci	 */
678c2ecf20Sopenharmony_ci	if (bio->bi_opf & REQ_RAHEAD) {
688c2ecf20Sopenharmony_ci		pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore\n",
698c2ecf20Sopenharmony_ci				    dc->backing_dev_name);
708c2ecf20Sopenharmony_ci		return;
718c2ecf20Sopenharmony_ci	}
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci	errors = atomic_add_return(1, &dc->io_errors);
748c2ecf20Sopenharmony_ci	if (errors < dc->error_limit)
758c2ecf20Sopenharmony_ci		pr_err("%s: IO error on backing device, unrecoverable\n",
768c2ecf20Sopenharmony_ci			dc->backing_dev_name);
778c2ecf20Sopenharmony_ci	else
788c2ecf20Sopenharmony_ci		bch_cached_dev_error(dc);
798c2ecf20Sopenharmony_ci}
808c2ecf20Sopenharmony_ci
818c2ecf20Sopenharmony_civoid bch_count_io_errors(struct cache *ca,
828c2ecf20Sopenharmony_ci			 blk_status_t error,
838c2ecf20Sopenharmony_ci			 int is_read,
848c2ecf20Sopenharmony_ci			 const char *m)
858c2ecf20Sopenharmony_ci{
868c2ecf20Sopenharmony_ci	/*
878c2ecf20Sopenharmony_ci	 * The halflife of an error is:
888c2ecf20Sopenharmony_ci	 * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh
898c2ecf20Sopenharmony_ci	 */
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_ci	if (ca->set->error_decay) {
928c2ecf20Sopenharmony_ci		unsigned int count = atomic_inc_return(&ca->io_count);
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci		while (count > ca->set->error_decay) {
958c2ecf20Sopenharmony_ci			unsigned int errors;
968c2ecf20Sopenharmony_ci			unsigned int old = count;
978c2ecf20Sopenharmony_ci			unsigned int new = count - ca->set->error_decay;
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci			/*
1008c2ecf20Sopenharmony_ci			 * First we subtract refresh from count; each time we
1018c2ecf20Sopenharmony_ci			 * successfully do so, we rescale the errors once:
1028c2ecf20Sopenharmony_ci			 */
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ci			count = atomic_cmpxchg(&ca->io_count, old, new);
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_ci			if (count == old) {
1078c2ecf20Sopenharmony_ci				count = new;
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci				errors = atomic_read(&ca->io_errors);
1108c2ecf20Sopenharmony_ci				do {
1118c2ecf20Sopenharmony_ci					old = errors;
1128c2ecf20Sopenharmony_ci					new = ((uint64_t) errors * 127) / 128;
1138c2ecf20Sopenharmony_ci					errors = atomic_cmpxchg(&ca->io_errors,
1148c2ecf20Sopenharmony_ci								old, new);
1158c2ecf20Sopenharmony_ci				} while (old != errors);
1168c2ecf20Sopenharmony_ci			}
1178c2ecf20Sopenharmony_ci		}
1188c2ecf20Sopenharmony_ci	}
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci	if (error) {
1218c2ecf20Sopenharmony_ci		unsigned int errors = atomic_add_return(1 << IO_ERROR_SHIFT,
1228c2ecf20Sopenharmony_ci						    &ca->io_errors);
1238c2ecf20Sopenharmony_ci		errors >>= IO_ERROR_SHIFT;
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci		if (errors < ca->set->error_limit)
1268c2ecf20Sopenharmony_ci			pr_err("%s: IO error on %s%s\n",
1278c2ecf20Sopenharmony_ci			       ca->cache_dev_name, m,
1288c2ecf20Sopenharmony_ci			       is_read ? ", recovering." : ".");
1298c2ecf20Sopenharmony_ci		else
1308c2ecf20Sopenharmony_ci			bch_cache_set_error(ca->set,
1318c2ecf20Sopenharmony_ci					    "%s: too many IO errors %s\n",
1328c2ecf20Sopenharmony_ci					    ca->cache_dev_name, m);
1338c2ecf20Sopenharmony_ci	}
1348c2ecf20Sopenharmony_ci}
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_civoid bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
1378c2ecf20Sopenharmony_ci			      blk_status_t error, const char *m)
1388c2ecf20Sopenharmony_ci{
1398c2ecf20Sopenharmony_ci	struct bbio *b = container_of(bio, struct bbio, bio);
1408c2ecf20Sopenharmony_ci	struct cache *ca = PTR_CACHE(c, &b->key, 0);
1418c2ecf20Sopenharmony_ci	int is_read = (bio_data_dir(bio) == READ ? 1 : 0);
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	unsigned int threshold = op_is_write(bio_op(bio))
1448c2ecf20Sopenharmony_ci		? c->congested_write_threshold_us
1458c2ecf20Sopenharmony_ci		: c->congested_read_threshold_us;
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci	if (threshold) {
1488c2ecf20Sopenharmony_ci		unsigned int t = local_clock_us();
1498c2ecf20Sopenharmony_ci		int us = t - b->submit_time_us;
1508c2ecf20Sopenharmony_ci		int congested = atomic_read(&c->congested);
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ci		if (us > (int) threshold) {
1538c2ecf20Sopenharmony_ci			int ms = us / 1024;
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci			c->congested_last_us = t;
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci			ms = min(ms, CONGESTED_MAX + congested);
1588c2ecf20Sopenharmony_ci			atomic_sub(ms, &c->congested);
1598c2ecf20Sopenharmony_ci		} else if (congested < 0)
1608c2ecf20Sopenharmony_ci			atomic_inc(&c->congested);
1618c2ecf20Sopenharmony_ci	}
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci	bch_count_io_errors(ca, error, is_read, m);
1648c2ecf20Sopenharmony_ci}
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_civoid bch_bbio_endio(struct cache_set *c, struct bio *bio,
1678c2ecf20Sopenharmony_ci		    blk_status_t error, const char *m)
1688c2ecf20Sopenharmony_ci{
1698c2ecf20Sopenharmony_ci	struct closure *cl = bio->bi_private;
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci	bch_bbio_count_io_errors(c, bio, error, m);
1728c2ecf20Sopenharmony_ci	bio_put(bio);
1738c2ecf20Sopenharmony_ci	closure_put(cl);
1748c2ecf20Sopenharmony_ci}
175