18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * bcache setup/teardown code, and some metadata io - read a superblock and 48c2ecf20Sopenharmony_ci * figure out what to do with it. 58c2ecf20Sopenharmony_ci * 68c2ecf20Sopenharmony_ci * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> 78c2ecf20Sopenharmony_ci * Copyright 2012 Google, Inc. 88c2ecf20Sopenharmony_ci */ 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_ci#include "bcache.h" 118c2ecf20Sopenharmony_ci#include "btree.h" 128c2ecf20Sopenharmony_ci#include "debug.h" 138c2ecf20Sopenharmony_ci#include "extents.h" 148c2ecf20Sopenharmony_ci#include "request.h" 158c2ecf20Sopenharmony_ci#include "writeback.h" 168c2ecf20Sopenharmony_ci#include "features.h" 178c2ecf20Sopenharmony_ci 188c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 198c2ecf20Sopenharmony_ci#include <linux/debugfs.h> 208c2ecf20Sopenharmony_ci#include <linux/genhd.h> 218c2ecf20Sopenharmony_ci#include <linux/idr.h> 228c2ecf20Sopenharmony_ci#include <linux/kthread.h> 238c2ecf20Sopenharmony_ci#include <linux/workqueue.h> 248c2ecf20Sopenharmony_ci#include <linux/module.h> 258c2ecf20Sopenharmony_ci#include <linux/random.h> 268c2ecf20Sopenharmony_ci#include <linux/reboot.h> 278c2ecf20Sopenharmony_ci#include <linux/sysfs.h> 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_ciunsigned int bch_cutoff_writeback; 308c2ecf20Sopenharmony_ciunsigned int bch_cutoff_writeback_sync; 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_cistatic const char bcache_magic[] = { 338c2ecf20Sopenharmony_ci 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, 348c2ecf20Sopenharmony_ci 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 358c2ecf20Sopenharmony_ci}; 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_cistatic const char invalid_uuid[] = { 388c2ecf20Sopenharmony_ci 0xa0, 0x3e, 0xf8, 0xed, 0x3e, 0xe1, 0xb8, 0x78, 398c2ecf20Sopenharmony_ci 0xc8, 0x50, 0xfc, 0x5e, 0xcb, 0x16, 0xcd, 0x99 
408c2ecf20Sopenharmony_ci}; 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_cistatic struct kobject *bcache_kobj; 438c2ecf20Sopenharmony_cistruct mutex bch_register_lock; 448c2ecf20Sopenharmony_cibool bcache_is_reboot; 458c2ecf20Sopenharmony_ciLIST_HEAD(bch_cache_sets); 468c2ecf20Sopenharmony_cistatic LIST_HEAD(uncached_devices); 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_cistatic int bcache_major; 498c2ecf20Sopenharmony_cistatic DEFINE_IDA(bcache_device_idx); 508c2ecf20Sopenharmony_cistatic wait_queue_head_t unregister_wait; 518c2ecf20Sopenharmony_cistruct workqueue_struct *bcache_wq; 528c2ecf20Sopenharmony_cistruct workqueue_struct *bch_flush_wq; 538c2ecf20Sopenharmony_cistruct workqueue_struct *bch_journal_wq; 548c2ecf20Sopenharmony_ci 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci#define BTREE_MAX_PAGES (256 * 1024 / PAGE_SIZE) 578c2ecf20Sopenharmony_ci/* limitation of partitions number on single bcache device */ 588c2ecf20Sopenharmony_ci#define BCACHE_MINORS 128 598c2ecf20Sopenharmony_ci/* limitation of bcache devices number on single system */ 608c2ecf20Sopenharmony_ci#define BCACHE_DEVICE_IDX_MAX ((1U << MINORBITS)/BCACHE_MINORS) 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci/* Superblock */ 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_cistatic unsigned int get_bucket_size(struct cache_sb *sb, struct cache_sb_disk *s) 658c2ecf20Sopenharmony_ci{ 668c2ecf20Sopenharmony_ci unsigned int bucket_size = le16_to_cpu(s->bucket_size); 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_ci if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) { 698c2ecf20Sopenharmony_ci if (bch_has_feature_large_bucket(sb)) { 708c2ecf20Sopenharmony_ci unsigned int max, order; 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci max = sizeof(unsigned int) * BITS_PER_BYTE - 1; 738c2ecf20Sopenharmony_ci order = le16_to_cpu(s->bucket_size); 748c2ecf20Sopenharmony_ci /* 758c2ecf20Sopenharmony_ci * bcache tool will make sure the overflow won't 768c2ecf20Sopenharmony_ci * 
happen, an error message here is enough. 778c2ecf20Sopenharmony_ci */ 788c2ecf20Sopenharmony_ci if (order > max) 798c2ecf20Sopenharmony_ci pr_err("Bucket size (1 << %u) overflows\n", 808c2ecf20Sopenharmony_ci order); 818c2ecf20Sopenharmony_ci bucket_size = 1 << order; 828c2ecf20Sopenharmony_ci } else if (bch_has_feature_obso_large_bucket(sb)) { 838c2ecf20Sopenharmony_ci bucket_size += 848c2ecf20Sopenharmony_ci le16_to_cpu(s->obso_bucket_size_hi) << 16; 858c2ecf20Sopenharmony_ci } 868c2ecf20Sopenharmony_ci } 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci return bucket_size; 898c2ecf20Sopenharmony_ci} 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_cistatic const char *read_super_common(struct cache_sb *sb, struct block_device *bdev, 928c2ecf20Sopenharmony_ci struct cache_sb_disk *s) 938c2ecf20Sopenharmony_ci{ 948c2ecf20Sopenharmony_ci const char *err; 958c2ecf20Sopenharmony_ci unsigned int i; 968c2ecf20Sopenharmony_ci 978c2ecf20Sopenharmony_ci sb->first_bucket= le16_to_cpu(s->first_bucket); 988c2ecf20Sopenharmony_ci sb->nbuckets = le64_to_cpu(s->nbuckets); 998c2ecf20Sopenharmony_ci sb->bucket_size = get_bucket_size(sb, s); 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci sb->nr_in_set = le16_to_cpu(s->nr_in_set); 1028c2ecf20Sopenharmony_ci sb->nr_this_dev = le16_to_cpu(s->nr_this_dev); 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_ci err = "Too many journal buckets"; 1058c2ecf20Sopenharmony_ci if (sb->keys > SB_JOURNAL_BUCKETS) 1068c2ecf20Sopenharmony_ci goto err; 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_ci err = "Too many buckets"; 1098c2ecf20Sopenharmony_ci if (sb->nbuckets > LONG_MAX) 1108c2ecf20Sopenharmony_ci goto err; 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci err = "Not enough buckets"; 1138c2ecf20Sopenharmony_ci if (sb->nbuckets < 1 << 7) 1148c2ecf20Sopenharmony_ci goto err; 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_ci err = "Bad block size (not power of 2)"; 1178c2ecf20Sopenharmony_ci if 
(!is_power_of_2(sb->block_size)) 1188c2ecf20Sopenharmony_ci goto err; 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci err = "Bad block size (larger than page size)"; 1218c2ecf20Sopenharmony_ci if (sb->block_size > PAGE_SECTORS) 1228c2ecf20Sopenharmony_ci goto err; 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci err = "Bad bucket size (not power of 2)"; 1258c2ecf20Sopenharmony_ci if (!is_power_of_2(sb->bucket_size)) 1268c2ecf20Sopenharmony_ci goto err; 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci err = "Bad bucket size (smaller than page size)"; 1298c2ecf20Sopenharmony_ci if (sb->bucket_size < PAGE_SECTORS) 1308c2ecf20Sopenharmony_ci goto err; 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ci err = "Invalid superblock: device too small"; 1338c2ecf20Sopenharmony_ci if (get_capacity(bdev->bd_disk) < 1348c2ecf20Sopenharmony_ci sb->bucket_size * sb->nbuckets) 1358c2ecf20Sopenharmony_ci goto err; 1368c2ecf20Sopenharmony_ci 1378c2ecf20Sopenharmony_ci err = "Bad UUID"; 1388c2ecf20Sopenharmony_ci if (bch_is_zero(sb->set_uuid, 16)) 1398c2ecf20Sopenharmony_ci goto err; 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_ci err = "Bad cache device number in set"; 1428c2ecf20Sopenharmony_ci if (!sb->nr_in_set || 1438c2ecf20Sopenharmony_ci sb->nr_in_set <= sb->nr_this_dev || 1448c2ecf20Sopenharmony_ci sb->nr_in_set > MAX_CACHES_PER_SET) 1458c2ecf20Sopenharmony_ci goto err; 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci err = "Journal buckets not sequential"; 1488c2ecf20Sopenharmony_ci for (i = 0; i < sb->keys; i++) 1498c2ecf20Sopenharmony_ci if (sb->d[i] != sb->first_bucket + i) 1508c2ecf20Sopenharmony_ci goto err; 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci err = "Too many journal buckets"; 1538c2ecf20Sopenharmony_ci if (sb->first_bucket + sb->keys > sb->nbuckets) 1548c2ecf20Sopenharmony_ci goto err; 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci err = "Invalid superblock: first bucket comes before end of super"; 
1578c2ecf20Sopenharmony_ci if (sb->first_bucket * sb->bucket_size < 16) 1588c2ecf20Sopenharmony_ci goto err; 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci err = NULL; 1618c2ecf20Sopenharmony_cierr: 1628c2ecf20Sopenharmony_ci return err; 1638c2ecf20Sopenharmony_ci} 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_cistatic const char *read_super(struct cache_sb *sb, struct block_device *bdev, 1678c2ecf20Sopenharmony_ci struct cache_sb_disk **res) 1688c2ecf20Sopenharmony_ci{ 1698c2ecf20Sopenharmony_ci const char *err; 1708c2ecf20Sopenharmony_ci struct cache_sb_disk *s; 1718c2ecf20Sopenharmony_ci struct page *page; 1728c2ecf20Sopenharmony_ci unsigned int i; 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci page = read_cache_page_gfp(bdev->bd_inode->i_mapping, 1758c2ecf20Sopenharmony_ci SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL); 1768c2ecf20Sopenharmony_ci if (IS_ERR(page)) 1778c2ecf20Sopenharmony_ci return "IO error"; 1788c2ecf20Sopenharmony_ci s = page_address(page) + offset_in_page(SB_OFFSET); 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci sb->offset = le64_to_cpu(s->offset); 1818c2ecf20Sopenharmony_ci sb->version = le64_to_cpu(s->version); 1828c2ecf20Sopenharmony_ci 1838c2ecf20Sopenharmony_ci memcpy(sb->magic, s->magic, 16); 1848c2ecf20Sopenharmony_ci memcpy(sb->uuid, s->uuid, 16); 1858c2ecf20Sopenharmony_ci memcpy(sb->set_uuid, s->set_uuid, 16); 1868c2ecf20Sopenharmony_ci memcpy(sb->label, s->label, SB_LABEL_SIZE); 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci sb->flags = le64_to_cpu(s->flags); 1898c2ecf20Sopenharmony_ci sb->seq = le64_to_cpu(s->seq); 1908c2ecf20Sopenharmony_ci sb->last_mount = le32_to_cpu(s->last_mount); 1918c2ecf20Sopenharmony_ci sb->keys = le16_to_cpu(s->keys); 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci for (i = 0; i < SB_JOURNAL_BUCKETS; i++) 1948c2ecf20Sopenharmony_ci sb->d[i] = le64_to_cpu(s->d[i]); 1958c2ecf20Sopenharmony_ci 1968c2ecf20Sopenharmony_ci pr_debug("read sb version 
%llu, flags %llu, seq %llu, journal size %u\n", 1978c2ecf20Sopenharmony_ci sb->version, sb->flags, sb->seq, sb->keys); 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_ci err = "Not a bcache superblock (bad offset)"; 2008c2ecf20Sopenharmony_ci if (sb->offset != SB_SECTOR) 2018c2ecf20Sopenharmony_ci goto err; 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci err = "Not a bcache superblock (bad magic)"; 2048c2ecf20Sopenharmony_ci if (memcmp(sb->magic, bcache_magic, 16)) 2058c2ecf20Sopenharmony_ci goto err; 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci err = "Bad checksum"; 2088c2ecf20Sopenharmony_ci if (s->csum != csum_set(s)) 2098c2ecf20Sopenharmony_ci goto err; 2108c2ecf20Sopenharmony_ci 2118c2ecf20Sopenharmony_ci err = "Bad UUID"; 2128c2ecf20Sopenharmony_ci if (bch_is_zero(sb->uuid, 16)) 2138c2ecf20Sopenharmony_ci goto err; 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci sb->block_size = le16_to_cpu(s->block_size); 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_ci err = "Superblock block size smaller than device block size"; 2188c2ecf20Sopenharmony_ci if (sb->block_size << 9 < bdev_logical_block_size(bdev)) 2198c2ecf20Sopenharmony_ci goto err; 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ci switch (sb->version) { 2228c2ecf20Sopenharmony_ci case BCACHE_SB_VERSION_BDEV: 2238c2ecf20Sopenharmony_ci sb->data_offset = BDEV_DATA_START_DEFAULT; 2248c2ecf20Sopenharmony_ci break; 2258c2ecf20Sopenharmony_ci case BCACHE_SB_VERSION_BDEV_WITH_OFFSET: 2268c2ecf20Sopenharmony_ci case BCACHE_SB_VERSION_BDEV_WITH_FEATURES: 2278c2ecf20Sopenharmony_ci sb->data_offset = le64_to_cpu(s->data_offset); 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_ci err = "Bad data offset"; 2308c2ecf20Sopenharmony_ci if (sb->data_offset < BDEV_DATA_START_DEFAULT) 2318c2ecf20Sopenharmony_ci goto err; 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_ci break; 2348c2ecf20Sopenharmony_ci case BCACHE_SB_VERSION_CDEV: 2358c2ecf20Sopenharmony_ci case 
BCACHE_SB_VERSION_CDEV_WITH_UUID: 2368c2ecf20Sopenharmony_ci err = read_super_common(sb, bdev, s); 2378c2ecf20Sopenharmony_ci if (err) 2388c2ecf20Sopenharmony_ci goto err; 2398c2ecf20Sopenharmony_ci break; 2408c2ecf20Sopenharmony_ci case BCACHE_SB_VERSION_CDEV_WITH_FEATURES: 2418c2ecf20Sopenharmony_ci /* 2428c2ecf20Sopenharmony_ci * Feature bits are needed in read_super_common(), 2438c2ecf20Sopenharmony_ci * convert them firstly. 2448c2ecf20Sopenharmony_ci */ 2458c2ecf20Sopenharmony_ci sb->feature_compat = le64_to_cpu(s->feature_compat); 2468c2ecf20Sopenharmony_ci sb->feature_incompat = le64_to_cpu(s->feature_incompat); 2478c2ecf20Sopenharmony_ci sb->feature_ro_compat = le64_to_cpu(s->feature_ro_compat); 2488c2ecf20Sopenharmony_ci 2498c2ecf20Sopenharmony_ci /* Check incompatible features */ 2508c2ecf20Sopenharmony_ci err = "Unsupported compatible feature found"; 2518c2ecf20Sopenharmony_ci if (bch_has_unknown_compat_features(sb)) 2528c2ecf20Sopenharmony_ci goto err; 2538c2ecf20Sopenharmony_ci 2548c2ecf20Sopenharmony_ci err = "Unsupported read-only compatible feature found"; 2558c2ecf20Sopenharmony_ci if (bch_has_unknown_ro_compat_features(sb)) 2568c2ecf20Sopenharmony_ci goto err; 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_ci err = "Unsupported incompatible feature found"; 2598c2ecf20Sopenharmony_ci if (bch_has_unknown_incompat_features(sb)) 2608c2ecf20Sopenharmony_ci goto err; 2618c2ecf20Sopenharmony_ci 2628c2ecf20Sopenharmony_ci err = read_super_common(sb, bdev, s); 2638c2ecf20Sopenharmony_ci if (err) 2648c2ecf20Sopenharmony_ci goto err; 2658c2ecf20Sopenharmony_ci break; 2668c2ecf20Sopenharmony_ci default: 2678c2ecf20Sopenharmony_ci err = "Unsupported superblock version"; 2688c2ecf20Sopenharmony_ci goto err; 2698c2ecf20Sopenharmony_ci } 2708c2ecf20Sopenharmony_ci 2718c2ecf20Sopenharmony_ci sb->last_mount = (u32)ktime_get_real_seconds(); 2728c2ecf20Sopenharmony_ci *res = s; 2738c2ecf20Sopenharmony_ci return NULL; 2748c2ecf20Sopenharmony_cierr: 
2758c2ecf20Sopenharmony_ci put_page(page); 2768c2ecf20Sopenharmony_ci return err; 2778c2ecf20Sopenharmony_ci} 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_cistatic void write_bdev_super_endio(struct bio *bio) 2808c2ecf20Sopenharmony_ci{ 2818c2ecf20Sopenharmony_ci struct cached_dev *dc = bio->bi_private; 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_ci if (bio->bi_status) 2848c2ecf20Sopenharmony_ci bch_count_backing_io_errors(dc, bio); 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci closure_put(&dc->sb_write); 2878c2ecf20Sopenharmony_ci} 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_cistatic void __write_super(struct cache_sb *sb, struct cache_sb_disk *out, 2908c2ecf20Sopenharmony_ci struct bio *bio) 2918c2ecf20Sopenharmony_ci{ 2928c2ecf20Sopenharmony_ci unsigned int i; 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META; 2958c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector = SB_SECTOR; 2968c2ecf20Sopenharmony_ci __bio_add_page(bio, virt_to_page(out), SB_SIZE, 2978c2ecf20Sopenharmony_ci offset_in_page(out)); 2988c2ecf20Sopenharmony_ci 2998c2ecf20Sopenharmony_ci out->offset = cpu_to_le64(sb->offset); 3008c2ecf20Sopenharmony_ci 3018c2ecf20Sopenharmony_ci memcpy(out->uuid, sb->uuid, 16); 3028c2ecf20Sopenharmony_ci memcpy(out->set_uuid, sb->set_uuid, 16); 3038c2ecf20Sopenharmony_ci memcpy(out->label, sb->label, SB_LABEL_SIZE); 3048c2ecf20Sopenharmony_ci 3058c2ecf20Sopenharmony_ci out->flags = cpu_to_le64(sb->flags); 3068c2ecf20Sopenharmony_ci out->seq = cpu_to_le64(sb->seq); 3078c2ecf20Sopenharmony_ci 3088c2ecf20Sopenharmony_ci out->last_mount = cpu_to_le32(sb->last_mount); 3098c2ecf20Sopenharmony_ci out->first_bucket = cpu_to_le16(sb->first_bucket); 3108c2ecf20Sopenharmony_ci out->keys = cpu_to_le16(sb->keys); 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci for (i = 0; i < sb->keys; i++) 3138c2ecf20Sopenharmony_ci out->d[i] = cpu_to_le64(sb->d[i]); 3148c2ecf20Sopenharmony_ci 
3158c2ecf20Sopenharmony_ci if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) { 3168c2ecf20Sopenharmony_ci out->feature_compat = cpu_to_le64(sb->feature_compat); 3178c2ecf20Sopenharmony_ci out->feature_incompat = cpu_to_le64(sb->feature_incompat); 3188c2ecf20Sopenharmony_ci out->feature_ro_compat = cpu_to_le64(sb->feature_ro_compat); 3198c2ecf20Sopenharmony_ci } 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci out->version = cpu_to_le64(sb->version); 3228c2ecf20Sopenharmony_ci out->csum = csum_set(out); 3238c2ecf20Sopenharmony_ci 3248c2ecf20Sopenharmony_ci pr_debug("ver %llu, flags %llu, seq %llu\n", 3258c2ecf20Sopenharmony_ci sb->version, sb->flags, sb->seq); 3268c2ecf20Sopenharmony_ci 3278c2ecf20Sopenharmony_ci submit_bio(bio); 3288c2ecf20Sopenharmony_ci} 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_cistatic void bch_write_bdev_super_unlock(struct closure *cl) 3318c2ecf20Sopenharmony_ci{ 3328c2ecf20Sopenharmony_ci struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write); 3338c2ecf20Sopenharmony_ci 3348c2ecf20Sopenharmony_ci up(&dc->sb_write_mutex); 3358c2ecf20Sopenharmony_ci} 3368c2ecf20Sopenharmony_ci 3378c2ecf20Sopenharmony_civoid bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) 3388c2ecf20Sopenharmony_ci{ 3398c2ecf20Sopenharmony_ci struct closure *cl = &dc->sb_write; 3408c2ecf20Sopenharmony_ci struct bio *bio = &dc->sb_bio; 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci down(&dc->sb_write_mutex); 3438c2ecf20Sopenharmony_ci closure_init(cl, parent); 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ci bio_init(bio, dc->sb_bv, 1); 3468c2ecf20Sopenharmony_ci bio_set_dev(bio, dc->bdev); 3478c2ecf20Sopenharmony_ci bio->bi_end_io = write_bdev_super_endio; 3488c2ecf20Sopenharmony_ci bio->bi_private = dc; 3498c2ecf20Sopenharmony_ci 3508c2ecf20Sopenharmony_ci closure_get(cl); 3518c2ecf20Sopenharmony_ci /* I/O request sent to backing device */ 3528c2ecf20Sopenharmony_ci __write_super(&dc->sb, dc->sb_disk, bio); 
3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci closure_return_with_destructor(cl, bch_write_bdev_super_unlock); 3558c2ecf20Sopenharmony_ci} 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_cistatic void write_super_endio(struct bio *bio) 3588c2ecf20Sopenharmony_ci{ 3598c2ecf20Sopenharmony_ci struct cache *ca = bio->bi_private; 3608c2ecf20Sopenharmony_ci 3618c2ecf20Sopenharmony_ci /* is_read = 0 */ 3628c2ecf20Sopenharmony_ci bch_count_io_errors(ca, bio->bi_status, 0, 3638c2ecf20Sopenharmony_ci "writing superblock"); 3648c2ecf20Sopenharmony_ci closure_put(&ca->set->sb_write); 3658c2ecf20Sopenharmony_ci} 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_cistatic void bcache_write_super_unlock(struct closure *cl) 3688c2ecf20Sopenharmony_ci{ 3698c2ecf20Sopenharmony_ci struct cache_set *c = container_of(cl, struct cache_set, sb_write); 3708c2ecf20Sopenharmony_ci 3718c2ecf20Sopenharmony_ci up(&c->sb_write_mutex); 3728c2ecf20Sopenharmony_ci} 3738c2ecf20Sopenharmony_ci 3748c2ecf20Sopenharmony_civoid bcache_write_super(struct cache_set *c) 3758c2ecf20Sopenharmony_ci{ 3768c2ecf20Sopenharmony_ci struct closure *cl = &c->sb_write; 3778c2ecf20Sopenharmony_ci struct cache *ca = c->cache; 3788c2ecf20Sopenharmony_ci struct bio *bio = &ca->sb_bio; 3798c2ecf20Sopenharmony_ci unsigned int version = BCACHE_SB_VERSION_CDEV_WITH_UUID; 3808c2ecf20Sopenharmony_ci 3818c2ecf20Sopenharmony_ci down(&c->sb_write_mutex); 3828c2ecf20Sopenharmony_ci closure_init(cl, &c->cl); 3838c2ecf20Sopenharmony_ci 3848c2ecf20Sopenharmony_ci ca->sb.seq++; 3858c2ecf20Sopenharmony_ci 3868c2ecf20Sopenharmony_ci if (ca->sb.version < version) 3878c2ecf20Sopenharmony_ci ca->sb.version = version; 3888c2ecf20Sopenharmony_ci 3898c2ecf20Sopenharmony_ci bio_init(bio, ca->sb_bv, 1); 3908c2ecf20Sopenharmony_ci bio_set_dev(bio, ca->bdev); 3918c2ecf20Sopenharmony_ci bio->bi_end_io = write_super_endio; 3928c2ecf20Sopenharmony_ci bio->bi_private = ca; 3938c2ecf20Sopenharmony_ci 3948c2ecf20Sopenharmony_ci 
closure_get(cl); 3958c2ecf20Sopenharmony_ci __write_super(&ca->sb, ca->sb_disk, bio); 3968c2ecf20Sopenharmony_ci 3978c2ecf20Sopenharmony_ci closure_return_with_destructor(cl, bcache_write_super_unlock); 3988c2ecf20Sopenharmony_ci} 3998c2ecf20Sopenharmony_ci 4008c2ecf20Sopenharmony_ci/* UUID io */ 4018c2ecf20Sopenharmony_ci 4028c2ecf20Sopenharmony_cistatic void uuid_endio(struct bio *bio) 4038c2ecf20Sopenharmony_ci{ 4048c2ecf20Sopenharmony_ci struct closure *cl = bio->bi_private; 4058c2ecf20Sopenharmony_ci struct cache_set *c = container_of(cl, struct cache_set, uuid_write); 4068c2ecf20Sopenharmony_ci 4078c2ecf20Sopenharmony_ci cache_set_err_on(bio->bi_status, c, "accessing uuids"); 4088c2ecf20Sopenharmony_ci bch_bbio_free(bio, c); 4098c2ecf20Sopenharmony_ci closure_put(cl); 4108c2ecf20Sopenharmony_ci} 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_cistatic void uuid_io_unlock(struct closure *cl) 4138c2ecf20Sopenharmony_ci{ 4148c2ecf20Sopenharmony_ci struct cache_set *c = container_of(cl, struct cache_set, uuid_write); 4158c2ecf20Sopenharmony_ci 4168c2ecf20Sopenharmony_ci up(&c->uuid_write_mutex); 4178c2ecf20Sopenharmony_ci} 4188c2ecf20Sopenharmony_ci 4198c2ecf20Sopenharmony_cistatic void uuid_io(struct cache_set *c, int op, unsigned long op_flags, 4208c2ecf20Sopenharmony_ci struct bkey *k, struct closure *parent) 4218c2ecf20Sopenharmony_ci{ 4228c2ecf20Sopenharmony_ci struct closure *cl = &c->uuid_write; 4238c2ecf20Sopenharmony_ci struct uuid_entry *u; 4248c2ecf20Sopenharmony_ci unsigned int i; 4258c2ecf20Sopenharmony_ci char buf[80]; 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_ci BUG_ON(!parent); 4288c2ecf20Sopenharmony_ci down(&c->uuid_write_mutex); 4298c2ecf20Sopenharmony_ci closure_init(cl, parent); 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci for (i = 0; i < KEY_PTRS(k); i++) { 4328c2ecf20Sopenharmony_ci struct bio *bio = bch_bbio_alloc(c); 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ci bio->bi_opf = REQ_SYNC | REQ_META | op_flags; 
4358c2ecf20Sopenharmony_ci bio->bi_iter.bi_size = KEY_SIZE(k) << 9; 4368c2ecf20Sopenharmony_ci 4378c2ecf20Sopenharmony_ci bio->bi_end_io = uuid_endio; 4388c2ecf20Sopenharmony_ci bio->bi_private = cl; 4398c2ecf20Sopenharmony_ci bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags); 4408c2ecf20Sopenharmony_ci bch_bio_map(bio, c->uuids); 4418c2ecf20Sopenharmony_ci 4428c2ecf20Sopenharmony_ci bch_submit_bbio(bio, c, k, i); 4438c2ecf20Sopenharmony_ci 4448c2ecf20Sopenharmony_ci if (op != REQ_OP_WRITE) 4458c2ecf20Sopenharmony_ci break; 4468c2ecf20Sopenharmony_ci } 4478c2ecf20Sopenharmony_ci 4488c2ecf20Sopenharmony_ci bch_extent_to_text(buf, sizeof(buf), k); 4498c2ecf20Sopenharmony_ci pr_debug("%s UUIDs at %s\n", op == REQ_OP_WRITE ? "wrote" : "read", buf); 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_ci for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) 4528c2ecf20Sopenharmony_ci if (!bch_is_zero(u->uuid, 16)) 4538c2ecf20Sopenharmony_ci pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u\n", 4548c2ecf20Sopenharmony_ci u - c->uuids, u->uuid, u->label, 4558c2ecf20Sopenharmony_ci u->first_reg, u->last_reg, u->invalidated); 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_ci closure_return_with_destructor(cl, uuid_io_unlock); 4588c2ecf20Sopenharmony_ci} 4598c2ecf20Sopenharmony_ci 4608c2ecf20Sopenharmony_cistatic char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl) 4618c2ecf20Sopenharmony_ci{ 4628c2ecf20Sopenharmony_ci struct bkey *k = &j->uuid_bucket; 4638c2ecf20Sopenharmony_ci 4648c2ecf20Sopenharmony_ci if (__bch_btree_ptr_invalid(c, k)) 4658c2ecf20Sopenharmony_ci return "bad uuid pointer"; 4668c2ecf20Sopenharmony_ci 4678c2ecf20Sopenharmony_ci bkey_copy(&c->uuid_bucket, k); 4688c2ecf20Sopenharmony_ci uuid_io(c, REQ_OP_READ, 0, k, cl); 4698c2ecf20Sopenharmony_ci 4708c2ecf20Sopenharmony_ci if (j->version < BCACHE_JSET_VERSION_UUIDv1) { 4718c2ecf20Sopenharmony_ci struct uuid_entry_v0 *u0 = (void *) c->uuids; 4728c2ecf20Sopenharmony_ci struct 
uuid_entry *u1 = (void *) c->uuids; 4738c2ecf20Sopenharmony_ci int i; 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_ci closure_sync(cl); 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_ci /* 4788c2ecf20Sopenharmony_ci * Since the new uuid entry is bigger than the old, we have to 4798c2ecf20Sopenharmony_ci * convert starting at the highest memory address and work down 4808c2ecf20Sopenharmony_ci * in order to do it in place 4818c2ecf20Sopenharmony_ci */ 4828c2ecf20Sopenharmony_ci 4838c2ecf20Sopenharmony_ci for (i = c->nr_uuids - 1; 4848c2ecf20Sopenharmony_ci i >= 0; 4858c2ecf20Sopenharmony_ci --i) { 4868c2ecf20Sopenharmony_ci memcpy(u1[i].uuid, u0[i].uuid, 16); 4878c2ecf20Sopenharmony_ci memcpy(u1[i].label, u0[i].label, 32); 4888c2ecf20Sopenharmony_ci 4898c2ecf20Sopenharmony_ci u1[i].first_reg = u0[i].first_reg; 4908c2ecf20Sopenharmony_ci u1[i].last_reg = u0[i].last_reg; 4918c2ecf20Sopenharmony_ci u1[i].invalidated = u0[i].invalidated; 4928c2ecf20Sopenharmony_ci 4938c2ecf20Sopenharmony_ci u1[i].flags = 0; 4948c2ecf20Sopenharmony_ci u1[i].sectors = 0; 4958c2ecf20Sopenharmony_ci } 4968c2ecf20Sopenharmony_ci } 4978c2ecf20Sopenharmony_ci 4988c2ecf20Sopenharmony_ci return NULL; 4998c2ecf20Sopenharmony_ci} 5008c2ecf20Sopenharmony_ci 5018c2ecf20Sopenharmony_cistatic int __uuid_write(struct cache_set *c) 5028c2ecf20Sopenharmony_ci{ 5038c2ecf20Sopenharmony_ci BKEY_PADDED(key) k; 5048c2ecf20Sopenharmony_ci struct closure cl; 5058c2ecf20Sopenharmony_ci struct cache *ca = c->cache; 5068c2ecf20Sopenharmony_ci unsigned int size; 5078c2ecf20Sopenharmony_ci 5088c2ecf20Sopenharmony_ci closure_init_stack(&cl); 5098c2ecf20Sopenharmony_ci lockdep_assert_held(&bch_register_lock); 5108c2ecf20Sopenharmony_ci 5118c2ecf20Sopenharmony_ci if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, true)) 5128c2ecf20Sopenharmony_ci return 1; 5138c2ecf20Sopenharmony_ci 5148c2ecf20Sopenharmony_ci size = meta_bucket_pages(&ca->sb) * PAGE_SECTORS; 5158c2ecf20Sopenharmony_ci SET_KEY_SIZE(&k.key, 
size); 5168c2ecf20Sopenharmony_ci uuid_io(c, REQ_OP_WRITE, 0, &k.key, &cl); 5178c2ecf20Sopenharmony_ci closure_sync(&cl); 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_ci /* Only one bucket used for uuid write */ 5208c2ecf20Sopenharmony_ci atomic_long_add(ca->sb.bucket_size, &ca->meta_sectors_written); 5218c2ecf20Sopenharmony_ci 5228c2ecf20Sopenharmony_ci bkey_copy(&c->uuid_bucket, &k.key); 5238c2ecf20Sopenharmony_ci bkey_put(c, &k.key); 5248c2ecf20Sopenharmony_ci return 0; 5258c2ecf20Sopenharmony_ci} 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_ciint bch_uuid_write(struct cache_set *c) 5288c2ecf20Sopenharmony_ci{ 5298c2ecf20Sopenharmony_ci int ret = __uuid_write(c); 5308c2ecf20Sopenharmony_ci 5318c2ecf20Sopenharmony_ci if (!ret) 5328c2ecf20Sopenharmony_ci bch_journal_meta(c, NULL); 5338c2ecf20Sopenharmony_ci 5348c2ecf20Sopenharmony_ci return ret; 5358c2ecf20Sopenharmony_ci} 5368c2ecf20Sopenharmony_ci 5378c2ecf20Sopenharmony_cistatic struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid) 5388c2ecf20Sopenharmony_ci{ 5398c2ecf20Sopenharmony_ci struct uuid_entry *u; 5408c2ecf20Sopenharmony_ci 5418c2ecf20Sopenharmony_ci for (u = c->uuids; 5428c2ecf20Sopenharmony_ci u < c->uuids + c->nr_uuids; u++) 5438c2ecf20Sopenharmony_ci if (!memcmp(u->uuid, uuid, 16)) 5448c2ecf20Sopenharmony_ci return u; 5458c2ecf20Sopenharmony_ci 5468c2ecf20Sopenharmony_ci return NULL; 5478c2ecf20Sopenharmony_ci} 5488c2ecf20Sopenharmony_ci 5498c2ecf20Sopenharmony_cistatic struct uuid_entry *uuid_find_empty(struct cache_set *c) 5508c2ecf20Sopenharmony_ci{ 5518c2ecf20Sopenharmony_ci static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; 5528c2ecf20Sopenharmony_ci 5538c2ecf20Sopenharmony_ci return uuid_find(c, zero_uuid); 5548c2ecf20Sopenharmony_ci} 5558c2ecf20Sopenharmony_ci 5568c2ecf20Sopenharmony_ci/* 5578c2ecf20Sopenharmony_ci * Bucket priorities/gens: 5588c2ecf20Sopenharmony_ci * 5598c2ecf20Sopenharmony_ci * For each bucket, we store on disk its 
 *   8 bit gen
 *  16 bit priority
 *
 * See alloc.c for an explanation of the gen. The priority is used to implement
 * lru (and in the future other) cache replacement policies; for most purposes
 * it's just an opaque integer.
 *
 * The gens and the priorities don't have a whole lot to do with each other, and
 * it's actually the gens that must be written out at specific times - it's no
 * big deal if the priorities don't get written, if we lose them we just reuse
 * buckets in suboptimal order.
 *
 * On disk they're stored in a packed array, and in as many buckets as are
 * required to fit them all. The buckets we use to store them form a list; the
 * journal header points to the first bucket, the first bucket points to the
 * second bucket, et cetera.
 *
 * This code is used by the allocation code; periodically (whenever it runs out
 * of buckets to allocate from) the allocation code will invalidate some
 * buckets, but it can't use those buckets until their new gens are safely on
 * disk.
5818c2ecf20Sopenharmony_ci */ 5828c2ecf20Sopenharmony_ci 5838c2ecf20Sopenharmony_cistatic void prio_endio(struct bio *bio) 5848c2ecf20Sopenharmony_ci{ 5858c2ecf20Sopenharmony_ci struct cache *ca = bio->bi_private; 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_ci cache_set_err_on(bio->bi_status, ca->set, "accessing priorities"); 5888c2ecf20Sopenharmony_ci bch_bbio_free(bio, ca->set); 5898c2ecf20Sopenharmony_ci closure_put(&ca->prio); 5908c2ecf20Sopenharmony_ci} 5918c2ecf20Sopenharmony_ci 5928c2ecf20Sopenharmony_cistatic void prio_io(struct cache *ca, uint64_t bucket, int op, 5938c2ecf20Sopenharmony_ci unsigned long op_flags) 5948c2ecf20Sopenharmony_ci{ 5958c2ecf20Sopenharmony_ci struct closure *cl = &ca->prio; 5968c2ecf20Sopenharmony_ci struct bio *bio = bch_bbio_alloc(ca->set); 5978c2ecf20Sopenharmony_ci 5988c2ecf20Sopenharmony_ci closure_init_stack(cl); 5998c2ecf20Sopenharmony_ci 6008c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector = bucket * ca->sb.bucket_size; 6018c2ecf20Sopenharmony_ci bio_set_dev(bio, ca->bdev); 6028c2ecf20Sopenharmony_ci bio->bi_iter.bi_size = meta_bucket_bytes(&ca->sb); 6038c2ecf20Sopenharmony_ci 6048c2ecf20Sopenharmony_ci bio->bi_end_io = prio_endio; 6058c2ecf20Sopenharmony_ci bio->bi_private = ca; 6068c2ecf20Sopenharmony_ci bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags); 6078c2ecf20Sopenharmony_ci bch_bio_map(bio, ca->disk_buckets); 6088c2ecf20Sopenharmony_ci 6098c2ecf20Sopenharmony_ci closure_bio_submit(ca->set, bio, &ca->prio); 6108c2ecf20Sopenharmony_ci closure_sync(cl); 6118c2ecf20Sopenharmony_ci} 6128c2ecf20Sopenharmony_ci 6138c2ecf20Sopenharmony_ciint bch_prio_write(struct cache *ca, bool wait) 6148c2ecf20Sopenharmony_ci{ 6158c2ecf20Sopenharmony_ci int i; 6168c2ecf20Sopenharmony_ci struct bucket *b; 6178c2ecf20Sopenharmony_ci struct closure cl; 6188c2ecf20Sopenharmony_ci 6198c2ecf20Sopenharmony_ci pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu\n", 6208c2ecf20Sopenharmony_ci fifo_used(&ca->free[RESERVE_PRIO]), 
6218c2ecf20Sopenharmony_ci fifo_used(&ca->free[RESERVE_NONE]), 6228c2ecf20Sopenharmony_ci fifo_used(&ca->free_inc)); 6238c2ecf20Sopenharmony_ci 6248c2ecf20Sopenharmony_ci /* 6258c2ecf20Sopenharmony_ci * Pre-check if there are enough free buckets. In the non-blocking 6268c2ecf20Sopenharmony_ci * scenario it's better to fail early rather than starting to allocate 6278c2ecf20Sopenharmony_ci * buckets and do a cleanup later in case of failure. 6288c2ecf20Sopenharmony_ci */ 6298c2ecf20Sopenharmony_ci if (!wait) { 6308c2ecf20Sopenharmony_ci size_t avail = fifo_used(&ca->free[RESERVE_PRIO]) + 6318c2ecf20Sopenharmony_ci fifo_used(&ca->free[RESERVE_NONE]); 6328c2ecf20Sopenharmony_ci if (prio_buckets(ca) > avail) 6338c2ecf20Sopenharmony_ci return -ENOMEM; 6348c2ecf20Sopenharmony_ci } 6358c2ecf20Sopenharmony_ci 6368c2ecf20Sopenharmony_ci closure_init_stack(&cl); 6378c2ecf20Sopenharmony_ci 6388c2ecf20Sopenharmony_ci lockdep_assert_held(&ca->set->bucket_lock); 6398c2ecf20Sopenharmony_ci 6408c2ecf20Sopenharmony_ci ca->disk_buckets->seq++; 6418c2ecf20Sopenharmony_ci 6428c2ecf20Sopenharmony_ci atomic_long_add(ca->sb.bucket_size * prio_buckets(ca), 6438c2ecf20Sopenharmony_ci &ca->meta_sectors_written); 6448c2ecf20Sopenharmony_ci 6458c2ecf20Sopenharmony_ci for (i = prio_buckets(ca) - 1; i >= 0; --i) { 6468c2ecf20Sopenharmony_ci long bucket; 6478c2ecf20Sopenharmony_ci struct prio_set *p = ca->disk_buckets; 6488c2ecf20Sopenharmony_ci struct bucket_disk *d = p->data; 6498c2ecf20Sopenharmony_ci struct bucket_disk *end = d + prios_per_bucket(ca); 6508c2ecf20Sopenharmony_ci 6518c2ecf20Sopenharmony_ci for (b = ca->buckets + i * prios_per_bucket(ca); 6528c2ecf20Sopenharmony_ci b < ca->buckets + ca->sb.nbuckets && d < end; 6538c2ecf20Sopenharmony_ci b++, d++) { 6548c2ecf20Sopenharmony_ci d->prio = cpu_to_le16(b->prio); 6558c2ecf20Sopenharmony_ci d->gen = b->gen; 6568c2ecf20Sopenharmony_ci } 6578c2ecf20Sopenharmony_ci 6588c2ecf20Sopenharmony_ci p->next_bucket = ca->prio_buckets[i + 1]; 
6598c2ecf20Sopenharmony_ci p->magic = pset_magic(&ca->sb); 6608c2ecf20Sopenharmony_ci p->csum = bch_crc64(&p->magic, meta_bucket_bytes(&ca->sb) - 8); 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_ci bucket = bch_bucket_alloc(ca, RESERVE_PRIO, wait); 6638c2ecf20Sopenharmony_ci BUG_ON(bucket == -1); 6648c2ecf20Sopenharmony_ci 6658c2ecf20Sopenharmony_ci mutex_unlock(&ca->set->bucket_lock); 6668c2ecf20Sopenharmony_ci prio_io(ca, bucket, REQ_OP_WRITE, 0); 6678c2ecf20Sopenharmony_ci mutex_lock(&ca->set->bucket_lock); 6688c2ecf20Sopenharmony_ci 6698c2ecf20Sopenharmony_ci ca->prio_buckets[i] = bucket; 6708c2ecf20Sopenharmony_ci atomic_dec_bug(&ca->buckets[bucket].pin); 6718c2ecf20Sopenharmony_ci } 6728c2ecf20Sopenharmony_ci 6738c2ecf20Sopenharmony_ci mutex_unlock(&ca->set->bucket_lock); 6748c2ecf20Sopenharmony_ci 6758c2ecf20Sopenharmony_ci bch_journal_meta(ca->set, &cl); 6768c2ecf20Sopenharmony_ci closure_sync(&cl); 6778c2ecf20Sopenharmony_ci 6788c2ecf20Sopenharmony_ci mutex_lock(&ca->set->bucket_lock); 6798c2ecf20Sopenharmony_ci 6808c2ecf20Sopenharmony_ci /* 6818c2ecf20Sopenharmony_ci * Don't want the old priorities to get garbage collected until after we 6828c2ecf20Sopenharmony_ci * finish writing the new ones, and they're journalled 6838c2ecf20Sopenharmony_ci */ 6848c2ecf20Sopenharmony_ci for (i = 0; i < prio_buckets(ca); i++) { 6858c2ecf20Sopenharmony_ci if (ca->prio_last_buckets[i]) 6868c2ecf20Sopenharmony_ci __bch_bucket_free(ca, 6878c2ecf20Sopenharmony_ci &ca->buckets[ca->prio_last_buckets[i]]); 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_ci ca->prio_last_buckets[i] = ca->prio_buckets[i]; 6908c2ecf20Sopenharmony_ci } 6918c2ecf20Sopenharmony_ci return 0; 6928c2ecf20Sopenharmony_ci} 6938c2ecf20Sopenharmony_ci 6948c2ecf20Sopenharmony_cistatic int prio_read(struct cache *ca, uint64_t bucket) 6958c2ecf20Sopenharmony_ci{ 6968c2ecf20Sopenharmony_ci struct prio_set *p = ca->disk_buckets; 6978c2ecf20Sopenharmony_ci struct bucket_disk *d = p->data + 
prios_per_bucket(ca), *end = d; 6988c2ecf20Sopenharmony_ci struct bucket *b; 6998c2ecf20Sopenharmony_ci unsigned int bucket_nr = 0; 7008c2ecf20Sopenharmony_ci int ret = -EIO; 7018c2ecf20Sopenharmony_ci 7028c2ecf20Sopenharmony_ci for (b = ca->buckets; 7038c2ecf20Sopenharmony_ci b < ca->buckets + ca->sb.nbuckets; 7048c2ecf20Sopenharmony_ci b++, d++) { 7058c2ecf20Sopenharmony_ci if (d == end) { 7068c2ecf20Sopenharmony_ci ca->prio_buckets[bucket_nr] = bucket; 7078c2ecf20Sopenharmony_ci ca->prio_last_buckets[bucket_nr] = bucket; 7088c2ecf20Sopenharmony_ci bucket_nr++; 7098c2ecf20Sopenharmony_ci 7108c2ecf20Sopenharmony_ci prio_io(ca, bucket, REQ_OP_READ, 0); 7118c2ecf20Sopenharmony_ci 7128c2ecf20Sopenharmony_ci if (p->csum != 7138c2ecf20Sopenharmony_ci bch_crc64(&p->magic, meta_bucket_bytes(&ca->sb) - 8)) { 7148c2ecf20Sopenharmony_ci pr_warn("bad csum reading priorities\n"); 7158c2ecf20Sopenharmony_ci goto out; 7168c2ecf20Sopenharmony_ci } 7178c2ecf20Sopenharmony_ci 7188c2ecf20Sopenharmony_ci if (p->magic != pset_magic(&ca->sb)) { 7198c2ecf20Sopenharmony_ci pr_warn("bad magic reading priorities\n"); 7208c2ecf20Sopenharmony_ci goto out; 7218c2ecf20Sopenharmony_ci } 7228c2ecf20Sopenharmony_ci 7238c2ecf20Sopenharmony_ci bucket = p->next_bucket; 7248c2ecf20Sopenharmony_ci d = p->data; 7258c2ecf20Sopenharmony_ci } 7268c2ecf20Sopenharmony_ci 7278c2ecf20Sopenharmony_ci b->prio = le16_to_cpu(d->prio); 7288c2ecf20Sopenharmony_ci b->gen = b->last_gc = d->gen; 7298c2ecf20Sopenharmony_ci } 7308c2ecf20Sopenharmony_ci 7318c2ecf20Sopenharmony_ci ret = 0; 7328c2ecf20Sopenharmony_ciout: 7338c2ecf20Sopenharmony_ci return ret; 7348c2ecf20Sopenharmony_ci} 7358c2ecf20Sopenharmony_ci 7368c2ecf20Sopenharmony_ci/* Bcache device */ 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_cistatic int open_dev(struct block_device *b, fmode_t mode) 7398c2ecf20Sopenharmony_ci{ 7408c2ecf20Sopenharmony_ci struct bcache_device *d = b->bd_disk->private_data; 7418c2ecf20Sopenharmony_ci 
7428c2ecf20Sopenharmony_ci if (test_bit(BCACHE_DEV_CLOSING, &d->flags)) 7438c2ecf20Sopenharmony_ci return -ENXIO; 7448c2ecf20Sopenharmony_ci 7458c2ecf20Sopenharmony_ci closure_get(&d->cl); 7468c2ecf20Sopenharmony_ci return 0; 7478c2ecf20Sopenharmony_ci} 7488c2ecf20Sopenharmony_ci 7498c2ecf20Sopenharmony_cistatic void release_dev(struct gendisk *b, fmode_t mode) 7508c2ecf20Sopenharmony_ci{ 7518c2ecf20Sopenharmony_ci struct bcache_device *d = b->private_data; 7528c2ecf20Sopenharmony_ci 7538c2ecf20Sopenharmony_ci closure_put(&d->cl); 7548c2ecf20Sopenharmony_ci} 7558c2ecf20Sopenharmony_ci 7568c2ecf20Sopenharmony_cistatic int ioctl_dev(struct block_device *b, fmode_t mode, 7578c2ecf20Sopenharmony_ci unsigned int cmd, unsigned long arg) 7588c2ecf20Sopenharmony_ci{ 7598c2ecf20Sopenharmony_ci struct bcache_device *d = b->bd_disk->private_data; 7608c2ecf20Sopenharmony_ci 7618c2ecf20Sopenharmony_ci return d->ioctl(d, mode, cmd, arg); 7628c2ecf20Sopenharmony_ci} 7638c2ecf20Sopenharmony_ci 7648c2ecf20Sopenharmony_cistatic const struct block_device_operations bcache_cached_ops = { 7658c2ecf20Sopenharmony_ci .submit_bio = cached_dev_submit_bio, 7668c2ecf20Sopenharmony_ci .open = open_dev, 7678c2ecf20Sopenharmony_ci .release = release_dev, 7688c2ecf20Sopenharmony_ci .ioctl = ioctl_dev, 7698c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 7708c2ecf20Sopenharmony_ci}; 7718c2ecf20Sopenharmony_ci 7728c2ecf20Sopenharmony_cistatic const struct block_device_operations bcache_flash_ops = { 7738c2ecf20Sopenharmony_ci .submit_bio = flash_dev_submit_bio, 7748c2ecf20Sopenharmony_ci .open = open_dev, 7758c2ecf20Sopenharmony_ci .release = release_dev, 7768c2ecf20Sopenharmony_ci .ioctl = ioctl_dev, 7778c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 7788c2ecf20Sopenharmony_ci}; 7798c2ecf20Sopenharmony_ci 7808c2ecf20Sopenharmony_civoid bcache_device_stop(struct bcache_device *d) 7818c2ecf20Sopenharmony_ci{ 7828c2ecf20Sopenharmony_ci if (!test_and_set_bit(BCACHE_DEV_CLOSING, &d->flags)) 
7838c2ecf20Sopenharmony_ci /* 7848c2ecf20Sopenharmony_ci * closure_fn set to 7858c2ecf20Sopenharmony_ci * - cached device: cached_dev_flush() 7868c2ecf20Sopenharmony_ci * - flash dev: flash_dev_flush() 7878c2ecf20Sopenharmony_ci */ 7888c2ecf20Sopenharmony_ci closure_queue(&d->cl); 7898c2ecf20Sopenharmony_ci} 7908c2ecf20Sopenharmony_ci 7918c2ecf20Sopenharmony_cistatic void bcache_device_unlink(struct bcache_device *d) 7928c2ecf20Sopenharmony_ci{ 7938c2ecf20Sopenharmony_ci lockdep_assert_held(&bch_register_lock); 7948c2ecf20Sopenharmony_ci 7958c2ecf20Sopenharmony_ci if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) { 7968c2ecf20Sopenharmony_ci struct cache *ca = d->c->cache; 7978c2ecf20Sopenharmony_ci 7988c2ecf20Sopenharmony_ci sysfs_remove_link(&d->c->kobj, d->name); 7998c2ecf20Sopenharmony_ci sysfs_remove_link(&d->kobj, "cache"); 8008c2ecf20Sopenharmony_ci 8018c2ecf20Sopenharmony_ci bd_unlink_disk_holder(ca->bdev, d->disk); 8028c2ecf20Sopenharmony_ci } 8038c2ecf20Sopenharmony_ci} 8048c2ecf20Sopenharmony_ci 8058c2ecf20Sopenharmony_cistatic void bcache_device_link(struct bcache_device *d, struct cache_set *c, 8068c2ecf20Sopenharmony_ci const char *name) 8078c2ecf20Sopenharmony_ci{ 8088c2ecf20Sopenharmony_ci struct cache *ca = c->cache; 8098c2ecf20Sopenharmony_ci int ret; 8108c2ecf20Sopenharmony_ci 8118c2ecf20Sopenharmony_ci bd_link_disk_holder(ca->bdev, d->disk); 8128c2ecf20Sopenharmony_ci 8138c2ecf20Sopenharmony_ci snprintf(d->name, BCACHEDEVNAME_SIZE, 8148c2ecf20Sopenharmony_ci "%s%u", name, d->id); 8158c2ecf20Sopenharmony_ci 8168c2ecf20Sopenharmony_ci ret = sysfs_create_link(&d->kobj, &c->kobj, "cache"); 8178c2ecf20Sopenharmony_ci if (ret < 0) 8188c2ecf20Sopenharmony_ci pr_err("Couldn't create device -> cache set symlink\n"); 8198c2ecf20Sopenharmony_ci 8208c2ecf20Sopenharmony_ci ret = sysfs_create_link(&c->kobj, &d->kobj, d->name); 8218c2ecf20Sopenharmony_ci if (ret < 0) 8228c2ecf20Sopenharmony_ci pr_err("Couldn't create cache set -> device 
symlink\n"); 8238c2ecf20Sopenharmony_ci 8248c2ecf20Sopenharmony_ci clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags); 8258c2ecf20Sopenharmony_ci} 8268c2ecf20Sopenharmony_ci 8278c2ecf20Sopenharmony_cistatic void bcache_device_detach(struct bcache_device *d) 8288c2ecf20Sopenharmony_ci{ 8298c2ecf20Sopenharmony_ci lockdep_assert_held(&bch_register_lock); 8308c2ecf20Sopenharmony_ci 8318c2ecf20Sopenharmony_ci atomic_dec(&d->c->attached_dev_nr); 8328c2ecf20Sopenharmony_ci 8338c2ecf20Sopenharmony_ci if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) { 8348c2ecf20Sopenharmony_ci struct uuid_entry *u = d->c->uuids + d->id; 8358c2ecf20Sopenharmony_ci 8368c2ecf20Sopenharmony_ci SET_UUID_FLASH_ONLY(u, 0); 8378c2ecf20Sopenharmony_ci memcpy(u->uuid, invalid_uuid, 16); 8388c2ecf20Sopenharmony_ci u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds()); 8398c2ecf20Sopenharmony_ci bch_uuid_write(d->c); 8408c2ecf20Sopenharmony_ci } 8418c2ecf20Sopenharmony_ci 8428c2ecf20Sopenharmony_ci bcache_device_unlink(d); 8438c2ecf20Sopenharmony_ci 8448c2ecf20Sopenharmony_ci d->c->devices[d->id] = NULL; 8458c2ecf20Sopenharmony_ci closure_put(&d->c->caching); 8468c2ecf20Sopenharmony_ci d->c = NULL; 8478c2ecf20Sopenharmony_ci} 8488c2ecf20Sopenharmony_ci 8498c2ecf20Sopenharmony_cistatic void bcache_device_attach(struct bcache_device *d, struct cache_set *c, 8508c2ecf20Sopenharmony_ci unsigned int id) 8518c2ecf20Sopenharmony_ci{ 8528c2ecf20Sopenharmony_ci d->id = id; 8538c2ecf20Sopenharmony_ci d->c = c; 8548c2ecf20Sopenharmony_ci c->devices[id] = d; 8558c2ecf20Sopenharmony_ci 8568c2ecf20Sopenharmony_ci if (id >= c->devices_max_used) 8578c2ecf20Sopenharmony_ci c->devices_max_used = id + 1; 8588c2ecf20Sopenharmony_ci 8598c2ecf20Sopenharmony_ci closure_get(&c->caching); 8608c2ecf20Sopenharmony_ci} 8618c2ecf20Sopenharmony_ci 8628c2ecf20Sopenharmony_cistatic inline int first_minor_to_idx(int first_minor) 8638c2ecf20Sopenharmony_ci{ 8648c2ecf20Sopenharmony_ci return (first_minor/BCACHE_MINORS); 
8658c2ecf20Sopenharmony_ci} 8668c2ecf20Sopenharmony_ci 8678c2ecf20Sopenharmony_cistatic inline int idx_to_first_minor(int idx) 8688c2ecf20Sopenharmony_ci{ 8698c2ecf20Sopenharmony_ci return (idx * BCACHE_MINORS); 8708c2ecf20Sopenharmony_ci} 8718c2ecf20Sopenharmony_ci 8728c2ecf20Sopenharmony_cistatic void bcache_device_free(struct bcache_device *d) 8738c2ecf20Sopenharmony_ci{ 8748c2ecf20Sopenharmony_ci struct gendisk *disk = d->disk; 8758c2ecf20Sopenharmony_ci 8768c2ecf20Sopenharmony_ci lockdep_assert_held(&bch_register_lock); 8778c2ecf20Sopenharmony_ci 8788c2ecf20Sopenharmony_ci if (disk) 8798c2ecf20Sopenharmony_ci pr_info("%s stopped\n", disk->disk_name); 8808c2ecf20Sopenharmony_ci else 8818c2ecf20Sopenharmony_ci pr_err("bcache device (NULL gendisk) stopped\n"); 8828c2ecf20Sopenharmony_ci 8838c2ecf20Sopenharmony_ci if (d->c) 8848c2ecf20Sopenharmony_ci bcache_device_detach(d); 8858c2ecf20Sopenharmony_ci 8868c2ecf20Sopenharmony_ci if (disk) { 8878c2ecf20Sopenharmony_ci bool disk_added = (disk->flags & GENHD_FL_UP) != 0; 8888c2ecf20Sopenharmony_ci 8898c2ecf20Sopenharmony_ci if (disk_added) 8908c2ecf20Sopenharmony_ci del_gendisk(disk); 8918c2ecf20Sopenharmony_ci 8928c2ecf20Sopenharmony_ci if (disk->queue) 8938c2ecf20Sopenharmony_ci blk_cleanup_queue(disk->queue); 8948c2ecf20Sopenharmony_ci 8958c2ecf20Sopenharmony_ci ida_simple_remove(&bcache_device_idx, 8968c2ecf20Sopenharmony_ci first_minor_to_idx(disk->first_minor)); 8978c2ecf20Sopenharmony_ci if (disk_added) 8988c2ecf20Sopenharmony_ci put_disk(disk); 8998c2ecf20Sopenharmony_ci } 9008c2ecf20Sopenharmony_ci 9018c2ecf20Sopenharmony_ci bioset_exit(&d->bio_split); 9028c2ecf20Sopenharmony_ci kvfree(d->full_dirty_stripes); 9038c2ecf20Sopenharmony_ci kvfree(d->stripe_sectors_dirty); 9048c2ecf20Sopenharmony_ci 9058c2ecf20Sopenharmony_ci closure_debug_destroy(&d->cl); 9068c2ecf20Sopenharmony_ci} 9078c2ecf20Sopenharmony_ci 9088c2ecf20Sopenharmony_cistatic int bcache_device_init(struct bcache_device *d, unsigned int block_size, 
9098c2ecf20Sopenharmony_ci sector_t sectors, struct block_device *cached_bdev, 9108c2ecf20Sopenharmony_ci const struct block_device_operations *ops) 9118c2ecf20Sopenharmony_ci{ 9128c2ecf20Sopenharmony_ci struct request_queue *q; 9138c2ecf20Sopenharmony_ci const size_t max_stripes = min_t(size_t, INT_MAX, 9148c2ecf20Sopenharmony_ci SIZE_MAX / sizeof(atomic_t)); 9158c2ecf20Sopenharmony_ci uint64_t n; 9168c2ecf20Sopenharmony_ci int idx; 9178c2ecf20Sopenharmony_ci 9188c2ecf20Sopenharmony_ci if (!d->stripe_size) 9198c2ecf20Sopenharmony_ci d->stripe_size = 1 << 31; 9208c2ecf20Sopenharmony_ci else if (d->stripe_size < BCH_MIN_STRIPE_SZ) 9218c2ecf20Sopenharmony_ci d->stripe_size = roundup(BCH_MIN_STRIPE_SZ, d->stripe_size); 9228c2ecf20Sopenharmony_ci 9238c2ecf20Sopenharmony_ci n = DIV_ROUND_UP_ULL(sectors, d->stripe_size); 9248c2ecf20Sopenharmony_ci if (!n || n > max_stripes) { 9258c2ecf20Sopenharmony_ci pr_err("nr_stripes too large or invalid: %llu (start sector beyond end of disk?)\n", 9268c2ecf20Sopenharmony_ci n); 9278c2ecf20Sopenharmony_ci return -ENOMEM; 9288c2ecf20Sopenharmony_ci } 9298c2ecf20Sopenharmony_ci d->nr_stripes = n; 9308c2ecf20Sopenharmony_ci 9318c2ecf20Sopenharmony_ci n = d->nr_stripes * sizeof(atomic_t); 9328c2ecf20Sopenharmony_ci d->stripe_sectors_dirty = kvzalloc(n, GFP_KERNEL); 9338c2ecf20Sopenharmony_ci if (!d->stripe_sectors_dirty) 9348c2ecf20Sopenharmony_ci return -ENOMEM; 9358c2ecf20Sopenharmony_ci 9368c2ecf20Sopenharmony_ci n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long); 9378c2ecf20Sopenharmony_ci d->full_dirty_stripes = kvzalloc(n, GFP_KERNEL); 9388c2ecf20Sopenharmony_ci if (!d->full_dirty_stripes) 9398c2ecf20Sopenharmony_ci goto out_free_stripe_sectors_dirty; 9408c2ecf20Sopenharmony_ci 9418c2ecf20Sopenharmony_ci idx = ida_simple_get(&bcache_device_idx, 0, 9428c2ecf20Sopenharmony_ci BCACHE_DEVICE_IDX_MAX, GFP_KERNEL); 9438c2ecf20Sopenharmony_ci if (idx < 0) 9448c2ecf20Sopenharmony_ci goto out_free_full_dirty_stripes; 
9458c2ecf20Sopenharmony_ci 9468c2ecf20Sopenharmony_ci if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio), 9478c2ecf20Sopenharmony_ci BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER)) 9488c2ecf20Sopenharmony_ci goto out_ida_remove; 9498c2ecf20Sopenharmony_ci 9508c2ecf20Sopenharmony_ci d->disk = alloc_disk(BCACHE_MINORS); 9518c2ecf20Sopenharmony_ci if (!d->disk) 9528c2ecf20Sopenharmony_ci goto out_bioset_exit; 9538c2ecf20Sopenharmony_ci 9548c2ecf20Sopenharmony_ci set_capacity(d->disk, sectors); 9558c2ecf20Sopenharmony_ci snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx); 9568c2ecf20Sopenharmony_ci 9578c2ecf20Sopenharmony_ci d->disk->major = bcache_major; 9588c2ecf20Sopenharmony_ci d->disk->first_minor = idx_to_first_minor(idx); 9598c2ecf20Sopenharmony_ci d->disk->fops = ops; 9608c2ecf20Sopenharmony_ci d->disk->private_data = d; 9618c2ecf20Sopenharmony_ci 9628c2ecf20Sopenharmony_ci q = blk_alloc_queue(NUMA_NO_NODE); 9638c2ecf20Sopenharmony_ci if (!q) 9648c2ecf20Sopenharmony_ci return -ENOMEM; 9658c2ecf20Sopenharmony_ci 9668c2ecf20Sopenharmony_ci d->disk->queue = q; 9678c2ecf20Sopenharmony_ci q->limits.max_hw_sectors = UINT_MAX; 9688c2ecf20Sopenharmony_ci q->limits.max_sectors = UINT_MAX; 9698c2ecf20Sopenharmony_ci q->limits.max_segment_size = UINT_MAX; 9708c2ecf20Sopenharmony_ci q->limits.max_segments = BIO_MAX_PAGES; 9718c2ecf20Sopenharmony_ci blk_queue_max_discard_sectors(q, UINT_MAX); 9728c2ecf20Sopenharmony_ci q->limits.discard_granularity = 512; 9738c2ecf20Sopenharmony_ci q->limits.io_min = block_size; 9748c2ecf20Sopenharmony_ci q->limits.logical_block_size = block_size; 9758c2ecf20Sopenharmony_ci q->limits.physical_block_size = block_size; 9768c2ecf20Sopenharmony_ci 9778c2ecf20Sopenharmony_ci if (q->limits.logical_block_size > PAGE_SIZE && cached_bdev) { 9788c2ecf20Sopenharmony_ci /* 9798c2ecf20Sopenharmony_ci * This should only happen with BCACHE_SB_VERSION_BDEV. 9808c2ecf20Sopenharmony_ci * Block/page size is checked for BCACHE_SB_VERSION_CDEV. 
9818c2ecf20Sopenharmony_ci */ 9828c2ecf20Sopenharmony_ci pr_info("%s: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n", 9838c2ecf20Sopenharmony_ci d->disk->disk_name, q->limits.logical_block_size, 9848c2ecf20Sopenharmony_ci PAGE_SIZE, bdev_logical_block_size(cached_bdev)); 9858c2ecf20Sopenharmony_ci 9868c2ecf20Sopenharmony_ci /* This also adjusts physical block size/min io size if needed */ 9878c2ecf20Sopenharmony_ci blk_queue_logical_block_size(q, bdev_logical_block_size(cached_bdev)); 9888c2ecf20Sopenharmony_ci } 9898c2ecf20Sopenharmony_ci 9908c2ecf20Sopenharmony_ci blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue); 9918c2ecf20Sopenharmony_ci blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue); 9928c2ecf20Sopenharmony_ci blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue); 9938c2ecf20Sopenharmony_ci 9948c2ecf20Sopenharmony_ci blk_queue_write_cache(q, true, true); 9958c2ecf20Sopenharmony_ci 9968c2ecf20Sopenharmony_ci return 0; 9978c2ecf20Sopenharmony_ci 9988c2ecf20Sopenharmony_ciout_bioset_exit: 9998c2ecf20Sopenharmony_ci bioset_exit(&d->bio_split); 10008c2ecf20Sopenharmony_ciout_ida_remove: 10018c2ecf20Sopenharmony_ci ida_simple_remove(&bcache_device_idx, idx); 10028c2ecf20Sopenharmony_ciout_free_full_dirty_stripes: 10038c2ecf20Sopenharmony_ci kvfree(d->full_dirty_stripes); 10048c2ecf20Sopenharmony_ciout_free_stripe_sectors_dirty: 10058c2ecf20Sopenharmony_ci kvfree(d->stripe_sectors_dirty); 10068c2ecf20Sopenharmony_ci return -ENOMEM; 10078c2ecf20Sopenharmony_ci 10088c2ecf20Sopenharmony_ci} 10098c2ecf20Sopenharmony_ci 10108c2ecf20Sopenharmony_ci/* Cached device */ 10118c2ecf20Sopenharmony_ci 10128c2ecf20Sopenharmony_cistatic void calc_cached_dev_sectors(struct cache_set *c) 10138c2ecf20Sopenharmony_ci{ 10148c2ecf20Sopenharmony_ci uint64_t sectors = 0; 10158c2ecf20Sopenharmony_ci struct cached_dev *dc; 10168c2ecf20Sopenharmony_ci 10178c2ecf20Sopenharmony_ci list_for_each_entry(dc, 
&c->cached_devs, list) 10188c2ecf20Sopenharmony_ci sectors += bdev_sectors(dc->bdev); 10198c2ecf20Sopenharmony_ci 10208c2ecf20Sopenharmony_ci c->cached_dev_sectors = sectors; 10218c2ecf20Sopenharmony_ci} 10228c2ecf20Sopenharmony_ci 10238c2ecf20Sopenharmony_ci#define BACKING_DEV_OFFLINE_TIMEOUT 5 10248c2ecf20Sopenharmony_cistatic int cached_dev_status_update(void *arg) 10258c2ecf20Sopenharmony_ci{ 10268c2ecf20Sopenharmony_ci struct cached_dev *dc = arg; 10278c2ecf20Sopenharmony_ci struct request_queue *q; 10288c2ecf20Sopenharmony_ci 10298c2ecf20Sopenharmony_ci /* 10308c2ecf20Sopenharmony_ci * If this delayed worker is stopping outside, directly quit here. 10318c2ecf20Sopenharmony_ci * dc->io_disable might be set via sysfs interface, so check it 10328c2ecf20Sopenharmony_ci * here too. 10338c2ecf20Sopenharmony_ci */ 10348c2ecf20Sopenharmony_ci while (!kthread_should_stop() && !dc->io_disable) { 10358c2ecf20Sopenharmony_ci q = bdev_get_queue(dc->bdev); 10368c2ecf20Sopenharmony_ci if (blk_queue_dying(q)) 10378c2ecf20Sopenharmony_ci dc->offline_seconds++; 10388c2ecf20Sopenharmony_ci else 10398c2ecf20Sopenharmony_ci dc->offline_seconds = 0; 10408c2ecf20Sopenharmony_ci 10418c2ecf20Sopenharmony_ci if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) { 10428c2ecf20Sopenharmony_ci pr_err("%s: device offline for %d seconds\n", 10438c2ecf20Sopenharmony_ci dc->backing_dev_name, 10448c2ecf20Sopenharmony_ci BACKING_DEV_OFFLINE_TIMEOUT); 10458c2ecf20Sopenharmony_ci pr_err("%s: disable I/O request due to backing device offline\n", 10468c2ecf20Sopenharmony_ci dc->disk.name); 10478c2ecf20Sopenharmony_ci dc->io_disable = true; 10488c2ecf20Sopenharmony_ci /* let others know earlier that io_disable is true */ 10498c2ecf20Sopenharmony_ci smp_mb(); 10508c2ecf20Sopenharmony_ci bcache_device_stop(&dc->disk); 10518c2ecf20Sopenharmony_ci break; 10528c2ecf20Sopenharmony_ci } 10538c2ecf20Sopenharmony_ci schedule_timeout_interruptible(HZ); 10548c2ecf20Sopenharmony_ci } 
10558c2ecf20Sopenharmony_ci 10568c2ecf20Sopenharmony_ci wait_for_kthread_stop(); 10578c2ecf20Sopenharmony_ci return 0; 10588c2ecf20Sopenharmony_ci} 10598c2ecf20Sopenharmony_ci 10608c2ecf20Sopenharmony_ci 10618c2ecf20Sopenharmony_ciint bch_cached_dev_run(struct cached_dev *dc) 10628c2ecf20Sopenharmony_ci{ 10638c2ecf20Sopenharmony_ci struct bcache_device *d = &dc->disk; 10648c2ecf20Sopenharmony_ci char *buf = kmemdup_nul(dc->sb.label, SB_LABEL_SIZE, GFP_KERNEL); 10658c2ecf20Sopenharmony_ci char *env[] = { 10668c2ecf20Sopenharmony_ci "DRIVER=bcache", 10678c2ecf20Sopenharmony_ci kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid), 10688c2ecf20Sopenharmony_ci kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf ? : ""), 10698c2ecf20Sopenharmony_ci NULL, 10708c2ecf20Sopenharmony_ci }; 10718c2ecf20Sopenharmony_ci 10728c2ecf20Sopenharmony_ci if (dc->io_disable) { 10738c2ecf20Sopenharmony_ci pr_err("I/O disabled on cached dev %s\n", 10748c2ecf20Sopenharmony_ci dc->backing_dev_name); 10758c2ecf20Sopenharmony_ci kfree(env[1]); 10768c2ecf20Sopenharmony_ci kfree(env[2]); 10778c2ecf20Sopenharmony_ci kfree(buf); 10788c2ecf20Sopenharmony_ci return -EIO; 10798c2ecf20Sopenharmony_ci } 10808c2ecf20Sopenharmony_ci 10818c2ecf20Sopenharmony_ci if (atomic_xchg(&dc->running, 1)) { 10828c2ecf20Sopenharmony_ci kfree(env[1]); 10838c2ecf20Sopenharmony_ci kfree(env[2]); 10848c2ecf20Sopenharmony_ci kfree(buf); 10858c2ecf20Sopenharmony_ci pr_info("cached dev %s is running already\n", 10868c2ecf20Sopenharmony_ci dc->backing_dev_name); 10878c2ecf20Sopenharmony_ci return -EBUSY; 10888c2ecf20Sopenharmony_ci } 10898c2ecf20Sopenharmony_ci 10908c2ecf20Sopenharmony_ci if (!d->c && 10918c2ecf20Sopenharmony_ci BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) { 10928c2ecf20Sopenharmony_ci struct closure cl; 10938c2ecf20Sopenharmony_ci 10948c2ecf20Sopenharmony_ci closure_init_stack(&cl); 10958c2ecf20Sopenharmony_ci 10968c2ecf20Sopenharmony_ci SET_BDEV_STATE(&dc->sb, BDEV_STATE_STALE); 10978c2ecf20Sopenharmony_ci 
bch_write_bdev_super(dc, &cl); 10988c2ecf20Sopenharmony_ci closure_sync(&cl); 10998c2ecf20Sopenharmony_ci } 11008c2ecf20Sopenharmony_ci 11018c2ecf20Sopenharmony_ci add_disk(d->disk); 11028c2ecf20Sopenharmony_ci bd_link_disk_holder(dc->bdev, dc->disk.disk); 11038c2ecf20Sopenharmony_ci /* 11048c2ecf20Sopenharmony_ci * won't show up in the uevent file, use udevadm monitor -e instead 11058c2ecf20Sopenharmony_ci * only class / kset properties are persistent 11068c2ecf20Sopenharmony_ci */ 11078c2ecf20Sopenharmony_ci kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env); 11088c2ecf20Sopenharmony_ci kfree(env[1]); 11098c2ecf20Sopenharmony_ci kfree(env[2]); 11108c2ecf20Sopenharmony_ci kfree(buf); 11118c2ecf20Sopenharmony_ci 11128c2ecf20Sopenharmony_ci if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || 11138c2ecf20Sopenharmony_ci sysfs_create_link(&disk_to_dev(d->disk)->kobj, 11148c2ecf20Sopenharmony_ci &d->kobj, "bcache")) { 11158c2ecf20Sopenharmony_ci pr_err("Couldn't create bcache dev <-> disk sysfs symlinks\n"); 11168c2ecf20Sopenharmony_ci return -ENOMEM; 11178c2ecf20Sopenharmony_ci } 11188c2ecf20Sopenharmony_ci 11198c2ecf20Sopenharmony_ci dc->status_update_thread = kthread_run(cached_dev_status_update, 11208c2ecf20Sopenharmony_ci dc, "bcache_status_update"); 11218c2ecf20Sopenharmony_ci if (IS_ERR(dc->status_update_thread)) { 11228c2ecf20Sopenharmony_ci pr_warn("failed to create bcache_status_update kthread, continue to run without monitoring backing device status\n"); 11238c2ecf20Sopenharmony_ci } 11248c2ecf20Sopenharmony_ci 11258c2ecf20Sopenharmony_ci return 0; 11268c2ecf20Sopenharmony_ci} 11278c2ecf20Sopenharmony_ci 11288c2ecf20Sopenharmony_ci/* 11298c2ecf20Sopenharmony_ci * If BCACHE_DEV_RATE_DW_RUNNING is set, it means routine of the delayed 11308c2ecf20Sopenharmony_ci * work dc->writeback_rate_update is running. 
Wait until the routine 11318c2ecf20Sopenharmony_ci * quits (BCACHE_DEV_RATE_DW_RUNNING is clear), then continue to 11328c2ecf20Sopenharmony_ci * cancel it. If BCACHE_DEV_RATE_DW_RUNNING is not clear after time_out 11338c2ecf20Sopenharmony_ci * seconds, give up waiting here and continue to cancel it too. 11348c2ecf20Sopenharmony_ci */ 11358c2ecf20Sopenharmony_cistatic void cancel_writeback_rate_update_dwork(struct cached_dev *dc) 11368c2ecf20Sopenharmony_ci{ 11378c2ecf20Sopenharmony_ci int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ; 11388c2ecf20Sopenharmony_ci 11398c2ecf20Sopenharmony_ci do { 11408c2ecf20Sopenharmony_ci if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING, 11418c2ecf20Sopenharmony_ci &dc->disk.flags)) 11428c2ecf20Sopenharmony_ci break; 11438c2ecf20Sopenharmony_ci time_out--; 11448c2ecf20Sopenharmony_ci schedule_timeout_interruptible(1); 11458c2ecf20Sopenharmony_ci } while (time_out > 0); 11468c2ecf20Sopenharmony_ci 11478c2ecf20Sopenharmony_ci if (time_out == 0) 11488c2ecf20Sopenharmony_ci pr_warn("give up waiting for dc->writeback_write_update to quit\n"); 11498c2ecf20Sopenharmony_ci 11508c2ecf20Sopenharmony_ci cancel_delayed_work_sync(&dc->writeback_rate_update); 11518c2ecf20Sopenharmony_ci} 11528c2ecf20Sopenharmony_ci 11538c2ecf20Sopenharmony_cistatic void cached_dev_detach_finish(struct work_struct *w) 11548c2ecf20Sopenharmony_ci{ 11558c2ecf20Sopenharmony_ci struct cached_dev *dc = container_of(w, struct cached_dev, detach); 11568c2ecf20Sopenharmony_ci struct closure cl; 11578c2ecf20Sopenharmony_ci 11588c2ecf20Sopenharmony_ci closure_init_stack(&cl); 11598c2ecf20Sopenharmony_ci 11608c2ecf20Sopenharmony_ci BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)); 11618c2ecf20Sopenharmony_ci BUG_ON(refcount_read(&dc->count)); 11628c2ecf20Sopenharmony_ci 11638c2ecf20Sopenharmony_ci 11648c2ecf20Sopenharmony_ci if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) 11658c2ecf20Sopenharmony_ci cancel_writeback_rate_update_dwork(dc); 
11668c2ecf20Sopenharmony_ci 11678c2ecf20Sopenharmony_ci if (!IS_ERR_OR_NULL(dc->writeback_thread)) { 11688c2ecf20Sopenharmony_ci kthread_stop(dc->writeback_thread); 11698c2ecf20Sopenharmony_ci dc->writeback_thread = NULL; 11708c2ecf20Sopenharmony_ci } 11718c2ecf20Sopenharmony_ci 11728c2ecf20Sopenharmony_ci memset(&dc->sb.set_uuid, 0, 16); 11738c2ecf20Sopenharmony_ci SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE); 11748c2ecf20Sopenharmony_ci 11758c2ecf20Sopenharmony_ci bch_write_bdev_super(dc, &cl); 11768c2ecf20Sopenharmony_ci closure_sync(&cl); 11778c2ecf20Sopenharmony_ci 11788c2ecf20Sopenharmony_ci mutex_lock(&bch_register_lock); 11798c2ecf20Sopenharmony_ci 11808c2ecf20Sopenharmony_ci calc_cached_dev_sectors(dc->disk.c); 11818c2ecf20Sopenharmony_ci bcache_device_detach(&dc->disk); 11828c2ecf20Sopenharmony_ci list_move(&dc->list, &uncached_devices); 11838c2ecf20Sopenharmony_ci 11848c2ecf20Sopenharmony_ci clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); 11858c2ecf20Sopenharmony_ci clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags); 11868c2ecf20Sopenharmony_ci 11878c2ecf20Sopenharmony_ci mutex_unlock(&bch_register_lock); 11888c2ecf20Sopenharmony_ci 11898c2ecf20Sopenharmony_ci pr_info("Caching disabled for %s\n", dc->backing_dev_name); 11908c2ecf20Sopenharmony_ci 11918c2ecf20Sopenharmony_ci /* Drop ref we took in cached_dev_detach() */ 11928c2ecf20Sopenharmony_ci closure_put(&dc->disk.cl); 11938c2ecf20Sopenharmony_ci} 11948c2ecf20Sopenharmony_ci 11958c2ecf20Sopenharmony_civoid bch_cached_dev_detach(struct cached_dev *dc) 11968c2ecf20Sopenharmony_ci{ 11978c2ecf20Sopenharmony_ci lockdep_assert_held(&bch_register_lock); 11988c2ecf20Sopenharmony_ci 11998c2ecf20Sopenharmony_ci if (test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags)) 12008c2ecf20Sopenharmony_ci return; 12018c2ecf20Sopenharmony_ci 12028c2ecf20Sopenharmony_ci if (test_and_set_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) 12038c2ecf20Sopenharmony_ci return; 12048c2ecf20Sopenharmony_ci 12058c2ecf20Sopenharmony_ci /* 
12068c2ecf20Sopenharmony_ci * Block the device from being closed and freed until we're finished 12078c2ecf20Sopenharmony_ci * detaching 12088c2ecf20Sopenharmony_ci */ 12098c2ecf20Sopenharmony_ci closure_get(&dc->disk.cl); 12108c2ecf20Sopenharmony_ci 12118c2ecf20Sopenharmony_ci bch_writeback_queue(dc); 12128c2ecf20Sopenharmony_ci 12138c2ecf20Sopenharmony_ci cached_dev_put(dc); 12148c2ecf20Sopenharmony_ci} 12158c2ecf20Sopenharmony_ci 12168c2ecf20Sopenharmony_ciint bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, 12178c2ecf20Sopenharmony_ci uint8_t *set_uuid) 12188c2ecf20Sopenharmony_ci{ 12198c2ecf20Sopenharmony_ci uint32_t rtime = cpu_to_le32((u32)ktime_get_real_seconds()); 12208c2ecf20Sopenharmony_ci struct uuid_entry *u; 12218c2ecf20Sopenharmony_ci struct cached_dev *exist_dc, *t; 12228c2ecf20Sopenharmony_ci int ret = 0; 12238c2ecf20Sopenharmony_ci 12248c2ecf20Sopenharmony_ci if ((set_uuid && memcmp(set_uuid, c->set_uuid, 16)) || 12258c2ecf20Sopenharmony_ci (!set_uuid && memcmp(dc->sb.set_uuid, c->set_uuid, 16))) 12268c2ecf20Sopenharmony_ci return -ENOENT; 12278c2ecf20Sopenharmony_ci 12288c2ecf20Sopenharmony_ci if (dc->disk.c) { 12298c2ecf20Sopenharmony_ci pr_err("Can't attach %s: already attached\n", 12308c2ecf20Sopenharmony_ci dc->backing_dev_name); 12318c2ecf20Sopenharmony_ci return -EINVAL; 12328c2ecf20Sopenharmony_ci } 12338c2ecf20Sopenharmony_ci 12348c2ecf20Sopenharmony_ci if (test_bit(CACHE_SET_STOPPING, &c->flags)) { 12358c2ecf20Sopenharmony_ci pr_err("Can't attach %s: shutting down\n", 12368c2ecf20Sopenharmony_ci dc->backing_dev_name); 12378c2ecf20Sopenharmony_ci return -EINVAL; 12388c2ecf20Sopenharmony_ci } 12398c2ecf20Sopenharmony_ci 12408c2ecf20Sopenharmony_ci if (dc->sb.block_size < c->cache->sb.block_size) { 12418c2ecf20Sopenharmony_ci /* Will die */ 12428c2ecf20Sopenharmony_ci pr_err("Couldn't attach %s: block size less than set's block size\n", 12438c2ecf20Sopenharmony_ci dc->backing_dev_name); 12448c2ecf20Sopenharmony_ci return 
-EINVAL; 12458c2ecf20Sopenharmony_ci } 12468c2ecf20Sopenharmony_ci 12478c2ecf20Sopenharmony_ci /* Check whether already attached */ 12488c2ecf20Sopenharmony_ci list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) { 12498c2ecf20Sopenharmony_ci if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) { 12508c2ecf20Sopenharmony_ci pr_err("Tried to attach %s but duplicate UUID already attached\n", 12518c2ecf20Sopenharmony_ci dc->backing_dev_name); 12528c2ecf20Sopenharmony_ci 12538c2ecf20Sopenharmony_ci return -EINVAL; 12548c2ecf20Sopenharmony_ci } 12558c2ecf20Sopenharmony_ci } 12568c2ecf20Sopenharmony_ci 12578c2ecf20Sopenharmony_ci u = uuid_find(c, dc->sb.uuid); 12588c2ecf20Sopenharmony_ci 12598c2ecf20Sopenharmony_ci if (u && 12608c2ecf20Sopenharmony_ci (BDEV_STATE(&dc->sb) == BDEV_STATE_STALE || 12618c2ecf20Sopenharmony_ci BDEV_STATE(&dc->sb) == BDEV_STATE_NONE)) { 12628c2ecf20Sopenharmony_ci memcpy(u->uuid, invalid_uuid, 16); 12638c2ecf20Sopenharmony_ci u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds()); 12648c2ecf20Sopenharmony_ci u = NULL; 12658c2ecf20Sopenharmony_ci } 12668c2ecf20Sopenharmony_ci 12678c2ecf20Sopenharmony_ci if (!u) { 12688c2ecf20Sopenharmony_ci if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { 12698c2ecf20Sopenharmony_ci pr_err("Couldn't find uuid for %s in set\n", 12708c2ecf20Sopenharmony_ci dc->backing_dev_name); 12718c2ecf20Sopenharmony_ci return -ENOENT; 12728c2ecf20Sopenharmony_ci } 12738c2ecf20Sopenharmony_ci 12748c2ecf20Sopenharmony_ci u = uuid_find_empty(c); 12758c2ecf20Sopenharmony_ci if (!u) { 12768c2ecf20Sopenharmony_ci pr_err("Not caching %s, no room for UUID\n", 12778c2ecf20Sopenharmony_ci dc->backing_dev_name); 12788c2ecf20Sopenharmony_ci return -EINVAL; 12798c2ecf20Sopenharmony_ci } 12808c2ecf20Sopenharmony_ci } 12818c2ecf20Sopenharmony_ci 12828c2ecf20Sopenharmony_ci /* 12838c2ecf20Sopenharmony_ci * Deadlocks since we're called via sysfs... 
12848c2ecf20Sopenharmony_ci * sysfs_remove_file(&dc->kobj, &sysfs_attach); 12858c2ecf20Sopenharmony_ci */ 12868c2ecf20Sopenharmony_ci 12878c2ecf20Sopenharmony_ci if (bch_is_zero(u->uuid, 16)) { 12888c2ecf20Sopenharmony_ci struct closure cl; 12898c2ecf20Sopenharmony_ci 12908c2ecf20Sopenharmony_ci closure_init_stack(&cl); 12918c2ecf20Sopenharmony_ci 12928c2ecf20Sopenharmony_ci memcpy(u->uuid, dc->sb.uuid, 16); 12938c2ecf20Sopenharmony_ci memcpy(u->label, dc->sb.label, SB_LABEL_SIZE); 12948c2ecf20Sopenharmony_ci u->first_reg = u->last_reg = rtime; 12958c2ecf20Sopenharmony_ci bch_uuid_write(c); 12968c2ecf20Sopenharmony_ci 12978c2ecf20Sopenharmony_ci memcpy(dc->sb.set_uuid, c->set_uuid, 16); 12988c2ecf20Sopenharmony_ci SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN); 12998c2ecf20Sopenharmony_ci 13008c2ecf20Sopenharmony_ci bch_write_bdev_super(dc, &cl); 13018c2ecf20Sopenharmony_ci closure_sync(&cl); 13028c2ecf20Sopenharmony_ci } else { 13038c2ecf20Sopenharmony_ci u->last_reg = rtime; 13048c2ecf20Sopenharmony_ci bch_uuid_write(c); 13058c2ecf20Sopenharmony_ci } 13068c2ecf20Sopenharmony_ci 13078c2ecf20Sopenharmony_ci bcache_device_attach(&dc->disk, c, u - c->uuids); 13088c2ecf20Sopenharmony_ci list_move(&dc->list, &c->cached_devs); 13098c2ecf20Sopenharmony_ci calc_cached_dev_sectors(c); 13108c2ecf20Sopenharmony_ci 13118c2ecf20Sopenharmony_ci /* 13128c2ecf20Sopenharmony_ci * dc->c must be set before dc->count != 0 - paired with the mb in 13138c2ecf20Sopenharmony_ci * cached_dev_get() 13148c2ecf20Sopenharmony_ci */ 13158c2ecf20Sopenharmony_ci smp_wmb(); 13168c2ecf20Sopenharmony_ci refcount_set(&dc->count, 1); 13178c2ecf20Sopenharmony_ci 13188c2ecf20Sopenharmony_ci /* Block writeback thread, but spawn it */ 13198c2ecf20Sopenharmony_ci down_write(&dc->writeback_lock); 13208c2ecf20Sopenharmony_ci if (bch_cached_dev_writeback_start(dc)) { 13218c2ecf20Sopenharmony_ci up_write(&dc->writeback_lock); 13228c2ecf20Sopenharmony_ci pr_err("Couldn't start writeback facilities for %s\n", 
13238c2ecf20Sopenharmony_ci dc->disk.disk->disk_name); 13248c2ecf20Sopenharmony_ci return -ENOMEM; 13258c2ecf20Sopenharmony_ci } 13268c2ecf20Sopenharmony_ci 13278c2ecf20Sopenharmony_ci if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { 13288c2ecf20Sopenharmony_ci atomic_set(&dc->has_dirty, 1); 13298c2ecf20Sopenharmony_ci bch_writeback_queue(dc); 13308c2ecf20Sopenharmony_ci } 13318c2ecf20Sopenharmony_ci 13328c2ecf20Sopenharmony_ci bch_sectors_dirty_init(&dc->disk); 13338c2ecf20Sopenharmony_ci 13348c2ecf20Sopenharmony_ci ret = bch_cached_dev_run(dc); 13358c2ecf20Sopenharmony_ci if (ret && (ret != -EBUSY)) { 13368c2ecf20Sopenharmony_ci up_write(&dc->writeback_lock); 13378c2ecf20Sopenharmony_ci /* 13388c2ecf20Sopenharmony_ci * bch_register_lock is held, bcache_device_stop() is not 13398c2ecf20Sopenharmony_ci * able to be directly called. The kthread and kworker 13408c2ecf20Sopenharmony_ci * created previously in bch_cached_dev_writeback_start() 13418c2ecf20Sopenharmony_ci * have to be stopped manually here. 
13428c2ecf20Sopenharmony_ci */ 13438c2ecf20Sopenharmony_ci kthread_stop(dc->writeback_thread); 13448c2ecf20Sopenharmony_ci cancel_writeback_rate_update_dwork(dc); 13458c2ecf20Sopenharmony_ci pr_err("Couldn't run cached device %s\n", 13468c2ecf20Sopenharmony_ci dc->backing_dev_name); 13478c2ecf20Sopenharmony_ci return ret; 13488c2ecf20Sopenharmony_ci } 13498c2ecf20Sopenharmony_ci 13508c2ecf20Sopenharmony_ci bcache_device_link(&dc->disk, c, "bdev"); 13518c2ecf20Sopenharmony_ci atomic_inc(&c->attached_dev_nr); 13528c2ecf20Sopenharmony_ci 13538c2ecf20Sopenharmony_ci if (bch_has_feature_obso_large_bucket(&(c->cache->sb))) { 13548c2ecf20Sopenharmony_ci pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n"); 13558c2ecf20Sopenharmony_ci pr_err("Please update to the latest bcache-tools to create the cache device\n"); 13568c2ecf20Sopenharmony_ci set_disk_ro(dc->disk.disk, 1); 13578c2ecf20Sopenharmony_ci } 13588c2ecf20Sopenharmony_ci 13598c2ecf20Sopenharmony_ci /* Allow the writeback thread to proceed */ 13608c2ecf20Sopenharmony_ci up_write(&dc->writeback_lock); 13618c2ecf20Sopenharmony_ci 13628c2ecf20Sopenharmony_ci pr_info("Caching %s as %s on set %pU\n", 13638c2ecf20Sopenharmony_ci dc->backing_dev_name, 13648c2ecf20Sopenharmony_ci dc->disk.disk->disk_name, 13658c2ecf20Sopenharmony_ci dc->disk.c->set_uuid); 13668c2ecf20Sopenharmony_ci return 0; 13678c2ecf20Sopenharmony_ci} 13688c2ecf20Sopenharmony_ci 13698c2ecf20Sopenharmony_ci/* when dc->disk.kobj released */ 13708c2ecf20Sopenharmony_civoid bch_cached_dev_release(struct kobject *kobj) 13718c2ecf20Sopenharmony_ci{ 13728c2ecf20Sopenharmony_ci struct cached_dev *dc = container_of(kobj, struct cached_dev, 13738c2ecf20Sopenharmony_ci disk.kobj); 13748c2ecf20Sopenharmony_ci kfree(dc); 13758c2ecf20Sopenharmony_ci module_put(THIS_MODULE); 13768c2ecf20Sopenharmony_ci} 13778c2ecf20Sopenharmony_ci 13788c2ecf20Sopenharmony_cistatic void cached_dev_free(struct closure *cl) 
13798c2ecf20Sopenharmony_ci{ 13808c2ecf20Sopenharmony_ci struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); 13818c2ecf20Sopenharmony_ci 13828c2ecf20Sopenharmony_ci if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) 13838c2ecf20Sopenharmony_ci cancel_writeback_rate_update_dwork(dc); 13848c2ecf20Sopenharmony_ci 13858c2ecf20Sopenharmony_ci if (!IS_ERR_OR_NULL(dc->writeback_thread)) 13868c2ecf20Sopenharmony_ci kthread_stop(dc->writeback_thread); 13878c2ecf20Sopenharmony_ci if (!IS_ERR_OR_NULL(dc->status_update_thread)) 13888c2ecf20Sopenharmony_ci kthread_stop(dc->status_update_thread); 13898c2ecf20Sopenharmony_ci 13908c2ecf20Sopenharmony_ci mutex_lock(&bch_register_lock); 13918c2ecf20Sopenharmony_ci 13928c2ecf20Sopenharmony_ci if (atomic_read(&dc->running)) 13938c2ecf20Sopenharmony_ci bd_unlink_disk_holder(dc->bdev, dc->disk.disk); 13948c2ecf20Sopenharmony_ci bcache_device_free(&dc->disk); 13958c2ecf20Sopenharmony_ci list_del(&dc->list); 13968c2ecf20Sopenharmony_ci 13978c2ecf20Sopenharmony_ci mutex_unlock(&bch_register_lock); 13988c2ecf20Sopenharmony_ci 13998c2ecf20Sopenharmony_ci if (dc->sb_disk) 14008c2ecf20Sopenharmony_ci put_page(virt_to_page(dc->sb_disk)); 14018c2ecf20Sopenharmony_ci 14028c2ecf20Sopenharmony_ci if (!IS_ERR_OR_NULL(dc->bdev)) 14038c2ecf20Sopenharmony_ci blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 14048c2ecf20Sopenharmony_ci 14058c2ecf20Sopenharmony_ci wake_up(&unregister_wait); 14068c2ecf20Sopenharmony_ci 14078c2ecf20Sopenharmony_ci kobject_put(&dc->disk.kobj); 14088c2ecf20Sopenharmony_ci} 14098c2ecf20Sopenharmony_ci 14108c2ecf20Sopenharmony_cistatic void cached_dev_flush(struct closure *cl) 14118c2ecf20Sopenharmony_ci{ 14128c2ecf20Sopenharmony_ci struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); 14138c2ecf20Sopenharmony_ci struct bcache_device *d = &dc->disk; 14148c2ecf20Sopenharmony_ci 14158c2ecf20Sopenharmony_ci mutex_lock(&bch_register_lock); 14168c2ecf20Sopenharmony_ci 
bcache_device_unlink(d); 14178c2ecf20Sopenharmony_ci mutex_unlock(&bch_register_lock); 14188c2ecf20Sopenharmony_ci 14198c2ecf20Sopenharmony_ci bch_cache_accounting_destroy(&dc->accounting); 14208c2ecf20Sopenharmony_ci kobject_del(&d->kobj); 14218c2ecf20Sopenharmony_ci 14228c2ecf20Sopenharmony_ci continue_at(cl, cached_dev_free, system_wq); 14238c2ecf20Sopenharmony_ci} 14248c2ecf20Sopenharmony_ci 14258c2ecf20Sopenharmony_cistatic int cached_dev_init(struct cached_dev *dc, unsigned int block_size) 14268c2ecf20Sopenharmony_ci{ 14278c2ecf20Sopenharmony_ci int ret; 14288c2ecf20Sopenharmony_ci struct io *io; 14298c2ecf20Sopenharmony_ci struct request_queue *q = bdev_get_queue(dc->bdev); 14308c2ecf20Sopenharmony_ci 14318c2ecf20Sopenharmony_ci __module_get(THIS_MODULE); 14328c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&dc->list); 14338c2ecf20Sopenharmony_ci closure_init(&dc->disk.cl, NULL); 14348c2ecf20Sopenharmony_ci set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq); 14358c2ecf20Sopenharmony_ci kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype); 14368c2ecf20Sopenharmony_ci INIT_WORK(&dc->detach, cached_dev_detach_finish); 14378c2ecf20Sopenharmony_ci sema_init(&dc->sb_write_mutex, 1); 14388c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&dc->io_lru); 14398c2ecf20Sopenharmony_ci spin_lock_init(&dc->io_lock); 14408c2ecf20Sopenharmony_ci bch_cache_accounting_init(&dc->accounting, &dc->disk.cl); 14418c2ecf20Sopenharmony_ci 14428c2ecf20Sopenharmony_ci dc->sequential_cutoff = 4 << 20; 14438c2ecf20Sopenharmony_ci 14448c2ecf20Sopenharmony_ci for (io = dc->io; io < dc->io + RECENT_IO; io++) { 14458c2ecf20Sopenharmony_ci list_add(&io->lru, &dc->io_lru); 14468c2ecf20Sopenharmony_ci hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); 14478c2ecf20Sopenharmony_ci } 14488c2ecf20Sopenharmony_ci 14498c2ecf20Sopenharmony_ci dc->disk.stripe_size = q->limits.io_opt >> 9; 14508c2ecf20Sopenharmony_ci 14518c2ecf20Sopenharmony_ci if (dc->disk.stripe_size) 14528c2ecf20Sopenharmony_ci 
dc->partial_stripes_expensive = 14538c2ecf20Sopenharmony_ci q->limits.raid_partial_stripes_expensive; 14548c2ecf20Sopenharmony_ci 14558c2ecf20Sopenharmony_ci ret = bcache_device_init(&dc->disk, block_size, 14568c2ecf20Sopenharmony_ci dc->bdev->bd_part->nr_sects - dc->sb.data_offset, 14578c2ecf20Sopenharmony_ci dc->bdev, &bcache_cached_ops); 14588c2ecf20Sopenharmony_ci if (ret) 14598c2ecf20Sopenharmony_ci return ret; 14608c2ecf20Sopenharmony_ci 14618c2ecf20Sopenharmony_ci blk_queue_io_opt(dc->disk.disk->queue, 14628c2ecf20Sopenharmony_ci max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q))); 14638c2ecf20Sopenharmony_ci 14648c2ecf20Sopenharmony_ci atomic_set(&dc->io_errors, 0); 14658c2ecf20Sopenharmony_ci dc->io_disable = false; 14668c2ecf20Sopenharmony_ci dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT; 14678c2ecf20Sopenharmony_ci /* default to auto */ 14688c2ecf20Sopenharmony_ci dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO; 14698c2ecf20Sopenharmony_ci 14708c2ecf20Sopenharmony_ci bch_cached_dev_request_init(dc); 14718c2ecf20Sopenharmony_ci bch_cached_dev_writeback_init(dc); 14728c2ecf20Sopenharmony_ci return 0; 14738c2ecf20Sopenharmony_ci} 14748c2ecf20Sopenharmony_ci 14758c2ecf20Sopenharmony_ci/* Cached device - bcache superblock */ 14768c2ecf20Sopenharmony_ci 14778c2ecf20Sopenharmony_cistatic int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, 14788c2ecf20Sopenharmony_ci struct block_device *bdev, 14798c2ecf20Sopenharmony_ci struct cached_dev *dc) 14808c2ecf20Sopenharmony_ci{ 14818c2ecf20Sopenharmony_ci const char *err = "cannot allocate memory"; 14828c2ecf20Sopenharmony_ci struct cache_set *c; 14838c2ecf20Sopenharmony_ci int ret = -ENOMEM; 14848c2ecf20Sopenharmony_ci 14858c2ecf20Sopenharmony_ci bdevname(bdev, dc->backing_dev_name); 14868c2ecf20Sopenharmony_ci memcpy(&dc->sb, sb, sizeof(struct cache_sb)); 14878c2ecf20Sopenharmony_ci dc->bdev = bdev; 14888c2ecf20Sopenharmony_ci dc->bdev->bd_holder = dc; 14898c2ecf20Sopenharmony_ci 
dc->sb_disk = sb_disk; 14908c2ecf20Sopenharmony_ci 14918c2ecf20Sopenharmony_ci if (cached_dev_init(dc, sb->block_size << 9)) 14928c2ecf20Sopenharmony_ci goto err; 14938c2ecf20Sopenharmony_ci 14948c2ecf20Sopenharmony_ci err = "error creating kobject"; 14958c2ecf20Sopenharmony_ci if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj, 14968c2ecf20Sopenharmony_ci "bcache")) 14978c2ecf20Sopenharmony_ci goto err; 14988c2ecf20Sopenharmony_ci if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) 14998c2ecf20Sopenharmony_ci goto err; 15008c2ecf20Sopenharmony_ci 15018c2ecf20Sopenharmony_ci pr_info("registered backing device %s\n", dc->backing_dev_name); 15028c2ecf20Sopenharmony_ci 15038c2ecf20Sopenharmony_ci list_add(&dc->list, &uncached_devices); 15048c2ecf20Sopenharmony_ci /* attach to a matched cache set if it exists */ 15058c2ecf20Sopenharmony_ci list_for_each_entry(c, &bch_cache_sets, list) 15068c2ecf20Sopenharmony_ci bch_cached_dev_attach(dc, c, NULL); 15078c2ecf20Sopenharmony_ci 15088c2ecf20Sopenharmony_ci if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE || 15098c2ecf20Sopenharmony_ci BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) { 15108c2ecf20Sopenharmony_ci err = "failed to run cached device"; 15118c2ecf20Sopenharmony_ci ret = bch_cached_dev_run(dc); 15128c2ecf20Sopenharmony_ci if (ret) 15138c2ecf20Sopenharmony_ci goto err; 15148c2ecf20Sopenharmony_ci } 15158c2ecf20Sopenharmony_ci 15168c2ecf20Sopenharmony_ci return 0; 15178c2ecf20Sopenharmony_cierr: 15188c2ecf20Sopenharmony_ci pr_notice("error %s: %s\n", dc->backing_dev_name, err); 15198c2ecf20Sopenharmony_ci bcache_device_stop(&dc->disk); 15208c2ecf20Sopenharmony_ci return ret; 15218c2ecf20Sopenharmony_ci} 15228c2ecf20Sopenharmony_ci 15238c2ecf20Sopenharmony_ci/* Flash only volumes */ 15248c2ecf20Sopenharmony_ci 15258c2ecf20Sopenharmony_ci/* When d->kobj released */ 15268c2ecf20Sopenharmony_civoid bch_flash_dev_release(struct kobject *kobj) 15278c2ecf20Sopenharmony_ci{ 15288c2ecf20Sopenharmony_ci 
struct bcache_device *d = container_of(kobj, struct bcache_device, 15298c2ecf20Sopenharmony_ci kobj); 15308c2ecf20Sopenharmony_ci kfree(d); 15318c2ecf20Sopenharmony_ci} 15328c2ecf20Sopenharmony_ci 15338c2ecf20Sopenharmony_cistatic void flash_dev_free(struct closure *cl) 15348c2ecf20Sopenharmony_ci{ 15358c2ecf20Sopenharmony_ci struct bcache_device *d = container_of(cl, struct bcache_device, cl); 15368c2ecf20Sopenharmony_ci 15378c2ecf20Sopenharmony_ci mutex_lock(&bch_register_lock); 15388c2ecf20Sopenharmony_ci atomic_long_sub(bcache_dev_sectors_dirty(d), 15398c2ecf20Sopenharmony_ci &d->c->flash_dev_dirty_sectors); 15408c2ecf20Sopenharmony_ci bcache_device_free(d); 15418c2ecf20Sopenharmony_ci mutex_unlock(&bch_register_lock); 15428c2ecf20Sopenharmony_ci kobject_put(&d->kobj); 15438c2ecf20Sopenharmony_ci} 15448c2ecf20Sopenharmony_ci 15458c2ecf20Sopenharmony_cistatic void flash_dev_flush(struct closure *cl) 15468c2ecf20Sopenharmony_ci{ 15478c2ecf20Sopenharmony_ci struct bcache_device *d = container_of(cl, struct bcache_device, cl); 15488c2ecf20Sopenharmony_ci 15498c2ecf20Sopenharmony_ci mutex_lock(&bch_register_lock); 15508c2ecf20Sopenharmony_ci bcache_device_unlink(d); 15518c2ecf20Sopenharmony_ci mutex_unlock(&bch_register_lock); 15528c2ecf20Sopenharmony_ci kobject_del(&d->kobj); 15538c2ecf20Sopenharmony_ci continue_at(cl, flash_dev_free, system_wq); 15548c2ecf20Sopenharmony_ci} 15558c2ecf20Sopenharmony_ci 15568c2ecf20Sopenharmony_cistatic int flash_dev_run(struct cache_set *c, struct uuid_entry *u) 15578c2ecf20Sopenharmony_ci{ 15588c2ecf20Sopenharmony_ci struct bcache_device *d = kzalloc(sizeof(struct bcache_device), 15598c2ecf20Sopenharmony_ci GFP_KERNEL); 15608c2ecf20Sopenharmony_ci if (!d) 15618c2ecf20Sopenharmony_ci return -ENOMEM; 15628c2ecf20Sopenharmony_ci 15638c2ecf20Sopenharmony_ci closure_init(&d->cl, NULL); 15648c2ecf20Sopenharmony_ci set_closure_fn(&d->cl, flash_dev_flush, system_wq); 15658c2ecf20Sopenharmony_ci 15668c2ecf20Sopenharmony_ci 
kobject_init(&d->kobj, &bch_flash_dev_ktype); 15678c2ecf20Sopenharmony_ci 15688c2ecf20Sopenharmony_ci if (bcache_device_init(d, block_bytes(c->cache), u->sectors, 15698c2ecf20Sopenharmony_ci NULL, &bcache_flash_ops)) 15708c2ecf20Sopenharmony_ci goto err; 15718c2ecf20Sopenharmony_ci 15728c2ecf20Sopenharmony_ci bcache_device_attach(d, c, u - c->uuids); 15738c2ecf20Sopenharmony_ci bch_sectors_dirty_init(d); 15748c2ecf20Sopenharmony_ci bch_flash_dev_request_init(d); 15758c2ecf20Sopenharmony_ci add_disk(d->disk); 15768c2ecf20Sopenharmony_ci 15778c2ecf20Sopenharmony_ci if (kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache")) 15788c2ecf20Sopenharmony_ci goto err; 15798c2ecf20Sopenharmony_ci 15808c2ecf20Sopenharmony_ci bcache_device_link(d, c, "volume"); 15818c2ecf20Sopenharmony_ci 15828c2ecf20Sopenharmony_ci if (bch_has_feature_obso_large_bucket(&c->cache->sb)) { 15838c2ecf20Sopenharmony_ci pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n"); 15848c2ecf20Sopenharmony_ci pr_err("Please update to the latest bcache-tools to create the cache device\n"); 15858c2ecf20Sopenharmony_ci set_disk_ro(d->disk, 1); 15868c2ecf20Sopenharmony_ci } 15878c2ecf20Sopenharmony_ci 15888c2ecf20Sopenharmony_ci return 0; 15898c2ecf20Sopenharmony_cierr: 15908c2ecf20Sopenharmony_ci kobject_put(&d->kobj); 15918c2ecf20Sopenharmony_ci return -ENOMEM; 15928c2ecf20Sopenharmony_ci} 15938c2ecf20Sopenharmony_ci 15948c2ecf20Sopenharmony_cistatic int flash_devs_run(struct cache_set *c) 15958c2ecf20Sopenharmony_ci{ 15968c2ecf20Sopenharmony_ci int ret = 0; 15978c2ecf20Sopenharmony_ci struct uuid_entry *u; 15988c2ecf20Sopenharmony_ci 15998c2ecf20Sopenharmony_ci for (u = c->uuids; 16008c2ecf20Sopenharmony_ci u < c->uuids + c->nr_uuids && !ret; 16018c2ecf20Sopenharmony_ci u++) 16028c2ecf20Sopenharmony_ci if (UUID_FLASH_ONLY(u)) 16038c2ecf20Sopenharmony_ci ret = flash_dev_run(c, u); 16048c2ecf20Sopenharmony_ci 16058c2ecf20Sopenharmony_ci return ret; 
16068c2ecf20Sopenharmony_ci} 16078c2ecf20Sopenharmony_ci 16088c2ecf20Sopenharmony_ciint bch_flash_dev_create(struct cache_set *c, uint64_t size) 16098c2ecf20Sopenharmony_ci{ 16108c2ecf20Sopenharmony_ci struct uuid_entry *u; 16118c2ecf20Sopenharmony_ci 16128c2ecf20Sopenharmony_ci if (test_bit(CACHE_SET_STOPPING, &c->flags)) 16138c2ecf20Sopenharmony_ci return -EINTR; 16148c2ecf20Sopenharmony_ci 16158c2ecf20Sopenharmony_ci if (!test_bit(CACHE_SET_RUNNING, &c->flags)) 16168c2ecf20Sopenharmony_ci return -EPERM; 16178c2ecf20Sopenharmony_ci 16188c2ecf20Sopenharmony_ci u = uuid_find_empty(c); 16198c2ecf20Sopenharmony_ci if (!u) { 16208c2ecf20Sopenharmony_ci pr_err("Can't create volume, no room for UUID\n"); 16218c2ecf20Sopenharmony_ci return -EINVAL; 16228c2ecf20Sopenharmony_ci } 16238c2ecf20Sopenharmony_ci 16248c2ecf20Sopenharmony_ci get_random_bytes(u->uuid, 16); 16258c2ecf20Sopenharmony_ci memset(u->label, 0, 32); 16268c2ecf20Sopenharmony_ci u->first_reg = u->last_reg = cpu_to_le32((u32)ktime_get_real_seconds()); 16278c2ecf20Sopenharmony_ci 16288c2ecf20Sopenharmony_ci SET_UUID_FLASH_ONLY(u, 1); 16298c2ecf20Sopenharmony_ci u->sectors = size >> 9; 16308c2ecf20Sopenharmony_ci 16318c2ecf20Sopenharmony_ci bch_uuid_write(c); 16328c2ecf20Sopenharmony_ci 16338c2ecf20Sopenharmony_ci return flash_dev_run(c, u); 16348c2ecf20Sopenharmony_ci} 16358c2ecf20Sopenharmony_ci 16368c2ecf20Sopenharmony_cibool bch_cached_dev_error(struct cached_dev *dc) 16378c2ecf20Sopenharmony_ci{ 16388c2ecf20Sopenharmony_ci if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags)) 16398c2ecf20Sopenharmony_ci return false; 16408c2ecf20Sopenharmony_ci 16418c2ecf20Sopenharmony_ci dc->io_disable = true; 16428c2ecf20Sopenharmony_ci /* make others know io_disable is true earlier */ 16438c2ecf20Sopenharmony_ci smp_mb(); 16448c2ecf20Sopenharmony_ci 16458c2ecf20Sopenharmony_ci pr_err("stop %s: too many IO errors on backing device %s\n", 16468c2ecf20Sopenharmony_ci dc->disk.disk->disk_name, dc->backing_dev_name); 
16478c2ecf20Sopenharmony_ci 16488c2ecf20Sopenharmony_ci bcache_device_stop(&dc->disk); 16498c2ecf20Sopenharmony_ci return true; 16508c2ecf20Sopenharmony_ci} 16518c2ecf20Sopenharmony_ci 16528c2ecf20Sopenharmony_ci/* Cache set */ 16538c2ecf20Sopenharmony_ci 16548c2ecf20Sopenharmony_ci__printf(2, 3) 16558c2ecf20Sopenharmony_cibool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) 16568c2ecf20Sopenharmony_ci{ 16578c2ecf20Sopenharmony_ci struct va_format vaf; 16588c2ecf20Sopenharmony_ci va_list args; 16598c2ecf20Sopenharmony_ci 16608c2ecf20Sopenharmony_ci if (c->on_error != ON_ERROR_PANIC && 16618c2ecf20Sopenharmony_ci test_bit(CACHE_SET_STOPPING, &c->flags)) 16628c2ecf20Sopenharmony_ci return false; 16638c2ecf20Sopenharmony_ci 16648c2ecf20Sopenharmony_ci if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) 16658c2ecf20Sopenharmony_ci pr_info("CACHE_SET_IO_DISABLE already set\n"); 16668c2ecf20Sopenharmony_ci 16678c2ecf20Sopenharmony_ci /* 16688c2ecf20Sopenharmony_ci * XXX: we can be called from atomic context 16698c2ecf20Sopenharmony_ci * acquire_console_sem(); 16708c2ecf20Sopenharmony_ci */ 16718c2ecf20Sopenharmony_ci 16728c2ecf20Sopenharmony_ci va_start(args, fmt); 16738c2ecf20Sopenharmony_ci 16748c2ecf20Sopenharmony_ci vaf.fmt = fmt; 16758c2ecf20Sopenharmony_ci vaf.va = &args; 16768c2ecf20Sopenharmony_ci 16778c2ecf20Sopenharmony_ci pr_err("error on %pU: %pV, disabling caching\n", 16788c2ecf20Sopenharmony_ci c->set_uuid, &vaf); 16798c2ecf20Sopenharmony_ci 16808c2ecf20Sopenharmony_ci va_end(args); 16818c2ecf20Sopenharmony_ci 16828c2ecf20Sopenharmony_ci if (c->on_error == ON_ERROR_PANIC) 16838c2ecf20Sopenharmony_ci panic("panic forced after error\n"); 16848c2ecf20Sopenharmony_ci 16858c2ecf20Sopenharmony_ci bch_cache_set_unregister(c); 16868c2ecf20Sopenharmony_ci return true; 16878c2ecf20Sopenharmony_ci} 16888c2ecf20Sopenharmony_ci 16898c2ecf20Sopenharmony_ci/* When c->kobj released */ 16908c2ecf20Sopenharmony_civoid bch_cache_set_release(struct kobject 
*kobj) 16918c2ecf20Sopenharmony_ci{ 16928c2ecf20Sopenharmony_ci struct cache_set *c = container_of(kobj, struct cache_set, kobj); 16938c2ecf20Sopenharmony_ci 16948c2ecf20Sopenharmony_ci kfree(c); 16958c2ecf20Sopenharmony_ci module_put(THIS_MODULE); 16968c2ecf20Sopenharmony_ci} 16978c2ecf20Sopenharmony_ci 16988c2ecf20Sopenharmony_cistatic void cache_set_free(struct closure *cl) 16998c2ecf20Sopenharmony_ci{ 17008c2ecf20Sopenharmony_ci struct cache_set *c = container_of(cl, struct cache_set, cl); 17018c2ecf20Sopenharmony_ci struct cache *ca; 17028c2ecf20Sopenharmony_ci 17038c2ecf20Sopenharmony_ci debugfs_remove(c->debug); 17048c2ecf20Sopenharmony_ci 17058c2ecf20Sopenharmony_ci bch_open_buckets_free(c); 17068c2ecf20Sopenharmony_ci bch_btree_cache_free(c); 17078c2ecf20Sopenharmony_ci bch_journal_free(c); 17088c2ecf20Sopenharmony_ci 17098c2ecf20Sopenharmony_ci mutex_lock(&bch_register_lock); 17108c2ecf20Sopenharmony_ci bch_bset_sort_state_free(&c->sort); 17118c2ecf20Sopenharmony_ci free_pages((unsigned long) c->uuids, ilog2(meta_bucket_pages(&c->cache->sb))); 17128c2ecf20Sopenharmony_ci 17138c2ecf20Sopenharmony_ci ca = c->cache; 17148c2ecf20Sopenharmony_ci if (ca) { 17158c2ecf20Sopenharmony_ci ca->set = NULL; 17168c2ecf20Sopenharmony_ci c->cache = NULL; 17178c2ecf20Sopenharmony_ci kobject_put(&ca->kobj); 17188c2ecf20Sopenharmony_ci } 17198c2ecf20Sopenharmony_ci 17208c2ecf20Sopenharmony_ci 17218c2ecf20Sopenharmony_ci if (c->moving_gc_wq) 17228c2ecf20Sopenharmony_ci destroy_workqueue(c->moving_gc_wq); 17238c2ecf20Sopenharmony_ci bioset_exit(&c->bio_split); 17248c2ecf20Sopenharmony_ci mempool_exit(&c->fill_iter); 17258c2ecf20Sopenharmony_ci mempool_exit(&c->bio_meta); 17268c2ecf20Sopenharmony_ci mempool_exit(&c->search); 17278c2ecf20Sopenharmony_ci kfree(c->devices); 17288c2ecf20Sopenharmony_ci 17298c2ecf20Sopenharmony_ci list_del(&c->list); 17308c2ecf20Sopenharmony_ci mutex_unlock(&bch_register_lock); 17318c2ecf20Sopenharmony_ci 17328c2ecf20Sopenharmony_ci pr_info("Cache 
set %pU unregistered\n", c->set_uuid); 17338c2ecf20Sopenharmony_ci wake_up(&unregister_wait); 17348c2ecf20Sopenharmony_ci 17358c2ecf20Sopenharmony_ci closure_debug_destroy(&c->cl); 17368c2ecf20Sopenharmony_ci kobject_put(&c->kobj); 17378c2ecf20Sopenharmony_ci} 17388c2ecf20Sopenharmony_ci 17398c2ecf20Sopenharmony_cistatic void cache_set_flush(struct closure *cl) 17408c2ecf20Sopenharmony_ci{ 17418c2ecf20Sopenharmony_ci struct cache_set *c = container_of(cl, struct cache_set, caching); 17428c2ecf20Sopenharmony_ci struct cache *ca = c->cache; 17438c2ecf20Sopenharmony_ci struct btree *b; 17448c2ecf20Sopenharmony_ci 17458c2ecf20Sopenharmony_ci bch_cache_accounting_destroy(&c->accounting); 17468c2ecf20Sopenharmony_ci 17478c2ecf20Sopenharmony_ci kobject_put(&c->internal); 17488c2ecf20Sopenharmony_ci kobject_del(&c->kobj); 17498c2ecf20Sopenharmony_ci 17508c2ecf20Sopenharmony_ci if (!IS_ERR_OR_NULL(c->gc_thread)) 17518c2ecf20Sopenharmony_ci kthread_stop(c->gc_thread); 17528c2ecf20Sopenharmony_ci 17538c2ecf20Sopenharmony_ci if (!IS_ERR(c->root)) 17548c2ecf20Sopenharmony_ci list_add(&c->root->list, &c->btree_cache); 17558c2ecf20Sopenharmony_ci 17568c2ecf20Sopenharmony_ci /* 17578c2ecf20Sopenharmony_ci * Avoid flushing cached nodes if cache set is retiring 17588c2ecf20Sopenharmony_ci * due to too many I/O errors detected. 
17598c2ecf20Sopenharmony_ci */ 17608c2ecf20Sopenharmony_ci if (!test_bit(CACHE_SET_IO_DISABLE, &c->flags)) 17618c2ecf20Sopenharmony_ci list_for_each_entry(b, &c->btree_cache, list) { 17628c2ecf20Sopenharmony_ci mutex_lock(&b->write_lock); 17638c2ecf20Sopenharmony_ci if (btree_node_dirty(b)) 17648c2ecf20Sopenharmony_ci __bch_btree_node_write(b, NULL); 17658c2ecf20Sopenharmony_ci mutex_unlock(&b->write_lock); 17668c2ecf20Sopenharmony_ci } 17678c2ecf20Sopenharmony_ci 17688c2ecf20Sopenharmony_ci if (ca->alloc_thread) 17698c2ecf20Sopenharmony_ci kthread_stop(ca->alloc_thread); 17708c2ecf20Sopenharmony_ci 17718c2ecf20Sopenharmony_ci if (c->journal.cur) { 17728c2ecf20Sopenharmony_ci cancel_delayed_work_sync(&c->journal.work); 17738c2ecf20Sopenharmony_ci /* flush last journal entry if needed */ 17748c2ecf20Sopenharmony_ci c->journal.work.work.func(&c->journal.work.work); 17758c2ecf20Sopenharmony_ci } 17768c2ecf20Sopenharmony_ci 17778c2ecf20Sopenharmony_ci closure_return(cl); 17788c2ecf20Sopenharmony_ci} 17798c2ecf20Sopenharmony_ci 17808c2ecf20Sopenharmony_ci/* 17818c2ecf20Sopenharmony_ci * This function is only called when CACHE_SET_IO_DISABLE is set, which means 17828c2ecf20Sopenharmony_ci * cache set is unregistering due to too many I/O errors. 
In this condition, 17838c2ecf20Sopenharmony_ci * the bcache device might be stopped, it depends on stop_when_cache_set_failed 17848c2ecf20Sopenharmony_ci * value and whether the broken cache has dirty data: 17858c2ecf20Sopenharmony_ci * 17868c2ecf20Sopenharmony_ci * dc->stop_when_cache_set_failed dc->has_dirty stop bcache device 17878c2ecf20Sopenharmony_ci * BCH_CACHED_STOP_AUTO 0 NO 17888c2ecf20Sopenharmony_ci * BCH_CACHED_STOP_AUTO 1 YES 17898c2ecf20Sopenharmony_ci * BCH_CACHED_DEV_STOP_ALWAYS 0 YES 17908c2ecf20Sopenharmony_ci * BCH_CACHED_DEV_STOP_ALWAYS 1 YES 17918c2ecf20Sopenharmony_ci * 17928c2ecf20Sopenharmony_ci * The expected behavior is, if stop_when_cache_set_failed is configured to 17938c2ecf20Sopenharmony_ci * "auto" via sysfs interface, the bcache device will not be stopped if the 17948c2ecf20Sopenharmony_ci * backing device is clean on the broken cache device. 17958c2ecf20Sopenharmony_ci */ 17968c2ecf20Sopenharmony_cistatic void conditional_stop_bcache_device(struct cache_set *c, 17978c2ecf20Sopenharmony_ci struct bcache_device *d, 17988c2ecf20Sopenharmony_ci struct cached_dev *dc) 17998c2ecf20Sopenharmony_ci{ 18008c2ecf20Sopenharmony_ci if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) { 18018c2ecf20Sopenharmony_ci pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.\n", 18028c2ecf20Sopenharmony_ci d->disk->disk_name, c->set_uuid); 18038c2ecf20Sopenharmony_ci bcache_device_stop(d); 18048c2ecf20Sopenharmony_ci } else if (atomic_read(&dc->has_dirty)) { 18058c2ecf20Sopenharmony_ci /* 18068c2ecf20Sopenharmony_ci * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO 18078c2ecf20Sopenharmony_ci * and dc->has_dirty == 1 18088c2ecf20Sopenharmony_ci */ 18098c2ecf20Sopenharmony_ci pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.\n", 18108c2ecf20Sopenharmony_ci d->disk->disk_name); 18118c2ecf20Sopenharmony_ci /* 
18128c2ecf20Sopenharmony_ci * There might be a small time gap that cache set is 18138c2ecf20Sopenharmony_ci * released but bcache device is not. Inside this time 18148c2ecf20Sopenharmony_ci * gap, regular I/O requests will directly go into 18158c2ecf20Sopenharmony_ci * backing device as no cache set attached to. This 18168c2ecf20Sopenharmony_ci * behavior may also introduce potential inconsistence 18178c2ecf20Sopenharmony_ci * data in writeback mode while cache is dirty. 18188c2ecf20Sopenharmony_ci * Therefore before calling bcache_device_stop() due 18198c2ecf20Sopenharmony_ci * to a broken cache device, dc->io_disable should be 18208c2ecf20Sopenharmony_ci * explicitly set to true. 18218c2ecf20Sopenharmony_ci */ 18228c2ecf20Sopenharmony_ci dc->io_disable = true; 18238c2ecf20Sopenharmony_ci /* make others know io_disable is true earlier */ 18248c2ecf20Sopenharmony_ci smp_mb(); 18258c2ecf20Sopenharmony_ci bcache_device_stop(d); 18268c2ecf20Sopenharmony_ci } else { 18278c2ecf20Sopenharmony_ci /* 18288c2ecf20Sopenharmony_ci * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO 18298c2ecf20Sopenharmony_ci * and dc->has_dirty == 0 18308c2ecf20Sopenharmony_ci */ 18318c2ecf20Sopenharmony_ci pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.\n", 18328c2ecf20Sopenharmony_ci d->disk->disk_name); 18338c2ecf20Sopenharmony_ci } 18348c2ecf20Sopenharmony_ci} 18358c2ecf20Sopenharmony_ci 18368c2ecf20Sopenharmony_cistatic void __cache_set_unregister(struct closure *cl) 18378c2ecf20Sopenharmony_ci{ 18388c2ecf20Sopenharmony_ci struct cache_set *c = container_of(cl, struct cache_set, caching); 18398c2ecf20Sopenharmony_ci struct cached_dev *dc; 18408c2ecf20Sopenharmony_ci struct bcache_device *d; 18418c2ecf20Sopenharmony_ci size_t i; 18428c2ecf20Sopenharmony_ci 18438c2ecf20Sopenharmony_ci mutex_lock(&bch_register_lock); 18448c2ecf20Sopenharmony_ci 18458c2ecf20Sopenharmony_ci for (i = 0; i < c->devices_max_used; i++) { 18468c2ecf20Sopenharmony_ci d 
= c->devices[i]; 18478c2ecf20Sopenharmony_ci if (!d) 18488c2ecf20Sopenharmony_ci continue; 18498c2ecf20Sopenharmony_ci 18508c2ecf20Sopenharmony_ci if (!UUID_FLASH_ONLY(&c->uuids[i]) && 18518c2ecf20Sopenharmony_ci test_bit(CACHE_SET_UNREGISTERING, &c->flags)) { 18528c2ecf20Sopenharmony_ci dc = container_of(d, struct cached_dev, disk); 18538c2ecf20Sopenharmony_ci bch_cached_dev_detach(dc); 18548c2ecf20Sopenharmony_ci if (test_bit(CACHE_SET_IO_DISABLE, &c->flags)) 18558c2ecf20Sopenharmony_ci conditional_stop_bcache_device(c, d, dc); 18568c2ecf20Sopenharmony_ci } else { 18578c2ecf20Sopenharmony_ci bcache_device_stop(d); 18588c2ecf20Sopenharmony_ci } 18598c2ecf20Sopenharmony_ci } 18608c2ecf20Sopenharmony_ci 18618c2ecf20Sopenharmony_ci mutex_unlock(&bch_register_lock); 18628c2ecf20Sopenharmony_ci 18638c2ecf20Sopenharmony_ci continue_at(cl, cache_set_flush, system_wq); 18648c2ecf20Sopenharmony_ci} 18658c2ecf20Sopenharmony_ci 18668c2ecf20Sopenharmony_civoid bch_cache_set_stop(struct cache_set *c) 18678c2ecf20Sopenharmony_ci{ 18688c2ecf20Sopenharmony_ci if (!test_and_set_bit(CACHE_SET_STOPPING, &c->flags)) 18698c2ecf20Sopenharmony_ci /* closure_fn set to __cache_set_unregister() */ 18708c2ecf20Sopenharmony_ci closure_queue(&c->caching); 18718c2ecf20Sopenharmony_ci} 18728c2ecf20Sopenharmony_ci 18738c2ecf20Sopenharmony_civoid bch_cache_set_unregister(struct cache_set *c) 18748c2ecf20Sopenharmony_ci{ 18758c2ecf20Sopenharmony_ci set_bit(CACHE_SET_UNREGISTERING, &c->flags); 18768c2ecf20Sopenharmony_ci bch_cache_set_stop(c); 18778c2ecf20Sopenharmony_ci} 18788c2ecf20Sopenharmony_ci 18798c2ecf20Sopenharmony_ci#define alloc_meta_bucket_pages(gfp, sb) \ 18808c2ecf20Sopenharmony_ci ((void *) __get_free_pages(__GFP_ZERO|__GFP_COMP|gfp, ilog2(meta_bucket_pages(sb)))) 18818c2ecf20Sopenharmony_ci 18828c2ecf20Sopenharmony_cistruct cache_set *bch_cache_set_alloc(struct cache_sb *sb) 18838c2ecf20Sopenharmony_ci{ 18848c2ecf20Sopenharmony_ci int iter_size; 18858c2ecf20Sopenharmony_ci struct 
cache *ca = container_of(sb, struct cache, sb); 18868c2ecf20Sopenharmony_ci struct cache_set *c = kzalloc(sizeof(struct cache_set), GFP_KERNEL); 18878c2ecf20Sopenharmony_ci 18888c2ecf20Sopenharmony_ci if (!c) 18898c2ecf20Sopenharmony_ci return NULL; 18908c2ecf20Sopenharmony_ci 18918c2ecf20Sopenharmony_ci __module_get(THIS_MODULE); 18928c2ecf20Sopenharmony_ci closure_init(&c->cl, NULL); 18938c2ecf20Sopenharmony_ci set_closure_fn(&c->cl, cache_set_free, system_wq); 18948c2ecf20Sopenharmony_ci 18958c2ecf20Sopenharmony_ci closure_init(&c->caching, &c->cl); 18968c2ecf20Sopenharmony_ci set_closure_fn(&c->caching, __cache_set_unregister, system_wq); 18978c2ecf20Sopenharmony_ci 18988c2ecf20Sopenharmony_ci /* Maybe create continue_at_noreturn() and use it here? */ 18998c2ecf20Sopenharmony_ci closure_set_stopped(&c->cl); 19008c2ecf20Sopenharmony_ci closure_put(&c->cl); 19018c2ecf20Sopenharmony_ci 19028c2ecf20Sopenharmony_ci kobject_init(&c->kobj, &bch_cache_set_ktype); 19038c2ecf20Sopenharmony_ci kobject_init(&c->internal, &bch_cache_set_internal_ktype); 19048c2ecf20Sopenharmony_ci 19058c2ecf20Sopenharmony_ci bch_cache_accounting_init(&c->accounting, &c->cl); 19068c2ecf20Sopenharmony_ci 19078c2ecf20Sopenharmony_ci memcpy(c->set_uuid, sb->set_uuid, 16); 19088c2ecf20Sopenharmony_ci 19098c2ecf20Sopenharmony_ci c->cache = ca; 19108c2ecf20Sopenharmony_ci c->cache->set = c; 19118c2ecf20Sopenharmony_ci c->bucket_bits = ilog2(sb->bucket_size); 19128c2ecf20Sopenharmony_ci c->block_bits = ilog2(sb->block_size); 19138c2ecf20Sopenharmony_ci c->nr_uuids = meta_bucket_bytes(sb) / sizeof(struct uuid_entry); 19148c2ecf20Sopenharmony_ci c->devices_max_used = 0; 19158c2ecf20Sopenharmony_ci atomic_set(&c->attached_dev_nr, 0); 19168c2ecf20Sopenharmony_ci c->btree_pages = meta_bucket_pages(sb); 19178c2ecf20Sopenharmony_ci if (c->btree_pages > BTREE_MAX_PAGES) 19188c2ecf20Sopenharmony_ci c->btree_pages = max_t(int, c->btree_pages / 4, 19198c2ecf20Sopenharmony_ci BTREE_MAX_PAGES); 
19208c2ecf20Sopenharmony_ci 19218c2ecf20Sopenharmony_ci sema_init(&c->sb_write_mutex, 1); 19228c2ecf20Sopenharmony_ci mutex_init(&c->bucket_lock); 19238c2ecf20Sopenharmony_ci init_waitqueue_head(&c->btree_cache_wait); 19248c2ecf20Sopenharmony_ci spin_lock_init(&c->btree_cannibalize_lock); 19258c2ecf20Sopenharmony_ci init_waitqueue_head(&c->bucket_wait); 19268c2ecf20Sopenharmony_ci init_waitqueue_head(&c->gc_wait); 19278c2ecf20Sopenharmony_ci sema_init(&c->uuid_write_mutex, 1); 19288c2ecf20Sopenharmony_ci 19298c2ecf20Sopenharmony_ci spin_lock_init(&c->btree_gc_time.lock); 19308c2ecf20Sopenharmony_ci spin_lock_init(&c->btree_split_time.lock); 19318c2ecf20Sopenharmony_ci spin_lock_init(&c->btree_read_time.lock); 19328c2ecf20Sopenharmony_ci 19338c2ecf20Sopenharmony_ci bch_moving_init_cache_set(c); 19348c2ecf20Sopenharmony_ci 19358c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&c->list); 19368c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&c->cached_devs); 19378c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&c->btree_cache); 19388c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&c->btree_cache_freeable); 19398c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&c->btree_cache_freed); 19408c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&c->data_buckets); 19418c2ecf20Sopenharmony_ci 19428c2ecf20Sopenharmony_ci iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size + 1) * 19438c2ecf20Sopenharmony_ci sizeof(struct btree_iter_set); 19448c2ecf20Sopenharmony_ci 19458c2ecf20Sopenharmony_ci c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL); 19468c2ecf20Sopenharmony_ci if (!c->devices) 19478c2ecf20Sopenharmony_ci goto err; 19488c2ecf20Sopenharmony_ci 19498c2ecf20Sopenharmony_ci if (mempool_init_slab_pool(&c->search, 32, bch_search_cache)) 19508c2ecf20Sopenharmony_ci goto err; 19518c2ecf20Sopenharmony_ci 19528c2ecf20Sopenharmony_ci if (mempool_init_kmalloc_pool(&c->bio_meta, 2, 19538c2ecf20Sopenharmony_ci sizeof(struct bbio) + 19548c2ecf20Sopenharmony_ci sizeof(struct bio_vec) * meta_bucket_pages(sb))) 
19558c2ecf20Sopenharmony_ci goto err; 19568c2ecf20Sopenharmony_ci 19578c2ecf20Sopenharmony_ci if (mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size)) 19588c2ecf20Sopenharmony_ci goto err; 19598c2ecf20Sopenharmony_ci 19608c2ecf20Sopenharmony_ci if (bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio), 19618c2ecf20Sopenharmony_ci BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER)) 19628c2ecf20Sopenharmony_ci goto err; 19638c2ecf20Sopenharmony_ci 19648c2ecf20Sopenharmony_ci c->uuids = alloc_meta_bucket_pages(GFP_KERNEL, sb); 19658c2ecf20Sopenharmony_ci if (!c->uuids) 19668c2ecf20Sopenharmony_ci goto err; 19678c2ecf20Sopenharmony_ci 19688c2ecf20Sopenharmony_ci c->moving_gc_wq = alloc_workqueue("bcache_gc", WQ_MEM_RECLAIM, 0); 19698c2ecf20Sopenharmony_ci if (!c->moving_gc_wq) 19708c2ecf20Sopenharmony_ci goto err; 19718c2ecf20Sopenharmony_ci 19728c2ecf20Sopenharmony_ci if (bch_journal_alloc(c)) 19738c2ecf20Sopenharmony_ci goto err; 19748c2ecf20Sopenharmony_ci 19758c2ecf20Sopenharmony_ci if (bch_btree_cache_alloc(c)) 19768c2ecf20Sopenharmony_ci goto err; 19778c2ecf20Sopenharmony_ci 19788c2ecf20Sopenharmony_ci if (bch_open_buckets_alloc(c)) 19798c2ecf20Sopenharmony_ci goto err; 19808c2ecf20Sopenharmony_ci 19818c2ecf20Sopenharmony_ci if (bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages))) 19828c2ecf20Sopenharmony_ci goto err; 19838c2ecf20Sopenharmony_ci 19848c2ecf20Sopenharmony_ci c->congested_read_threshold_us = 2000; 19858c2ecf20Sopenharmony_ci c->congested_write_threshold_us = 20000; 19868c2ecf20Sopenharmony_ci c->error_limit = DEFAULT_IO_ERROR_LIMIT; 19878c2ecf20Sopenharmony_ci c->idle_max_writeback_rate_enabled = 1; 19888c2ecf20Sopenharmony_ci WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags)); 19898c2ecf20Sopenharmony_ci 19908c2ecf20Sopenharmony_ci return c; 19918c2ecf20Sopenharmony_cierr: 19928c2ecf20Sopenharmony_ci bch_cache_set_unregister(c); 19938c2ecf20Sopenharmony_ci return NULL; 19948c2ecf20Sopenharmony_ci} 19958c2ecf20Sopenharmony_ci 
19968c2ecf20Sopenharmony_cistatic int run_cache_set(struct cache_set *c) 19978c2ecf20Sopenharmony_ci{ 19988c2ecf20Sopenharmony_ci const char *err = "cannot allocate memory"; 19998c2ecf20Sopenharmony_ci struct cached_dev *dc, *t; 20008c2ecf20Sopenharmony_ci struct cache *ca = c->cache; 20018c2ecf20Sopenharmony_ci struct closure cl; 20028c2ecf20Sopenharmony_ci LIST_HEAD(journal); 20038c2ecf20Sopenharmony_ci struct journal_replay *l; 20048c2ecf20Sopenharmony_ci 20058c2ecf20Sopenharmony_ci closure_init_stack(&cl); 20068c2ecf20Sopenharmony_ci 20078c2ecf20Sopenharmony_ci c->nbuckets = ca->sb.nbuckets; 20088c2ecf20Sopenharmony_ci set_gc_sectors(c); 20098c2ecf20Sopenharmony_ci 20108c2ecf20Sopenharmony_ci if (CACHE_SYNC(&c->cache->sb)) { 20118c2ecf20Sopenharmony_ci struct bkey *k; 20128c2ecf20Sopenharmony_ci struct jset *j; 20138c2ecf20Sopenharmony_ci 20148c2ecf20Sopenharmony_ci err = "cannot allocate memory for journal"; 20158c2ecf20Sopenharmony_ci if (bch_journal_read(c, &journal)) 20168c2ecf20Sopenharmony_ci goto err; 20178c2ecf20Sopenharmony_ci 20188c2ecf20Sopenharmony_ci pr_debug("btree_journal_read() done\n"); 20198c2ecf20Sopenharmony_ci 20208c2ecf20Sopenharmony_ci err = "no journal entries found"; 20218c2ecf20Sopenharmony_ci if (list_empty(&journal)) 20228c2ecf20Sopenharmony_ci goto err; 20238c2ecf20Sopenharmony_ci 20248c2ecf20Sopenharmony_ci j = &list_entry(journal.prev, struct journal_replay, list)->j; 20258c2ecf20Sopenharmony_ci 20268c2ecf20Sopenharmony_ci err = "IO error reading priorities"; 20278c2ecf20Sopenharmony_ci if (prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev])) 20288c2ecf20Sopenharmony_ci goto err; 20298c2ecf20Sopenharmony_ci 20308c2ecf20Sopenharmony_ci /* 20318c2ecf20Sopenharmony_ci * If prio_read() fails it'll call cache_set_error and we'll 20328c2ecf20Sopenharmony_ci * tear everything down right away, but if we perhaps checked 20338c2ecf20Sopenharmony_ci * sooner we could avoid journal replay. 
20348c2ecf20Sopenharmony_ci */ 20358c2ecf20Sopenharmony_ci 20368c2ecf20Sopenharmony_ci k = &j->btree_root; 20378c2ecf20Sopenharmony_ci 20388c2ecf20Sopenharmony_ci err = "bad btree root"; 20398c2ecf20Sopenharmony_ci if (__bch_btree_ptr_invalid(c, k)) 20408c2ecf20Sopenharmony_ci goto err; 20418c2ecf20Sopenharmony_ci 20428c2ecf20Sopenharmony_ci err = "error reading btree root"; 20438c2ecf20Sopenharmony_ci c->root = bch_btree_node_get(c, NULL, k, 20448c2ecf20Sopenharmony_ci j->btree_level, 20458c2ecf20Sopenharmony_ci true, NULL); 20468c2ecf20Sopenharmony_ci if (IS_ERR(c->root)) 20478c2ecf20Sopenharmony_ci goto err; 20488c2ecf20Sopenharmony_ci 20498c2ecf20Sopenharmony_ci list_del_init(&c->root->list); 20508c2ecf20Sopenharmony_ci rw_unlock(true, c->root); 20518c2ecf20Sopenharmony_ci 20528c2ecf20Sopenharmony_ci err = uuid_read(c, j, &cl); 20538c2ecf20Sopenharmony_ci if (err) 20548c2ecf20Sopenharmony_ci goto err; 20558c2ecf20Sopenharmony_ci 20568c2ecf20Sopenharmony_ci err = "error in recovery"; 20578c2ecf20Sopenharmony_ci if (bch_btree_check(c)) 20588c2ecf20Sopenharmony_ci goto err; 20598c2ecf20Sopenharmony_ci 20608c2ecf20Sopenharmony_ci bch_journal_mark(c, &journal); 20618c2ecf20Sopenharmony_ci bch_initial_gc_finish(c); 20628c2ecf20Sopenharmony_ci pr_debug("btree_check() done\n"); 20638c2ecf20Sopenharmony_ci 20648c2ecf20Sopenharmony_ci /* 20658c2ecf20Sopenharmony_ci * bcache_journal_next() can't happen sooner, or 20668c2ecf20Sopenharmony_ci * btree_gc_finish() will give spurious errors about last_gc > 20678c2ecf20Sopenharmony_ci * gc_gen - this is a hack but oh well. 
20688c2ecf20Sopenharmony_ci */ 20698c2ecf20Sopenharmony_ci bch_journal_next(&c->journal); 20708c2ecf20Sopenharmony_ci 20718c2ecf20Sopenharmony_ci err = "error starting allocator thread"; 20728c2ecf20Sopenharmony_ci if (bch_cache_allocator_start(ca)) 20738c2ecf20Sopenharmony_ci goto err; 20748c2ecf20Sopenharmony_ci 20758c2ecf20Sopenharmony_ci /* 20768c2ecf20Sopenharmony_ci * First place it's safe to allocate: btree_check() and 20778c2ecf20Sopenharmony_ci * btree_gc_finish() have to run before we have buckets to 20788c2ecf20Sopenharmony_ci * allocate, and bch_bucket_alloc_set() might cause a journal 20798c2ecf20Sopenharmony_ci * entry to be written so bcache_journal_next() has to be called 20808c2ecf20Sopenharmony_ci * first. 20818c2ecf20Sopenharmony_ci * 20828c2ecf20Sopenharmony_ci * If the uuids were in the old format we have to rewrite them 20838c2ecf20Sopenharmony_ci * before the next journal entry is written: 20848c2ecf20Sopenharmony_ci */ 20858c2ecf20Sopenharmony_ci if (j->version < BCACHE_JSET_VERSION_UUID) 20868c2ecf20Sopenharmony_ci __uuid_write(c); 20878c2ecf20Sopenharmony_ci 20888c2ecf20Sopenharmony_ci err = "bcache: replay journal failed"; 20898c2ecf20Sopenharmony_ci if (bch_journal_replay(c, &journal)) 20908c2ecf20Sopenharmony_ci goto err; 20918c2ecf20Sopenharmony_ci } else { 20928c2ecf20Sopenharmony_ci unsigned int j; 20938c2ecf20Sopenharmony_ci 20948c2ecf20Sopenharmony_ci pr_notice("invalidating existing data\n"); 20958c2ecf20Sopenharmony_ci ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7, 20968c2ecf20Sopenharmony_ci 2, SB_JOURNAL_BUCKETS); 20978c2ecf20Sopenharmony_ci 20988c2ecf20Sopenharmony_ci for (j = 0; j < ca->sb.keys; j++) 20998c2ecf20Sopenharmony_ci ca->sb.d[j] = ca->sb.first_bucket + j; 21008c2ecf20Sopenharmony_ci 21018c2ecf20Sopenharmony_ci bch_initial_gc_finish(c); 21028c2ecf20Sopenharmony_ci 21038c2ecf20Sopenharmony_ci err = "error starting allocator thread"; 21048c2ecf20Sopenharmony_ci if (bch_cache_allocator_start(ca)) 
21058c2ecf20Sopenharmony_ci goto err; 21068c2ecf20Sopenharmony_ci 21078c2ecf20Sopenharmony_ci mutex_lock(&c->bucket_lock); 21088c2ecf20Sopenharmony_ci bch_prio_write(ca, true); 21098c2ecf20Sopenharmony_ci mutex_unlock(&c->bucket_lock); 21108c2ecf20Sopenharmony_ci 21118c2ecf20Sopenharmony_ci err = "cannot allocate new UUID bucket"; 21128c2ecf20Sopenharmony_ci if (__uuid_write(c)) 21138c2ecf20Sopenharmony_ci goto err; 21148c2ecf20Sopenharmony_ci 21158c2ecf20Sopenharmony_ci err = "cannot allocate new btree root"; 21168c2ecf20Sopenharmony_ci c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL); 21178c2ecf20Sopenharmony_ci if (IS_ERR(c->root)) 21188c2ecf20Sopenharmony_ci goto err; 21198c2ecf20Sopenharmony_ci 21208c2ecf20Sopenharmony_ci mutex_lock(&c->root->write_lock); 21218c2ecf20Sopenharmony_ci bkey_copy_key(&c->root->key, &MAX_KEY); 21228c2ecf20Sopenharmony_ci bch_btree_node_write(c->root, &cl); 21238c2ecf20Sopenharmony_ci mutex_unlock(&c->root->write_lock); 21248c2ecf20Sopenharmony_ci 21258c2ecf20Sopenharmony_ci bch_btree_set_root(c->root); 21268c2ecf20Sopenharmony_ci rw_unlock(true, c->root); 21278c2ecf20Sopenharmony_ci 21288c2ecf20Sopenharmony_ci /* 21298c2ecf20Sopenharmony_ci * We don't want to write the first journal entry until 21308c2ecf20Sopenharmony_ci * everything is set up - fortunately journal entries won't be 21318c2ecf20Sopenharmony_ci * written until the SET_CACHE_SYNC() here: 21328c2ecf20Sopenharmony_ci */ 21338c2ecf20Sopenharmony_ci SET_CACHE_SYNC(&c->cache->sb, true); 21348c2ecf20Sopenharmony_ci 21358c2ecf20Sopenharmony_ci bch_journal_next(&c->journal); 21368c2ecf20Sopenharmony_ci bch_journal_meta(c, &cl); 21378c2ecf20Sopenharmony_ci } 21388c2ecf20Sopenharmony_ci 21398c2ecf20Sopenharmony_ci err = "error starting gc thread"; 21408c2ecf20Sopenharmony_ci if (bch_gc_thread_start(c)) 21418c2ecf20Sopenharmony_ci goto err; 21428c2ecf20Sopenharmony_ci 21438c2ecf20Sopenharmony_ci closure_sync(&cl); 21448c2ecf20Sopenharmony_ci c->cache->sb.last_mount = 
(u32)ktime_get_real_seconds(); 21458c2ecf20Sopenharmony_ci bcache_write_super(c); 21468c2ecf20Sopenharmony_ci 21478c2ecf20Sopenharmony_ci if (bch_has_feature_obso_large_bucket(&c->cache->sb)) 21488c2ecf20Sopenharmony_ci pr_err("Detect obsoleted large bucket layout, all attached bcache device will be read-only\n"); 21498c2ecf20Sopenharmony_ci 21508c2ecf20Sopenharmony_ci list_for_each_entry_safe(dc, t, &uncached_devices, list) 21518c2ecf20Sopenharmony_ci bch_cached_dev_attach(dc, c, NULL); 21528c2ecf20Sopenharmony_ci 21538c2ecf20Sopenharmony_ci flash_devs_run(c); 21548c2ecf20Sopenharmony_ci 21558c2ecf20Sopenharmony_ci bch_journal_space_reserve(&c->journal); 21568c2ecf20Sopenharmony_ci set_bit(CACHE_SET_RUNNING, &c->flags); 21578c2ecf20Sopenharmony_ci return 0; 21588c2ecf20Sopenharmony_cierr: 21598c2ecf20Sopenharmony_ci while (!list_empty(&journal)) { 21608c2ecf20Sopenharmony_ci l = list_first_entry(&journal, struct journal_replay, list); 21618c2ecf20Sopenharmony_ci list_del(&l->list); 21628c2ecf20Sopenharmony_ci kfree(l); 21638c2ecf20Sopenharmony_ci } 21648c2ecf20Sopenharmony_ci 21658c2ecf20Sopenharmony_ci closure_sync(&cl); 21668c2ecf20Sopenharmony_ci 21678c2ecf20Sopenharmony_ci bch_cache_set_error(c, "%s", err); 21688c2ecf20Sopenharmony_ci 21698c2ecf20Sopenharmony_ci return -EIO; 21708c2ecf20Sopenharmony_ci} 21718c2ecf20Sopenharmony_ci 21728c2ecf20Sopenharmony_cistatic const char *register_cache_set(struct cache *ca) 21738c2ecf20Sopenharmony_ci{ 21748c2ecf20Sopenharmony_ci char buf[12]; 21758c2ecf20Sopenharmony_ci const char *err = "cannot allocate memory"; 21768c2ecf20Sopenharmony_ci struct cache_set *c; 21778c2ecf20Sopenharmony_ci 21788c2ecf20Sopenharmony_ci list_for_each_entry(c, &bch_cache_sets, list) 21798c2ecf20Sopenharmony_ci if (!memcmp(c->set_uuid, ca->sb.set_uuid, 16)) { 21808c2ecf20Sopenharmony_ci if (c->cache) 21818c2ecf20Sopenharmony_ci return "duplicate cache set member"; 21828c2ecf20Sopenharmony_ci 21838c2ecf20Sopenharmony_ci goto found; 
21848c2ecf20Sopenharmony_ci } 21858c2ecf20Sopenharmony_ci 21868c2ecf20Sopenharmony_ci c = bch_cache_set_alloc(&ca->sb); 21878c2ecf20Sopenharmony_ci if (!c) 21888c2ecf20Sopenharmony_ci return err; 21898c2ecf20Sopenharmony_ci 21908c2ecf20Sopenharmony_ci err = "error creating kobject"; 21918c2ecf20Sopenharmony_ci if (kobject_add(&c->kobj, bcache_kobj, "%pU", c->set_uuid) || 21928c2ecf20Sopenharmony_ci kobject_add(&c->internal, &c->kobj, "internal")) 21938c2ecf20Sopenharmony_ci goto err; 21948c2ecf20Sopenharmony_ci 21958c2ecf20Sopenharmony_ci if (bch_cache_accounting_add_kobjs(&c->accounting, &c->kobj)) 21968c2ecf20Sopenharmony_ci goto err; 21978c2ecf20Sopenharmony_ci 21988c2ecf20Sopenharmony_ci bch_debug_init_cache_set(c); 21998c2ecf20Sopenharmony_ci 22008c2ecf20Sopenharmony_ci list_add(&c->list, &bch_cache_sets); 22018c2ecf20Sopenharmony_cifound: 22028c2ecf20Sopenharmony_ci sprintf(buf, "cache%i", ca->sb.nr_this_dev); 22038c2ecf20Sopenharmony_ci if (sysfs_create_link(&ca->kobj, &c->kobj, "set") || 22048c2ecf20Sopenharmony_ci sysfs_create_link(&c->kobj, &ca->kobj, buf)) 22058c2ecf20Sopenharmony_ci goto err; 22068c2ecf20Sopenharmony_ci 22078c2ecf20Sopenharmony_ci kobject_get(&ca->kobj); 22088c2ecf20Sopenharmony_ci ca->set = c; 22098c2ecf20Sopenharmony_ci ca->set->cache = ca; 22108c2ecf20Sopenharmony_ci 22118c2ecf20Sopenharmony_ci err = "failed to run cache set"; 22128c2ecf20Sopenharmony_ci if (run_cache_set(c) < 0) 22138c2ecf20Sopenharmony_ci goto err; 22148c2ecf20Sopenharmony_ci 22158c2ecf20Sopenharmony_ci return NULL; 22168c2ecf20Sopenharmony_cierr: 22178c2ecf20Sopenharmony_ci bch_cache_set_unregister(c); 22188c2ecf20Sopenharmony_ci return err; 22198c2ecf20Sopenharmony_ci} 22208c2ecf20Sopenharmony_ci 22218c2ecf20Sopenharmony_ci/* Cache device */ 22228c2ecf20Sopenharmony_ci 22238c2ecf20Sopenharmony_ci/* When ca->kobj released */ 22248c2ecf20Sopenharmony_civoid bch_cache_release(struct kobject *kobj) 22258c2ecf20Sopenharmony_ci{ 22268c2ecf20Sopenharmony_ci struct cache 
*ca = container_of(kobj, struct cache, kobj); 22278c2ecf20Sopenharmony_ci unsigned int i; 22288c2ecf20Sopenharmony_ci 22298c2ecf20Sopenharmony_ci if (ca->set) { 22308c2ecf20Sopenharmony_ci BUG_ON(ca->set->cache != ca); 22318c2ecf20Sopenharmony_ci ca->set->cache = NULL; 22328c2ecf20Sopenharmony_ci } 22338c2ecf20Sopenharmony_ci 22348c2ecf20Sopenharmony_ci free_pages((unsigned long) ca->disk_buckets, ilog2(meta_bucket_pages(&ca->sb))); 22358c2ecf20Sopenharmony_ci kfree(ca->prio_buckets); 22368c2ecf20Sopenharmony_ci vfree(ca->buckets); 22378c2ecf20Sopenharmony_ci 22388c2ecf20Sopenharmony_ci free_heap(&ca->heap); 22398c2ecf20Sopenharmony_ci free_fifo(&ca->free_inc); 22408c2ecf20Sopenharmony_ci 22418c2ecf20Sopenharmony_ci for (i = 0; i < RESERVE_NR; i++) 22428c2ecf20Sopenharmony_ci free_fifo(&ca->free[i]); 22438c2ecf20Sopenharmony_ci 22448c2ecf20Sopenharmony_ci if (ca->sb_disk) 22458c2ecf20Sopenharmony_ci put_page(virt_to_page(ca->sb_disk)); 22468c2ecf20Sopenharmony_ci 22478c2ecf20Sopenharmony_ci if (!IS_ERR_OR_NULL(ca->bdev)) 22488c2ecf20Sopenharmony_ci blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 22498c2ecf20Sopenharmony_ci 22508c2ecf20Sopenharmony_ci kfree(ca); 22518c2ecf20Sopenharmony_ci module_put(THIS_MODULE); 22528c2ecf20Sopenharmony_ci} 22538c2ecf20Sopenharmony_ci 22548c2ecf20Sopenharmony_cistatic int cache_alloc(struct cache *ca) 22558c2ecf20Sopenharmony_ci{ 22568c2ecf20Sopenharmony_ci size_t free; 22578c2ecf20Sopenharmony_ci size_t btree_buckets; 22588c2ecf20Sopenharmony_ci struct bucket *b; 22598c2ecf20Sopenharmony_ci int ret = -ENOMEM; 22608c2ecf20Sopenharmony_ci const char *err = NULL; 22618c2ecf20Sopenharmony_ci 22628c2ecf20Sopenharmony_ci __module_get(THIS_MODULE); 22638c2ecf20Sopenharmony_ci kobject_init(&ca->kobj, &bch_cache_ktype); 22648c2ecf20Sopenharmony_ci 22658c2ecf20Sopenharmony_ci bio_init(&ca->journal.bio, ca->journal.bio.bi_inline_vecs, 8); 22668c2ecf20Sopenharmony_ci 22678c2ecf20Sopenharmony_ci /* 22688c2ecf20Sopenharmony_ci * when 
ca->sb.njournal_buckets is not zero, journal exists, 22698c2ecf20Sopenharmony_ci * and in bch_journal_replay(), tree node may split, 22708c2ecf20Sopenharmony_ci * so bucket of RESERVE_BTREE type is needed, 22718c2ecf20Sopenharmony_ci * the worst situation is all journal buckets are valid journal, 22728c2ecf20Sopenharmony_ci * and all the keys need to replay, 22738c2ecf20Sopenharmony_ci * so the number of RESERVE_BTREE type buckets should be as much 22748c2ecf20Sopenharmony_ci * as journal buckets 22758c2ecf20Sopenharmony_ci */ 22768c2ecf20Sopenharmony_ci btree_buckets = ca->sb.njournal_buckets ?: 8; 22778c2ecf20Sopenharmony_ci free = roundup_pow_of_two(ca->sb.nbuckets) >> 10; 22788c2ecf20Sopenharmony_ci if (!free) { 22798c2ecf20Sopenharmony_ci ret = -EPERM; 22808c2ecf20Sopenharmony_ci err = "ca->sb.nbuckets is too small"; 22818c2ecf20Sopenharmony_ci goto err_free; 22828c2ecf20Sopenharmony_ci } 22838c2ecf20Sopenharmony_ci 22848c2ecf20Sopenharmony_ci if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, 22858c2ecf20Sopenharmony_ci GFP_KERNEL)) { 22868c2ecf20Sopenharmony_ci err = "ca->free[RESERVE_BTREE] alloc failed"; 22878c2ecf20Sopenharmony_ci goto err_btree_alloc; 22888c2ecf20Sopenharmony_ci } 22898c2ecf20Sopenharmony_ci 22908c2ecf20Sopenharmony_ci if (!init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), 22918c2ecf20Sopenharmony_ci GFP_KERNEL)) { 22928c2ecf20Sopenharmony_ci err = "ca->free[RESERVE_PRIO] alloc failed"; 22938c2ecf20Sopenharmony_ci goto err_prio_alloc; 22948c2ecf20Sopenharmony_ci } 22958c2ecf20Sopenharmony_ci 22968c2ecf20Sopenharmony_ci if (!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL)) { 22978c2ecf20Sopenharmony_ci err = "ca->free[RESERVE_MOVINGGC] alloc failed"; 22988c2ecf20Sopenharmony_ci goto err_movinggc_alloc; 22998c2ecf20Sopenharmony_ci } 23008c2ecf20Sopenharmony_ci 23018c2ecf20Sopenharmony_ci if (!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL)) { 23028c2ecf20Sopenharmony_ci err = "ca->free[RESERVE_NONE] alloc 
failed"; 23038c2ecf20Sopenharmony_ci goto err_none_alloc; 23048c2ecf20Sopenharmony_ci } 23058c2ecf20Sopenharmony_ci 23068c2ecf20Sopenharmony_ci if (!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL)) { 23078c2ecf20Sopenharmony_ci err = "ca->free_inc alloc failed"; 23088c2ecf20Sopenharmony_ci goto err_free_inc_alloc; 23098c2ecf20Sopenharmony_ci } 23108c2ecf20Sopenharmony_ci 23118c2ecf20Sopenharmony_ci if (!init_heap(&ca->heap, free << 3, GFP_KERNEL)) { 23128c2ecf20Sopenharmony_ci err = "ca->heap alloc failed"; 23138c2ecf20Sopenharmony_ci goto err_heap_alloc; 23148c2ecf20Sopenharmony_ci } 23158c2ecf20Sopenharmony_ci 23168c2ecf20Sopenharmony_ci ca->buckets = vzalloc(array_size(sizeof(struct bucket), 23178c2ecf20Sopenharmony_ci ca->sb.nbuckets)); 23188c2ecf20Sopenharmony_ci if (!ca->buckets) { 23198c2ecf20Sopenharmony_ci err = "ca->buckets alloc failed"; 23208c2ecf20Sopenharmony_ci goto err_buckets_alloc; 23218c2ecf20Sopenharmony_ci } 23228c2ecf20Sopenharmony_ci 23238c2ecf20Sopenharmony_ci ca->prio_buckets = kzalloc(array3_size(sizeof(uint64_t), 23248c2ecf20Sopenharmony_ci prio_buckets(ca), 2), 23258c2ecf20Sopenharmony_ci GFP_KERNEL); 23268c2ecf20Sopenharmony_ci if (!ca->prio_buckets) { 23278c2ecf20Sopenharmony_ci err = "ca->prio_buckets alloc failed"; 23288c2ecf20Sopenharmony_ci goto err_prio_buckets_alloc; 23298c2ecf20Sopenharmony_ci } 23308c2ecf20Sopenharmony_ci 23318c2ecf20Sopenharmony_ci ca->disk_buckets = alloc_meta_bucket_pages(GFP_KERNEL, &ca->sb); 23328c2ecf20Sopenharmony_ci if (!ca->disk_buckets) { 23338c2ecf20Sopenharmony_ci err = "ca->disk_buckets alloc failed"; 23348c2ecf20Sopenharmony_ci goto err_disk_buckets_alloc; 23358c2ecf20Sopenharmony_ci } 23368c2ecf20Sopenharmony_ci 23378c2ecf20Sopenharmony_ci ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca); 23388c2ecf20Sopenharmony_ci 23398c2ecf20Sopenharmony_ci for_each_bucket(b, ca) 23408c2ecf20Sopenharmony_ci atomic_set(&b->pin, 0); 23418c2ecf20Sopenharmony_ci return 0; 23428c2ecf20Sopenharmony_ci 
23438c2ecf20Sopenharmony_cierr_disk_buckets_alloc: 23448c2ecf20Sopenharmony_ci kfree(ca->prio_buckets); 23458c2ecf20Sopenharmony_cierr_prio_buckets_alloc: 23468c2ecf20Sopenharmony_ci vfree(ca->buckets); 23478c2ecf20Sopenharmony_cierr_buckets_alloc: 23488c2ecf20Sopenharmony_ci free_heap(&ca->heap); 23498c2ecf20Sopenharmony_cierr_heap_alloc: 23508c2ecf20Sopenharmony_ci free_fifo(&ca->free_inc); 23518c2ecf20Sopenharmony_cierr_free_inc_alloc: 23528c2ecf20Sopenharmony_ci free_fifo(&ca->free[RESERVE_NONE]); 23538c2ecf20Sopenharmony_cierr_none_alloc: 23548c2ecf20Sopenharmony_ci free_fifo(&ca->free[RESERVE_MOVINGGC]); 23558c2ecf20Sopenharmony_cierr_movinggc_alloc: 23568c2ecf20Sopenharmony_ci free_fifo(&ca->free[RESERVE_PRIO]); 23578c2ecf20Sopenharmony_cierr_prio_alloc: 23588c2ecf20Sopenharmony_ci free_fifo(&ca->free[RESERVE_BTREE]); 23598c2ecf20Sopenharmony_cierr_btree_alloc: 23608c2ecf20Sopenharmony_cierr_free: 23618c2ecf20Sopenharmony_ci module_put(THIS_MODULE); 23628c2ecf20Sopenharmony_ci if (err) 23638c2ecf20Sopenharmony_ci pr_notice("error %s: %s\n", ca->cache_dev_name, err); 23648c2ecf20Sopenharmony_ci return ret; 23658c2ecf20Sopenharmony_ci} 23668c2ecf20Sopenharmony_ci 23678c2ecf20Sopenharmony_cistatic int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, 23688c2ecf20Sopenharmony_ci struct block_device *bdev, struct cache *ca) 23698c2ecf20Sopenharmony_ci{ 23708c2ecf20Sopenharmony_ci const char *err = NULL; /* must be set for any error case */ 23718c2ecf20Sopenharmony_ci int ret = 0; 23728c2ecf20Sopenharmony_ci 23738c2ecf20Sopenharmony_ci bdevname(bdev, ca->cache_dev_name); 23748c2ecf20Sopenharmony_ci memcpy(&ca->sb, sb, sizeof(struct cache_sb)); 23758c2ecf20Sopenharmony_ci ca->bdev = bdev; 23768c2ecf20Sopenharmony_ci ca->bdev->bd_holder = ca; 23778c2ecf20Sopenharmony_ci ca->sb_disk = sb_disk; 23788c2ecf20Sopenharmony_ci 23798c2ecf20Sopenharmony_ci if (blk_queue_discard(bdev_get_queue(bdev))) 23808c2ecf20Sopenharmony_ci ca->discard = 
CACHE_DISCARD(&ca->sb); 23818c2ecf20Sopenharmony_ci 23828c2ecf20Sopenharmony_ci ret = cache_alloc(ca); 23838c2ecf20Sopenharmony_ci if (ret != 0) { 23848c2ecf20Sopenharmony_ci /* 23858c2ecf20Sopenharmony_ci * If we failed here, it means ca->kobj is not initialized yet, 23868c2ecf20Sopenharmony_ci * kobject_put() won't be called and there is no chance to 23878c2ecf20Sopenharmony_ci * call blkdev_put() to bdev in bch_cache_release(). So we 23888c2ecf20Sopenharmony_ci * explicitly call blkdev_put() here. 23898c2ecf20Sopenharmony_ci */ 23908c2ecf20Sopenharmony_ci blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 23918c2ecf20Sopenharmony_ci if (ret == -ENOMEM) 23928c2ecf20Sopenharmony_ci err = "cache_alloc(): -ENOMEM"; 23938c2ecf20Sopenharmony_ci else if (ret == -EPERM) 23948c2ecf20Sopenharmony_ci err = "cache_alloc(): cache device is too small"; 23958c2ecf20Sopenharmony_ci else 23968c2ecf20Sopenharmony_ci err = "cache_alloc(): unknown error"; 23978c2ecf20Sopenharmony_ci goto err; 23988c2ecf20Sopenharmony_ci } 23998c2ecf20Sopenharmony_ci 24008c2ecf20Sopenharmony_ci if (kobject_add(&ca->kobj, 24018c2ecf20Sopenharmony_ci &part_to_dev(bdev->bd_part)->kobj, 24028c2ecf20Sopenharmony_ci "bcache")) { 24038c2ecf20Sopenharmony_ci err = "error calling kobject_add"; 24048c2ecf20Sopenharmony_ci ret = -ENOMEM; 24058c2ecf20Sopenharmony_ci goto out; 24068c2ecf20Sopenharmony_ci } 24078c2ecf20Sopenharmony_ci 24088c2ecf20Sopenharmony_ci mutex_lock(&bch_register_lock); 24098c2ecf20Sopenharmony_ci err = register_cache_set(ca); 24108c2ecf20Sopenharmony_ci mutex_unlock(&bch_register_lock); 24118c2ecf20Sopenharmony_ci 24128c2ecf20Sopenharmony_ci if (err) { 24138c2ecf20Sopenharmony_ci ret = -ENODEV; 24148c2ecf20Sopenharmony_ci goto out; 24158c2ecf20Sopenharmony_ci } 24168c2ecf20Sopenharmony_ci 24178c2ecf20Sopenharmony_ci pr_info("registered cache device %s\n", ca->cache_dev_name); 24188c2ecf20Sopenharmony_ci 24198c2ecf20Sopenharmony_ciout: 24208c2ecf20Sopenharmony_ci kobject_put(&ca->kobj); 
24218c2ecf20Sopenharmony_ci 24228c2ecf20Sopenharmony_cierr: 24238c2ecf20Sopenharmony_ci if (err) 24248c2ecf20Sopenharmony_ci pr_notice("error %s: %s\n", ca->cache_dev_name, err); 24258c2ecf20Sopenharmony_ci 24268c2ecf20Sopenharmony_ci return ret; 24278c2ecf20Sopenharmony_ci} 24288c2ecf20Sopenharmony_ci 24298c2ecf20Sopenharmony_ci/* Global interfaces/init */ 24308c2ecf20Sopenharmony_ci 24318c2ecf20Sopenharmony_cistatic ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, 24328c2ecf20Sopenharmony_ci const char *buffer, size_t size); 24338c2ecf20Sopenharmony_cistatic ssize_t bch_pending_bdevs_cleanup(struct kobject *k, 24348c2ecf20Sopenharmony_ci struct kobj_attribute *attr, 24358c2ecf20Sopenharmony_ci const char *buffer, size_t size); 24368c2ecf20Sopenharmony_ci 24378c2ecf20Sopenharmony_cikobj_attribute_write(register, register_bcache); 24388c2ecf20Sopenharmony_cikobj_attribute_write(register_quiet, register_bcache); 24398c2ecf20Sopenharmony_cikobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup); 24408c2ecf20Sopenharmony_ci 24418c2ecf20Sopenharmony_cistatic bool bch_is_open_backing(struct block_device *bdev) 24428c2ecf20Sopenharmony_ci{ 24438c2ecf20Sopenharmony_ci struct cache_set *c, *tc; 24448c2ecf20Sopenharmony_ci struct cached_dev *dc, *t; 24458c2ecf20Sopenharmony_ci 24468c2ecf20Sopenharmony_ci list_for_each_entry_safe(c, tc, &bch_cache_sets, list) 24478c2ecf20Sopenharmony_ci list_for_each_entry_safe(dc, t, &c->cached_devs, list) 24488c2ecf20Sopenharmony_ci if (dc->bdev == bdev) 24498c2ecf20Sopenharmony_ci return true; 24508c2ecf20Sopenharmony_ci list_for_each_entry_safe(dc, t, &uncached_devices, list) 24518c2ecf20Sopenharmony_ci if (dc->bdev == bdev) 24528c2ecf20Sopenharmony_ci return true; 24538c2ecf20Sopenharmony_ci return false; 24548c2ecf20Sopenharmony_ci} 24558c2ecf20Sopenharmony_ci 24568c2ecf20Sopenharmony_cistatic bool bch_is_open_cache(struct block_device *bdev) 24578c2ecf20Sopenharmony_ci{ 24588c2ecf20Sopenharmony_ci 
struct cache_set *c, *tc; 24598c2ecf20Sopenharmony_ci 24608c2ecf20Sopenharmony_ci list_for_each_entry_safe(c, tc, &bch_cache_sets, list) { 24618c2ecf20Sopenharmony_ci struct cache *ca = c->cache; 24628c2ecf20Sopenharmony_ci 24638c2ecf20Sopenharmony_ci if (ca->bdev == bdev) 24648c2ecf20Sopenharmony_ci return true; 24658c2ecf20Sopenharmony_ci } 24668c2ecf20Sopenharmony_ci 24678c2ecf20Sopenharmony_ci return false; 24688c2ecf20Sopenharmony_ci} 24698c2ecf20Sopenharmony_ci 24708c2ecf20Sopenharmony_cistatic bool bch_is_open(struct block_device *bdev) 24718c2ecf20Sopenharmony_ci{ 24728c2ecf20Sopenharmony_ci return bch_is_open_cache(bdev) || bch_is_open_backing(bdev); 24738c2ecf20Sopenharmony_ci} 24748c2ecf20Sopenharmony_ci 24758c2ecf20Sopenharmony_cistruct async_reg_args { 24768c2ecf20Sopenharmony_ci struct delayed_work reg_work; 24778c2ecf20Sopenharmony_ci char *path; 24788c2ecf20Sopenharmony_ci struct cache_sb *sb; 24798c2ecf20Sopenharmony_ci struct cache_sb_disk *sb_disk; 24808c2ecf20Sopenharmony_ci struct block_device *bdev; 24818c2ecf20Sopenharmony_ci}; 24828c2ecf20Sopenharmony_ci 24838c2ecf20Sopenharmony_cistatic void register_bdev_worker(struct work_struct *work) 24848c2ecf20Sopenharmony_ci{ 24858c2ecf20Sopenharmony_ci int fail = false; 24868c2ecf20Sopenharmony_ci struct async_reg_args *args = 24878c2ecf20Sopenharmony_ci container_of(work, struct async_reg_args, reg_work.work); 24888c2ecf20Sopenharmony_ci struct cached_dev *dc; 24898c2ecf20Sopenharmony_ci 24908c2ecf20Sopenharmony_ci dc = kzalloc(sizeof(*dc), GFP_KERNEL); 24918c2ecf20Sopenharmony_ci if (!dc) { 24928c2ecf20Sopenharmony_ci fail = true; 24938c2ecf20Sopenharmony_ci put_page(virt_to_page(args->sb_disk)); 24948c2ecf20Sopenharmony_ci blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); 24958c2ecf20Sopenharmony_ci goto out; 24968c2ecf20Sopenharmony_ci } 24978c2ecf20Sopenharmony_ci 24988c2ecf20Sopenharmony_ci mutex_lock(&bch_register_lock); 24998c2ecf20Sopenharmony_ci if (register_bdev(args->sb, 
args->sb_disk, args->bdev, dc) < 0) 25008c2ecf20Sopenharmony_ci fail = true; 25018c2ecf20Sopenharmony_ci mutex_unlock(&bch_register_lock); 25028c2ecf20Sopenharmony_ci 25038c2ecf20Sopenharmony_ciout: 25048c2ecf20Sopenharmony_ci if (fail) 25058c2ecf20Sopenharmony_ci pr_info("error %s: fail to register backing device\n", 25068c2ecf20Sopenharmony_ci args->path); 25078c2ecf20Sopenharmony_ci kfree(args->sb); 25088c2ecf20Sopenharmony_ci kfree(args->path); 25098c2ecf20Sopenharmony_ci kfree(args); 25108c2ecf20Sopenharmony_ci module_put(THIS_MODULE); 25118c2ecf20Sopenharmony_ci} 25128c2ecf20Sopenharmony_ci 25138c2ecf20Sopenharmony_cistatic void register_cache_worker(struct work_struct *work) 25148c2ecf20Sopenharmony_ci{ 25158c2ecf20Sopenharmony_ci int fail = false; 25168c2ecf20Sopenharmony_ci struct async_reg_args *args = 25178c2ecf20Sopenharmony_ci container_of(work, struct async_reg_args, reg_work.work); 25188c2ecf20Sopenharmony_ci struct cache *ca; 25198c2ecf20Sopenharmony_ci 25208c2ecf20Sopenharmony_ci ca = kzalloc(sizeof(*ca), GFP_KERNEL); 25218c2ecf20Sopenharmony_ci if (!ca) { 25228c2ecf20Sopenharmony_ci fail = true; 25238c2ecf20Sopenharmony_ci put_page(virt_to_page(args->sb_disk)); 25248c2ecf20Sopenharmony_ci blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); 25258c2ecf20Sopenharmony_ci goto out; 25268c2ecf20Sopenharmony_ci } 25278c2ecf20Sopenharmony_ci 25288c2ecf20Sopenharmony_ci /* blkdev_put() will be called in bch_cache_release() */ 25298c2ecf20Sopenharmony_ci if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0) 25308c2ecf20Sopenharmony_ci fail = true; 25318c2ecf20Sopenharmony_ci 25328c2ecf20Sopenharmony_ciout: 25338c2ecf20Sopenharmony_ci if (fail) 25348c2ecf20Sopenharmony_ci pr_info("error %s: fail to register cache device\n", 25358c2ecf20Sopenharmony_ci args->path); 25368c2ecf20Sopenharmony_ci kfree(args->sb); 25378c2ecf20Sopenharmony_ci kfree(args->path); 25388c2ecf20Sopenharmony_ci kfree(args); 25398c2ecf20Sopenharmony_ci 
module_put(THIS_MODULE); 25408c2ecf20Sopenharmony_ci} 25418c2ecf20Sopenharmony_ci 25428c2ecf20Sopenharmony_cistatic void register_device_aync(struct async_reg_args *args) 25438c2ecf20Sopenharmony_ci{ 25448c2ecf20Sopenharmony_ci if (SB_IS_BDEV(args->sb)) 25458c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&args->reg_work, register_bdev_worker); 25468c2ecf20Sopenharmony_ci else 25478c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&args->reg_work, register_cache_worker); 25488c2ecf20Sopenharmony_ci 25498c2ecf20Sopenharmony_ci /* 10 jiffies is enough for a delay */ 25508c2ecf20Sopenharmony_ci queue_delayed_work(system_wq, &args->reg_work, 10); 25518c2ecf20Sopenharmony_ci} 25528c2ecf20Sopenharmony_ci 25538c2ecf20Sopenharmony_cistatic ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, 25548c2ecf20Sopenharmony_ci const char *buffer, size_t size) 25558c2ecf20Sopenharmony_ci{ 25568c2ecf20Sopenharmony_ci const char *err; 25578c2ecf20Sopenharmony_ci char *path = NULL; 25588c2ecf20Sopenharmony_ci struct cache_sb *sb; 25598c2ecf20Sopenharmony_ci struct cache_sb_disk *sb_disk; 25608c2ecf20Sopenharmony_ci struct block_device *bdev; 25618c2ecf20Sopenharmony_ci ssize_t ret; 25628c2ecf20Sopenharmony_ci bool async_registration = false; 25638c2ecf20Sopenharmony_ci 25648c2ecf20Sopenharmony_ci#ifdef CONFIG_BCACHE_ASYNC_REGISTRATION 25658c2ecf20Sopenharmony_ci async_registration = true; 25668c2ecf20Sopenharmony_ci#endif 25678c2ecf20Sopenharmony_ci 25688c2ecf20Sopenharmony_ci ret = -EBUSY; 25698c2ecf20Sopenharmony_ci err = "failed to reference bcache module"; 25708c2ecf20Sopenharmony_ci if (!try_module_get(THIS_MODULE)) 25718c2ecf20Sopenharmony_ci goto out; 25728c2ecf20Sopenharmony_ci 25738c2ecf20Sopenharmony_ci /* For latest state of bcache_is_reboot */ 25748c2ecf20Sopenharmony_ci smp_mb(); 25758c2ecf20Sopenharmony_ci err = "bcache is in reboot"; 25768c2ecf20Sopenharmony_ci if (bcache_is_reboot) 25778c2ecf20Sopenharmony_ci goto out_module_put; 25788c2ecf20Sopenharmony_ci 
25798c2ecf20Sopenharmony_ci ret = -ENOMEM; 25808c2ecf20Sopenharmony_ci err = "cannot allocate memory"; 25818c2ecf20Sopenharmony_ci path = kstrndup(buffer, size, GFP_KERNEL); 25828c2ecf20Sopenharmony_ci if (!path) 25838c2ecf20Sopenharmony_ci goto out_module_put; 25848c2ecf20Sopenharmony_ci 25858c2ecf20Sopenharmony_ci sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL); 25868c2ecf20Sopenharmony_ci if (!sb) 25878c2ecf20Sopenharmony_ci goto out_free_path; 25888c2ecf20Sopenharmony_ci 25898c2ecf20Sopenharmony_ci ret = -EINVAL; 25908c2ecf20Sopenharmony_ci err = "failed to open device"; 25918c2ecf20Sopenharmony_ci bdev = blkdev_get_by_path(strim(path), 25928c2ecf20Sopenharmony_ci FMODE_READ|FMODE_WRITE|FMODE_EXCL, 25938c2ecf20Sopenharmony_ci sb); 25948c2ecf20Sopenharmony_ci if (IS_ERR(bdev)) { 25958c2ecf20Sopenharmony_ci if (bdev == ERR_PTR(-EBUSY)) { 25968c2ecf20Sopenharmony_ci bdev = lookup_bdev(strim(path)); 25978c2ecf20Sopenharmony_ci mutex_lock(&bch_register_lock); 25988c2ecf20Sopenharmony_ci if (!IS_ERR(bdev) && bch_is_open(bdev)) 25998c2ecf20Sopenharmony_ci err = "device already registered"; 26008c2ecf20Sopenharmony_ci else 26018c2ecf20Sopenharmony_ci err = "device busy"; 26028c2ecf20Sopenharmony_ci mutex_unlock(&bch_register_lock); 26038c2ecf20Sopenharmony_ci if (!IS_ERR(bdev)) 26048c2ecf20Sopenharmony_ci bdput(bdev); 26058c2ecf20Sopenharmony_ci if (attr == &ksysfs_register_quiet) 26068c2ecf20Sopenharmony_ci goto done; 26078c2ecf20Sopenharmony_ci } 26088c2ecf20Sopenharmony_ci goto out_free_sb; 26098c2ecf20Sopenharmony_ci } 26108c2ecf20Sopenharmony_ci 26118c2ecf20Sopenharmony_ci err = "failed to set blocksize"; 26128c2ecf20Sopenharmony_ci if (set_blocksize(bdev, 4096)) 26138c2ecf20Sopenharmony_ci goto out_blkdev_put; 26148c2ecf20Sopenharmony_ci 26158c2ecf20Sopenharmony_ci err = read_super(sb, bdev, &sb_disk); 26168c2ecf20Sopenharmony_ci if (err) 26178c2ecf20Sopenharmony_ci goto out_blkdev_put; 26188c2ecf20Sopenharmony_ci 26198c2ecf20Sopenharmony_ci err = "failed to 
register device"; 26208c2ecf20Sopenharmony_ci 26218c2ecf20Sopenharmony_ci if (async_registration) { 26228c2ecf20Sopenharmony_ci /* register in asynchronous way */ 26238c2ecf20Sopenharmony_ci struct async_reg_args *args = 26248c2ecf20Sopenharmony_ci kzalloc(sizeof(struct async_reg_args), GFP_KERNEL); 26258c2ecf20Sopenharmony_ci 26268c2ecf20Sopenharmony_ci if (!args) { 26278c2ecf20Sopenharmony_ci ret = -ENOMEM; 26288c2ecf20Sopenharmony_ci err = "cannot allocate memory"; 26298c2ecf20Sopenharmony_ci goto out_put_sb_page; 26308c2ecf20Sopenharmony_ci } 26318c2ecf20Sopenharmony_ci 26328c2ecf20Sopenharmony_ci args->path = path; 26338c2ecf20Sopenharmony_ci args->sb = sb; 26348c2ecf20Sopenharmony_ci args->sb_disk = sb_disk; 26358c2ecf20Sopenharmony_ci args->bdev = bdev; 26368c2ecf20Sopenharmony_ci register_device_aync(args); 26378c2ecf20Sopenharmony_ci /* No wait and returns to user space */ 26388c2ecf20Sopenharmony_ci goto async_done; 26398c2ecf20Sopenharmony_ci } 26408c2ecf20Sopenharmony_ci 26418c2ecf20Sopenharmony_ci if (SB_IS_BDEV(sb)) { 26428c2ecf20Sopenharmony_ci struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); 26438c2ecf20Sopenharmony_ci 26448c2ecf20Sopenharmony_ci if (!dc) 26458c2ecf20Sopenharmony_ci goto out_put_sb_page; 26468c2ecf20Sopenharmony_ci 26478c2ecf20Sopenharmony_ci mutex_lock(&bch_register_lock); 26488c2ecf20Sopenharmony_ci ret = register_bdev(sb, sb_disk, bdev, dc); 26498c2ecf20Sopenharmony_ci mutex_unlock(&bch_register_lock); 26508c2ecf20Sopenharmony_ci /* blkdev_put() will be called in cached_dev_free() */ 26518c2ecf20Sopenharmony_ci if (ret < 0) 26528c2ecf20Sopenharmony_ci goto out_free_sb; 26538c2ecf20Sopenharmony_ci } else { 26548c2ecf20Sopenharmony_ci struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); 26558c2ecf20Sopenharmony_ci 26568c2ecf20Sopenharmony_ci if (!ca) 26578c2ecf20Sopenharmony_ci goto out_put_sb_page; 26588c2ecf20Sopenharmony_ci 26598c2ecf20Sopenharmony_ci /* blkdev_put() will be called in bch_cache_release() */ 
26608c2ecf20Sopenharmony_ci if (register_cache(sb, sb_disk, bdev, ca) != 0) 26618c2ecf20Sopenharmony_ci goto out_free_sb; 26628c2ecf20Sopenharmony_ci } 26638c2ecf20Sopenharmony_ci 26648c2ecf20Sopenharmony_cidone: 26658c2ecf20Sopenharmony_ci kfree(sb); 26668c2ecf20Sopenharmony_ci kfree(path); 26678c2ecf20Sopenharmony_ci module_put(THIS_MODULE); 26688c2ecf20Sopenharmony_ciasync_done: 26698c2ecf20Sopenharmony_ci return size; 26708c2ecf20Sopenharmony_ci 26718c2ecf20Sopenharmony_ciout_put_sb_page: 26728c2ecf20Sopenharmony_ci put_page(virt_to_page(sb_disk)); 26738c2ecf20Sopenharmony_ciout_blkdev_put: 26748c2ecf20Sopenharmony_ci blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); 26758c2ecf20Sopenharmony_ciout_free_sb: 26768c2ecf20Sopenharmony_ci kfree(sb); 26778c2ecf20Sopenharmony_ciout_free_path: 26788c2ecf20Sopenharmony_ci kfree(path); 26798c2ecf20Sopenharmony_ci path = NULL; 26808c2ecf20Sopenharmony_ciout_module_put: 26818c2ecf20Sopenharmony_ci module_put(THIS_MODULE); 26828c2ecf20Sopenharmony_ciout: 26838c2ecf20Sopenharmony_ci pr_info("error %s: %s\n", path?path:"", err); 26848c2ecf20Sopenharmony_ci return ret; 26858c2ecf20Sopenharmony_ci} 26868c2ecf20Sopenharmony_ci 26878c2ecf20Sopenharmony_ci 26888c2ecf20Sopenharmony_cistruct pdev { 26898c2ecf20Sopenharmony_ci struct list_head list; 26908c2ecf20Sopenharmony_ci struct cached_dev *dc; 26918c2ecf20Sopenharmony_ci}; 26928c2ecf20Sopenharmony_ci 26938c2ecf20Sopenharmony_cistatic ssize_t bch_pending_bdevs_cleanup(struct kobject *k, 26948c2ecf20Sopenharmony_ci struct kobj_attribute *attr, 26958c2ecf20Sopenharmony_ci const char *buffer, 26968c2ecf20Sopenharmony_ci size_t size) 26978c2ecf20Sopenharmony_ci{ 26988c2ecf20Sopenharmony_ci LIST_HEAD(pending_devs); 26998c2ecf20Sopenharmony_ci ssize_t ret = size; 27008c2ecf20Sopenharmony_ci struct cached_dev *dc, *tdc; 27018c2ecf20Sopenharmony_ci struct pdev *pdev, *tpdev; 27028c2ecf20Sopenharmony_ci struct cache_set *c, *tc; 27038c2ecf20Sopenharmony_ci 
27048c2ecf20Sopenharmony_ci mutex_lock(&bch_register_lock); 27058c2ecf20Sopenharmony_ci list_for_each_entry_safe(dc, tdc, &uncached_devices, list) { 27068c2ecf20Sopenharmony_ci pdev = kmalloc(sizeof(struct pdev), GFP_KERNEL); 27078c2ecf20Sopenharmony_ci if (!pdev) 27088c2ecf20Sopenharmony_ci break; 27098c2ecf20Sopenharmony_ci pdev->dc = dc; 27108c2ecf20Sopenharmony_ci list_add(&pdev->list, &pending_devs); 27118c2ecf20Sopenharmony_ci } 27128c2ecf20Sopenharmony_ci 27138c2ecf20Sopenharmony_ci list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) { 27148c2ecf20Sopenharmony_ci list_for_each_entry_safe(c, tc, &bch_cache_sets, list) { 27158c2ecf20Sopenharmony_ci char *pdev_set_uuid = pdev->dc->sb.set_uuid; 27168c2ecf20Sopenharmony_ci char *set_uuid = c->set_uuid; 27178c2ecf20Sopenharmony_ci 27188c2ecf20Sopenharmony_ci if (!memcmp(pdev_set_uuid, set_uuid, 16)) { 27198c2ecf20Sopenharmony_ci list_del(&pdev->list); 27208c2ecf20Sopenharmony_ci kfree(pdev); 27218c2ecf20Sopenharmony_ci break; 27228c2ecf20Sopenharmony_ci } 27238c2ecf20Sopenharmony_ci } 27248c2ecf20Sopenharmony_ci } 27258c2ecf20Sopenharmony_ci mutex_unlock(&bch_register_lock); 27268c2ecf20Sopenharmony_ci 27278c2ecf20Sopenharmony_ci list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) { 27288c2ecf20Sopenharmony_ci pr_info("delete pdev %p\n", pdev); 27298c2ecf20Sopenharmony_ci list_del(&pdev->list); 27308c2ecf20Sopenharmony_ci bcache_device_stop(&pdev->dc->disk); 27318c2ecf20Sopenharmony_ci kfree(pdev); 27328c2ecf20Sopenharmony_ci } 27338c2ecf20Sopenharmony_ci 27348c2ecf20Sopenharmony_ci return ret; 27358c2ecf20Sopenharmony_ci} 27368c2ecf20Sopenharmony_ci 27378c2ecf20Sopenharmony_cistatic int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) 27388c2ecf20Sopenharmony_ci{ 27398c2ecf20Sopenharmony_ci if (bcache_is_reboot) 27408c2ecf20Sopenharmony_ci return NOTIFY_DONE; 27418c2ecf20Sopenharmony_ci 27428c2ecf20Sopenharmony_ci if (code == SYS_DOWN || 27438c2ecf20Sopenharmony_ci code == 
SYS_HALT || 27448c2ecf20Sopenharmony_ci code == SYS_POWER_OFF) { 27458c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 27468c2ecf20Sopenharmony_ci unsigned long start = jiffies; 27478c2ecf20Sopenharmony_ci bool stopped = false; 27488c2ecf20Sopenharmony_ci 27498c2ecf20Sopenharmony_ci struct cache_set *c, *tc; 27508c2ecf20Sopenharmony_ci struct cached_dev *dc, *tdc; 27518c2ecf20Sopenharmony_ci 27528c2ecf20Sopenharmony_ci mutex_lock(&bch_register_lock); 27538c2ecf20Sopenharmony_ci 27548c2ecf20Sopenharmony_ci if (bcache_is_reboot) 27558c2ecf20Sopenharmony_ci goto out; 27568c2ecf20Sopenharmony_ci 27578c2ecf20Sopenharmony_ci /* New registration is rejected since now */ 27588c2ecf20Sopenharmony_ci bcache_is_reboot = true; 27598c2ecf20Sopenharmony_ci /* 27608c2ecf20Sopenharmony_ci * Make registering caller (if there is) on other CPU 27618c2ecf20Sopenharmony_ci * core know bcache_is_reboot set to true earlier 27628c2ecf20Sopenharmony_ci */ 27638c2ecf20Sopenharmony_ci smp_mb(); 27648c2ecf20Sopenharmony_ci 27658c2ecf20Sopenharmony_ci if (list_empty(&bch_cache_sets) && 27668c2ecf20Sopenharmony_ci list_empty(&uncached_devices)) 27678c2ecf20Sopenharmony_ci goto out; 27688c2ecf20Sopenharmony_ci 27698c2ecf20Sopenharmony_ci mutex_unlock(&bch_register_lock); 27708c2ecf20Sopenharmony_ci 27718c2ecf20Sopenharmony_ci pr_info("Stopping all devices:\n"); 27728c2ecf20Sopenharmony_ci 27738c2ecf20Sopenharmony_ci /* 27748c2ecf20Sopenharmony_ci * The reason bch_register_lock is not held to call 27758c2ecf20Sopenharmony_ci * bch_cache_set_stop() and bcache_device_stop() is to 27768c2ecf20Sopenharmony_ci * avoid potential deadlock during reboot, because cache 27778c2ecf20Sopenharmony_ci * set or bcache device stopping process will acqurie 27788c2ecf20Sopenharmony_ci * bch_register_lock too. 
27798c2ecf20Sopenharmony_ci * 27808c2ecf20Sopenharmony_ci * We are safe here because bcache_is_reboot sets to 27818c2ecf20Sopenharmony_ci * true already, register_bcache() will reject new 27828c2ecf20Sopenharmony_ci * registration now. bcache_is_reboot also makes sure 27838c2ecf20Sopenharmony_ci * bcache_reboot() won't be re-entered on by other thread, 27848c2ecf20Sopenharmony_ci * so there is no race in following list iteration by 27858c2ecf20Sopenharmony_ci * list_for_each_entry_safe(). 27868c2ecf20Sopenharmony_ci */ 27878c2ecf20Sopenharmony_ci list_for_each_entry_safe(c, tc, &bch_cache_sets, list) 27888c2ecf20Sopenharmony_ci bch_cache_set_stop(c); 27898c2ecf20Sopenharmony_ci 27908c2ecf20Sopenharmony_ci list_for_each_entry_safe(dc, tdc, &uncached_devices, list) 27918c2ecf20Sopenharmony_ci bcache_device_stop(&dc->disk); 27928c2ecf20Sopenharmony_ci 27938c2ecf20Sopenharmony_ci 27948c2ecf20Sopenharmony_ci /* 27958c2ecf20Sopenharmony_ci * Give an early chance for other kthreads and 27968c2ecf20Sopenharmony_ci * kworkers to stop themselves 27978c2ecf20Sopenharmony_ci */ 27988c2ecf20Sopenharmony_ci schedule(); 27998c2ecf20Sopenharmony_ci 28008c2ecf20Sopenharmony_ci /* What's a condition variable? 
*/ 28018c2ecf20Sopenharmony_ci while (1) { 28028c2ecf20Sopenharmony_ci long timeout = start + 10 * HZ - jiffies; 28038c2ecf20Sopenharmony_ci 28048c2ecf20Sopenharmony_ci mutex_lock(&bch_register_lock); 28058c2ecf20Sopenharmony_ci stopped = list_empty(&bch_cache_sets) && 28068c2ecf20Sopenharmony_ci list_empty(&uncached_devices); 28078c2ecf20Sopenharmony_ci 28088c2ecf20Sopenharmony_ci if (timeout < 0 || stopped) 28098c2ecf20Sopenharmony_ci break; 28108c2ecf20Sopenharmony_ci 28118c2ecf20Sopenharmony_ci prepare_to_wait(&unregister_wait, &wait, 28128c2ecf20Sopenharmony_ci TASK_UNINTERRUPTIBLE); 28138c2ecf20Sopenharmony_ci 28148c2ecf20Sopenharmony_ci mutex_unlock(&bch_register_lock); 28158c2ecf20Sopenharmony_ci schedule_timeout(timeout); 28168c2ecf20Sopenharmony_ci } 28178c2ecf20Sopenharmony_ci 28188c2ecf20Sopenharmony_ci finish_wait(&unregister_wait, &wait); 28198c2ecf20Sopenharmony_ci 28208c2ecf20Sopenharmony_ci if (stopped) 28218c2ecf20Sopenharmony_ci pr_info("All devices stopped\n"); 28228c2ecf20Sopenharmony_ci else 28238c2ecf20Sopenharmony_ci pr_notice("Timeout waiting for devices to be closed\n"); 28248c2ecf20Sopenharmony_ciout: 28258c2ecf20Sopenharmony_ci mutex_unlock(&bch_register_lock); 28268c2ecf20Sopenharmony_ci } 28278c2ecf20Sopenharmony_ci 28288c2ecf20Sopenharmony_ci return NOTIFY_DONE; 28298c2ecf20Sopenharmony_ci} 28308c2ecf20Sopenharmony_ci 28318c2ecf20Sopenharmony_cistatic struct notifier_block reboot = { 28328c2ecf20Sopenharmony_ci .notifier_call = bcache_reboot, 28338c2ecf20Sopenharmony_ci .priority = INT_MAX, /* before any real devices */ 28348c2ecf20Sopenharmony_ci}; 28358c2ecf20Sopenharmony_ci 28368c2ecf20Sopenharmony_cistatic void bcache_exit(void) 28378c2ecf20Sopenharmony_ci{ 28388c2ecf20Sopenharmony_ci bch_debug_exit(); 28398c2ecf20Sopenharmony_ci bch_request_exit(); 28408c2ecf20Sopenharmony_ci if (bcache_kobj) 28418c2ecf20Sopenharmony_ci kobject_put(bcache_kobj); 28428c2ecf20Sopenharmony_ci if (bcache_wq) 28438c2ecf20Sopenharmony_ci 
destroy_workqueue(bcache_wq); 28448c2ecf20Sopenharmony_ci if (bch_journal_wq) 28458c2ecf20Sopenharmony_ci destroy_workqueue(bch_journal_wq); 28468c2ecf20Sopenharmony_ci if (bch_flush_wq) 28478c2ecf20Sopenharmony_ci destroy_workqueue(bch_flush_wq); 28488c2ecf20Sopenharmony_ci bch_btree_exit(); 28498c2ecf20Sopenharmony_ci 28508c2ecf20Sopenharmony_ci if (bcache_major) 28518c2ecf20Sopenharmony_ci unregister_blkdev(bcache_major, "bcache"); 28528c2ecf20Sopenharmony_ci unregister_reboot_notifier(&reboot); 28538c2ecf20Sopenharmony_ci mutex_destroy(&bch_register_lock); 28548c2ecf20Sopenharmony_ci} 28558c2ecf20Sopenharmony_ci 28568c2ecf20Sopenharmony_ci/* Check and fixup module parameters */ 28578c2ecf20Sopenharmony_cistatic void check_module_parameters(void) 28588c2ecf20Sopenharmony_ci{ 28598c2ecf20Sopenharmony_ci if (bch_cutoff_writeback_sync == 0) 28608c2ecf20Sopenharmony_ci bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC; 28618c2ecf20Sopenharmony_ci else if (bch_cutoff_writeback_sync > CUTOFF_WRITEBACK_SYNC_MAX) { 28628c2ecf20Sopenharmony_ci pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u\n", 28638c2ecf20Sopenharmony_ci bch_cutoff_writeback_sync, CUTOFF_WRITEBACK_SYNC_MAX); 28648c2ecf20Sopenharmony_ci bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC_MAX; 28658c2ecf20Sopenharmony_ci } 28668c2ecf20Sopenharmony_ci 28678c2ecf20Sopenharmony_ci if (bch_cutoff_writeback == 0) 28688c2ecf20Sopenharmony_ci bch_cutoff_writeback = CUTOFF_WRITEBACK; 28698c2ecf20Sopenharmony_ci else if (bch_cutoff_writeback > CUTOFF_WRITEBACK_MAX) { 28708c2ecf20Sopenharmony_ci pr_warn("set bch_cutoff_writeback (%u) to max value %u\n", 28718c2ecf20Sopenharmony_ci bch_cutoff_writeback, CUTOFF_WRITEBACK_MAX); 28728c2ecf20Sopenharmony_ci bch_cutoff_writeback = CUTOFF_WRITEBACK_MAX; 28738c2ecf20Sopenharmony_ci } 28748c2ecf20Sopenharmony_ci 28758c2ecf20Sopenharmony_ci if (bch_cutoff_writeback > bch_cutoff_writeback_sync) { 28768c2ecf20Sopenharmony_ci pr_warn("set bch_cutoff_writeback (%u) to 
%u\n", 28778c2ecf20Sopenharmony_ci bch_cutoff_writeback, bch_cutoff_writeback_sync); 28788c2ecf20Sopenharmony_ci bch_cutoff_writeback = bch_cutoff_writeback_sync; 28798c2ecf20Sopenharmony_ci } 28808c2ecf20Sopenharmony_ci} 28818c2ecf20Sopenharmony_ci 28828c2ecf20Sopenharmony_cistatic int __init bcache_init(void) 28838c2ecf20Sopenharmony_ci{ 28848c2ecf20Sopenharmony_ci static const struct attribute *files[] = { 28858c2ecf20Sopenharmony_ci &ksysfs_register.attr, 28868c2ecf20Sopenharmony_ci &ksysfs_register_quiet.attr, 28878c2ecf20Sopenharmony_ci &ksysfs_pendings_cleanup.attr, 28888c2ecf20Sopenharmony_ci NULL 28898c2ecf20Sopenharmony_ci }; 28908c2ecf20Sopenharmony_ci 28918c2ecf20Sopenharmony_ci check_module_parameters(); 28928c2ecf20Sopenharmony_ci 28938c2ecf20Sopenharmony_ci mutex_init(&bch_register_lock); 28948c2ecf20Sopenharmony_ci init_waitqueue_head(&unregister_wait); 28958c2ecf20Sopenharmony_ci register_reboot_notifier(&reboot); 28968c2ecf20Sopenharmony_ci 28978c2ecf20Sopenharmony_ci bcache_major = register_blkdev(0, "bcache"); 28988c2ecf20Sopenharmony_ci if (bcache_major < 0) { 28998c2ecf20Sopenharmony_ci unregister_reboot_notifier(&reboot); 29008c2ecf20Sopenharmony_ci mutex_destroy(&bch_register_lock); 29018c2ecf20Sopenharmony_ci return bcache_major; 29028c2ecf20Sopenharmony_ci } 29038c2ecf20Sopenharmony_ci 29048c2ecf20Sopenharmony_ci if (bch_btree_init()) 29058c2ecf20Sopenharmony_ci goto err; 29068c2ecf20Sopenharmony_ci 29078c2ecf20Sopenharmony_ci bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0); 29088c2ecf20Sopenharmony_ci if (!bcache_wq) 29098c2ecf20Sopenharmony_ci goto err; 29108c2ecf20Sopenharmony_ci 29118c2ecf20Sopenharmony_ci /* 29128c2ecf20Sopenharmony_ci * Let's not make this `WQ_MEM_RECLAIM` for the following reasons: 29138c2ecf20Sopenharmony_ci * 29148c2ecf20Sopenharmony_ci * 1. It used `system_wq` before which also does no memory reclaim. 29158c2ecf20Sopenharmony_ci * 2. 
With `WQ_MEM_RECLAIM` desktop stalls, increased boot times, and 29168c2ecf20Sopenharmony_ci * reduced throughput can be observed. 29178c2ecf20Sopenharmony_ci * 29188c2ecf20Sopenharmony_ci * We still want to user our own queue to not congest the `system_wq`. 29198c2ecf20Sopenharmony_ci */ 29208c2ecf20Sopenharmony_ci bch_flush_wq = alloc_workqueue("bch_flush", 0, 0); 29218c2ecf20Sopenharmony_ci if (!bch_flush_wq) 29228c2ecf20Sopenharmony_ci goto err; 29238c2ecf20Sopenharmony_ci 29248c2ecf20Sopenharmony_ci bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0); 29258c2ecf20Sopenharmony_ci if (!bch_journal_wq) 29268c2ecf20Sopenharmony_ci goto err; 29278c2ecf20Sopenharmony_ci 29288c2ecf20Sopenharmony_ci bcache_kobj = kobject_create_and_add("bcache", fs_kobj); 29298c2ecf20Sopenharmony_ci if (!bcache_kobj) 29308c2ecf20Sopenharmony_ci goto err; 29318c2ecf20Sopenharmony_ci 29328c2ecf20Sopenharmony_ci if (bch_request_init() || 29338c2ecf20Sopenharmony_ci sysfs_create_files(bcache_kobj, files)) 29348c2ecf20Sopenharmony_ci goto err; 29358c2ecf20Sopenharmony_ci 29368c2ecf20Sopenharmony_ci bch_debug_init(); 29378c2ecf20Sopenharmony_ci closure_debug_init(); 29388c2ecf20Sopenharmony_ci 29398c2ecf20Sopenharmony_ci bcache_is_reboot = false; 29408c2ecf20Sopenharmony_ci 29418c2ecf20Sopenharmony_ci return 0; 29428c2ecf20Sopenharmony_cierr: 29438c2ecf20Sopenharmony_ci bcache_exit(); 29448c2ecf20Sopenharmony_ci return -ENOMEM; 29458c2ecf20Sopenharmony_ci} 29468c2ecf20Sopenharmony_ci 29478c2ecf20Sopenharmony_ci/* 29488c2ecf20Sopenharmony_ci * Module hooks 29498c2ecf20Sopenharmony_ci */ 29508c2ecf20Sopenharmony_cimodule_exit(bcache_exit); 29518c2ecf20Sopenharmony_cimodule_init(bcache_init); 29528c2ecf20Sopenharmony_ci 29538c2ecf20Sopenharmony_cimodule_param(bch_cutoff_writeback, uint, 0); 29548c2ecf20Sopenharmony_ciMODULE_PARM_DESC(bch_cutoff_writeback, "threshold to cutoff writeback"); 29558c2ecf20Sopenharmony_ci 
29568c2ecf20Sopenharmony_cimodule_param(bch_cutoff_writeback_sync, uint, 0); 29578c2ecf20Sopenharmony_ciMODULE_PARM_DESC(bch_cutoff_writeback_sync, "hard threshold to cutoff writeback"); 29588c2ecf20Sopenharmony_ci 29598c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Bcache: a Linux block layer cache"); 29608c2ecf20Sopenharmony_ciMODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>"); 29618c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 2962