162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Zoned block device handling 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (c) 2015, Hannes Reinecke 662306a36Sopenharmony_ci * Copyright (c) 2015, SUSE Linux GmbH 762306a36Sopenharmony_ci * 862306a36Sopenharmony_ci * Copyright (c) 2016, Damien Le Moal 962306a36Sopenharmony_ci * Copyright (c) 2016, Western Digital 1062306a36Sopenharmony_ci */ 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci#include <linux/kernel.h> 1362306a36Sopenharmony_ci#include <linux/module.h> 1462306a36Sopenharmony_ci#include <linux/rbtree.h> 1562306a36Sopenharmony_ci#include <linux/blkdev.h> 1662306a36Sopenharmony_ci#include <linux/blk-mq.h> 1762306a36Sopenharmony_ci#include <linux/mm.h> 1862306a36Sopenharmony_ci#include <linux/vmalloc.h> 1962306a36Sopenharmony_ci#include <linux/sched/mm.h> 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_ci#include "blk.h" 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ci#define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name 2462306a36Sopenharmony_cistatic const char *const zone_cond_name[] = { 2562306a36Sopenharmony_ci ZONE_COND_NAME(NOT_WP), 2662306a36Sopenharmony_ci ZONE_COND_NAME(EMPTY), 2762306a36Sopenharmony_ci ZONE_COND_NAME(IMP_OPEN), 2862306a36Sopenharmony_ci ZONE_COND_NAME(EXP_OPEN), 2962306a36Sopenharmony_ci ZONE_COND_NAME(CLOSED), 3062306a36Sopenharmony_ci ZONE_COND_NAME(READONLY), 3162306a36Sopenharmony_ci ZONE_COND_NAME(FULL), 3262306a36Sopenharmony_ci ZONE_COND_NAME(OFFLINE), 3362306a36Sopenharmony_ci}; 3462306a36Sopenharmony_ci#undef ZONE_COND_NAME 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ci/** 3762306a36Sopenharmony_ci * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX. 3862306a36Sopenharmony_ci * @zone_cond: BLK_ZONE_COND_XXX. 3962306a36Sopenharmony_ci * 4062306a36Sopenharmony_ci * Description: Centralize block layer function to convert BLK_ZONE_COND_XXX 4162306a36Sopenharmony_ci * into string format. Useful in the debugging and tracing zone conditions. For 4262306a36Sopenharmony_ci * invalid BLK_ZONE_COND_XXX it returns string "UNKNOWN". 4362306a36Sopenharmony_ci */ 4462306a36Sopenharmony_ciconst char *blk_zone_cond_str(enum blk_zone_cond zone_cond) 4562306a36Sopenharmony_ci{ 4662306a36Sopenharmony_ci static const char *zone_cond_str = "UNKNOWN"; 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond]) 4962306a36Sopenharmony_ci zone_cond_str = zone_cond_name[zone_cond]; 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci return zone_cond_str; 5262306a36Sopenharmony_ci} 5362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(blk_zone_cond_str); 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci/* 5662306a36Sopenharmony_ci * Return true if a request is a write requests that needs zone write locking. 5762306a36Sopenharmony_ci */ 5862306a36Sopenharmony_cibool blk_req_needs_zone_write_lock(struct request *rq) 5962306a36Sopenharmony_ci{ 6062306a36Sopenharmony_ci if (!rq->q->disk->seq_zones_wlock) 6162306a36Sopenharmony_ci return false; 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci return blk_rq_is_seq_zoned_write(rq); 6462306a36Sopenharmony_ci} 6562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock); 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_cibool blk_req_zone_write_trylock(struct request *rq) 6862306a36Sopenharmony_ci{ 6962306a36Sopenharmony_ci unsigned int zno = blk_rq_zone_no(rq); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci if (test_and_set_bit(zno, rq->q->disk->seq_zones_wlock)) 7262306a36Sopenharmony_ci return false; 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED); 7562306a36Sopenharmony_ci rq->rq_flags |= RQF_ZONE_WRITE_LOCKED; 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_ci return true; 7862306a36Sopenharmony_ci} 7962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(blk_req_zone_write_trylock); 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_civoid __blk_req_zone_write_lock(struct request *rq) 8262306a36Sopenharmony_ci{ 8362306a36Sopenharmony_ci if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq), 8462306a36Sopenharmony_ci rq->q->disk->seq_zones_wlock))) 8562306a36Sopenharmony_ci return; 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED); 8862306a36Sopenharmony_ci rq->rq_flags |= RQF_ZONE_WRITE_LOCKED; 8962306a36Sopenharmony_ci} 9062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(__blk_req_zone_write_lock); 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_civoid __blk_req_zone_write_unlock(struct request *rq) 9362306a36Sopenharmony_ci{ 9462306a36Sopenharmony_ci rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED; 9562306a36Sopenharmony_ci if (rq->q->disk->seq_zones_wlock) 9662306a36Sopenharmony_ci WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq), 9762306a36Sopenharmony_ci rq->q->disk->seq_zones_wlock)); 9862306a36Sopenharmony_ci} 9962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock); 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci/** 10262306a36Sopenharmony_ci * bdev_nr_zones - Get number of zones 10362306a36Sopenharmony_ci * @bdev: Target device 10462306a36Sopenharmony_ci * 10562306a36Sopenharmony_ci * Return the total number of zones of a zoned block device. For a block 10662306a36Sopenharmony_ci * device without zone capabilities, the number of zones is always 0. 10762306a36Sopenharmony_ci */ 10862306a36Sopenharmony_ciunsigned int bdev_nr_zones(struct block_device *bdev) 10962306a36Sopenharmony_ci{ 11062306a36Sopenharmony_ci sector_t zone_sectors = bdev_zone_sectors(bdev); 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci if (!bdev_is_zoned(bdev)) 11362306a36Sopenharmony_ci return 0; 11462306a36Sopenharmony_ci return (bdev_nr_sectors(bdev) + zone_sectors - 1) >> 11562306a36Sopenharmony_ci ilog2(zone_sectors); 11662306a36Sopenharmony_ci} 11762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(bdev_nr_zones); 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci/** 12062306a36Sopenharmony_ci * blkdev_report_zones - Get zones information 12162306a36Sopenharmony_ci * @bdev: Target block device 12262306a36Sopenharmony_ci * @sector: Sector from which to report zones 12362306a36Sopenharmony_ci * @nr_zones: Maximum number of zones to report 12462306a36Sopenharmony_ci * @cb: Callback function called for each reported zone 12562306a36Sopenharmony_ci * @data: Private data for the callback 12662306a36Sopenharmony_ci * 12762306a36Sopenharmony_ci * Description: 12862306a36Sopenharmony_ci * Get zone information starting from the zone containing @sector for at most 12962306a36Sopenharmony_ci * @nr_zones, and call @cb for each zone reported by the device. 13062306a36Sopenharmony_ci * To report all zones in a device starting from @sector, the BLK_ALL_ZONES 13162306a36Sopenharmony_ci * constant can be passed to @nr_zones. 13262306a36Sopenharmony_ci * Returns the number of zones reported by the device, or a negative errno 13362306a36Sopenharmony_ci * value in case of failure. 13462306a36Sopenharmony_ci * 13562306a36Sopenharmony_ci * Note: The caller must use memalloc_noXX_save/restore() calls to control 13662306a36Sopenharmony_ci * memory allocations done within this function. 13762306a36Sopenharmony_ci */ 13862306a36Sopenharmony_ciint blkdev_report_zones(struct block_device *bdev, sector_t sector, 13962306a36Sopenharmony_ci unsigned int nr_zones, report_zones_cb cb, void *data) 14062306a36Sopenharmony_ci{ 14162306a36Sopenharmony_ci struct gendisk *disk = bdev->bd_disk; 14262306a36Sopenharmony_ci sector_t capacity = get_capacity(disk); 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones)) 14562306a36Sopenharmony_ci return -EOPNOTSUPP; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci if (!nr_zones || sector >= capacity) 14862306a36Sopenharmony_ci return 0; 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci return disk->fops->report_zones(disk, sector, nr_zones, cb, data); 15162306a36Sopenharmony_ci} 15262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(blkdev_report_zones); 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_cistatic inline unsigned long *blk_alloc_zone_bitmap(int node, 15562306a36Sopenharmony_ci unsigned int nr_zones) 15662306a36Sopenharmony_ci{ 15762306a36Sopenharmony_ci return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long), 15862306a36Sopenharmony_ci GFP_NOIO, node); 15962306a36Sopenharmony_ci} 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_cistatic int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx, 16262306a36Sopenharmony_ci void *data) 16362306a36Sopenharmony_ci{ 16462306a36Sopenharmony_ci /* 16562306a36Sopenharmony_ci * For an all-zones reset, ignore conventional, empty, read-only 16662306a36Sopenharmony_ci * and offline zones. 16762306a36Sopenharmony_ci */ 16862306a36Sopenharmony_ci switch (zone->cond) { 16962306a36Sopenharmony_ci case BLK_ZONE_COND_NOT_WP: 17062306a36Sopenharmony_ci case BLK_ZONE_COND_EMPTY: 17162306a36Sopenharmony_ci case BLK_ZONE_COND_READONLY: 17262306a36Sopenharmony_ci case BLK_ZONE_COND_OFFLINE: 17362306a36Sopenharmony_ci return 0; 17462306a36Sopenharmony_ci default: 17562306a36Sopenharmony_ci set_bit(idx, (unsigned long *)data); 17662306a36Sopenharmony_ci return 0; 17762306a36Sopenharmony_ci } 17862306a36Sopenharmony_ci} 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_cistatic int blkdev_zone_reset_all_emulated(struct block_device *bdev, 18162306a36Sopenharmony_ci gfp_t gfp_mask) 18262306a36Sopenharmony_ci{ 18362306a36Sopenharmony_ci struct gendisk *disk = bdev->bd_disk; 18462306a36Sopenharmony_ci sector_t capacity = bdev_nr_sectors(bdev); 18562306a36Sopenharmony_ci sector_t zone_sectors = bdev_zone_sectors(bdev); 18662306a36Sopenharmony_ci unsigned long *need_reset; 18762306a36Sopenharmony_ci struct bio *bio = NULL; 18862306a36Sopenharmony_ci sector_t sector = 0; 18962306a36Sopenharmony_ci int ret; 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci need_reset = blk_alloc_zone_bitmap(disk->queue->node, disk->nr_zones); 19262306a36Sopenharmony_ci if (!need_reset) 19362306a36Sopenharmony_ci return -ENOMEM; 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci ret = disk->fops->report_zones(disk, 0, disk->nr_zones, 19662306a36Sopenharmony_ci blk_zone_need_reset_cb, need_reset); 19762306a36Sopenharmony_ci if (ret < 0) 19862306a36Sopenharmony_ci goto out_free_need_reset; 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci ret = 0; 20162306a36Sopenharmony_ci while (sector < capacity) { 20262306a36Sopenharmony_ci if (!test_bit(disk_zone_no(disk, sector), need_reset)) { 20362306a36Sopenharmony_ci sector += zone_sectors; 20462306a36Sopenharmony_ci continue; 20562306a36Sopenharmony_ci } 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC, 20862306a36Sopenharmony_ci gfp_mask); 20962306a36Sopenharmony_ci bio->bi_iter.bi_sector = sector; 21062306a36Sopenharmony_ci sector += zone_sectors; 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci /* This may take a while, so be nice to others */ 21362306a36Sopenharmony_ci cond_resched(); 21462306a36Sopenharmony_ci } 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci if (bio) { 21762306a36Sopenharmony_ci ret = submit_bio_wait(bio); 21862306a36Sopenharmony_ci bio_put(bio); 21962306a36Sopenharmony_ci } 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ciout_free_need_reset: 22262306a36Sopenharmony_ci kfree(need_reset); 22362306a36Sopenharmony_ci return ret; 22462306a36Sopenharmony_ci} 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_cistatic int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask) 22762306a36Sopenharmony_ci{ 22862306a36Sopenharmony_ci struct bio bio; 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_ci bio_init(&bio, bdev, NULL, 0, REQ_OP_ZONE_RESET_ALL | REQ_SYNC); 23162306a36Sopenharmony_ci return submit_bio_wait(&bio); 23262306a36Sopenharmony_ci} 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ci/** 23562306a36Sopenharmony_ci * blkdev_zone_mgmt - Execute a zone management operation on a range of zones 23662306a36Sopenharmony_ci * @bdev: Target block device 23762306a36Sopenharmony_ci * @op: Operation to be performed on the zones 23862306a36Sopenharmony_ci * @sector: Start sector of the first zone to operate on 23962306a36Sopenharmony_ci * @nr_sectors: Number of sectors, should be at least the length of one zone and 24062306a36Sopenharmony_ci * must be zone size aligned. 24162306a36Sopenharmony_ci * @gfp_mask: Memory allocation flags (for bio_alloc) 24262306a36Sopenharmony_ci * 24362306a36Sopenharmony_ci * Description: 24462306a36Sopenharmony_ci * Perform the specified operation on the range of zones specified by 24562306a36Sopenharmony_ci * @sector..@sector+@nr_sectors. Specifying the entire disk sector range 24662306a36Sopenharmony_ci * is valid, but the specified range should not contain conventional zones. 24762306a36Sopenharmony_ci * The operation to execute on each zone can be a zone reset, open, close 24862306a36Sopenharmony_ci * or finish request. 24962306a36Sopenharmony_ci */ 25062306a36Sopenharmony_ciint blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, 25162306a36Sopenharmony_ci sector_t sector, sector_t nr_sectors, gfp_t gfp_mask) 25262306a36Sopenharmony_ci{ 25362306a36Sopenharmony_ci struct request_queue *q = bdev_get_queue(bdev); 25462306a36Sopenharmony_ci sector_t zone_sectors = bdev_zone_sectors(bdev); 25562306a36Sopenharmony_ci sector_t capacity = bdev_nr_sectors(bdev); 25662306a36Sopenharmony_ci sector_t end_sector = sector + nr_sectors; 25762306a36Sopenharmony_ci struct bio *bio = NULL; 25862306a36Sopenharmony_ci int ret = 0; 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci if (!bdev_is_zoned(bdev)) 26162306a36Sopenharmony_ci return -EOPNOTSUPP; 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_ci if (bdev_read_only(bdev)) 26462306a36Sopenharmony_ci return -EPERM; 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci if (!op_is_zone_mgmt(op)) 26762306a36Sopenharmony_ci return -EOPNOTSUPP; 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci if (end_sector <= sector || end_sector > capacity) 27062306a36Sopenharmony_ci /* Out of range */ 27162306a36Sopenharmony_ci return -EINVAL; 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci /* Check alignment (handle eventual smaller last zone) */ 27462306a36Sopenharmony_ci if (!bdev_is_zone_start(bdev, sector)) 27562306a36Sopenharmony_ci return -EINVAL; 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci if (!bdev_is_zone_start(bdev, nr_sectors) && end_sector != capacity) 27862306a36Sopenharmony_ci return -EINVAL; 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci /* 28162306a36Sopenharmony_ci * In the case of a zone reset operation over all zones, 28262306a36Sopenharmony_ci * REQ_OP_ZONE_RESET_ALL can be used with devices supporting this 28362306a36Sopenharmony_ci * command. For other devices, we emulate this command behavior by 28462306a36Sopenharmony_ci * identifying the zones needing a reset. 28562306a36Sopenharmony_ci */ 28662306a36Sopenharmony_ci if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) { 28762306a36Sopenharmony_ci if (!blk_queue_zone_resetall(q)) 28862306a36Sopenharmony_ci return blkdev_zone_reset_all_emulated(bdev, gfp_mask); 28962306a36Sopenharmony_ci return blkdev_zone_reset_all(bdev, gfp_mask); 29062306a36Sopenharmony_ci } 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci while (sector < end_sector) { 29362306a36Sopenharmony_ci bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, gfp_mask); 29462306a36Sopenharmony_ci bio->bi_iter.bi_sector = sector; 29562306a36Sopenharmony_ci sector += zone_sectors; 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci /* This may take a while, so be nice to others */ 29862306a36Sopenharmony_ci cond_resched(); 29962306a36Sopenharmony_ci } 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci ret = submit_bio_wait(bio); 30262306a36Sopenharmony_ci bio_put(bio); 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci return ret; 30562306a36Sopenharmony_ci} 30662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(blkdev_zone_mgmt); 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_cistruct zone_report_args { 30962306a36Sopenharmony_ci struct blk_zone __user *zones; 31062306a36Sopenharmony_ci}; 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_cistatic int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx, 31362306a36Sopenharmony_ci void *data) 31462306a36Sopenharmony_ci{ 31562306a36Sopenharmony_ci struct zone_report_args *args = data; 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone))) 31862306a36Sopenharmony_ci return -EFAULT; 31962306a36Sopenharmony_ci return 0; 32062306a36Sopenharmony_ci} 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci/* 32362306a36Sopenharmony_ci * BLKREPORTZONE ioctl processing. 32462306a36Sopenharmony_ci * Called from blkdev_ioctl. 32562306a36Sopenharmony_ci */ 32662306a36Sopenharmony_ciint blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd, 32762306a36Sopenharmony_ci unsigned long arg) 32862306a36Sopenharmony_ci{ 32962306a36Sopenharmony_ci void __user *argp = (void __user *)arg; 33062306a36Sopenharmony_ci struct zone_report_args args; 33162306a36Sopenharmony_ci struct blk_zone_report rep; 33262306a36Sopenharmony_ci int ret; 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci if (!argp) 33562306a36Sopenharmony_ci return -EINVAL; 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci if (!bdev_is_zoned(bdev)) 33862306a36Sopenharmony_ci return -ENOTTY; 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report))) 34162306a36Sopenharmony_ci return -EFAULT; 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_ci if (!rep.nr_zones) 34462306a36Sopenharmony_ci return -EINVAL; 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci args.zones = argp + sizeof(struct blk_zone_report); 34762306a36Sopenharmony_ci ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones, 34862306a36Sopenharmony_ci blkdev_copy_zone_to_user, &args); 34962306a36Sopenharmony_ci if (ret < 0) 35062306a36Sopenharmony_ci return ret; 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ci rep.nr_zones = ret; 35362306a36Sopenharmony_ci rep.flags = BLK_ZONE_REP_CAPACITY; 35462306a36Sopenharmony_ci if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) 35562306a36Sopenharmony_ci return -EFAULT; 35662306a36Sopenharmony_ci return 0; 35762306a36Sopenharmony_ci} 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_cistatic int blkdev_truncate_zone_range(struct block_device *bdev, 36062306a36Sopenharmony_ci blk_mode_t mode, const struct blk_zone_range *zrange) 36162306a36Sopenharmony_ci{ 36262306a36Sopenharmony_ci loff_t start, end; 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci if (zrange->sector + zrange->nr_sectors <= zrange->sector || 36562306a36Sopenharmony_ci zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk)) 36662306a36Sopenharmony_ci /* Out of range */ 36762306a36Sopenharmony_ci return -EINVAL; 36862306a36Sopenharmony_ci 36962306a36Sopenharmony_ci start = zrange->sector << SECTOR_SHIFT; 37062306a36Sopenharmony_ci end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1; 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_ci return truncate_bdev_range(bdev, mode, start, end); 37362306a36Sopenharmony_ci} 37462306a36Sopenharmony_ci 37562306a36Sopenharmony_ci/* 37662306a36Sopenharmony_ci * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing. 37762306a36Sopenharmony_ci * Called from blkdev_ioctl. 37862306a36Sopenharmony_ci */ 37962306a36Sopenharmony_ciint blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, 38062306a36Sopenharmony_ci unsigned int cmd, unsigned long arg) 38162306a36Sopenharmony_ci{ 38262306a36Sopenharmony_ci void __user *argp = (void __user *)arg; 38362306a36Sopenharmony_ci struct blk_zone_range zrange; 38462306a36Sopenharmony_ci enum req_op op; 38562306a36Sopenharmony_ci int ret; 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_ci if (!argp) 38862306a36Sopenharmony_ci return -EINVAL; 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ci if (!bdev_is_zoned(bdev)) 39162306a36Sopenharmony_ci return -ENOTTY; 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci if (!(mode & BLK_OPEN_WRITE)) 39462306a36Sopenharmony_ci return -EBADF; 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_ci if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range))) 39762306a36Sopenharmony_ci return -EFAULT; 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci switch (cmd) { 40062306a36Sopenharmony_ci case BLKRESETZONE: 40162306a36Sopenharmony_ci op = REQ_OP_ZONE_RESET; 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci /* Invalidate the page cache, including dirty pages. */ 40462306a36Sopenharmony_ci filemap_invalidate_lock(bdev->bd_inode->i_mapping); 40562306a36Sopenharmony_ci ret = blkdev_truncate_zone_range(bdev, mode, &zrange); 40662306a36Sopenharmony_ci if (ret) 40762306a36Sopenharmony_ci goto fail; 40862306a36Sopenharmony_ci break; 40962306a36Sopenharmony_ci case BLKOPENZONE: 41062306a36Sopenharmony_ci op = REQ_OP_ZONE_OPEN; 41162306a36Sopenharmony_ci break; 41262306a36Sopenharmony_ci case BLKCLOSEZONE: 41362306a36Sopenharmony_ci op = REQ_OP_ZONE_CLOSE; 41462306a36Sopenharmony_ci break; 41562306a36Sopenharmony_ci case BLKFINISHZONE: 41662306a36Sopenharmony_ci op = REQ_OP_ZONE_FINISH; 41762306a36Sopenharmony_ci break; 41862306a36Sopenharmony_ci default: 41962306a36Sopenharmony_ci return -ENOTTY; 42062306a36Sopenharmony_ci } 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_ci ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors, 42362306a36Sopenharmony_ci GFP_KERNEL); 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_cifail: 42662306a36Sopenharmony_ci if (cmd == BLKRESETZONE) 42762306a36Sopenharmony_ci filemap_invalidate_unlock(bdev->bd_inode->i_mapping); 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci return ret; 43062306a36Sopenharmony_ci} 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_civoid disk_free_zone_bitmaps(struct gendisk *disk) 43362306a36Sopenharmony_ci{ 43462306a36Sopenharmony_ci kfree(disk->conv_zones_bitmap); 43562306a36Sopenharmony_ci disk->conv_zones_bitmap = NULL; 43662306a36Sopenharmony_ci kfree(disk->seq_zones_wlock); 43762306a36Sopenharmony_ci disk->seq_zones_wlock = NULL; 43862306a36Sopenharmony_ci} 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_cistruct blk_revalidate_zone_args { 44162306a36Sopenharmony_ci struct gendisk *disk; 44262306a36Sopenharmony_ci unsigned long *conv_zones_bitmap; 44362306a36Sopenharmony_ci unsigned long *seq_zones_wlock; 44462306a36Sopenharmony_ci unsigned int nr_zones; 44562306a36Sopenharmony_ci sector_t sector; 44662306a36Sopenharmony_ci}; 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_ci/* 44962306a36Sopenharmony_ci * Helper function to check the validity of zones of a zoned block device. 45062306a36Sopenharmony_ci */ 45162306a36Sopenharmony_cistatic int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, 45262306a36Sopenharmony_ci void *data) 45362306a36Sopenharmony_ci{ 45462306a36Sopenharmony_ci struct blk_revalidate_zone_args *args = data; 45562306a36Sopenharmony_ci struct gendisk *disk = args->disk; 45662306a36Sopenharmony_ci struct request_queue *q = disk->queue; 45762306a36Sopenharmony_ci sector_t capacity = get_capacity(disk); 45862306a36Sopenharmony_ci sector_t zone_sectors = q->limits.chunk_sectors; 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci /* Check for bad zones and holes in the zone report */ 46162306a36Sopenharmony_ci if (zone->start != args->sector) { 46262306a36Sopenharmony_ci pr_warn("%s: Zone gap at sectors %llu..%llu\n", 46362306a36Sopenharmony_ci disk->disk_name, args->sector, zone->start); 46462306a36Sopenharmony_ci return -ENODEV; 46562306a36Sopenharmony_ci } 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci if (zone->start >= capacity || !zone->len) { 46862306a36Sopenharmony_ci pr_warn("%s: Invalid zone start %llu, length %llu\n", 46962306a36Sopenharmony_ci disk->disk_name, zone->start, zone->len); 47062306a36Sopenharmony_ci return -ENODEV; 47162306a36Sopenharmony_ci } 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci /* 47462306a36Sopenharmony_ci * All zones must have the same size, with the exception on an eventual 47562306a36Sopenharmony_ci * smaller last zone. 47662306a36Sopenharmony_ci */ 47762306a36Sopenharmony_ci if (zone->start + zone->len < capacity) { 47862306a36Sopenharmony_ci if (zone->len != zone_sectors) { 47962306a36Sopenharmony_ci pr_warn("%s: Invalid zoned device with non constant zone size\n", 48062306a36Sopenharmony_ci disk->disk_name); 48162306a36Sopenharmony_ci return -ENODEV; 48262306a36Sopenharmony_ci } 48362306a36Sopenharmony_ci } else if (zone->len > zone_sectors) { 48462306a36Sopenharmony_ci pr_warn("%s: Invalid zoned device with larger last zone size\n", 48562306a36Sopenharmony_ci disk->disk_name); 48662306a36Sopenharmony_ci return -ENODEV; 48762306a36Sopenharmony_ci } 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_ci /* Check zone type */ 49062306a36Sopenharmony_ci switch (zone->type) { 49162306a36Sopenharmony_ci case BLK_ZONE_TYPE_CONVENTIONAL: 49262306a36Sopenharmony_ci if (!args->conv_zones_bitmap) { 49362306a36Sopenharmony_ci args->conv_zones_bitmap = 49462306a36Sopenharmony_ci blk_alloc_zone_bitmap(q->node, args->nr_zones); 49562306a36Sopenharmony_ci if (!args->conv_zones_bitmap) 49662306a36Sopenharmony_ci return -ENOMEM; 49762306a36Sopenharmony_ci } 49862306a36Sopenharmony_ci set_bit(idx, args->conv_zones_bitmap); 49962306a36Sopenharmony_ci break; 50062306a36Sopenharmony_ci case BLK_ZONE_TYPE_SEQWRITE_REQ: 50162306a36Sopenharmony_ci case BLK_ZONE_TYPE_SEQWRITE_PREF: 50262306a36Sopenharmony_ci if (!args->seq_zones_wlock) { 50362306a36Sopenharmony_ci args->seq_zones_wlock = 50462306a36Sopenharmony_ci blk_alloc_zone_bitmap(q->node, args->nr_zones); 50562306a36Sopenharmony_ci if (!args->seq_zones_wlock) 50662306a36Sopenharmony_ci return -ENOMEM; 50762306a36Sopenharmony_ci } 50862306a36Sopenharmony_ci break; 50962306a36Sopenharmony_ci default: 51062306a36Sopenharmony_ci pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n", 51162306a36Sopenharmony_ci disk->disk_name, (int)zone->type, zone->start); 51262306a36Sopenharmony_ci return -ENODEV; 51362306a36Sopenharmony_ci } 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci args->sector += zone->len; 51662306a36Sopenharmony_ci return 0; 51762306a36Sopenharmony_ci} 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_ci/** 52062306a36Sopenharmony_ci * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps 52162306a36Sopenharmony_ci * @disk: Target disk 52262306a36Sopenharmony_ci * @update_driver_data: Callback to update driver data on the frozen disk 52362306a36Sopenharmony_ci * 52462306a36Sopenharmony_ci * Helper function for low-level device drivers to check and (re) allocate and 52562306a36Sopenharmony_ci * initialize a disk request queue zone bitmaps. This functions should normally 52662306a36Sopenharmony_ci * be called within the disk ->revalidate method for blk-mq based drivers. 52762306a36Sopenharmony_ci * Before calling this function, the device driver must already have set the 52862306a36Sopenharmony_ci * device zone size (chunk_sector limit) and the max zone append limit. 52962306a36Sopenharmony_ci * For BIO based drivers, this function cannot be used. BIO based device drivers 53062306a36Sopenharmony_ci * only need to set disk->nr_zones so that the sysfs exposed value is correct. 53162306a36Sopenharmony_ci * If the @update_driver_data callback function is not NULL, the callback is 53262306a36Sopenharmony_ci * executed with the device request queue frozen after all zones have been 53362306a36Sopenharmony_ci * checked. 53462306a36Sopenharmony_ci */ 53562306a36Sopenharmony_ciint blk_revalidate_disk_zones(struct gendisk *disk, 53662306a36Sopenharmony_ci void (*update_driver_data)(struct gendisk *disk)) 53762306a36Sopenharmony_ci{ 53862306a36Sopenharmony_ci struct request_queue *q = disk->queue; 53962306a36Sopenharmony_ci sector_t zone_sectors = q->limits.chunk_sectors; 54062306a36Sopenharmony_ci sector_t capacity = get_capacity(disk); 54162306a36Sopenharmony_ci struct blk_revalidate_zone_args args = { }; 54262306a36Sopenharmony_ci unsigned int noio_flag; 54362306a36Sopenharmony_ci int ret; 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_ci if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) 54662306a36Sopenharmony_ci return -EIO; 54762306a36Sopenharmony_ci if (WARN_ON_ONCE(!queue_is_mq(q))) 54862306a36Sopenharmony_ci return -EIO; 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_ci if (!capacity) 55162306a36Sopenharmony_ci return -ENODEV; 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_ci /* 55462306a36Sopenharmony_ci * Checks that the device driver indicated a valid zone size and that 55562306a36Sopenharmony_ci * the max zone append limit is set. 55662306a36Sopenharmony_ci */ 55762306a36Sopenharmony_ci if (!zone_sectors || !is_power_of_2(zone_sectors)) { 55862306a36Sopenharmony_ci pr_warn("%s: Invalid non power of two zone size (%llu)\n", 55962306a36Sopenharmony_ci disk->disk_name, zone_sectors); 56062306a36Sopenharmony_ci return -ENODEV; 56162306a36Sopenharmony_ci } 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci if (!q->limits.max_zone_append_sectors) { 56462306a36Sopenharmony_ci pr_warn("%s: Invalid 0 maximum zone append limit\n", 56562306a36Sopenharmony_ci disk->disk_name); 56662306a36Sopenharmony_ci return -ENODEV; 56762306a36Sopenharmony_ci } 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci /* 57062306a36Sopenharmony_ci * Ensure that all memory allocations in this context are done as if 57162306a36Sopenharmony_ci * GFP_NOIO was specified. 57262306a36Sopenharmony_ci */ 57362306a36Sopenharmony_ci args.disk = disk; 57462306a36Sopenharmony_ci args.nr_zones = (capacity + zone_sectors - 1) >> ilog2(zone_sectors); 57562306a36Sopenharmony_ci noio_flag = memalloc_noio_save(); 57662306a36Sopenharmony_ci ret = disk->fops->report_zones(disk, 0, UINT_MAX, 57762306a36Sopenharmony_ci blk_revalidate_zone_cb, &args); 57862306a36Sopenharmony_ci if (!ret) { 57962306a36Sopenharmony_ci pr_warn("%s: No zones reported\n", disk->disk_name); 58062306a36Sopenharmony_ci ret = -ENODEV; 58162306a36Sopenharmony_ci } 58262306a36Sopenharmony_ci memalloc_noio_restore(noio_flag); 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci /* 58562306a36Sopenharmony_ci * If zones where reported, make sure that the entire disk capacity 58662306a36Sopenharmony_ci * has been checked. 58762306a36Sopenharmony_ci */ 58862306a36Sopenharmony_ci if (ret > 0 && args.sector != capacity) { 58962306a36Sopenharmony_ci pr_warn("%s: Missing zones from sector %llu\n", 59062306a36Sopenharmony_ci disk->disk_name, args.sector); 59162306a36Sopenharmony_ci ret = -ENODEV; 59262306a36Sopenharmony_ci } 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_ci /* 59562306a36Sopenharmony_ci * Install the new bitmaps and update nr_zones only once the queue is 59662306a36Sopenharmony_ci * stopped and all I/Os are completed (i.e. a scheduler is not 59762306a36Sopenharmony_ci * referencing the bitmaps). 59862306a36Sopenharmony_ci */ 59962306a36Sopenharmony_ci blk_mq_freeze_queue(q); 60062306a36Sopenharmony_ci if (ret > 0) { 60162306a36Sopenharmony_ci disk->nr_zones = args.nr_zones; 60262306a36Sopenharmony_ci swap(disk->seq_zones_wlock, args.seq_zones_wlock); 60362306a36Sopenharmony_ci swap(disk->conv_zones_bitmap, args.conv_zones_bitmap); 60462306a36Sopenharmony_ci if (update_driver_data) 60562306a36Sopenharmony_ci update_driver_data(disk); 60662306a36Sopenharmony_ci ret = 0; 60762306a36Sopenharmony_ci } else { 60862306a36Sopenharmony_ci pr_warn("%s: failed to revalidate zones\n", disk->disk_name); 60962306a36Sopenharmony_ci disk_free_zone_bitmaps(disk); 61062306a36Sopenharmony_ci } 61162306a36Sopenharmony_ci blk_mq_unfreeze_queue(q); 61262306a36Sopenharmony_ci 61362306a36Sopenharmony_ci kfree(args.seq_zones_wlock); 61462306a36Sopenharmony_ci kfree(args.conv_zones_bitmap); 61562306a36Sopenharmony_ci return ret; 61662306a36Sopenharmony_ci} 61762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(blk_revalidate_disk_zones); 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_civoid disk_clear_zone_settings(struct gendisk *disk) 62062306a36Sopenharmony_ci{ 62162306a36Sopenharmony_ci struct request_queue *q = disk->queue; 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci blk_mq_freeze_queue(q); 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_ci disk_free_zone_bitmaps(disk); 62662306a36Sopenharmony_ci blk_queue_flag_clear(QUEUE_FLAG_ZONE_RESETALL, q); 62762306a36Sopenharmony_ci q->required_elevator_features &= ~ELEVATOR_F_ZBD_SEQ_WRITE; 62862306a36Sopenharmony_ci disk->nr_zones = 0; 62962306a36Sopenharmony_ci disk->max_open_zones = 0; 63062306a36Sopenharmony_ci disk->max_active_zones = 0; 63162306a36Sopenharmony_ci q->limits.chunk_sectors = 0; 63262306a36Sopenharmony_ci q->limits.zone_write_granularity = 0; 63362306a36Sopenharmony_ci q->limits.max_zone_append_sectors = 0; 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ci blk_mq_unfreeze_queue(q); 63662306a36Sopenharmony_ci} 637