162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Zoned block device handling
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (c) 2015, Hannes Reinecke
662306a36Sopenharmony_ci * Copyright (c) 2015, SUSE Linux GmbH
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci * Copyright (c) 2016, Damien Le Moal
962306a36Sopenharmony_ci * Copyright (c) 2016, Western Digital
1062306a36Sopenharmony_ci */
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci#include <linux/kernel.h>
1362306a36Sopenharmony_ci#include <linux/module.h>
1462306a36Sopenharmony_ci#include <linux/rbtree.h>
1562306a36Sopenharmony_ci#include <linux/blkdev.h>
1662306a36Sopenharmony_ci#include <linux/blk-mq.h>
1762306a36Sopenharmony_ci#include <linux/mm.h>
1862306a36Sopenharmony_ci#include <linux/vmalloc.h>
1962306a36Sopenharmony_ci#include <linux/sched/mm.h>
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_ci#include "blk.h"
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_ci#define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name
2462306a36Sopenharmony_cistatic const char *const zone_cond_name[] = {
2562306a36Sopenharmony_ci	ZONE_COND_NAME(NOT_WP),
2662306a36Sopenharmony_ci	ZONE_COND_NAME(EMPTY),
2762306a36Sopenharmony_ci	ZONE_COND_NAME(IMP_OPEN),
2862306a36Sopenharmony_ci	ZONE_COND_NAME(EXP_OPEN),
2962306a36Sopenharmony_ci	ZONE_COND_NAME(CLOSED),
3062306a36Sopenharmony_ci	ZONE_COND_NAME(READONLY),
3162306a36Sopenharmony_ci	ZONE_COND_NAME(FULL),
3262306a36Sopenharmony_ci	ZONE_COND_NAME(OFFLINE),
3362306a36Sopenharmony_ci};
3462306a36Sopenharmony_ci#undef ZONE_COND_NAME
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci/**
3762306a36Sopenharmony_ci * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX.
3862306a36Sopenharmony_ci * @zone_cond: BLK_ZONE_COND_XXX.
3962306a36Sopenharmony_ci *
4062306a36Sopenharmony_ci * Description: Centralize block layer function to convert BLK_ZONE_COND_XXX
4162306a36Sopenharmony_ci * into string format. Useful in the debugging and tracing zone conditions. For
4262306a36Sopenharmony_ci * invalid BLK_ZONE_COND_XXX it returns string "UNKNOWN".
4362306a36Sopenharmony_ci */
4462306a36Sopenharmony_ciconst char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
4562306a36Sopenharmony_ci{
4662306a36Sopenharmony_ci	static const char *zone_cond_str = "UNKNOWN";
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci	if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond])
4962306a36Sopenharmony_ci		zone_cond_str = zone_cond_name[zone_cond];
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci	return zone_cond_str;
5262306a36Sopenharmony_ci}
5362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(blk_zone_cond_str);
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci/*
5662306a36Sopenharmony_ci * Return true if a request is a write requests that needs zone write locking.
5762306a36Sopenharmony_ci */
5862306a36Sopenharmony_cibool blk_req_needs_zone_write_lock(struct request *rq)
5962306a36Sopenharmony_ci{
6062306a36Sopenharmony_ci	if (!rq->q->disk->seq_zones_wlock)
6162306a36Sopenharmony_ci		return false;
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_ci	return blk_rq_is_seq_zoned_write(rq);
6462306a36Sopenharmony_ci}
6562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_cibool blk_req_zone_write_trylock(struct request *rq)
6862306a36Sopenharmony_ci{
6962306a36Sopenharmony_ci	unsigned int zno = blk_rq_zone_no(rq);
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci	if (test_and_set_bit(zno, rq->q->disk->seq_zones_wlock))
7262306a36Sopenharmony_ci		return false;
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci	WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
7562306a36Sopenharmony_ci	rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci	return true;
7862306a36Sopenharmony_ci}
7962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(blk_req_zone_write_trylock);
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_civoid __blk_req_zone_write_lock(struct request *rq)
8262306a36Sopenharmony_ci{
8362306a36Sopenharmony_ci	if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
8462306a36Sopenharmony_ci					  rq->q->disk->seq_zones_wlock)))
8562306a36Sopenharmony_ci		return;
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_ci	WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
8862306a36Sopenharmony_ci	rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
8962306a36Sopenharmony_ci}
9062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_civoid __blk_req_zone_write_unlock(struct request *rq)
9362306a36Sopenharmony_ci{
9462306a36Sopenharmony_ci	rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
9562306a36Sopenharmony_ci	if (rq->q->disk->seq_zones_wlock)
9662306a36Sopenharmony_ci		WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
9762306a36Sopenharmony_ci						 rq->q->disk->seq_zones_wlock));
9862306a36Sopenharmony_ci}
9962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci/**
10262306a36Sopenharmony_ci * bdev_nr_zones - Get number of zones
10362306a36Sopenharmony_ci * @bdev:	Target device
10462306a36Sopenharmony_ci *
10562306a36Sopenharmony_ci * Return the total number of zones of a zoned block device.  For a block
10662306a36Sopenharmony_ci * device without zone capabilities, the number of zones is always 0.
10762306a36Sopenharmony_ci */
10862306a36Sopenharmony_ciunsigned int bdev_nr_zones(struct block_device *bdev)
10962306a36Sopenharmony_ci{
11062306a36Sopenharmony_ci	sector_t zone_sectors = bdev_zone_sectors(bdev);
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	if (!bdev_is_zoned(bdev))
11362306a36Sopenharmony_ci		return 0;
11462306a36Sopenharmony_ci	return (bdev_nr_sectors(bdev) + zone_sectors - 1) >>
11562306a36Sopenharmony_ci		ilog2(zone_sectors);
11662306a36Sopenharmony_ci}
11762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(bdev_nr_zones);
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci/**
12062306a36Sopenharmony_ci * blkdev_report_zones - Get zones information
12162306a36Sopenharmony_ci * @bdev:	Target block device
12262306a36Sopenharmony_ci * @sector:	Sector from which to report zones
12362306a36Sopenharmony_ci * @nr_zones:	Maximum number of zones to report
12462306a36Sopenharmony_ci * @cb:		Callback function called for each reported zone
12562306a36Sopenharmony_ci * @data:	Private data for the callback
12662306a36Sopenharmony_ci *
12762306a36Sopenharmony_ci * Description:
12862306a36Sopenharmony_ci *    Get zone information starting from the zone containing @sector for at most
12962306a36Sopenharmony_ci *    @nr_zones, and call @cb for each zone reported by the device.
13062306a36Sopenharmony_ci *    To report all zones in a device starting from @sector, the BLK_ALL_ZONES
13162306a36Sopenharmony_ci *    constant can be passed to @nr_zones.
13262306a36Sopenharmony_ci *    Returns the number of zones reported by the device, or a negative errno
13362306a36Sopenharmony_ci *    value in case of failure.
13462306a36Sopenharmony_ci *
13562306a36Sopenharmony_ci *    Note: The caller must use memalloc_noXX_save/restore() calls to control
13662306a36Sopenharmony_ci *    memory allocations done within this function.
13762306a36Sopenharmony_ci */
13862306a36Sopenharmony_ciint blkdev_report_zones(struct block_device *bdev, sector_t sector,
13962306a36Sopenharmony_ci			unsigned int nr_zones, report_zones_cb cb, void *data)
14062306a36Sopenharmony_ci{
14162306a36Sopenharmony_ci	struct gendisk *disk = bdev->bd_disk;
14262306a36Sopenharmony_ci	sector_t capacity = get_capacity(disk);
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones))
14562306a36Sopenharmony_ci		return -EOPNOTSUPP;
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci	if (!nr_zones || sector >= capacity)
14862306a36Sopenharmony_ci		return 0;
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci	return disk->fops->report_zones(disk, sector, nr_zones, cb, data);
15162306a36Sopenharmony_ci}
15262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(blkdev_report_zones);
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_cistatic inline unsigned long *blk_alloc_zone_bitmap(int node,
15562306a36Sopenharmony_ci						   unsigned int nr_zones)
15662306a36Sopenharmony_ci{
15762306a36Sopenharmony_ci	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
15862306a36Sopenharmony_ci			    GFP_NOIO, node);
15962306a36Sopenharmony_ci}
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_cistatic int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
16262306a36Sopenharmony_ci				  void *data)
16362306a36Sopenharmony_ci{
16462306a36Sopenharmony_ci	/*
16562306a36Sopenharmony_ci	 * For an all-zones reset, ignore conventional, empty, read-only
16662306a36Sopenharmony_ci	 * and offline zones.
16762306a36Sopenharmony_ci	 */
16862306a36Sopenharmony_ci	switch (zone->cond) {
16962306a36Sopenharmony_ci	case BLK_ZONE_COND_NOT_WP:
17062306a36Sopenharmony_ci	case BLK_ZONE_COND_EMPTY:
17162306a36Sopenharmony_ci	case BLK_ZONE_COND_READONLY:
17262306a36Sopenharmony_ci	case BLK_ZONE_COND_OFFLINE:
17362306a36Sopenharmony_ci		return 0;
17462306a36Sopenharmony_ci	default:
17562306a36Sopenharmony_ci		set_bit(idx, (unsigned long *)data);
17662306a36Sopenharmony_ci		return 0;
17762306a36Sopenharmony_ci	}
17862306a36Sopenharmony_ci}
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_cistatic int blkdev_zone_reset_all_emulated(struct block_device *bdev,
18162306a36Sopenharmony_ci					  gfp_t gfp_mask)
18262306a36Sopenharmony_ci{
18362306a36Sopenharmony_ci	struct gendisk *disk = bdev->bd_disk;
18462306a36Sopenharmony_ci	sector_t capacity = bdev_nr_sectors(bdev);
18562306a36Sopenharmony_ci	sector_t zone_sectors = bdev_zone_sectors(bdev);
18662306a36Sopenharmony_ci	unsigned long *need_reset;
18762306a36Sopenharmony_ci	struct bio *bio = NULL;
18862306a36Sopenharmony_ci	sector_t sector = 0;
18962306a36Sopenharmony_ci	int ret;
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci	need_reset = blk_alloc_zone_bitmap(disk->queue->node, disk->nr_zones);
19262306a36Sopenharmony_ci	if (!need_reset)
19362306a36Sopenharmony_ci		return -ENOMEM;
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	ret = disk->fops->report_zones(disk, 0, disk->nr_zones,
19662306a36Sopenharmony_ci				       blk_zone_need_reset_cb, need_reset);
19762306a36Sopenharmony_ci	if (ret < 0)
19862306a36Sopenharmony_ci		goto out_free_need_reset;
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	ret = 0;
20162306a36Sopenharmony_ci	while (sector < capacity) {
20262306a36Sopenharmony_ci		if (!test_bit(disk_zone_no(disk, sector), need_reset)) {
20362306a36Sopenharmony_ci			sector += zone_sectors;
20462306a36Sopenharmony_ci			continue;
20562306a36Sopenharmony_ci		}
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci		bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC,
20862306a36Sopenharmony_ci				   gfp_mask);
20962306a36Sopenharmony_ci		bio->bi_iter.bi_sector = sector;
21062306a36Sopenharmony_ci		sector += zone_sectors;
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci		/* This may take a while, so be nice to others */
21362306a36Sopenharmony_ci		cond_resched();
21462306a36Sopenharmony_ci	}
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	if (bio) {
21762306a36Sopenharmony_ci		ret = submit_bio_wait(bio);
21862306a36Sopenharmony_ci		bio_put(bio);
21962306a36Sopenharmony_ci	}
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ciout_free_need_reset:
22262306a36Sopenharmony_ci	kfree(need_reset);
22362306a36Sopenharmony_ci	return ret;
22462306a36Sopenharmony_ci}
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_cistatic int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
22762306a36Sopenharmony_ci{
22862306a36Sopenharmony_ci	struct bio bio;
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci	bio_init(&bio, bdev, NULL, 0, REQ_OP_ZONE_RESET_ALL | REQ_SYNC);
23162306a36Sopenharmony_ci	return submit_bio_wait(&bio);
23262306a36Sopenharmony_ci}
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci/**
23562306a36Sopenharmony_ci * blkdev_zone_mgmt - Execute a zone management operation on a range of zones
23662306a36Sopenharmony_ci * @bdev:	Target block device
23762306a36Sopenharmony_ci * @op:		Operation to be performed on the zones
23862306a36Sopenharmony_ci * @sector:	Start sector of the first zone to operate on
23962306a36Sopenharmony_ci * @nr_sectors:	Number of sectors, should be at least the length of one zone and
24062306a36Sopenharmony_ci *		must be zone size aligned.
24162306a36Sopenharmony_ci * @gfp_mask:	Memory allocation flags (for bio_alloc)
24262306a36Sopenharmony_ci *
24362306a36Sopenharmony_ci * Description:
24462306a36Sopenharmony_ci *    Perform the specified operation on the range of zones specified by
24562306a36Sopenharmony_ci *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
24662306a36Sopenharmony_ci *    is valid, but the specified range should not contain conventional zones.
24762306a36Sopenharmony_ci *    The operation to execute on each zone can be a zone reset, open, close
24862306a36Sopenharmony_ci *    or finish request.
24962306a36Sopenharmony_ci */
25062306a36Sopenharmony_ciint blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
25162306a36Sopenharmony_ci		     sector_t sector, sector_t nr_sectors, gfp_t gfp_mask)
25262306a36Sopenharmony_ci{
25362306a36Sopenharmony_ci	struct request_queue *q = bdev_get_queue(bdev);
25462306a36Sopenharmony_ci	sector_t zone_sectors = bdev_zone_sectors(bdev);
25562306a36Sopenharmony_ci	sector_t capacity = bdev_nr_sectors(bdev);
25662306a36Sopenharmony_ci	sector_t end_sector = sector + nr_sectors;
25762306a36Sopenharmony_ci	struct bio *bio = NULL;
25862306a36Sopenharmony_ci	int ret = 0;
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci	if (!bdev_is_zoned(bdev))
26162306a36Sopenharmony_ci		return -EOPNOTSUPP;
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci	if (bdev_read_only(bdev))
26462306a36Sopenharmony_ci		return -EPERM;
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_ci	if (!op_is_zone_mgmt(op))
26762306a36Sopenharmony_ci		return -EOPNOTSUPP;
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	if (end_sector <= sector || end_sector > capacity)
27062306a36Sopenharmony_ci		/* Out of range */
27162306a36Sopenharmony_ci		return -EINVAL;
27262306a36Sopenharmony_ci
27362306a36Sopenharmony_ci	/* Check alignment (handle eventual smaller last zone) */
27462306a36Sopenharmony_ci	if (!bdev_is_zone_start(bdev, sector))
27562306a36Sopenharmony_ci		return -EINVAL;
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci	if (!bdev_is_zone_start(bdev, nr_sectors) && end_sector != capacity)
27862306a36Sopenharmony_ci		return -EINVAL;
27962306a36Sopenharmony_ci
28062306a36Sopenharmony_ci	/*
28162306a36Sopenharmony_ci	 * In the case of a zone reset operation over all zones,
28262306a36Sopenharmony_ci	 * REQ_OP_ZONE_RESET_ALL can be used with devices supporting this
28362306a36Sopenharmony_ci	 * command. For other devices, we emulate this command behavior by
28462306a36Sopenharmony_ci	 * identifying the zones needing a reset.
28562306a36Sopenharmony_ci	 */
28662306a36Sopenharmony_ci	if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) {
28762306a36Sopenharmony_ci		if (!blk_queue_zone_resetall(q))
28862306a36Sopenharmony_ci			return blkdev_zone_reset_all_emulated(bdev, gfp_mask);
28962306a36Sopenharmony_ci		return blkdev_zone_reset_all(bdev, gfp_mask);
29062306a36Sopenharmony_ci	}
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	while (sector < end_sector) {
29362306a36Sopenharmony_ci		bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, gfp_mask);
29462306a36Sopenharmony_ci		bio->bi_iter.bi_sector = sector;
29562306a36Sopenharmony_ci		sector += zone_sectors;
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci		/* This may take a while, so be nice to others */
29862306a36Sopenharmony_ci		cond_resched();
29962306a36Sopenharmony_ci	}
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci	ret = submit_bio_wait(bio);
30262306a36Sopenharmony_ci	bio_put(bio);
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci	return ret;
30562306a36Sopenharmony_ci}
30662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(blkdev_zone_mgmt);
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_cistruct zone_report_args {
30962306a36Sopenharmony_ci	struct blk_zone __user *zones;
31062306a36Sopenharmony_ci};
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_cistatic int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx,
31362306a36Sopenharmony_ci				    void *data)
31462306a36Sopenharmony_ci{
31562306a36Sopenharmony_ci	struct zone_report_args *args = data;
31662306a36Sopenharmony_ci
31762306a36Sopenharmony_ci	if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone)))
31862306a36Sopenharmony_ci		return -EFAULT;
31962306a36Sopenharmony_ci	return 0;
32062306a36Sopenharmony_ci}
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci/*
32362306a36Sopenharmony_ci * BLKREPORTZONE ioctl processing.
32462306a36Sopenharmony_ci * Called from blkdev_ioctl.
32562306a36Sopenharmony_ci */
32662306a36Sopenharmony_ciint blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd,
32762306a36Sopenharmony_ci		unsigned long arg)
32862306a36Sopenharmony_ci{
32962306a36Sopenharmony_ci	void __user *argp = (void __user *)arg;
33062306a36Sopenharmony_ci	struct zone_report_args args;
33162306a36Sopenharmony_ci	struct blk_zone_report rep;
33262306a36Sopenharmony_ci	int ret;
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci	if (!argp)
33562306a36Sopenharmony_ci		return -EINVAL;
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_ci	if (!bdev_is_zoned(bdev))
33862306a36Sopenharmony_ci		return -ENOTTY;
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
34162306a36Sopenharmony_ci		return -EFAULT;
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_ci	if (!rep.nr_zones)
34462306a36Sopenharmony_ci		return -EINVAL;
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	args.zones = argp + sizeof(struct blk_zone_report);
34762306a36Sopenharmony_ci	ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones,
34862306a36Sopenharmony_ci				  blkdev_copy_zone_to_user, &args);
34962306a36Sopenharmony_ci	if (ret < 0)
35062306a36Sopenharmony_ci		return ret;
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci	rep.nr_zones = ret;
35362306a36Sopenharmony_ci	rep.flags = BLK_ZONE_REP_CAPACITY;
35462306a36Sopenharmony_ci	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report)))
35562306a36Sopenharmony_ci		return -EFAULT;
35662306a36Sopenharmony_ci	return 0;
35762306a36Sopenharmony_ci}
35862306a36Sopenharmony_ci
35962306a36Sopenharmony_cistatic int blkdev_truncate_zone_range(struct block_device *bdev,
36062306a36Sopenharmony_ci		blk_mode_t mode, const struct blk_zone_range *zrange)
36162306a36Sopenharmony_ci{
36262306a36Sopenharmony_ci	loff_t start, end;
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci	if (zrange->sector + zrange->nr_sectors <= zrange->sector ||
36562306a36Sopenharmony_ci	    zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk))
36662306a36Sopenharmony_ci		/* Out of range */
36762306a36Sopenharmony_ci		return -EINVAL;
36862306a36Sopenharmony_ci
36962306a36Sopenharmony_ci	start = zrange->sector << SECTOR_SHIFT;
37062306a36Sopenharmony_ci	end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1;
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_ci	return truncate_bdev_range(bdev, mode, start, end);
37362306a36Sopenharmony_ci}
37462306a36Sopenharmony_ci
37562306a36Sopenharmony_ci/*
37662306a36Sopenharmony_ci * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
37762306a36Sopenharmony_ci * Called from blkdev_ioctl.
37862306a36Sopenharmony_ci */
37962306a36Sopenharmony_ciint blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
38062306a36Sopenharmony_ci			   unsigned int cmd, unsigned long arg)
38162306a36Sopenharmony_ci{
38262306a36Sopenharmony_ci	void __user *argp = (void __user *)arg;
38362306a36Sopenharmony_ci	struct blk_zone_range zrange;
38462306a36Sopenharmony_ci	enum req_op op;
38562306a36Sopenharmony_ci	int ret;
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_ci	if (!argp)
38862306a36Sopenharmony_ci		return -EINVAL;
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci	if (!bdev_is_zoned(bdev))
39162306a36Sopenharmony_ci		return -ENOTTY;
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_ci	if (!(mode & BLK_OPEN_WRITE))
39462306a36Sopenharmony_ci		return -EBADF;
39562306a36Sopenharmony_ci
39662306a36Sopenharmony_ci	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
39762306a36Sopenharmony_ci		return -EFAULT;
39862306a36Sopenharmony_ci
39962306a36Sopenharmony_ci	switch (cmd) {
40062306a36Sopenharmony_ci	case BLKRESETZONE:
40162306a36Sopenharmony_ci		op = REQ_OP_ZONE_RESET;
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci		/* Invalidate the page cache, including dirty pages. */
40462306a36Sopenharmony_ci		filemap_invalidate_lock(bdev->bd_inode->i_mapping);
40562306a36Sopenharmony_ci		ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
40662306a36Sopenharmony_ci		if (ret)
40762306a36Sopenharmony_ci			goto fail;
40862306a36Sopenharmony_ci		break;
40962306a36Sopenharmony_ci	case BLKOPENZONE:
41062306a36Sopenharmony_ci		op = REQ_OP_ZONE_OPEN;
41162306a36Sopenharmony_ci		break;
41262306a36Sopenharmony_ci	case BLKCLOSEZONE:
41362306a36Sopenharmony_ci		op = REQ_OP_ZONE_CLOSE;
41462306a36Sopenharmony_ci		break;
41562306a36Sopenharmony_ci	case BLKFINISHZONE:
41662306a36Sopenharmony_ci		op = REQ_OP_ZONE_FINISH;
41762306a36Sopenharmony_ci		break;
41862306a36Sopenharmony_ci	default:
41962306a36Sopenharmony_ci		return -ENOTTY;
42062306a36Sopenharmony_ci	}
42162306a36Sopenharmony_ci
42262306a36Sopenharmony_ci	ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
42362306a36Sopenharmony_ci			       GFP_KERNEL);
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_cifail:
42662306a36Sopenharmony_ci	if (cmd == BLKRESETZONE)
42762306a36Sopenharmony_ci		filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	return ret;
43062306a36Sopenharmony_ci}
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_civoid disk_free_zone_bitmaps(struct gendisk *disk)
43362306a36Sopenharmony_ci{
43462306a36Sopenharmony_ci	kfree(disk->conv_zones_bitmap);
43562306a36Sopenharmony_ci	disk->conv_zones_bitmap = NULL;
43662306a36Sopenharmony_ci	kfree(disk->seq_zones_wlock);
43762306a36Sopenharmony_ci	disk->seq_zones_wlock = NULL;
43862306a36Sopenharmony_ci}
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_cistruct blk_revalidate_zone_args {
44162306a36Sopenharmony_ci	struct gendisk	*disk;
44262306a36Sopenharmony_ci	unsigned long	*conv_zones_bitmap;
44362306a36Sopenharmony_ci	unsigned long	*seq_zones_wlock;
44462306a36Sopenharmony_ci	unsigned int	nr_zones;
44562306a36Sopenharmony_ci	sector_t	sector;
44662306a36Sopenharmony_ci};
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_ci/*
44962306a36Sopenharmony_ci * Helper function to check the validity of zones of a zoned block device.
45062306a36Sopenharmony_ci */
45162306a36Sopenharmony_cistatic int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
45262306a36Sopenharmony_ci				  void *data)
45362306a36Sopenharmony_ci{
45462306a36Sopenharmony_ci	struct blk_revalidate_zone_args *args = data;
45562306a36Sopenharmony_ci	struct gendisk *disk = args->disk;
45662306a36Sopenharmony_ci	struct request_queue *q = disk->queue;
45762306a36Sopenharmony_ci	sector_t capacity = get_capacity(disk);
45862306a36Sopenharmony_ci	sector_t zone_sectors = q->limits.chunk_sectors;
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci	/* Check for bad zones and holes in the zone report */
46162306a36Sopenharmony_ci	if (zone->start != args->sector) {
46262306a36Sopenharmony_ci		pr_warn("%s: Zone gap at sectors %llu..%llu\n",
46362306a36Sopenharmony_ci			disk->disk_name, args->sector, zone->start);
46462306a36Sopenharmony_ci		return -ENODEV;
46562306a36Sopenharmony_ci	}
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci	if (zone->start >= capacity || !zone->len) {
46862306a36Sopenharmony_ci		pr_warn("%s: Invalid zone start %llu, length %llu\n",
46962306a36Sopenharmony_ci			disk->disk_name, zone->start, zone->len);
47062306a36Sopenharmony_ci		return -ENODEV;
47162306a36Sopenharmony_ci	}
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_ci	/*
47462306a36Sopenharmony_ci	 * All zones must have the same size, with the exception on an eventual
47562306a36Sopenharmony_ci	 * smaller last zone.
47662306a36Sopenharmony_ci	 */
47762306a36Sopenharmony_ci	if (zone->start + zone->len < capacity) {
47862306a36Sopenharmony_ci		if (zone->len != zone_sectors) {
47962306a36Sopenharmony_ci			pr_warn("%s: Invalid zoned device with non constant zone size\n",
48062306a36Sopenharmony_ci				disk->disk_name);
48162306a36Sopenharmony_ci			return -ENODEV;
48262306a36Sopenharmony_ci		}
48362306a36Sopenharmony_ci	} else if (zone->len > zone_sectors) {
48462306a36Sopenharmony_ci		pr_warn("%s: Invalid zoned device with larger last zone size\n",
48562306a36Sopenharmony_ci			disk->disk_name);
48662306a36Sopenharmony_ci		return -ENODEV;
48762306a36Sopenharmony_ci	}
48862306a36Sopenharmony_ci
48962306a36Sopenharmony_ci	/* Check zone type */
49062306a36Sopenharmony_ci	switch (zone->type) {
49162306a36Sopenharmony_ci	case BLK_ZONE_TYPE_CONVENTIONAL:
49262306a36Sopenharmony_ci		if (!args->conv_zones_bitmap) {
49362306a36Sopenharmony_ci			args->conv_zones_bitmap =
49462306a36Sopenharmony_ci				blk_alloc_zone_bitmap(q->node, args->nr_zones);
49562306a36Sopenharmony_ci			if (!args->conv_zones_bitmap)
49662306a36Sopenharmony_ci				return -ENOMEM;
49762306a36Sopenharmony_ci		}
49862306a36Sopenharmony_ci		set_bit(idx, args->conv_zones_bitmap);
49962306a36Sopenharmony_ci		break;
50062306a36Sopenharmony_ci	case BLK_ZONE_TYPE_SEQWRITE_REQ:
50162306a36Sopenharmony_ci	case BLK_ZONE_TYPE_SEQWRITE_PREF:
50262306a36Sopenharmony_ci		if (!args->seq_zones_wlock) {
50362306a36Sopenharmony_ci			args->seq_zones_wlock =
50462306a36Sopenharmony_ci				blk_alloc_zone_bitmap(q->node, args->nr_zones);
50562306a36Sopenharmony_ci			if (!args->seq_zones_wlock)
50662306a36Sopenharmony_ci				return -ENOMEM;
50762306a36Sopenharmony_ci		}
50862306a36Sopenharmony_ci		break;
50962306a36Sopenharmony_ci	default:
51062306a36Sopenharmony_ci		pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
51162306a36Sopenharmony_ci			disk->disk_name, (int)zone->type, zone->start);
51262306a36Sopenharmony_ci		return -ENODEV;
51362306a36Sopenharmony_ci	}
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci	args->sector += zone->len;
51662306a36Sopenharmony_ci	return 0;
51762306a36Sopenharmony_ci}
51862306a36Sopenharmony_ci
51962306a36Sopenharmony_ci/**
52062306a36Sopenharmony_ci * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps
52162306a36Sopenharmony_ci * @disk:	Target disk
52262306a36Sopenharmony_ci * @update_driver_data:	Callback to update driver data on the frozen disk
52362306a36Sopenharmony_ci *
52462306a36Sopenharmony_ci * Helper function for low-level device drivers to check and (re) allocate and
52562306a36Sopenharmony_ci * initialize a disk request queue zone bitmaps. This functions should normally
52662306a36Sopenharmony_ci * be called within the disk ->revalidate method for blk-mq based drivers.
52762306a36Sopenharmony_ci * Before calling this function, the device driver must already have set the
52862306a36Sopenharmony_ci * device zone size (chunk_sector limit) and the max zone append limit.
52962306a36Sopenharmony_ci * For BIO based drivers, this function cannot be used. BIO based device drivers
53062306a36Sopenharmony_ci * only need to set disk->nr_zones so that the sysfs exposed value is correct.
53162306a36Sopenharmony_ci * If the @update_driver_data callback function is not NULL, the callback is
53262306a36Sopenharmony_ci * executed with the device request queue frozen after all zones have been
53362306a36Sopenharmony_ci * checked.
53462306a36Sopenharmony_ci */
53562306a36Sopenharmony_ciint blk_revalidate_disk_zones(struct gendisk *disk,
53662306a36Sopenharmony_ci			      void (*update_driver_data)(struct gendisk *disk))
53762306a36Sopenharmony_ci{
53862306a36Sopenharmony_ci	struct request_queue *q = disk->queue;
53962306a36Sopenharmony_ci	sector_t zone_sectors = q->limits.chunk_sectors;
54062306a36Sopenharmony_ci	sector_t capacity = get_capacity(disk);
54162306a36Sopenharmony_ci	struct blk_revalidate_zone_args args = { };
54262306a36Sopenharmony_ci	unsigned int noio_flag;
54362306a36Sopenharmony_ci	int ret;
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci	if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
54662306a36Sopenharmony_ci		return -EIO;
54762306a36Sopenharmony_ci	if (WARN_ON_ONCE(!queue_is_mq(q)))
54862306a36Sopenharmony_ci		return -EIO;
54962306a36Sopenharmony_ci
55062306a36Sopenharmony_ci	if (!capacity)
55162306a36Sopenharmony_ci		return -ENODEV;
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_ci	/*
55462306a36Sopenharmony_ci	 * Checks that the device driver indicated a valid zone size and that
55562306a36Sopenharmony_ci	 * the max zone append limit is set.
55662306a36Sopenharmony_ci	 */
55762306a36Sopenharmony_ci	if (!zone_sectors || !is_power_of_2(zone_sectors)) {
55862306a36Sopenharmony_ci		pr_warn("%s: Invalid non power of two zone size (%llu)\n",
55962306a36Sopenharmony_ci			disk->disk_name, zone_sectors);
56062306a36Sopenharmony_ci		return -ENODEV;
56162306a36Sopenharmony_ci	}
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci	if (!q->limits.max_zone_append_sectors) {
56462306a36Sopenharmony_ci		pr_warn("%s: Invalid 0 maximum zone append limit\n",
56562306a36Sopenharmony_ci			disk->disk_name);
56662306a36Sopenharmony_ci		return -ENODEV;
56762306a36Sopenharmony_ci	}
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_ci	/*
57062306a36Sopenharmony_ci	 * Ensure that all memory allocations in this context are done as if
57162306a36Sopenharmony_ci	 * GFP_NOIO was specified.
57262306a36Sopenharmony_ci	 */
57362306a36Sopenharmony_ci	args.disk = disk;
57462306a36Sopenharmony_ci	args.nr_zones = (capacity + zone_sectors - 1) >> ilog2(zone_sectors);
57562306a36Sopenharmony_ci	noio_flag = memalloc_noio_save();
57662306a36Sopenharmony_ci	ret = disk->fops->report_zones(disk, 0, UINT_MAX,
57762306a36Sopenharmony_ci				       blk_revalidate_zone_cb, &args);
57862306a36Sopenharmony_ci	if (!ret) {
57962306a36Sopenharmony_ci		pr_warn("%s: No zones reported\n", disk->disk_name);
58062306a36Sopenharmony_ci		ret = -ENODEV;
58162306a36Sopenharmony_ci	}
58262306a36Sopenharmony_ci	memalloc_noio_restore(noio_flag);
58362306a36Sopenharmony_ci
58462306a36Sopenharmony_ci	/*
58562306a36Sopenharmony_ci	 * If zones where reported, make sure that the entire disk capacity
58662306a36Sopenharmony_ci	 * has been checked.
58762306a36Sopenharmony_ci	 */
58862306a36Sopenharmony_ci	if (ret > 0 && args.sector != capacity) {
58962306a36Sopenharmony_ci		pr_warn("%s: Missing zones from sector %llu\n",
59062306a36Sopenharmony_ci			disk->disk_name, args.sector);
59162306a36Sopenharmony_ci		ret = -ENODEV;
59262306a36Sopenharmony_ci	}
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_ci	/*
59562306a36Sopenharmony_ci	 * Install the new bitmaps and update nr_zones only once the queue is
59662306a36Sopenharmony_ci	 * stopped and all I/Os are completed (i.e. a scheduler is not
59762306a36Sopenharmony_ci	 * referencing the bitmaps).
59862306a36Sopenharmony_ci	 */
59962306a36Sopenharmony_ci	blk_mq_freeze_queue(q);
60062306a36Sopenharmony_ci	if (ret > 0) {
60162306a36Sopenharmony_ci		disk->nr_zones = args.nr_zones;
60262306a36Sopenharmony_ci		swap(disk->seq_zones_wlock, args.seq_zones_wlock);
60362306a36Sopenharmony_ci		swap(disk->conv_zones_bitmap, args.conv_zones_bitmap);
60462306a36Sopenharmony_ci		if (update_driver_data)
60562306a36Sopenharmony_ci			update_driver_data(disk);
60662306a36Sopenharmony_ci		ret = 0;
60762306a36Sopenharmony_ci	} else {
60862306a36Sopenharmony_ci		pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
60962306a36Sopenharmony_ci		disk_free_zone_bitmaps(disk);
61062306a36Sopenharmony_ci	}
61162306a36Sopenharmony_ci	blk_mq_unfreeze_queue(q);
61262306a36Sopenharmony_ci
61362306a36Sopenharmony_ci	kfree(args.seq_zones_wlock);
61462306a36Sopenharmony_ci	kfree(args.conv_zones_bitmap);
61562306a36Sopenharmony_ci	return ret;
61662306a36Sopenharmony_ci}
61762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_civoid disk_clear_zone_settings(struct gendisk *disk)
62062306a36Sopenharmony_ci{
62162306a36Sopenharmony_ci	struct request_queue *q = disk->queue;
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ci	blk_mq_freeze_queue(q);
62462306a36Sopenharmony_ci
62562306a36Sopenharmony_ci	disk_free_zone_bitmaps(disk);
62662306a36Sopenharmony_ci	blk_queue_flag_clear(QUEUE_FLAG_ZONE_RESETALL, q);
62762306a36Sopenharmony_ci	q->required_elevator_features &= ~ELEVATOR_F_ZBD_SEQ_WRITE;
62862306a36Sopenharmony_ci	disk->nr_zones = 0;
62962306a36Sopenharmony_ci	disk->max_open_zones = 0;
63062306a36Sopenharmony_ci	disk->max_active_zones = 0;
63162306a36Sopenharmony_ci	q->limits.chunk_sectors = 0;
63262306a36Sopenharmony_ci	q->limits.zone_write_granularity = 0;
63362306a36Sopenharmony_ci	q->limits.max_zone_append_sectors = 0;
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ci	blk_mq_unfreeze_queue(q);
63662306a36Sopenharmony_ci}
637