162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2021 Western Digital Corporation or its affiliates. 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/blkdev.h> 762306a36Sopenharmony_ci#include <linux/mm.h> 862306a36Sopenharmony_ci#include <linux/sched/mm.h> 962306a36Sopenharmony_ci#include <linux/slab.h> 1062306a36Sopenharmony_ci#include <linux/bitmap.h> 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci#include "dm-core.h" 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#define DM_MSG_PREFIX "zone" 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ci#define DM_ZONE_INVALID_WP_OFST UINT_MAX 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci/* 1962306a36Sopenharmony_ci * For internal zone reports bypassing the top BIO submission path. 2062306a36Sopenharmony_ci */ 2162306a36Sopenharmony_cistatic int dm_blk_do_report_zones(struct mapped_device *md, struct dm_table *t, 2262306a36Sopenharmony_ci sector_t sector, unsigned int nr_zones, 2362306a36Sopenharmony_ci report_zones_cb cb, void *data) 2462306a36Sopenharmony_ci{ 2562306a36Sopenharmony_ci struct gendisk *disk = md->disk; 2662306a36Sopenharmony_ci int ret; 2762306a36Sopenharmony_ci struct dm_report_zones_args args = { 2862306a36Sopenharmony_ci .next_sector = sector, 2962306a36Sopenharmony_ci .orig_data = data, 3062306a36Sopenharmony_ci .orig_cb = cb, 3162306a36Sopenharmony_ci }; 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci do { 3462306a36Sopenharmony_ci struct dm_target *tgt; 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ci tgt = dm_table_find_target(t, args.next_sector); 3762306a36Sopenharmony_ci if (WARN_ON_ONCE(!tgt->type->report_zones)) 3862306a36Sopenharmony_ci return -EIO; 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_ci args.tgt = tgt; 4162306a36Sopenharmony_ci ret = tgt->type->report_zones(tgt, &args, 4262306a36Sopenharmony_ci nr_zones - args.zone_idx); 4362306a36Sopenharmony_ci if (ret < 0) 4462306a36Sopenharmony_ci return ret; 4562306a36Sopenharmony_ci } while (args.zone_idx < nr_zones && 4662306a36Sopenharmony_ci args.next_sector < get_capacity(disk)); 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci return args.zone_idx; 4962306a36Sopenharmony_ci} 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci/* 5262306a36Sopenharmony_ci * User facing dm device block device report zone operation. This calls the 5362306a36Sopenharmony_ci * report_zones operation for each target of a device table. This operation is 5462306a36Sopenharmony_ci * generally implemented by targets using dm_report_zones(). 5562306a36Sopenharmony_ci */ 5662306a36Sopenharmony_ciint dm_blk_report_zones(struct gendisk *disk, sector_t sector, 5762306a36Sopenharmony_ci unsigned int nr_zones, report_zones_cb cb, void *data) 5862306a36Sopenharmony_ci{ 5962306a36Sopenharmony_ci struct mapped_device *md = disk->private_data; 6062306a36Sopenharmony_ci struct dm_table *map; 6162306a36Sopenharmony_ci int srcu_idx, ret; 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci if (dm_suspended_md(md)) 6462306a36Sopenharmony_ci return -EAGAIN; 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci map = dm_get_live_table(md, &srcu_idx); 6762306a36Sopenharmony_ci if (!map) 6862306a36Sopenharmony_ci return -EIO; 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data); 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci dm_put_live_table(md, srcu_idx); 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci return ret; 7562306a36Sopenharmony_ci} 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_cistatic int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, 7862306a36Sopenharmony_ci void *data) 7962306a36Sopenharmony_ci{ 8062306a36Sopenharmony_ci struct dm_report_zones_args *args = data; 8162306a36Sopenharmony_ci sector_t sector_diff = args->tgt->begin - args->start; 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci /* 8462306a36Sopenharmony_ci * Ignore zones beyond the target range. 8562306a36Sopenharmony_ci */ 8662306a36Sopenharmony_ci if (zone->start >= args->start + args->tgt->len) 8762306a36Sopenharmony_ci return 0; 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci /* 9062306a36Sopenharmony_ci * Remap the start sector and write pointer position of the zone 9162306a36Sopenharmony_ci * to match its position in the target range. 9262306a36Sopenharmony_ci */ 9362306a36Sopenharmony_ci zone->start += sector_diff; 9462306a36Sopenharmony_ci if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) { 9562306a36Sopenharmony_ci if (zone->cond == BLK_ZONE_COND_FULL) 9662306a36Sopenharmony_ci zone->wp = zone->start + zone->len; 9762306a36Sopenharmony_ci else if (zone->cond == BLK_ZONE_COND_EMPTY) 9862306a36Sopenharmony_ci zone->wp = zone->start; 9962306a36Sopenharmony_ci else 10062306a36Sopenharmony_ci zone->wp += sector_diff; 10162306a36Sopenharmony_ci } 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci args->next_sector = zone->start + zone->len; 10462306a36Sopenharmony_ci return args->orig_cb(zone, args->zone_idx++, args->orig_data); 10562306a36Sopenharmony_ci} 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci/* 10862306a36Sopenharmony_ci * Helper for drivers of zoned targets to implement struct target_type 10962306a36Sopenharmony_ci * report_zones operation. 11062306a36Sopenharmony_ci */ 11162306a36Sopenharmony_ciint dm_report_zones(struct block_device *bdev, sector_t start, sector_t sector, 11262306a36Sopenharmony_ci struct dm_report_zones_args *args, unsigned int nr_zones) 11362306a36Sopenharmony_ci{ 11462306a36Sopenharmony_ci /* 11562306a36Sopenharmony_ci * Set the target mapping start sector first so that 11662306a36Sopenharmony_ci * dm_report_zones_cb() can correctly remap zone information. 11762306a36Sopenharmony_ci */ 11862306a36Sopenharmony_ci args->start = start; 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci return blkdev_report_zones(bdev, sector, nr_zones, 12162306a36Sopenharmony_ci dm_report_zones_cb, args); 12262306a36Sopenharmony_ci} 12362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_report_zones); 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_cibool dm_is_zone_write(struct mapped_device *md, struct bio *bio) 12662306a36Sopenharmony_ci{ 12762306a36Sopenharmony_ci struct request_queue *q = md->queue; 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci if (!blk_queue_is_zoned(q)) 13062306a36Sopenharmony_ci return false; 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci switch (bio_op(bio)) { 13362306a36Sopenharmony_ci case REQ_OP_WRITE_ZEROES: 13462306a36Sopenharmony_ci case REQ_OP_WRITE: 13562306a36Sopenharmony_ci return !op_is_flush(bio->bi_opf) && bio_sectors(bio); 13662306a36Sopenharmony_ci default: 13762306a36Sopenharmony_ci return false; 13862306a36Sopenharmony_ci } 13962306a36Sopenharmony_ci} 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_civoid dm_cleanup_zoned_dev(struct mapped_device *md) 14262306a36Sopenharmony_ci{ 14362306a36Sopenharmony_ci if (md->disk) { 14462306a36Sopenharmony_ci bitmap_free(md->disk->conv_zones_bitmap); 14562306a36Sopenharmony_ci md->disk->conv_zones_bitmap = NULL; 14662306a36Sopenharmony_ci bitmap_free(md->disk->seq_zones_wlock); 14762306a36Sopenharmony_ci md->disk->seq_zones_wlock = NULL; 14862306a36Sopenharmony_ci } 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci kvfree(md->zwp_offset); 15162306a36Sopenharmony_ci md->zwp_offset = NULL; 15262306a36Sopenharmony_ci md->nr_zones = 0; 15362306a36Sopenharmony_ci} 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_cistatic unsigned int dm_get_zone_wp_offset(struct blk_zone *zone) 15662306a36Sopenharmony_ci{ 15762306a36Sopenharmony_ci switch (zone->cond) { 15862306a36Sopenharmony_ci case BLK_ZONE_COND_IMP_OPEN: 15962306a36Sopenharmony_ci case BLK_ZONE_COND_EXP_OPEN: 16062306a36Sopenharmony_ci case BLK_ZONE_COND_CLOSED: 16162306a36Sopenharmony_ci return zone->wp - zone->start; 16262306a36Sopenharmony_ci case BLK_ZONE_COND_FULL: 16362306a36Sopenharmony_ci return zone->len; 16462306a36Sopenharmony_ci case BLK_ZONE_COND_EMPTY: 16562306a36Sopenharmony_ci case BLK_ZONE_COND_NOT_WP: 16662306a36Sopenharmony_ci case BLK_ZONE_COND_OFFLINE: 16762306a36Sopenharmony_ci case BLK_ZONE_COND_READONLY: 16862306a36Sopenharmony_ci default: 16962306a36Sopenharmony_ci /* 17062306a36Sopenharmony_ci * Conventional, offline and read-only zones do not have a valid 17162306a36Sopenharmony_ci * write pointer. Use 0 as for an empty zone. 17262306a36Sopenharmony_ci */ 17362306a36Sopenharmony_ci return 0; 17462306a36Sopenharmony_ci } 17562306a36Sopenharmony_ci} 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_cistatic int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx, 17862306a36Sopenharmony_ci void *data) 17962306a36Sopenharmony_ci{ 18062306a36Sopenharmony_ci struct mapped_device *md = data; 18162306a36Sopenharmony_ci struct gendisk *disk = md->disk; 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci switch (zone->type) { 18462306a36Sopenharmony_ci case BLK_ZONE_TYPE_CONVENTIONAL: 18562306a36Sopenharmony_ci if (!disk->conv_zones_bitmap) { 18662306a36Sopenharmony_ci disk->conv_zones_bitmap = bitmap_zalloc(disk->nr_zones, 18762306a36Sopenharmony_ci GFP_NOIO); 18862306a36Sopenharmony_ci if (!disk->conv_zones_bitmap) 18962306a36Sopenharmony_ci return -ENOMEM; 19062306a36Sopenharmony_ci } 19162306a36Sopenharmony_ci set_bit(idx, disk->conv_zones_bitmap); 19262306a36Sopenharmony_ci break; 19362306a36Sopenharmony_ci case BLK_ZONE_TYPE_SEQWRITE_REQ: 19462306a36Sopenharmony_ci case BLK_ZONE_TYPE_SEQWRITE_PREF: 19562306a36Sopenharmony_ci if (!disk->seq_zones_wlock) { 19662306a36Sopenharmony_ci disk->seq_zones_wlock = bitmap_zalloc(disk->nr_zones, 19762306a36Sopenharmony_ci GFP_NOIO); 19862306a36Sopenharmony_ci if (!disk->seq_zones_wlock) 19962306a36Sopenharmony_ci return -ENOMEM; 20062306a36Sopenharmony_ci } 20162306a36Sopenharmony_ci if (!md->zwp_offset) { 20262306a36Sopenharmony_ci md->zwp_offset = 20362306a36Sopenharmony_ci kvcalloc(disk->nr_zones, sizeof(unsigned int), 20462306a36Sopenharmony_ci GFP_KERNEL); 20562306a36Sopenharmony_ci if (!md->zwp_offset) 20662306a36Sopenharmony_ci return -ENOMEM; 20762306a36Sopenharmony_ci } 20862306a36Sopenharmony_ci md->zwp_offset[idx] = dm_get_zone_wp_offset(zone); 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci break; 21162306a36Sopenharmony_ci default: 21262306a36Sopenharmony_ci DMERR("Invalid zone type 0x%x at sectors %llu", 21362306a36Sopenharmony_ci (int)zone->type, zone->start); 21462306a36Sopenharmony_ci return -ENODEV; 21562306a36Sopenharmony_ci } 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci return 0; 21862306a36Sopenharmony_ci} 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci/* 22162306a36Sopenharmony_ci * Revalidate the zones of a mapped device to initialize resource necessary 22262306a36Sopenharmony_ci * for zone append emulation. Note that we cannot simply use the block layer 22362306a36Sopenharmony_ci * blk_revalidate_disk_zones() function here as the mapped device is suspended 22462306a36Sopenharmony_ci * (this is called from __bind() context). 22562306a36Sopenharmony_ci */ 22662306a36Sopenharmony_cistatic int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t) 22762306a36Sopenharmony_ci{ 22862306a36Sopenharmony_ci struct gendisk *disk = md->disk; 22962306a36Sopenharmony_ci unsigned int noio_flag; 23062306a36Sopenharmony_ci int ret; 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci /* 23362306a36Sopenharmony_ci * Check if something changed. If yes, cleanup the current resources 23462306a36Sopenharmony_ci * and reallocate everything. 23562306a36Sopenharmony_ci */ 23662306a36Sopenharmony_ci if (!disk->nr_zones || disk->nr_zones != md->nr_zones) 23762306a36Sopenharmony_ci dm_cleanup_zoned_dev(md); 23862306a36Sopenharmony_ci if (md->nr_zones) 23962306a36Sopenharmony_ci return 0; 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci /* 24262306a36Sopenharmony_ci * Scan all zones to initialize everything. Ensure that all vmalloc 24362306a36Sopenharmony_ci * operations in this context are done as if GFP_NOIO was specified. 24462306a36Sopenharmony_ci */ 24562306a36Sopenharmony_ci noio_flag = memalloc_noio_save(); 24662306a36Sopenharmony_ci ret = dm_blk_do_report_zones(md, t, 0, disk->nr_zones, 24762306a36Sopenharmony_ci dm_zone_revalidate_cb, md); 24862306a36Sopenharmony_ci memalloc_noio_restore(noio_flag); 24962306a36Sopenharmony_ci if (ret < 0) 25062306a36Sopenharmony_ci goto err; 25162306a36Sopenharmony_ci if (ret != disk->nr_zones) { 25262306a36Sopenharmony_ci ret = -EIO; 25362306a36Sopenharmony_ci goto err; 25462306a36Sopenharmony_ci } 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci md->nr_zones = disk->nr_zones; 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci return 0; 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_cierr: 26162306a36Sopenharmony_ci DMERR("Revalidate zones failed %d", ret); 26262306a36Sopenharmony_ci dm_cleanup_zoned_dev(md); 26362306a36Sopenharmony_ci return ret; 26462306a36Sopenharmony_ci} 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_cistatic int device_not_zone_append_capable(struct dm_target *ti, 26762306a36Sopenharmony_ci struct dm_dev *dev, sector_t start, 26862306a36Sopenharmony_ci sector_t len, void *data) 26962306a36Sopenharmony_ci{ 27062306a36Sopenharmony_ci return !bdev_is_zoned(dev->bdev); 27162306a36Sopenharmony_ci} 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_cistatic bool dm_table_supports_zone_append(struct dm_table *t) 27462306a36Sopenharmony_ci{ 27562306a36Sopenharmony_ci for (unsigned int i = 0; i < t->num_targets; i++) { 27662306a36Sopenharmony_ci struct dm_target *ti = dm_table_get_target(t, i); 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci if (ti->emulate_zone_append) 27962306a36Sopenharmony_ci return false; 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci if (!ti->type->iterate_devices || 28262306a36Sopenharmony_ci ti->type->iterate_devices(ti, device_not_zone_append_capable, NULL)) 28362306a36Sopenharmony_ci return false; 28462306a36Sopenharmony_ci } 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci return true; 28762306a36Sopenharmony_ci} 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ciint dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q) 29062306a36Sopenharmony_ci{ 29162306a36Sopenharmony_ci struct mapped_device *md = t->md; 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci /* 29462306a36Sopenharmony_ci * For a zoned target, the number of zones should be updated for the 29562306a36Sopenharmony_ci * correct value to be exposed in sysfs queue/nr_zones. 29662306a36Sopenharmony_ci */ 29762306a36Sopenharmony_ci WARN_ON_ONCE(queue_is_mq(q)); 29862306a36Sopenharmony_ci md->disk->nr_zones = bdev_nr_zones(md->disk->part0); 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci /* Check if zone append is natively supported */ 30162306a36Sopenharmony_ci if (dm_table_supports_zone_append(t)) { 30262306a36Sopenharmony_ci clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); 30362306a36Sopenharmony_ci dm_cleanup_zoned_dev(md); 30462306a36Sopenharmony_ci return 0; 30562306a36Sopenharmony_ci } 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_ci /* 30862306a36Sopenharmony_ci * Mark the mapped device as needing zone append emulation and 30962306a36Sopenharmony_ci * initialize the emulation resources once the capacity is set. 31062306a36Sopenharmony_ci */ 31162306a36Sopenharmony_ci set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); 31262306a36Sopenharmony_ci if (!get_capacity(md->disk)) 31362306a36Sopenharmony_ci return 0; 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_ci return dm_revalidate_zones(md, t); 31662306a36Sopenharmony_ci} 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_cistatic int dm_update_zone_wp_offset_cb(struct blk_zone *zone, unsigned int idx, 31962306a36Sopenharmony_ci void *data) 32062306a36Sopenharmony_ci{ 32162306a36Sopenharmony_ci unsigned int *wp_offset = data; 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci *wp_offset = dm_get_zone_wp_offset(zone); 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci return 0; 32662306a36Sopenharmony_ci} 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_cistatic int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno, 32962306a36Sopenharmony_ci unsigned int *wp_ofst) 33062306a36Sopenharmony_ci{ 33162306a36Sopenharmony_ci sector_t sector = zno * bdev_zone_sectors(md->disk->part0); 33262306a36Sopenharmony_ci unsigned int noio_flag; 33362306a36Sopenharmony_ci struct dm_table *t; 33462306a36Sopenharmony_ci int srcu_idx, ret; 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci t = dm_get_live_table(md, &srcu_idx); 33762306a36Sopenharmony_ci if (!t) 33862306a36Sopenharmony_ci return -EIO; 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci /* 34162306a36Sopenharmony_ci * Ensure that all memory allocations in this context are done as if 34262306a36Sopenharmony_ci * GFP_NOIO was specified. 34362306a36Sopenharmony_ci */ 34462306a36Sopenharmony_ci noio_flag = memalloc_noio_save(); 34562306a36Sopenharmony_ci ret = dm_blk_do_report_zones(md, t, sector, 1, 34662306a36Sopenharmony_ci dm_update_zone_wp_offset_cb, wp_ofst); 34762306a36Sopenharmony_ci memalloc_noio_restore(noio_flag); 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ci dm_put_live_table(md, srcu_idx); 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci if (ret != 1) 35262306a36Sopenharmony_ci return -EIO; 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci return 0; 35562306a36Sopenharmony_ci} 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_cistruct orig_bio_details { 35862306a36Sopenharmony_ci enum req_op op; 35962306a36Sopenharmony_ci unsigned int nr_sectors; 36062306a36Sopenharmony_ci}; 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ci/* 36362306a36Sopenharmony_ci * First phase of BIO mapping for targets with zone append emulation: 36462306a36Sopenharmony_ci * check all BIO that change a zone writer pointer and change zone 36562306a36Sopenharmony_ci * append operations into regular write operations. 36662306a36Sopenharmony_ci */ 36762306a36Sopenharmony_cistatic bool dm_zone_map_bio_begin(struct mapped_device *md, 36862306a36Sopenharmony_ci unsigned int zno, struct bio *clone) 36962306a36Sopenharmony_ci{ 37062306a36Sopenharmony_ci sector_t zsectors = bdev_zone_sectors(md->disk->part0); 37162306a36Sopenharmony_ci unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]); 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci /* 37462306a36Sopenharmony_ci * If the target zone is in an error state, recover by inspecting the 37562306a36Sopenharmony_ci * zone to get its current write pointer position. Note that since the 37662306a36Sopenharmony_ci * target zone is already locked, a BIO issuing context should never 37762306a36Sopenharmony_ci * see the zone write in the DM_ZONE_UPDATING_WP_OFST state. 37862306a36Sopenharmony_ci */ 37962306a36Sopenharmony_ci if (zwp_offset == DM_ZONE_INVALID_WP_OFST) { 38062306a36Sopenharmony_ci if (dm_update_zone_wp_offset(md, zno, &zwp_offset)) 38162306a36Sopenharmony_ci return false; 38262306a36Sopenharmony_ci WRITE_ONCE(md->zwp_offset[zno], zwp_offset); 38362306a36Sopenharmony_ci } 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci switch (bio_op(clone)) { 38662306a36Sopenharmony_ci case REQ_OP_ZONE_RESET: 38762306a36Sopenharmony_ci case REQ_OP_ZONE_FINISH: 38862306a36Sopenharmony_ci return true; 38962306a36Sopenharmony_ci case REQ_OP_WRITE_ZEROES: 39062306a36Sopenharmony_ci case REQ_OP_WRITE: 39162306a36Sopenharmony_ci /* Writes must be aligned to the zone write pointer */ 39262306a36Sopenharmony_ci if ((clone->bi_iter.bi_sector & (zsectors - 1)) != zwp_offset) 39362306a36Sopenharmony_ci return false; 39462306a36Sopenharmony_ci break; 39562306a36Sopenharmony_ci case REQ_OP_ZONE_APPEND: 39662306a36Sopenharmony_ci /* 39762306a36Sopenharmony_ci * Change zone append operations into a non-mergeable regular 39862306a36Sopenharmony_ci * writes directed at the current write pointer position of the 39962306a36Sopenharmony_ci * target zone. 40062306a36Sopenharmony_ci */ 40162306a36Sopenharmony_ci clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE | 40262306a36Sopenharmony_ci (clone->bi_opf & (~REQ_OP_MASK)); 40362306a36Sopenharmony_ci clone->bi_iter.bi_sector += zwp_offset; 40462306a36Sopenharmony_ci break; 40562306a36Sopenharmony_ci default: 40662306a36Sopenharmony_ci DMWARN_LIMIT("Invalid BIO operation"); 40762306a36Sopenharmony_ci return false; 40862306a36Sopenharmony_ci } 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_ci /* Cannot write to a full zone */ 41162306a36Sopenharmony_ci if (zwp_offset >= zsectors) 41262306a36Sopenharmony_ci return false; 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci return true; 41562306a36Sopenharmony_ci} 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci/* 41862306a36Sopenharmony_ci * Second phase of BIO mapping for targets with zone append emulation: 41962306a36Sopenharmony_ci * update the zone write pointer offset array to account for the additional 42062306a36Sopenharmony_ci * data written to a zone. Note that at this point, the remapped clone BIO 42162306a36Sopenharmony_ci * may already have completed, so we do not touch it. 42262306a36Sopenharmony_ci */ 42362306a36Sopenharmony_cistatic blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int zno, 42462306a36Sopenharmony_ci struct orig_bio_details *orig_bio_details, 42562306a36Sopenharmony_ci unsigned int nr_sectors) 42662306a36Sopenharmony_ci{ 42762306a36Sopenharmony_ci unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]); 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci /* The clone BIO may already have been completed and failed */ 43062306a36Sopenharmony_ci if (zwp_offset == DM_ZONE_INVALID_WP_OFST) 43162306a36Sopenharmony_ci return BLK_STS_IOERR; 43262306a36Sopenharmony_ci 43362306a36Sopenharmony_ci /* Update the zone wp offset */ 43462306a36Sopenharmony_ci switch (orig_bio_details->op) { 43562306a36Sopenharmony_ci case REQ_OP_ZONE_RESET: 43662306a36Sopenharmony_ci WRITE_ONCE(md->zwp_offset[zno], 0); 43762306a36Sopenharmony_ci return BLK_STS_OK; 43862306a36Sopenharmony_ci case REQ_OP_ZONE_FINISH: 43962306a36Sopenharmony_ci WRITE_ONCE(md->zwp_offset[zno], 44062306a36Sopenharmony_ci bdev_zone_sectors(md->disk->part0)); 44162306a36Sopenharmony_ci return BLK_STS_OK; 44262306a36Sopenharmony_ci case REQ_OP_WRITE_ZEROES: 44362306a36Sopenharmony_ci case REQ_OP_WRITE: 44462306a36Sopenharmony_ci WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors); 44562306a36Sopenharmony_ci return BLK_STS_OK; 44662306a36Sopenharmony_ci case REQ_OP_ZONE_APPEND: 44762306a36Sopenharmony_ci /* 44862306a36Sopenharmony_ci * Check that the target did not truncate the write operation 44962306a36Sopenharmony_ci * emulating a zone append. 45062306a36Sopenharmony_ci */ 45162306a36Sopenharmony_ci if (nr_sectors != orig_bio_details->nr_sectors) { 45262306a36Sopenharmony_ci DMWARN_LIMIT("Truncated write for zone append"); 45362306a36Sopenharmony_ci return BLK_STS_IOERR; 45462306a36Sopenharmony_ci } 45562306a36Sopenharmony_ci WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors); 45662306a36Sopenharmony_ci return BLK_STS_OK; 45762306a36Sopenharmony_ci default: 45862306a36Sopenharmony_ci DMWARN_LIMIT("Invalid BIO operation"); 45962306a36Sopenharmony_ci return BLK_STS_IOERR; 46062306a36Sopenharmony_ci } 46162306a36Sopenharmony_ci} 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_cistatic inline void dm_zone_lock(struct gendisk *disk, unsigned int zno, 46462306a36Sopenharmony_ci struct bio *clone) 46562306a36Sopenharmony_ci{ 46662306a36Sopenharmony_ci if (WARN_ON_ONCE(bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))) 46762306a36Sopenharmony_ci return; 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_ci wait_on_bit_lock_io(disk->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE); 47062306a36Sopenharmony_ci bio_set_flag(clone, BIO_ZONE_WRITE_LOCKED); 47162306a36Sopenharmony_ci} 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_cistatic inline void dm_zone_unlock(struct gendisk *disk, unsigned int zno, 47462306a36Sopenharmony_ci struct bio *clone) 47562306a36Sopenharmony_ci{ 47662306a36Sopenharmony_ci if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)) 47762306a36Sopenharmony_ci return; 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci WARN_ON_ONCE(!test_bit(zno, disk->seq_zones_wlock)); 48062306a36Sopenharmony_ci clear_bit_unlock(zno, disk->seq_zones_wlock); 48162306a36Sopenharmony_ci smp_mb__after_atomic(); 48262306a36Sopenharmony_ci wake_up_bit(disk->seq_zones_wlock, zno); 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ci bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED); 48562306a36Sopenharmony_ci} 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_cistatic bool dm_need_zone_wp_tracking(struct bio *bio) 48862306a36Sopenharmony_ci{ 48962306a36Sopenharmony_ci /* 49062306a36Sopenharmony_ci * Special processing is not needed for operations that do not need the 49162306a36Sopenharmony_ci * zone write lock, that is, all operations that target conventional 49262306a36Sopenharmony_ci * zones and all operations that do not modify directly a sequential 49362306a36Sopenharmony_ci * zone write pointer. 49462306a36Sopenharmony_ci */ 49562306a36Sopenharmony_ci if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) 49662306a36Sopenharmony_ci return false; 49762306a36Sopenharmony_ci switch (bio_op(bio)) { 49862306a36Sopenharmony_ci case REQ_OP_WRITE_ZEROES: 49962306a36Sopenharmony_ci case REQ_OP_WRITE: 50062306a36Sopenharmony_ci case REQ_OP_ZONE_RESET: 50162306a36Sopenharmony_ci case REQ_OP_ZONE_FINISH: 50262306a36Sopenharmony_ci case REQ_OP_ZONE_APPEND: 50362306a36Sopenharmony_ci return bio_zone_is_seq(bio); 50462306a36Sopenharmony_ci default: 50562306a36Sopenharmony_ci return false; 50662306a36Sopenharmony_ci } 50762306a36Sopenharmony_ci} 50862306a36Sopenharmony_ci 50962306a36Sopenharmony_ci/* 51062306a36Sopenharmony_ci * Special IO mapping for targets needing zone append emulation. 51162306a36Sopenharmony_ci */ 51262306a36Sopenharmony_ciint dm_zone_map_bio(struct dm_target_io *tio) 51362306a36Sopenharmony_ci{ 51462306a36Sopenharmony_ci struct dm_io *io = tio->io; 51562306a36Sopenharmony_ci struct dm_target *ti = tio->ti; 51662306a36Sopenharmony_ci struct mapped_device *md = io->md; 51762306a36Sopenharmony_ci struct bio *clone = &tio->clone; 51862306a36Sopenharmony_ci struct orig_bio_details orig_bio_details; 51962306a36Sopenharmony_ci unsigned int zno; 52062306a36Sopenharmony_ci blk_status_t sts; 52162306a36Sopenharmony_ci int r; 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_ci /* 52462306a36Sopenharmony_ci * IOs that do not change a zone write pointer do not need 52562306a36Sopenharmony_ci * any additional special processing. 52662306a36Sopenharmony_ci */ 52762306a36Sopenharmony_ci if (!dm_need_zone_wp_tracking(clone)) 52862306a36Sopenharmony_ci return ti->type->map(ti, clone); 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_ci /* Lock the target zone */ 53162306a36Sopenharmony_ci zno = bio_zone_no(clone); 53262306a36Sopenharmony_ci dm_zone_lock(md->disk, zno, clone); 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ci orig_bio_details.nr_sectors = bio_sectors(clone); 53562306a36Sopenharmony_ci orig_bio_details.op = bio_op(clone); 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci /* 53862306a36Sopenharmony_ci * Check that the bio and the target zone write pointer offset are 53962306a36Sopenharmony_ci * both valid, and if the bio is a zone append, remap it to a write. 54062306a36Sopenharmony_ci */ 54162306a36Sopenharmony_ci if (!dm_zone_map_bio_begin(md, zno, clone)) { 54262306a36Sopenharmony_ci dm_zone_unlock(md->disk, zno, clone); 54362306a36Sopenharmony_ci return DM_MAPIO_KILL; 54462306a36Sopenharmony_ci } 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_ci /* Let the target do its work */ 54762306a36Sopenharmony_ci r = ti->type->map(ti, clone); 54862306a36Sopenharmony_ci switch (r) { 54962306a36Sopenharmony_ci case DM_MAPIO_SUBMITTED: 55062306a36Sopenharmony_ci /* 55162306a36Sopenharmony_ci * The target submitted the clone BIO. The target zone will 55262306a36Sopenharmony_ci * be unlocked on completion of the clone. 55362306a36Sopenharmony_ci */ 55462306a36Sopenharmony_ci sts = dm_zone_map_bio_end(md, zno, &orig_bio_details, 55562306a36Sopenharmony_ci *tio->len_ptr); 55662306a36Sopenharmony_ci break; 55762306a36Sopenharmony_ci case DM_MAPIO_REMAPPED: 55862306a36Sopenharmony_ci /* 55962306a36Sopenharmony_ci * The target only remapped the clone BIO. In case of error, 56062306a36Sopenharmony_ci * unlock the target zone here as the clone will not be 56162306a36Sopenharmony_ci * submitted. 56262306a36Sopenharmony_ci */ 56362306a36Sopenharmony_ci sts = dm_zone_map_bio_end(md, zno, &orig_bio_details, 56462306a36Sopenharmony_ci *tio->len_ptr); 56562306a36Sopenharmony_ci if (sts != BLK_STS_OK) 56662306a36Sopenharmony_ci dm_zone_unlock(md->disk, zno, clone); 56762306a36Sopenharmony_ci break; 56862306a36Sopenharmony_ci case DM_MAPIO_REQUEUE: 56962306a36Sopenharmony_ci case DM_MAPIO_KILL: 57062306a36Sopenharmony_ci default: 57162306a36Sopenharmony_ci dm_zone_unlock(md->disk, zno, clone); 57262306a36Sopenharmony_ci sts = BLK_STS_IOERR; 57362306a36Sopenharmony_ci break; 57462306a36Sopenharmony_ci } 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_ci if (sts != BLK_STS_OK) 57762306a36Sopenharmony_ci return DM_MAPIO_KILL; 57862306a36Sopenharmony_ci 57962306a36Sopenharmony_ci return r; 58062306a36Sopenharmony_ci} 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_ci/* 58362306a36Sopenharmony_ci * IO completion callback called from clone_endio(). 58462306a36Sopenharmony_ci */ 58562306a36Sopenharmony_civoid dm_zone_endio(struct dm_io *io, struct bio *clone) 58662306a36Sopenharmony_ci{ 58762306a36Sopenharmony_ci struct mapped_device *md = io->md; 58862306a36Sopenharmony_ci struct gendisk *disk = md->disk; 58962306a36Sopenharmony_ci struct bio *orig_bio = io->orig_bio; 59062306a36Sopenharmony_ci unsigned int zwp_offset; 59162306a36Sopenharmony_ci unsigned int zno; 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ci /* 59462306a36Sopenharmony_ci * For targets that do not emulate zone append, we only need to 59562306a36Sopenharmony_ci * handle native zone-append bios. 59662306a36Sopenharmony_ci */ 59762306a36Sopenharmony_ci if (!dm_emulate_zone_append(md)) { 59862306a36Sopenharmony_ci /* 59962306a36Sopenharmony_ci * Get the offset within the zone of the written sector 60062306a36Sopenharmony_ci * and add that to the original bio sector position. 60162306a36Sopenharmony_ci */ 60262306a36Sopenharmony_ci if (clone->bi_status == BLK_STS_OK && 60362306a36Sopenharmony_ci bio_op(clone) == REQ_OP_ZONE_APPEND) { 60462306a36Sopenharmony_ci sector_t mask = 60562306a36Sopenharmony_ci (sector_t)bdev_zone_sectors(disk->part0) - 1; 60662306a36Sopenharmony_ci 60762306a36Sopenharmony_ci orig_bio->bi_iter.bi_sector += 60862306a36Sopenharmony_ci clone->bi_iter.bi_sector & mask; 60962306a36Sopenharmony_ci } 61062306a36Sopenharmony_ci 61162306a36Sopenharmony_ci return; 61262306a36Sopenharmony_ci } 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci /* 61562306a36Sopenharmony_ci * For targets that do emulate zone append, if the clone BIO does not 61662306a36Sopenharmony_ci * own the target zone write lock, we have nothing to do. 61762306a36Sopenharmony_ci */ 61862306a36Sopenharmony_ci if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)) 61962306a36Sopenharmony_ci return; 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci zno = bio_zone_no(orig_bio); 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci if (clone->bi_status != BLK_STS_OK) { 62462306a36Sopenharmony_ci /* 62562306a36Sopenharmony_ci * BIOs that modify a zone write pointer may leave the zone 62662306a36Sopenharmony_ci * in an unknown state in case of failure (e.g. the write 62762306a36Sopenharmony_ci * pointer was only partially advanced). In this case, set 62862306a36Sopenharmony_ci * the target zone write pointer as invalid unless it is 62962306a36Sopenharmony_ci * already being updated. 63062306a36Sopenharmony_ci */ 63162306a36Sopenharmony_ci WRITE_ONCE(md->zwp_offset[zno], DM_ZONE_INVALID_WP_OFST); 63262306a36Sopenharmony_ci } else if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) { 63362306a36Sopenharmony_ci /* 63462306a36Sopenharmony_ci * Get the written sector for zone append operation that were 63562306a36Sopenharmony_ci * emulated using regular write operations. 63662306a36Sopenharmony_ci */ 63762306a36Sopenharmony_ci zwp_offset = READ_ONCE(md->zwp_offset[zno]); 63862306a36Sopenharmony_ci if (WARN_ON_ONCE(zwp_offset < bio_sectors(orig_bio))) 63962306a36Sopenharmony_ci WRITE_ONCE(md->zwp_offset[zno], 64062306a36Sopenharmony_ci DM_ZONE_INVALID_WP_OFST); 64162306a36Sopenharmony_ci else 64262306a36Sopenharmony_ci orig_bio->bi_iter.bi_sector += 64362306a36Sopenharmony_ci zwp_offset - bio_sectors(orig_bio); 64462306a36Sopenharmony_ci } 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_ci dm_zone_unlock(disk, zno, clone); 64762306a36Sopenharmony_ci} 648