xref: /kernel/linux/linux-5.10/fs/zonefs/super.c (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Simple file system for zoned block devices exposing zones as files.
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright (C) 2019 Western Digital Corporation or its affiliates.
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci#include <linux/module.h>
88c2ecf20Sopenharmony_ci#include <linux/fs.h>
98c2ecf20Sopenharmony_ci#include <linux/magic.h>
108c2ecf20Sopenharmony_ci#include <linux/iomap.h>
118c2ecf20Sopenharmony_ci#include <linux/init.h>
128c2ecf20Sopenharmony_ci#include <linux/slab.h>
138c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
148c2ecf20Sopenharmony_ci#include <linux/statfs.h>
158c2ecf20Sopenharmony_ci#include <linux/writeback.h>
168c2ecf20Sopenharmony_ci#include <linux/quotaops.h>
178c2ecf20Sopenharmony_ci#include <linux/seq_file.h>
188c2ecf20Sopenharmony_ci#include <linux/parser.h>
198c2ecf20Sopenharmony_ci#include <linux/uio.h>
208c2ecf20Sopenharmony_ci#include <linux/mman.h>
218c2ecf20Sopenharmony_ci#include <linux/sched/mm.h>
228c2ecf20Sopenharmony_ci#include <linux/crc32.h>
238c2ecf20Sopenharmony_ci#include <linux/task_io_accounting_ops.h>
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci#include "zonefs.h"
268c2ecf20Sopenharmony_ci
278c2ecf20Sopenharmony_cistatic inline int zonefs_zone_mgmt(struct inode *inode,
288c2ecf20Sopenharmony_ci				   enum req_opf op)
298c2ecf20Sopenharmony_ci{
308c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
318c2ecf20Sopenharmony_ci	int ret;
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_ci	lockdep_assert_held(&zi->i_truncate_mutex);
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_ci	/*
368c2ecf20Sopenharmony_ci	 * With ZNS drives, closing an explicitly open zone that has not been
378c2ecf20Sopenharmony_ci	 * written will change the zone state to "closed", that is, the zone
388c2ecf20Sopenharmony_ci	 * will remain active. Since this can then cause failure of explicit
398c2ecf20Sopenharmony_ci	 * open operation on other zones if the drive active zone resources
408c2ecf20Sopenharmony_ci	 * are exceeded, make sure that the zone does not remain active by
418c2ecf20Sopenharmony_ci	 * resetting it.
428c2ecf20Sopenharmony_ci	 */
438c2ecf20Sopenharmony_ci	if (op == REQ_OP_ZONE_CLOSE && !zi->i_wpoffset)
448c2ecf20Sopenharmony_ci		op = REQ_OP_ZONE_RESET;
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_ci	ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector,
478c2ecf20Sopenharmony_ci			       zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS);
488c2ecf20Sopenharmony_ci	if (ret) {
498c2ecf20Sopenharmony_ci		zonefs_err(inode->i_sb,
508c2ecf20Sopenharmony_ci			   "Zone management operation %s at %llu failed %d\n",
518c2ecf20Sopenharmony_ci			   blk_op_str(op), zi->i_zsector, ret);
528c2ecf20Sopenharmony_ci		return ret;
538c2ecf20Sopenharmony_ci	}
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_ci	return 0;
568c2ecf20Sopenharmony_ci}
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_cistatic inline void zonefs_i_size_write(struct inode *inode, loff_t isize)
598c2ecf20Sopenharmony_ci{
608c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci	i_size_write(inode, isize);
638c2ecf20Sopenharmony_ci	/*
648c2ecf20Sopenharmony_ci	 * A full zone is no longer open/active and does not need
658c2ecf20Sopenharmony_ci	 * explicit closing.
668c2ecf20Sopenharmony_ci	 */
678c2ecf20Sopenharmony_ci	if (isize >= zi->i_max_size)
688c2ecf20Sopenharmony_ci		zi->i_flags &= ~ZONEFS_ZONE_OPEN;
698c2ecf20Sopenharmony_ci}
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_cistatic int zonefs_read_iomap_begin(struct inode *inode, loff_t offset,
728c2ecf20Sopenharmony_ci				   loff_t length, unsigned int flags,
738c2ecf20Sopenharmony_ci				   struct iomap *iomap, struct iomap *srcmap)
748c2ecf20Sopenharmony_ci{
758c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
768c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
778c2ecf20Sopenharmony_ci	loff_t isize;
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_ci	/*
808c2ecf20Sopenharmony_ci	 * All blocks are always mapped below EOF. If reading past EOF,
818c2ecf20Sopenharmony_ci	 * act as if there is a hole up to the file maximum size.
828c2ecf20Sopenharmony_ci	 */
838c2ecf20Sopenharmony_ci	mutex_lock(&zi->i_truncate_mutex);
848c2ecf20Sopenharmony_ci	iomap->bdev = inode->i_sb->s_bdev;
858c2ecf20Sopenharmony_ci	iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
868c2ecf20Sopenharmony_ci	isize = i_size_read(inode);
878c2ecf20Sopenharmony_ci	if (iomap->offset >= isize) {
888c2ecf20Sopenharmony_ci		iomap->type = IOMAP_HOLE;
898c2ecf20Sopenharmony_ci		iomap->addr = IOMAP_NULL_ADDR;
908c2ecf20Sopenharmony_ci		iomap->length = length;
918c2ecf20Sopenharmony_ci	} else {
928c2ecf20Sopenharmony_ci		iomap->type = IOMAP_MAPPED;
938c2ecf20Sopenharmony_ci		iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
948c2ecf20Sopenharmony_ci		iomap->length = isize - iomap->offset;
958c2ecf20Sopenharmony_ci	}
968c2ecf20Sopenharmony_ci	mutex_unlock(&zi->i_truncate_mutex);
978c2ecf20Sopenharmony_ci
988c2ecf20Sopenharmony_ci	return 0;
998c2ecf20Sopenharmony_ci}
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_cistatic const struct iomap_ops zonefs_read_iomap_ops = {
1028c2ecf20Sopenharmony_ci	.iomap_begin	= zonefs_read_iomap_begin,
1038c2ecf20Sopenharmony_ci};
1048c2ecf20Sopenharmony_ci
1058c2ecf20Sopenharmony_cistatic int zonefs_write_iomap_begin(struct inode *inode, loff_t offset,
1068c2ecf20Sopenharmony_ci				    loff_t length, unsigned int flags,
1078c2ecf20Sopenharmony_ci				    struct iomap *iomap, struct iomap *srcmap)
1088c2ecf20Sopenharmony_ci{
1098c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
1108c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
1118c2ecf20Sopenharmony_ci	loff_t isize;
1128c2ecf20Sopenharmony_ci
1138c2ecf20Sopenharmony_ci	/* All write I/Os should always be within the file maximum size */
1148c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(offset + length > zi->i_max_size))
1158c2ecf20Sopenharmony_ci		return -EIO;
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_ci	/*
1188c2ecf20Sopenharmony_ci	 * Sequential zones can only accept direct writes. This is already
1198c2ecf20Sopenharmony_ci	 * checked when writes are issued, so warn if we see a page writeback
1208c2ecf20Sopenharmony_ci	 * operation.
1218c2ecf20Sopenharmony_ci	 */
1228c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
1238c2ecf20Sopenharmony_ci			 !(flags & IOMAP_DIRECT)))
1248c2ecf20Sopenharmony_ci		return -EIO;
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci	/*
1278c2ecf20Sopenharmony_ci	 * For conventional zones, all blocks are always mapped. For sequential
1288c2ecf20Sopenharmony_ci	 * zones, all blocks after always mapped below the inode size (zone
1298c2ecf20Sopenharmony_ci	 * write pointer) and unwriten beyond.
1308c2ecf20Sopenharmony_ci	 */
1318c2ecf20Sopenharmony_ci	mutex_lock(&zi->i_truncate_mutex);
1328c2ecf20Sopenharmony_ci	iomap->bdev = inode->i_sb->s_bdev;
1338c2ecf20Sopenharmony_ci	iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
1348c2ecf20Sopenharmony_ci	iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
1358c2ecf20Sopenharmony_ci	isize = i_size_read(inode);
1368c2ecf20Sopenharmony_ci	if (iomap->offset >= isize) {
1378c2ecf20Sopenharmony_ci		iomap->type = IOMAP_UNWRITTEN;
1388c2ecf20Sopenharmony_ci		iomap->length = zi->i_max_size - iomap->offset;
1398c2ecf20Sopenharmony_ci	} else {
1408c2ecf20Sopenharmony_ci		iomap->type = IOMAP_MAPPED;
1418c2ecf20Sopenharmony_ci		iomap->length = isize - iomap->offset;
1428c2ecf20Sopenharmony_ci	}
1438c2ecf20Sopenharmony_ci	mutex_unlock(&zi->i_truncate_mutex);
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_ci	return 0;
1468c2ecf20Sopenharmony_ci}
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_cistatic const struct iomap_ops zonefs_write_iomap_ops = {
1498c2ecf20Sopenharmony_ci	.iomap_begin	= zonefs_write_iomap_begin,
1508c2ecf20Sopenharmony_ci};
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_cistatic int zonefs_readpage(struct file *unused, struct page *page)
1538c2ecf20Sopenharmony_ci{
1548c2ecf20Sopenharmony_ci	return iomap_readpage(page, &zonefs_read_iomap_ops);
1558c2ecf20Sopenharmony_ci}
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_cistatic void zonefs_readahead(struct readahead_control *rac)
1588c2ecf20Sopenharmony_ci{
1598c2ecf20Sopenharmony_ci	iomap_readahead(rac, &zonefs_read_iomap_ops);
1608c2ecf20Sopenharmony_ci}
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci/*
1638c2ecf20Sopenharmony_ci * Map blocks for page writeback. This is used only on conventional zone files,
1648c2ecf20Sopenharmony_ci * which implies that the page range can only be within the fixed inode size.
1658c2ecf20Sopenharmony_ci */
1668c2ecf20Sopenharmony_cistatic int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc,
1678c2ecf20Sopenharmony_ci				   struct inode *inode, loff_t offset)
1688c2ecf20Sopenharmony_ci{
1698c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV))
1728c2ecf20Sopenharmony_ci		return -EIO;
1738c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(offset >= i_size_read(inode)))
1748c2ecf20Sopenharmony_ci		return -EIO;
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci	/* If the mapping is already OK, nothing needs to be done */
1778c2ecf20Sopenharmony_ci	if (offset >= wpc->iomap.offset &&
1788c2ecf20Sopenharmony_ci	    offset < wpc->iomap.offset + wpc->iomap.length)
1798c2ecf20Sopenharmony_ci		return 0;
1808c2ecf20Sopenharmony_ci
1818c2ecf20Sopenharmony_ci	return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset,
1828c2ecf20Sopenharmony_ci					IOMAP_WRITE, &wpc->iomap, NULL);
1838c2ecf20Sopenharmony_ci}
1848c2ecf20Sopenharmony_ci
1858c2ecf20Sopenharmony_cistatic const struct iomap_writeback_ops zonefs_writeback_ops = {
1868c2ecf20Sopenharmony_ci	.map_blocks		= zonefs_write_map_blocks,
1878c2ecf20Sopenharmony_ci};
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_cistatic int zonefs_writepage(struct page *page, struct writeback_control *wbc)
1908c2ecf20Sopenharmony_ci{
1918c2ecf20Sopenharmony_ci	struct iomap_writepage_ctx wpc = { };
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_ci	return iomap_writepage(page, wbc, &wpc, &zonefs_writeback_ops);
1948c2ecf20Sopenharmony_ci}
1958c2ecf20Sopenharmony_ci
1968c2ecf20Sopenharmony_cistatic int zonefs_writepages(struct address_space *mapping,
1978c2ecf20Sopenharmony_ci			     struct writeback_control *wbc)
1988c2ecf20Sopenharmony_ci{
1998c2ecf20Sopenharmony_ci	struct iomap_writepage_ctx wpc = { };
2008c2ecf20Sopenharmony_ci
2018c2ecf20Sopenharmony_ci	return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops);
2028c2ecf20Sopenharmony_ci}
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_cistatic int zonefs_swap_activate(struct swap_info_struct *sis,
2058c2ecf20Sopenharmony_ci				struct file *swap_file, sector_t *span)
2068c2ecf20Sopenharmony_ci{
2078c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(swap_file);
2088c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	if (zi->i_ztype != ZONEFS_ZTYPE_CNV) {
2118c2ecf20Sopenharmony_ci		zonefs_err(inode->i_sb,
2128c2ecf20Sopenharmony_ci			   "swap file: not a conventional zone file\n");
2138c2ecf20Sopenharmony_ci		return -EINVAL;
2148c2ecf20Sopenharmony_ci	}
2158c2ecf20Sopenharmony_ci
2168c2ecf20Sopenharmony_ci	return iomap_swapfile_activate(sis, swap_file, span,
2178c2ecf20Sopenharmony_ci				       &zonefs_read_iomap_ops);
2188c2ecf20Sopenharmony_ci}
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_cistatic const struct address_space_operations zonefs_file_aops = {
2218c2ecf20Sopenharmony_ci	.readpage		= zonefs_readpage,
2228c2ecf20Sopenharmony_ci	.readahead		= zonefs_readahead,
2238c2ecf20Sopenharmony_ci	.writepage		= zonefs_writepage,
2248c2ecf20Sopenharmony_ci	.writepages		= zonefs_writepages,
2258c2ecf20Sopenharmony_ci	.set_page_dirty		= iomap_set_page_dirty,
2268c2ecf20Sopenharmony_ci	.releasepage		= iomap_releasepage,
2278c2ecf20Sopenharmony_ci	.invalidatepage		= iomap_invalidatepage,
2288c2ecf20Sopenharmony_ci	.migratepage		= iomap_migrate_page,
2298c2ecf20Sopenharmony_ci	.is_partially_uptodate	= iomap_is_partially_uptodate,
2308c2ecf20Sopenharmony_ci	.error_remove_page	= generic_error_remove_page,
2318c2ecf20Sopenharmony_ci	.direct_IO		= noop_direct_IO,
2328c2ecf20Sopenharmony_ci	.swap_activate		= zonefs_swap_activate,
2338c2ecf20Sopenharmony_ci};
2348c2ecf20Sopenharmony_ci
2358c2ecf20Sopenharmony_cistatic void zonefs_update_stats(struct inode *inode, loff_t new_isize)
2368c2ecf20Sopenharmony_ci{
2378c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
2388c2ecf20Sopenharmony_ci	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
2398c2ecf20Sopenharmony_ci	loff_t old_isize = i_size_read(inode);
2408c2ecf20Sopenharmony_ci	loff_t nr_blocks;
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	if (new_isize == old_isize)
2438c2ecf20Sopenharmony_ci		return;
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_ci	spin_lock(&sbi->s_lock);
2468c2ecf20Sopenharmony_ci
2478c2ecf20Sopenharmony_ci	/*
2488c2ecf20Sopenharmony_ci	 * This may be called for an update after an IO error.
2498c2ecf20Sopenharmony_ci	 * So beware of the values seen.
2508c2ecf20Sopenharmony_ci	 */
2518c2ecf20Sopenharmony_ci	if (new_isize < old_isize) {
2528c2ecf20Sopenharmony_ci		nr_blocks = (old_isize - new_isize) >> sb->s_blocksize_bits;
2538c2ecf20Sopenharmony_ci		if (sbi->s_used_blocks > nr_blocks)
2548c2ecf20Sopenharmony_ci			sbi->s_used_blocks -= nr_blocks;
2558c2ecf20Sopenharmony_ci		else
2568c2ecf20Sopenharmony_ci			sbi->s_used_blocks = 0;
2578c2ecf20Sopenharmony_ci	} else {
2588c2ecf20Sopenharmony_ci		sbi->s_used_blocks +=
2598c2ecf20Sopenharmony_ci			(new_isize - old_isize) >> sb->s_blocksize_bits;
2608c2ecf20Sopenharmony_ci		if (sbi->s_used_blocks > sbi->s_blocks)
2618c2ecf20Sopenharmony_ci			sbi->s_used_blocks = sbi->s_blocks;
2628c2ecf20Sopenharmony_ci	}
2638c2ecf20Sopenharmony_ci
2648c2ecf20Sopenharmony_ci	spin_unlock(&sbi->s_lock);
2658c2ecf20Sopenharmony_ci}
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_ci/*
2688c2ecf20Sopenharmony_ci * Check a zone condition and adjust its file inode access permissions for
2698c2ecf20Sopenharmony_ci * offline and readonly zones. Return the inode size corresponding to the
2708c2ecf20Sopenharmony_ci * amount of readable data in the zone.
2718c2ecf20Sopenharmony_ci */
2728c2ecf20Sopenharmony_cistatic loff_t zonefs_check_zone_condition(struct inode *inode,
2738c2ecf20Sopenharmony_ci					  struct blk_zone *zone, bool warn,
2748c2ecf20Sopenharmony_ci					  bool mount)
2758c2ecf20Sopenharmony_ci{
2768c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
2778c2ecf20Sopenharmony_ci
2788c2ecf20Sopenharmony_ci	switch (zone->cond) {
2798c2ecf20Sopenharmony_ci	case BLK_ZONE_COND_OFFLINE:
2808c2ecf20Sopenharmony_ci		/*
2818c2ecf20Sopenharmony_ci		 * Dead zone: make the inode immutable, disable all accesses
2828c2ecf20Sopenharmony_ci		 * and set the file size to 0 (zone wp set to zone start).
2838c2ecf20Sopenharmony_ci		 */
2848c2ecf20Sopenharmony_ci		if (warn)
2858c2ecf20Sopenharmony_ci			zonefs_warn(inode->i_sb, "inode %lu: offline zone\n",
2868c2ecf20Sopenharmony_ci				    inode->i_ino);
2878c2ecf20Sopenharmony_ci		inode->i_flags |= S_IMMUTABLE;
2888c2ecf20Sopenharmony_ci		inode->i_mode &= ~0777;
2898c2ecf20Sopenharmony_ci		zone->wp = zone->start;
2908c2ecf20Sopenharmony_ci		return 0;
2918c2ecf20Sopenharmony_ci	case BLK_ZONE_COND_READONLY:
2928c2ecf20Sopenharmony_ci		/*
2938c2ecf20Sopenharmony_ci		 * The write pointer of read-only zones is invalid. If such a
2948c2ecf20Sopenharmony_ci		 * zone is found during mount, the file size cannot be retrieved
2958c2ecf20Sopenharmony_ci		 * so we treat the zone as offline (mount == true case).
2968c2ecf20Sopenharmony_ci		 * Otherwise, keep the file size as it was when last updated
2978c2ecf20Sopenharmony_ci		 * so that the user can recover data. In both cases, writes are
2988c2ecf20Sopenharmony_ci		 * always disabled for the zone.
2998c2ecf20Sopenharmony_ci		 */
3008c2ecf20Sopenharmony_ci		if (warn)
3018c2ecf20Sopenharmony_ci			zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n",
3028c2ecf20Sopenharmony_ci				    inode->i_ino);
3038c2ecf20Sopenharmony_ci		inode->i_flags |= S_IMMUTABLE;
3048c2ecf20Sopenharmony_ci		if (mount) {
3058c2ecf20Sopenharmony_ci			zone->cond = BLK_ZONE_COND_OFFLINE;
3068c2ecf20Sopenharmony_ci			inode->i_mode &= ~0777;
3078c2ecf20Sopenharmony_ci			zone->wp = zone->start;
3088c2ecf20Sopenharmony_ci			return 0;
3098c2ecf20Sopenharmony_ci		}
3108c2ecf20Sopenharmony_ci		inode->i_mode &= ~0222;
3118c2ecf20Sopenharmony_ci		return i_size_read(inode);
3128c2ecf20Sopenharmony_ci	case BLK_ZONE_COND_FULL:
3138c2ecf20Sopenharmony_ci		/* The write pointer of full zones is invalid. */
3148c2ecf20Sopenharmony_ci		return zi->i_max_size;
3158c2ecf20Sopenharmony_ci	default:
3168c2ecf20Sopenharmony_ci		if (zi->i_ztype == ZONEFS_ZTYPE_CNV)
3178c2ecf20Sopenharmony_ci			return zi->i_max_size;
3188c2ecf20Sopenharmony_ci		return (zone->wp - zone->start) << SECTOR_SHIFT;
3198c2ecf20Sopenharmony_ci	}
3208c2ecf20Sopenharmony_ci}
3218c2ecf20Sopenharmony_ci
3228c2ecf20Sopenharmony_cistruct zonefs_ioerr_data {
3238c2ecf20Sopenharmony_ci	struct inode	*inode;
3248c2ecf20Sopenharmony_ci	bool		write;
3258c2ecf20Sopenharmony_ci};
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_cistatic int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
3288c2ecf20Sopenharmony_ci			      void *data)
3298c2ecf20Sopenharmony_ci{
3308c2ecf20Sopenharmony_ci	struct zonefs_ioerr_data *err = data;
3318c2ecf20Sopenharmony_ci	struct inode *inode = err->inode;
3328c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
3338c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
3348c2ecf20Sopenharmony_ci	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
3358c2ecf20Sopenharmony_ci	loff_t isize, data_size;
3368c2ecf20Sopenharmony_ci
3378c2ecf20Sopenharmony_ci	/*
3388c2ecf20Sopenharmony_ci	 * Check the zone condition: if the zone is not "bad" (offline or
3398c2ecf20Sopenharmony_ci	 * read-only), read errors are simply signaled to the IO issuer as long
3408c2ecf20Sopenharmony_ci	 * as there is no inconsistency between the inode size and the amount of
3418c2ecf20Sopenharmony_ci	 * data writen in the zone (data_size).
3428c2ecf20Sopenharmony_ci	 */
3438c2ecf20Sopenharmony_ci	data_size = zonefs_check_zone_condition(inode, zone, true, false);
3448c2ecf20Sopenharmony_ci	isize = i_size_read(inode);
3458c2ecf20Sopenharmony_ci	if (zone->cond != BLK_ZONE_COND_OFFLINE &&
3468c2ecf20Sopenharmony_ci	    zone->cond != BLK_ZONE_COND_READONLY &&
3478c2ecf20Sopenharmony_ci	    !err->write && isize == data_size)
3488c2ecf20Sopenharmony_ci		return 0;
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ci	/*
3518c2ecf20Sopenharmony_ci	 * At this point, we detected either a bad zone or an inconsistency
3528c2ecf20Sopenharmony_ci	 * between the inode size and the amount of data written in the zone.
3538c2ecf20Sopenharmony_ci	 * For the latter case, the cause may be a write IO error or an external
3548c2ecf20Sopenharmony_ci	 * action on the device. Two error patterns exist:
3558c2ecf20Sopenharmony_ci	 * 1) The inode size is lower than the amount of data in the zone:
3568c2ecf20Sopenharmony_ci	 *    a write operation partially failed and data was writen at the end
3578c2ecf20Sopenharmony_ci	 *    of the file. This can happen in the case of a large direct IO
3588c2ecf20Sopenharmony_ci	 *    needing several BIOs and/or write requests to be processed.
3598c2ecf20Sopenharmony_ci	 * 2) The inode size is larger than the amount of data in the zone:
3608c2ecf20Sopenharmony_ci	 *    this can happen with a deferred write error with the use of the
3618c2ecf20Sopenharmony_ci	 *    device side write cache after getting successful write IO
3628c2ecf20Sopenharmony_ci	 *    completions. Other possibilities are (a) an external corruption,
3638c2ecf20Sopenharmony_ci	 *    e.g. an application reset the zone directly, or (b) the device
3648c2ecf20Sopenharmony_ci	 *    has a serious problem (e.g. firmware bug).
3658c2ecf20Sopenharmony_ci	 *
3668c2ecf20Sopenharmony_ci	 * In all cases, warn about inode size inconsistency and handle the
3678c2ecf20Sopenharmony_ci	 * IO error according to the zone condition and to the mount options.
3688c2ecf20Sopenharmony_ci	 */
3698c2ecf20Sopenharmony_ci	if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && isize != data_size)
3708c2ecf20Sopenharmony_ci		zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n",
3718c2ecf20Sopenharmony_ci			    inode->i_ino, isize, data_size);
3728c2ecf20Sopenharmony_ci
3738c2ecf20Sopenharmony_ci	/*
3748c2ecf20Sopenharmony_ci	 * First handle bad zones signaled by hardware. The mount options
3758c2ecf20Sopenharmony_ci	 * errors=zone-ro and errors=zone-offline result in changing the
3768c2ecf20Sopenharmony_ci	 * zone condition to read-only and offline respectively, as if the
3778c2ecf20Sopenharmony_ci	 * condition was signaled by the hardware.
3788c2ecf20Sopenharmony_ci	 */
3798c2ecf20Sopenharmony_ci	if (zone->cond == BLK_ZONE_COND_OFFLINE ||
3808c2ecf20Sopenharmony_ci	    sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) {
3818c2ecf20Sopenharmony_ci		zonefs_warn(sb, "inode %lu: read/write access disabled\n",
3828c2ecf20Sopenharmony_ci			    inode->i_ino);
3838c2ecf20Sopenharmony_ci		if (zone->cond != BLK_ZONE_COND_OFFLINE) {
3848c2ecf20Sopenharmony_ci			zone->cond = BLK_ZONE_COND_OFFLINE;
3858c2ecf20Sopenharmony_ci			data_size = zonefs_check_zone_condition(inode, zone,
3868c2ecf20Sopenharmony_ci								false, false);
3878c2ecf20Sopenharmony_ci		}
3888c2ecf20Sopenharmony_ci	} else if (zone->cond == BLK_ZONE_COND_READONLY ||
3898c2ecf20Sopenharmony_ci		   sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) {
3908c2ecf20Sopenharmony_ci		zonefs_warn(sb, "inode %lu: write access disabled\n",
3918c2ecf20Sopenharmony_ci			    inode->i_ino);
3928c2ecf20Sopenharmony_ci		if (zone->cond != BLK_ZONE_COND_READONLY) {
3938c2ecf20Sopenharmony_ci			zone->cond = BLK_ZONE_COND_READONLY;
3948c2ecf20Sopenharmony_ci			data_size = zonefs_check_zone_condition(inode, zone,
3958c2ecf20Sopenharmony_ci								false, false);
3968c2ecf20Sopenharmony_ci		}
3978c2ecf20Sopenharmony_ci	} else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO &&
3988c2ecf20Sopenharmony_ci		   data_size > isize) {
3998c2ecf20Sopenharmony_ci		/* Do not expose garbage data */
4008c2ecf20Sopenharmony_ci		data_size = isize;
4018c2ecf20Sopenharmony_ci	}
4028c2ecf20Sopenharmony_ci
4038c2ecf20Sopenharmony_ci	/*
4048c2ecf20Sopenharmony_ci	 * If the filesystem is mounted with the explicit-open mount option, we
4058c2ecf20Sopenharmony_ci	 * need to clear the ZONEFS_ZONE_OPEN flag if the zone transitioned to
4068c2ecf20Sopenharmony_ci	 * the read-only or offline condition, to avoid attempting an explicit
4078c2ecf20Sopenharmony_ci	 * close of the zone when the inode file is closed.
4088c2ecf20Sopenharmony_ci	 */
4098c2ecf20Sopenharmony_ci	if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) &&
4108c2ecf20Sopenharmony_ci	    (zone->cond == BLK_ZONE_COND_OFFLINE ||
4118c2ecf20Sopenharmony_ci	     zone->cond == BLK_ZONE_COND_READONLY))
4128c2ecf20Sopenharmony_ci		zi->i_flags &= ~ZONEFS_ZONE_OPEN;
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_ci	/*
4158c2ecf20Sopenharmony_ci	 * If error=remount-ro was specified, any error result in remounting
4168c2ecf20Sopenharmony_ci	 * the volume as read-only.
4178c2ecf20Sopenharmony_ci	 */
4188c2ecf20Sopenharmony_ci	if ((sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) && !sb_rdonly(sb)) {
4198c2ecf20Sopenharmony_ci		zonefs_warn(sb, "remounting filesystem read-only\n");
4208c2ecf20Sopenharmony_ci		sb->s_flags |= SB_RDONLY;
4218c2ecf20Sopenharmony_ci	}
4228c2ecf20Sopenharmony_ci
4238c2ecf20Sopenharmony_ci	/*
4248c2ecf20Sopenharmony_ci	 * Update block usage stats and the inode size  to prevent access to
4258c2ecf20Sopenharmony_ci	 * invalid data.
4268c2ecf20Sopenharmony_ci	 */
4278c2ecf20Sopenharmony_ci	zonefs_update_stats(inode, data_size);
4288c2ecf20Sopenharmony_ci	zonefs_i_size_write(inode, data_size);
4298c2ecf20Sopenharmony_ci	zi->i_wpoffset = data_size;
4308c2ecf20Sopenharmony_ci
4318c2ecf20Sopenharmony_ci	return 0;
4328c2ecf20Sopenharmony_ci}
4338c2ecf20Sopenharmony_ci
4348c2ecf20Sopenharmony_ci/*
4358c2ecf20Sopenharmony_ci * When an file IO error occurs, check the file zone to see if there is a change
4368c2ecf20Sopenharmony_ci * in the zone condition (e.g. offline or read-only). For a failed write to a
4378c2ecf20Sopenharmony_ci * sequential zone, the zone write pointer position must also be checked to
4388c2ecf20Sopenharmony_ci * eventually correct the file size and zonefs inode write pointer offset
4398c2ecf20Sopenharmony_ci * (which can be out of sync with the drive due to partial write failures).
4408c2ecf20Sopenharmony_ci */
4418c2ecf20Sopenharmony_cistatic void __zonefs_io_error(struct inode *inode, bool write)
4428c2ecf20Sopenharmony_ci{
4438c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
4448c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
4458c2ecf20Sopenharmony_ci	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
4468c2ecf20Sopenharmony_ci	unsigned int noio_flag;
4478c2ecf20Sopenharmony_ci	unsigned int nr_zones = 1;
4488c2ecf20Sopenharmony_ci	struct zonefs_ioerr_data err = {
4498c2ecf20Sopenharmony_ci		.inode = inode,
4508c2ecf20Sopenharmony_ci		.write = write,
4518c2ecf20Sopenharmony_ci	};
4528c2ecf20Sopenharmony_ci	int ret;
4538c2ecf20Sopenharmony_ci
4548c2ecf20Sopenharmony_ci	/*
4558c2ecf20Sopenharmony_ci	 * The only files that have more than one zone are conventional zone
4568c2ecf20Sopenharmony_ci	 * files with aggregated conventional zones, for which the inode zone
4578c2ecf20Sopenharmony_ci	 * size is always larger than the device zone size.
4588c2ecf20Sopenharmony_ci	 */
4598c2ecf20Sopenharmony_ci	if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev))
4608c2ecf20Sopenharmony_ci		nr_zones = zi->i_zone_size >>
4618c2ecf20Sopenharmony_ci			(sbi->s_zone_sectors_shift + SECTOR_SHIFT);
4628c2ecf20Sopenharmony_ci
4638c2ecf20Sopenharmony_ci	/*
4648c2ecf20Sopenharmony_ci	 * Memory allocations in blkdev_report_zones() can trigger a memory
4658c2ecf20Sopenharmony_ci	 * reclaim which may in turn cause a recursion into zonefs as well as
4668c2ecf20Sopenharmony_ci	 * struct request allocations for the same device. The former case may
4678c2ecf20Sopenharmony_ci	 * end up in a deadlock on the inode truncate mutex, while the latter
4688c2ecf20Sopenharmony_ci	 * may prevent IO forward progress. Executing the report zones under
4698c2ecf20Sopenharmony_ci	 * the GFP_NOIO context avoids both problems.
4708c2ecf20Sopenharmony_ci	 */
4718c2ecf20Sopenharmony_ci	noio_flag = memalloc_noio_save();
4728c2ecf20Sopenharmony_ci	ret = blkdev_report_zones(sb->s_bdev, zi->i_zsector, nr_zones,
4738c2ecf20Sopenharmony_ci				  zonefs_io_error_cb, &err);
4748c2ecf20Sopenharmony_ci	if (ret != nr_zones)
4758c2ecf20Sopenharmony_ci		zonefs_err(sb, "Get inode %lu zone information failed %d\n",
4768c2ecf20Sopenharmony_ci			   inode->i_ino, ret);
4778c2ecf20Sopenharmony_ci	memalloc_noio_restore(noio_flag);
4788c2ecf20Sopenharmony_ci}
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_cistatic void zonefs_io_error(struct inode *inode, bool write)
4818c2ecf20Sopenharmony_ci{
4828c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
4838c2ecf20Sopenharmony_ci
4848c2ecf20Sopenharmony_ci	mutex_lock(&zi->i_truncate_mutex);
4858c2ecf20Sopenharmony_ci	__zonefs_io_error(inode, write);
4868c2ecf20Sopenharmony_ci	mutex_unlock(&zi->i_truncate_mutex);
4878c2ecf20Sopenharmony_ci}
4888c2ecf20Sopenharmony_ci
4898c2ecf20Sopenharmony_cistatic int zonefs_file_truncate(struct inode *inode, loff_t isize)
4908c2ecf20Sopenharmony_ci{
4918c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
4928c2ecf20Sopenharmony_ci	loff_t old_isize;
4938c2ecf20Sopenharmony_ci	enum req_opf op;
4948c2ecf20Sopenharmony_ci	int ret = 0;
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_ci	/*
4978c2ecf20Sopenharmony_ci	 * Only sequential zone files can be truncated and truncation is allowed
4988c2ecf20Sopenharmony_ci	 * only down to a 0 size, which is equivalent to a zone reset, and to
4998c2ecf20Sopenharmony_ci	 * the maximum file size, which is equivalent to a zone finish.
5008c2ecf20Sopenharmony_ci	 */
5018c2ecf20Sopenharmony_ci	if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
5028c2ecf20Sopenharmony_ci		return -EPERM;
5038c2ecf20Sopenharmony_ci
5048c2ecf20Sopenharmony_ci	if (!isize)
5058c2ecf20Sopenharmony_ci		op = REQ_OP_ZONE_RESET;
5068c2ecf20Sopenharmony_ci	else if (isize == zi->i_max_size)
5078c2ecf20Sopenharmony_ci		op = REQ_OP_ZONE_FINISH;
5088c2ecf20Sopenharmony_ci	else
5098c2ecf20Sopenharmony_ci		return -EPERM;
5108c2ecf20Sopenharmony_ci
5118c2ecf20Sopenharmony_ci	inode_dio_wait(inode);
5128c2ecf20Sopenharmony_ci
5138c2ecf20Sopenharmony_ci	/* Serialize against page faults */
5148c2ecf20Sopenharmony_ci	down_write(&zi->i_mmap_sem);
5158c2ecf20Sopenharmony_ci
5168c2ecf20Sopenharmony_ci	/* Serialize against zonefs_iomap_begin() */
5178c2ecf20Sopenharmony_ci	mutex_lock(&zi->i_truncate_mutex);
5188c2ecf20Sopenharmony_ci
5198c2ecf20Sopenharmony_ci	old_isize = i_size_read(inode);
5208c2ecf20Sopenharmony_ci	if (isize == old_isize)
5218c2ecf20Sopenharmony_ci		goto unlock;
5228c2ecf20Sopenharmony_ci
5238c2ecf20Sopenharmony_ci	ret = zonefs_zone_mgmt(inode, op);
5248c2ecf20Sopenharmony_ci	if (ret)
5258c2ecf20Sopenharmony_ci		goto unlock;
5268c2ecf20Sopenharmony_ci
5278c2ecf20Sopenharmony_ci	/*
5288c2ecf20Sopenharmony_ci	 * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set,
5298c2ecf20Sopenharmony_ci	 * take care of open zones.
5308c2ecf20Sopenharmony_ci	 */
5318c2ecf20Sopenharmony_ci	if (zi->i_flags & ZONEFS_ZONE_OPEN) {
5328c2ecf20Sopenharmony_ci		/*
5338c2ecf20Sopenharmony_ci		 * Truncating a zone to EMPTY or FULL is the equivalent of
5348c2ecf20Sopenharmony_ci		 * closing the zone. For a truncation to 0, we need to
5358c2ecf20Sopenharmony_ci		 * re-open the zone to ensure new writes can be processed.
5368c2ecf20Sopenharmony_ci		 * For a truncation to the maximum file size, the zone is
5378c2ecf20Sopenharmony_ci		 * closed and writes cannot be accepted anymore, so clear
5388c2ecf20Sopenharmony_ci		 * the open flag.
5398c2ecf20Sopenharmony_ci		 */
5408c2ecf20Sopenharmony_ci		if (!isize)
5418c2ecf20Sopenharmony_ci			ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
5428c2ecf20Sopenharmony_ci		else
5438c2ecf20Sopenharmony_ci			zi->i_flags &= ~ZONEFS_ZONE_OPEN;
5448c2ecf20Sopenharmony_ci	}
5458c2ecf20Sopenharmony_ci
5468c2ecf20Sopenharmony_ci	zonefs_update_stats(inode, isize);
5478c2ecf20Sopenharmony_ci	truncate_setsize(inode, isize);
5488c2ecf20Sopenharmony_ci	zi->i_wpoffset = isize;
5498c2ecf20Sopenharmony_ci
5508c2ecf20Sopenharmony_ciunlock:
5518c2ecf20Sopenharmony_ci	mutex_unlock(&zi->i_truncate_mutex);
5528c2ecf20Sopenharmony_ci	up_write(&zi->i_mmap_sem);
5538c2ecf20Sopenharmony_ci
5548c2ecf20Sopenharmony_ci	return ret;
5558c2ecf20Sopenharmony_ci}
5568c2ecf20Sopenharmony_ci
5578c2ecf20Sopenharmony_cistatic int zonefs_inode_setattr(struct dentry *dentry, struct iattr *iattr)
5588c2ecf20Sopenharmony_ci{
5598c2ecf20Sopenharmony_ci	struct inode *inode = d_inode(dentry);
5608c2ecf20Sopenharmony_ci	int ret;
5618c2ecf20Sopenharmony_ci
5628c2ecf20Sopenharmony_ci	if (unlikely(IS_IMMUTABLE(inode)))
5638c2ecf20Sopenharmony_ci		return -EPERM;
5648c2ecf20Sopenharmony_ci
5658c2ecf20Sopenharmony_ci	ret = setattr_prepare(dentry, iattr);
5668c2ecf20Sopenharmony_ci	if (ret)
5678c2ecf20Sopenharmony_ci		return ret;
5688c2ecf20Sopenharmony_ci
5698c2ecf20Sopenharmony_ci	/*
5708c2ecf20Sopenharmony_ci	 * Since files and directories cannot be created nor deleted, do not
5718c2ecf20Sopenharmony_ci	 * allow setting any write attributes on the sub-directories grouping
5728c2ecf20Sopenharmony_ci	 * files by zone type.
5738c2ecf20Sopenharmony_ci	 */
5748c2ecf20Sopenharmony_ci	if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) &&
5758c2ecf20Sopenharmony_ci	    (iattr->ia_mode & 0222))
5768c2ecf20Sopenharmony_ci		return -EPERM;
5778c2ecf20Sopenharmony_ci
5788c2ecf20Sopenharmony_ci	if (((iattr->ia_valid & ATTR_UID) &&
5798c2ecf20Sopenharmony_ci	     !uid_eq(iattr->ia_uid, inode->i_uid)) ||
5808c2ecf20Sopenharmony_ci	    ((iattr->ia_valid & ATTR_GID) &&
5818c2ecf20Sopenharmony_ci	     !gid_eq(iattr->ia_gid, inode->i_gid))) {
5828c2ecf20Sopenharmony_ci		ret = dquot_transfer(inode, iattr);
5838c2ecf20Sopenharmony_ci		if (ret)
5848c2ecf20Sopenharmony_ci			return ret;
5858c2ecf20Sopenharmony_ci	}
5868c2ecf20Sopenharmony_ci
5878c2ecf20Sopenharmony_ci	if (iattr->ia_valid & ATTR_SIZE) {
5888c2ecf20Sopenharmony_ci		ret = zonefs_file_truncate(inode, iattr->ia_size);
5898c2ecf20Sopenharmony_ci		if (ret)
5908c2ecf20Sopenharmony_ci			return ret;
5918c2ecf20Sopenharmony_ci	}
5928c2ecf20Sopenharmony_ci
5938c2ecf20Sopenharmony_ci	setattr_copy(inode, iattr);
5948c2ecf20Sopenharmony_ci
5958c2ecf20Sopenharmony_ci	return 0;
5968c2ecf20Sopenharmony_ci}
5978c2ecf20Sopenharmony_ci
5988c2ecf20Sopenharmony_cistatic const struct inode_operations zonefs_file_inode_operations = {
5998c2ecf20Sopenharmony_ci	.setattr	= zonefs_inode_setattr,
6008c2ecf20Sopenharmony_ci};
6018c2ecf20Sopenharmony_ci
6028c2ecf20Sopenharmony_cistatic int zonefs_file_fsync(struct file *file, loff_t start, loff_t end,
6038c2ecf20Sopenharmony_ci			     int datasync)
6048c2ecf20Sopenharmony_ci{
6058c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(file);
6068c2ecf20Sopenharmony_ci	int ret = 0;
6078c2ecf20Sopenharmony_ci
6088c2ecf20Sopenharmony_ci	if (unlikely(IS_IMMUTABLE(inode)))
6098c2ecf20Sopenharmony_ci		return -EPERM;
6108c2ecf20Sopenharmony_ci
6118c2ecf20Sopenharmony_ci	/*
6128c2ecf20Sopenharmony_ci	 * Since only direct writes are allowed in sequential files, page cache
6138c2ecf20Sopenharmony_ci	 * flush is needed only for conventional zone files.
6148c2ecf20Sopenharmony_ci	 */
6158c2ecf20Sopenharmony_ci	if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV)
6168c2ecf20Sopenharmony_ci		ret = file_write_and_wait_range(file, start, end);
6178c2ecf20Sopenharmony_ci	if (!ret)
6188c2ecf20Sopenharmony_ci		ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
6198c2ecf20Sopenharmony_ci
6208c2ecf20Sopenharmony_ci	if (ret)
6218c2ecf20Sopenharmony_ci		zonefs_io_error(inode, true);
6228c2ecf20Sopenharmony_ci
6238c2ecf20Sopenharmony_ci	return ret;
6248c2ecf20Sopenharmony_ci}
6258c2ecf20Sopenharmony_ci
6268c2ecf20Sopenharmony_cistatic vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf)
6278c2ecf20Sopenharmony_ci{
6288c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file));
6298c2ecf20Sopenharmony_ci	vm_fault_t ret;
6308c2ecf20Sopenharmony_ci
6318c2ecf20Sopenharmony_ci	down_read(&zi->i_mmap_sem);
6328c2ecf20Sopenharmony_ci	ret = filemap_fault(vmf);
6338c2ecf20Sopenharmony_ci	up_read(&zi->i_mmap_sem);
6348c2ecf20Sopenharmony_ci
6358c2ecf20Sopenharmony_ci	return ret;
6368c2ecf20Sopenharmony_ci}
6378c2ecf20Sopenharmony_ci
6388c2ecf20Sopenharmony_cistatic vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
6398c2ecf20Sopenharmony_ci{
6408c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(vmf->vma->vm_file);
6418c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
6428c2ecf20Sopenharmony_ci	vm_fault_t ret;
6438c2ecf20Sopenharmony_ci
6448c2ecf20Sopenharmony_ci	if (unlikely(IS_IMMUTABLE(inode)))
6458c2ecf20Sopenharmony_ci		return VM_FAULT_SIGBUS;
6468c2ecf20Sopenharmony_ci
6478c2ecf20Sopenharmony_ci	/*
6488c2ecf20Sopenharmony_ci	 * Sanity check: only conventional zone files can have shared
6498c2ecf20Sopenharmony_ci	 * writeable mappings.
6508c2ecf20Sopenharmony_ci	 */
6518c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV))
6528c2ecf20Sopenharmony_ci		return VM_FAULT_NOPAGE;
6538c2ecf20Sopenharmony_ci
6548c2ecf20Sopenharmony_ci	sb_start_pagefault(inode->i_sb);
6558c2ecf20Sopenharmony_ci	file_update_time(vmf->vma->vm_file);
6568c2ecf20Sopenharmony_ci
6578c2ecf20Sopenharmony_ci	/* Serialize against truncates */
6588c2ecf20Sopenharmony_ci	down_read(&zi->i_mmap_sem);
6598c2ecf20Sopenharmony_ci	ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops);
6608c2ecf20Sopenharmony_ci	up_read(&zi->i_mmap_sem);
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_ci	sb_end_pagefault(inode->i_sb);
6638c2ecf20Sopenharmony_ci	return ret;
6648c2ecf20Sopenharmony_ci}
6658c2ecf20Sopenharmony_ci
6668c2ecf20Sopenharmony_cistatic const struct vm_operations_struct zonefs_file_vm_ops = {
6678c2ecf20Sopenharmony_ci	.fault		= zonefs_filemap_fault,
6688c2ecf20Sopenharmony_ci	.map_pages	= filemap_map_pages,
6698c2ecf20Sopenharmony_ci	.page_mkwrite	= zonefs_filemap_page_mkwrite,
6708c2ecf20Sopenharmony_ci};
6718c2ecf20Sopenharmony_ci
6728c2ecf20Sopenharmony_cistatic int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma)
6738c2ecf20Sopenharmony_ci{
6748c2ecf20Sopenharmony_ci	/*
6758c2ecf20Sopenharmony_ci	 * Conventional zones accept random writes, so their files can support
6768c2ecf20Sopenharmony_ci	 * shared writable mappings. For sequential zone files, only read
6778c2ecf20Sopenharmony_ci	 * mappings are possible since there are no guarantees for write
6788c2ecf20Sopenharmony_ci	 * ordering between msync() and page cache writeback.
6798c2ecf20Sopenharmony_ci	 */
6808c2ecf20Sopenharmony_ci	if (ZONEFS_I(file_inode(file))->i_ztype == ZONEFS_ZTYPE_SEQ &&
6818c2ecf20Sopenharmony_ci	    (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
6828c2ecf20Sopenharmony_ci		return -EINVAL;
6838c2ecf20Sopenharmony_ci
6848c2ecf20Sopenharmony_ci	file_accessed(file);
6858c2ecf20Sopenharmony_ci	vma->vm_ops = &zonefs_file_vm_ops;
6868c2ecf20Sopenharmony_ci
6878c2ecf20Sopenharmony_ci	return 0;
6888c2ecf20Sopenharmony_ci}
6898c2ecf20Sopenharmony_ci
6908c2ecf20Sopenharmony_cistatic loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence)
6918c2ecf20Sopenharmony_ci{
6928c2ecf20Sopenharmony_ci	loff_t isize = i_size_read(file_inode(file));
6938c2ecf20Sopenharmony_ci
6948c2ecf20Sopenharmony_ci	/*
6958c2ecf20Sopenharmony_ci	 * Seeks are limited to below the zone size for conventional zones
6968c2ecf20Sopenharmony_ci	 * and below the zone write pointer for sequential zones. In both
6978c2ecf20Sopenharmony_ci	 * cases, this limit is the inode size.
6988c2ecf20Sopenharmony_ci	 */
6998c2ecf20Sopenharmony_ci	return generic_file_llseek_size(file, offset, whence, isize, isize);
7008c2ecf20Sopenharmony_ci}
7018c2ecf20Sopenharmony_ci
7028c2ecf20Sopenharmony_cistatic int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
7038c2ecf20Sopenharmony_ci					int error, unsigned int flags)
7048c2ecf20Sopenharmony_ci{
7058c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(iocb->ki_filp);
7068c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
7078c2ecf20Sopenharmony_ci
7088c2ecf20Sopenharmony_ci	if (error) {
7098c2ecf20Sopenharmony_ci		zonefs_io_error(inode, true);
7108c2ecf20Sopenharmony_ci		return error;
7118c2ecf20Sopenharmony_ci	}
7128c2ecf20Sopenharmony_ci
7138c2ecf20Sopenharmony_ci	if (size && zi->i_ztype != ZONEFS_ZTYPE_CNV) {
7148c2ecf20Sopenharmony_ci		/*
7158c2ecf20Sopenharmony_ci		 * Note that we may be seeing completions out of order,
7168c2ecf20Sopenharmony_ci		 * but that is not a problem since a write completed
7178c2ecf20Sopenharmony_ci		 * successfully necessarily means that all preceding writes
7188c2ecf20Sopenharmony_ci		 * were also successful. So we can safely increase the inode
7198c2ecf20Sopenharmony_ci		 * size to the write end location.
7208c2ecf20Sopenharmony_ci		 */
7218c2ecf20Sopenharmony_ci		mutex_lock(&zi->i_truncate_mutex);
7228c2ecf20Sopenharmony_ci		if (i_size_read(inode) < iocb->ki_pos + size) {
7238c2ecf20Sopenharmony_ci			zonefs_update_stats(inode, iocb->ki_pos + size);
7248c2ecf20Sopenharmony_ci			zonefs_i_size_write(inode, iocb->ki_pos + size);
7258c2ecf20Sopenharmony_ci		}
7268c2ecf20Sopenharmony_ci		mutex_unlock(&zi->i_truncate_mutex);
7278c2ecf20Sopenharmony_ci	}
7288c2ecf20Sopenharmony_ci
7298c2ecf20Sopenharmony_ci	return 0;
7308c2ecf20Sopenharmony_ci}
7318c2ecf20Sopenharmony_ci
7328c2ecf20Sopenharmony_cistatic const struct iomap_dio_ops zonefs_write_dio_ops = {
7338c2ecf20Sopenharmony_ci	.end_io			= zonefs_file_write_dio_end_io,
7348c2ecf20Sopenharmony_ci};
7358c2ecf20Sopenharmony_ci
7368c2ecf20Sopenharmony_cistatic ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
7378c2ecf20Sopenharmony_ci{
7388c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(iocb->ki_filp);
7398c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
7408c2ecf20Sopenharmony_ci	struct block_device *bdev = inode->i_sb->s_bdev;
7418c2ecf20Sopenharmony_ci	unsigned int max;
7428c2ecf20Sopenharmony_ci	struct bio *bio;
7438c2ecf20Sopenharmony_ci	ssize_t size;
7448c2ecf20Sopenharmony_ci	int nr_pages;
7458c2ecf20Sopenharmony_ci	ssize_t ret;
7468c2ecf20Sopenharmony_ci
7478c2ecf20Sopenharmony_ci	max = queue_max_zone_append_sectors(bdev_get_queue(bdev));
7488c2ecf20Sopenharmony_ci	max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
7498c2ecf20Sopenharmony_ci	iov_iter_truncate(from, max);
7508c2ecf20Sopenharmony_ci
7518c2ecf20Sopenharmony_ci	nr_pages = iov_iter_npages(from, BIO_MAX_PAGES);
7528c2ecf20Sopenharmony_ci	if (!nr_pages)
7538c2ecf20Sopenharmony_ci		return 0;
7548c2ecf20Sopenharmony_ci
7558c2ecf20Sopenharmony_ci	bio = bio_alloc_bioset(GFP_NOFS, nr_pages, &fs_bio_set);
7568c2ecf20Sopenharmony_ci	if (!bio)
7578c2ecf20Sopenharmony_ci		return -ENOMEM;
7588c2ecf20Sopenharmony_ci
7598c2ecf20Sopenharmony_ci	bio_set_dev(bio, bdev);
7608c2ecf20Sopenharmony_ci	bio->bi_iter.bi_sector = zi->i_zsector;
7618c2ecf20Sopenharmony_ci	bio->bi_write_hint = iocb->ki_hint;
7628c2ecf20Sopenharmony_ci	bio->bi_ioprio = iocb->ki_ioprio;
7638c2ecf20Sopenharmony_ci	bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
7648c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_DSYNC)
7658c2ecf20Sopenharmony_ci		bio->bi_opf |= REQ_FUA;
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_ci	ret = bio_iov_iter_get_pages(bio, from);
7688c2ecf20Sopenharmony_ci	if (unlikely(ret))
7698c2ecf20Sopenharmony_ci		goto out_release;
7708c2ecf20Sopenharmony_ci
7718c2ecf20Sopenharmony_ci	size = bio->bi_iter.bi_size;
7728c2ecf20Sopenharmony_ci	task_io_account_write(size);
7738c2ecf20Sopenharmony_ci
7748c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_HIPRI)
7758c2ecf20Sopenharmony_ci		bio_set_polled(bio, iocb);
7768c2ecf20Sopenharmony_ci
7778c2ecf20Sopenharmony_ci	ret = submit_bio_wait(bio);
7788c2ecf20Sopenharmony_ci
7798c2ecf20Sopenharmony_ci	/*
7808c2ecf20Sopenharmony_ci	 * If the file zone was written underneath the file system, the zone
7818c2ecf20Sopenharmony_ci	 * write pointer may not be where we expect it to be, but the zone
7828c2ecf20Sopenharmony_ci	 * append write can still succeed. So check manually that we wrote where
7838c2ecf20Sopenharmony_ci	 * we intended to, that is, at zi->i_wpoffset.
7848c2ecf20Sopenharmony_ci	 */
7858c2ecf20Sopenharmony_ci	if (!ret) {
7868c2ecf20Sopenharmony_ci		sector_t wpsector =
7878c2ecf20Sopenharmony_ci			zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT);
7888c2ecf20Sopenharmony_ci
7898c2ecf20Sopenharmony_ci		if (bio->bi_iter.bi_sector != wpsector) {
7908c2ecf20Sopenharmony_ci			zonefs_warn(inode->i_sb,
7918c2ecf20Sopenharmony_ci				"Corrupted write pointer %llu for zone at %llu\n",
7928c2ecf20Sopenharmony_ci				bio->bi_iter.bi_sector, zi->i_zsector);
7938c2ecf20Sopenharmony_ci			ret = -EIO;
7948c2ecf20Sopenharmony_ci		}
7958c2ecf20Sopenharmony_ci	}
7968c2ecf20Sopenharmony_ci
7978c2ecf20Sopenharmony_ci	zonefs_file_write_dio_end_io(iocb, size, ret, 0);
7988c2ecf20Sopenharmony_ci
7998c2ecf20Sopenharmony_ciout_release:
8008c2ecf20Sopenharmony_ci	bio_release_pages(bio, false);
8018c2ecf20Sopenharmony_ci	bio_put(bio);
8028c2ecf20Sopenharmony_ci
8038c2ecf20Sopenharmony_ci	if (ret >= 0) {
8048c2ecf20Sopenharmony_ci		iocb->ki_pos += size;
8058c2ecf20Sopenharmony_ci		return size;
8068c2ecf20Sopenharmony_ci	}
8078c2ecf20Sopenharmony_ci
8088c2ecf20Sopenharmony_ci	return ret;
8098c2ecf20Sopenharmony_ci}
8108c2ecf20Sopenharmony_ci
8118c2ecf20Sopenharmony_ci/*
8128c2ecf20Sopenharmony_ci * Do not exceed the LFS limits nor the file zone size. If pos is under the
8138c2ecf20Sopenharmony_ci * limit it becomes a short access. If it exceeds the limit, return -EFBIG.
8148c2ecf20Sopenharmony_ci */
8158c2ecf20Sopenharmony_cistatic loff_t zonefs_write_check_limits(struct file *file, loff_t pos,
8168c2ecf20Sopenharmony_ci					loff_t count)
8178c2ecf20Sopenharmony_ci{
8188c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(file);
8198c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
8208c2ecf20Sopenharmony_ci	loff_t limit = rlimit(RLIMIT_FSIZE);
8218c2ecf20Sopenharmony_ci	loff_t max_size = zi->i_max_size;
8228c2ecf20Sopenharmony_ci
8238c2ecf20Sopenharmony_ci	if (limit != RLIM_INFINITY) {
8248c2ecf20Sopenharmony_ci		if (pos >= limit) {
8258c2ecf20Sopenharmony_ci			send_sig(SIGXFSZ, current, 0);
8268c2ecf20Sopenharmony_ci			return -EFBIG;
8278c2ecf20Sopenharmony_ci		}
8288c2ecf20Sopenharmony_ci		count = min(count, limit - pos);
8298c2ecf20Sopenharmony_ci	}
8308c2ecf20Sopenharmony_ci
8318c2ecf20Sopenharmony_ci	if (!(file->f_flags & O_LARGEFILE))
8328c2ecf20Sopenharmony_ci		max_size = min_t(loff_t, MAX_NON_LFS, max_size);
8338c2ecf20Sopenharmony_ci
8348c2ecf20Sopenharmony_ci	if (unlikely(pos >= max_size))
8358c2ecf20Sopenharmony_ci		return -EFBIG;
8368c2ecf20Sopenharmony_ci
8378c2ecf20Sopenharmony_ci	return min(count, max_size - pos);
8388c2ecf20Sopenharmony_ci}
8398c2ecf20Sopenharmony_ci
8408c2ecf20Sopenharmony_cistatic ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from)
8418c2ecf20Sopenharmony_ci{
8428c2ecf20Sopenharmony_ci	struct file *file = iocb->ki_filp;
8438c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(file);
8448c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
8458c2ecf20Sopenharmony_ci	loff_t count;
8468c2ecf20Sopenharmony_ci
8478c2ecf20Sopenharmony_ci	if (IS_SWAPFILE(inode))
8488c2ecf20Sopenharmony_ci		return -ETXTBSY;
8498c2ecf20Sopenharmony_ci
8508c2ecf20Sopenharmony_ci	if (!iov_iter_count(from))
8518c2ecf20Sopenharmony_ci		return 0;
8528c2ecf20Sopenharmony_ci
8538c2ecf20Sopenharmony_ci	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
8548c2ecf20Sopenharmony_ci		return -EINVAL;
8558c2ecf20Sopenharmony_ci
8568c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_APPEND) {
8578c2ecf20Sopenharmony_ci		if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
8588c2ecf20Sopenharmony_ci			return -EINVAL;
8598c2ecf20Sopenharmony_ci		mutex_lock(&zi->i_truncate_mutex);
8608c2ecf20Sopenharmony_ci		iocb->ki_pos = zi->i_wpoffset;
8618c2ecf20Sopenharmony_ci		mutex_unlock(&zi->i_truncate_mutex);
8628c2ecf20Sopenharmony_ci	}
8638c2ecf20Sopenharmony_ci
8648c2ecf20Sopenharmony_ci	count = zonefs_write_check_limits(file, iocb->ki_pos,
8658c2ecf20Sopenharmony_ci					  iov_iter_count(from));
8668c2ecf20Sopenharmony_ci	if (count < 0)
8678c2ecf20Sopenharmony_ci		return count;
8688c2ecf20Sopenharmony_ci
8698c2ecf20Sopenharmony_ci	iov_iter_truncate(from, count);
8708c2ecf20Sopenharmony_ci	return iov_iter_count(from);
8718c2ecf20Sopenharmony_ci}
8728c2ecf20Sopenharmony_ci
8738c2ecf20Sopenharmony_ci/*
8748c2ecf20Sopenharmony_ci * Handle direct writes. For sequential zone files, this is the only possible
8758c2ecf20Sopenharmony_ci * write path. For these files, check that the user is issuing writes
8768c2ecf20Sopenharmony_ci * sequentially from the end of the file. This code assumes that the block layer
8778c2ecf20Sopenharmony_ci * delivers write requests to the device in sequential order. This is always the
8788c2ecf20Sopenharmony_ci * case if a block IO scheduler implementing the ELEVATOR_F_ZBD_SEQ_WRITE
8798c2ecf20Sopenharmony_ci * elevator feature is being used (e.g. mq-deadline). The block layer always
8808c2ecf20Sopenharmony_ci * automatically select such an elevator for zoned block devices during the
8818c2ecf20Sopenharmony_ci * device initialization.
8828c2ecf20Sopenharmony_ci */
8838c2ecf20Sopenharmony_cistatic ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
8848c2ecf20Sopenharmony_ci{
8858c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(iocb->ki_filp);
8868c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
8878c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
8888c2ecf20Sopenharmony_ci	bool sync = is_sync_kiocb(iocb);
8898c2ecf20Sopenharmony_ci	bool append = false;
8908c2ecf20Sopenharmony_ci	ssize_t ret, count;
8918c2ecf20Sopenharmony_ci
8928c2ecf20Sopenharmony_ci	/*
8938c2ecf20Sopenharmony_ci	 * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT
8948c2ecf20Sopenharmony_ci	 * as this can cause write reordering (e.g. the first aio gets EAGAIN
8958c2ecf20Sopenharmony_ci	 * on the inode lock but the second goes through but is now unaligned).
8968c2ecf20Sopenharmony_ci	 */
8978c2ecf20Sopenharmony_ci	if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !sync &&
8988c2ecf20Sopenharmony_ci	    (iocb->ki_flags & IOCB_NOWAIT))
8998c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
9008c2ecf20Sopenharmony_ci
9018c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_NOWAIT) {
9028c2ecf20Sopenharmony_ci		if (!inode_trylock(inode))
9038c2ecf20Sopenharmony_ci			return -EAGAIN;
9048c2ecf20Sopenharmony_ci	} else {
9058c2ecf20Sopenharmony_ci		inode_lock(inode);
9068c2ecf20Sopenharmony_ci	}
9078c2ecf20Sopenharmony_ci
9088c2ecf20Sopenharmony_ci	count = zonefs_write_checks(iocb, from);
9098c2ecf20Sopenharmony_ci	if (count <= 0) {
9108c2ecf20Sopenharmony_ci		ret = count;
9118c2ecf20Sopenharmony_ci		goto inode_unlock;
9128c2ecf20Sopenharmony_ci	}
9138c2ecf20Sopenharmony_ci
9148c2ecf20Sopenharmony_ci	if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
9158c2ecf20Sopenharmony_ci		ret = -EINVAL;
9168c2ecf20Sopenharmony_ci		goto inode_unlock;
9178c2ecf20Sopenharmony_ci	}
9188c2ecf20Sopenharmony_ci
9198c2ecf20Sopenharmony_ci	/* Enforce sequential writes (append only) in sequential zones */
9208c2ecf20Sopenharmony_ci	if (zi->i_ztype == ZONEFS_ZTYPE_SEQ) {
9218c2ecf20Sopenharmony_ci		mutex_lock(&zi->i_truncate_mutex);
9228c2ecf20Sopenharmony_ci		if (iocb->ki_pos != zi->i_wpoffset) {
9238c2ecf20Sopenharmony_ci			mutex_unlock(&zi->i_truncate_mutex);
9248c2ecf20Sopenharmony_ci			ret = -EINVAL;
9258c2ecf20Sopenharmony_ci			goto inode_unlock;
9268c2ecf20Sopenharmony_ci		}
9278c2ecf20Sopenharmony_ci		mutex_unlock(&zi->i_truncate_mutex);
9288c2ecf20Sopenharmony_ci		append = sync;
9298c2ecf20Sopenharmony_ci	}
9308c2ecf20Sopenharmony_ci
9318c2ecf20Sopenharmony_ci	if (append)
9328c2ecf20Sopenharmony_ci		ret = zonefs_file_dio_append(iocb, from);
9338c2ecf20Sopenharmony_ci	else
9348c2ecf20Sopenharmony_ci		ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
9358c2ecf20Sopenharmony_ci				   &zonefs_write_dio_ops, sync);
9368c2ecf20Sopenharmony_ci	if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
9378c2ecf20Sopenharmony_ci	    (ret > 0 || ret == -EIOCBQUEUED)) {
9388c2ecf20Sopenharmony_ci		if (ret > 0)
9398c2ecf20Sopenharmony_ci			count = ret;
9408c2ecf20Sopenharmony_ci		mutex_lock(&zi->i_truncate_mutex);
9418c2ecf20Sopenharmony_ci		zi->i_wpoffset += count;
9428c2ecf20Sopenharmony_ci		mutex_unlock(&zi->i_truncate_mutex);
9438c2ecf20Sopenharmony_ci	}
9448c2ecf20Sopenharmony_ci
9458c2ecf20Sopenharmony_ciinode_unlock:
9468c2ecf20Sopenharmony_ci	inode_unlock(inode);
9478c2ecf20Sopenharmony_ci
9488c2ecf20Sopenharmony_ci	return ret;
9498c2ecf20Sopenharmony_ci}
9508c2ecf20Sopenharmony_ci
9518c2ecf20Sopenharmony_cistatic ssize_t zonefs_file_buffered_write(struct kiocb *iocb,
9528c2ecf20Sopenharmony_ci					  struct iov_iter *from)
9538c2ecf20Sopenharmony_ci{
9548c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(iocb->ki_filp);
9558c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
9568c2ecf20Sopenharmony_ci	ssize_t ret;
9578c2ecf20Sopenharmony_ci
9588c2ecf20Sopenharmony_ci	/*
9598c2ecf20Sopenharmony_ci	 * Direct IO writes are mandatory for sequential zone files so that the
9608c2ecf20Sopenharmony_ci	 * write IO issuing order is preserved.
9618c2ecf20Sopenharmony_ci	 */
9628c2ecf20Sopenharmony_ci	if (zi->i_ztype != ZONEFS_ZTYPE_CNV)
9638c2ecf20Sopenharmony_ci		return -EIO;
9648c2ecf20Sopenharmony_ci
9658c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_NOWAIT) {
9668c2ecf20Sopenharmony_ci		if (!inode_trylock(inode))
9678c2ecf20Sopenharmony_ci			return -EAGAIN;
9688c2ecf20Sopenharmony_ci	} else {
9698c2ecf20Sopenharmony_ci		inode_lock(inode);
9708c2ecf20Sopenharmony_ci	}
9718c2ecf20Sopenharmony_ci
9728c2ecf20Sopenharmony_ci	ret = zonefs_write_checks(iocb, from);
9738c2ecf20Sopenharmony_ci	if (ret <= 0)
9748c2ecf20Sopenharmony_ci		goto inode_unlock;
9758c2ecf20Sopenharmony_ci
9768c2ecf20Sopenharmony_ci	ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops);
9778c2ecf20Sopenharmony_ci	if (ret > 0)
9788c2ecf20Sopenharmony_ci		iocb->ki_pos += ret;
9798c2ecf20Sopenharmony_ci	else if (ret == -EIO)
9808c2ecf20Sopenharmony_ci		zonefs_io_error(inode, true);
9818c2ecf20Sopenharmony_ci
9828c2ecf20Sopenharmony_ciinode_unlock:
9838c2ecf20Sopenharmony_ci	inode_unlock(inode);
9848c2ecf20Sopenharmony_ci	if (ret > 0)
9858c2ecf20Sopenharmony_ci		ret = generic_write_sync(iocb, ret);
9868c2ecf20Sopenharmony_ci
9878c2ecf20Sopenharmony_ci	return ret;
9888c2ecf20Sopenharmony_ci}
9898c2ecf20Sopenharmony_ci
9908c2ecf20Sopenharmony_cistatic ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
9918c2ecf20Sopenharmony_ci{
9928c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(iocb->ki_filp);
9938c2ecf20Sopenharmony_ci
9948c2ecf20Sopenharmony_ci	if (unlikely(IS_IMMUTABLE(inode)))
9958c2ecf20Sopenharmony_ci		return -EPERM;
9968c2ecf20Sopenharmony_ci
9978c2ecf20Sopenharmony_ci	if (sb_rdonly(inode->i_sb))
9988c2ecf20Sopenharmony_ci		return -EROFS;
9998c2ecf20Sopenharmony_ci
10008c2ecf20Sopenharmony_ci	/* Write operations beyond the zone size are not allowed */
10018c2ecf20Sopenharmony_ci	if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size)
10028c2ecf20Sopenharmony_ci		return -EFBIG;
10038c2ecf20Sopenharmony_ci
10048c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_DIRECT) {
10058c2ecf20Sopenharmony_ci		ssize_t ret = zonefs_file_dio_write(iocb, from);
10068c2ecf20Sopenharmony_ci		if (ret != -ENOTBLK)
10078c2ecf20Sopenharmony_ci			return ret;
10088c2ecf20Sopenharmony_ci	}
10098c2ecf20Sopenharmony_ci
10108c2ecf20Sopenharmony_ci	return zonefs_file_buffered_write(iocb, from);
10118c2ecf20Sopenharmony_ci}
10128c2ecf20Sopenharmony_ci
10138c2ecf20Sopenharmony_cistatic int zonefs_file_read_dio_end_io(struct kiocb *iocb, ssize_t size,
10148c2ecf20Sopenharmony_ci				       int error, unsigned int flags)
10158c2ecf20Sopenharmony_ci{
10168c2ecf20Sopenharmony_ci	if (error) {
10178c2ecf20Sopenharmony_ci		zonefs_io_error(file_inode(iocb->ki_filp), false);
10188c2ecf20Sopenharmony_ci		return error;
10198c2ecf20Sopenharmony_ci	}
10208c2ecf20Sopenharmony_ci
10218c2ecf20Sopenharmony_ci	return 0;
10228c2ecf20Sopenharmony_ci}
10238c2ecf20Sopenharmony_ci
10248c2ecf20Sopenharmony_cistatic const struct iomap_dio_ops zonefs_read_dio_ops = {
10258c2ecf20Sopenharmony_ci	.end_io			= zonefs_file_read_dio_end_io,
10268c2ecf20Sopenharmony_ci};
10278c2ecf20Sopenharmony_ci
10288c2ecf20Sopenharmony_cistatic ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
10298c2ecf20Sopenharmony_ci{
10308c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(iocb->ki_filp);
10318c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
10328c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
10338c2ecf20Sopenharmony_ci	loff_t isize;
10348c2ecf20Sopenharmony_ci	ssize_t ret;
10358c2ecf20Sopenharmony_ci
10368c2ecf20Sopenharmony_ci	/* Offline zones cannot be read */
10378c2ecf20Sopenharmony_ci	if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777)))
10388c2ecf20Sopenharmony_ci		return -EPERM;
10398c2ecf20Sopenharmony_ci
10408c2ecf20Sopenharmony_ci	if (iocb->ki_pos >= zi->i_max_size)
10418c2ecf20Sopenharmony_ci		return 0;
10428c2ecf20Sopenharmony_ci
10438c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_NOWAIT) {
10448c2ecf20Sopenharmony_ci		if (!inode_trylock_shared(inode))
10458c2ecf20Sopenharmony_ci			return -EAGAIN;
10468c2ecf20Sopenharmony_ci	} else {
10478c2ecf20Sopenharmony_ci		inode_lock_shared(inode);
10488c2ecf20Sopenharmony_ci	}
10498c2ecf20Sopenharmony_ci
10508c2ecf20Sopenharmony_ci	/* Limit read operations to written data */
10518c2ecf20Sopenharmony_ci	mutex_lock(&zi->i_truncate_mutex);
10528c2ecf20Sopenharmony_ci	isize = i_size_read(inode);
10538c2ecf20Sopenharmony_ci	if (iocb->ki_pos >= isize) {
10548c2ecf20Sopenharmony_ci		mutex_unlock(&zi->i_truncate_mutex);
10558c2ecf20Sopenharmony_ci		ret = 0;
10568c2ecf20Sopenharmony_ci		goto inode_unlock;
10578c2ecf20Sopenharmony_ci	}
10588c2ecf20Sopenharmony_ci	iov_iter_truncate(to, isize - iocb->ki_pos);
10598c2ecf20Sopenharmony_ci	mutex_unlock(&zi->i_truncate_mutex);
10608c2ecf20Sopenharmony_ci
10618c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_DIRECT) {
10628c2ecf20Sopenharmony_ci		size_t count = iov_iter_count(to);
10638c2ecf20Sopenharmony_ci
10648c2ecf20Sopenharmony_ci		if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
10658c2ecf20Sopenharmony_ci			ret = -EINVAL;
10668c2ecf20Sopenharmony_ci			goto inode_unlock;
10678c2ecf20Sopenharmony_ci		}
10688c2ecf20Sopenharmony_ci		file_accessed(iocb->ki_filp);
10698c2ecf20Sopenharmony_ci		ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops,
10708c2ecf20Sopenharmony_ci				   &zonefs_read_dio_ops, is_sync_kiocb(iocb));
10718c2ecf20Sopenharmony_ci	} else {
10728c2ecf20Sopenharmony_ci		ret = generic_file_read_iter(iocb, to);
10738c2ecf20Sopenharmony_ci		if (ret == -EIO)
10748c2ecf20Sopenharmony_ci			zonefs_io_error(inode, false);
10758c2ecf20Sopenharmony_ci	}
10768c2ecf20Sopenharmony_ci
10778c2ecf20Sopenharmony_ciinode_unlock:
10788c2ecf20Sopenharmony_ci	inode_unlock_shared(inode);
10798c2ecf20Sopenharmony_ci
10808c2ecf20Sopenharmony_ci	return ret;
10818c2ecf20Sopenharmony_ci}
10828c2ecf20Sopenharmony_ci
10838c2ecf20Sopenharmony_cistatic inline bool zonefs_file_use_exp_open(struct inode *inode, struct file *file)
10848c2ecf20Sopenharmony_ci{
10858c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
10868c2ecf20Sopenharmony_ci	struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
10878c2ecf20Sopenharmony_ci
10888c2ecf20Sopenharmony_ci	if (!(sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN))
10898c2ecf20Sopenharmony_ci		return false;
10908c2ecf20Sopenharmony_ci
10918c2ecf20Sopenharmony_ci	if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
10928c2ecf20Sopenharmony_ci		return false;
10938c2ecf20Sopenharmony_ci
10948c2ecf20Sopenharmony_ci	if (!(file->f_mode & FMODE_WRITE))
10958c2ecf20Sopenharmony_ci		return false;
10968c2ecf20Sopenharmony_ci
10978c2ecf20Sopenharmony_ci	return true;
10988c2ecf20Sopenharmony_ci}
10998c2ecf20Sopenharmony_ci
11008c2ecf20Sopenharmony_cistatic int zonefs_open_zone(struct inode *inode)
11018c2ecf20Sopenharmony_ci{
11028c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
11038c2ecf20Sopenharmony_ci	struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
11048c2ecf20Sopenharmony_ci	int ret = 0;
11058c2ecf20Sopenharmony_ci
11068c2ecf20Sopenharmony_ci	mutex_lock(&zi->i_truncate_mutex);
11078c2ecf20Sopenharmony_ci
11088c2ecf20Sopenharmony_ci	if (!zi->i_wr_refcnt) {
11098c2ecf20Sopenharmony_ci		if (atomic_inc_return(&sbi->s_open_zones) > sbi->s_max_open_zones) {
11108c2ecf20Sopenharmony_ci			atomic_dec(&sbi->s_open_zones);
11118c2ecf20Sopenharmony_ci			ret = -EBUSY;
11128c2ecf20Sopenharmony_ci			goto unlock;
11138c2ecf20Sopenharmony_ci		}
11148c2ecf20Sopenharmony_ci
11158c2ecf20Sopenharmony_ci		if (i_size_read(inode) < zi->i_max_size) {
11168c2ecf20Sopenharmony_ci			ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
11178c2ecf20Sopenharmony_ci			if (ret) {
11188c2ecf20Sopenharmony_ci				atomic_dec(&sbi->s_open_zones);
11198c2ecf20Sopenharmony_ci				goto unlock;
11208c2ecf20Sopenharmony_ci			}
11218c2ecf20Sopenharmony_ci			zi->i_flags |= ZONEFS_ZONE_OPEN;
11228c2ecf20Sopenharmony_ci		}
11238c2ecf20Sopenharmony_ci	}
11248c2ecf20Sopenharmony_ci
11258c2ecf20Sopenharmony_ci	zi->i_wr_refcnt++;
11268c2ecf20Sopenharmony_ci
11278c2ecf20Sopenharmony_ciunlock:
11288c2ecf20Sopenharmony_ci	mutex_unlock(&zi->i_truncate_mutex);
11298c2ecf20Sopenharmony_ci
11308c2ecf20Sopenharmony_ci	return ret;
11318c2ecf20Sopenharmony_ci}
11328c2ecf20Sopenharmony_ci
/*
 * zonefs_file_open - ->open handler.
 * @inode: inode of the file
 * @file:  the file being opened
 *
 * Return: 0 on success, or the error of generic_file_open() or of
 * zonefs_open_zone() when an explicit zone open is needed.
 */
static int zonefs_file_open(struct inode *inode, struct file *file)
{
	int err = generic_file_open(inode, file);

	if (err)
		return err;

	if (!zonefs_file_use_exp_open(inode, file))
		return 0;

	return zonefs_open_zone(inode);
}
11468c2ecf20Sopenharmony_ci
11478c2ecf20Sopenharmony_cistatic void zonefs_close_zone(struct inode *inode)
11488c2ecf20Sopenharmony_ci{
11498c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
11508c2ecf20Sopenharmony_ci	int ret = 0;
11518c2ecf20Sopenharmony_ci
11528c2ecf20Sopenharmony_ci	mutex_lock(&zi->i_truncate_mutex);
11538c2ecf20Sopenharmony_ci	zi->i_wr_refcnt--;
11548c2ecf20Sopenharmony_ci	if (!zi->i_wr_refcnt) {
11558c2ecf20Sopenharmony_ci		struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
11568c2ecf20Sopenharmony_ci		struct super_block *sb = inode->i_sb;
11578c2ecf20Sopenharmony_ci
11588c2ecf20Sopenharmony_ci		/*
11598c2ecf20Sopenharmony_ci		 * If the file zone is full, it is not open anymore and we only
11608c2ecf20Sopenharmony_ci		 * need to decrement the open count.
11618c2ecf20Sopenharmony_ci		 */
11628c2ecf20Sopenharmony_ci		if (!(zi->i_flags & ZONEFS_ZONE_OPEN))
11638c2ecf20Sopenharmony_ci			goto dec;
11648c2ecf20Sopenharmony_ci
11658c2ecf20Sopenharmony_ci		ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
11668c2ecf20Sopenharmony_ci		if (ret) {
11678c2ecf20Sopenharmony_ci			__zonefs_io_error(inode, false);
11688c2ecf20Sopenharmony_ci			/*
11698c2ecf20Sopenharmony_ci			 * Leaving zones explicitly open may lead to a state
11708c2ecf20Sopenharmony_ci			 * where most zones cannot be written (zone resources
11718c2ecf20Sopenharmony_ci			 * exhausted). So take preventive action by remounting
11728c2ecf20Sopenharmony_ci			 * read-only.
11738c2ecf20Sopenharmony_ci			 */
11748c2ecf20Sopenharmony_ci			if (zi->i_flags & ZONEFS_ZONE_OPEN &&
11758c2ecf20Sopenharmony_ci			    !(sb->s_flags & SB_RDONLY)) {
11768c2ecf20Sopenharmony_ci				zonefs_warn(sb, "closing zone failed, remounting filesystem read-only\n");
11778c2ecf20Sopenharmony_ci				sb->s_flags |= SB_RDONLY;
11788c2ecf20Sopenharmony_ci			}
11798c2ecf20Sopenharmony_ci		}
11808c2ecf20Sopenharmony_ci		zi->i_flags &= ~ZONEFS_ZONE_OPEN;
11818c2ecf20Sopenharmony_cidec:
11828c2ecf20Sopenharmony_ci		atomic_dec(&sbi->s_open_zones);
11838c2ecf20Sopenharmony_ci	}
11848c2ecf20Sopenharmony_ci	mutex_unlock(&zi->i_truncate_mutex);
11858c2ecf20Sopenharmony_ci}
11868c2ecf20Sopenharmony_ci
/*
 * zonefs_file_release - ->release handler.
 * @inode: inode of the file
 * @file:  the file being released
 *
 * A zone that was explicitly opened must be closed again, but the zone
 * management operation can fail (either due to an IO error or because the
 * zone has gone offline or read-only). Such failures are handled in
 * zonefs_close_zone() so that close(2) never fails for user space.
 *
 * Return: always 0.
 */
static int zonefs_file_release(struct inode *inode, struct file *file)
{
	if (!zonefs_file_use_exp_open(inode, file))
		return 0;

	zonefs_close_zone(inode);

	return 0;
}
12008c2ecf20Sopenharmony_ci
12018c2ecf20Sopenharmony_cistatic const struct file_operations zonefs_file_operations = {
12028c2ecf20Sopenharmony_ci	.open		= zonefs_file_open,
12038c2ecf20Sopenharmony_ci	.release	= zonefs_file_release,
12048c2ecf20Sopenharmony_ci	.fsync		= zonefs_file_fsync,
12058c2ecf20Sopenharmony_ci	.mmap		= zonefs_file_mmap,
12068c2ecf20Sopenharmony_ci	.llseek		= zonefs_file_llseek,
12078c2ecf20Sopenharmony_ci	.read_iter	= zonefs_file_read_iter,
12088c2ecf20Sopenharmony_ci	.write_iter	= zonefs_file_write_iter,
12098c2ecf20Sopenharmony_ci	.splice_read	= generic_file_splice_read,
12108c2ecf20Sopenharmony_ci	.splice_write	= iter_file_splice_write,
12118c2ecf20Sopenharmony_ci	.iopoll		= iomap_dio_iopoll,
12128c2ecf20Sopenharmony_ci};
12138c2ecf20Sopenharmony_ci
12148c2ecf20Sopenharmony_cistatic struct kmem_cache *zonefs_inode_cachep;
12158c2ecf20Sopenharmony_ci
12168c2ecf20Sopenharmony_cistatic struct inode *zonefs_alloc_inode(struct super_block *sb)
12178c2ecf20Sopenharmony_ci{
12188c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi;
12198c2ecf20Sopenharmony_ci
12208c2ecf20Sopenharmony_ci	zi = kmem_cache_alloc(zonefs_inode_cachep, GFP_KERNEL);
12218c2ecf20Sopenharmony_ci	if (!zi)
12228c2ecf20Sopenharmony_ci		return NULL;
12238c2ecf20Sopenharmony_ci
12248c2ecf20Sopenharmony_ci	inode_init_once(&zi->i_vnode);
12258c2ecf20Sopenharmony_ci	mutex_init(&zi->i_truncate_mutex);
12268c2ecf20Sopenharmony_ci	init_rwsem(&zi->i_mmap_sem);
12278c2ecf20Sopenharmony_ci	zi->i_wr_refcnt = 0;
12288c2ecf20Sopenharmony_ci	zi->i_flags = 0;
12298c2ecf20Sopenharmony_ci
12308c2ecf20Sopenharmony_ci	return &zi->i_vnode;
12318c2ecf20Sopenharmony_ci}
12328c2ecf20Sopenharmony_ci
12338c2ecf20Sopenharmony_cistatic void zonefs_free_inode(struct inode *inode)
12348c2ecf20Sopenharmony_ci{
12358c2ecf20Sopenharmony_ci	kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode));
12368c2ecf20Sopenharmony_ci}
12378c2ecf20Sopenharmony_ci
12388c2ecf20Sopenharmony_ci/*
12398c2ecf20Sopenharmony_ci * File system stat.
12408c2ecf20Sopenharmony_ci */
12418c2ecf20Sopenharmony_cistatic int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
12428c2ecf20Sopenharmony_ci{
12438c2ecf20Sopenharmony_ci	struct super_block *sb = dentry->d_sb;
12448c2ecf20Sopenharmony_ci	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
12458c2ecf20Sopenharmony_ci	enum zonefs_ztype t;
12468c2ecf20Sopenharmony_ci	u64 fsid;
12478c2ecf20Sopenharmony_ci
12488c2ecf20Sopenharmony_ci	buf->f_type = ZONEFS_MAGIC;
12498c2ecf20Sopenharmony_ci	buf->f_bsize = sb->s_blocksize;
12508c2ecf20Sopenharmony_ci	buf->f_namelen = ZONEFS_NAME_MAX;
12518c2ecf20Sopenharmony_ci
12528c2ecf20Sopenharmony_ci	spin_lock(&sbi->s_lock);
12538c2ecf20Sopenharmony_ci
12548c2ecf20Sopenharmony_ci	buf->f_blocks = sbi->s_blocks;
12558c2ecf20Sopenharmony_ci	if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks))
12568c2ecf20Sopenharmony_ci		buf->f_bfree = 0;
12578c2ecf20Sopenharmony_ci	else
12588c2ecf20Sopenharmony_ci		buf->f_bfree = buf->f_blocks - sbi->s_used_blocks;
12598c2ecf20Sopenharmony_ci	buf->f_bavail = buf->f_bfree;
12608c2ecf20Sopenharmony_ci
12618c2ecf20Sopenharmony_ci	for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
12628c2ecf20Sopenharmony_ci		if (sbi->s_nr_files[t])
12638c2ecf20Sopenharmony_ci			buf->f_files += sbi->s_nr_files[t] + 1;
12648c2ecf20Sopenharmony_ci	}
12658c2ecf20Sopenharmony_ci	buf->f_ffree = 0;
12668c2ecf20Sopenharmony_ci
12678c2ecf20Sopenharmony_ci	spin_unlock(&sbi->s_lock);
12688c2ecf20Sopenharmony_ci
12698c2ecf20Sopenharmony_ci	fsid = le64_to_cpup((void *)sbi->s_uuid.b) ^
12708c2ecf20Sopenharmony_ci		le64_to_cpup((void *)sbi->s_uuid.b + sizeof(u64));
12718c2ecf20Sopenharmony_ci	buf->f_fsid = u64_to_fsid(fsid);
12728c2ecf20Sopenharmony_ci
12738c2ecf20Sopenharmony_ci	return 0;
12748c2ecf20Sopenharmony_ci}
12758c2ecf20Sopenharmony_ci
/* Mount option tokens, matched against the option strings in tokens[]. */
enum {
	Opt_errors_ro,
	Opt_errors_zro,
	Opt_errors_zol,
	Opt_errors_repair,
	Opt_explicit_open,
	Opt_err,
};
12808c2ecf20Sopenharmony_ci
12818c2ecf20Sopenharmony_cistatic const match_table_t tokens = {
12828c2ecf20Sopenharmony_ci	{ Opt_errors_ro,	"errors=remount-ro"},
12838c2ecf20Sopenharmony_ci	{ Opt_errors_zro,	"errors=zone-ro"},
12848c2ecf20Sopenharmony_ci	{ Opt_errors_zol,	"errors=zone-offline"},
12858c2ecf20Sopenharmony_ci	{ Opt_errors_repair,	"errors=repair"},
12868c2ecf20Sopenharmony_ci	{ Opt_explicit_open,	"explicit-open" },
12878c2ecf20Sopenharmony_ci	{ Opt_err,		NULL}
12888c2ecf20Sopenharmony_ci};
12898c2ecf20Sopenharmony_ci
12908c2ecf20Sopenharmony_cistatic int zonefs_parse_options(struct super_block *sb, char *options)
12918c2ecf20Sopenharmony_ci{
12928c2ecf20Sopenharmony_ci	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
12938c2ecf20Sopenharmony_ci	substring_t args[MAX_OPT_ARGS];
12948c2ecf20Sopenharmony_ci	char *p;
12958c2ecf20Sopenharmony_ci
12968c2ecf20Sopenharmony_ci	if (!options)
12978c2ecf20Sopenharmony_ci		return 0;
12988c2ecf20Sopenharmony_ci
12998c2ecf20Sopenharmony_ci	while ((p = strsep(&options, ",")) != NULL) {
13008c2ecf20Sopenharmony_ci		int token;
13018c2ecf20Sopenharmony_ci
13028c2ecf20Sopenharmony_ci		if (!*p)
13038c2ecf20Sopenharmony_ci			continue;
13048c2ecf20Sopenharmony_ci
13058c2ecf20Sopenharmony_ci		token = match_token(p, tokens, args);
13068c2ecf20Sopenharmony_ci		switch (token) {
13078c2ecf20Sopenharmony_ci		case Opt_errors_ro:
13088c2ecf20Sopenharmony_ci			sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
13098c2ecf20Sopenharmony_ci			sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_RO;
13108c2ecf20Sopenharmony_ci			break;
13118c2ecf20Sopenharmony_ci		case Opt_errors_zro:
13128c2ecf20Sopenharmony_ci			sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
13138c2ecf20Sopenharmony_ci			sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZRO;
13148c2ecf20Sopenharmony_ci			break;
13158c2ecf20Sopenharmony_ci		case Opt_errors_zol:
13168c2ecf20Sopenharmony_ci			sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
13178c2ecf20Sopenharmony_ci			sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZOL;
13188c2ecf20Sopenharmony_ci			break;
13198c2ecf20Sopenharmony_ci		case Opt_errors_repair:
13208c2ecf20Sopenharmony_ci			sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
13218c2ecf20Sopenharmony_ci			sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR;
13228c2ecf20Sopenharmony_ci			break;
13238c2ecf20Sopenharmony_ci		case Opt_explicit_open:
13248c2ecf20Sopenharmony_ci			sbi->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN;
13258c2ecf20Sopenharmony_ci			break;
13268c2ecf20Sopenharmony_ci		default:
13278c2ecf20Sopenharmony_ci			return -EINVAL;
13288c2ecf20Sopenharmony_ci		}
13298c2ecf20Sopenharmony_ci	}
13308c2ecf20Sopenharmony_ci
13318c2ecf20Sopenharmony_ci	return 0;
13328c2ecf20Sopenharmony_ci}
13338c2ecf20Sopenharmony_ci
13348c2ecf20Sopenharmony_cistatic int zonefs_show_options(struct seq_file *seq, struct dentry *root)
13358c2ecf20Sopenharmony_ci{
13368c2ecf20Sopenharmony_ci	struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb);
13378c2ecf20Sopenharmony_ci
13388c2ecf20Sopenharmony_ci	if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO)
13398c2ecf20Sopenharmony_ci		seq_puts(seq, ",errors=remount-ro");
13408c2ecf20Sopenharmony_ci	if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)
13418c2ecf20Sopenharmony_ci		seq_puts(seq, ",errors=zone-ro");
13428c2ecf20Sopenharmony_ci	if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)
13438c2ecf20Sopenharmony_ci		seq_puts(seq, ",errors=zone-offline");
13448c2ecf20Sopenharmony_ci	if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR)
13458c2ecf20Sopenharmony_ci		seq_puts(seq, ",errors=repair");
13468c2ecf20Sopenharmony_ci
13478c2ecf20Sopenharmony_ci	return 0;
13488c2ecf20Sopenharmony_ci}
13498c2ecf20Sopenharmony_ci
13508c2ecf20Sopenharmony_cistatic int zonefs_remount(struct super_block *sb, int *flags, char *data)
13518c2ecf20Sopenharmony_ci{
13528c2ecf20Sopenharmony_ci	sync_filesystem(sb);
13538c2ecf20Sopenharmony_ci
13548c2ecf20Sopenharmony_ci	return zonefs_parse_options(sb, data);
13558c2ecf20Sopenharmony_ci}
13568c2ecf20Sopenharmony_ci
13578c2ecf20Sopenharmony_cistatic const struct super_operations zonefs_sops = {
13588c2ecf20Sopenharmony_ci	.alloc_inode	= zonefs_alloc_inode,
13598c2ecf20Sopenharmony_ci	.free_inode	= zonefs_free_inode,
13608c2ecf20Sopenharmony_ci	.statfs		= zonefs_statfs,
13618c2ecf20Sopenharmony_ci	.remount_fs	= zonefs_remount,
13628c2ecf20Sopenharmony_ci	.show_options	= zonefs_show_options,
13638c2ecf20Sopenharmony_ci};
13648c2ecf20Sopenharmony_ci
13658c2ecf20Sopenharmony_cistatic const struct inode_operations zonefs_dir_inode_operations = {
13668c2ecf20Sopenharmony_ci	.lookup		= simple_lookup,
13678c2ecf20Sopenharmony_ci	.setattr	= zonefs_inode_setattr,
13688c2ecf20Sopenharmony_ci};
13698c2ecf20Sopenharmony_ci
13708c2ecf20Sopenharmony_cistatic void zonefs_init_dir_inode(struct inode *parent, struct inode *inode,
13718c2ecf20Sopenharmony_ci				  enum zonefs_ztype type)
13728c2ecf20Sopenharmony_ci{
13738c2ecf20Sopenharmony_ci	struct super_block *sb = parent->i_sb;
13748c2ecf20Sopenharmony_ci
13758c2ecf20Sopenharmony_ci	inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk) + type + 1;
13768c2ecf20Sopenharmony_ci	inode_init_owner(inode, parent, S_IFDIR | 0555);
13778c2ecf20Sopenharmony_ci	inode->i_op = &zonefs_dir_inode_operations;
13788c2ecf20Sopenharmony_ci	inode->i_fop = &simple_dir_operations;
13798c2ecf20Sopenharmony_ci	set_nlink(inode, 2);
13808c2ecf20Sopenharmony_ci	inc_nlink(parent);
13818c2ecf20Sopenharmony_ci}
13828c2ecf20Sopenharmony_ci
13838c2ecf20Sopenharmony_cistatic int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
13848c2ecf20Sopenharmony_ci				  enum zonefs_ztype type)
13858c2ecf20Sopenharmony_ci{
13868c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
13878c2ecf20Sopenharmony_ci	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
13888c2ecf20Sopenharmony_ci	struct zonefs_inode_info *zi = ZONEFS_I(inode);
13898c2ecf20Sopenharmony_ci	int ret = 0;
13908c2ecf20Sopenharmony_ci
13918c2ecf20Sopenharmony_ci	inode->i_ino = zone->start >> sbi->s_zone_sectors_shift;
13928c2ecf20Sopenharmony_ci	inode->i_mode = S_IFREG | sbi->s_perm;
13938c2ecf20Sopenharmony_ci
13948c2ecf20Sopenharmony_ci	zi->i_ztype = type;
13958c2ecf20Sopenharmony_ci	zi->i_zsector = zone->start;
13968c2ecf20Sopenharmony_ci	zi->i_zone_size = zone->len << SECTOR_SHIFT;
13978c2ecf20Sopenharmony_ci	if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT &&
13988c2ecf20Sopenharmony_ci	    !(sbi->s_features & ZONEFS_F_AGGRCNV)) {
13998c2ecf20Sopenharmony_ci		zonefs_err(sb,
14008c2ecf20Sopenharmony_ci			   "zone size %llu doesn't match device's zone sectors %llu\n",
14018c2ecf20Sopenharmony_ci			   zi->i_zone_size,
14028c2ecf20Sopenharmony_ci			   bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT);
14038c2ecf20Sopenharmony_ci		return -EINVAL;
14048c2ecf20Sopenharmony_ci	}
14058c2ecf20Sopenharmony_ci
14068c2ecf20Sopenharmony_ci	zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE,
14078c2ecf20Sopenharmony_ci			       zone->capacity << SECTOR_SHIFT);
14088c2ecf20Sopenharmony_ci	zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true, true);
14098c2ecf20Sopenharmony_ci
14108c2ecf20Sopenharmony_ci	inode->i_uid = sbi->s_uid;
14118c2ecf20Sopenharmony_ci	inode->i_gid = sbi->s_gid;
14128c2ecf20Sopenharmony_ci	inode->i_size = zi->i_wpoffset;
14138c2ecf20Sopenharmony_ci	inode->i_blocks = zi->i_max_size >> SECTOR_SHIFT;
14148c2ecf20Sopenharmony_ci
14158c2ecf20Sopenharmony_ci	inode->i_op = &zonefs_file_inode_operations;
14168c2ecf20Sopenharmony_ci	inode->i_fop = &zonefs_file_operations;
14178c2ecf20Sopenharmony_ci	inode->i_mapping->a_ops = &zonefs_file_aops;
14188c2ecf20Sopenharmony_ci
14198c2ecf20Sopenharmony_ci	sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes);
14208c2ecf20Sopenharmony_ci	sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits;
14218c2ecf20Sopenharmony_ci	sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits;
14228c2ecf20Sopenharmony_ci
14238c2ecf20Sopenharmony_ci	/*
14248c2ecf20Sopenharmony_ci	 * For sequential zones, make sure that any open zone is closed first
14258c2ecf20Sopenharmony_ci	 * to ensure that the initial number of open zones is 0, in sync with
14268c2ecf20Sopenharmony_ci	 * the open zone accounting done when the mount option
14278c2ecf20Sopenharmony_ci	 * ZONEFS_MNTOPT_EXPLICIT_OPEN is used.
14288c2ecf20Sopenharmony_ci	 */
14298c2ecf20Sopenharmony_ci	if (type == ZONEFS_ZTYPE_SEQ &&
14308c2ecf20Sopenharmony_ci	    (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
14318c2ecf20Sopenharmony_ci	     zone->cond == BLK_ZONE_COND_EXP_OPEN)) {
14328c2ecf20Sopenharmony_ci		mutex_lock(&zi->i_truncate_mutex);
14338c2ecf20Sopenharmony_ci		ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
14348c2ecf20Sopenharmony_ci		mutex_unlock(&zi->i_truncate_mutex);
14358c2ecf20Sopenharmony_ci	}
14368c2ecf20Sopenharmony_ci
14378c2ecf20Sopenharmony_ci	return ret;
14388c2ecf20Sopenharmony_ci}
14398c2ecf20Sopenharmony_ci
14408c2ecf20Sopenharmony_cistatic struct dentry *zonefs_create_inode(struct dentry *parent,
14418c2ecf20Sopenharmony_ci					const char *name, struct blk_zone *zone,
14428c2ecf20Sopenharmony_ci					enum zonefs_ztype type)
14438c2ecf20Sopenharmony_ci{
14448c2ecf20Sopenharmony_ci	struct inode *dir = d_inode(parent);
14458c2ecf20Sopenharmony_ci	struct dentry *dentry;
14468c2ecf20Sopenharmony_ci	struct inode *inode;
14478c2ecf20Sopenharmony_ci	int ret = -ENOMEM;
14488c2ecf20Sopenharmony_ci
14498c2ecf20Sopenharmony_ci	dentry = d_alloc_name(parent, name);
14508c2ecf20Sopenharmony_ci	if (!dentry)
14518c2ecf20Sopenharmony_ci		return ERR_PTR(ret);
14528c2ecf20Sopenharmony_ci
14538c2ecf20Sopenharmony_ci	inode = new_inode(parent->d_sb);
14548c2ecf20Sopenharmony_ci	if (!inode)
14558c2ecf20Sopenharmony_ci		goto dput;
14568c2ecf20Sopenharmony_ci
14578c2ecf20Sopenharmony_ci	inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime;
14588c2ecf20Sopenharmony_ci	if (zone) {
14598c2ecf20Sopenharmony_ci		ret = zonefs_init_file_inode(inode, zone, type);
14608c2ecf20Sopenharmony_ci		if (ret) {
14618c2ecf20Sopenharmony_ci			iput(inode);
14628c2ecf20Sopenharmony_ci			goto dput;
14638c2ecf20Sopenharmony_ci		}
14648c2ecf20Sopenharmony_ci	} else {
14658c2ecf20Sopenharmony_ci		zonefs_init_dir_inode(dir, inode, type);
14668c2ecf20Sopenharmony_ci	}
14678c2ecf20Sopenharmony_ci
14688c2ecf20Sopenharmony_ci	d_add(dentry, inode);
14698c2ecf20Sopenharmony_ci	dir->i_size++;
14708c2ecf20Sopenharmony_ci
14718c2ecf20Sopenharmony_ci	return dentry;
14728c2ecf20Sopenharmony_ci
14738c2ecf20Sopenharmony_cidput:
14748c2ecf20Sopenharmony_ci	dput(dentry);
14758c2ecf20Sopenharmony_ci
14768c2ecf20Sopenharmony_ci	return ERR_PTR(ret);
14778c2ecf20Sopenharmony_ci}
14788c2ecf20Sopenharmony_ci
14798c2ecf20Sopenharmony_cistruct zonefs_zone_data {
14808c2ecf20Sopenharmony_ci	struct super_block	*sb;
14818c2ecf20Sopenharmony_ci	unsigned int		nr_zones[ZONEFS_ZTYPE_MAX];
14828c2ecf20Sopenharmony_ci	struct blk_zone		*zones;
14838c2ecf20Sopenharmony_ci};
14848c2ecf20Sopenharmony_ci
14858c2ecf20Sopenharmony_ci/*
14868c2ecf20Sopenharmony_ci * Create a zone group and populate it with zone files.
14878c2ecf20Sopenharmony_ci */
14888c2ecf20Sopenharmony_cistatic int zonefs_create_zgroup(struct zonefs_zone_data *zd,
14898c2ecf20Sopenharmony_ci				enum zonefs_ztype type)
14908c2ecf20Sopenharmony_ci{
14918c2ecf20Sopenharmony_ci	struct super_block *sb = zd->sb;
14928c2ecf20Sopenharmony_ci	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
14938c2ecf20Sopenharmony_ci	struct blk_zone *zone, *next, *end;
14948c2ecf20Sopenharmony_ci	const char *zgroup_name;
14958c2ecf20Sopenharmony_ci	char *file_name;
14968c2ecf20Sopenharmony_ci	struct dentry *dir, *dent;
14978c2ecf20Sopenharmony_ci	unsigned int n = 0;
14988c2ecf20Sopenharmony_ci	int ret;
14998c2ecf20Sopenharmony_ci
15008c2ecf20Sopenharmony_ci	/* If the group is empty, there is nothing to do */
15018c2ecf20Sopenharmony_ci	if (!zd->nr_zones[type])
15028c2ecf20Sopenharmony_ci		return 0;
15038c2ecf20Sopenharmony_ci
15048c2ecf20Sopenharmony_ci	file_name = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL);
15058c2ecf20Sopenharmony_ci	if (!file_name)
15068c2ecf20Sopenharmony_ci		return -ENOMEM;
15078c2ecf20Sopenharmony_ci
15088c2ecf20Sopenharmony_ci	if (type == ZONEFS_ZTYPE_CNV)
15098c2ecf20Sopenharmony_ci		zgroup_name = "cnv";
15108c2ecf20Sopenharmony_ci	else
15118c2ecf20Sopenharmony_ci		zgroup_name = "seq";
15128c2ecf20Sopenharmony_ci
15138c2ecf20Sopenharmony_ci	dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type);
15148c2ecf20Sopenharmony_ci	if (IS_ERR(dir)) {
15158c2ecf20Sopenharmony_ci		ret = PTR_ERR(dir);
15168c2ecf20Sopenharmony_ci		goto free;
15178c2ecf20Sopenharmony_ci	}
15188c2ecf20Sopenharmony_ci
15198c2ecf20Sopenharmony_ci	/*
15208c2ecf20Sopenharmony_ci	 * The first zone contains the super block: skip it.
15218c2ecf20Sopenharmony_ci	 */
15228c2ecf20Sopenharmony_ci	end = zd->zones + blkdev_nr_zones(sb->s_bdev->bd_disk);
15238c2ecf20Sopenharmony_ci	for (zone = &zd->zones[1]; zone < end; zone = next) {
15248c2ecf20Sopenharmony_ci
15258c2ecf20Sopenharmony_ci		next = zone + 1;
15268c2ecf20Sopenharmony_ci		if (zonefs_zone_type(zone) != type)
15278c2ecf20Sopenharmony_ci			continue;
15288c2ecf20Sopenharmony_ci
15298c2ecf20Sopenharmony_ci		/*
15308c2ecf20Sopenharmony_ci		 * For conventional zones, contiguous zones can be aggregated
15318c2ecf20Sopenharmony_ci		 * together to form larger files. Note that this overwrites the
15328c2ecf20Sopenharmony_ci		 * length of the first zone of the set of contiguous zones
15338c2ecf20Sopenharmony_ci		 * aggregated together. If one offline or read-only zone is
15348c2ecf20Sopenharmony_ci		 * found, assume that all zones aggregated have the same
15358c2ecf20Sopenharmony_ci		 * condition.
15368c2ecf20Sopenharmony_ci		 */
15378c2ecf20Sopenharmony_ci		if (type == ZONEFS_ZTYPE_CNV &&
15388c2ecf20Sopenharmony_ci		    (sbi->s_features & ZONEFS_F_AGGRCNV)) {
15398c2ecf20Sopenharmony_ci			for (; next < end; next++) {
15408c2ecf20Sopenharmony_ci				if (zonefs_zone_type(next) != type)
15418c2ecf20Sopenharmony_ci					break;
15428c2ecf20Sopenharmony_ci				zone->len += next->len;
15438c2ecf20Sopenharmony_ci				zone->capacity += next->capacity;
15448c2ecf20Sopenharmony_ci				if (next->cond == BLK_ZONE_COND_READONLY &&
15458c2ecf20Sopenharmony_ci				    zone->cond != BLK_ZONE_COND_OFFLINE)
15468c2ecf20Sopenharmony_ci					zone->cond = BLK_ZONE_COND_READONLY;
15478c2ecf20Sopenharmony_ci				else if (next->cond == BLK_ZONE_COND_OFFLINE)
15488c2ecf20Sopenharmony_ci					zone->cond = BLK_ZONE_COND_OFFLINE;
15498c2ecf20Sopenharmony_ci			}
15508c2ecf20Sopenharmony_ci			if (zone->capacity != zone->len) {
15518c2ecf20Sopenharmony_ci				zonefs_err(sb, "Invalid conventional zone capacity\n");
15528c2ecf20Sopenharmony_ci				ret = -EINVAL;
15538c2ecf20Sopenharmony_ci				goto free;
15548c2ecf20Sopenharmony_ci			}
15558c2ecf20Sopenharmony_ci		}
15568c2ecf20Sopenharmony_ci
15578c2ecf20Sopenharmony_ci		/*
15588c2ecf20Sopenharmony_ci		 * Use the file number within its group as file name.
15598c2ecf20Sopenharmony_ci		 */
15608c2ecf20Sopenharmony_ci		snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n);
15618c2ecf20Sopenharmony_ci		dent = zonefs_create_inode(dir, file_name, zone, type);
15628c2ecf20Sopenharmony_ci		if (IS_ERR(dent)) {
15638c2ecf20Sopenharmony_ci			ret = PTR_ERR(dent);
15648c2ecf20Sopenharmony_ci			goto free;
15658c2ecf20Sopenharmony_ci		}
15668c2ecf20Sopenharmony_ci
15678c2ecf20Sopenharmony_ci		n++;
15688c2ecf20Sopenharmony_ci	}
15698c2ecf20Sopenharmony_ci
15708c2ecf20Sopenharmony_ci	zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
15718c2ecf20Sopenharmony_ci		    zgroup_name, n, n > 1 ? "s" : "");
15728c2ecf20Sopenharmony_ci
15738c2ecf20Sopenharmony_ci	sbi->s_nr_files[type] = n;
15748c2ecf20Sopenharmony_ci	ret = 0;
15758c2ecf20Sopenharmony_ci
15768c2ecf20Sopenharmony_cifree:
15778c2ecf20Sopenharmony_ci	kfree(file_name);
15788c2ecf20Sopenharmony_ci
15798c2ecf20Sopenharmony_ci	return ret;
15808c2ecf20Sopenharmony_ci}
15818c2ecf20Sopenharmony_ci
15828c2ecf20Sopenharmony_cistatic int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx,
15838c2ecf20Sopenharmony_ci				   void *data)
15848c2ecf20Sopenharmony_ci{
15858c2ecf20Sopenharmony_ci	struct zonefs_zone_data *zd = data;
15868c2ecf20Sopenharmony_ci
15878c2ecf20Sopenharmony_ci	/*
15888c2ecf20Sopenharmony_ci	 * Count the number of usable zones: the first zone at index 0 contains
15898c2ecf20Sopenharmony_ci	 * the super block and is ignored.
15908c2ecf20Sopenharmony_ci	 */
15918c2ecf20Sopenharmony_ci	switch (zone->type) {
15928c2ecf20Sopenharmony_ci	case BLK_ZONE_TYPE_CONVENTIONAL:
15938c2ecf20Sopenharmony_ci		zone->wp = zone->start + zone->len;
15948c2ecf20Sopenharmony_ci		if (idx)
15958c2ecf20Sopenharmony_ci			zd->nr_zones[ZONEFS_ZTYPE_CNV]++;
15968c2ecf20Sopenharmony_ci		break;
15978c2ecf20Sopenharmony_ci	case BLK_ZONE_TYPE_SEQWRITE_REQ:
15988c2ecf20Sopenharmony_ci	case BLK_ZONE_TYPE_SEQWRITE_PREF:
15998c2ecf20Sopenharmony_ci		if (idx)
16008c2ecf20Sopenharmony_ci			zd->nr_zones[ZONEFS_ZTYPE_SEQ]++;
16018c2ecf20Sopenharmony_ci		break;
16028c2ecf20Sopenharmony_ci	default:
16038c2ecf20Sopenharmony_ci		zonefs_err(zd->sb, "Unsupported zone type 0x%x\n",
16048c2ecf20Sopenharmony_ci			   zone->type);
16058c2ecf20Sopenharmony_ci		return -EIO;
16068c2ecf20Sopenharmony_ci	}
16078c2ecf20Sopenharmony_ci
16088c2ecf20Sopenharmony_ci	memcpy(&zd->zones[idx], zone, sizeof(struct blk_zone));
16098c2ecf20Sopenharmony_ci
16108c2ecf20Sopenharmony_ci	return 0;
16118c2ecf20Sopenharmony_ci}
16128c2ecf20Sopenharmony_ci
16138c2ecf20Sopenharmony_cistatic int zonefs_get_zone_info(struct zonefs_zone_data *zd)
16148c2ecf20Sopenharmony_ci{
16158c2ecf20Sopenharmony_ci	struct block_device *bdev = zd->sb->s_bdev;
16168c2ecf20Sopenharmony_ci	int ret;
16178c2ecf20Sopenharmony_ci
16188c2ecf20Sopenharmony_ci	zd->zones = kvcalloc(blkdev_nr_zones(bdev->bd_disk),
16198c2ecf20Sopenharmony_ci			     sizeof(struct blk_zone), GFP_KERNEL);
16208c2ecf20Sopenharmony_ci	if (!zd->zones)
16218c2ecf20Sopenharmony_ci		return -ENOMEM;
16228c2ecf20Sopenharmony_ci
16238c2ecf20Sopenharmony_ci	/* Get zones information from the device */
16248c2ecf20Sopenharmony_ci	ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES,
16258c2ecf20Sopenharmony_ci				  zonefs_get_zone_info_cb, zd);
16268c2ecf20Sopenharmony_ci	if (ret < 0) {
16278c2ecf20Sopenharmony_ci		zonefs_err(zd->sb, "Zone report failed %d\n", ret);
16288c2ecf20Sopenharmony_ci		return ret;
16298c2ecf20Sopenharmony_ci	}
16308c2ecf20Sopenharmony_ci
16318c2ecf20Sopenharmony_ci	if (ret != blkdev_nr_zones(bdev->bd_disk)) {
16328c2ecf20Sopenharmony_ci		zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n",
16338c2ecf20Sopenharmony_ci			   ret, blkdev_nr_zones(bdev->bd_disk));
16348c2ecf20Sopenharmony_ci		return -EIO;
16358c2ecf20Sopenharmony_ci	}
16368c2ecf20Sopenharmony_ci
16378c2ecf20Sopenharmony_ci	return 0;
16388c2ecf20Sopenharmony_ci}
16398c2ecf20Sopenharmony_ci
16408c2ecf20Sopenharmony_cistatic inline void zonefs_cleanup_zone_info(struct zonefs_zone_data *zd)
16418c2ecf20Sopenharmony_ci{
16428c2ecf20Sopenharmony_ci	kvfree(zd->zones);
16438c2ecf20Sopenharmony_ci}
16448c2ecf20Sopenharmony_ci
16458c2ecf20Sopenharmony_ci/*
16468c2ecf20Sopenharmony_ci * Read super block information from the device.
16478c2ecf20Sopenharmony_ci */
16488c2ecf20Sopenharmony_cistatic int zonefs_read_super(struct super_block *sb)
16498c2ecf20Sopenharmony_ci{
16508c2ecf20Sopenharmony_ci	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
16518c2ecf20Sopenharmony_ci	struct zonefs_super *super;
16528c2ecf20Sopenharmony_ci	u32 crc, stored_crc;
16538c2ecf20Sopenharmony_ci	struct page *page;
16548c2ecf20Sopenharmony_ci	struct bio_vec bio_vec;
16558c2ecf20Sopenharmony_ci	struct bio bio;
16568c2ecf20Sopenharmony_ci	int ret;
16578c2ecf20Sopenharmony_ci
16588c2ecf20Sopenharmony_ci	page = alloc_page(GFP_KERNEL);
16598c2ecf20Sopenharmony_ci	if (!page)
16608c2ecf20Sopenharmony_ci		return -ENOMEM;
16618c2ecf20Sopenharmony_ci
16628c2ecf20Sopenharmony_ci	bio_init(&bio, &bio_vec, 1);
16638c2ecf20Sopenharmony_ci	bio.bi_iter.bi_sector = 0;
16648c2ecf20Sopenharmony_ci	bio.bi_opf = REQ_OP_READ;
16658c2ecf20Sopenharmony_ci	bio_set_dev(&bio, sb->s_bdev);
16668c2ecf20Sopenharmony_ci	bio_add_page(&bio, page, PAGE_SIZE, 0);
16678c2ecf20Sopenharmony_ci
16688c2ecf20Sopenharmony_ci	ret = submit_bio_wait(&bio);
16698c2ecf20Sopenharmony_ci	if (ret)
16708c2ecf20Sopenharmony_ci		goto free_page;
16718c2ecf20Sopenharmony_ci
16728c2ecf20Sopenharmony_ci	super = kmap(page);
16738c2ecf20Sopenharmony_ci
16748c2ecf20Sopenharmony_ci	ret = -EINVAL;
16758c2ecf20Sopenharmony_ci	if (le32_to_cpu(super->s_magic) != ZONEFS_MAGIC)
16768c2ecf20Sopenharmony_ci		goto unmap;
16778c2ecf20Sopenharmony_ci
16788c2ecf20Sopenharmony_ci	stored_crc = le32_to_cpu(super->s_crc);
16798c2ecf20Sopenharmony_ci	super->s_crc = 0;
16808c2ecf20Sopenharmony_ci	crc = crc32(~0U, (unsigned char *)super, sizeof(struct zonefs_super));
16818c2ecf20Sopenharmony_ci	if (crc != stored_crc) {
16828c2ecf20Sopenharmony_ci		zonefs_err(sb, "Invalid checksum (Expected 0x%08x, got 0x%08x)",
16838c2ecf20Sopenharmony_ci			   crc, stored_crc);
16848c2ecf20Sopenharmony_ci		goto unmap;
16858c2ecf20Sopenharmony_ci	}
16868c2ecf20Sopenharmony_ci
16878c2ecf20Sopenharmony_ci	sbi->s_features = le64_to_cpu(super->s_features);
16888c2ecf20Sopenharmony_ci	if (sbi->s_features & ~ZONEFS_F_DEFINED_FEATURES) {
16898c2ecf20Sopenharmony_ci		zonefs_err(sb, "Unknown features set 0x%llx\n",
16908c2ecf20Sopenharmony_ci			   sbi->s_features);
16918c2ecf20Sopenharmony_ci		goto unmap;
16928c2ecf20Sopenharmony_ci	}
16938c2ecf20Sopenharmony_ci
16948c2ecf20Sopenharmony_ci	if (sbi->s_features & ZONEFS_F_UID) {
16958c2ecf20Sopenharmony_ci		sbi->s_uid = make_kuid(current_user_ns(),
16968c2ecf20Sopenharmony_ci				       le32_to_cpu(super->s_uid));
16978c2ecf20Sopenharmony_ci		if (!uid_valid(sbi->s_uid)) {
16988c2ecf20Sopenharmony_ci			zonefs_err(sb, "Invalid UID feature\n");
16998c2ecf20Sopenharmony_ci			goto unmap;
17008c2ecf20Sopenharmony_ci		}
17018c2ecf20Sopenharmony_ci	}
17028c2ecf20Sopenharmony_ci
17038c2ecf20Sopenharmony_ci	if (sbi->s_features & ZONEFS_F_GID) {
17048c2ecf20Sopenharmony_ci		sbi->s_gid = make_kgid(current_user_ns(),
17058c2ecf20Sopenharmony_ci				       le32_to_cpu(super->s_gid));
17068c2ecf20Sopenharmony_ci		if (!gid_valid(sbi->s_gid)) {
17078c2ecf20Sopenharmony_ci			zonefs_err(sb, "Invalid GID feature\n");
17088c2ecf20Sopenharmony_ci			goto unmap;
17098c2ecf20Sopenharmony_ci		}
17108c2ecf20Sopenharmony_ci	}
17118c2ecf20Sopenharmony_ci
17128c2ecf20Sopenharmony_ci	if (sbi->s_features & ZONEFS_F_PERM)
17138c2ecf20Sopenharmony_ci		sbi->s_perm = le32_to_cpu(super->s_perm);
17148c2ecf20Sopenharmony_ci
17158c2ecf20Sopenharmony_ci	if (memchr_inv(super->s_reserved, 0, sizeof(super->s_reserved))) {
17168c2ecf20Sopenharmony_ci		zonefs_err(sb, "Reserved area is being used\n");
17178c2ecf20Sopenharmony_ci		goto unmap;
17188c2ecf20Sopenharmony_ci	}
17198c2ecf20Sopenharmony_ci
17208c2ecf20Sopenharmony_ci	import_uuid(&sbi->s_uuid, super->s_uuid);
17218c2ecf20Sopenharmony_ci	ret = 0;
17228c2ecf20Sopenharmony_ci
17238c2ecf20Sopenharmony_ciunmap:
17248c2ecf20Sopenharmony_ci	kunmap(page);
17258c2ecf20Sopenharmony_cifree_page:
17268c2ecf20Sopenharmony_ci	__free_page(page);
17278c2ecf20Sopenharmony_ci
17288c2ecf20Sopenharmony_ci	return ret;
17298c2ecf20Sopenharmony_ci}
17308c2ecf20Sopenharmony_ci
17318c2ecf20Sopenharmony_ci/*
17328c2ecf20Sopenharmony_ci * Check that the device is zoned. If it is, get the list of zones and create
17338c2ecf20Sopenharmony_ci * sub-directories and files according to the device zone configuration and
17348c2ecf20Sopenharmony_ci * format options.
17358c2ecf20Sopenharmony_ci */
17368c2ecf20Sopenharmony_cistatic int zonefs_fill_super(struct super_block *sb, void *data, int silent)
17378c2ecf20Sopenharmony_ci{
17388c2ecf20Sopenharmony_ci	struct zonefs_zone_data zd;
17398c2ecf20Sopenharmony_ci	struct zonefs_sb_info *sbi;
17408c2ecf20Sopenharmony_ci	struct inode *inode;
17418c2ecf20Sopenharmony_ci	enum zonefs_ztype t;
17428c2ecf20Sopenharmony_ci	int ret;
17438c2ecf20Sopenharmony_ci
17448c2ecf20Sopenharmony_ci	if (!bdev_is_zoned(sb->s_bdev)) {
17458c2ecf20Sopenharmony_ci		zonefs_err(sb, "Not a zoned block device\n");
17468c2ecf20Sopenharmony_ci		return -EINVAL;
17478c2ecf20Sopenharmony_ci	}
17488c2ecf20Sopenharmony_ci
17498c2ecf20Sopenharmony_ci	/*
17508c2ecf20Sopenharmony_ci	 * Initialize super block information: the maximum file size is updated
17518c2ecf20Sopenharmony_ci	 * when the zone files are created so that the format option
17528c2ecf20Sopenharmony_ci	 * ZONEFS_F_AGGRCNV which increases the maximum file size of a file
17538c2ecf20Sopenharmony_ci	 * beyond the zone size is taken into account.
17548c2ecf20Sopenharmony_ci	 */
17558c2ecf20Sopenharmony_ci	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
17568c2ecf20Sopenharmony_ci	if (!sbi)
17578c2ecf20Sopenharmony_ci		return -ENOMEM;
17588c2ecf20Sopenharmony_ci
17598c2ecf20Sopenharmony_ci	spin_lock_init(&sbi->s_lock);
17608c2ecf20Sopenharmony_ci	sb->s_fs_info = sbi;
17618c2ecf20Sopenharmony_ci	sb->s_magic = ZONEFS_MAGIC;
17628c2ecf20Sopenharmony_ci	sb->s_maxbytes = 0;
17638c2ecf20Sopenharmony_ci	sb->s_op = &zonefs_sops;
17648c2ecf20Sopenharmony_ci	sb->s_time_gran	= 1;
17658c2ecf20Sopenharmony_ci
17668c2ecf20Sopenharmony_ci	/*
17678c2ecf20Sopenharmony_ci	 * The block size is set to the device physical sector size to ensure
17688c2ecf20Sopenharmony_ci	 * that write operations on 512e devices (512B logical block and 4KB
17698c2ecf20Sopenharmony_ci	 * physical block) are always aligned to the device physical blocks,
17708c2ecf20Sopenharmony_ci	 * as mandated by the ZBC/ZAC specifications.
17718c2ecf20Sopenharmony_ci	 */
17728c2ecf20Sopenharmony_ci	sb_set_blocksize(sb, bdev_physical_block_size(sb->s_bdev));
17738c2ecf20Sopenharmony_ci	sbi->s_zone_sectors_shift = ilog2(bdev_zone_sectors(sb->s_bdev));
17748c2ecf20Sopenharmony_ci	sbi->s_uid = GLOBAL_ROOT_UID;
17758c2ecf20Sopenharmony_ci	sbi->s_gid = GLOBAL_ROOT_GID;
17768c2ecf20Sopenharmony_ci	sbi->s_perm = 0640;
17778c2ecf20Sopenharmony_ci	sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO;
17788c2ecf20Sopenharmony_ci	sbi->s_max_open_zones = bdev_max_open_zones(sb->s_bdev);
17798c2ecf20Sopenharmony_ci	atomic_set(&sbi->s_open_zones, 0);
17808c2ecf20Sopenharmony_ci
17818c2ecf20Sopenharmony_ci	ret = zonefs_read_super(sb);
17828c2ecf20Sopenharmony_ci	if (ret)
17838c2ecf20Sopenharmony_ci		return ret;
17848c2ecf20Sopenharmony_ci
17858c2ecf20Sopenharmony_ci	ret = zonefs_parse_options(sb, data);
17868c2ecf20Sopenharmony_ci	if (ret)
17878c2ecf20Sopenharmony_ci		return ret;
17888c2ecf20Sopenharmony_ci
17898c2ecf20Sopenharmony_ci	memset(&zd, 0, sizeof(struct zonefs_zone_data));
17908c2ecf20Sopenharmony_ci	zd.sb = sb;
17918c2ecf20Sopenharmony_ci	ret = zonefs_get_zone_info(&zd);
17928c2ecf20Sopenharmony_ci	if (ret)
17938c2ecf20Sopenharmony_ci		goto cleanup;
17948c2ecf20Sopenharmony_ci
17958c2ecf20Sopenharmony_ci	zonefs_info(sb, "Mounting %u zones",
17968c2ecf20Sopenharmony_ci		    blkdev_nr_zones(sb->s_bdev->bd_disk));
17978c2ecf20Sopenharmony_ci
17988c2ecf20Sopenharmony_ci	if (!sbi->s_max_open_zones &&
17998c2ecf20Sopenharmony_ci	    sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
18008c2ecf20Sopenharmony_ci		zonefs_info(sb, "No open zones limit. Ignoring explicit_open mount option\n");
18018c2ecf20Sopenharmony_ci		sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN;
18028c2ecf20Sopenharmony_ci	}
18038c2ecf20Sopenharmony_ci
18048c2ecf20Sopenharmony_ci	/* Create root directory inode */
18058c2ecf20Sopenharmony_ci	ret = -ENOMEM;
18068c2ecf20Sopenharmony_ci	inode = new_inode(sb);
18078c2ecf20Sopenharmony_ci	if (!inode)
18088c2ecf20Sopenharmony_ci		goto cleanup;
18098c2ecf20Sopenharmony_ci
18108c2ecf20Sopenharmony_ci	inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk);
18118c2ecf20Sopenharmony_ci	inode->i_mode = S_IFDIR | 0555;
18128c2ecf20Sopenharmony_ci	inode->i_ctime = inode->i_mtime = inode->i_atime = current_time(inode);
18138c2ecf20Sopenharmony_ci	inode->i_op = &zonefs_dir_inode_operations;
18148c2ecf20Sopenharmony_ci	inode->i_fop = &simple_dir_operations;
18158c2ecf20Sopenharmony_ci	set_nlink(inode, 2);
18168c2ecf20Sopenharmony_ci
18178c2ecf20Sopenharmony_ci	sb->s_root = d_make_root(inode);
18188c2ecf20Sopenharmony_ci	if (!sb->s_root)
18198c2ecf20Sopenharmony_ci		goto cleanup;
18208c2ecf20Sopenharmony_ci
18218c2ecf20Sopenharmony_ci	/* Create and populate files in zone groups directories */
18228c2ecf20Sopenharmony_ci	for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
18238c2ecf20Sopenharmony_ci		ret = zonefs_create_zgroup(&zd, t);
18248c2ecf20Sopenharmony_ci		if (ret)
18258c2ecf20Sopenharmony_ci			break;
18268c2ecf20Sopenharmony_ci	}
18278c2ecf20Sopenharmony_ci
18288c2ecf20Sopenharmony_cicleanup:
18298c2ecf20Sopenharmony_ci	zonefs_cleanup_zone_info(&zd);
18308c2ecf20Sopenharmony_ci
18318c2ecf20Sopenharmony_ci	return ret;
18328c2ecf20Sopenharmony_ci}
18338c2ecf20Sopenharmony_ci
18348c2ecf20Sopenharmony_cistatic struct dentry *zonefs_mount(struct file_system_type *fs_type,
18358c2ecf20Sopenharmony_ci				   int flags, const char *dev_name, void *data)
18368c2ecf20Sopenharmony_ci{
18378c2ecf20Sopenharmony_ci	return mount_bdev(fs_type, flags, dev_name, data, zonefs_fill_super);
18388c2ecf20Sopenharmony_ci}
18398c2ecf20Sopenharmony_ci
18408c2ecf20Sopenharmony_cistatic void zonefs_kill_super(struct super_block *sb)
18418c2ecf20Sopenharmony_ci{
18428c2ecf20Sopenharmony_ci	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
18438c2ecf20Sopenharmony_ci
18448c2ecf20Sopenharmony_ci	if (sb->s_root)
18458c2ecf20Sopenharmony_ci		d_genocide(sb->s_root);
18468c2ecf20Sopenharmony_ci	kill_block_super(sb);
18478c2ecf20Sopenharmony_ci	kfree(sbi);
18488c2ecf20Sopenharmony_ci}
18498c2ecf20Sopenharmony_ci
18508c2ecf20Sopenharmony_ci/*
18518c2ecf20Sopenharmony_ci * File system definition and registration.
18528c2ecf20Sopenharmony_ci */
18538c2ecf20Sopenharmony_cistatic struct file_system_type zonefs_type = {
18548c2ecf20Sopenharmony_ci	.owner		= THIS_MODULE,
18558c2ecf20Sopenharmony_ci	.name		= "zonefs",
18568c2ecf20Sopenharmony_ci	.mount		= zonefs_mount,
18578c2ecf20Sopenharmony_ci	.kill_sb	= zonefs_kill_super,
18588c2ecf20Sopenharmony_ci	.fs_flags	= FS_REQUIRES_DEV,
18598c2ecf20Sopenharmony_ci};
18608c2ecf20Sopenharmony_ci
18618c2ecf20Sopenharmony_cistatic int __init zonefs_init_inodecache(void)
18628c2ecf20Sopenharmony_ci{
18638c2ecf20Sopenharmony_ci	zonefs_inode_cachep = kmem_cache_create("zonefs_inode_cache",
18648c2ecf20Sopenharmony_ci			sizeof(struct zonefs_inode_info), 0,
18658c2ecf20Sopenharmony_ci			(SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT),
18668c2ecf20Sopenharmony_ci			NULL);
18678c2ecf20Sopenharmony_ci	if (zonefs_inode_cachep == NULL)
18688c2ecf20Sopenharmony_ci		return -ENOMEM;
18698c2ecf20Sopenharmony_ci	return 0;
18708c2ecf20Sopenharmony_ci}
18718c2ecf20Sopenharmony_ci
18728c2ecf20Sopenharmony_cistatic void zonefs_destroy_inodecache(void)
18738c2ecf20Sopenharmony_ci{
18748c2ecf20Sopenharmony_ci	/*
18758c2ecf20Sopenharmony_ci	 * Make sure all delayed rcu free inodes are flushed before we
18768c2ecf20Sopenharmony_ci	 * destroy the inode cache.
18778c2ecf20Sopenharmony_ci	 */
18788c2ecf20Sopenharmony_ci	rcu_barrier();
18798c2ecf20Sopenharmony_ci	kmem_cache_destroy(zonefs_inode_cachep);
18808c2ecf20Sopenharmony_ci}
18818c2ecf20Sopenharmony_ci
18828c2ecf20Sopenharmony_cistatic int __init zonefs_init(void)
18838c2ecf20Sopenharmony_ci{
18848c2ecf20Sopenharmony_ci	int ret;
18858c2ecf20Sopenharmony_ci
18868c2ecf20Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE);
18878c2ecf20Sopenharmony_ci
18888c2ecf20Sopenharmony_ci	ret = zonefs_init_inodecache();
18898c2ecf20Sopenharmony_ci	if (ret)
18908c2ecf20Sopenharmony_ci		return ret;
18918c2ecf20Sopenharmony_ci
18928c2ecf20Sopenharmony_ci	ret = register_filesystem(&zonefs_type);
18938c2ecf20Sopenharmony_ci	if (ret) {
18948c2ecf20Sopenharmony_ci		zonefs_destroy_inodecache();
18958c2ecf20Sopenharmony_ci		return ret;
18968c2ecf20Sopenharmony_ci	}
18978c2ecf20Sopenharmony_ci
18988c2ecf20Sopenharmony_ci	return 0;
18998c2ecf20Sopenharmony_ci}
19008c2ecf20Sopenharmony_ci
19018c2ecf20Sopenharmony_cistatic void __exit zonefs_exit(void)
19028c2ecf20Sopenharmony_ci{
19038c2ecf20Sopenharmony_ci	zonefs_destroy_inodecache();
19048c2ecf20Sopenharmony_ci	unregister_filesystem(&zonefs_type);
19058c2ecf20Sopenharmony_ci}
19068c2ecf20Sopenharmony_ci
19078c2ecf20Sopenharmony_ciMODULE_AUTHOR("Damien Le Moal");
19088c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Zone file system for zoned block devices");
19098c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
19108c2ecf20Sopenharmony_ciMODULE_ALIAS_FS("zonefs");
19118c2ecf20Sopenharmony_cimodule_init(zonefs_init);
19128c2ecf20Sopenharmony_cimodule_exit(zonefs_exit);
1913