18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci *  linux/fs/nfs/blocklayout/blocklayout.c
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci *  Module for the NFSv4.1 pNFS block layout driver.
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci *  Copyright (c) 2006 The Regents of the University of Michigan.
78c2ecf20Sopenharmony_ci *  All rights reserved.
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci *  Andy Adamson <andros@citi.umich.edu>
108c2ecf20Sopenharmony_ci *  Fred Isaman <iisaman@umich.edu>
118c2ecf20Sopenharmony_ci *
128c2ecf20Sopenharmony_ci * permission is granted to use, copy, create derivative works and
138c2ecf20Sopenharmony_ci * redistribute this software and such derivative works for any purpose,
148c2ecf20Sopenharmony_ci * so long as the name of the university of michigan is not used in
158c2ecf20Sopenharmony_ci * any advertising or publicity pertaining to the use or distribution
168c2ecf20Sopenharmony_ci * of this software without specific, written prior authorization.  if
178c2ecf20Sopenharmony_ci * the above copyright notice or any other identification of the
188c2ecf20Sopenharmony_ci * university of michigan is included in any copy of any portion of
198c2ecf20Sopenharmony_ci * this software, then the disclaimer below must also be included.
208c2ecf20Sopenharmony_ci *
218c2ecf20Sopenharmony_ci * this software is provided as is, without representation from the
228c2ecf20Sopenharmony_ci * university of michigan as to its fitness for any purpose, and without
238c2ecf20Sopenharmony_ci * warranty by the university of michigan of any kind, either express
248c2ecf20Sopenharmony_ci * or implied, including without limitation the implied warranties of
258c2ecf20Sopenharmony_ci * merchantability and fitness for a particular purpose.  the regents
268c2ecf20Sopenharmony_ci * of the university of michigan shall not be liable for any damages,
278c2ecf20Sopenharmony_ci * including special, indirect, incidental, or consequential damages,
288c2ecf20Sopenharmony_ci * with respect to any claim arising out or in connection with the use
298c2ecf20Sopenharmony_ci * of the software, even if it has been or is hereafter advised of the
308c2ecf20Sopenharmony_ci * possibility of such damages.
318c2ecf20Sopenharmony_ci */
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_ci#include <linux/module.h>
348c2ecf20Sopenharmony_ci#include <linux/init.h>
358c2ecf20Sopenharmony_ci#include <linux/mount.h>
368c2ecf20Sopenharmony_ci#include <linux/namei.h>
378c2ecf20Sopenharmony_ci#include <linux/bio.h>		/* struct bio */
388c2ecf20Sopenharmony_ci#include <linux/prefetch.h>
398c2ecf20Sopenharmony_ci#include <linux/pagevec.h>
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_ci#include "../pnfs.h"
428c2ecf20Sopenharmony_ci#include "../nfs4session.h"
438c2ecf20Sopenharmony_ci#include "../internal.h"
448c2ecf20Sopenharmony_ci#include "blocklayout.h"
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_ci#define NFSDBG_FACILITY	NFSDBG_PNFS_LD
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
498c2ecf20Sopenharmony_ciMODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
508c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_cistatic bool is_hole(struct pnfs_block_extent *be)
538c2ecf20Sopenharmony_ci{
548c2ecf20Sopenharmony_ci	switch (be->be_state) {
558c2ecf20Sopenharmony_ci	case PNFS_BLOCK_NONE_DATA:
568c2ecf20Sopenharmony_ci		return true;
578c2ecf20Sopenharmony_ci	case PNFS_BLOCK_INVALID_DATA:
588c2ecf20Sopenharmony_ci		return be->be_tag ? false : true;
598c2ecf20Sopenharmony_ci	default:
608c2ecf20Sopenharmony_ci		return false;
618c2ecf20Sopenharmony_ci	}
628c2ecf20Sopenharmony_ci}
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci/* The data we are handed might be spread across several bios.  We need
658c2ecf20Sopenharmony_ci * to track when the last one is finished.
668c2ecf20Sopenharmony_ci */
678c2ecf20Sopenharmony_cistruct parallel_io {
688c2ecf20Sopenharmony_ci	struct kref refcnt;
698c2ecf20Sopenharmony_ci	void (*pnfs_callback) (void *data);
708c2ecf20Sopenharmony_ci	void *data;
718c2ecf20Sopenharmony_ci};
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_cistatic inline struct parallel_io *alloc_parallel(void *data)
748c2ecf20Sopenharmony_ci{
758c2ecf20Sopenharmony_ci	struct parallel_io *rv;
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci	rv  = kmalloc(sizeof(*rv), GFP_NOFS);
788c2ecf20Sopenharmony_ci	if (rv) {
798c2ecf20Sopenharmony_ci		rv->data = data;
808c2ecf20Sopenharmony_ci		kref_init(&rv->refcnt);
818c2ecf20Sopenharmony_ci	}
828c2ecf20Sopenharmony_ci	return rv;
838c2ecf20Sopenharmony_ci}
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_cistatic inline void get_parallel(struct parallel_io *p)
868c2ecf20Sopenharmony_ci{
878c2ecf20Sopenharmony_ci	kref_get(&p->refcnt);
888c2ecf20Sopenharmony_ci}
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_cistatic void destroy_parallel(struct kref *kref)
918c2ecf20Sopenharmony_ci{
928c2ecf20Sopenharmony_ci	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	dprintk("%s enter\n", __func__);
958c2ecf20Sopenharmony_ci	p->pnfs_callback(p->data);
968c2ecf20Sopenharmony_ci	kfree(p);
978c2ecf20Sopenharmony_ci}
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_cistatic inline void put_parallel(struct parallel_io *p)
1008c2ecf20Sopenharmony_ci{
1018c2ecf20Sopenharmony_ci	kref_put(&p->refcnt, destroy_parallel);
1028c2ecf20Sopenharmony_ci}
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_cistatic struct bio *
1058c2ecf20Sopenharmony_cibl_submit_bio(struct bio *bio)
1068c2ecf20Sopenharmony_ci{
1078c2ecf20Sopenharmony_ci	if (bio) {
1088c2ecf20Sopenharmony_ci		get_parallel(bio->bi_private);
1098c2ecf20Sopenharmony_ci		dprintk("%s submitting %s bio %u@%llu\n", __func__,
1108c2ecf20Sopenharmony_ci			bio_op(bio) == READ ? "read" : "write",
1118c2ecf20Sopenharmony_ci			bio->bi_iter.bi_size,
1128c2ecf20Sopenharmony_ci			(unsigned long long)bio->bi_iter.bi_sector);
1138c2ecf20Sopenharmony_ci		submit_bio(bio);
1148c2ecf20Sopenharmony_ci	}
1158c2ecf20Sopenharmony_ci	return NULL;
1168c2ecf20Sopenharmony_ci}
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_cistatic struct bio *
1198c2ecf20Sopenharmony_cibl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector,
1208c2ecf20Sopenharmony_ci		bio_end_io_t end_io, struct parallel_io *par)
1218c2ecf20Sopenharmony_ci{
1228c2ecf20Sopenharmony_ci	struct bio *bio;
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	npg = min(npg, BIO_MAX_PAGES);
1258c2ecf20Sopenharmony_ci	bio = bio_alloc(GFP_NOIO, npg);
1268c2ecf20Sopenharmony_ci	if (!bio && (current->flags & PF_MEMALLOC)) {
1278c2ecf20Sopenharmony_ci		while (!bio && (npg /= 2))
1288c2ecf20Sopenharmony_ci			bio = bio_alloc(GFP_NOIO, npg);
1298c2ecf20Sopenharmony_ci	}
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_ci	if (bio) {
1328c2ecf20Sopenharmony_ci		bio->bi_iter.bi_sector = disk_sector;
1338c2ecf20Sopenharmony_ci		bio_set_dev(bio, bdev);
1348c2ecf20Sopenharmony_ci		bio->bi_end_io = end_io;
1358c2ecf20Sopenharmony_ci		bio->bi_private = par;
1368c2ecf20Sopenharmony_ci	}
1378c2ecf20Sopenharmony_ci	return bio;
1388c2ecf20Sopenharmony_ci}
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_cistatic bool offset_in_map(u64 offset, struct pnfs_block_dev_map *map)
1418c2ecf20Sopenharmony_ci{
1428c2ecf20Sopenharmony_ci	return offset >= map->start && offset < map->start + map->len;
1438c2ecf20Sopenharmony_ci}
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_cistatic struct bio *
1468c2ecf20Sopenharmony_cido_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
1478c2ecf20Sopenharmony_ci		struct page *page, struct pnfs_block_dev_map *map,
1488c2ecf20Sopenharmony_ci		struct pnfs_block_extent *be, bio_end_io_t end_io,
1498c2ecf20Sopenharmony_ci		struct parallel_io *par, unsigned int offset, int *len)
1508c2ecf20Sopenharmony_ci{
1518c2ecf20Sopenharmony_ci	struct pnfs_block_dev *dev =
1528c2ecf20Sopenharmony_ci		container_of(be->be_device, struct pnfs_block_dev, node);
1538c2ecf20Sopenharmony_ci	u64 disk_addr, end;
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci	dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
1568c2ecf20Sopenharmony_ci		npg, rw, (unsigned long long)isect, offset, *len);
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci	/* translate to device offset */
1598c2ecf20Sopenharmony_ci	isect += be->be_v_offset;
1608c2ecf20Sopenharmony_ci	isect -= be->be_f_offset;
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci	/* translate to physical disk offset */
1638c2ecf20Sopenharmony_ci	disk_addr = (u64)isect << SECTOR_SHIFT;
1648c2ecf20Sopenharmony_ci	if (!offset_in_map(disk_addr, map)) {
1658c2ecf20Sopenharmony_ci		if (!dev->map(dev, disk_addr, map) || !offset_in_map(disk_addr, map))
1668c2ecf20Sopenharmony_ci			return ERR_PTR(-EIO);
1678c2ecf20Sopenharmony_ci		bio = bl_submit_bio(bio);
1688c2ecf20Sopenharmony_ci	}
1698c2ecf20Sopenharmony_ci	disk_addr += map->disk_offset;
1708c2ecf20Sopenharmony_ci	disk_addr -= map->start;
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ci	/* limit length to what the device mapping allows */
1738c2ecf20Sopenharmony_ci	end = disk_addr + *len;
1748c2ecf20Sopenharmony_ci	if (end >= map->start + map->len)
1758c2ecf20Sopenharmony_ci		*len = map->start + map->len - disk_addr;
1768c2ecf20Sopenharmony_ci
1778c2ecf20Sopenharmony_ciretry:
1788c2ecf20Sopenharmony_ci	if (!bio) {
1798c2ecf20Sopenharmony_ci		bio = bl_alloc_init_bio(npg, map->bdev,
1808c2ecf20Sopenharmony_ci				disk_addr >> SECTOR_SHIFT, end_io, par);
1818c2ecf20Sopenharmony_ci		if (!bio)
1828c2ecf20Sopenharmony_ci			return ERR_PTR(-ENOMEM);
1838c2ecf20Sopenharmony_ci		bio_set_op_attrs(bio, rw, 0);
1848c2ecf20Sopenharmony_ci	}
1858c2ecf20Sopenharmony_ci	if (bio_add_page(bio, page, *len, offset) < *len) {
1868c2ecf20Sopenharmony_ci		bio = bl_submit_bio(bio);
1878c2ecf20Sopenharmony_ci		goto retry;
1888c2ecf20Sopenharmony_ci	}
1898c2ecf20Sopenharmony_ci	return bio;
1908c2ecf20Sopenharmony_ci}
1918c2ecf20Sopenharmony_ci
1928c2ecf20Sopenharmony_cistatic void bl_mark_devices_unavailable(struct nfs_pgio_header *header, bool rw)
1938c2ecf20Sopenharmony_ci{
1948c2ecf20Sopenharmony_ci	struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
1958c2ecf20Sopenharmony_ci	size_t bytes_left = header->args.count;
1968c2ecf20Sopenharmony_ci	sector_t isect, extent_length = 0;
1978c2ecf20Sopenharmony_ci	struct pnfs_block_extent be;
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_ci	isect = header->args.offset >> SECTOR_SHIFT;
2008c2ecf20Sopenharmony_ci	bytes_left += header->args.offset - (isect << SECTOR_SHIFT);
2018c2ecf20Sopenharmony_ci
2028c2ecf20Sopenharmony_ci	while (bytes_left > 0) {
2038c2ecf20Sopenharmony_ci		if (!ext_tree_lookup(bl, isect, &be, rw))
2048c2ecf20Sopenharmony_ci				return;
2058c2ecf20Sopenharmony_ci		extent_length = be.be_length - (isect - be.be_f_offset);
2068c2ecf20Sopenharmony_ci		nfs4_mark_deviceid_unavailable(be.be_device);
2078c2ecf20Sopenharmony_ci		isect += extent_length;
2088c2ecf20Sopenharmony_ci		if (bytes_left > extent_length << SECTOR_SHIFT)
2098c2ecf20Sopenharmony_ci			bytes_left -= extent_length << SECTOR_SHIFT;
2108c2ecf20Sopenharmony_ci		else
2118c2ecf20Sopenharmony_ci			bytes_left = 0;
2128c2ecf20Sopenharmony_ci	}
2138c2ecf20Sopenharmony_ci}
2148c2ecf20Sopenharmony_ci
2158c2ecf20Sopenharmony_cistatic void bl_end_io_read(struct bio *bio)
2168c2ecf20Sopenharmony_ci{
2178c2ecf20Sopenharmony_ci	struct parallel_io *par = bio->bi_private;
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_ci	if (bio->bi_status) {
2208c2ecf20Sopenharmony_ci		struct nfs_pgio_header *header = par->data;
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_ci		if (!header->pnfs_error)
2238c2ecf20Sopenharmony_ci			header->pnfs_error = -EIO;
2248c2ecf20Sopenharmony_ci		pnfs_set_lo_fail(header->lseg);
2258c2ecf20Sopenharmony_ci		bl_mark_devices_unavailable(header, false);
2268c2ecf20Sopenharmony_ci	}
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci	bio_put(bio);
2298c2ecf20Sopenharmony_ci	put_parallel(par);
2308c2ecf20Sopenharmony_ci}
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_cistatic void bl_read_cleanup(struct work_struct *work)
2338c2ecf20Sopenharmony_ci{
2348c2ecf20Sopenharmony_ci	struct rpc_task *task;
2358c2ecf20Sopenharmony_ci	struct nfs_pgio_header *hdr;
2368c2ecf20Sopenharmony_ci	dprintk("%s enter\n", __func__);
2378c2ecf20Sopenharmony_ci	task = container_of(work, struct rpc_task, u.tk_work);
2388c2ecf20Sopenharmony_ci	hdr = container_of(task, struct nfs_pgio_header, task);
2398c2ecf20Sopenharmony_ci	pnfs_ld_read_done(hdr);
2408c2ecf20Sopenharmony_ci}
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_cistatic void
2438c2ecf20Sopenharmony_cibl_end_par_io_read(void *data)
2448c2ecf20Sopenharmony_ci{
2458c2ecf20Sopenharmony_ci	struct nfs_pgio_header *hdr = data;
2468c2ecf20Sopenharmony_ci
2478c2ecf20Sopenharmony_ci	hdr->task.tk_status = hdr->pnfs_error;
2488c2ecf20Sopenharmony_ci	INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup);
2498c2ecf20Sopenharmony_ci	schedule_work(&hdr->task.u.tk_work);
2508c2ecf20Sopenharmony_ci}
2518c2ecf20Sopenharmony_ci
2528c2ecf20Sopenharmony_cistatic enum pnfs_try_status
2538c2ecf20Sopenharmony_cibl_read_pagelist(struct nfs_pgio_header *header)
2548c2ecf20Sopenharmony_ci{
2558c2ecf20Sopenharmony_ci	struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
2568c2ecf20Sopenharmony_ci	struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 };
2578c2ecf20Sopenharmony_ci	struct bio *bio = NULL;
2588c2ecf20Sopenharmony_ci	struct pnfs_block_extent be;
2598c2ecf20Sopenharmony_ci	sector_t isect, extent_length = 0;
2608c2ecf20Sopenharmony_ci	struct parallel_io *par;
2618c2ecf20Sopenharmony_ci	loff_t f_offset = header->args.offset;
2628c2ecf20Sopenharmony_ci	size_t bytes_left = header->args.count;
2638c2ecf20Sopenharmony_ci	unsigned int pg_offset = header->args.pgbase, pg_len;
2648c2ecf20Sopenharmony_ci	struct page **pages = header->args.pages;
2658c2ecf20Sopenharmony_ci	int pg_index = header->args.pgbase >> PAGE_SHIFT;
2668c2ecf20Sopenharmony_ci	const bool is_dio = (header->dreq != NULL);
2678c2ecf20Sopenharmony_ci	struct blk_plug plug;
2688c2ecf20Sopenharmony_ci	int i;
2698c2ecf20Sopenharmony_ci
2708c2ecf20Sopenharmony_ci	dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
2718c2ecf20Sopenharmony_ci		header->page_array.npages, f_offset,
2728c2ecf20Sopenharmony_ci		(unsigned int)header->args.count);
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_ci	par = alloc_parallel(header);
2758c2ecf20Sopenharmony_ci	if (!par)
2768c2ecf20Sopenharmony_ci		return PNFS_NOT_ATTEMPTED;
2778c2ecf20Sopenharmony_ci	par->pnfs_callback = bl_end_par_io_read;
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_ci	blk_start_plug(&plug);
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_ci	isect = (sector_t) (f_offset >> SECTOR_SHIFT);
2828c2ecf20Sopenharmony_ci	/* Code assumes extents are page-aligned */
2838c2ecf20Sopenharmony_ci	for (i = pg_index; i < header->page_array.npages; i++) {
2848c2ecf20Sopenharmony_ci		if (extent_length <= 0) {
2858c2ecf20Sopenharmony_ci			/* We've used up the previous extent */
2868c2ecf20Sopenharmony_ci			bio = bl_submit_bio(bio);
2878c2ecf20Sopenharmony_ci
2888c2ecf20Sopenharmony_ci			/* Get the next one */
2898c2ecf20Sopenharmony_ci			if (!ext_tree_lookup(bl, isect, &be, false)) {
2908c2ecf20Sopenharmony_ci				header->pnfs_error = -EIO;
2918c2ecf20Sopenharmony_ci				goto out;
2928c2ecf20Sopenharmony_ci			}
2938c2ecf20Sopenharmony_ci			extent_length = be.be_length - (isect - be.be_f_offset);
2948c2ecf20Sopenharmony_ci		}
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_ci		if (is_dio) {
2978c2ecf20Sopenharmony_ci			if (pg_offset + bytes_left > PAGE_SIZE)
2988c2ecf20Sopenharmony_ci				pg_len = PAGE_SIZE - pg_offset;
2998c2ecf20Sopenharmony_ci			else
3008c2ecf20Sopenharmony_ci				pg_len = bytes_left;
3018c2ecf20Sopenharmony_ci		} else {
3028c2ecf20Sopenharmony_ci			BUG_ON(pg_offset != 0);
3038c2ecf20Sopenharmony_ci			pg_len = PAGE_SIZE;
3048c2ecf20Sopenharmony_ci		}
3058c2ecf20Sopenharmony_ci
3068c2ecf20Sopenharmony_ci		if (is_hole(&be)) {
3078c2ecf20Sopenharmony_ci			bio = bl_submit_bio(bio);
3088c2ecf20Sopenharmony_ci			/* Fill hole w/ zeroes w/o accessing device */
3098c2ecf20Sopenharmony_ci			dprintk("%s Zeroing page for hole\n", __func__);
3108c2ecf20Sopenharmony_ci			zero_user_segment(pages[i], pg_offset, pg_len);
3118c2ecf20Sopenharmony_ci
3128c2ecf20Sopenharmony_ci			/* invalidate map */
3138c2ecf20Sopenharmony_ci			map.start = NFS4_MAX_UINT64;
3148c2ecf20Sopenharmony_ci		} else {
3158c2ecf20Sopenharmony_ci			bio = do_add_page_to_bio(bio,
3168c2ecf20Sopenharmony_ci						 header->page_array.npages - i,
3178c2ecf20Sopenharmony_ci						 READ,
3188c2ecf20Sopenharmony_ci						 isect, pages[i], &map, &be,
3198c2ecf20Sopenharmony_ci						 bl_end_io_read, par,
3208c2ecf20Sopenharmony_ci						 pg_offset, &pg_len);
3218c2ecf20Sopenharmony_ci			if (IS_ERR(bio)) {
3228c2ecf20Sopenharmony_ci				header->pnfs_error = PTR_ERR(bio);
3238c2ecf20Sopenharmony_ci				bio = NULL;
3248c2ecf20Sopenharmony_ci				goto out;
3258c2ecf20Sopenharmony_ci			}
3268c2ecf20Sopenharmony_ci		}
3278c2ecf20Sopenharmony_ci		isect += (pg_len >> SECTOR_SHIFT);
3288c2ecf20Sopenharmony_ci		extent_length -= (pg_len >> SECTOR_SHIFT);
3298c2ecf20Sopenharmony_ci		f_offset += pg_len;
3308c2ecf20Sopenharmony_ci		bytes_left -= pg_len;
3318c2ecf20Sopenharmony_ci		pg_offset = 0;
3328c2ecf20Sopenharmony_ci	}
3338c2ecf20Sopenharmony_ci	if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
3348c2ecf20Sopenharmony_ci		header->res.eof = 1;
3358c2ecf20Sopenharmony_ci		header->res.count = header->inode->i_size - header->args.offset;
3368c2ecf20Sopenharmony_ci	} else {
3378c2ecf20Sopenharmony_ci		header->res.count = (isect << SECTOR_SHIFT) - header->args.offset;
3388c2ecf20Sopenharmony_ci	}
3398c2ecf20Sopenharmony_ciout:
3408c2ecf20Sopenharmony_ci	bl_submit_bio(bio);
3418c2ecf20Sopenharmony_ci	blk_finish_plug(&plug);
3428c2ecf20Sopenharmony_ci	put_parallel(par);
3438c2ecf20Sopenharmony_ci	return PNFS_ATTEMPTED;
3448c2ecf20Sopenharmony_ci}
3458c2ecf20Sopenharmony_ci
3468c2ecf20Sopenharmony_cistatic void bl_end_io_write(struct bio *bio)
3478c2ecf20Sopenharmony_ci{
3488c2ecf20Sopenharmony_ci	struct parallel_io *par = bio->bi_private;
3498c2ecf20Sopenharmony_ci	struct nfs_pgio_header *header = par->data;
3508c2ecf20Sopenharmony_ci
3518c2ecf20Sopenharmony_ci	if (bio->bi_status) {
3528c2ecf20Sopenharmony_ci		if (!header->pnfs_error)
3538c2ecf20Sopenharmony_ci			header->pnfs_error = -EIO;
3548c2ecf20Sopenharmony_ci		pnfs_set_lo_fail(header->lseg);
3558c2ecf20Sopenharmony_ci		bl_mark_devices_unavailable(header, true);
3568c2ecf20Sopenharmony_ci	}
3578c2ecf20Sopenharmony_ci	bio_put(bio);
3588c2ecf20Sopenharmony_ci	put_parallel(par);
3598c2ecf20Sopenharmony_ci}
3608c2ecf20Sopenharmony_ci
3618c2ecf20Sopenharmony_ci/* Function scheduled for call during bl_end_par_io_write,
3628c2ecf20Sopenharmony_ci * it marks sectors as written and extends the commitlist.
3638c2ecf20Sopenharmony_ci */
3648c2ecf20Sopenharmony_cistatic void bl_write_cleanup(struct work_struct *work)
3658c2ecf20Sopenharmony_ci{
3668c2ecf20Sopenharmony_ci	struct rpc_task *task = container_of(work, struct rpc_task, u.tk_work);
3678c2ecf20Sopenharmony_ci	struct nfs_pgio_header *hdr =
3688c2ecf20Sopenharmony_ci			container_of(task, struct nfs_pgio_header, task);
3698c2ecf20Sopenharmony_ci
3708c2ecf20Sopenharmony_ci	dprintk("%s enter\n", __func__);
3718c2ecf20Sopenharmony_ci
3728c2ecf20Sopenharmony_ci	if (likely(!hdr->pnfs_error)) {
3738c2ecf20Sopenharmony_ci		struct pnfs_block_layout *bl = BLK_LSEG2EXT(hdr->lseg);
3748c2ecf20Sopenharmony_ci		u64 start = hdr->args.offset & (loff_t)PAGE_MASK;
3758c2ecf20Sopenharmony_ci		u64 end = (hdr->args.offset + hdr->args.count +
3768c2ecf20Sopenharmony_ci			PAGE_SIZE - 1) & (loff_t)PAGE_MASK;
3778c2ecf20Sopenharmony_ci		u64 lwb = hdr->args.offset + hdr->args.count;
3788c2ecf20Sopenharmony_ci
3798c2ecf20Sopenharmony_ci		ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
3808c2ecf20Sopenharmony_ci					(end - start) >> SECTOR_SHIFT, lwb);
3818c2ecf20Sopenharmony_ci	}
3828c2ecf20Sopenharmony_ci
3838c2ecf20Sopenharmony_ci	pnfs_ld_write_done(hdr);
3848c2ecf20Sopenharmony_ci}
3858c2ecf20Sopenharmony_ci
3868c2ecf20Sopenharmony_ci/* Called when last of bios associated with a bl_write_pagelist call finishes */
3878c2ecf20Sopenharmony_cistatic void bl_end_par_io_write(void *data)
3888c2ecf20Sopenharmony_ci{
3898c2ecf20Sopenharmony_ci	struct nfs_pgio_header *hdr = data;
3908c2ecf20Sopenharmony_ci
3918c2ecf20Sopenharmony_ci	hdr->task.tk_status = hdr->pnfs_error;
3928c2ecf20Sopenharmony_ci	hdr->verf.committed = NFS_FILE_SYNC;
3938c2ecf20Sopenharmony_ci	INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup);
3948c2ecf20Sopenharmony_ci	schedule_work(&hdr->task.u.tk_work);
3958c2ecf20Sopenharmony_ci}
3968c2ecf20Sopenharmony_ci
3978c2ecf20Sopenharmony_cistatic enum pnfs_try_status
3988c2ecf20Sopenharmony_cibl_write_pagelist(struct nfs_pgio_header *header, int sync)
3998c2ecf20Sopenharmony_ci{
4008c2ecf20Sopenharmony_ci	struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
4018c2ecf20Sopenharmony_ci	struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 };
4028c2ecf20Sopenharmony_ci	struct bio *bio = NULL;
4038c2ecf20Sopenharmony_ci	struct pnfs_block_extent be;
4048c2ecf20Sopenharmony_ci	sector_t isect, extent_length = 0;
4058c2ecf20Sopenharmony_ci	struct parallel_io *par = NULL;
4068c2ecf20Sopenharmony_ci	loff_t offset = header->args.offset;
4078c2ecf20Sopenharmony_ci	size_t count = header->args.count;
4088c2ecf20Sopenharmony_ci	struct page **pages = header->args.pages;
4098c2ecf20Sopenharmony_ci	int pg_index = header->args.pgbase >> PAGE_SHIFT;
4108c2ecf20Sopenharmony_ci	unsigned int pg_len;
4118c2ecf20Sopenharmony_ci	struct blk_plug plug;
4128c2ecf20Sopenharmony_ci	int i;
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_ci	dprintk("%s enter, %zu@%lld\n", __func__, count, offset);
4158c2ecf20Sopenharmony_ci
4168c2ecf20Sopenharmony_ci	/* At this point, header->page_aray is a (sequential) list of nfs_pages.
4178c2ecf20Sopenharmony_ci	 * We want to write each, and if there is an error set pnfs_error
4188c2ecf20Sopenharmony_ci	 * to have it redone using nfs.
4198c2ecf20Sopenharmony_ci	 */
4208c2ecf20Sopenharmony_ci	par = alloc_parallel(header);
4218c2ecf20Sopenharmony_ci	if (!par)
4228c2ecf20Sopenharmony_ci		return PNFS_NOT_ATTEMPTED;
4238c2ecf20Sopenharmony_ci	par->pnfs_callback = bl_end_par_io_write;
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci	blk_start_plug(&plug);
4268c2ecf20Sopenharmony_ci
4278c2ecf20Sopenharmony_ci	/* we always write out the whole page */
4288c2ecf20Sopenharmony_ci	offset = offset & (loff_t)PAGE_MASK;
4298c2ecf20Sopenharmony_ci	isect = offset >> SECTOR_SHIFT;
4308c2ecf20Sopenharmony_ci
4318c2ecf20Sopenharmony_ci	for (i = pg_index; i < header->page_array.npages; i++) {
4328c2ecf20Sopenharmony_ci		if (extent_length <= 0) {
4338c2ecf20Sopenharmony_ci			/* We've used up the previous extent */
4348c2ecf20Sopenharmony_ci			bio = bl_submit_bio(bio);
4358c2ecf20Sopenharmony_ci			/* Get the next one */
4368c2ecf20Sopenharmony_ci			if (!ext_tree_lookup(bl, isect, &be, true)) {
4378c2ecf20Sopenharmony_ci				header->pnfs_error = -EINVAL;
4388c2ecf20Sopenharmony_ci				goto out;
4398c2ecf20Sopenharmony_ci			}
4408c2ecf20Sopenharmony_ci
4418c2ecf20Sopenharmony_ci			extent_length = be.be_length - (isect - be.be_f_offset);
4428c2ecf20Sopenharmony_ci		}
4438c2ecf20Sopenharmony_ci
4448c2ecf20Sopenharmony_ci		pg_len = PAGE_SIZE;
4458c2ecf20Sopenharmony_ci		bio = do_add_page_to_bio(bio, header->page_array.npages - i,
4468c2ecf20Sopenharmony_ci					 WRITE, isect, pages[i], &map, &be,
4478c2ecf20Sopenharmony_ci					 bl_end_io_write, par,
4488c2ecf20Sopenharmony_ci					 0, &pg_len);
4498c2ecf20Sopenharmony_ci		if (IS_ERR(bio)) {
4508c2ecf20Sopenharmony_ci			header->pnfs_error = PTR_ERR(bio);
4518c2ecf20Sopenharmony_ci			bio = NULL;
4528c2ecf20Sopenharmony_ci			goto out;
4538c2ecf20Sopenharmony_ci		}
4548c2ecf20Sopenharmony_ci
4558c2ecf20Sopenharmony_ci		offset += pg_len;
4568c2ecf20Sopenharmony_ci		count -= pg_len;
4578c2ecf20Sopenharmony_ci		isect += (pg_len >> SECTOR_SHIFT);
4588c2ecf20Sopenharmony_ci		extent_length -= (pg_len >> SECTOR_SHIFT);
4598c2ecf20Sopenharmony_ci	}
4608c2ecf20Sopenharmony_ci
4618c2ecf20Sopenharmony_ci	header->res.count = header->args.count;
4628c2ecf20Sopenharmony_ciout:
4638c2ecf20Sopenharmony_ci	bl_submit_bio(bio);
4648c2ecf20Sopenharmony_ci	blk_finish_plug(&plug);
4658c2ecf20Sopenharmony_ci	put_parallel(par);
4668c2ecf20Sopenharmony_ci	return PNFS_ATTEMPTED;
4678c2ecf20Sopenharmony_ci}
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_cistatic void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
4708c2ecf20Sopenharmony_ci{
4718c2ecf20Sopenharmony_ci	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
4728c2ecf20Sopenharmony_ci	int err;
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	dprintk("%s enter\n", __func__);
4758c2ecf20Sopenharmony_ci
4768c2ecf20Sopenharmony_ci	err = ext_tree_remove(bl, true, 0, LLONG_MAX);
4778c2ecf20Sopenharmony_ci	WARN_ON(err);
4788c2ecf20Sopenharmony_ci
4798c2ecf20Sopenharmony_ci	kfree_rcu(bl, bl_layout.plh_rcu);
4808c2ecf20Sopenharmony_ci}
4818c2ecf20Sopenharmony_ci
4828c2ecf20Sopenharmony_cistatic struct pnfs_layout_hdr *__bl_alloc_layout_hdr(struct inode *inode,
4838c2ecf20Sopenharmony_ci		gfp_t gfp_flags, bool is_scsi_layout)
4848c2ecf20Sopenharmony_ci{
4858c2ecf20Sopenharmony_ci	struct pnfs_block_layout *bl;
4868c2ecf20Sopenharmony_ci
4878c2ecf20Sopenharmony_ci	dprintk("%s enter\n", __func__);
4888c2ecf20Sopenharmony_ci	bl = kzalloc(sizeof(*bl), gfp_flags);
4898c2ecf20Sopenharmony_ci	if (!bl)
4908c2ecf20Sopenharmony_ci		return NULL;
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_ci	bl->bl_ext_rw = RB_ROOT;
4938c2ecf20Sopenharmony_ci	bl->bl_ext_ro = RB_ROOT;
4948c2ecf20Sopenharmony_ci	spin_lock_init(&bl->bl_ext_lock);
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_ci	bl->bl_scsi_layout = is_scsi_layout;
4978c2ecf20Sopenharmony_ci	return &bl->bl_layout;
4988c2ecf20Sopenharmony_ci}
4998c2ecf20Sopenharmony_ci
5008c2ecf20Sopenharmony_cistatic struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
5018c2ecf20Sopenharmony_ci						   gfp_t gfp_flags)
5028c2ecf20Sopenharmony_ci{
5038c2ecf20Sopenharmony_ci	return __bl_alloc_layout_hdr(inode, gfp_flags, false);
5048c2ecf20Sopenharmony_ci}
5058c2ecf20Sopenharmony_ci
5068c2ecf20Sopenharmony_cistatic struct pnfs_layout_hdr *sl_alloc_layout_hdr(struct inode *inode,
5078c2ecf20Sopenharmony_ci						   gfp_t gfp_flags)
5088c2ecf20Sopenharmony_ci{
5098c2ecf20Sopenharmony_ci	return __bl_alloc_layout_hdr(inode, gfp_flags, true);
5108c2ecf20Sopenharmony_ci}
5118c2ecf20Sopenharmony_ci
/* Free a layout segment with kfree(). */
static void bl_free_lseg(struct pnfs_layout_segment *lseg)
{
	dprintk("%s enter\n", __func__);

	kfree(lseg);
}
5178c2ecf20Sopenharmony_ci
5188c2ecf20Sopenharmony_ci/* Tracks info needed to ensure extents in layout obey constraints of spec */
5198c2ecf20Sopenharmony_cistruct layout_verification {
5208c2ecf20Sopenharmony_ci	u32 mode;	/* R or RW */
5218c2ecf20Sopenharmony_ci	u64 start;	/* Expected start of next non-COW extent */
5228c2ecf20Sopenharmony_ci	u64 inval;	/* Start of INVAL coverage */
5238c2ecf20Sopenharmony_ci	u64 cowread;	/* End of COW read coverage */
5248c2ecf20Sopenharmony_ci};
5258c2ecf20Sopenharmony_ci
5268c2ecf20Sopenharmony_ci/* Verify the extent meets the layout requirements of the pnfs-block draft,
5278c2ecf20Sopenharmony_ci * section 2.3.1.
5288c2ecf20Sopenharmony_ci */
5298c2ecf20Sopenharmony_cistatic int verify_extent(struct pnfs_block_extent *be,
5308c2ecf20Sopenharmony_ci			 struct layout_verification *lv)
5318c2ecf20Sopenharmony_ci{
5328c2ecf20Sopenharmony_ci	if (lv->mode == IOMODE_READ) {
5338c2ecf20Sopenharmony_ci		if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
5348c2ecf20Sopenharmony_ci		    be->be_state == PNFS_BLOCK_INVALID_DATA)
5358c2ecf20Sopenharmony_ci			return -EIO;
5368c2ecf20Sopenharmony_ci		if (be->be_f_offset != lv->start)
5378c2ecf20Sopenharmony_ci			return -EIO;
5388c2ecf20Sopenharmony_ci		lv->start += be->be_length;
5398c2ecf20Sopenharmony_ci		return 0;
5408c2ecf20Sopenharmony_ci	}
5418c2ecf20Sopenharmony_ci	/* lv->mode == IOMODE_RW */
5428c2ecf20Sopenharmony_ci	if (be->be_state == PNFS_BLOCK_READWRITE_DATA) {
5438c2ecf20Sopenharmony_ci		if (be->be_f_offset != lv->start)
5448c2ecf20Sopenharmony_ci			return -EIO;
5458c2ecf20Sopenharmony_ci		if (lv->cowread > lv->start)
5468c2ecf20Sopenharmony_ci			return -EIO;
5478c2ecf20Sopenharmony_ci		lv->start += be->be_length;
5488c2ecf20Sopenharmony_ci		lv->inval = lv->start;
5498c2ecf20Sopenharmony_ci		return 0;
5508c2ecf20Sopenharmony_ci	} else if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
5518c2ecf20Sopenharmony_ci		if (be->be_f_offset != lv->start)
5528c2ecf20Sopenharmony_ci			return -EIO;
5538c2ecf20Sopenharmony_ci		lv->start += be->be_length;
5548c2ecf20Sopenharmony_ci		return 0;
5558c2ecf20Sopenharmony_ci	} else if (be->be_state == PNFS_BLOCK_READ_DATA) {
5568c2ecf20Sopenharmony_ci		if (be->be_f_offset > lv->start)
5578c2ecf20Sopenharmony_ci			return -EIO;
5588c2ecf20Sopenharmony_ci		if (be->be_f_offset < lv->inval)
5598c2ecf20Sopenharmony_ci			return -EIO;
5608c2ecf20Sopenharmony_ci		if (be->be_f_offset < lv->cowread)
5618c2ecf20Sopenharmony_ci			return -EIO;
5628c2ecf20Sopenharmony_ci		/* It looks like you might want to min this with lv->start,
5638c2ecf20Sopenharmony_ci		 * but you really don't.
5648c2ecf20Sopenharmony_ci		 */
5658c2ecf20Sopenharmony_ci		lv->inval = lv->inval + be->be_length;
5668c2ecf20Sopenharmony_ci		lv->cowread = be->be_f_offset + be->be_length;
5678c2ecf20Sopenharmony_ci		return 0;
5688c2ecf20Sopenharmony_ci	} else
5698c2ecf20Sopenharmony_ci		return -EIO;
5708c2ecf20Sopenharmony_ci}
5718c2ecf20Sopenharmony_ci
5728c2ecf20Sopenharmony_cistatic int decode_sector_number(__be32 **rp, sector_t *sp)
5738c2ecf20Sopenharmony_ci{
5748c2ecf20Sopenharmony_ci	uint64_t s;
5758c2ecf20Sopenharmony_ci
5768c2ecf20Sopenharmony_ci	*rp = xdr_decode_hyper(*rp, &s);
5778c2ecf20Sopenharmony_ci	if (s & 0x1ff) {
5788c2ecf20Sopenharmony_ci		printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__);
5798c2ecf20Sopenharmony_ci		return -1;
5808c2ecf20Sopenharmony_ci	}
5818c2ecf20Sopenharmony_ci	*sp = s >> SECTOR_SHIFT;
5828c2ecf20Sopenharmony_ci	return 0;
5838c2ecf20Sopenharmony_ci}
5848c2ecf20Sopenharmony_ci
5858c2ecf20Sopenharmony_cistatic struct nfs4_deviceid_node *
5868c2ecf20Sopenharmony_cibl_find_get_deviceid(struct nfs_server *server,
5878c2ecf20Sopenharmony_ci		const struct nfs4_deviceid *id, const struct cred *cred,
5888c2ecf20Sopenharmony_ci		gfp_t gfp_mask)
5898c2ecf20Sopenharmony_ci{
5908c2ecf20Sopenharmony_ci	struct nfs4_deviceid_node *node;
5918c2ecf20Sopenharmony_ci	unsigned long start, end;
5928c2ecf20Sopenharmony_ci
5938c2ecf20Sopenharmony_ciretry:
5948c2ecf20Sopenharmony_ci	node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
5958c2ecf20Sopenharmony_ci	if (!node)
5968c2ecf20Sopenharmony_ci		return ERR_PTR(-ENODEV);
5978c2ecf20Sopenharmony_ci
5988c2ecf20Sopenharmony_ci	if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0)
5998c2ecf20Sopenharmony_ci		return node;
6008c2ecf20Sopenharmony_ci
6018c2ecf20Sopenharmony_ci	end = jiffies;
6028c2ecf20Sopenharmony_ci	start = end - PNFS_DEVICE_RETRY_TIMEOUT;
6038c2ecf20Sopenharmony_ci	if (!time_in_range(node->timestamp_unavailable, start, end)) {
6048c2ecf20Sopenharmony_ci		nfs4_delete_deviceid(node->ld, node->nfs_client, id);
6058c2ecf20Sopenharmony_ci		goto retry;
6068c2ecf20Sopenharmony_ci	}
6078c2ecf20Sopenharmony_ci
6088c2ecf20Sopenharmony_ci	nfs4_put_deviceid_node(node);
6098c2ecf20Sopenharmony_ci	return ERR_PTR(-ENODEV);
6108c2ecf20Sopenharmony_ci}
6118c2ecf20Sopenharmony_ci
6128c2ecf20Sopenharmony_cistatic int
6138c2ecf20Sopenharmony_cibl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo,
6148c2ecf20Sopenharmony_ci		struct layout_verification *lv, struct list_head *extents,
6158c2ecf20Sopenharmony_ci		gfp_t gfp_mask)
6168c2ecf20Sopenharmony_ci{
6178c2ecf20Sopenharmony_ci	struct pnfs_block_extent *be;
6188c2ecf20Sopenharmony_ci	struct nfs4_deviceid id;
6198c2ecf20Sopenharmony_ci	int error;
6208c2ecf20Sopenharmony_ci	__be32 *p;
6218c2ecf20Sopenharmony_ci
6228c2ecf20Sopenharmony_ci	p = xdr_inline_decode(xdr, 28 + NFS4_DEVICEID4_SIZE);
6238c2ecf20Sopenharmony_ci	if (!p)
6248c2ecf20Sopenharmony_ci		return -EIO;
6258c2ecf20Sopenharmony_ci
6268c2ecf20Sopenharmony_ci	be = kzalloc(sizeof(*be), GFP_NOFS);
6278c2ecf20Sopenharmony_ci	if (!be)
6288c2ecf20Sopenharmony_ci		return -ENOMEM;
6298c2ecf20Sopenharmony_ci
6308c2ecf20Sopenharmony_ci	memcpy(&id, p, NFS4_DEVICEID4_SIZE);
6318c2ecf20Sopenharmony_ci	p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
6328c2ecf20Sopenharmony_ci
6338c2ecf20Sopenharmony_ci	be->be_device = bl_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id,
6348c2ecf20Sopenharmony_ci						lo->plh_lc_cred, gfp_mask);
6358c2ecf20Sopenharmony_ci	if (IS_ERR(be->be_device)) {
6368c2ecf20Sopenharmony_ci		error = PTR_ERR(be->be_device);
6378c2ecf20Sopenharmony_ci		goto out_free_be;
6388c2ecf20Sopenharmony_ci	}
6398c2ecf20Sopenharmony_ci
6408c2ecf20Sopenharmony_ci	/*
6418c2ecf20Sopenharmony_ci	 * The next three values are read in as bytes, but stored in the
6428c2ecf20Sopenharmony_ci	 * extent structure in 512-byte granularity.
6438c2ecf20Sopenharmony_ci	 */
6448c2ecf20Sopenharmony_ci	error = -EIO;
6458c2ecf20Sopenharmony_ci	if (decode_sector_number(&p, &be->be_f_offset) < 0)
6468c2ecf20Sopenharmony_ci		goto out_put_deviceid;
6478c2ecf20Sopenharmony_ci	if (decode_sector_number(&p, &be->be_length) < 0)
6488c2ecf20Sopenharmony_ci		goto out_put_deviceid;
6498c2ecf20Sopenharmony_ci	if (decode_sector_number(&p, &be->be_v_offset) < 0)
6508c2ecf20Sopenharmony_ci		goto out_put_deviceid;
6518c2ecf20Sopenharmony_ci	be->be_state = be32_to_cpup(p++);
6528c2ecf20Sopenharmony_ci
6538c2ecf20Sopenharmony_ci	error = verify_extent(be, lv);
6548c2ecf20Sopenharmony_ci	if (error) {
6558c2ecf20Sopenharmony_ci		dprintk("%s: extent verification failed\n", __func__);
6568c2ecf20Sopenharmony_ci		goto out_put_deviceid;
6578c2ecf20Sopenharmony_ci	}
6588c2ecf20Sopenharmony_ci
6598c2ecf20Sopenharmony_ci	list_add_tail(&be->be_list, extents);
6608c2ecf20Sopenharmony_ci	return 0;
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_ciout_put_deviceid:
6638c2ecf20Sopenharmony_ci	nfs4_put_deviceid_node(be->be_device);
6648c2ecf20Sopenharmony_ciout_free_be:
6658c2ecf20Sopenharmony_ci	kfree(be);
6668c2ecf20Sopenharmony_ci	return error;
6678c2ecf20Sopenharmony_ci}
6688c2ecf20Sopenharmony_ci
6698c2ecf20Sopenharmony_cistatic struct pnfs_layout_segment *
6708c2ecf20Sopenharmony_cibl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr,
6718c2ecf20Sopenharmony_ci		gfp_t gfp_mask)
6728c2ecf20Sopenharmony_ci{
6738c2ecf20Sopenharmony_ci	struct layout_verification lv = {
6748c2ecf20Sopenharmony_ci		.mode = lgr->range.iomode,
6758c2ecf20Sopenharmony_ci		.start = lgr->range.offset >> SECTOR_SHIFT,
6768c2ecf20Sopenharmony_ci		.inval = lgr->range.offset >> SECTOR_SHIFT,
6778c2ecf20Sopenharmony_ci		.cowread = lgr->range.offset >> SECTOR_SHIFT,
6788c2ecf20Sopenharmony_ci	};
6798c2ecf20Sopenharmony_ci	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
6808c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg;
6818c2ecf20Sopenharmony_ci	struct xdr_buf buf;
6828c2ecf20Sopenharmony_ci	struct xdr_stream xdr;
6838c2ecf20Sopenharmony_ci	struct page *scratch;
6848c2ecf20Sopenharmony_ci	int status, i;
6858c2ecf20Sopenharmony_ci	uint32_t count;
6868c2ecf20Sopenharmony_ci	__be32 *p;
6878c2ecf20Sopenharmony_ci	LIST_HEAD(extents);
6888c2ecf20Sopenharmony_ci
6898c2ecf20Sopenharmony_ci	dprintk("---> %s\n", __func__);
6908c2ecf20Sopenharmony_ci
6918c2ecf20Sopenharmony_ci	lseg = kzalloc(sizeof(*lseg), gfp_mask);
6928c2ecf20Sopenharmony_ci	if (!lseg)
6938c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
6948c2ecf20Sopenharmony_ci
6958c2ecf20Sopenharmony_ci	status = -ENOMEM;
6968c2ecf20Sopenharmony_ci	scratch = alloc_page(gfp_mask);
6978c2ecf20Sopenharmony_ci	if (!scratch)
6988c2ecf20Sopenharmony_ci		goto out;
6998c2ecf20Sopenharmony_ci
7008c2ecf20Sopenharmony_ci	xdr_init_decode_pages(&xdr, &buf,
7018c2ecf20Sopenharmony_ci			lgr->layoutp->pages, lgr->layoutp->len);
7028c2ecf20Sopenharmony_ci	xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
7038c2ecf20Sopenharmony_ci
7048c2ecf20Sopenharmony_ci	status = -EIO;
7058c2ecf20Sopenharmony_ci	p = xdr_inline_decode(&xdr, 4);
7068c2ecf20Sopenharmony_ci	if (unlikely(!p))
7078c2ecf20Sopenharmony_ci		goto out_free_scratch;
7088c2ecf20Sopenharmony_ci
7098c2ecf20Sopenharmony_ci	count = be32_to_cpup(p++);
7108c2ecf20Sopenharmony_ci	dprintk("%s: number of extents %d\n", __func__, count);
7118c2ecf20Sopenharmony_ci
7128c2ecf20Sopenharmony_ci	/*
7138c2ecf20Sopenharmony_ci	 * Decode individual extents, putting them in temporary staging area
7148c2ecf20Sopenharmony_ci	 * until whole layout is decoded to make error recovery easier.
7158c2ecf20Sopenharmony_ci	 */
7168c2ecf20Sopenharmony_ci	for (i = 0; i < count; i++) {
7178c2ecf20Sopenharmony_ci		status = bl_alloc_extent(&xdr, lo, &lv, &extents, gfp_mask);
7188c2ecf20Sopenharmony_ci		if (status)
7198c2ecf20Sopenharmony_ci			goto process_extents;
7208c2ecf20Sopenharmony_ci	}
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_ci	if (lgr->range.offset + lgr->range.length !=
7238c2ecf20Sopenharmony_ci			lv.start << SECTOR_SHIFT) {
7248c2ecf20Sopenharmony_ci		dprintk("%s Final length mismatch\n", __func__);
7258c2ecf20Sopenharmony_ci		status = -EIO;
7268c2ecf20Sopenharmony_ci		goto process_extents;
7278c2ecf20Sopenharmony_ci	}
7288c2ecf20Sopenharmony_ci
7298c2ecf20Sopenharmony_ci	if (lv.start < lv.cowread) {
7308c2ecf20Sopenharmony_ci		dprintk("%s Final uncovered COW extent\n", __func__);
7318c2ecf20Sopenharmony_ci		status = -EIO;
7328c2ecf20Sopenharmony_ci	}
7338c2ecf20Sopenharmony_ci
7348c2ecf20Sopenharmony_ciprocess_extents:
7358c2ecf20Sopenharmony_ci	while (!list_empty(&extents)) {
7368c2ecf20Sopenharmony_ci		struct pnfs_block_extent *be =
7378c2ecf20Sopenharmony_ci			list_first_entry(&extents, struct pnfs_block_extent,
7388c2ecf20Sopenharmony_ci					 be_list);
7398c2ecf20Sopenharmony_ci		list_del(&be->be_list);
7408c2ecf20Sopenharmony_ci
7418c2ecf20Sopenharmony_ci		if (!status)
7428c2ecf20Sopenharmony_ci			status = ext_tree_insert(bl, be);
7438c2ecf20Sopenharmony_ci
7448c2ecf20Sopenharmony_ci		if (status) {
7458c2ecf20Sopenharmony_ci			nfs4_put_deviceid_node(be->be_device);
7468c2ecf20Sopenharmony_ci			kfree(be);
7478c2ecf20Sopenharmony_ci		}
7488c2ecf20Sopenharmony_ci	}
7498c2ecf20Sopenharmony_ci
7508c2ecf20Sopenharmony_ciout_free_scratch:
7518c2ecf20Sopenharmony_ci	__free_page(scratch);
7528c2ecf20Sopenharmony_ciout:
7538c2ecf20Sopenharmony_ci	dprintk("%s returns %d\n", __func__, status);
7548c2ecf20Sopenharmony_ci	switch (status) {
7558c2ecf20Sopenharmony_ci	case -ENODEV:
7568c2ecf20Sopenharmony_ci		/* Our extent block devices are unavailable */
7578c2ecf20Sopenharmony_ci		set_bit(NFS_LSEG_UNAVAILABLE, &lseg->pls_flags);
7588c2ecf20Sopenharmony_ci		fallthrough;
7598c2ecf20Sopenharmony_ci	case 0:
7608c2ecf20Sopenharmony_ci		return lseg;
7618c2ecf20Sopenharmony_ci	default:
7628c2ecf20Sopenharmony_ci		kfree(lseg);
7638c2ecf20Sopenharmony_ci		return ERR_PTR(status);
7648c2ecf20Sopenharmony_ci	}
7658c2ecf20Sopenharmony_ci}
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_cistatic void
7688c2ecf20Sopenharmony_cibl_return_range(struct pnfs_layout_hdr *lo,
7698c2ecf20Sopenharmony_ci		struct pnfs_layout_range *range)
7708c2ecf20Sopenharmony_ci{
7718c2ecf20Sopenharmony_ci	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
7728c2ecf20Sopenharmony_ci	sector_t offset = range->offset >> SECTOR_SHIFT, end;
7738c2ecf20Sopenharmony_ci
7748c2ecf20Sopenharmony_ci	if (range->offset % 8) {
7758c2ecf20Sopenharmony_ci		dprintk("%s: offset %lld not block size aligned\n",
7768c2ecf20Sopenharmony_ci			__func__, range->offset);
7778c2ecf20Sopenharmony_ci		return;
7788c2ecf20Sopenharmony_ci	}
7798c2ecf20Sopenharmony_ci
7808c2ecf20Sopenharmony_ci	if (range->length != NFS4_MAX_UINT64) {
7818c2ecf20Sopenharmony_ci		if (range->length % 8) {
7828c2ecf20Sopenharmony_ci			dprintk("%s: length %lld not block size aligned\n",
7838c2ecf20Sopenharmony_ci				__func__, range->length);
7848c2ecf20Sopenharmony_ci			return;
7858c2ecf20Sopenharmony_ci		}
7868c2ecf20Sopenharmony_ci
7878c2ecf20Sopenharmony_ci		end = offset + (range->length >> SECTOR_SHIFT);
7888c2ecf20Sopenharmony_ci	} else {
7898c2ecf20Sopenharmony_ci		end = round_down(NFS4_MAX_UINT64, PAGE_SIZE);
7908c2ecf20Sopenharmony_ci	}
7918c2ecf20Sopenharmony_ci
7928c2ecf20Sopenharmony_ci	ext_tree_remove(bl, range->iomode & IOMODE_RW, offset, end);
7938c2ecf20Sopenharmony_ci}
7948c2ecf20Sopenharmony_ci
/*
 * Layout driver hook: hand the LAYOUTCOMMIT arguments to the extent tree
 * and pass its result straight back to the caller.
 */
static int
bl_prepare_layoutcommit(struct nfs4_layoutcommit_args *arg)
{
	int status = ext_tree_prepare_commit(arg);

	return status;
}
8008c2ecf20Sopenharmony_ci
8018c2ecf20Sopenharmony_cistatic void
8028c2ecf20Sopenharmony_cibl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
8038c2ecf20Sopenharmony_ci{
8048c2ecf20Sopenharmony_ci	ext_tree_mark_committed(&lcdata->args, lcdata->res.status);
8058c2ecf20Sopenharmony_ci}
8068c2ecf20Sopenharmony_ci
8078c2ecf20Sopenharmony_cistatic int
8088c2ecf20Sopenharmony_cibl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
8098c2ecf20Sopenharmony_ci{
8108c2ecf20Sopenharmony_ci	dprintk("%s enter\n", __func__);
8118c2ecf20Sopenharmony_ci
8128c2ecf20Sopenharmony_ci	if (server->pnfs_blksize == 0) {
8138c2ecf20Sopenharmony_ci		dprintk("%s Server did not return blksize\n", __func__);
8148c2ecf20Sopenharmony_ci		return -EINVAL;
8158c2ecf20Sopenharmony_ci	}
8168c2ecf20Sopenharmony_ci	if (server->pnfs_blksize > PAGE_SIZE) {
8178c2ecf20Sopenharmony_ci		printk(KERN_ERR "%s: pNFS blksize %d not supported.\n",
8188c2ecf20Sopenharmony_ci			__func__, server->pnfs_blksize);
8198c2ecf20Sopenharmony_ci		return -EINVAL;
8208c2ecf20Sopenharmony_ci	}
8218c2ecf20Sopenharmony_ci
8228c2ecf20Sopenharmony_ci	return 0;
8238c2ecf20Sopenharmony_ci}
8248c2ecf20Sopenharmony_ci
8258c2ecf20Sopenharmony_cistatic bool
8268c2ecf20Sopenharmony_ciis_aligned_req(struct nfs_pageio_descriptor *pgio,
8278c2ecf20Sopenharmony_ci		struct nfs_page *req, unsigned int alignment, bool is_write)
8288c2ecf20Sopenharmony_ci{
8298c2ecf20Sopenharmony_ci	/*
8308c2ecf20Sopenharmony_ci	 * Always accept buffered writes, higher layers take care of the
8318c2ecf20Sopenharmony_ci	 * right alignment.
8328c2ecf20Sopenharmony_ci	 */
8338c2ecf20Sopenharmony_ci	if (pgio->pg_dreq == NULL)
8348c2ecf20Sopenharmony_ci		return true;
8358c2ecf20Sopenharmony_ci
8368c2ecf20Sopenharmony_ci	if (!IS_ALIGNED(req->wb_offset, alignment))
8378c2ecf20Sopenharmony_ci		return false;
8388c2ecf20Sopenharmony_ci
8398c2ecf20Sopenharmony_ci	if (IS_ALIGNED(req->wb_bytes, alignment))
8408c2ecf20Sopenharmony_ci		return true;
8418c2ecf20Sopenharmony_ci
8428c2ecf20Sopenharmony_ci	if (is_write &&
8438c2ecf20Sopenharmony_ci	    (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode))) {
8448c2ecf20Sopenharmony_ci		/*
8458c2ecf20Sopenharmony_ci		 * If the write goes up to the inode size, just write
8468c2ecf20Sopenharmony_ci		 * the full page.  Data past the inode size is
8478c2ecf20Sopenharmony_ci		 * guaranteed to be zeroed by the higher level client
8488c2ecf20Sopenharmony_ci		 * code, and this behaviour is mandated by RFC 5663
8498c2ecf20Sopenharmony_ci		 * section 2.3.2.
8508c2ecf20Sopenharmony_ci		 */
8518c2ecf20Sopenharmony_ci		return true;
8528c2ecf20Sopenharmony_ci	}
8538c2ecf20Sopenharmony_ci
8548c2ecf20Sopenharmony_ci	return false;
8558c2ecf20Sopenharmony_ci}
8568c2ecf20Sopenharmony_ci
8578c2ecf20Sopenharmony_cistatic void
8588c2ecf20Sopenharmony_cibl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
8598c2ecf20Sopenharmony_ci{
8608c2ecf20Sopenharmony_ci	if (!is_aligned_req(pgio, req, SECTOR_SIZE, false)) {
8618c2ecf20Sopenharmony_ci		nfs_pageio_reset_read_mds(pgio);
8628c2ecf20Sopenharmony_ci		return;
8638c2ecf20Sopenharmony_ci	}
8648c2ecf20Sopenharmony_ci
8658c2ecf20Sopenharmony_ci	pnfs_generic_pg_init_read(pgio, req);
8668c2ecf20Sopenharmony_ci
8678c2ecf20Sopenharmony_ci	if (pgio->pg_lseg &&
8688c2ecf20Sopenharmony_ci		test_bit(NFS_LSEG_UNAVAILABLE, &pgio->pg_lseg->pls_flags)) {
8698c2ecf20Sopenharmony_ci		pnfs_error_mark_layout_for_return(pgio->pg_inode, pgio->pg_lseg);
8708c2ecf20Sopenharmony_ci		pnfs_set_lo_fail(pgio->pg_lseg);
8718c2ecf20Sopenharmony_ci		nfs_pageio_reset_read_mds(pgio);
8728c2ecf20Sopenharmony_ci	}
8738c2ecf20Sopenharmony_ci}
8748c2ecf20Sopenharmony_ci
8758c2ecf20Sopenharmony_ci/*
8768c2ecf20Sopenharmony_ci * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
8778c2ecf20Sopenharmony_ci * of bytes (maximum @req->wb_bytes) that can be coalesced.
8788c2ecf20Sopenharmony_ci */
8798c2ecf20Sopenharmony_cistatic size_t
8808c2ecf20Sopenharmony_cibl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
8818c2ecf20Sopenharmony_ci		struct nfs_page *req)
8828c2ecf20Sopenharmony_ci{
8838c2ecf20Sopenharmony_ci	if (!is_aligned_req(pgio, req, SECTOR_SIZE, false))
8848c2ecf20Sopenharmony_ci		return 0;
8858c2ecf20Sopenharmony_ci	return pnfs_generic_pg_test(pgio, prev, req);
8868c2ecf20Sopenharmony_ci}
8878c2ecf20Sopenharmony_ci
8888c2ecf20Sopenharmony_ci/*
8898c2ecf20Sopenharmony_ci * Return the number of contiguous bytes for a given inode
8908c2ecf20Sopenharmony_ci * starting at page frame idx.
8918c2ecf20Sopenharmony_ci */
8928c2ecf20Sopenharmony_cistatic u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
8938c2ecf20Sopenharmony_ci{
8948c2ecf20Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
8958c2ecf20Sopenharmony_ci	pgoff_t end;
8968c2ecf20Sopenharmony_ci
8978c2ecf20Sopenharmony_ci	/* Optimize common case that writes from 0 to end of file */
8988c2ecf20Sopenharmony_ci	end = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
8998c2ecf20Sopenharmony_ci	if (end != inode->i_mapping->nrpages) {
9008c2ecf20Sopenharmony_ci		rcu_read_lock();
9018c2ecf20Sopenharmony_ci		end = page_cache_next_miss(mapping, idx + 1, ULONG_MAX);
9028c2ecf20Sopenharmony_ci		rcu_read_unlock();
9038c2ecf20Sopenharmony_ci	}
9048c2ecf20Sopenharmony_ci
9058c2ecf20Sopenharmony_ci	if (!end)
9068c2ecf20Sopenharmony_ci		return i_size_read(inode) - (idx << PAGE_SHIFT);
9078c2ecf20Sopenharmony_ci	else
9088c2ecf20Sopenharmony_ci		return (end - idx) << PAGE_SHIFT;
9098c2ecf20Sopenharmony_ci}
9108c2ecf20Sopenharmony_ci
9118c2ecf20Sopenharmony_cistatic void
9128c2ecf20Sopenharmony_cibl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
9138c2ecf20Sopenharmony_ci{
9148c2ecf20Sopenharmony_ci	u64 wb_size;
9158c2ecf20Sopenharmony_ci
9168c2ecf20Sopenharmony_ci	if (!is_aligned_req(pgio, req, PAGE_SIZE, true)) {
9178c2ecf20Sopenharmony_ci		nfs_pageio_reset_write_mds(pgio);
9188c2ecf20Sopenharmony_ci		return;
9198c2ecf20Sopenharmony_ci	}
9208c2ecf20Sopenharmony_ci
9218c2ecf20Sopenharmony_ci	if (pgio->pg_dreq == NULL)
9228c2ecf20Sopenharmony_ci		wb_size = pnfs_num_cont_bytes(pgio->pg_inode,
9238c2ecf20Sopenharmony_ci					      req->wb_index);
9248c2ecf20Sopenharmony_ci	else
9258c2ecf20Sopenharmony_ci		wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
9268c2ecf20Sopenharmony_ci
9278c2ecf20Sopenharmony_ci	pnfs_generic_pg_init_write(pgio, req, wb_size);
9288c2ecf20Sopenharmony_ci
9298c2ecf20Sopenharmony_ci	if (pgio->pg_lseg &&
9308c2ecf20Sopenharmony_ci		test_bit(NFS_LSEG_UNAVAILABLE, &pgio->pg_lseg->pls_flags)) {
9318c2ecf20Sopenharmony_ci
9328c2ecf20Sopenharmony_ci		pnfs_error_mark_layout_for_return(pgio->pg_inode, pgio->pg_lseg);
9338c2ecf20Sopenharmony_ci		pnfs_set_lo_fail(pgio->pg_lseg);
9348c2ecf20Sopenharmony_ci		nfs_pageio_reset_write_mds(pgio);
9358c2ecf20Sopenharmony_ci	}
9368c2ecf20Sopenharmony_ci}
9378c2ecf20Sopenharmony_ci
9388c2ecf20Sopenharmony_ci/*
9398c2ecf20Sopenharmony_ci * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
9408c2ecf20Sopenharmony_ci * of bytes (maximum @req->wb_bytes) that can be coalesced.
9418c2ecf20Sopenharmony_ci */
9428c2ecf20Sopenharmony_cistatic size_t
9438c2ecf20Sopenharmony_cibl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
9448c2ecf20Sopenharmony_ci		 struct nfs_page *req)
9458c2ecf20Sopenharmony_ci{
9468c2ecf20Sopenharmony_ci	if (!is_aligned_req(pgio, req, PAGE_SIZE, true))
9478c2ecf20Sopenharmony_ci		return 0;
9488c2ecf20Sopenharmony_ci	return pnfs_generic_pg_test(pgio, prev, req);
9498c2ecf20Sopenharmony_ci}
9508c2ecf20Sopenharmony_ci
9518c2ecf20Sopenharmony_cistatic const struct nfs_pageio_ops bl_pg_read_ops = {
9528c2ecf20Sopenharmony_ci	.pg_init = bl_pg_init_read,
9538c2ecf20Sopenharmony_ci	.pg_test = bl_pg_test_read,
9548c2ecf20Sopenharmony_ci	.pg_doio = pnfs_generic_pg_readpages,
9558c2ecf20Sopenharmony_ci	.pg_cleanup = pnfs_generic_pg_cleanup,
9568c2ecf20Sopenharmony_ci};
9578c2ecf20Sopenharmony_ci
9588c2ecf20Sopenharmony_cistatic const struct nfs_pageio_ops bl_pg_write_ops = {
9598c2ecf20Sopenharmony_ci	.pg_init = bl_pg_init_write,
9608c2ecf20Sopenharmony_ci	.pg_test = bl_pg_test_write,
9618c2ecf20Sopenharmony_ci	.pg_doio = pnfs_generic_pg_writepages,
9628c2ecf20Sopenharmony_ci	.pg_cleanup = pnfs_generic_pg_cleanup,
9638c2ecf20Sopenharmony_ci};
9648c2ecf20Sopenharmony_ci
9658c2ecf20Sopenharmony_cistatic struct pnfs_layoutdriver_type blocklayout_type = {
9668c2ecf20Sopenharmony_ci	.id				= LAYOUT_BLOCK_VOLUME,
9678c2ecf20Sopenharmony_ci	.name				= "LAYOUT_BLOCK_VOLUME",
9688c2ecf20Sopenharmony_ci	.owner				= THIS_MODULE,
9698c2ecf20Sopenharmony_ci	.flags				= PNFS_LAYOUTRET_ON_SETATTR |
9708c2ecf20Sopenharmony_ci					  PNFS_LAYOUTRET_ON_ERROR |
9718c2ecf20Sopenharmony_ci					  PNFS_READ_WHOLE_PAGE,
9728c2ecf20Sopenharmony_ci	.read_pagelist			= bl_read_pagelist,
9738c2ecf20Sopenharmony_ci	.write_pagelist			= bl_write_pagelist,
9748c2ecf20Sopenharmony_ci	.alloc_layout_hdr		= bl_alloc_layout_hdr,
9758c2ecf20Sopenharmony_ci	.free_layout_hdr		= bl_free_layout_hdr,
9768c2ecf20Sopenharmony_ci	.alloc_lseg			= bl_alloc_lseg,
9778c2ecf20Sopenharmony_ci	.free_lseg			= bl_free_lseg,
9788c2ecf20Sopenharmony_ci	.return_range			= bl_return_range,
9798c2ecf20Sopenharmony_ci	.prepare_layoutcommit		= bl_prepare_layoutcommit,
9808c2ecf20Sopenharmony_ci	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
9818c2ecf20Sopenharmony_ci	.set_layoutdriver		= bl_set_layoutdriver,
9828c2ecf20Sopenharmony_ci	.alloc_deviceid_node		= bl_alloc_deviceid_node,
9838c2ecf20Sopenharmony_ci	.free_deviceid_node		= bl_free_deviceid_node,
9848c2ecf20Sopenharmony_ci	.pg_read_ops			= &bl_pg_read_ops,
9858c2ecf20Sopenharmony_ci	.pg_write_ops			= &bl_pg_write_ops,
9868c2ecf20Sopenharmony_ci	.sync				= pnfs_generic_sync,
9878c2ecf20Sopenharmony_ci};
9888c2ecf20Sopenharmony_ci
9898c2ecf20Sopenharmony_cistatic struct pnfs_layoutdriver_type scsilayout_type = {
9908c2ecf20Sopenharmony_ci	.id				= LAYOUT_SCSI,
9918c2ecf20Sopenharmony_ci	.name				= "LAYOUT_SCSI",
9928c2ecf20Sopenharmony_ci	.owner				= THIS_MODULE,
9938c2ecf20Sopenharmony_ci	.flags				= PNFS_LAYOUTRET_ON_SETATTR |
9948c2ecf20Sopenharmony_ci					  PNFS_LAYOUTRET_ON_ERROR |
9958c2ecf20Sopenharmony_ci					  PNFS_READ_WHOLE_PAGE,
9968c2ecf20Sopenharmony_ci	.read_pagelist			= bl_read_pagelist,
9978c2ecf20Sopenharmony_ci	.write_pagelist			= bl_write_pagelist,
9988c2ecf20Sopenharmony_ci	.alloc_layout_hdr		= sl_alloc_layout_hdr,
9998c2ecf20Sopenharmony_ci	.free_layout_hdr		= bl_free_layout_hdr,
10008c2ecf20Sopenharmony_ci	.alloc_lseg			= bl_alloc_lseg,
10018c2ecf20Sopenharmony_ci	.free_lseg			= bl_free_lseg,
10028c2ecf20Sopenharmony_ci	.return_range			= bl_return_range,
10038c2ecf20Sopenharmony_ci	.prepare_layoutcommit		= bl_prepare_layoutcommit,
10048c2ecf20Sopenharmony_ci	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
10058c2ecf20Sopenharmony_ci	.set_layoutdriver		= bl_set_layoutdriver,
10068c2ecf20Sopenharmony_ci	.alloc_deviceid_node		= bl_alloc_deviceid_node,
10078c2ecf20Sopenharmony_ci	.free_deviceid_node		= bl_free_deviceid_node,
10088c2ecf20Sopenharmony_ci	.pg_read_ops			= &bl_pg_read_ops,
10098c2ecf20Sopenharmony_ci	.pg_write_ops			= &bl_pg_write_ops,
10108c2ecf20Sopenharmony_ci	.sync				= pnfs_generic_sync,
10118c2ecf20Sopenharmony_ci};
10128c2ecf20Sopenharmony_ci
10138c2ecf20Sopenharmony_ci
10148c2ecf20Sopenharmony_cistatic int __init nfs4blocklayout_init(void)
10158c2ecf20Sopenharmony_ci{
10168c2ecf20Sopenharmony_ci	int ret;
10178c2ecf20Sopenharmony_ci
10188c2ecf20Sopenharmony_ci	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
10198c2ecf20Sopenharmony_ci
10208c2ecf20Sopenharmony_ci	ret = bl_init_pipefs();
10218c2ecf20Sopenharmony_ci	if (ret)
10228c2ecf20Sopenharmony_ci		goto out;
10238c2ecf20Sopenharmony_ci
10248c2ecf20Sopenharmony_ci	ret = pnfs_register_layoutdriver(&blocklayout_type);
10258c2ecf20Sopenharmony_ci	if (ret)
10268c2ecf20Sopenharmony_ci		goto out_cleanup_pipe;
10278c2ecf20Sopenharmony_ci
10288c2ecf20Sopenharmony_ci	ret = pnfs_register_layoutdriver(&scsilayout_type);
10298c2ecf20Sopenharmony_ci	if (ret)
10308c2ecf20Sopenharmony_ci		goto out_unregister_block;
10318c2ecf20Sopenharmony_ci	return 0;
10328c2ecf20Sopenharmony_ci
10338c2ecf20Sopenharmony_ciout_unregister_block:
10348c2ecf20Sopenharmony_ci	pnfs_unregister_layoutdriver(&blocklayout_type);
10358c2ecf20Sopenharmony_ciout_cleanup_pipe:
10368c2ecf20Sopenharmony_ci	bl_cleanup_pipefs();
10378c2ecf20Sopenharmony_ciout:
10388c2ecf20Sopenharmony_ci	return ret;
10398c2ecf20Sopenharmony_ci}
10408c2ecf20Sopenharmony_ci
10418c2ecf20Sopenharmony_cistatic void __exit nfs4blocklayout_exit(void)
10428c2ecf20Sopenharmony_ci{
10438c2ecf20Sopenharmony_ci	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
10448c2ecf20Sopenharmony_ci	       __func__);
10458c2ecf20Sopenharmony_ci
10468c2ecf20Sopenharmony_ci	pnfs_unregister_layoutdriver(&scsilayout_type);
10478c2ecf20Sopenharmony_ci	pnfs_unregister_layoutdriver(&blocklayout_type);
10488c2ecf20Sopenharmony_ci	bl_cleanup_pipefs();
10498c2ecf20Sopenharmony_ci}
10508c2ecf20Sopenharmony_ci
/*
 * Module aliases for the two layout types served by this module.
 * NOTE(review): presumably the trailing 3 and 5 are the numeric values of
 * LAYOUT_BLOCK_VOLUME and LAYOUT_SCSI -- confirm against their definition.
 */
MODULE_ALIAS("nfs-layouttype4-3");
MODULE_ALIAS("nfs-layouttype4-5");

/* Entry points run at module load and unload. */
module_init(nfs4blocklayout_init);
module_exit(nfs4blocklayout_exit);
1056