162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * linux/fs/nfs/blocklayout/blocklayout.c 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * Module for the NFSv4.1 pNFS block layout driver. 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * Copyright (c) 2006 The Regents of the University of Michigan. 762306a36Sopenharmony_ci * All rights reserved. 862306a36Sopenharmony_ci * 962306a36Sopenharmony_ci * Andy Adamson <andros@citi.umich.edu> 1062306a36Sopenharmony_ci * Fred Isaman <iisaman@umich.edu> 1162306a36Sopenharmony_ci * 1262306a36Sopenharmony_ci * permission is granted to use, copy, create derivative works and 1362306a36Sopenharmony_ci * redistribute this software and such derivative works for any purpose, 1462306a36Sopenharmony_ci * so long as the name of the university of michigan is not used in 1562306a36Sopenharmony_ci * any advertising or publicity pertaining to the use or distribution 1662306a36Sopenharmony_ci * of this software without specific, written prior authorization. if 1762306a36Sopenharmony_ci * the above copyright notice or any other identification of the 1862306a36Sopenharmony_ci * university of michigan is included in any copy of any portion of 1962306a36Sopenharmony_ci * this software, then the disclaimer below must also be included. 2062306a36Sopenharmony_ci * 2162306a36Sopenharmony_ci * this software is provided as is, without representation from the 2262306a36Sopenharmony_ci * university of michigan as to its fitness for any purpose, and without 2362306a36Sopenharmony_ci * warranty by the university of michigan of any kind, either express 2462306a36Sopenharmony_ci * or implied, including without limitation the implied warranties of 2562306a36Sopenharmony_ci * merchantability and fitness for a particular purpose. the regents 2662306a36Sopenharmony_ci * of the university of michigan shall not be liable for any damages, 2762306a36Sopenharmony_ci * including special, indirect, incidental, or consequential damages, 2862306a36Sopenharmony_ci * with respect to any claim arising out or in connection with the use 2962306a36Sopenharmony_ci * of the software, even if it has been or is hereafter advised of the 3062306a36Sopenharmony_ci * possibility of such damages. 3162306a36Sopenharmony_ci */ 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci#include <linux/module.h> 3462306a36Sopenharmony_ci#include <linux/init.h> 3562306a36Sopenharmony_ci#include <linux/mount.h> 3662306a36Sopenharmony_ci#include <linux/namei.h> 3762306a36Sopenharmony_ci#include <linux/bio.h> /* struct bio */ 3862306a36Sopenharmony_ci#include <linux/prefetch.h> 3962306a36Sopenharmony_ci#include <linux/pagevec.h> 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci#include "../pnfs.h" 4262306a36Sopenharmony_ci#include "../nfs4session.h" 4362306a36Sopenharmony_ci#include "../internal.h" 4462306a36Sopenharmony_ci#include "blocklayout.h" 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci#define NFSDBG_FACILITY NFSDBG_PNFS_LD 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 4962306a36Sopenharmony_ciMODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); 5062306a36Sopenharmony_ciMODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_cistatic bool is_hole(struct pnfs_block_extent *be) 5362306a36Sopenharmony_ci{ 5462306a36Sopenharmony_ci switch (be->be_state) { 5562306a36Sopenharmony_ci case PNFS_BLOCK_NONE_DATA: 5662306a36Sopenharmony_ci return true; 5762306a36Sopenharmony_ci case PNFS_BLOCK_INVALID_DATA: 5862306a36Sopenharmony_ci return be->be_tag ? false : true; 5962306a36Sopenharmony_ci default: 6062306a36Sopenharmony_ci return false; 6162306a36Sopenharmony_ci } 6262306a36Sopenharmony_ci} 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci/* The data we are handed might be spread across several bios. We need 6562306a36Sopenharmony_ci * to track when the last one is finished. 6662306a36Sopenharmony_ci */ 6762306a36Sopenharmony_cistruct parallel_io { 6862306a36Sopenharmony_ci struct kref refcnt; 6962306a36Sopenharmony_ci void (*pnfs_callback) (void *data); 7062306a36Sopenharmony_ci void *data; 7162306a36Sopenharmony_ci}; 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_cistatic inline struct parallel_io *alloc_parallel(void *data) 7462306a36Sopenharmony_ci{ 7562306a36Sopenharmony_ci struct parallel_io *rv; 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_ci rv = kmalloc(sizeof(*rv), GFP_NOFS); 7862306a36Sopenharmony_ci if (rv) { 7962306a36Sopenharmony_ci rv->data = data; 8062306a36Sopenharmony_ci kref_init(&rv->refcnt); 8162306a36Sopenharmony_ci } 8262306a36Sopenharmony_ci return rv; 8362306a36Sopenharmony_ci} 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_cistatic inline void get_parallel(struct parallel_io *p) 8662306a36Sopenharmony_ci{ 8762306a36Sopenharmony_ci kref_get(&p->refcnt); 8862306a36Sopenharmony_ci} 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_cistatic void destroy_parallel(struct kref *kref) 9162306a36Sopenharmony_ci{ 9262306a36Sopenharmony_ci struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci dprintk("%s enter\n", __func__); 9562306a36Sopenharmony_ci p->pnfs_callback(p->data); 9662306a36Sopenharmony_ci kfree(p); 9762306a36Sopenharmony_ci} 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_cistatic inline void put_parallel(struct parallel_io *p) 10062306a36Sopenharmony_ci{ 10162306a36Sopenharmony_ci kref_put(&p->refcnt, destroy_parallel); 10262306a36Sopenharmony_ci} 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_cistatic struct bio * 10562306a36Sopenharmony_cibl_submit_bio(struct bio *bio) 10662306a36Sopenharmony_ci{ 10762306a36Sopenharmony_ci if (bio) { 10862306a36Sopenharmony_ci get_parallel(bio->bi_private); 10962306a36Sopenharmony_ci dprintk("%s submitting %s bio %u@%llu\n", __func__, 11062306a36Sopenharmony_ci bio_op(bio) == READ ? "read" : "write", 11162306a36Sopenharmony_ci bio->bi_iter.bi_size, 11262306a36Sopenharmony_ci (unsigned long long)bio->bi_iter.bi_sector); 11362306a36Sopenharmony_ci submit_bio(bio); 11462306a36Sopenharmony_ci } 11562306a36Sopenharmony_ci return NULL; 11662306a36Sopenharmony_ci} 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_cistatic bool offset_in_map(u64 offset, struct pnfs_block_dev_map *map) 11962306a36Sopenharmony_ci{ 12062306a36Sopenharmony_ci return offset >= map->start && offset < map->start + map->len; 12162306a36Sopenharmony_ci} 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_cistatic struct bio * 12462306a36Sopenharmony_cido_add_page_to_bio(struct bio *bio, int npg, enum req_op op, sector_t isect, 12562306a36Sopenharmony_ci struct page *page, struct pnfs_block_dev_map *map, 12662306a36Sopenharmony_ci struct pnfs_block_extent *be, bio_end_io_t end_io, 12762306a36Sopenharmony_ci struct parallel_io *par, unsigned int offset, int *len) 12862306a36Sopenharmony_ci{ 12962306a36Sopenharmony_ci struct pnfs_block_dev *dev = 13062306a36Sopenharmony_ci container_of(be->be_device, struct pnfs_block_dev, node); 13162306a36Sopenharmony_ci u64 disk_addr, end; 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, 13462306a36Sopenharmony_ci npg, (__force u32)op, (unsigned long long)isect, offset, *len); 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_ci /* translate to device offset */ 13762306a36Sopenharmony_ci isect += be->be_v_offset; 13862306a36Sopenharmony_ci isect -= be->be_f_offset; 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci /* translate to physical disk offset */ 14162306a36Sopenharmony_ci disk_addr = (u64)isect << SECTOR_SHIFT; 14262306a36Sopenharmony_ci if (!offset_in_map(disk_addr, map)) { 14362306a36Sopenharmony_ci if (!dev->map(dev, disk_addr, map) || !offset_in_map(disk_addr, map)) 14462306a36Sopenharmony_ci return ERR_PTR(-EIO); 14562306a36Sopenharmony_ci bio = bl_submit_bio(bio); 14662306a36Sopenharmony_ci } 14762306a36Sopenharmony_ci disk_addr += map->disk_offset; 14862306a36Sopenharmony_ci disk_addr -= map->start; 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci /* limit length to what the device mapping allows */ 15162306a36Sopenharmony_ci end = disk_addr + *len; 15262306a36Sopenharmony_ci if (end >= map->start + map->len) 15362306a36Sopenharmony_ci *len = map->start + map->len - disk_addr; 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ciretry: 15662306a36Sopenharmony_ci if (!bio) { 15762306a36Sopenharmony_ci bio = bio_alloc(map->bdev, bio_max_segs(npg), op, GFP_NOIO); 15862306a36Sopenharmony_ci bio->bi_iter.bi_sector = disk_addr >> SECTOR_SHIFT; 15962306a36Sopenharmony_ci bio->bi_end_io = end_io; 16062306a36Sopenharmony_ci bio->bi_private = par; 16162306a36Sopenharmony_ci } 16262306a36Sopenharmony_ci if (bio_add_page(bio, page, *len, offset) < *len) { 16362306a36Sopenharmony_ci bio = bl_submit_bio(bio); 16462306a36Sopenharmony_ci goto retry; 16562306a36Sopenharmony_ci } 16662306a36Sopenharmony_ci return bio; 16762306a36Sopenharmony_ci} 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_cistatic void bl_mark_devices_unavailable(struct nfs_pgio_header *header, bool rw) 17062306a36Sopenharmony_ci{ 17162306a36Sopenharmony_ci struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); 17262306a36Sopenharmony_ci size_t bytes_left = header->args.count; 17362306a36Sopenharmony_ci sector_t isect, extent_length = 0; 17462306a36Sopenharmony_ci struct pnfs_block_extent be; 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci isect = header->args.offset >> SECTOR_SHIFT; 17762306a36Sopenharmony_ci bytes_left += header->args.offset - (isect << SECTOR_SHIFT); 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci while (bytes_left > 0) { 18062306a36Sopenharmony_ci if (!ext_tree_lookup(bl, isect, &be, rw)) 18162306a36Sopenharmony_ci return; 18262306a36Sopenharmony_ci extent_length = be.be_length - (isect - be.be_f_offset); 18362306a36Sopenharmony_ci nfs4_mark_deviceid_unavailable(be.be_device); 18462306a36Sopenharmony_ci isect += extent_length; 18562306a36Sopenharmony_ci if (bytes_left > extent_length << SECTOR_SHIFT) 18662306a36Sopenharmony_ci bytes_left -= extent_length << SECTOR_SHIFT; 18762306a36Sopenharmony_ci else 18862306a36Sopenharmony_ci bytes_left = 0; 18962306a36Sopenharmony_ci } 19062306a36Sopenharmony_ci} 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_cistatic void bl_end_io_read(struct bio *bio) 19362306a36Sopenharmony_ci{ 19462306a36Sopenharmony_ci struct parallel_io *par = bio->bi_private; 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci if (bio->bi_status) { 19762306a36Sopenharmony_ci struct nfs_pgio_header *header = par->data; 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci if (!header->pnfs_error) 20062306a36Sopenharmony_ci header->pnfs_error = -EIO; 20162306a36Sopenharmony_ci pnfs_set_lo_fail(header->lseg); 20262306a36Sopenharmony_ci bl_mark_devices_unavailable(header, false); 20362306a36Sopenharmony_ci } 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci bio_put(bio); 20662306a36Sopenharmony_ci put_parallel(par); 20762306a36Sopenharmony_ci} 20862306a36Sopenharmony_ci 20962306a36Sopenharmony_cistatic void bl_read_cleanup(struct work_struct *work) 21062306a36Sopenharmony_ci{ 21162306a36Sopenharmony_ci struct rpc_task *task; 21262306a36Sopenharmony_ci struct nfs_pgio_header *hdr; 21362306a36Sopenharmony_ci dprintk("%s enter\n", __func__); 21462306a36Sopenharmony_ci task = container_of(work, struct rpc_task, u.tk_work); 21562306a36Sopenharmony_ci hdr = container_of(task, struct nfs_pgio_header, task); 21662306a36Sopenharmony_ci pnfs_ld_read_done(hdr); 21762306a36Sopenharmony_ci} 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_cistatic void 22062306a36Sopenharmony_cibl_end_par_io_read(void *data) 22162306a36Sopenharmony_ci{ 22262306a36Sopenharmony_ci struct nfs_pgio_header *hdr = data; 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci hdr->task.tk_status = hdr->pnfs_error; 22562306a36Sopenharmony_ci INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup); 22662306a36Sopenharmony_ci schedule_work(&hdr->task.u.tk_work); 22762306a36Sopenharmony_ci} 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_cistatic enum pnfs_try_status 23062306a36Sopenharmony_cibl_read_pagelist(struct nfs_pgio_header *header) 23162306a36Sopenharmony_ci{ 23262306a36Sopenharmony_ci struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); 23362306a36Sopenharmony_ci struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 }; 23462306a36Sopenharmony_ci struct bio *bio = NULL; 23562306a36Sopenharmony_ci struct pnfs_block_extent be; 23662306a36Sopenharmony_ci sector_t isect, extent_length = 0; 23762306a36Sopenharmony_ci struct parallel_io *par; 23862306a36Sopenharmony_ci loff_t f_offset = header->args.offset; 23962306a36Sopenharmony_ci size_t bytes_left = header->args.count; 24062306a36Sopenharmony_ci unsigned int pg_offset = header->args.pgbase, pg_len; 24162306a36Sopenharmony_ci struct page **pages = header->args.pages; 24262306a36Sopenharmony_ci int pg_index = header->args.pgbase >> PAGE_SHIFT; 24362306a36Sopenharmony_ci const bool is_dio = (header->dreq != NULL); 24462306a36Sopenharmony_ci struct blk_plug plug; 24562306a36Sopenharmony_ci int i; 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_ci dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, 24862306a36Sopenharmony_ci header->page_array.npages, f_offset, 24962306a36Sopenharmony_ci (unsigned int)header->args.count); 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci par = alloc_parallel(header); 25262306a36Sopenharmony_ci if (!par) 25362306a36Sopenharmony_ci return PNFS_NOT_ATTEMPTED; 25462306a36Sopenharmony_ci par->pnfs_callback = bl_end_par_io_read; 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci blk_start_plug(&plug); 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci isect = (sector_t) (f_offset >> SECTOR_SHIFT); 25962306a36Sopenharmony_ci /* Code assumes extents are page-aligned */ 26062306a36Sopenharmony_ci for (i = pg_index; i < header->page_array.npages; i++) { 26162306a36Sopenharmony_ci if (extent_length <= 0) { 26262306a36Sopenharmony_ci /* We've used up the previous extent */ 26362306a36Sopenharmony_ci bio = bl_submit_bio(bio); 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci /* Get the next one */ 26662306a36Sopenharmony_ci if (!ext_tree_lookup(bl, isect, &be, false)) { 26762306a36Sopenharmony_ci header->pnfs_error = -EIO; 26862306a36Sopenharmony_ci goto out; 26962306a36Sopenharmony_ci } 27062306a36Sopenharmony_ci extent_length = be.be_length - (isect - be.be_f_offset); 27162306a36Sopenharmony_ci } 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci if (is_dio) { 27462306a36Sopenharmony_ci if (pg_offset + bytes_left > PAGE_SIZE) 27562306a36Sopenharmony_ci pg_len = PAGE_SIZE - pg_offset; 27662306a36Sopenharmony_ci else 27762306a36Sopenharmony_ci pg_len = bytes_left; 27862306a36Sopenharmony_ci } else { 27962306a36Sopenharmony_ci BUG_ON(pg_offset != 0); 28062306a36Sopenharmony_ci pg_len = PAGE_SIZE; 28162306a36Sopenharmony_ci } 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci if (is_hole(&be)) { 28462306a36Sopenharmony_ci bio = bl_submit_bio(bio); 28562306a36Sopenharmony_ci /* Fill hole w/ zeroes w/o accessing device */ 28662306a36Sopenharmony_ci dprintk("%s Zeroing page for hole\n", __func__); 28762306a36Sopenharmony_ci zero_user_segment(pages[i], pg_offset, pg_len); 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci /* invalidate map */ 29062306a36Sopenharmony_ci map.start = NFS4_MAX_UINT64; 29162306a36Sopenharmony_ci } else { 29262306a36Sopenharmony_ci bio = do_add_page_to_bio(bio, 29362306a36Sopenharmony_ci header->page_array.npages - i, 29462306a36Sopenharmony_ci REQ_OP_READ, 29562306a36Sopenharmony_ci isect, pages[i], &map, &be, 29662306a36Sopenharmony_ci bl_end_io_read, par, 29762306a36Sopenharmony_ci pg_offset, &pg_len); 29862306a36Sopenharmony_ci if (IS_ERR(bio)) { 29962306a36Sopenharmony_ci header->pnfs_error = PTR_ERR(bio); 30062306a36Sopenharmony_ci bio = NULL; 30162306a36Sopenharmony_ci goto out; 30262306a36Sopenharmony_ci } 30362306a36Sopenharmony_ci } 30462306a36Sopenharmony_ci isect += (pg_len >> SECTOR_SHIFT); 30562306a36Sopenharmony_ci extent_length -= (pg_len >> SECTOR_SHIFT); 30662306a36Sopenharmony_ci f_offset += pg_len; 30762306a36Sopenharmony_ci bytes_left -= pg_len; 30862306a36Sopenharmony_ci pg_offset = 0; 30962306a36Sopenharmony_ci } 31062306a36Sopenharmony_ci if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { 31162306a36Sopenharmony_ci header->res.eof = 1; 31262306a36Sopenharmony_ci header->res.count = header->inode->i_size - header->args.offset; 31362306a36Sopenharmony_ci } else { 31462306a36Sopenharmony_ci header->res.count = (isect << SECTOR_SHIFT) - header->args.offset; 31562306a36Sopenharmony_ci } 31662306a36Sopenharmony_ciout: 31762306a36Sopenharmony_ci bl_submit_bio(bio); 31862306a36Sopenharmony_ci blk_finish_plug(&plug); 31962306a36Sopenharmony_ci put_parallel(par); 32062306a36Sopenharmony_ci return PNFS_ATTEMPTED; 32162306a36Sopenharmony_ci} 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_cistatic void bl_end_io_write(struct bio *bio) 32462306a36Sopenharmony_ci{ 32562306a36Sopenharmony_ci struct parallel_io *par = bio->bi_private; 32662306a36Sopenharmony_ci struct nfs_pgio_header *header = par->data; 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci if (bio->bi_status) { 32962306a36Sopenharmony_ci if (!header->pnfs_error) 33062306a36Sopenharmony_ci header->pnfs_error = -EIO; 33162306a36Sopenharmony_ci pnfs_set_lo_fail(header->lseg); 33262306a36Sopenharmony_ci bl_mark_devices_unavailable(header, true); 33362306a36Sopenharmony_ci } 33462306a36Sopenharmony_ci bio_put(bio); 33562306a36Sopenharmony_ci put_parallel(par); 33662306a36Sopenharmony_ci} 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci/* Function scheduled for call during bl_end_par_io_write, 33962306a36Sopenharmony_ci * it marks sectors as written and extends the commitlist. 34062306a36Sopenharmony_ci */ 34162306a36Sopenharmony_cistatic void bl_write_cleanup(struct work_struct *work) 34262306a36Sopenharmony_ci{ 34362306a36Sopenharmony_ci struct rpc_task *task = container_of(work, struct rpc_task, u.tk_work); 34462306a36Sopenharmony_ci struct nfs_pgio_header *hdr = 34562306a36Sopenharmony_ci container_of(task, struct nfs_pgio_header, task); 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci dprintk("%s enter\n", __func__); 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ci if (likely(!hdr->pnfs_error)) { 35062306a36Sopenharmony_ci struct pnfs_block_layout *bl = BLK_LSEG2EXT(hdr->lseg); 35162306a36Sopenharmony_ci u64 start = hdr->args.offset & (loff_t)PAGE_MASK; 35262306a36Sopenharmony_ci u64 end = (hdr->args.offset + hdr->args.count + 35362306a36Sopenharmony_ci PAGE_SIZE - 1) & (loff_t)PAGE_MASK; 35462306a36Sopenharmony_ci u64 lwb = hdr->args.offset + hdr->args.count; 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_ci ext_tree_mark_written(bl, start >> SECTOR_SHIFT, 35762306a36Sopenharmony_ci (end - start) >> SECTOR_SHIFT, lwb); 35862306a36Sopenharmony_ci } 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci pnfs_ld_write_done(hdr); 36162306a36Sopenharmony_ci} 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ci/* Called when last of bios associated with a bl_write_pagelist call finishes */ 36462306a36Sopenharmony_cistatic void bl_end_par_io_write(void *data) 36562306a36Sopenharmony_ci{ 36662306a36Sopenharmony_ci struct nfs_pgio_header *hdr = data; 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci hdr->task.tk_status = hdr->pnfs_error; 36962306a36Sopenharmony_ci hdr->verf.committed = NFS_FILE_SYNC; 37062306a36Sopenharmony_ci INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup); 37162306a36Sopenharmony_ci schedule_work(&hdr->task.u.tk_work); 37262306a36Sopenharmony_ci} 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_cistatic enum pnfs_try_status 37562306a36Sopenharmony_cibl_write_pagelist(struct nfs_pgio_header *header, int sync) 37662306a36Sopenharmony_ci{ 37762306a36Sopenharmony_ci struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); 37862306a36Sopenharmony_ci struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 }; 37962306a36Sopenharmony_ci struct bio *bio = NULL; 38062306a36Sopenharmony_ci struct pnfs_block_extent be; 38162306a36Sopenharmony_ci sector_t isect, extent_length = 0; 38262306a36Sopenharmony_ci struct parallel_io *par = NULL; 38362306a36Sopenharmony_ci loff_t offset = header->args.offset; 38462306a36Sopenharmony_ci size_t count = header->args.count; 38562306a36Sopenharmony_ci struct page **pages = header->args.pages; 38662306a36Sopenharmony_ci int pg_index = header->args.pgbase >> PAGE_SHIFT; 38762306a36Sopenharmony_ci unsigned int pg_len; 38862306a36Sopenharmony_ci struct blk_plug plug; 38962306a36Sopenharmony_ci int i; 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_ci dprintk("%s enter, %zu@%lld\n", __func__, count, offset); 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci /* At this point, header->page_aray is a (sequential) list of nfs_pages. 39462306a36Sopenharmony_ci * We want to write each, and if there is an error set pnfs_error 39562306a36Sopenharmony_ci * to have it redone using nfs. 39662306a36Sopenharmony_ci */ 39762306a36Sopenharmony_ci par = alloc_parallel(header); 39862306a36Sopenharmony_ci if (!par) 39962306a36Sopenharmony_ci return PNFS_NOT_ATTEMPTED; 40062306a36Sopenharmony_ci par->pnfs_callback = bl_end_par_io_write; 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_ci blk_start_plug(&plug); 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ci /* we always write out the whole page */ 40562306a36Sopenharmony_ci offset = offset & (loff_t)PAGE_MASK; 40662306a36Sopenharmony_ci isect = offset >> SECTOR_SHIFT; 40762306a36Sopenharmony_ci 40862306a36Sopenharmony_ci for (i = pg_index; i < header->page_array.npages; i++) { 40962306a36Sopenharmony_ci if (extent_length <= 0) { 41062306a36Sopenharmony_ci /* We've used up the previous extent */ 41162306a36Sopenharmony_ci bio = bl_submit_bio(bio); 41262306a36Sopenharmony_ci /* Get the next one */ 41362306a36Sopenharmony_ci if (!ext_tree_lookup(bl, isect, &be, true)) { 41462306a36Sopenharmony_ci header->pnfs_error = -EINVAL; 41562306a36Sopenharmony_ci goto out; 41662306a36Sopenharmony_ci } 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_ci extent_length = be.be_length - (isect - be.be_f_offset); 41962306a36Sopenharmony_ci } 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci pg_len = PAGE_SIZE; 42262306a36Sopenharmony_ci bio = do_add_page_to_bio(bio, header->page_array.npages - i, 42362306a36Sopenharmony_ci REQ_OP_WRITE, isect, pages[i], &map, 42462306a36Sopenharmony_ci &be, bl_end_io_write, par, 0, &pg_len); 42562306a36Sopenharmony_ci if (IS_ERR(bio)) { 42662306a36Sopenharmony_ci header->pnfs_error = PTR_ERR(bio); 42762306a36Sopenharmony_ci bio = NULL; 42862306a36Sopenharmony_ci goto out; 42962306a36Sopenharmony_ci } 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci offset += pg_len; 43262306a36Sopenharmony_ci count -= pg_len; 43362306a36Sopenharmony_ci isect += (pg_len >> SECTOR_SHIFT); 43462306a36Sopenharmony_ci extent_length -= (pg_len >> SECTOR_SHIFT); 43562306a36Sopenharmony_ci } 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_ci header->res.count = header->args.count; 43862306a36Sopenharmony_ciout: 43962306a36Sopenharmony_ci bl_submit_bio(bio); 44062306a36Sopenharmony_ci blk_finish_plug(&plug); 44162306a36Sopenharmony_ci put_parallel(par); 44262306a36Sopenharmony_ci return PNFS_ATTEMPTED; 44362306a36Sopenharmony_ci} 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_cistatic void bl_free_layout_hdr(struct pnfs_layout_hdr *lo) 44662306a36Sopenharmony_ci{ 44762306a36Sopenharmony_ci struct pnfs_block_layout *bl = BLK_LO2EXT(lo); 44862306a36Sopenharmony_ci int err; 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci dprintk("%s enter\n", __func__); 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_ci err = ext_tree_remove(bl, true, 0, LLONG_MAX); 45362306a36Sopenharmony_ci WARN_ON(err); 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci kfree_rcu(bl, bl_layout.plh_rcu); 45662306a36Sopenharmony_ci} 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_cistatic struct pnfs_layout_hdr *__bl_alloc_layout_hdr(struct inode *inode, 45962306a36Sopenharmony_ci gfp_t gfp_flags, bool is_scsi_layout) 46062306a36Sopenharmony_ci{ 46162306a36Sopenharmony_ci struct pnfs_block_layout *bl; 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci dprintk("%s enter\n", __func__); 46462306a36Sopenharmony_ci bl = kzalloc(sizeof(*bl), gfp_flags); 46562306a36Sopenharmony_ci if (!bl) 46662306a36Sopenharmony_ci return NULL; 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci bl->bl_ext_rw = RB_ROOT; 46962306a36Sopenharmony_ci bl->bl_ext_ro = RB_ROOT; 47062306a36Sopenharmony_ci spin_lock_init(&bl->bl_ext_lock); 47162306a36Sopenharmony_ci 47262306a36Sopenharmony_ci bl->bl_scsi_layout = is_scsi_layout; 47362306a36Sopenharmony_ci return &bl->bl_layout; 47462306a36Sopenharmony_ci} 47562306a36Sopenharmony_ci 47662306a36Sopenharmony_cistatic struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode, 47762306a36Sopenharmony_ci gfp_t gfp_flags) 47862306a36Sopenharmony_ci{ 47962306a36Sopenharmony_ci return __bl_alloc_layout_hdr(inode, gfp_flags, false); 48062306a36Sopenharmony_ci} 48162306a36Sopenharmony_ci 48262306a36Sopenharmony_cistatic struct pnfs_layout_hdr *sl_alloc_layout_hdr(struct inode *inode, 48362306a36Sopenharmony_ci gfp_t gfp_flags) 48462306a36Sopenharmony_ci{ 48562306a36Sopenharmony_ci return __bl_alloc_layout_hdr(inode, gfp_flags, true); 48662306a36Sopenharmony_ci} 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_cistatic void bl_free_lseg(struct pnfs_layout_segment *lseg) 48962306a36Sopenharmony_ci{ 49062306a36Sopenharmony_ci dprintk("%s enter\n", __func__); 49162306a36Sopenharmony_ci kfree(lseg); 49262306a36Sopenharmony_ci} 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_ci/* Tracks info needed to ensure extents in layout obey constraints of spec */ 49562306a36Sopenharmony_cistruct layout_verification { 49662306a36Sopenharmony_ci u32 mode; /* R or RW */ 49762306a36Sopenharmony_ci u64 start; /* Expected start of next non-COW extent */ 49862306a36Sopenharmony_ci u64 inval; /* Start of INVAL coverage */ 49962306a36Sopenharmony_ci u64 cowread; /* End of COW read coverage */ 50062306a36Sopenharmony_ci}; 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci/* Verify the extent meets the layout requirements of the pnfs-block draft, 50362306a36Sopenharmony_ci * section 2.3.1. 50462306a36Sopenharmony_ci */ 50562306a36Sopenharmony_cistatic int verify_extent(struct pnfs_block_extent *be, 50662306a36Sopenharmony_ci struct layout_verification *lv) 50762306a36Sopenharmony_ci{ 50862306a36Sopenharmony_ci if (lv->mode == IOMODE_READ) { 50962306a36Sopenharmony_ci if (be->be_state == PNFS_BLOCK_READWRITE_DATA || 51062306a36Sopenharmony_ci be->be_state == PNFS_BLOCK_INVALID_DATA) 51162306a36Sopenharmony_ci return -EIO; 51262306a36Sopenharmony_ci if (be->be_f_offset != lv->start) 51362306a36Sopenharmony_ci return -EIO; 51462306a36Sopenharmony_ci lv->start += be->be_length; 51562306a36Sopenharmony_ci return 0; 51662306a36Sopenharmony_ci } 51762306a36Sopenharmony_ci /* lv->mode == IOMODE_RW */ 51862306a36Sopenharmony_ci if (be->be_state == PNFS_BLOCK_READWRITE_DATA) { 51962306a36Sopenharmony_ci if (be->be_f_offset != lv->start) 52062306a36Sopenharmony_ci return -EIO; 52162306a36Sopenharmony_ci if (lv->cowread > lv->start) 52262306a36Sopenharmony_ci return -EIO; 52362306a36Sopenharmony_ci lv->start += be->be_length; 52462306a36Sopenharmony_ci lv->inval = lv->start; 52562306a36Sopenharmony_ci return 0; 52662306a36Sopenharmony_ci } else if (be->be_state == PNFS_BLOCK_INVALID_DATA) { 52762306a36Sopenharmony_ci if (be->be_f_offset != lv->start) 52862306a36Sopenharmony_ci return -EIO; 52962306a36Sopenharmony_ci lv->start += be->be_length; 53062306a36Sopenharmony_ci return 0; 53162306a36Sopenharmony_ci } else if (be->be_state == PNFS_BLOCK_READ_DATA) { 53262306a36Sopenharmony_ci if (be->be_f_offset > lv->start) 53362306a36Sopenharmony_ci return -EIO; 53462306a36Sopenharmony_ci if (be->be_f_offset < lv->inval) 53562306a36Sopenharmony_ci return -EIO; 53662306a36Sopenharmony_ci if (be->be_f_offset < lv->cowread) 53762306a36Sopenharmony_ci return -EIO; 53862306a36Sopenharmony_ci /* It looks like you might want to min this with lv->start, 53962306a36Sopenharmony_ci * but you really don't. 54062306a36Sopenharmony_ci */ 54162306a36Sopenharmony_ci lv->inval = lv->inval + be->be_length; 54262306a36Sopenharmony_ci lv->cowread = be->be_f_offset + be->be_length; 54362306a36Sopenharmony_ci return 0; 54462306a36Sopenharmony_ci } else 54562306a36Sopenharmony_ci return -EIO; 54662306a36Sopenharmony_ci} 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_cistatic int decode_sector_number(__be32 **rp, sector_t *sp) 54962306a36Sopenharmony_ci{ 55062306a36Sopenharmony_ci uint64_t s; 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_ci *rp = xdr_decode_hyper(*rp, &s); 55362306a36Sopenharmony_ci if (s & 0x1ff) { 55462306a36Sopenharmony_ci printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__); 55562306a36Sopenharmony_ci return -1; 55662306a36Sopenharmony_ci } 55762306a36Sopenharmony_ci *sp = s >> SECTOR_SHIFT; 55862306a36Sopenharmony_ci return 0; 55962306a36Sopenharmony_ci} 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_cistatic struct nfs4_deviceid_node * 56262306a36Sopenharmony_cibl_find_get_deviceid(struct nfs_server *server, 56362306a36Sopenharmony_ci const struct nfs4_deviceid *id, const struct cred *cred, 56462306a36Sopenharmony_ci gfp_t gfp_mask) 56562306a36Sopenharmony_ci{ 56662306a36Sopenharmony_ci struct nfs4_deviceid_node *node; 56762306a36Sopenharmony_ci unsigned long start, end; 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ciretry: 57062306a36Sopenharmony_ci node = nfs4_find_get_deviceid(server, id, cred, gfp_mask); 57162306a36Sopenharmony_ci if (!node) 57262306a36Sopenharmony_ci return ERR_PTR(-ENODEV); 57362306a36Sopenharmony_ci 57462306a36Sopenharmony_ci if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0) 57562306a36Sopenharmony_ci return node; 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci end = jiffies; 57862306a36Sopenharmony_ci start = end - PNFS_DEVICE_RETRY_TIMEOUT; 57962306a36Sopenharmony_ci if (!time_in_range(node->timestamp_unavailable, start, end)) { 58062306a36Sopenharmony_ci nfs4_delete_deviceid(node->ld, node->nfs_client, id); 58162306a36Sopenharmony_ci goto retry; 58262306a36Sopenharmony_ci } 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci nfs4_put_deviceid_node(node); 58562306a36Sopenharmony_ci return ERR_PTR(-ENODEV); 58662306a36Sopenharmony_ci} 58762306a36Sopenharmony_ci 58862306a36Sopenharmony_cistatic int 58962306a36Sopenharmony_cibl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo, 59062306a36Sopenharmony_ci struct layout_verification *lv, struct list_head *extents, 59162306a36Sopenharmony_ci gfp_t gfp_mask) 59262306a36Sopenharmony_ci{ 59362306a36Sopenharmony_ci struct pnfs_block_extent *be; 59462306a36Sopenharmony_ci struct nfs4_deviceid id; 59562306a36Sopenharmony_ci int error; 59662306a36Sopenharmony_ci __be32 *p; 59762306a36Sopenharmony_ci 59862306a36Sopenharmony_ci p = xdr_inline_decode(xdr, 28 + NFS4_DEVICEID4_SIZE); 59962306a36Sopenharmony_ci if (!p) 60062306a36Sopenharmony_ci return -EIO; 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci be = kzalloc(sizeof(*be), GFP_NOFS); 60362306a36Sopenharmony_ci if (!be) 60462306a36Sopenharmony_ci return -ENOMEM; 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci memcpy(&id, p, NFS4_DEVICEID4_SIZE); 60762306a36Sopenharmony_ci p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ci be->be_device = bl_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id, 61062306a36Sopenharmony_ci lo->plh_lc_cred, gfp_mask); 61162306a36Sopenharmony_ci if (IS_ERR(be->be_device)) { 61262306a36Sopenharmony_ci error = PTR_ERR(be->be_device); 61362306a36Sopenharmony_ci goto out_free_be; 61462306a36Sopenharmony_ci } 61562306a36Sopenharmony_ci 61662306a36Sopenharmony_ci /* 61762306a36Sopenharmony_ci * The next three values are read in as bytes, but stored in the 61862306a36Sopenharmony_ci * extent structure in 512-byte granularity. 61962306a36Sopenharmony_ci */ 62062306a36Sopenharmony_ci error = -EIO; 62162306a36Sopenharmony_ci if (decode_sector_number(&p, &be->be_f_offset) < 0) 62262306a36Sopenharmony_ci goto out_put_deviceid; 62362306a36Sopenharmony_ci if (decode_sector_number(&p, &be->be_length) < 0) 62462306a36Sopenharmony_ci goto out_put_deviceid; 62562306a36Sopenharmony_ci if (decode_sector_number(&p, &be->be_v_offset) < 0) 62662306a36Sopenharmony_ci goto out_put_deviceid; 62762306a36Sopenharmony_ci be->be_state = be32_to_cpup(p++); 62862306a36Sopenharmony_ci 62962306a36Sopenharmony_ci error = verify_extent(be, lv); 63062306a36Sopenharmony_ci if (error) { 63162306a36Sopenharmony_ci dprintk("%s: extent verification failed\n", __func__); 63262306a36Sopenharmony_ci goto out_put_deviceid; 63362306a36Sopenharmony_ci } 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ci list_add_tail(&be->be_list, extents); 63662306a36Sopenharmony_ci return 0; 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_ciout_put_deviceid: 63962306a36Sopenharmony_ci nfs4_put_deviceid_node(be->be_device); 64062306a36Sopenharmony_ciout_free_be: 64162306a36Sopenharmony_ci kfree(be); 64262306a36Sopenharmony_ci return error; 64362306a36Sopenharmony_ci} 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_cistatic struct pnfs_layout_segment * 64662306a36Sopenharmony_cibl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr, 64762306a36Sopenharmony_ci gfp_t gfp_mask) 64862306a36Sopenharmony_ci{ 64962306a36Sopenharmony_ci struct layout_verification lv = { 65062306a36Sopenharmony_ci .mode = lgr->range.iomode, 65162306a36Sopenharmony_ci .start = lgr->range.offset >> SECTOR_SHIFT, 65262306a36Sopenharmony_ci .inval = lgr->range.offset >> SECTOR_SHIFT, 65362306a36Sopenharmony_ci .cowread = lgr->range.offset >> SECTOR_SHIFT, 65462306a36Sopenharmony_ci }; 65562306a36Sopenharmony_ci struct pnfs_block_layout *bl = BLK_LO2EXT(lo); 65662306a36Sopenharmony_ci struct pnfs_layout_segment *lseg; 65762306a36Sopenharmony_ci struct xdr_buf buf; 65862306a36Sopenharmony_ci struct xdr_stream xdr; 65962306a36Sopenharmony_ci struct page *scratch; 66062306a36Sopenharmony_ci int status, i; 66162306a36Sopenharmony_ci uint32_t count; 66262306a36Sopenharmony_ci __be32 *p; 66362306a36Sopenharmony_ci LIST_HEAD(extents); 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_ci dprintk("---> %s\n", __func__); 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_ci lseg = kzalloc(sizeof(*lseg), gfp_mask); 66862306a36Sopenharmony_ci if (!lseg) 66962306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ci status = -ENOMEM; 67262306a36Sopenharmony_ci scratch = alloc_page(gfp_mask); 67362306a36Sopenharmony_ci if (!scratch) 67462306a36Sopenharmony_ci goto out; 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_ci xdr_init_decode_pages(&xdr, &buf, 67762306a36Sopenharmony_ci lgr->layoutp->pages, lgr->layoutp->len); 67862306a36Sopenharmony_ci xdr_set_scratch_page(&xdr, scratch); 67962306a36Sopenharmony_ci 68062306a36Sopenharmony_ci status = -EIO; 68162306a36Sopenharmony_ci p = xdr_inline_decode(&xdr, 4); 68262306a36Sopenharmony_ci if (unlikely(!p)) 68362306a36Sopenharmony_ci goto out_free_scratch; 68462306a36Sopenharmony_ci 68562306a36Sopenharmony_ci count = be32_to_cpup(p++); 68662306a36Sopenharmony_ci dprintk("%s: number of extents %d\n", __func__, count); 68762306a36Sopenharmony_ci 68862306a36Sopenharmony_ci /* 68962306a36Sopenharmony_ci * Decode individual extents, putting them in temporary staging area 69062306a36Sopenharmony_ci * until whole layout is decoded to make error recovery easier. 69162306a36Sopenharmony_ci */ 69262306a36Sopenharmony_ci for (i = 0; i < count; i++) { 69362306a36Sopenharmony_ci status = bl_alloc_extent(&xdr, lo, &lv, &extents, gfp_mask); 69462306a36Sopenharmony_ci if (status) 69562306a36Sopenharmony_ci goto process_extents; 69662306a36Sopenharmony_ci } 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci if (lgr->range.offset + lgr->range.length != 69962306a36Sopenharmony_ci lv.start << SECTOR_SHIFT) { 70062306a36Sopenharmony_ci dprintk("%s Final length mismatch\n", __func__); 70162306a36Sopenharmony_ci status = -EIO; 70262306a36Sopenharmony_ci goto process_extents; 70362306a36Sopenharmony_ci } 70462306a36Sopenharmony_ci 70562306a36Sopenharmony_ci if (lv.start < lv.cowread) { 70662306a36Sopenharmony_ci dprintk("%s Final uncovered COW extent\n", __func__); 70762306a36Sopenharmony_ci status = -EIO; 70862306a36Sopenharmony_ci } 70962306a36Sopenharmony_ci 71062306a36Sopenharmony_ciprocess_extents: 71162306a36Sopenharmony_ci while (!list_empty(&extents)) { 71262306a36Sopenharmony_ci struct pnfs_block_extent *be = 71362306a36Sopenharmony_ci list_first_entry(&extents, struct pnfs_block_extent, 71462306a36Sopenharmony_ci be_list); 71562306a36Sopenharmony_ci list_del(&be->be_list); 71662306a36Sopenharmony_ci 71762306a36Sopenharmony_ci if (!status) 71862306a36Sopenharmony_ci status = ext_tree_insert(bl, be); 71962306a36Sopenharmony_ci 72062306a36Sopenharmony_ci if (status) { 72162306a36Sopenharmony_ci nfs4_put_deviceid_node(be->be_device); 72262306a36Sopenharmony_ci kfree(be); 72362306a36Sopenharmony_ci } 72462306a36Sopenharmony_ci } 72562306a36Sopenharmony_ci 72662306a36Sopenharmony_ciout_free_scratch: 72762306a36Sopenharmony_ci __free_page(scratch); 72862306a36Sopenharmony_ciout: 72962306a36Sopenharmony_ci dprintk("%s returns %d\n", __func__, status); 73062306a36Sopenharmony_ci switch (status) { 73162306a36Sopenharmony_ci case -ENODEV: 73262306a36Sopenharmony_ci /* Our extent block devices are unavailable */ 73362306a36Sopenharmony_ci set_bit(NFS_LSEG_UNAVAILABLE, &lseg->pls_flags); 73462306a36Sopenharmony_ci fallthrough; 73562306a36Sopenharmony_ci case 0: 73662306a36Sopenharmony_ci return lseg; 73762306a36Sopenharmony_ci default: 73862306a36Sopenharmony_ci kfree(lseg); 73962306a36Sopenharmony_ci return ERR_PTR(status); 74062306a36Sopenharmony_ci } 74162306a36Sopenharmony_ci} 74262306a36Sopenharmony_ci 74362306a36Sopenharmony_cistatic void 74462306a36Sopenharmony_cibl_return_range(struct pnfs_layout_hdr *lo, 74562306a36Sopenharmony_ci struct pnfs_layout_range *range) 74662306a36Sopenharmony_ci{ 74762306a36Sopenharmony_ci struct pnfs_block_layout *bl = BLK_LO2EXT(lo); 74862306a36Sopenharmony_ci sector_t offset = range->offset >> SECTOR_SHIFT, end; 74962306a36Sopenharmony_ci 75062306a36Sopenharmony_ci if (range->offset % 8) { 75162306a36Sopenharmony_ci dprintk("%s: offset %lld not block size aligned\n", 75262306a36Sopenharmony_ci __func__, range->offset); 75362306a36Sopenharmony_ci return; 75462306a36Sopenharmony_ci } 75562306a36Sopenharmony_ci 75662306a36Sopenharmony_ci if (range->length != NFS4_MAX_UINT64) { 75762306a36Sopenharmony_ci if (range->length % 8) { 75862306a36Sopenharmony_ci dprintk("%s: length %lld not block size aligned\n", 75962306a36Sopenharmony_ci __func__, range->length); 76062306a36Sopenharmony_ci return; 76162306a36Sopenharmony_ci } 76262306a36Sopenharmony_ci 76362306a36Sopenharmony_ci end = offset + (range->length >> SECTOR_SHIFT); 76462306a36Sopenharmony_ci } else { 76562306a36Sopenharmony_ci end = round_down(NFS4_MAX_UINT64, PAGE_SIZE); 76662306a36Sopenharmony_ci } 76762306a36Sopenharmony_ci 76862306a36Sopenharmony_ci ext_tree_remove(bl, range->iomode & IOMODE_RW, offset, end); 76962306a36Sopenharmony_ci} 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_cistatic int 77262306a36Sopenharmony_cibl_prepare_layoutcommit(struct nfs4_layoutcommit_args *arg) 77362306a36Sopenharmony_ci{ 77462306a36Sopenharmony_ci return ext_tree_prepare_commit(arg); 77562306a36Sopenharmony_ci} 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_cistatic void 77862306a36Sopenharmony_cibl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) 77962306a36Sopenharmony_ci{ 78062306a36Sopenharmony_ci ext_tree_mark_committed(&lcdata->args, lcdata->res.status); 78162306a36Sopenharmony_ci} 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_cistatic int 78462306a36Sopenharmony_cibl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) 78562306a36Sopenharmony_ci{ 78662306a36Sopenharmony_ci dprintk("%s enter\n", __func__); 78762306a36Sopenharmony_ci 78862306a36Sopenharmony_ci if (server->pnfs_blksize == 0) { 78962306a36Sopenharmony_ci dprintk("%s Server did not return blksize\n", __func__); 79062306a36Sopenharmony_ci return -EINVAL; 79162306a36Sopenharmony_ci } 79262306a36Sopenharmony_ci if (server->pnfs_blksize > PAGE_SIZE) { 79362306a36Sopenharmony_ci printk(KERN_ERR "%s: pNFS blksize %d not supported.\n", 79462306a36Sopenharmony_ci __func__, server->pnfs_blksize); 79562306a36Sopenharmony_ci return -EINVAL; 79662306a36Sopenharmony_ci } 79762306a36Sopenharmony_ci 79862306a36Sopenharmony_ci return 0; 79962306a36Sopenharmony_ci} 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_cistatic bool 80262306a36Sopenharmony_ciis_aligned_req(struct nfs_pageio_descriptor *pgio, 80362306a36Sopenharmony_ci struct nfs_page *req, unsigned int alignment, bool is_write) 80462306a36Sopenharmony_ci{ 80562306a36Sopenharmony_ci /* 80662306a36Sopenharmony_ci * Always accept buffered writes, higher layers take care of the 80762306a36Sopenharmony_ci * right alignment. 80862306a36Sopenharmony_ci */ 80962306a36Sopenharmony_ci if (pgio->pg_dreq == NULL) 81062306a36Sopenharmony_ci return true; 81162306a36Sopenharmony_ci 81262306a36Sopenharmony_ci if (!IS_ALIGNED(req->wb_offset, alignment)) 81362306a36Sopenharmony_ci return false; 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci if (IS_ALIGNED(req->wb_bytes, alignment)) 81662306a36Sopenharmony_ci return true; 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci if (is_write && 81962306a36Sopenharmony_ci (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode))) { 82062306a36Sopenharmony_ci /* 82162306a36Sopenharmony_ci * If the write goes up to the inode size, just write 82262306a36Sopenharmony_ci * the full page. Data past the inode size is 82362306a36Sopenharmony_ci * guaranteed to be zeroed by the higher level client 82462306a36Sopenharmony_ci * code, and this behaviour is mandated by RFC 5663 82562306a36Sopenharmony_ci * section 2.3.2. 82662306a36Sopenharmony_ci */ 82762306a36Sopenharmony_ci return true; 82862306a36Sopenharmony_ci } 82962306a36Sopenharmony_ci 83062306a36Sopenharmony_ci return false; 83162306a36Sopenharmony_ci} 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_cistatic void 83462306a36Sopenharmony_cibl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 83562306a36Sopenharmony_ci{ 83662306a36Sopenharmony_ci if (!is_aligned_req(pgio, req, SECTOR_SIZE, false)) { 83762306a36Sopenharmony_ci nfs_pageio_reset_read_mds(pgio); 83862306a36Sopenharmony_ci return; 83962306a36Sopenharmony_ci } 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_ci pnfs_generic_pg_init_read(pgio, req); 84262306a36Sopenharmony_ci 84362306a36Sopenharmony_ci if (pgio->pg_lseg && 84462306a36Sopenharmony_ci test_bit(NFS_LSEG_UNAVAILABLE, &pgio->pg_lseg->pls_flags)) { 84562306a36Sopenharmony_ci pnfs_error_mark_layout_for_return(pgio->pg_inode, pgio->pg_lseg); 84662306a36Sopenharmony_ci pnfs_set_lo_fail(pgio->pg_lseg); 84762306a36Sopenharmony_ci nfs_pageio_reset_read_mds(pgio); 84862306a36Sopenharmony_ci } 84962306a36Sopenharmony_ci} 85062306a36Sopenharmony_ci 85162306a36Sopenharmony_ci/* 85262306a36Sopenharmony_ci * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number 85362306a36Sopenharmony_ci * of bytes (maximum @req->wb_bytes) that can be coalesced. 85462306a36Sopenharmony_ci */ 85562306a36Sopenharmony_cistatic size_t 85662306a36Sopenharmony_cibl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 85762306a36Sopenharmony_ci struct nfs_page *req) 85862306a36Sopenharmony_ci{ 85962306a36Sopenharmony_ci if (!is_aligned_req(pgio, req, SECTOR_SIZE, false)) 86062306a36Sopenharmony_ci return 0; 86162306a36Sopenharmony_ci return pnfs_generic_pg_test(pgio, prev, req); 86262306a36Sopenharmony_ci} 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_ci/* 86562306a36Sopenharmony_ci * Return the number of contiguous bytes for a given inode 86662306a36Sopenharmony_ci * starting at page frame idx. 86762306a36Sopenharmony_ci */ 86862306a36Sopenharmony_cistatic u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) 86962306a36Sopenharmony_ci{ 87062306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 87162306a36Sopenharmony_ci pgoff_t end; 87262306a36Sopenharmony_ci 87362306a36Sopenharmony_ci /* Optimize common case that writes from 0 to end of file */ 87462306a36Sopenharmony_ci end = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 87562306a36Sopenharmony_ci if (end != inode->i_mapping->nrpages) { 87662306a36Sopenharmony_ci rcu_read_lock(); 87762306a36Sopenharmony_ci end = page_cache_next_miss(mapping, idx + 1, ULONG_MAX); 87862306a36Sopenharmony_ci rcu_read_unlock(); 87962306a36Sopenharmony_ci } 88062306a36Sopenharmony_ci 88162306a36Sopenharmony_ci if (!end) 88262306a36Sopenharmony_ci return i_size_read(inode) - (idx << PAGE_SHIFT); 88362306a36Sopenharmony_ci else 88462306a36Sopenharmony_ci return (end - idx) << PAGE_SHIFT; 88562306a36Sopenharmony_ci} 88662306a36Sopenharmony_ci 88762306a36Sopenharmony_cistatic void 88862306a36Sopenharmony_cibl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 88962306a36Sopenharmony_ci{ 89062306a36Sopenharmony_ci u64 wb_size; 89162306a36Sopenharmony_ci 89262306a36Sopenharmony_ci if (!is_aligned_req(pgio, req, PAGE_SIZE, true)) { 89362306a36Sopenharmony_ci nfs_pageio_reset_write_mds(pgio); 89462306a36Sopenharmony_ci return; 89562306a36Sopenharmony_ci } 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_ci if (pgio->pg_dreq == NULL) 89862306a36Sopenharmony_ci wb_size = pnfs_num_cont_bytes(pgio->pg_inode, req->wb_index); 89962306a36Sopenharmony_ci else 90062306a36Sopenharmony_ci wb_size = nfs_dreq_bytes_left(pgio->pg_dreq, req_offset(req)); 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci pnfs_generic_pg_init_write(pgio, req, wb_size); 90362306a36Sopenharmony_ci 90462306a36Sopenharmony_ci if (pgio->pg_lseg && 90562306a36Sopenharmony_ci test_bit(NFS_LSEG_UNAVAILABLE, &pgio->pg_lseg->pls_flags)) { 90662306a36Sopenharmony_ci 90762306a36Sopenharmony_ci pnfs_error_mark_layout_for_return(pgio->pg_inode, pgio->pg_lseg); 90862306a36Sopenharmony_ci pnfs_set_lo_fail(pgio->pg_lseg); 90962306a36Sopenharmony_ci nfs_pageio_reset_write_mds(pgio); 91062306a36Sopenharmony_ci } 91162306a36Sopenharmony_ci} 91262306a36Sopenharmony_ci 91362306a36Sopenharmony_ci/* 91462306a36Sopenharmony_ci * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number 91562306a36Sopenharmony_ci * of bytes (maximum @req->wb_bytes) that can be coalesced. 91662306a36Sopenharmony_ci */ 91762306a36Sopenharmony_cistatic size_t 91862306a36Sopenharmony_cibl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 91962306a36Sopenharmony_ci struct nfs_page *req) 92062306a36Sopenharmony_ci{ 92162306a36Sopenharmony_ci if (!is_aligned_req(pgio, req, PAGE_SIZE, true)) 92262306a36Sopenharmony_ci return 0; 92362306a36Sopenharmony_ci return pnfs_generic_pg_test(pgio, prev, req); 92462306a36Sopenharmony_ci} 92562306a36Sopenharmony_ci 92662306a36Sopenharmony_cistatic const struct nfs_pageio_ops bl_pg_read_ops = { 92762306a36Sopenharmony_ci .pg_init = bl_pg_init_read, 92862306a36Sopenharmony_ci .pg_test = bl_pg_test_read, 92962306a36Sopenharmony_ci .pg_doio = pnfs_generic_pg_readpages, 93062306a36Sopenharmony_ci .pg_cleanup = pnfs_generic_pg_cleanup, 93162306a36Sopenharmony_ci}; 93262306a36Sopenharmony_ci 93362306a36Sopenharmony_cistatic const struct nfs_pageio_ops bl_pg_write_ops = { 93462306a36Sopenharmony_ci .pg_init = bl_pg_init_write, 93562306a36Sopenharmony_ci .pg_test = bl_pg_test_write, 93662306a36Sopenharmony_ci .pg_doio = pnfs_generic_pg_writepages, 93762306a36Sopenharmony_ci .pg_cleanup = pnfs_generic_pg_cleanup, 93862306a36Sopenharmony_ci}; 93962306a36Sopenharmony_ci 94062306a36Sopenharmony_cistatic struct pnfs_layoutdriver_type blocklayout_type = { 94162306a36Sopenharmony_ci .id = LAYOUT_BLOCK_VOLUME, 94262306a36Sopenharmony_ci .name = "LAYOUT_BLOCK_VOLUME", 94362306a36Sopenharmony_ci .owner = THIS_MODULE, 94462306a36Sopenharmony_ci .flags = PNFS_LAYOUTRET_ON_SETATTR | 94562306a36Sopenharmony_ci PNFS_LAYOUTRET_ON_ERROR | 94662306a36Sopenharmony_ci PNFS_READ_WHOLE_PAGE, 94762306a36Sopenharmony_ci .read_pagelist = bl_read_pagelist, 94862306a36Sopenharmony_ci .write_pagelist = bl_write_pagelist, 94962306a36Sopenharmony_ci .alloc_layout_hdr = bl_alloc_layout_hdr, 95062306a36Sopenharmony_ci .free_layout_hdr = bl_free_layout_hdr, 95162306a36Sopenharmony_ci .alloc_lseg = bl_alloc_lseg, 95262306a36Sopenharmony_ci .free_lseg = bl_free_lseg, 95362306a36Sopenharmony_ci .return_range = bl_return_range, 95462306a36Sopenharmony_ci .prepare_layoutcommit = bl_prepare_layoutcommit, 95562306a36Sopenharmony_ci .cleanup_layoutcommit = bl_cleanup_layoutcommit, 95662306a36Sopenharmony_ci .set_layoutdriver = bl_set_layoutdriver, 95762306a36Sopenharmony_ci .alloc_deviceid_node = bl_alloc_deviceid_node, 95862306a36Sopenharmony_ci .free_deviceid_node = bl_free_deviceid_node, 95962306a36Sopenharmony_ci .pg_read_ops = &bl_pg_read_ops, 96062306a36Sopenharmony_ci .pg_write_ops = &bl_pg_write_ops, 96162306a36Sopenharmony_ci .sync = pnfs_generic_sync, 96262306a36Sopenharmony_ci}; 96362306a36Sopenharmony_ci 96462306a36Sopenharmony_cistatic struct pnfs_layoutdriver_type scsilayout_type = { 96562306a36Sopenharmony_ci .id = LAYOUT_SCSI, 96662306a36Sopenharmony_ci .name = "LAYOUT_SCSI", 96762306a36Sopenharmony_ci .owner = THIS_MODULE, 96862306a36Sopenharmony_ci .flags = PNFS_LAYOUTRET_ON_SETATTR | 96962306a36Sopenharmony_ci PNFS_LAYOUTRET_ON_ERROR | 97062306a36Sopenharmony_ci PNFS_READ_WHOLE_PAGE, 97162306a36Sopenharmony_ci .read_pagelist = bl_read_pagelist, 97262306a36Sopenharmony_ci .write_pagelist = bl_write_pagelist, 97362306a36Sopenharmony_ci .alloc_layout_hdr = sl_alloc_layout_hdr, 97462306a36Sopenharmony_ci .free_layout_hdr = bl_free_layout_hdr, 97562306a36Sopenharmony_ci .alloc_lseg = bl_alloc_lseg, 97662306a36Sopenharmony_ci .free_lseg = bl_free_lseg, 97762306a36Sopenharmony_ci .return_range = bl_return_range, 97862306a36Sopenharmony_ci .prepare_layoutcommit = bl_prepare_layoutcommit, 97962306a36Sopenharmony_ci .cleanup_layoutcommit = bl_cleanup_layoutcommit, 98062306a36Sopenharmony_ci .set_layoutdriver = bl_set_layoutdriver, 98162306a36Sopenharmony_ci .alloc_deviceid_node = bl_alloc_deviceid_node, 98262306a36Sopenharmony_ci .free_deviceid_node = bl_free_deviceid_node, 98362306a36Sopenharmony_ci .pg_read_ops = &bl_pg_read_ops, 98462306a36Sopenharmony_ci .pg_write_ops = &bl_pg_write_ops, 98562306a36Sopenharmony_ci .sync = pnfs_generic_sync, 98662306a36Sopenharmony_ci}; 98762306a36Sopenharmony_ci 98862306a36Sopenharmony_ci 98962306a36Sopenharmony_cistatic int __init nfs4blocklayout_init(void) 99062306a36Sopenharmony_ci{ 99162306a36Sopenharmony_ci int ret; 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_ci dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); 99462306a36Sopenharmony_ci 99562306a36Sopenharmony_ci ret = bl_init_pipefs(); 99662306a36Sopenharmony_ci if (ret) 99762306a36Sopenharmony_ci goto out; 99862306a36Sopenharmony_ci 99962306a36Sopenharmony_ci ret = pnfs_register_layoutdriver(&blocklayout_type); 100062306a36Sopenharmony_ci if (ret) 100162306a36Sopenharmony_ci goto out_cleanup_pipe; 100262306a36Sopenharmony_ci 100362306a36Sopenharmony_ci ret = pnfs_register_layoutdriver(&scsilayout_type); 100462306a36Sopenharmony_ci if (ret) 100562306a36Sopenharmony_ci goto out_unregister_block; 100662306a36Sopenharmony_ci return 0; 100762306a36Sopenharmony_ci 100862306a36Sopenharmony_ciout_unregister_block: 100962306a36Sopenharmony_ci pnfs_unregister_layoutdriver(&blocklayout_type); 101062306a36Sopenharmony_ciout_cleanup_pipe: 101162306a36Sopenharmony_ci bl_cleanup_pipefs(); 101262306a36Sopenharmony_ciout: 101362306a36Sopenharmony_ci return ret; 101462306a36Sopenharmony_ci} 101562306a36Sopenharmony_ci 101662306a36Sopenharmony_cistatic void __exit nfs4blocklayout_exit(void) 101762306a36Sopenharmony_ci{ 101862306a36Sopenharmony_ci dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", 101962306a36Sopenharmony_ci __func__); 102062306a36Sopenharmony_ci 102162306a36Sopenharmony_ci pnfs_unregister_layoutdriver(&scsilayout_type); 102262306a36Sopenharmony_ci pnfs_unregister_layoutdriver(&blocklayout_type); 102362306a36Sopenharmony_ci bl_cleanup_pipefs(); 102462306a36Sopenharmony_ci} 102562306a36Sopenharmony_ci 102662306a36Sopenharmony_ciMODULE_ALIAS("nfs-layouttype4-3"); 102762306a36Sopenharmony_ciMODULE_ALIAS("nfs-layouttype4-5"); 102862306a36Sopenharmony_ci 102962306a36Sopenharmony_cimodule_init(nfs4blocklayout_init); 103062306a36Sopenharmony_cimodule_exit(nfs4blocklayout_exit); 1031