18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* -*- mode: c; c-basic-offset: 8; -*- 38c2ecf20Sopenharmony_ci * vim: noexpandtab sw=8 ts=8 sts=0: 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 2002, 2004 Oracle. All rights reserved. 68c2ecf20Sopenharmony_ci */ 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci#include <linux/fs.h> 98c2ecf20Sopenharmony_ci#include <linux/slab.h> 108c2ecf20Sopenharmony_ci#include <linux/highmem.h> 118c2ecf20Sopenharmony_ci#include <linux/pagemap.h> 128c2ecf20Sopenharmony_ci#include <asm/byteorder.h> 138c2ecf20Sopenharmony_ci#include <linux/swap.h> 148c2ecf20Sopenharmony_ci#include <linux/mpage.h> 158c2ecf20Sopenharmony_ci#include <linux/quotaops.h> 168c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 178c2ecf20Sopenharmony_ci#include <linux/uio.h> 188c2ecf20Sopenharmony_ci#include <linux/mm.h> 198c2ecf20Sopenharmony_ci 208c2ecf20Sopenharmony_ci#include <cluster/masklog.h> 218c2ecf20Sopenharmony_ci 228c2ecf20Sopenharmony_ci#include "ocfs2.h" 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci#include "alloc.h" 258c2ecf20Sopenharmony_ci#include "aops.h" 268c2ecf20Sopenharmony_ci#include "dlmglue.h" 278c2ecf20Sopenharmony_ci#include "extent_map.h" 288c2ecf20Sopenharmony_ci#include "file.h" 298c2ecf20Sopenharmony_ci#include "inode.h" 308c2ecf20Sopenharmony_ci#include "journal.h" 318c2ecf20Sopenharmony_ci#include "suballoc.h" 328c2ecf20Sopenharmony_ci#include "super.h" 338c2ecf20Sopenharmony_ci#include "symlink.h" 348c2ecf20Sopenharmony_ci#include "refcounttree.h" 358c2ecf20Sopenharmony_ci#include "ocfs2_trace.h" 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci#include "buffer_head_io.h" 388c2ecf20Sopenharmony_ci#include "dir.h" 398c2ecf20Sopenharmony_ci#include "namei.h" 408c2ecf20Sopenharmony_ci#include "sysfile.h" 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_cistatic int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, 438c2ecf20Sopenharmony_ci struct buffer_head *bh_result, int create) 448c2ecf20Sopenharmony_ci{ 458c2ecf20Sopenharmony_ci int err = -EIO; 468c2ecf20Sopenharmony_ci int status; 478c2ecf20Sopenharmony_ci struct ocfs2_dinode *fe = NULL; 488c2ecf20Sopenharmony_ci struct buffer_head *bh = NULL; 498c2ecf20Sopenharmony_ci struct buffer_head *buffer_cache_bh = NULL; 508c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 518c2ecf20Sopenharmony_ci void *kaddr; 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci trace_ocfs2_symlink_get_block( 548c2ecf20Sopenharmony_ci (unsigned long long)OCFS2_I(inode)->ip_blkno, 558c2ecf20Sopenharmony_ci (unsigned long long)iblock, bh_result, create); 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_ci BUG_ON(ocfs2_inode_is_fast_symlink(inode)); 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) { 608c2ecf20Sopenharmony_ci mlog(ML_ERROR, "block offset > PATH_MAX: %llu", 618c2ecf20Sopenharmony_ci (unsigned long long)iblock); 628c2ecf20Sopenharmony_ci goto bail; 638c2ecf20Sopenharmony_ci } 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_ci status = ocfs2_read_inode_block(inode, &bh); 668c2ecf20Sopenharmony_ci if (status < 0) { 678c2ecf20Sopenharmony_ci mlog_errno(status); 688c2ecf20Sopenharmony_ci goto bail; 698c2ecf20Sopenharmony_ci } 708c2ecf20Sopenharmony_ci fe = (struct ocfs2_dinode *) bh->b_data; 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb, 738c2ecf20Sopenharmony_ci le32_to_cpu(fe->i_clusters))) { 748c2ecf20Sopenharmony_ci err = -ENOMEM; 758c2ecf20Sopenharmony_ci mlog(ML_ERROR, "block offset is outside the allocated size: " 768c2ecf20Sopenharmony_ci "%llu\n", (unsigned long long)iblock); 778c2ecf20Sopenharmony_ci goto bail; 788c2ecf20Sopenharmony_ci } 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci /* We don't use the page cache to create symlink data, so if 818c2ecf20Sopenharmony_ci * need be, copy it over from the buffer cache. */ 828c2ecf20Sopenharmony_ci if (!buffer_uptodate(bh_result) && ocfs2_inode_is_new(inode)) { 838c2ecf20Sopenharmony_ci u64 blkno = le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + 848c2ecf20Sopenharmony_ci iblock; 858c2ecf20Sopenharmony_ci buffer_cache_bh = sb_getblk(osb->sb, blkno); 868c2ecf20Sopenharmony_ci if (!buffer_cache_bh) { 878c2ecf20Sopenharmony_ci err = -ENOMEM; 888c2ecf20Sopenharmony_ci mlog(ML_ERROR, "couldn't getblock for symlink!\n"); 898c2ecf20Sopenharmony_ci goto bail; 908c2ecf20Sopenharmony_ci } 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci /* we haven't locked out transactions, so a commit 938c2ecf20Sopenharmony_ci * could've happened. Since we've got a reference on 948c2ecf20Sopenharmony_ci * the bh, even if it commits while we're doing the 958c2ecf20Sopenharmony_ci * copy, the data is still good. */ 968c2ecf20Sopenharmony_ci if (buffer_jbd(buffer_cache_bh) 978c2ecf20Sopenharmony_ci && ocfs2_inode_is_new(inode)) { 988c2ecf20Sopenharmony_ci kaddr = kmap_atomic(bh_result->b_page); 998c2ecf20Sopenharmony_ci if (!kaddr) { 1008c2ecf20Sopenharmony_ci mlog(ML_ERROR, "couldn't kmap!\n"); 1018c2ecf20Sopenharmony_ci goto bail; 1028c2ecf20Sopenharmony_ci } 1038c2ecf20Sopenharmony_ci memcpy(kaddr + (bh_result->b_size * iblock), 1048c2ecf20Sopenharmony_ci buffer_cache_bh->b_data, 1058c2ecf20Sopenharmony_ci bh_result->b_size); 1068c2ecf20Sopenharmony_ci kunmap_atomic(kaddr); 1078c2ecf20Sopenharmony_ci set_buffer_uptodate(bh_result); 1088c2ecf20Sopenharmony_ci } 1098c2ecf20Sopenharmony_ci brelse(buffer_cache_bh); 1108c2ecf20Sopenharmony_ci } 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci map_bh(bh_result, inode->i_sb, 1138c2ecf20Sopenharmony_ci le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + iblock); 1148c2ecf20Sopenharmony_ci 1158c2ecf20Sopenharmony_ci err = 0; 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_cibail: 1188c2ecf20Sopenharmony_ci brelse(bh); 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci return err; 1218c2ecf20Sopenharmony_ci} 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_cistatic int ocfs2_lock_get_block(struct inode *inode, sector_t iblock, 1248c2ecf20Sopenharmony_ci struct buffer_head *bh_result, int create) 1258c2ecf20Sopenharmony_ci{ 1268c2ecf20Sopenharmony_ci int ret = 0; 1278c2ecf20Sopenharmony_ci struct ocfs2_inode_info *oi = OCFS2_I(inode); 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_ci down_read(&oi->ip_alloc_sem); 1308c2ecf20Sopenharmony_ci ret = ocfs2_get_block(inode, iblock, bh_result, create); 1318c2ecf20Sopenharmony_ci up_read(&oi->ip_alloc_sem); 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci return ret; 1348c2ecf20Sopenharmony_ci} 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ciint ocfs2_get_block(struct inode *inode, sector_t iblock, 1378c2ecf20Sopenharmony_ci struct buffer_head *bh_result, int create) 1388c2ecf20Sopenharmony_ci{ 1398c2ecf20Sopenharmony_ci int err = 0; 1408c2ecf20Sopenharmony_ci unsigned int ext_flags; 1418c2ecf20Sopenharmony_ci u64 max_blocks = bh_result->b_size >> inode->i_blkbits; 1428c2ecf20Sopenharmony_ci u64 p_blkno, count, past_eof; 1438c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_ci trace_ocfs2_get_block((unsigned long long)OCFS2_I(inode)->ip_blkno, 1468c2ecf20Sopenharmony_ci (unsigned long long)iblock, bh_result, create); 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_ci if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) 1498c2ecf20Sopenharmony_ci mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n", 1508c2ecf20Sopenharmony_ci inode, inode->i_ino); 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci if (S_ISLNK(inode->i_mode)) { 1538c2ecf20Sopenharmony_ci /* this always does I/O for some reason. */ 1548c2ecf20Sopenharmony_ci err = ocfs2_symlink_get_block(inode, iblock, bh_result, create); 1558c2ecf20Sopenharmony_ci goto bail; 1568c2ecf20Sopenharmony_ci } 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_ci err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count, 1598c2ecf20Sopenharmony_ci &ext_flags); 1608c2ecf20Sopenharmony_ci if (err) { 1618c2ecf20Sopenharmony_ci mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " 1628c2ecf20Sopenharmony_ci "%llu, NULL)\n", err, inode, (unsigned long long)iblock, 1638c2ecf20Sopenharmony_ci (unsigned long long)p_blkno); 1648c2ecf20Sopenharmony_ci goto bail; 1658c2ecf20Sopenharmony_ci } 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_ci if (max_blocks < count) 1688c2ecf20Sopenharmony_ci count = max_blocks; 1698c2ecf20Sopenharmony_ci 1708c2ecf20Sopenharmony_ci /* 1718c2ecf20Sopenharmony_ci * ocfs2 never allocates in this function - the only time we 1728c2ecf20Sopenharmony_ci * need to use BH_New is when we're extending i_size on a file 1738c2ecf20Sopenharmony_ci * system which doesn't support holes, in which case BH_New 1748c2ecf20Sopenharmony_ci * allows __block_write_begin() to zero. 1758c2ecf20Sopenharmony_ci * 1768c2ecf20Sopenharmony_ci * If we see this on a sparse file system, then a truncate has 1778c2ecf20Sopenharmony_ci * raced us and removed the cluster. In this case, we clear 1788c2ecf20Sopenharmony_ci * the buffers dirty and uptodate bits and let the buffer code 1798c2ecf20Sopenharmony_ci * ignore it as a hole. 1808c2ecf20Sopenharmony_ci */ 1818c2ecf20Sopenharmony_ci if (create && p_blkno == 0 && ocfs2_sparse_alloc(osb)) { 1828c2ecf20Sopenharmony_ci clear_buffer_dirty(bh_result); 1838c2ecf20Sopenharmony_ci clear_buffer_uptodate(bh_result); 1848c2ecf20Sopenharmony_ci goto bail; 1858c2ecf20Sopenharmony_ci } 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci /* Treat the unwritten extent as a hole for zeroing purposes. */ 1888c2ecf20Sopenharmony_ci if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) 1898c2ecf20Sopenharmony_ci map_bh(bh_result, inode->i_sb, p_blkno); 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_ci bh_result->b_size = count << inode->i_blkbits; 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci if (!ocfs2_sparse_alloc(osb)) { 1948c2ecf20Sopenharmony_ci if (p_blkno == 0) { 1958c2ecf20Sopenharmony_ci err = -EIO; 1968c2ecf20Sopenharmony_ci mlog(ML_ERROR, 1978c2ecf20Sopenharmony_ci "iblock = %llu p_blkno = %llu blkno=(%llu)\n", 1988c2ecf20Sopenharmony_ci (unsigned long long)iblock, 1998c2ecf20Sopenharmony_ci (unsigned long long)p_blkno, 2008c2ecf20Sopenharmony_ci (unsigned long long)OCFS2_I(inode)->ip_blkno); 2018c2ecf20Sopenharmony_ci mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters); 2028c2ecf20Sopenharmony_ci dump_stack(); 2038c2ecf20Sopenharmony_ci goto bail; 2048c2ecf20Sopenharmony_ci } 2058c2ecf20Sopenharmony_ci } 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); 2088c2ecf20Sopenharmony_ci 2098c2ecf20Sopenharmony_ci trace_ocfs2_get_block_end((unsigned long long)OCFS2_I(inode)->ip_blkno, 2108c2ecf20Sopenharmony_ci (unsigned long long)past_eof); 2118c2ecf20Sopenharmony_ci if (create && (iblock >= past_eof)) 2128c2ecf20Sopenharmony_ci set_buffer_new(bh_result); 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_cibail: 2158c2ecf20Sopenharmony_ci if (err < 0) 2168c2ecf20Sopenharmony_ci err = -EIO; 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci return err; 2198c2ecf20Sopenharmony_ci} 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ciint ocfs2_read_inline_data(struct inode *inode, struct page *page, 2228c2ecf20Sopenharmony_ci struct buffer_head *di_bh) 2238c2ecf20Sopenharmony_ci{ 2248c2ecf20Sopenharmony_ci void *kaddr; 2258c2ecf20Sopenharmony_ci loff_t size; 2268c2ecf20Sopenharmony_ci struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) { 2298c2ecf20Sopenharmony_ci ocfs2_error(inode->i_sb, "Inode %llu lost inline data flag\n", 2308c2ecf20Sopenharmony_ci (unsigned long long)OCFS2_I(inode)->ip_blkno); 2318c2ecf20Sopenharmony_ci return -EROFS; 2328c2ecf20Sopenharmony_ci } 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_ci size = i_size_read(inode); 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_ci if (size > PAGE_SIZE || 2378c2ecf20Sopenharmony_ci size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) { 2388c2ecf20Sopenharmony_ci ocfs2_error(inode->i_sb, 2398c2ecf20Sopenharmony_ci "Inode %llu has with inline data has bad size: %Lu\n", 2408c2ecf20Sopenharmony_ci (unsigned long long)OCFS2_I(inode)->ip_blkno, 2418c2ecf20Sopenharmony_ci (unsigned long long)size); 2428c2ecf20Sopenharmony_ci return -EROFS; 2438c2ecf20Sopenharmony_ci } 2448c2ecf20Sopenharmony_ci 2458c2ecf20Sopenharmony_ci kaddr = kmap_atomic(page); 2468c2ecf20Sopenharmony_ci if (size) 2478c2ecf20Sopenharmony_ci memcpy(kaddr, di->id2.i_data.id_data, size); 2488c2ecf20Sopenharmony_ci /* Clear the remaining part of the page */ 2498c2ecf20Sopenharmony_ci memset(kaddr + size, 0, PAGE_SIZE - size); 2508c2ecf20Sopenharmony_ci flush_dcache_page(page); 2518c2ecf20Sopenharmony_ci kunmap_atomic(kaddr); 2528c2ecf20Sopenharmony_ci 2538c2ecf20Sopenharmony_ci SetPageUptodate(page); 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_ci return 0; 2568c2ecf20Sopenharmony_ci} 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_cistatic int ocfs2_readpage_inline(struct inode *inode, struct page *page) 2598c2ecf20Sopenharmony_ci{ 2608c2ecf20Sopenharmony_ci int ret; 2618c2ecf20Sopenharmony_ci struct buffer_head *di_bh = NULL; 2628c2ecf20Sopenharmony_ci 2638c2ecf20Sopenharmony_ci BUG_ON(!PageLocked(page)); 2648c2ecf20Sopenharmony_ci BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_ci ret = ocfs2_read_inode_block(inode, &di_bh); 2678c2ecf20Sopenharmony_ci if (ret) { 2688c2ecf20Sopenharmony_ci mlog_errno(ret); 2698c2ecf20Sopenharmony_ci goto out; 2708c2ecf20Sopenharmony_ci } 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_ci ret = ocfs2_read_inline_data(inode, page, di_bh); 2738c2ecf20Sopenharmony_ciout: 2748c2ecf20Sopenharmony_ci unlock_page(page); 2758c2ecf20Sopenharmony_ci 2768c2ecf20Sopenharmony_ci brelse(di_bh); 2778c2ecf20Sopenharmony_ci return ret; 2788c2ecf20Sopenharmony_ci} 2798c2ecf20Sopenharmony_ci 2808c2ecf20Sopenharmony_cistatic int ocfs2_readpage(struct file *file, struct page *page) 2818c2ecf20Sopenharmony_ci{ 2828c2ecf20Sopenharmony_ci struct inode *inode = page->mapping->host; 2838c2ecf20Sopenharmony_ci struct ocfs2_inode_info *oi = OCFS2_I(inode); 2848c2ecf20Sopenharmony_ci loff_t start = (loff_t)page->index << PAGE_SHIFT; 2858c2ecf20Sopenharmony_ci int ret, unlock = 1; 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_ci trace_ocfs2_readpage((unsigned long long)oi->ip_blkno, 2888c2ecf20Sopenharmony_ci (page ? page->index : 0)); 2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_ci ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page); 2918c2ecf20Sopenharmony_ci if (ret != 0) { 2928c2ecf20Sopenharmony_ci if (ret == AOP_TRUNCATED_PAGE) 2938c2ecf20Sopenharmony_ci unlock = 0; 2948c2ecf20Sopenharmony_ci mlog_errno(ret); 2958c2ecf20Sopenharmony_ci goto out; 2968c2ecf20Sopenharmony_ci } 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_ci if (down_read_trylock(&oi->ip_alloc_sem) == 0) { 2998c2ecf20Sopenharmony_ci /* 3008c2ecf20Sopenharmony_ci * Unlock the page and cycle ip_alloc_sem so that we don't 3018c2ecf20Sopenharmony_ci * busyloop waiting for ip_alloc_sem to unlock 3028c2ecf20Sopenharmony_ci */ 3038c2ecf20Sopenharmony_ci ret = AOP_TRUNCATED_PAGE; 3048c2ecf20Sopenharmony_ci unlock_page(page); 3058c2ecf20Sopenharmony_ci unlock = 0; 3068c2ecf20Sopenharmony_ci down_read(&oi->ip_alloc_sem); 3078c2ecf20Sopenharmony_ci up_read(&oi->ip_alloc_sem); 3088c2ecf20Sopenharmony_ci goto out_inode_unlock; 3098c2ecf20Sopenharmony_ci } 3108c2ecf20Sopenharmony_ci 3118c2ecf20Sopenharmony_ci /* 3128c2ecf20Sopenharmony_ci * i_size might have just been updated as we grabed the meta lock. We 3138c2ecf20Sopenharmony_ci * might now be discovering a truncate that hit on another node. 3148c2ecf20Sopenharmony_ci * block_read_full_page->get_block freaks out if it is asked to read 3158c2ecf20Sopenharmony_ci * beyond the end of a file, so we check here. Callers 3168c2ecf20Sopenharmony_ci * (generic_file_read, vm_ops->fault) are clever enough to check i_size 3178c2ecf20Sopenharmony_ci * and notice that the page they just read isn't needed. 3188c2ecf20Sopenharmony_ci * 3198c2ecf20Sopenharmony_ci * XXX sys_readahead() seems to get that wrong? 3208c2ecf20Sopenharmony_ci */ 3218c2ecf20Sopenharmony_ci if (start >= i_size_read(inode)) { 3228c2ecf20Sopenharmony_ci zero_user(page, 0, PAGE_SIZE); 3238c2ecf20Sopenharmony_ci SetPageUptodate(page); 3248c2ecf20Sopenharmony_ci ret = 0; 3258c2ecf20Sopenharmony_ci goto out_alloc; 3268c2ecf20Sopenharmony_ci } 3278c2ecf20Sopenharmony_ci 3288c2ecf20Sopenharmony_ci if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) 3298c2ecf20Sopenharmony_ci ret = ocfs2_readpage_inline(inode, page); 3308c2ecf20Sopenharmony_ci else 3318c2ecf20Sopenharmony_ci ret = block_read_full_page(page, ocfs2_get_block); 3328c2ecf20Sopenharmony_ci unlock = 0; 3338c2ecf20Sopenharmony_ci 3348c2ecf20Sopenharmony_ciout_alloc: 3358c2ecf20Sopenharmony_ci up_read(&oi->ip_alloc_sem); 3368c2ecf20Sopenharmony_ciout_inode_unlock: 3378c2ecf20Sopenharmony_ci ocfs2_inode_unlock(inode, 0); 3388c2ecf20Sopenharmony_ciout: 3398c2ecf20Sopenharmony_ci if (unlock) 3408c2ecf20Sopenharmony_ci unlock_page(page); 3418c2ecf20Sopenharmony_ci return ret; 3428c2ecf20Sopenharmony_ci} 3438c2ecf20Sopenharmony_ci 3448c2ecf20Sopenharmony_ci/* 3458c2ecf20Sopenharmony_ci * This is used only for read-ahead. Failures or difficult to handle 3468c2ecf20Sopenharmony_ci * situations are safe to ignore. 3478c2ecf20Sopenharmony_ci * 3488c2ecf20Sopenharmony_ci * Right now, we don't bother with BH_Boundary - in-inode extent lists 3498c2ecf20Sopenharmony_ci * are quite large (243 extents on 4k blocks), so most inodes don't 3508c2ecf20Sopenharmony_ci * grow out to a tree. If need be, detecting boundary extents could 3518c2ecf20Sopenharmony_ci * trivially be added in a future version of ocfs2_get_block(). 3528c2ecf20Sopenharmony_ci */ 3538c2ecf20Sopenharmony_cistatic void ocfs2_readahead(struct readahead_control *rac) 3548c2ecf20Sopenharmony_ci{ 3558c2ecf20Sopenharmony_ci int ret; 3568c2ecf20Sopenharmony_ci struct inode *inode = rac->mapping->host; 3578c2ecf20Sopenharmony_ci struct ocfs2_inode_info *oi = OCFS2_I(inode); 3588c2ecf20Sopenharmony_ci 3598c2ecf20Sopenharmony_ci /* 3608c2ecf20Sopenharmony_ci * Use the nonblocking flag for the dlm code to avoid page 3618c2ecf20Sopenharmony_ci * lock inversion, but don't bother with retrying. 3628c2ecf20Sopenharmony_ci */ 3638c2ecf20Sopenharmony_ci ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK); 3648c2ecf20Sopenharmony_ci if (ret) 3658c2ecf20Sopenharmony_ci return; 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_ci if (down_read_trylock(&oi->ip_alloc_sem) == 0) 3688c2ecf20Sopenharmony_ci goto out_unlock; 3698c2ecf20Sopenharmony_ci 3708c2ecf20Sopenharmony_ci /* 3718c2ecf20Sopenharmony_ci * Don't bother with inline-data. There isn't anything 3728c2ecf20Sopenharmony_ci * to read-ahead in that case anyway... 3738c2ecf20Sopenharmony_ci */ 3748c2ecf20Sopenharmony_ci if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) 3758c2ecf20Sopenharmony_ci goto out_up; 3768c2ecf20Sopenharmony_ci 3778c2ecf20Sopenharmony_ci /* 3788c2ecf20Sopenharmony_ci * Check whether a remote node truncated this file - we just 3798c2ecf20Sopenharmony_ci * drop out in that case as it's not worth handling here. 3808c2ecf20Sopenharmony_ci */ 3818c2ecf20Sopenharmony_ci if (readahead_pos(rac) >= i_size_read(inode)) 3828c2ecf20Sopenharmony_ci goto out_up; 3838c2ecf20Sopenharmony_ci 3848c2ecf20Sopenharmony_ci mpage_readahead(rac, ocfs2_get_block); 3858c2ecf20Sopenharmony_ci 3868c2ecf20Sopenharmony_ciout_up: 3878c2ecf20Sopenharmony_ci up_read(&oi->ip_alloc_sem); 3888c2ecf20Sopenharmony_ciout_unlock: 3898c2ecf20Sopenharmony_ci ocfs2_inode_unlock(inode, 0); 3908c2ecf20Sopenharmony_ci} 3918c2ecf20Sopenharmony_ci 3928c2ecf20Sopenharmony_ci/* Note: Because we don't support holes, our allocation has 3938c2ecf20Sopenharmony_ci * already happened (allocation writes zeros to the file data) 3948c2ecf20Sopenharmony_ci * so we don't have to worry about ordered writes in 3958c2ecf20Sopenharmony_ci * ocfs2_writepage. 3968c2ecf20Sopenharmony_ci * 3978c2ecf20Sopenharmony_ci * ->writepage is called during the process of invalidating the page cache 3988c2ecf20Sopenharmony_ci * during blocked lock processing. It can't block on any cluster locks 3998c2ecf20Sopenharmony_ci * to during block mapping. It's relying on the fact that the block 4008c2ecf20Sopenharmony_ci * mapping can't have disappeared under the dirty pages that it is 4018c2ecf20Sopenharmony_ci * being asked to write back. 4028c2ecf20Sopenharmony_ci */ 4038c2ecf20Sopenharmony_cistatic int ocfs2_writepage(struct page *page, struct writeback_control *wbc) 4048c2ecf20Sopenharmony_ci{ 4058c2ecf20Sopenharmony_ci trace_ocfs2_writepage( 4068c2ecf20Sopenharmony_ci (unsigned long long)OCFS2_I(page->mapping->host)->ip_blkno, 4078c2ecf20Sopenharmony_ci page->index); 4088c2ecf20Sopenharmony_ci 4098c2ecf20Sopenharmony_ci return block_write_full_page(page, ocfs2_get_block, wbc); 4108c2ecf20Sopenharmony_ci} 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_ci/* Taken from ext3. We don't necessarily need the full blown 4138c2ecf20Sopenharmony_ci * functionality yet, but IMHO it's better to cut and paste the whole 4148c2ecf20Sopenharmony_ci * thing so we can avoid introducing our own bugs (and easily pick up 4158c2ecf20Sopenharmony_ci * their fixes when they happen) --Mark */ 4168c2ecf20Sopenharmony_ciint walk_page_buffers( handle_t *handle, 4178c2ecf20Sopenharmony_ci struct buffer_head *head, 4188c2ecf20Sopenharmony_ci unsigned from, 4198c2ecf20Sopenharmony_ci unsigned to, 4208c2ecf20Sopenharmony_ci int *partial, 4218c2ecf20Sopenharmony_ci int (*fn)( handle_t *handle, 4228c2ecf20Sopenharmony_ci struct buffer_head *bh)) 4238c2ecf20Sopenharmony_ci{ 4248c2ecf20Sopenharmony_ci struct buffer_head *bh; 4258c2ecf20Sopenharmony_ci unsigned block_start, block_end; 4268c2ecf20Sopenharmony_ci unsigned blocksize = head->b_size; 4278c2ecf20Sopenharmony_ci int err, ret = 0; 4288c2ecf20Sopenharmony_ci struct buffer_head *next; 4298c2ecf20Sopenharmony_ci 4308c2ecf20Sopenharmony_ci for ( bh = head, block_start = 0; 4318c2ecf20Sopenharmony_ci ret == 0 && (bh != head || !block_start); 4328c2ecf20Sopenharmony_ci block_start = block_end, bh = next) 4338c2ecf20Sopenharmony_ci { 4348c2ecf20Sopenharmony_ci next = bh->b_this_page; 4358c2ecf20Sopenharmony_ci block_end = block_start + blocksize; 4368c2ecf20Sopenharmony_ci if (block_end <= from || block_start >= to) { 4378c2ecf20Sopenharmony_ci if (partial && !buffer_uptodate(bh)) 4388c2ecf20Sopenharmony_ci *partial = 1; 4398c2ecf20Sopenharmony_ci continue; 4408c2ecf20Sopenharmony_ci } 4418c2ecf20Sopenharmony_ci err = (*fn)(handle, bh); 4428c2ecf20Sopenharmony_ci if (!ret) 4438c2ecf20Sopenharmony_ci ret = err; 4448c2ecf20Sopenharmony_ci } 4458c2ecf20Sopenharmony_ci return ret; 4468c2ecf20Sopenharmony_ci} 4478c2ecf20Sopenharmony_ci 4488c2ecf20Sopenharmony_cistatic sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) 4498c2ecf20Sopenharmony_ci{ 4508c2ecf20Sopenharmony_ci sector_t status; 4518c2ecf20Sopenharmony_ci u64 p_blkno = 0; 4528c2ecf20Sopenharmony_ci int err = 0; 4538c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 4548c2ecf20Sopenharmony_ci 4558c2ecf20Sopenharmony_ci trace_ocfs2_bmap((unsigned long long)OCFS2_I(inode)->ip_blkno, 4568c2ecf20Sopenharmony_ci (unsigned long long)block); 4578c2ecf20Sopenharmony_ci 4588c2ecf20Sopenharmony_ci /* 4598c2ecf20Sopenharmony_ci * The swap code (ab-)uses ->bmap to get a block mapping and then 4608c2ecf20Sopenharmony_ci * bypasseѕ the file system for actual I/O. We really can't allow 4618c2ecf20Sopenharmony_ci * that on refcounted inodes, so we have to skip out here. And yes, 4628c2ecf20Sopenharmony_ci * 0 is the magic code for a bmap error.. 4638c2ecf20Sopenharmony_ci */ 4648c2ecf20Sopenharmony_ci if (ocfs2_is_refcount_inode(inode)) 4658c2ecf20Sopenharmony_ci return 0; 4668c2ecf20Sopenharmony_ci 4678c2ecf20Sopenharmony_ci /* We don't need to lock journal system files, since they aren't 4688c2ecf20Sopenharmony_ci * accessed concurrently from multiple nodes. 4698c2ecf20Sopenharmony_ci */ 4708c2ecf20Sopenharmony_ci if (!INODE_JOURNAL(inode)) { 4718c2ecf20Sopenharmony_ci err = ocfs2_inode_lock(inode, NULL, 0); 4728c2ecf20Sopenharmony_ci if (err) { 4738c2ecf20Sopenharmony_ci if (err != -ENOENT) 4748c2ecf20Sopenharmony_ci mlog_errno(err); 4758c2ecf20Sopenharmony_ci goto bail; 4768c2ecf20Sopenharmony_ci } 4778c2ecf20Sopenharmony_ci down_read(&OCFS2_I(inode)->ip_alloc_sem); 4788c2ecf20Sopenharmony_ci } 4798c2ecf20Sopenharmony_ci 4808c2ecf20Sopenharmony_ci if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)) 4818c2ecf20Sopenharmony_ci err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, 4828c2ecf20Sopenharmony_ci NULL); 4838c2ecf20Sopenharmony_ci 4848c2ecf20Sopenharmony_ci if (!INODE_JOURNAL(inode)) { 4858c2ecf20Sopenharmony_ci up_read(&OCFS2_I(inode)->ip_alloc_sem); 4868c2ecf20Sopenharmony_ci ocfs2_inode_unlock(inode, 0); 4878c2ecf20Sopenharmony_ci } 4888c2ecf20Sopenharmony_ci 4898c2ecf20Sopenharmony_ci if (err) { 4908c2ecf20Sopenharmony_ci mlog(ML_ERROR, "get_blocks() failed, block = %llu\n", 4918c2ecf20Sopenharmony_ci (unsigned long long)block); 4928c2ecf20Sopenharmony_ci mlog_errno(err); 4938c2ecf20Sopenharmony_ci goto bail; 4948c2ecf20Sopenharmony_ci } 4958c2ecf20Sopenharmony_ci 4968c2ecf20Sopenharmony_cibail: 4978c2ecf20Sopenharmony_ci status = err ? 0 : p_blkno; 4988c2ecf20Sopenharmony_ci 4998c2ecf20Sopenharmony_ci return status; 5008c2ecf20Sopenharmony_ci} 5018c2ecf20Sopenharmony_ci 5028c2ecf20Sopenharmony_cistatic int ocfs2_releasepage(struct page *page, gfp_t wait) 5038c2ecf20Sopenharmony_ci{ 5048c2ecf20Sopenharmony_ci if (!page_has_buffers(page)) 5058c2ecf20Sopenharmony_ci return 0; 5068c2ecf20Sopenharmony_ci return try_to_free_buffers(page); 5078c2ecf20Sopenharmony_ci} 5088c2ecf20Sopenharmony_ci 5098c2ecf20Sopenharmony_cistatic void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, 5108c2ecf20Sopenharmony_ci u32 cpos, 5118c2ecf20Sopenharmony_ci unsigned int *start, 5128c2ecf20Sopenharmony_ci unsigned int *end) 5138c2ecf20Sopenharmony_ci{ 5148c2ecf20Sopenharmony_ci unsigned int cluster_start = 0, cluster_end = PAGE_SIZE; 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_ci if (unlikely(PAGE_SHIFT > osb->s_clustersize_bits)) { 5178c2ecf20Sopenharmony_ci unsigned int cpp; 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_ci cpp = 1 << (PAGE_SHIFT - osb->s_clustersize_bits); 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_ci cluster_start = cpos % cpp; 5228c2ecf20Sopenharmony_ci cluster_start = cluster_start << osb->s_clustersize_bits; 5238c2ecf20Sopenharmony_ci 5248c2ecf20Sopenharmony_ci cluster_end = cluster_start + osb->s_clustersize; 5258c2ecf20Sopenharmony_ci } 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_ci BUG_ON(cluster_start > PAGE_SIZE); 5288c2ecf20Sopenharmony_ci BUG_ON(cluster_end > PAGE_SIZE); 5298c2ecf20Sopenharmony_ci 5308c2ecf20Sopenharmony_ci if (start) 5318c2ecf20Sopenharmony_ci *start = cluster_start; 5328c2ecf20Sopenharmony_ci if (end) 5338c2ecf20Sopenharmony_ci *end = cluster_end; 5348c2ecf20Sopenharmony_ci} 5358c2ecf20Sopenharmony_ci 5368c2ecf20Sopenharmony_ci/* 5378c2ecf20Sopenharmony_ci * 'from' and 'to' are the region in the page to avoid zeroing. 5388c2ecf20Sopenharmony_ci * 5398c2ecf20Sopenharmony_ci * If pagesize > clustersize, this function will avoid zeroing outside 5408c2ecf20Sopenharmony_ci * of the cluster boundary. 5418c2ecf20Sopenharmony_ci * 5428c2ecf20Sopenharmony_ci * from == to == 0 is code for "zero the entire cluster region" 5438c2ecf20Sopenharmony_ci */ 5448c2ecf20Sopenharmony_cistatic void ocfs2_clear_page_regions(struct page *page, 5458c2ecf20Sopenharmony_ci struct ocfs2_super *osb, u32 cpos, 5468c2ecf20Sopenharmony_ci unsigned from, unsigned to) 5478c2ecf20Sopenharmony_ci{ 5488c2ecf20Sopenharmony_ci void *kaddr; 5498c2ecf20Sopenharmony_ci unsigned int cluster_start, cluster_end; 5508c2ecf20Sopenharmony_ci 5518c2ecf20Sopenharmony_ci ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end); 5528c2ecf20Sopenharmony_ci 5538c2ecf20Sopenharmony_ci kaddr = kmap_atomic(page); 5548c2ecf20Sopenharmony_ci 5558c2ecf20Sopenharmony_ci if (from || to) { 5568c2ecf20Sopenharmony_ci if (from > cluster_start) 5578c2ecf20Sopenharmony_ci memset(kaddr + cluster_start, 0, from - cluster_start); 5588c2ecf20Sopenharmony_ci if (to < cluster_end) 5598c2ecf20Sopenharmony_ci memset(kaddr + to, 0, cluster_end - to); 5608c2ecf20Sopenharmony_ci } else { 5618c2ecf20Sopenharmony_ci memset(kaddr + cluster_start, 0, cluster_end - cluster_start); 5628c2ecf20Sopenharmony_ci } 5638c2ecf20Sopenharmony_ci 5648c2ecf20Sopenharmony_ci kunmap_atomic(kaddr); 5658c2ecf20Sopenharmony_ci} 5668c2ecf20Sopenharmony_ci 5678c2ecf20Sopenharmony_ci/* 5688c2ecf20Sopenharmony_ci * Nonsparse file systems fully allocate before we get to the write 5698c2ecf20Sopenharmony_ci * code. This prevents ocfs2_write() from tagging the write as an 5708c2ecf20Sopenharmony_ci * allocating one, which means ocfs2_map_page_blocks() might try to 5718c2ecf20Sopenharmony_ci * read-in the blocks at the tail of our file. Avoid reading them by 5728c2ecf20Sopenharmony_ci * testing i_size against each block offset. 5738c2ecf20Sopenharmony_ci */ 5748c2ecf20Sopenharmony_cistatic int ocfs2_should_read_blk(struct inode *inode, struct page *page, 5758c2ecf20Sopenharmony_ci unsigned int block_start) 5768c2ecf20Sopenharmony_ci{ 5778c2ecf20Sopenharmony_ci u64 offset = page_offset(page) + block_start; 5788c2ecf20Sopenharmony_ci 5798c2ecf20Sopenharmony_ci if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 5808c2ecf20Sopenharmony_ci return 1; 5818c2ecf20Sopenharmony_ci 5828c2ecf20Sopenharmony_ci if (i_size_read(inode) > offset) 5838c2ecf20Sopenharmony_ci return 1; 5848c2ecf20Sopenharmony_ci 5858c2ecf20Sopenharmony_ci return 0; 5868c2ecf20Sopenharmony_ci} 5878c2ecf20Sopenharmony_ci 5888c2ecf20Sopenharmony_ci/* 5898c2ecf20Sopenharmony_ci * Some of this taken from __block_write_begin(). We already have our 5908c2ecf20Sopenharmony_ci * mapping by now though, and the entire write will be allocating or 5918c2ecf20Sopenharmony_ci * it won't, so not much need to use BH_New. 5928c2ecf20Sopenharmony_ci * 5938c2ecf20Sopenharmony_ci * This will also skip zeroing, which is handled externally. 5948c2ecf20Sopenharmony_ci */ 5958c2ecf20Sopenharmony_ciint ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, 5968c2ecf20Sopenharmony_ci struct inode *inode, unsigned int from, 5978c2ecf20Sopenharmony_ci unsigned int to, int new) 5988c2ecf20Sopenharmony_ci{ 5998c2ecf20Sopenharmony_ci int ret = 0; 6008c2ecf20Sopenharmony_ci struct buffer_head *head, *bh, *wait[2], **wait_bh = wait; 6018c2ecf20Sopenharmony_ci unsigned int block_end, block_start; 6028c2ecf20Sopenharmony_ci unsigned int bsize = i_blocksize(inode); 6038c2ecf20Sopenharmony_ci 6048c2ecf20Sopenharmony_ci if (!page_has_buffers(page)) 6058c2ecf20Sopenharmony_ci create_empty_buffers(page, bsize, 0); 6068c2ecf20Sopenharmony_ci 6078c2ecf20Sopenharmony_ci head = page_buffers(page); 6088c2ecf20Sopenharmony_ci for (bh = head, block_start = 0; bh != head || !block_start; 6098c2ecf20Sopenharmony_ci bh = bh->b_this_page, block_start += bsize) { 6108c2ecf20Sopenharmony_ci block_end = block_start + bsize; 6118c2ecf20Sopenharmony_ci 6128c2ecf20Sopenharmony_ci clear_buffer_new(bh); 6138c2ecf20Sopenharmony_ci 6148c2ecf20Sopenharmony_ci /* 6158c2ecf20Sopenharmony_ci * Ignore blocks outside of our i/o range - 6168c2ecf20Sopenharmony_ci * they may belong to unallocated clusters. 6178c2ecf20Sopenharmony_ci */ 6188c2ecf20Sopenharmony_ci if (block_start >= to || block_end <= from) { 6198c2ecf20Sopenharmony_ci if (PageUptodate(page)) 6208c2ecf20Sopenharmony_ci set_buffer_uptodate(bh); 6218c2ecf20Sopenharmony_ci continue; 6228c2ecf20Sopenharmony_ci } 6238c2ecf20Sopenharmony_ci 6248c2ecf20Sopenharmony_ci /* 6258c2ecf20Sopenharmony_ci * For an allocating write with cluster size >= page 6268c2ecf20Sopenharmony_ci * size, we always write the entire page. 6278c2ecf20Sopenharmony_ci */ 6288c2ecf20Sopenharmony_ci if (new) 6298c2ecf20Sopenharmony_ci set_buffer_new(bh); 6308c2ecf20Sopenharmony_ci 6318c2ecf20Sopenharmony_ci if (!buffer_mapped(bh)) { 6328c2ecf20Sopenharmony_ci map_bh(bh, inode->i_sb, *p_blkno); 6338c2ecf20Sopenharmony_ci clean_bdev_bh_alias(bh); 6348c2ecf20Sopenharmony_ci } 6358c2ecf20Sopenharmony_ci 6368c2ecf20Sopenharmony_ci if (PageUptodate(page)) { 6378c2ecf20Sopenharmony_ci if (!buffer_uptodate(bh)) 6388c2ecf20Sopenharmony_ci set_buffer_uptodate(bh); 6398c2ecf20Sopenharmony_ci } else if (!buffer_uptodate(bh) && !buffer_delay(bh) && 6408c2ecf20Sopenharmony_ci !buffer_new(bh) && 6418c2ecf20Sopenharmony_ci ocfs2_should_read_blk(inode, page, block_start) && 6428c2ecf20Sopenharmony_ci (block_start < from || block_end > to)) { 6438c2ecf20Sopenharmony_ci ll_rw_block(REQ_OP_READ, 0, 1, &bh); 6448c2ecf20Sopenharmony_ci *wait_bh++=bh; 6458c2ecf20Sopenharmony_ci } 6468c2ecf20Sopenharmony_ci 6478c2ecf20Sopenharmony_ci *p_blkno = *p_blkno + 1; 6488c2ecf20Sopenharmony_ci } 6498c2ecf20Sopenharmony_ci 6508c2ecf20Sopenharmony_ci /* 6518c2ecf20Sopenharmony_ci * If we issued read requests - let them complete. 6528c2ecf20Sopenharmony_ci */ 6538c2ecf20Sopenharmony_ci while(wait_bh > wait) { 6548c2ecf20Sopenharmony_ci wait_on_buffer(*--wait_bh); 6558c2ecf20Sopenharmony_ci if (!buffer_uptodate(*wait_bh)) 6568c2ecf20Sopenharmony_ci ret = -EIO; 6578c2ecf20Sopenharmony_ci } 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci if (ret == 0 || !new) 6608c2ecf20Sopenharmony_ci return ret; 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_ci /* 6638c2ecf20Sopenharmony_ci * If we get -EIO above, zero out any newly allocated blocks 6648c2ecf20Sopenharmony_ci * to avoid exposing stale data. 6658c2ecf20Sopenharmony_ci */ 6668c2ecf20Sopenharmony_ci bh = head; 6678c2ecf20Sopenharmony_ci block_start = 0; 6688c2ecf20Sopenharmony_ci do { 6698c2ecf20Sopenharmony_ci block_end = block_start + bsize; 6708c2ecf20Sopenharmony_ci if (block_end <= from) 6718c2ecf20Sopenharmony_ci goto next_bh; 6728c2ecf20Sopenharmony_ci if (block_start >= to) 6738c2ecf20Sopenharmony_ci break; 6748c2ecf20Sopenharmony_ci 6758c2ecf20Sopenharmony_ci zero_user(page, block_start, bh->b_size); 6768c2ecf20Sopenharmony_ci set_buffer_uptodate(bh); 6778c2ecf20Sopenharmony_ci mark_buffer_dirty(bh); 6788c2ecf20Sopenharmony_ci 6798c2ecf20Sopenharmony_cinext_bh: 6808c2ecf20Sopenharmony_ci block_start = block_end; 6818c2ecf20Sopenharmony_ci bh = bh->b_this_page; 6828c2ecf20Sopenharmony_ci } while (bh != head); 6838c2ecf20Sopenharmony_ci 6848c2ecf20Sopenharmony_ci return ret; 6858c2ecf20Sopenharmony_ci} 6868c2ecf20Sopenharmony_ci 6878c2ecf20Sopenharmony_ci#if (PAGE_SIZE >= OCFS2_MAX_CLUSTERSIZE) 6888c2ecf20Sopenharmony_ci#define OCFS2_MAX_CTXT_PAGES 1 6898c2ecf20Sopenharmony_ci#else 6908c2ecf20Sopenharmony_ci#define OCFS2_MAX_CTXT_PAGES (OCFS2_MAX_CLUSTERSIZE / PAGE_SIZE) 6918c2ecf20Sopenharmony_ci#endif 6928c2ecf20Sopenharmony_ci 6938c2ecf20Sopenharmony_ci#define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_SIZE / OCFS2_MIN_CLUSTERSIZE) 6948c2ecf20Sopenharmony_ci 6958c2ecf20Sopenharmony_cistruct ocfs2_unwritten_extent { 6968c2ecf20Sopenharmony_ci struct list_head ue_node; 6978c2ecf20Sopenharmony_ci struct list_head ue_ip_node; 6988c2ecf20Sopenharmony_ci u32 ue_cpos; 6998c2ecf20Sopenharmony_ci u32 ue_phys; 7008c2ecf20Sopenharmony_ci}; 7018c2ecf20Sopenharmony_ci 7028c2ecf20Sopenharmony_ci/* 7038c2ecf20Sopenharmony_ci * Describe the state of a single cluster to be written to. 7048c2ecf20Sopenharmony_ci */ 7058c2ecf20Sopenharmony_cistruct ocfs2_write_cluster_desc { 7068c2ecf20Sopenharmony_ci u32 c_cpos; 7078c2ecf20Sopenharmony_ci u32 c_phys; 7088c2ecf20Sopenharmony_ci /* 7098c2ecf20Sopenharmony_ci * Give this a unique field because c_phys eventually gets 7108c2ecf20Sopenharmony_ci * filled. 7118c2ecf20Sopenharmony_ci */ 7128c2ecf20Sopenharmony_ci unsigned c_new; 7138c2ecf20Sopenharmony_ci unsigned c_clear_unwritten; 7148c2ecf20Sopenharmony_ci unsigned c_needs_zero; 7158c2ecf20Sopenharmony_ci}; 7168c2ecf20Sopenharmony_ci 7178c2ecf20Sopenharmony_cistruct ocfs2_write_ctxt { 7188c2ecf20Sopenharmony_ci /* Logical cluster position / len of write */ 7198c2ecf20Sopenharmony_ci u32 w_cpos; 7208c2ecf20Sopenharmony_ci u32 w_clen; 7218c2ecf20Sopenharmony_ci 7228c2ecf20Sopenharmony_ci /* First cluster allocated in a nonsparse extend */ 7238c2ecf20Sopenharmony_ci u32 w_first_new_cpos; 7248c2ecf20Sopenharmony_ci 7258c2ecf20Sopenharmony_ci /* Type of caller. Must be one of buffer, mmap, direct. */ 7268c2ecf20Sopenharmony_ci ocfs2_write_type_t w_type; 7278c2ecf20Sopenharmony_ci 7288c2ecf20Sopenharmony_ci struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; 7298c2ecf20Sopenharmony_ci 7308c2ecf20Sopenharmony_ci /* 7318c2ecf20Sopenharmony_ci * This is true if page_size > cluster_size. 7328c2ecf20Sopenharmony_ci * 7338c2ecf20Sopenharmony_ci * It triggers a set of special cases during write which might 7348c2ecf20Sopenharmony_ci * have to deal with allocating writes to partial pages. 7358c2ecf20Sopenharmony_ci */ 7368c2ecf20Sopenharmony_ci unsigned int w_large_pages; 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_ci /* 7398c2ecf20Sopenharmony_ci * Pages involved in this write. 7408c2ecf20Sopenharmony_ci * 7418c2ecf20Sopenharmony_ci * w_target_page is the page being written to by the user. 7428c2ecf20Sopenharmony_ci * 7438c2ecf20Sopenharmony_ci * w_pages is an array of pages which always contains 7448c2ecf20Sopenharmony_ci * w_target_page, and in the case of an allocating write with 7458c2ecf20Sopenharmony_ci * page_size < cluster size, it will contain zero'd and mapped 7468c2ecf20Sopenharmony_ci * pages adjacent to w_target_page which need to be written 7478c2ecf20Sopenharmony_ci * out in so that future reads from that region will get 7488c2ecf20Sopenharmony_ci * zero's. 7498c2ecf20Sopenharmony_ci */ 7508c2ecf20Sopenharmony_ci unsigned int w_num_pages; 7518c2ecf20Sopenharmony_ci struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; 7528c2ecf20Sopenharmony_ci struct page *w_target_page; 7538c2ecf20Sopenharmony_ci 7548c2ecf20Sopenharmony_ci /* 7558c2ecf20Sopenharmony_ci * w_target_locked is used for page_mkwrite path indicating no unlocking 7568c2ecf20Sopenharmony_ci * against w_target_page in ocfs2_write_end_nolock. 7578c2ecf20Sopenharmony_ci */ 7588c2ecf20Sopenharmony_ci unsigned int w_target_locked:1; 7598c2ecf20Sopenharmony_ci 7608c2ecf20Sopenharmony_ci /* 7618c2ecf20Sopenharmony_ci * ocfs2_write_end() uses this to know what the real range to 7628c2ecf20Sopenharmony_ci * write in the target should be. 7638c2ecf20Sopenharmony_ci */ 7648c2ecf20Sopenharmony_ci unsigned int w_target_from; 7658c2ecf20Sopenharmony_ci unsigned int w_target_to; 7668c2ecf20Sopenharmony_ci 7678c2ecf20Sopenharmony_ci /* 7688c2ecf20Sopenharmony_ci * We could use journal_current_handle() but this is cleaner, 7698c2ecf20Sopenharmony_ci * IMHO -Mark 7708c2ecf20Sopenharmony_ci */ 7718c2ecf20Sopenharmony_ci handle_t *w_handle; 7728c2ecf20Sopenharmony_ci 7738c2ecf20Sopenharmony_ci struct buffer_head *w_di_bh; 7748c2ecf20Sopenharmony_ci 7758c2ecf20Sopenharmony_ci struct ocfs2_cached_dealloc_ctxt w_dealloc; 7768c2ecf20Sopenharmony_ci 7778c2ecf20Sopenharmony_ci struct list_head w_unwritten_list; 7788c2ecf20Sopenharmony_ci unsigned int w_unwritten_count; 7798c2ecf20Sopenharmony_ci}; 7808c2ecf20Sopenharmony_ci 7818c2ecf20Sopenharmony_civoid ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) 7828c2ecf20Sopenharmony_ci{ 7838c2ecf20Sopenharmony_ci int i; 7848c2ecf20Sopenharmony_ci 7858c2ecf20Sopenharmony_ci for(i = 0; i < num_pages; i++) { 7868c2ecf20Sopenharmony_ci if (pages[i]) { 7878c2ecf20Sopenharmony_ci unlock_page(pages[i]); 7888c2ecf20Sopenharmony_ci mark_page_accessed(pages[i]); 7898c2ecf20Sopenharmony_ci put_page(pages[i]); 7908c2ecf20Sopenharmony_ci } 7918c2ecf20Sopenharmony_ci } 7928c2ecf20Sopenharmony_ci} 7938c2ecf20Sopenharmony_ci 7948c2ecf20Sopenharmony_cistatic void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc) 7958c2ecf20Sopenharmony_ci{ 7968c2ecf20Sopenharmony_ci int i; 7978c2ecf20Sopenharmony_ci 7988c2ecf20Sopenharmony_ci /* 7998c2ecf20Sopenharmony_ci * w_target_locked is only set to true in the page_mkwrite() case. 8008c2ecf20Sopenharmony_ci * The intent is to allow us to lock the target page from write_begin() 8018c2ecf20Sopenharmony_ci * to write_end(). The caller must hold a ref on w_target_page. 8028c2ecf20Sopenharmony_ci */ 8038c2ecf20Sopenharmony_ci if (wc->w_target_locked) { 8048c2ecf20Sopenharmony_ci BUG_ON(!wc->w_target_page); 8058c2ecf20Sopenharmony_ci for (i = 0; i < wc->w_num_pages; i++) { 8068c2ecf20Sopenharmony_ci if (wc->w_target_page == wc->w_pages[i]) { 8078c2ecf20Sopenharmony_ci wc->w_pages[i] = NULL; 8088c2ecf20Sopenharmony_ci break; 8098c2ecf20Sopenharmony_ci } 8108c2ecf20Sopenharmony_ci } 8118c2ecf20Sopenharmony_ci mark_page_accessed(wc->w_target_page); 8128c2ecf20Sopenharmony_ci put_page(wc->w_target_page); 8138c2ecf20Sopenharmony_ci } 8148c2ecf20Sopenharmony_ci ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); 8158c2ecf20Sopenharmony_ci} 8168c2ecf20Sopenharmony_ci 8178c2ecf20Sopenharmony_cistatic void ocfs2_free_unwritten_list(struct inode *inode, 8188c2ecf20Sopenharmony_ci struct list_head *head) 8198c2ecf20Sopenharmony_ci{ 8208c2ecf20Sopenharmony_ci struct ocfs2_inode_info *oi = OCFS2_I(inode); 8218c2ecf20Sopenharmony_ci struct ocfs2_unwritten_extent *ue = NULL, *tmp = NULL; 8228c2ecf20Sopenharmony_ci 8238c2ecf20Sopenharmony_ci list_for_each_entry_safe(ue, tmp, head, ue_node) { 8248c2ecf20Sopenharmony_ci list_del(&ue->ue_node); 8258c2ecf20Sopenharmony_ci spin_lock(&oi->ip_lock); 8268c2ecf20Sopenharmony_ci list_del(&ue->ue_ip_node); 8278c2ecf20Sopenharmony_ci spin_unlock(&oi->ip_lock); 8288c2ecf20Sopenharmony_ci kfree(ue); 8298c2ecf20Sopenharmony_ci } 8308c2ecf20Sopenharmony_ci} 8318c2ecf20Sopenharmony_ci 8328c2ecf20Sopenharmony_cistatic void ocfs2_free_write_ctxt(struct inode *inode, 8338c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc) 8348c2ecf20Sopenharmony_ci{ 8358c2ecf20Sopenharmony_ci ocfs2_free_unwritten_list(inode, &wc->w_unwritten_list); 8368c2ecf20Sopenharmony_ci ocfs2_unlock_pages(wc); 8378c2ecf20Sopenharmony_ci brelse(wc->w_di_bh); 8388c2ecf20Sopenharmony_ci kfree(wc); 8398c2ecf20Sopenharmony_ci} 8408c2ecf20Sopenharmony_ci 8418c2ecf20Sopenharmony_cistatic int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, 8428c2ecf20Sopenharmony_ci struct ocfs2_super *osb, loff_t pos, 8438c2ecf20Sopenharmony_ci unsigned len, ocfs2_write_type_t type, 8448c2ecf20Sopenharmony_ci struct buffer_head *di_bh) 8458c2ecf20Sopenharmony_ci{ 8468c2ecf20Sopenharmony_ci u32 cend; 8478c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc; 8488c2ecf20Sopenharmony_ci 8498c2ecf20Sopenharmony_ci wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS); 8508c2ecf20Sopenharmony_ci if (!wc) 8518c2ecf20Sopenharmony_ci return -ENOMEM; 8528c2ecf20Sopenharmony_ci 8538c2ecf20Sopenharmony_ci wc->w_cpos = pos >> osb->s_clustersize_bits; 8548c2ecf20Sopenharmony_ci wc->w_first_new_cpos = UINT_MAX; 8558c2ecf20Sopenharmony_ci cend = (pos + len - 1) >> osb->s_clustersize_bits; 8568c2ecf20Sopenharmony_ci wc->w_clen = cend - wc->w_cpos + 1; 8578c2ecf20Sopenharmony_ci get_bh(di_bh); 8588c2ecf20Sopenharmony_ci wc->w_di_bh = di_bh; 8598c2ecf20Sopenharmony_ci wc->w_type = type; 8608c2ecf20Sopenharmony_ci 8618c2ecf20Sopenharmony_ci if (unlikely(PAGE_SHIFT > osb->s_clustersize_bits)) 8628c2ecf20Sopenharmony_ci wc->w_large_pages = 1; 8638c2ecf20Sopenharmony_ci else 8648c2ecf20Sopenharmony_ci wc->w_large_pages = 0; 8658c2ecf20Sopenharmony_ci 8668c2ecf20Sopenharmony_ci ocfs2_init_dealloc_ctxt(&wc->w_dealloc); 8678c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&wc->w_unwritten_list); 8688c2ecf20Sopenharmony_ci 8698c2ecf20Sopenharmony_ci *wcp = wc; 8708c2ecf20Sopenharmony_ci 8718c2ecf20Sopenharmony_ci return 0; 8728c2ecf20Sopenharmony_ci} 8738c2ecf20Sopenharmony_ci 8748c2ecf20Sopenharmony_ci/* 8758c2ecf20Sopenharmony_ci * If a page has any new buffers, zero them out here, and mark them uptodate 8768c2ecf20Sopenharmony_ci * and dirty so they'll be written out (in order to prevent uninitialised 8778c2ecf20Sopenharmony_ci * block data from leaking). And clear the new bit. 8788c2ecf20Sopenharmony_ci */ 8798c2ecf20Sopenharmony_cistatic void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to) 8808c2ecf20Sopenharmony_ci{ 8818c2ecf20Sopenharmony_ci unsigned int block_start, block_end; 8828c2ecf20Sopenharmony_ci struct buffer_head *head, *bh; 8838c2ecf20Sopenharmony_ci 8848c2ecf20Sopenharmony_ci BUG_ON(!PageLocked(page)); 8858c2ecf20Sopenharmony_ci if (!page_has_buffers(page)) 8868c2ecf20Sopenharmony_ci return; 8878c2ecf20Sopenharmony_ci 8888c2ecf20Sopenharmony_ci bh = head = page_buffers(page); 8898c2ecf20Sopenharmony_ci block_start = 0; 8908c2ecf20Sopenharmony_ci do { 8918c2ecf20Sopenharmony_ci block_end = block_start + bh->b_size; 8928c2ecf20Sopenharmony_ci 8938c2ecf20Sopenharmony_ci if (buffer_new(bh)) { 8948c2ecf20Sopenharmony_ci if (block_end > from && block_start < to) { 8958c2ecf20Sopenharmony_ci if (!PageUptodate(page)) { 8968c2ecf20Sopenharmony_ci unsigned start, end; 8978c2ecf20Sopenharmony_ci 8988c2ecf20Sopenharmony_ci start = max(from, block_start); 8998c2ecf20Sopenharmony_ci end = min(to, block_end); 9008c2ecf20Sopenharmony_ci 9018c2ecf20Sopenharmony_ci zero_user_segment(page, start, end); 9028c2ecf20Sopenharmony_ci set_buffer_uptodate(bh); 9038c2ecf20Sopenharmony_ci } 9048c2ecf20Sopenharmony_ci 9058c2ecf20Sopenharmony_ci clear_buffer_new(bh); 9068c2ecf20Sopenharmony_ci mark_buffer_dirty(bh); 9078c2ecf20Sopenharmony_ci } 9088c2ecf20Sopenharmony_ci } 9098c2ecf20Sopenharmony_ci 9108c2ecf20Sopenharmony_ci block_start = block_end; 9118c2ecf20Sopenharmony_ci bh = bh->b_this_page; 9128c2ecf20Sopenharmony_ci } while (bh != head); 9138c2ecf20Sopenharmony_ci} 9148c2ecf20Sopenharmony_ci 9158c2ecf20Sopenharmony_ci/* 9168c2ecf20Sopenharmony_ci * Only called when we have a failure during allocating write to write 9178c2ecf20Sopenharmony_ci * zero's to the newly allocated region. 9188c2ecf20Sopenharmony_ci */ 9198c2ecf20Sopenharmony_cistatic void ocfs2_write_failure(struct inode *inode, 9208c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc, 9218c2ecf20Sopenharmony_ci loff_t user_pos, unsigned user_len) 9228c2ecf20Sopenharmony_ci{ 9238c2ecf20Sopenharmony_ci int i; 9248c2ecf20Sopenharmony_ci unsigned from = user_pos & (PAGE_SIZE - 1), 9258c2ecf20Sopenharmony_ci to = user_pos + user_len; 9268c2ecf20Sopenharmony_ci struct page *tmppage; 9278c2ecf20Sopenharmony_ci 9288c2ecf20Sopenharmony_ci if (wc->w_target_page) 9298c2ecf20Sopenharmony_ci ocfs2_zero_new_buffers(wc->w_target_page, from, to); 9308c2ecf20Sopenharmony_ci 9318c2ecf20Sopenharmony_ci for(i = 0; i < wc->w_num_pages; i++) { 9328c2ecf20Sopenharmony_ci tmppage = wc->w_pages[i]; 9338c2ecf20Sopenharmony_ci 9348c2ecf20Sopenharmony_ci if (tmppage && page_has_buffers(tmppage)) { 9358c2ecf20Sopenharmony_ci if (ocfs2_should_order_data(inode)) 9368c2ecf20Sopenharmony_ci ocfs2_jbd2_inode_add_write(wc->w_handle, inode, 9378c2ecf20Sopenharmony_ci user_pos, user_len); 9388c2ecf20Sopenharmony_ci 9398c2ecf20Sopenharmony_ci block_commit_write(tmppage, from, to); 9408c2ecf20Sopenharmony_ci } 9418c2ecf20Sopenharmony_ci } 9428c2ecf20Sopenharmony_ci} 9438c2ecf20Sopenharmony_ci 9448c2ecf20Sopenharmony_cistatic int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, 9458c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc, 9468c2ecf20Sopenharmony_ci struct page *page, u32 cpos, 9478c2ecf20Sopenharmony_ci loff_t user_pos, unsigned user_len, 9488c2ecf20Sopenharmony_ci int new) 9498c2ecf20Sopenharmony_ci{ 9508c2ecf20Sopenharmony_ci int ret; 9518c2ecf20Sopenharmony_ci unsigned int map_from = 0, map_to = 0; 9528c2ecf20Sopenharmony_ci unsigned int cluster_start, cluster_end; 9538c2ecf20Sopenharmony_ci unsigned int user_data_from = 0, user_data_to = 0; 9548c2ecf20Sopenharmony_ci 9558c2ecf20Sopenharmony_ci ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), cpos, 9568c2ecf20Sopenharmony_ci &cluster_start, &cluster_end); 9578c2ecf20Sopenharmony_ci 9588c2ecf20Sopenharmony_ci /* treat the write as new if the a hole/lseek spanned across 9598c2ecf20Sopenharmony_ci * the page boundary. 9608c2ecf20Sopenharmony_ci */ 9618c2ecf20Sopenharmony_ci new = new | ((i_size_read(inode) <= page_offset(page)) && 9628c2ecf20Sopenharmony_ci (page_offset(page) <= user_pos)); 9638c2ecf20Sopenharmony_ci 9648c2ecf20Sopenharmony_ci if (page == wc->w_target_page) { 9658c2ecf20Sopenharmony_ci map_from = user_pos & (PAGE_SIZE - 1); 9668c2ecf20Sopenharmony_ci map_to = map_from + user_len; 9678c2ecf20Sopenharmony_ci 9688c2ecf20Sopenharmony_ci if (new) 9698c2ecf20Sopenharmony_ci ret = ocfs2_map_page_blocks(page, p_blkno, inode, 9708c2ecf20Sopenharmony_ci cluster_start, cluster_end, 9718c2ecf20Sopenharmony_ci new); 9728c2ecf20Sopenharmony_ci else 9738c2ecf20Sopenharmony_ci ret = ocfs2_map_page_blocks(page, p_blkno, inode, 9748c2ecf20Sopenharmony_ci map_from, map_to, new); 9758c2ecf20Sopenharmony_ci if (ret) { 9768c2ecf20Sopenharmony_ci mlog_errno(ret); 9778c2ecf20Sopenharmony_ci goto out; 9788c2ecf20Sopenharmony_ci } 9798c2ecf20Sopenharmony_ci 9808c2ecf20Sopenharmony_ci user_data_from = map_from; 9818c2ecf20Sopenharmony_ci user_data_to = map_to; 9828c2ecf20Sopenharmony_ci if (new) { 9838c2ecf20Sopenharmony_ci map_from = cluster_start; 9848c2ecf20Sopenharmony_ci map_to = cluster_end; 9858c2ecf20Sopenharmony_ci } 9868c2ecf20Sopenharmony_ci } else { 9878c2ecf20Sopenharmony_ci /* 9888c2ecf20Sopenharmony_ci * If we haven't allocated the new page yet, we 9898c2ecf20Sopenharmony_ci * shouldn't be writing it out without copying user 9908c2ecf20Sopenharmony_ci * data. This is likely a math error from the caller. 9918c2ecf20Sopenharmony_ci */ 9928c2ecf20Sopenharmony_ci BUG_ON(!new); 9938c2ecf20Sopenharmony_ci 9948c2ecf20Sopenharmony_ci map_from = cluster_start; 9958c2ecf20Sopenharmony_ci map_to = cluster_end; 9968c2ecf20Sopenharmony_ci 9978c2ecf20Sopenharmony_ci ret = ocfs2_map_page_blocks(page, p_blkno, inode, 9988c2ecf20Sopenharmony_ci cluster_start, cluster_end, new); 9998c2ecf20Sopenharmony_ci if (ret) { 10008c2ecf20Sopenharmony_ci mlog_errno(ret); 10018c2ecf20Sopenharmony_ci goto out; 10028c2ecf20Sopenharmony_ci } 10038c2ecf20Sopenharmony_ci } 10048c2ecf20Sopenharmony_ci 10058c2ecf20Sopenharmony_ci /* 10068c2ecf20Sopenharmony_ci * Parts of newly allocated pages need to be zero'd. 10078c2ecf20Sopenharmony_ci * 10088c2ecf20Sopenharmony_ci * Above, we have also rewritten 'to' and 'from' - as far as 10098c2ecf20Sopenharmony_ci * the rest of the function is concerned, the entire cluster 10108c2ecf20Sopenharmony_ci * range inside of a page needs to be written. 10118c2ecf20Sopenharmony_ci * 10128c2ecf20Sopenharmony_ci * We can skip this if the page is up to date - it's already 10138c2ecf20Sopenharmony_ci * been zero'd from being read in as a hole. 10148c2ecf20Sopenharmony_ci */ 10158c2ecf20Sopenharmony_ci if (new && !PageUptodate(page)) 10168c2ecf20Sopenharmony_ci ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb), 10178c2ecf20Sopenharmony_ci cpos, user_data_from, user_data_to); 10188c2ecf20Sopenharmony_ci 10198c2ecf20Sopenharmony_ci flush_dcache_page(page); 10208c2ecf20Sopenharmony_ci 10218c2ecf20Sopenharmony_ciout: 10228c2ecf20Sopenharmony_ci return ret; 10238c2ecf20Sopenharmony_ci} 10248c2ecf20Sopenharmony_ci 10258c2ecf20Sopenharmony_ci/* 10268c2ecf20Sopenharmony_ci * This function will only grab one clusters worth of pages. 10278c2ecf20Sopenharmony_ci */ 10288c2ecf20Sopenharmony_cistatic int ocfs2_grab_pages_for_write(struct address_space *mapping, 10298c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc, 10308c2ecf20Sopenharmony_ci u32 cpos, loff_t user_pos, 10318c2ecf20Sopenharmony_ci unsigned user_len, int new, 10328c2ecf20Sopenharmony_ci struct page *mmap_page) 10338c2ecf20Sopenharmony_ci{ 10348c2ecf20Sopenharmony_ci int ret = 0, i; 10358c2ecf20Sopenharmony_ci unsigned long start, target_index, end_index, index; 10368c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 10378c2ecf20Sopenharmony_ci loff_t last_byte; 10388c2ecf20Sopenharmony_ci 10398c2ecf20Sopenharmony_ci target_index = user_pos >> PAGE_SHIFT; 10408c2ecf20Sopenharmony_ci 10418c2ecf20Sopenharmony_ci /* 10428c2ecf20Sopenharmony_ci * Figure out how many pages we'll be manipulating here. For 10438c2ecf20Sopenharmony_ci * non allocating write, we just change the one 10448c2ecf20Sopenharmony_ci * page. Otherwise, we'll need a whole clusters worth. If we're 10458c2ecf20Sopenharmony_ci * writing past i_size, we only need enough pages to cover the 10468c2ecf20Sopenharmony_ci * last page of the write. 10478c2ecf20Sopenharmony_ci */ 10488c2ecf20Sopenharmony_ci if (new) { 10498c2ecf20Sopenharmony_ci wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb); 10508c2ecf20Sopenharmony_ci start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos); 10518c2ecf20Sopenharmony_ci /* 10528c2ecf20Sopenharmony_ci * We need the index *past* the last page we could possibly 10538c2ecf20Sopenharmony_ci * touch. This is the page past the end of the write or 10548c2ecf20Sopenharmony_ci * i_size, whichever is greater. 10558c2ecf20Sopenharmony_ci */ 10568c2ecf20Sopenharmony_ci last_byte = max(user_pos + user_len, i_size_read(inode)); 10578c2ecf20Sopenharmony_ci BUG_ON(last_byte < 1); 10588c2ecf20Sopenharmony_ci end_index = ((last_byte - 1) >> PAGE_SHIFT) + 1; 10598c2ecf20Sopenharmony_ci if ((start + wc->w_num_pages) > end_index) 10608c2ecf20Sopenharmony_ci wc->w_num_pages = end_index - start; 10618c2ecf20Sopenharmony_ci } else { 10628c2ecf20Sopenharmony_ci wc->w_num_pages = 1; 10638c2ecf20Sopenharmony_ci start = target_index; 10648c2ecf20Sopenharmony_ci } 10658c2ecf20Sopenharmony_ci end_index = (user_pos + user_len - 1) >> PAGE_SHIFT; 10668c2ecf20Sopenharmony_ci 10678c2ecf20Sopenharmony_ci for(i = 0; i < wc->w_num_pages; i++) { 10688c2ecf20Sopenharmony_ci index = start + i; 10698c2ecf20Sopenharmony_ci 10708c2ecf20Sopenharmony_ci if (index >= target_index && index <= end_index && 10718c2ecf20Sopenharmony_ci wc->w_type == OCFS2_WRITE_MMAP) { 10728c2ecf20Sopenharmony_ci /* 10738c2ecf20Sopenharmony_ci * ocfs2_pagemkwrite() is a little different 10748c2ecf20Sopenharmony_ci * and wants us to directly use the page 10758c2ecf20Sopenharmony_ci * passed in. 10768c2ecf20Sopenharmony_ci */ 10778c2ecf20Sopenharmony_ci lock_page(mmap_page); 10788c2ecf20Sopenharmony_ci 10798c2ecf20Sopenharmony_ci /* Exit and let the caller retry */ 10808c2ecf20Sopenharmony_ci if (mmap_page->mapping != mapping) { 10818c2ecf20Sopenharmony_ci WARN_ON(mmap_page->mapping); 10828c2ecf20Sopenharmony_ci unlock_page(mmap_page); 10838c2ecf20Sopenharmony_ci ret = -EAGAIN; 10848c2ecf20Sopenharmony_ci goto out; 10858c2ecf20Sopenharmony_ci } 10868c2ecf20Sopenharmony_ci 10878c2ecf20Sopenharmony_ci get_page(mmap_page); 10888c2ecf20Sopenharmony_ci wc->w_pages[i] = mmap_page; 10898c2ecf20Sopenharmony_ci wc->w_target_locked = true; 10908c2ecf20Sopenharmony_ci } else if (index >= target_index && index <= end_index && 10918c2ecf20Sopenharmony_ci wc->w_type == OCFS2_WRITE_DIRECT) { 10928c2ecf20Sopenharmony_ci /* Direct write has no mapping page. */ 10938c2ecf20Sopenharmony_ci wc->w_pages[i] = NULL; 10948c2ecf20Sopenharmony_ci continue; 10958c2ecf20Sopenharmony_ci } else { 10968c2ecf20Sopenharmony_ci wc->w_pages[i] = find_or_create_page(mapping, index, 10978c2ecf20Sopenharmony_ci GFP_NOFS); 10988c2ecf20Sopenharmony_ci if (!wc->w_pages[i]) { 10998c2ecf20Sopenharmony_ci ret = -ENOMEM; 11008c2ecf20Sopenharmony_ci mlog_errno(ret); 11018c2ecf20Sopenharmony_ci goto out; 11028c2ecf20Sopenharmony_ci } 11038c2ecf20Sopenharmony_ci } 11048c2ecf20Sopenharmony_ci wait_for_stable_page(wc->w_pages[i]); 11058c2ecf20Sopenharmony_ci 11068c2ecf20Sopenharmony_ci if (index == target_index) 11078c2ecf20Sopenharmony_ci wc->w_target_page = wc->w_pages[i]; 11088c2ecf20Sopenharmony_ci } 11098c2ecf20Sopenharmony_ciout: 11108c2ecf20Sopenharmony_ci if (ret) 11118c2ecf20Sopenharmony_ci wc->w_target_locked = false; 11128c2ecf20Sopenharmony_ci return ret; 11138c2ecf20Sopenharmony_ci} 11148c2ecf20Sopenharmony_ci 11158c2ecf20Sopenharmony_ci/* 11168c2ecf20Sopenharmony_ci * Prepare a single cluster for write one cluster into the file. 11178c2ecf20Sopenharmony_ci */ 11188c2ecf20Sopenharmony_cistatic int ocfs2_write_cluster(struct address_space *mapping, 11198c2ecf20Sopenharmony_ci u32 *phys, unsigned int new, 11208c2ecf20Sopenharmony_ci unsigned int clear_unwritten, 11218c2ecf20Sopenharmony_ci unsigned int should_zero, 11228c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *data_ac, 11238c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *meta_ac, 11248c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc, u32 cpos, 11258c2ecf20Sopenharmony_ci loff_t user_pos, unsigned user_len) 11268c2ecf20Sopenharmony_ci{ 11278c2ecf20Sopenharmony_ci int ret, i; 11288c2ecf20Sopenharmony_ci u64 p_blkno; 11298c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 11308c2ecf20Sopenharmony_ci struct ocfs2_extent_tree et; 11318c2ecf20Sopenharmony_ci int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 11328c2ecf20Sopenharmony_ci 11338c2ecf20Sopenharmony_ci if (new) { 11348c2ecf20Sopenharmony_ci u32 tmp_pos; 11358c2ecf20Sopenharmony_ci 11368c2ecf20Sopenharmony_ci /* 11378c2ecf20Sopenharmony_ci * This is safe to call with the page locks - it won't take 11388c2ecf20Sopenharmony_ci * any additional semaphores or cluster locks. 11398c2ecf20Sopenharmony_ci */ 11408c2ecf20Sopenharmony_ci tmp_pos = cpos; 11418c2ecf20Sopenharmony_ci ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode, 11428c2ecf20Sopenharmony_ci &tmp_pos, 1, !clear_unwritten, 11438c2ecf20Sopenharmony_ci wc->w_di_bh, wc->w_handle, 11448c2ecf20Sopenharmony_ci data_ac, meta_ac, NULL); 11458c2ecf20Sopenharmony_ci /* 11468c2ecf20Sopenharmony_ci * This shouldn't happen because we must have already 11478c2ecf20Sopenharmony_ci * calculated the correct meta data allocation required. The 11488c2ecf20Sopenharmony_ci * internal tree allocation code should know how to increase 11498c2ecf20Sopenharmony_ci * transaction credits itself. 11508c2ecf20Sopenharmony_ci * 11518c2ecf20Sopenharmony_ci * If need be, we could handle -EAGAIN for a 11528c2ecf20Sopenharmony_ci * RESTART_TRANS here. 11538c2ecf20Sopenharmony_ci */ 11548c2ecf20Sopenharmony_ci mlog_bug_on_msg(ret == -EAGAIN, 11558c2ecf20Sopenharmony_ci "Inode %llu: EAGAIN return during allocation.\n", 11568c2ecf20Sopenharmony_ci (unsigned long long)OCFS2_I(inode)->ip_blkno); 11578c2ecf20Sopenharmony_ci if (ret < 0) { 11588c2ecf20Sopenharmony_ci mlog_errno(ret); 11598c2ecf20Sopenharmony_ci goto out; 11608c2ecf20Sopenharmony_ci } 11618c2ecf20Sopenharmony_ci } else if (clear_unwritten) { 11628c2ecf20Sopenharmony_ci ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), 11638c2ecf20Sopenharmony_ci wc->w_di_bh); 11648c2ecf20Sopenharmony_ci ret = ocfs2_mark_extent_written(inode, &et, 11658c2ecf20Sopenharmony_ci wc->w_handle, cpos, 1, *phys, 11668c2ecf20Sopenharmony_ci meta_ac, &wc->w_dealloc); 11678c2ecf20Sopenharmony_ci if (ret < 0) { 11688c2ecf20Sopenharmony_ci mlog_errno(ret); 11698c2ecf20Sopenharmony_ci goto out; 11708c2ecf20Sopenharmony_ci } 11718c2ecf20Sopenharmony_ci } 11728c2ecf20Sopenharmony_ci 11738c2ecf20Sopenharmony_ci /* 11748c2ecf20Sopenharmony_ci * The only reason this should fail is due to an inability to 11758c2ecf20Sopenharmony_ci * find the extent added. 11768c2ecf20Sopenharmony_ci */ 11778c2ecf20Sopenharmony_ci ret = ocfs2_get_clusters(inode, cpos, phys, NULL, NULL); 11788c2ecf20Sopenharmony_ci if (ret < 0) { 11798c2ecf20Sopenharmony_ci mlog(ML_ERROR, "Get physical blkno failed for inode %llu, " 11808c2ecf20Sopenharmony_ci "at logical cluster %u", 11818c2ecf20Sopenharmony_ci (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); 11828c2ecf20Sopenharmony_ci goto out; 11838c2ecf20Sopenharmony_ci } 11848c2ecf20Sopenharmony_ci 11858c2ecf20Sopenharmony_ci BUG_ON(*phys == 0); 11868c2ecf20Sopenharmony_ci 11878c2ecf20Sopenharmony_ci p_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *phys); 11888c2ecf20Sopenharmony_ci if (!should_zero) 11898c2ecf20Sopenharmony_ci p_blkno += (user_pos >> inode->i_sb->s_blocksize_bits) & (u64)(bpc - 1); 11908c2ecf20Sopenharmony_ci 11918c2ecf20Sopenharmony_ci for(i = 0; i < wc->w_num_pages; i++) { 11928c2ecf20Sopenharmony_ci int tmpret; 11938c2ecf20Sopenharmony_ci 11948c2ecf20Sopenharmony_ci /* This is the direct io target page. */ 11958c2ecf20Sopenharmony_ci if (wc->w_pages[i] == NULL) { 11968c2ecf20Sopenharmony_ci p_blkno++; 11978c2ecf20Sopenharmony_ci continue; 11988c2ecf20Sopenharmony_ci } 11998c2ecf20Sopenharmony_ci 12008c2ecf20Sopenharmony_ci tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc, 12018c2ecf20Sopenharmony_ci wc->w_pages[i], cpos, 12028c2ecf20Sopenharmony_ci user_pos, user_len, 12038c2ecf20Sopenharmony_ci should_zero); 12048c2ecf20Sopenharmony_ci if (tmpret) { 12058c2ecf20Sopenharmony_ci mlog_errno(tmpret); 12068c2ecf20Sopenharmony_ci if (ret == 0) 12078c2ecf20Sopenharmony_ci ret = tmpret; 12088c2ecf20Sopenharmony_ci } 12098c2ecf20Sopenharmony_ci } 12108c2ecf20Sopenharmony_ci 12118c2ecf20Sopenharmony_ci /* 12128c2ecf20Sopenharmony_ci * We only have cleanup to do in case of allocating write. 12138c2ecf20Sopenharmony_ci */ 12148c2ecf20Sopenharmony_ci if (ret && new) 12158c2ecf20Sopenharmony_ci ocfs2_write_failure(inode, wc, user_pos, user_len); 12168c2ecf20Sopenharmony_ci 12178c2ecf20Sopenharmony_ciout: 12188c2ecf20Sopenharmony_ci 12198c2ecf20Sopenharmony_ci return ret; 12208c2ecf20Sopenharmony_ci} 12218c2ecf20Sopenharmony_ci 12228c2ecf20Sopenharmony_cistatic int ocfs2_write_cluster_by_desc(struct address_space *mapping, 12238c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *data_ac, 12248c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *meta_ac, 12258c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc, 12268c2ecf20Sopenharmony_ci loff_t pos, unsigned len) 12278c2ecf20Sopenharmony_ci{ 12288c2ecf20Sopenharmony_ci int ret, i; 12298c2ecf20Sopenharmony_ci loff_t cluster_off; 12308c2ecf20Sopenharmony_ci unsigned int local_len = len; 12318c2ecf20Sopenharmony_ci struct ocfs2_write_cluster_desc *desc; 12328c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(mapping->host->i_sb); 12338c2ecf20Sopenharmony_ci 12348c2ecf20Sopenharmony_ci for (i = 0; i < wc->w_clen; i++) { 12358c2ecf20Sopenharmony_ci desc = &wc->w_desc[i]; 12368c2ecf20Sopenharmony_ci 12378c2ecf20Sopenharmony_ci /* 12388c2ecf20Sopenharmony_ci * We have to make sure that the total write passed in 12398c2ecf20Sopenharmony_ci * doesn't extend past a single cluster. 12408c2ecf20Sopenharmony_ci */ 12418c2ecf20Sopenharmony_ci local_len = len; 12428c2ecf20Sopenharmony_ci cluster_off = pos & (osb->s_clustersize - 1); 12438c2ecf20Sopenharmony_ci if ((cluster_off + local_len) > osb->s_clustersize) 12448c2ecf20Sopenharmony_ci local_len = osb->s_clustersize - cluster_off; 12458c2ecf20Sopenharmony_ci 12468c2ecf20Sopenharmony_ci ret = ocfs2_write_cluster(mapping, &desc->c_phys, 12478c2ecf20Sopenharmony_ci desc->c_new, 12488c2ecf20Sopenharmony_ci desc->c_clear_unwritten, 12498c2ecf20Sopenharmony_ci desc->c_needs_zero, 12508c2ecf20Sopenharmony_ci data_ac, meta_ac, 12518c2ecf20Sopenharmony_ci wc, desc->c_cpos, pos, local_len); 12528c2ecf20Sopenharmony_ci if (ret) { 12538c2ecf20Sopenharmony_ci mlog_errno(ret); 12548c2ecf20Sopenharmony_ci goto out; 12558c2ecf20Sopenharmony_ci } 12568c2ecf20Sopenharmony_ci 12578c2ecf20Sopenharmony_ci len -= local_len; 12588c2ecf20Sopenharmony_ci pos += local_len; 12598c2ecf20Sopenharmony_ci } 12608c2ecf20Sopenharmony_ci 12618c2ecf20Sopenharmony_ci ret = 0; 12628c2ecf20Sopenharmony_ciout: 12638c2ecf20Sopenharmony_ci return ret; 12648c2ecf20Sopenharmony_ci} 12658c2ecf20Sopenharmony_ci 12668c2ecf20Sopenharmony_ci/* 12678c2ecf20Sopenharmony_ci * ocfs2_write_end() wants to know which parts of the target page it 12688c2ecf20Sopenharmony_ci * should complete the write on. It's easiest to compute them ahead of 12698c2ecf20Sopenharmony_ci * time when a more complete view of the write is available. 12708c2ecf20Sopenharmony_ci */ 12718c2ecf20Sopenharmony_cistatic void ocfs2_set_target_boundaries(struct ocfs2_super *osb, 12728c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc, 12738c2ecf20Sopenharmony_ci loff_t pos, unsigned len, int alloc) 12748c2ecf20Sopenharmony_ci{ 12758c2ecf20Sopenharmony_ci struct ocfs2_write_cluster_desc *desc; 12768c2ecf20Sopenharmony_ci 12778c2ecf20Sopenharmony_ci wc->w_target_from = pos & (PAGE_SIZE - 1); 12788c2ecf20Sopenharmony_ci wc->w_target_to = wc->w_target_from + len; 12798c2ecf20Sopenharmony_ci 12808c2ecf20Sopenharmony_ci if (alloc == 0) 12818c2ecf20Sopenharmony_ci return; 12828c2ecf20Sopenharmony_ci 12838c2ecf20Sopenharmony_ci /* 12848c2ecf20Sopenharmony_ci * Allocating write - we may have different boundaries based 12858c2ecf20Sopenharmony_ci * on page size and cluster size. 12868c2ecf20Sopenharmony_ci * 12878c2ecf20Sopenharmony_ci * NOTE: We can no longer compute one value from the other as 12888c2ecf20Sopenharmony_ci * the actual write length and user provided length may be 12898c2ecf20Sopenharmony_ci * different. 12908c2ecf20Sopenharmony_ci */ 12918c2ecf20Sopenharmony_ci 12928c2ecf20Sopenharmony_ci if (wc->w_large_pages) { 12938c2ecf20Sopenharmony_ci /* 12948c2ecf20Sopenharmony_ci * We only care about the 1st and last cluster within 12958c2ecf20Sopenharmony_ci * our range and whether they should be zero'd or not. Either 12968c2ecf20Sopenharmony_ci * value may be extended out to the start/end of a 12978c2ecf20Sopenharmony_ci * newly allocated cluster. 12988c2ecf20Sopenharmony_ci */ 12998c2ecf20Sopenharmony_ci desc = &wc->w_desc[0]; 13008c2ecf20Sopenharmony_ci if (desc->c_needs_zero) 13018c2ecf20Sopenharmony_ci ocfs2_figure_cluster_boundaries(osb, 13028c2ecf20Sopenharmony_ci desc->c_cpos, 13038c2ecf20Sopenharmony_ci &wc->w_target_from, 13048c2ecf20Sopenharmony_ci NULL); 13058c2ecf20Sopenharmony_ci 13068c2ecf20Sopenharmony_ci desc = &wc->w_desc[wc->w_clen - 1]; 13078c2ecf20Sopenharmony_ci if (desc->c_needs_zero) 13088c2ecf20Sopenharmony_ci ocfs2_figure_cluster_boundaries(osb, 13098c2ecf20Sopenharmony_ci desc->c_cpos, 13108c2ecf20Sopenharmony_ci NULL, 13118c2ecf20Sopenharmony_ci &wc->w_target_to); 13128c2ecf20Sopenharmony_ci } else { 13138c2ecf20Sopenharmony_ci wc->w_target_from = 0; 13148c2ecf20Sopenharmony_ci wc->w_target_to = PAGE_SIZE; 13158c2ecf20Sopenharmony_ci } 13168c2ecf20Sopenharmony_ci} 13178c2ecf20Sopenharmony_ci 13188c2ecf20Sopenharmony_ci/* 13198c2ecf20Sopenharmony_ci * Check if this extent is marked UNWRITTEN by direct io. If so, we need not to 13208c2ecf20Sopenharmony_ci * do the zero work. And should not to clear UNWRITTEN since it will be cleared 13218c2ecf20Sopenharmony_ci * by the direct io procedure. 13228c2ecf20Sopenharmony_ci * If this is a new extent that allocated by direct io, we should mark it in 13238c2ecf20Sopenharmony_ci * the ip_unwritten_list. 13248c2ecf20Sopenharmony_ci */ 13258c2ecf20Sopenharmony_cistatic int ocfs2_unwritten_check(struct inode *inode, 13268c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc, 13278c2ecf20Sopenharmony_ci struct ocfs2_write_cluster_desc *desc) 13288c2ecf20Sopenharmony_ci{ 13298c2ecf20Sopenharmony_ci struct ocfs2_inode_info *oi = OCFS2_I(inode); 13308c2ecf20Sopenharmony_ci struct ocfs2_unwritten_extent *ue = NULL, *new = NULL; 13318c2ecf20Sopenharmony_ci int ret = 0; 13328c2ecf20Sopenharmony_ci 13338c2ecf20Sopenharmony_ci if (!desc->c_needs_zero) 13348c2ecf20Sopenharmony_ci return 0; 13358c2ecf20Sopenharmony_ci 13368c2ecf20Sopenharmony_ciretry: 13378c2ecf20Sopenharmony_ci spin_lock(&oi->ip_lock); 13388c2ecf20Sopenharmony_ci /* Needs not to zero no metter buffer or direct. The one who is zero 13398c2ecf20Sopenharmony_ci * the cluster is doing zero. And he will clear unwritten after all 13408c2ecf20Sopenharmony_ci * cluster io finished. */ 13418c2ecf20Sopenharmony_ci list_for_each_entry(ue, &oi->ip_unwritten_list, ue_ip_node) { 13428c2ecf20Sopenharmony_ci if (desc->c_cpos == ue->ue_cpos) { 13438c2ecf20Sopenharmony_ci BUG_ON(desc->c_new); 13448c2ecf20Sopenharmony_ci desc->c_needs_zero = 0; 13458c2ecf20Sopenharmony_ci desc->c_clear_unwritten = 0; 13468c2ecf20Sopenharmony_ci goto unlock; 13478c2ecf20Sopenharmony_ci } 13488c2ecf20Sopenharmony_ci } 13498c2ecf20Sopenharmony_ci 13508c2ecf20Sopenharmony_ci if (wc->w_type != OCFS2_WRITE_DIRECT) 13518c2ecf20Sopenharmony_ci goto unlock; 13528c2ecf20Sopenharmony_ci 13538c2ecf20Sopenharmony_ci if (new == NULL) { 13548c2ecf20Sopenharmony_ci spin_unlock(&oi->ip_lock); 13558c2ecf20Sopenharmony_ci new = kmalloc(sizeof(struct ocfs2_unwritten_extent), 13568c2ecf20Sopenharmony_ci GFP_NOFS); 13578c2ecf20Sopenharmony_ci if (new == NULL) { 13588c2ecf20Sopenharmony_ci ret = -ENOMEM; 13598c2ecf20Sopenharmony_ci goto out; 13608c2ecf20Sopenharmony_ci } 13618c2ecf20Sopenharmony_ci goto retry; 13628c2ecf20Sopenharmony_ci } 13638c2ecf20Sopenharmony_ci /* This direct write will doing zero. */ 13648c2ecf20Sopenharmony_ci new->ue_cpos = desc->c_cpos; 13658c2ecf20Sopenharmony_ci new->ue_phys = desc->c_phys; 13668c2ecf20Sopenharmony_ci desc->c_clear_unwritten = 0; 13678c2ecf20Sopenharmony_ci list_add_tail(&new->ue_ip_node, &oi->ip_unwritten_list); 13688c2ecf20Sopenharmony_ci list_add_tail(&new->ue_node, &wc->w_unwritten_list); 13698c2ecf20Sopenharmony_ci wc->w_unwritten_count++; 13708c2ecf20Sopenharmony_ci new = NULL; 13718c2ecf20Sopenharmony_ciunlock: 13728c2ecf20Sopenharmony_ci spin_unlock(&oi->ip_lock); 13738c2ecf20Sopenharmony_ciout: 13748c2ecf20Sopenharmony_ci kfree(new); 13758c2ecf20Sopenharmony_ci return ret; 13768c2ecf20Sopenharmony_ci} 13778c2ecf20Sopenharmony_ci 13788c2ecf20Sopenharmony_ci/* 13798c2ecf20Sopenharmony_ci * Populate each single-cluster write descriptor in the write context 13808c2ecf20Sopenharmony_ci * with information about the i/o to be done. 13818c2ecf20Sopenharmony_ci * 13828c2ecf20Sopenharmony_ci * Returns the number of clusters that will have to be allocated, as 13838c2ecf20Sopenharmony_ci * well as a worst case estimate of the number of extent records that 13848c2ecf20Sopenharmony_ci * would have to be created during a write to an unwritten region. 13858c2ecf20Sopenharmony_ci */ 13868c2ecf20Sopenharmony_cistatic int ocfs2_populate_write_desc(struct inode *inode, 13878c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc, 13888c2ecf20Sopenharmony_ci unsigned int *clusters_to_alloc, 13898c2ecf20Sopenharmony_ci unsigned int *extents_to_split) 13908c2ecf20Sopenharmony_ci{ 13918c2ecf20Sopenharmony_ci int ret; 13928c2ecf20Sopenharmony_ci struct ocfs2_write_cluster_desc *desc; 13938c2ecf20Sopenharmony_ci unsigned int num_clusters = 0; 13948c2ecf20Sopenharmony_ci unsigned int ext_flags = 0; 13958c2ecf20Sopenharmony_ci u32 phys = 0; 13968c2ecf20Sopenharmony_ci int i; 13978c2ecf20Sopenharmony_ci 13988c2ecf20Sopenharmony_ci *clusters_to_alloc = 0; 13998c2ecf20Sopenharmony_ci *extents_to_split = 0; 14008c2ecf20Sopenharmony_ci 14018c2ecf20Sopenharmony_ci for (i = 0; i < wc->w_clen; i++) { 14028c2ecf20Sopenharmony_ci desc = &wc->w_desc[i]; 14038c2ecf20Sopenharmony_ci desc->c_cpos = wc->w_cpos + i; 14048c2ecf20Sopenharmony_ci 14058c2ecf20Sopenharmony_ci if (num_clusters == 0) { 14068c2ecf20Sopenharmony_ci /* 14078c2ecf20Sopenharmony_ci * Need to look up the next extent record. 14088c2ecf20Sopenharmony_ci */ 14098c2ecf20Sopenharmony_ci ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys, 14108c2ecf20Sopenharmony_ci &num_clusters, &ext_flags); 14118c2ecf20Sopenharmony_ci if (ret) { 14128c2ecf20Sopenharmony_ci mlog_errno(ret); 14138c2ecf20Sopenharmony_ci goto out; 14148c2ecf20Sopenharmony_ci } 14158c2ecf20Sopenharmony_ci 14168c2ecf20Sopenharmony_ci /* We should already CoW the refcountd extent. */ 14178c2ecf20Sopenharmony_ci BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); 14188c2ecf20Sopenharmony_ci 14198c2ecf20Sopenharmony_ci /* 14208c2ecf20Sopenharmony_ci * Assume worst case - that we're writing in 14218c2ecf20Sopenharmony_ci * the middle of the extent. 14228c2ecf20Sopenharmony_ci * 14238c2ecf20Sopenharmony_ci * We can assume that the write proceeds from 14248c2ecf20Sopenharmony_ci * left to right, in which case the extent 14258c2ecf20Sopenharmony_ci * insert code is smart enough to coalesce the 14268c2ecf20Sopenharmony_ci * next splits into the previous records created. 14278c2ecf20Sopenharmony_ci */ 14288c2ecf20Sopenharmony_ci if (ext_flags & OCFS2_EXT_UNWRITTEN) 14298c2ecf20Sopenharmony_ci *extents_to_split = *extents_to_split + 2; 14308c2ecf20Sopenharmony_ci } else if (phys) { 14318c2ecf20Sopenharmony_ci /* 14328c2ecf20Sopenharmony_ci * Only increment phys if it doesn't describe 14338c2ecf20Sopenharmony_ci * a hole. 14348c2ecf20Sopenharmony_ci */ 14358c2ecf20Sopenharmony_ci phys++; 14368c2ecf20Sopenharmony_ci } 14378c2ecf20Sopenharmony_ci 14388c2ecf20Sopenharmony_ci /* 14398c2ecf20Sopenharmony_ci * If w_first_new_cpos is < UINT_MAX, we have a non-sparse 14408c2ecf20Sopenharmony_ci * file that got extended. w_first_new_cpos tells us 14418c2ecf20Sopenharmony_ci * where the newly allocated clusters are so we can 14428c2ecf20Sopenharmony_ci * zero them. 14438c2ecf20Sopenharmony_ci */ 14448c2ecf20Sopenharmony_ci if (desc->c_cpos >= wc->w_first_new_cpos) { 14458c2ecf20Sopenharmony_ci BUG_ON(phys == 0); 14468c2ecf20Sopenharmony_ci desc->c_needs_zero = 1; 14478c2ecf20Sopenharmony_ci } 14488c2ecf20Sopenharmony_ci 14498c2ecf20Sopenharmony_ci desc->c_phys = phys; 14508c2ecf20Sopenharmony_ci if (phys == 0) { 14518c2ecf20Sopenharmony_ci desc->c_new = 1; 14528c2ecf20Sopenharmony_ci desc->c_needs_zero = 1; 14538c2ecf20Sopenharmony_ci desc->c_clear_unwritten = 1; 14548c2ecf20Sopenharmony_ci *clusters_to_alloc = *clusters_to_alloc + 1; 14558c2ecf20Sopenharmony_ci } 14568c2ecf20Sopenharmony_ci 14578c2ecf20Sopenharmony_ci if (ext_flags & OCFS2_EXT_UNWRITTEN) { 14588c2ecf20Sopenharmony_ci desc->c_clear_unwritten = 1; 14598c2ecf20Sopenharmony_ci desc->c_needs_zero = 1; 14608c2ecf20Sopenharmony_ci } 14618c2ecf20Sopenharmony_ci 14628c2ecf20Sopenharmony_ci ret = ocfs2_unwritten_check(inode, wc, desc); 14638c2ecf20Sopenharmony_ci if (ret) { 14648c2ecf20Sopenharmony_ci mlog_errno(ret); 14658c2ecf20Sopenharmony_ci goto out; 14668c2ecf20Sopenharmony_ci } 14678c2ecf20Sopenharmony_ci 14688c2ecf20Sopenharmony_ci num_clusters--; 14698c2ecf20Sopenharmony_ci } 14708c2ecf20Sopenharmony_ci 14718c2ecf20Sopenharmony_ci ret = 0; 14728c2ecf20Sopenharmony_ciout: 14738c2ecf20Sopenharmony_ci return ret; 14748c2ecf20Sopenharmony_ci} 14758c2ecf20Sopenharmony_ci 14768c2ecf20Sopenharmony_cistatic int ocfs2_write_begin_inline(struct address_space *mapping, 14778c2ecf20Sopenharmony_ci struct inode *inode, 14788c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc) 14798c2ecf20Sopenharmony_ci{ 14808c2ecf20Sopenharmony_ci int ret; 14818c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 14828c2ecf20Sopenharmony_ci struct page *page; 14838c2ecf20Sopenharmony_ci handle_t *handle; 14848c2ecf20Sopenharmony_ci struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; 14858c2ecf20Sopenharmony_ci 14868c2ecf20Sopenharmony_ci handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 14878c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 14888c2ecf20Sopenharmony_ci ret = PTR_ERR(handle); 14898c2ecf20Sopenharmony_ci mlog_errno(ret); 14908c2ecf20Sopenharmony_ci goto out; 14918c2ecf20Sopenharmony_ci } 14928c2ecf20Sopenharmony_ci 14938c2ecf20Sopenharmony_ci page = find_or_create_page(mapping, 0, GFP_NOFS); 14948c2ecf20Sopenharmony_ci if (!page) { 14958c2ecf20Sopenharmony_ci ocfs2_commit_trans(osb, handle); 14968c2ecf20Sopenharmony_ci ret = -ENOMEM; 14978c2ecf20Sopenharmony_ci mlog_errno(ret); 14988c2ecf20Sopenharmony_ci goto out; 14998c2ecf20Sopenharmony_ci } 15008c2ecf20Sopenharmony_ci /* 15018c2ecf20Sopenharmony_ci * If we don't set w_num_pages then this page won't get unlocked 15028c2ecf20Sopenharmony_ci * and freed on cleanup of the write context. 15038c2ecf20Sopenharmony_ci */ 15048c2ecf20Sopenharmony_ci wc->w_pages[0] = wc->w_target_page = page; 15058c2ecf20Sopenharmony_ci wc->w_num_pages = 1; 15068c2ecf20Sopenharmony_ci 15078c2ecf20Sopenharmony_ci ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh, 15088c2ecf20Sopenharmony_ci OCFS2_JOURNAL_ACCESS_WRITE); 15098c2ecf20Sopenharmony_ci if (ret) { 15108c2ecf20Sopenharmony_ci ocfs2_commit_trans(osb, handle); 15118c2ecf20Sopenharmony_ci 15128c2ecf20Sopenharmony_ci mlog_errno(ret); 15138c2ecf20Sopenharmony_ci goto out; 15148c2ecf20Sopenharmony_ci } 15158c2ecf20Sopenharmony_ci 15168c2ecf20Sopenharmony_ci if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)) 15178c2ecf20Sopenharmony_ci ocfs2_set_inode_data_inline(inode, di); 15188c2ecf20Sopenharmony_ci 15198c2ecf20Sopenharmony_ci if (!PageUptodate(page)) { 15208c2ecf20Sopenharmony_ci ret = ocfs2_read_inline_data(inode, page, wc->w_di_bh); 15218c2ecf20Sopenharmony_ci if (ret) { 15228c2ecf20Sopenharmony_ci ocfs2_commit_trans(osb, handle); 15238c2ecf20Sopenharmony_ci 15248c2ecf20Sopenharmony_ci goto out; 15258c2ecf20Sopenharmony_ci } 15268c2ecf20Sopenharmony_ci } 15278c2ecf20Sopenharmony_ci 15288c2ecf20Sopenharmony_ci wc->w_handle = handle; 15298c2ecf20Sopenharmony_ciout: 15308c2ecf20Sopenharmony_ci return ret; 15318c2ecf20Sopenharmony_ci} 15328c2ecf20Sopenharmony_ci 15338c2ecf20Sopenharmony_ciint ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size) 15348c2ecf20Sopenharmony_ci{ 15358c2ecf20Sopenharmony_ci struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 15368c2ecf20Sopenharmony_ci 15378c2ecf20Sopenharmony_ci if (new_size <= le16_to_cpu(di->id2.i_data.id_count)) 15388c2ecf20Sopenharmony_ci return 1; 15398c2ecf20Sopenharmony_ci return 0; 15408c2ecf20Sopenharmony_ci} 15418c2ecf20Sopenharmony_ci 15428c2ecf20Sopenharmony_cistatic int ocfs2_try_to_write_inline_data(struct address_space *mapping, 15438c2ecf20Sopenharmony_ci struct inode *inode, loff_t pos, 15448c2ecf20Sopenharmony_ci unsigned len, struct page *mmap_page, 15458c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc) 15468c2ecf20Sopenharmony_ci{ 15478c2ecf20Sopenharmony_ci int ret, written = 0; 15488c2ecf20Sopenharmony_ci loff_t end = pos + len; 15498c2ecf20Sopenharmony_ci struct ocfs2_inode_info *oi = OCFS2_I(inode); 15508c2ecf20Sopenharmony_ci struct ocfs2_dinode *di = NULL; 15518c2ecf20Sopenharmony_ci 15528c2ecf20Sopenharmony_ci trace_ocfs2_try_to_write_inline_data((unsigned long long)oi->ip_blkno, 15538c2ecf20Sopenharmony_ci len, (unsigned long long)pos, 15548c2ecf20Sopenharmony_ci oi->ip_dyn_features); 15558c2ecf20Sopenharmony_ci 15568c2ecf20Sopenharmony_ci /* 15578c2ecf20Sopenharmony_ci * Handle inodes which already have inline data 1st. 15588c2ecf20Sopenharmony_ci */ 15598c2ecf20Sopenharmony_ci if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 15608c2ecf20Sopenharmony_ci if (mmap_page == NULL && 15618c2ecf20Sopenharmony_ci ocfs2_size_fits_inline_data(wc->w_di_bh, end)) 15628c2ecf20Sopenharmony_ci goto do_inline_write; 15638c2ecf20Sopenharmony_ci 15648c2ecf20Sopenharmony_ci /* 15658c2ecf20Sopenharmony_ci * The write won't fit - we have to give this inode an 15668c2ecf20Sopenharmony_ci * inline extent list now. 15678c2ecf20Sopenharmony_ci */ 15688c2ecf20Sopenharmony_ci ret = ocfs2_convert_inline_data_to_extents(inode, wc->w_di_bh); 15698c2ecf20Sopenharmony_ci if (ret) 15708c2ecf20Sopenharmony_ci mlog_errno(ret); 15718c2ecf20Sopenharmony_ci goto out; 15728c2ecf20Sopenharmony_ci } 15738c2ecf20Sopenharmony_ci 15748c2ecf20Sopenharmony_ci /* 15758c2ecf20Sopenharmony_ci * Check whether the inode can accept inline data. 15768c2ecf20Sopenharmony_ci */ 15778c2ecf20Sopenharmony_ci if (oi->ip_clusters != 0 || i_size_read(inode) != 0) 15788c2ecf20Sopenharmony_ci return 0; 15798c2ecf20Sopenharmony_ci 15808c2ecf20Sopenharmony_ci /* 15818c2ecf20Sopenharmony_ci * Check whether the write can fit. 15828c2ecf20Sopenharmony_ci */ 15838c2ecf20Sopenharmony_ci di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; 15848c2ecf20Sopenharmony_ci if (mmap_page || 15858c2ecf20Sopenharmony_ci end > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) 15868c2ecf20Sopenharmony_ci return 0; 15878c2ecf20Sopenharmony_ci 15888c2ecf20Sopenharmony_cido_inline_write: 15898c2ecf20Sopenharmony_ci ret = ocfs2_write_begin_inline(mapping, inode, wc); 15908c2ecf20Sopenharmony_ci if (ret) { 15918c2ecf20Sopenharmony_ci mlog_errno(ret); 15928c2ecf20Sopenharmony_ci goto out; 15938c2ecf20Sopenharmony_ci } 15948c2ecf20Sopenharmony_ci 15958c2ecf20Sopenharmony_ci /* 15968c2ecf20Sopenharmony_ci * This signals to the caller that the data can be written 15978c2ecf20Sopenharmony_ci * inline. 15988c2ecf20Sopenharmony_ci */ 15998c2ecf20Sopenharmony_ci written = 1; 16008c2ecf20Sopenharmony_ciout: 16018c2ecf20Sopenharmony_ci return written ? written : ret; 16028c2ecf20Sopenharmony_ci} 16038c2ecf20Sopenharmony_ci 16048c2ecf20Sopenharmony_ci/* 16058c2ecf20Sopenharmony_ci * This function only does anything for file systems which can't 16068c2ecf20Sopenharmony_ci * handle sparse files. 16078c2ecf20Sopenharmony_ci * 16088c2ecf20Sopenharmony_ci * What we want to do here is fill in any hole between the current end 16098c2ecf20Sopenharmony_ci * of allocation and the end of our write. That way the rest of the 16108c2ecf20Sopenharmony_ci * write path can treat it as an non-allocating write, which has no 16118c2ecf20Sopenharmony_ci * special case code for sparse/nonsparse files. 16128c2ecf20Sopenharmony_ci */ 16138c2ecf20Sopenharmony_cistatic int ocfs2_expand_nonsparse_inode(struct inode *inode, 16148c2ecf20Sopenharmony_ci struct buffer_head *di_bh, 16158c2ecf20Sopenharmony_ci loff_t pos, unsigned len, 16168c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc) 16178c2ecf20Sopenharmony_ci{ 16188c2ecf20Sopenharmony_ci int ret; 16198c2ecf20Sopenharmony_ci loff_t newsize = pos + len; 16208c2ecf20Sopenharmony_ci 16218c2ecf20Sopenharmony_ci BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))); 16228c2ecf20Sopenharmony_ci 16238c2ecf20Sopenharmony_ci if (newsize <= i_size_read(inode)) 16248c2ecf20Sopenharmony_ci return 0; 16258c2ecf20Sopenharmony_ci 16268c2ecf20Sopenharmony_ci ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos); 16278c2ecf20Sopenharmony_ci if (ret) 16288c2ecf20Sopenharmony_ci mlog_errno(ret); 16298c2ecf20Sopenharmony_ci 16308c2ecf20Sopenharmony_ci /* There is no wc if this is call from direct. */ 16318c2ecf20Sopenharmony_ci if (wc) 16328c2ecf20Sopenharmony_ci wc->w_first_new_cpos = 16338c2ecf20Sopenharmony_ci ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)); 16348c2ecf20Sopenharmony_ci 16358c2ecf20Sopenharmony_ci return ret; 16368c2ecf20Sopenharmony_ci} 16378c2ecf20Sopenharmony_ci 16388c2ecf20Sopenharmony_cistatic int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh, 16398c2ecf20Sopenharmony_ci loff_t pos) 16408c2ecf20Sopenharmony_ci{ 16418c2ecf20Sopenharmony_ci int ret = 0; 16428c2ecf20Sopenharmony_ci 16438c2ecf20Sopenharmony_ci BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))); 16448c2ecf20Sopenharmony_ci if (pos > i_size_read(inode)) 16458c2ecf20Sopenharmony_ci ret = ocfs2_zero_extend(inode, di_bh, pos); 16468c2ecf20Sopenharmony_ci 16478c2ecf20Sopenharmony_ci return ret; 16488c2ecf20Sopenharmony_ci} 16498c2ecf20Sopenharmony_ci 16508c2ecf20Sopenharmony_ciint ocfs2_write_begin_nolock(struct address_space *mapping, 16518c2ecf20Sopenharmony_ci loff_t pos, unsigned len, ocfs2_write_type_t type, 16528c2ecf20Sopenharmony_ci struct page **pagep, void **fsdata, 16538c2ecf20Sopenharmony_ci struct buffer_head *di_bh, struct page *mmap_page) 16548c2ecf20Sopenharmony_ci{ 16558c2ecf20Sopenharmony_ci int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS; 16568c2ecf20Sopenharmony_ci unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0; 16578c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc; 16588c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 16598c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 16608c2ecf20Sopenharmony_ci struct ocfs2_dinode *di; 16618c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *data_ac = NULL; 16628c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *meta_ac = NULL; 16638c2ecf20Sopenharmony_ci handle_t *handle; 16648c2ecf20Sopenharmony_ci struct ocfs2_extent_tree et; 16658c2ecf20Sopenharmony_ci int try_free = 1, ret1; 16668c2ecf20Sopenharmony_ci 16678c2ecf20Sopenharmony_citry_again: 16688c2ecf20Sopenharmony_ci ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, type, di_bh); 16698c2ecf20Sopenharmony_ci if (ret) { 16708c2ecf20Sopenharmony_ci mlog_errno(ret); 16718c2ecf20Sopenharmony_ci return ret; 16728c2ecf20Sopenharmony_ci } 16738c2ecf20Sopenharmony_ci 16748c2ecf20Sopenharmony_ci if (ocfs2_supports_inline_data(osb)) { 16758c2ecf20Sopenharmony_ci ret = ocfs2_try_to_write_inline_data(mapping, inode, pos, len, 16768c2ecf20Sopenharmony_ci mmap_page, wc); 16778c2ecf20Sopenharmony_ci if (ret == 1) { 16788c2ecf20Sopenharmony_ci ret = 0; 16798c2ecf20Sopenharmony_ci goto success; 16808c2ecf20Sopenharmony_ci } 16818c2ecf20Sopenharmony_ci if (ret < 0) { 16828c2ecf20Sopenharmony_ci mlog_errno(ret); 16838c2ecf20Sopenharmony_ci goto out; 16848c2ecf20Sopenharmony_ci } 16858c2ecf20Sopenharmony_ci } 16868c2ecf20Sopenharmony_ci 16878c2ecf20Sopenharmony_ci /* Direct io change i_size late, should not zero tail here. */ 16888c2ecf20Sopenharmony_ci if (type != OCFS2_WRITE_DIRECT) { 16898c2ecf20Sopenharmony_ci if (ocfs2_sparse_alloc(osb)) 16908c2ecf20Sopenharmony_ci ret = ocfs2_zero_tail(inode, di_bh, pos); 16918c2ecf20Sopenharmony_ci else 16928c2ecf20Sopenharmony_ci ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, 16938c2ecf20Sopenharmony_ci len, wc); 16948c2ecf20Sopenharmony_ci if (ret) { 16958c2ecf20Sopenharmony_ci mlog_errno(ret); 16968c2ecf20Sopenharmony_ci goto out; 16978c2ecf20Sopenharmony_ci } 16988c2ecf20Sopenharmony_ci } 16998c2ecf20Sopenharmony_ci 17008c2ecf20Sopenharmony_ci ret = ocfs2_check_range_for_refcount(inode, pos, len); 17018c2ecf20Sopenharmony_ci if (ret < 0) { 17028c2ecf20Sopenharmony_ci mlog_errno(ret); 17038c2ecf20Sopenharmony_ci goto out; 17048c2ecf20Sopenharmony_ci } else if (ret == 1) { 17058c2ecf20Sopenharmony_ci clusters_need = wc->w_clen; 17068c2ecf20Sopenharmony_ci ret = ocfs2_refcount_cow(inode, di_bh, 17078c2ecf20Sopenharmony_ci wc->w_cpos, wc->w_clen, UINT_MAX); 17088c2ecf20Sopenharmony_ci if (ret) { 17098c2ecf20Sopenharmony_ci mlog_errno(ret); 17108c2ecf20Sopenharmony_ci goto out; 17118c2ecf20Sopenharmony_ci } 17128c2ecf20Sopenharmony_ci } 17138c2ecf20Sopenharmony_ci 17148c2ecf20Sopenharmony_ci ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc, 17158c2ecf20Sopenharmony_ci &extents_to_split); 17168c2ecf20Sopenharmony_ci if (ret) { 17178c2ecf20Sopenharmony_ci mlog_errno(ret); 17188c2ecf20Sopenharmony_ci goto out; 17198c2ecf20Sopenharmony_ci } 17208c2ecf20Sopenharmony_ci clusters_need += clusters_to_alloc; 17218c2ecf20Sopenharmony_ci 17228c2ecf20Sopenharmony_ci di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; 17238c2ecf20Sopenharmony_ci 17248c2ecf20Sopenharmony_ci trace_ocfs2_write_begin_nolock( 17258c2ecf20Sopenharmony_ci (unsigned long long)OCFS2_I(inode)->ip_blkno, 17268c2ecf20Sopenharmony_ci (long long)i_size_read(inode), 17278c2ecf20Sopenharmony_ci le32_to_cpu(di->i_clusters), 17288c2ecf20Sopenharmony_ci pos, len, type, mmap_page, 17298c2ecf20Sopenharmony_ci clusters_to_alloc, extents_to_split); 17308c2ecf20Sopenharmony_ci 17318c2ecf20Sopenharmony_ci /* 17328c2ecf20Sopenharmony_ci * We set w_target_from, w_target_to here so that 17338c2ecf20Sopenharmony_ci * ocfs2_write_end() knows which range in the target page to 17348c2ecf20Sopenharmony_ci * write out. An allocation requires that we write the entire 17358c2ecf20Sopenharmony_ci * cluster range. 17368c2ecf20Sopenharmony_ci */ 17378c2ecf20Sopenharmony_ci if (clusters_to_alloc || extents_to_split) { 17388c2ecf20Sopenharmony_ci /* 17398c2ecf20Sopenharmony_ci * XXX: We are stretching the limits of 17408c2ecf20Sopenharmony_ci * ocfs2_lock_allocators(). It greatly over-estimates 17418c2ecf20Sopenharmony_ci * the work to be done. 17428c2ecf20Sopenharmony_ci */ 17438c2ecf20Sopenharmony_ci ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), 17448c2ecf20Sopenharmony_ci wc->w_di_bh); 17458c2ecf20Sopenharmony_ci ret = ocfs2_lock_allocators(inode, &et, 17468c2ecf20Sopenharmony_ci clusters_to_alloc, extents_to_split, 17478c2ecf20Sopenharmony_ci &data_ac, &meta_ac); 17488c2ecf20Sopenharmony_ci if (ret) { 17498c2ecf20Sopenharmony_ci mlog_errno(ret); 17508c2ecf20Sopenharmony_ci goto out; 17518c2ecf20Sopenharmony_ci } 17528c2ecf20Sopenharmony_ci 17538c2ecf20Sopenharmony_ci if (data_ac) 17548c2ecf20Sopenharmony_ci data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv; 17558c2ecf20Sopenharmony_ci 17568c2ecf20Sopenharmony_ci credits = ocfs2_calc_extend_credits(inode->i_sb, 17578c2ecf20Sopenharmony_ci &di->id2.i_list); 17588c2ecf20Sopenharmony_ci } else if (type == OCFS2_WRITE_DIRECT) 17598c2ecf20Sopenharmony_ci /* direct write needs not to start trans if no extents alloc. */ 17608c2ecf20Sopenharmony_ci goto success; 17618c2ecf20Sopenharmony_ci 17628c2ecf20Sopenharmony_ci /* 17638c2ecf20Sopenharmony_ci * We have to zero sparse allocated clusters, unwritten extent clusters, 17648c2ecf20Sopenharmony_ci * and non-sparse clusters we just extended. For non-sparse writes, 17658c2ecf20Sopenharmony_ci * we know zeros will only be needed in the first and/or last cluster. 17668c2ecf20Sopenharmony_ci */ 17678c2ecf20Sopenharmony_ci if (wc->w_clen && (wc->w_desc[0].c_needs_zero || 17688c2ecf20Sopenharmony_ci wc->w_desc[wc->w_clen - 1].c_needs_zero)) 17698c2ecf20Sopenharmony_ci cluster_of_pages = 1; 17708c2ecf20Sopenharmony_ci else 17718c2ecf20Sopenharmony_ci cluster_of_pages = 0; 17728c2ecf20Sopenharmony_ci 17738c2ecf20Sopenharmony_ci ocfs2_set_target_boundaries(osb, wc, pos, len, cluster_of_pages); 17748c2ecf20Sopenharmony_ci 17758c2ecf20Sopenharmony_ci handle = ocfs2_start_trans(osb, credits); 17768c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 17778c2ecf20Sopenharmony_ci ret = PTR_ERR(handle); 17788c2ecf20Sopenharmony_ci mlog_errno(ret); 17798c2ecf20Sopenharmony_ci goto out; 17808c2ecf20Sopenharmony_ci } 17818c2ecf20Sopenharmony_ci 17828c2ecf20Sopenharmony_ci wc->w_handle = handle; 17838c2ecf20Sopenharmony_ci 17848c2ecf20Sopenharmony_ci if (clusters_to_alloc) { 17858c2ecf20Sopenharmony_ci ret = dquot_alloc_space_nodirty(inode, 17868c2ecf20Sopenharmony_ci ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc)); 17878c2ecf20Sopenharmony_ci if (ret) 17888c2ecf20Sopenharmony_ci goto out_commit; 17898c2ecf20Sopenharmony_ci } 17908c2ecf20Sopenharmony_ci 17918c2ecf20Sopenharmony_ci ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh, 17928c2ecf20Sopenharmony_ci OCFS2_JOURNAL_ACCESS_WRITE); 17938c2ecf20Sopenharmony_ci if (ret) { 17948c2ecf20Sopenharmony_ci mlog_errno(ret); 17958c2ecf20Sopenharmony_ci goto out_quota; 17968c2ecf20Sopenharmony_ci } 17978c2ecf20Sopenharmony_ci 17988c2ecf20Sopenharmony_ci /* 17998c2ecf20Sopenharmony_ci * Fill our page array first. That way we've grabbed enough so 18008c2ecf20Sopenharmony_ci * that we can zero and flush if we error after adding the 18018c2ecf20Sopenharmony_ci * extent. 18028c2ecf20Sopenharmony_ci */ 18038c2ecf20Sopenharmony_ci ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len, 18048c2ecf20Sopenharmony_ci cluster_of_pages, mmap_page); 18058c2ecf20Sopenharmony_ci if (ret && ret != -EAGAIN) { 18068c2ecf20Sopenharmony_ci mlog_errno(ret); 18078c2ecf20Sopenharmony_ci goto out_quota; 18088c2ecf20Sopenharmony_ci } 18098c2ecf20Sopenharmony_ci 18108c2ecf20Sopenharmony_ci /* 18118c2ecf20Sopenharmony_ci * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock 18128c2ecf20Sopenharmony_ci * the target page. In this case, we exit with no error and no target 18138c2ecf20Sopenharmony_ci * page. This will trigger the caller, page_mkwrite(), to re-try 18148c2ecf20Sopenharmony_ci * the operation. 18158c2ecf20Sopenharmony_ci */ 18168c2ecf20Sopenharmony_ci if (ret == -EAGAIN) { 18178c2ecf20Sopenharmony_ci BUG_ON(wc->w_target_page); 18188c2ecf20Sopenharmony_ci ret = 0; 18198c2ecf20Sopenharmony_ci goto out_quota; 18208c2ecf20Sopenharmony_ci } 18218c2ecf20Sopenharmony_ci 18228c2ecf20Sopenharmony_ci ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, 18238c2ecf20Sopenharmony_ci len); 18248c2ecf20Sopenharmony_ci if (ret) { 18258c2ecf20Sopenharmony_ci mlog_errno(ret); 18268c2ecf20Sopenharmony_ci goto out_quota; 18278c2ecf20Sopenharmony_ci } 18288c2ecf20Sopenharmony_ci 18298c2ecf20Sopenharmony_ci if (data_ac) 18308c2ecf20Sopenharmony_ci ocfs2_free_alloc_context(data_ac); 18318c2ecf20Sopenharmony_ci if (meta_ac) 18328c2ecf20Sopenharmony_ci ocfs2_free_alloc_context(meta_ac); 18338c2ecf20Sopenharmony_ci 18348c2ecf20Sopenharmony_cisuccess: 18358c2ecf20Sopenharmony_ci if (pagep) 18368c2ecf20Sopenharmony_ci *pagep = wc->w_target_page; 18378c2ecf20Sopenharmony_ci *fsdata = wc; 18388c2ecf20Sopenharmony_ci return 0; 18398c2ecf20Sopenharmony_ciout_quota: 18408c2ecf20Sopenharmony_ci if (clusters_to_alloc) 18418c2ecf20Sopenharmony_ci dquot_free_space(inode, 18428c2ecf20Sopenharmony_ci ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc)); 18438c2ecf20Sopenharmony_ciout_commit: 18448c2ecf20Sopenharmony_ci ocfs2_commit_trans(osb, handle); 18458c2ecf20Sopenharmony_ci 18468c2ecf20Sopenharmony_ciout: 18478c2ecf20Sopenharmony_ci /* 18488c2ecf20Sopenharmony_ci * The mmapped page won't be unlocked in ocfs2_free_write_ctxt(), 18498c2ecf20Sopenharmony_ci * even in case of error here like ENOSPC and ENOMEM. So, we need 18508c2ecf20Sopenharmony_ci * to unlock the target page manually to prevent deadlocks when 18518c2ecf20Sopenharmony_ci * retrying again on ENOSPC, or when returning non-VM_FAULT_LOCKED 18528c2ecf20Sopenharmony_ci * to VM code. 18538c2ecf20Sopenharmony_ci */ 18548c2ecf20Sopenharmony_ci if (wc->w_target_locked) 18558c2ecf20Sopenharmony_ci unlock_page(mmap_page); 18568c2ecf20Sopenharmony_ci 18578c2ecf20Sopenharmony_ci ocfs2_free_write_ctxt(inode, wc); 18588c2ecf20Sopenharmony_ci 18598c2ecf20Sopenharmony_ci if (data_ac) { 18608c2ecf20Sopenharmony_ci ocfs2_free_alloc_context(data_ac); 18618c2ecf20Sopenharmony_ci data_ac = NULL; 18628c2ecf20Sopenharmony_ci } 18638c2ecf20Sopenharmony_ci if (meta_ac) { 18648c2ecf20Sopenharmony_ci ocfs2_free_alloc_context(meta_ac); 18658c2ecf20Sopenharmony_ci meta_ac = NULL; 18668c2ecf20Sopenharmony_ci } 18678c2ecf20Sopenharmony_ci 18688c2ecf20Sopenharmony_ci if (ret == -ENOSPC && try_free) { 18698c2ecf20Sopenharmony_ci /* 18708c2ecf20Sopenharmony_ci * Try to free some truncate log so that we can have enough 18718c2ecf20Sopenharmony_ci * clusters to allocate. 18728c2ecf20Sopenharmony_ci */ 18738c2ecf20Sopenharmony_ci try_free = 0; 18748c2ecf20Sopenharmony_ci 18758c2ecf20Sopenharmony_ci ret1 = ocfs2_try_to_free_truncate_log(osb, clusters_need); 18768c2ecf20Sopenharmony_ci if (ret1 == 1) 18778c2ecf20Sopenharmony_ci goto try_again; 18788c2ecf20Sopenharmony_ci 18798c2ecf20Sopenharmony_ci if (ret1 < 0) 18808c2ecf20Sopenharmony_ci mlog_errno(ret1); 18818c2ecf20Sopenharmony_ci } 18828c2ecf20Sopenharmony_ci 18838c2ecf20Sopenharmony_ci return ret; 18848c2ecf20Sopenharmony_ci} 18858c2ecf20Sopenharmony_ci 18868c2ecf20Sopenharmony_cistatic int ocfs2_write_begin(struct file *file, struct address_space *mapping, 18878c2ecf20Sopenharmony_ci loff_t pos, unsigned len, unsigned flags, 18888c2ecf20Sopenharmony_ci struct page **pagep, void **fsdata) 18898c2ecf20Sopenharmony_ci{ 18908c2ecf20Sopenharmony_ci int ret; 18918c2ecf20Sopenharmony_ci struct buffer_head *di_bh = NULL; 18928c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 18938c2ecf20Sopenharmony_ci 18948c2ecf20Sopenharmony_ci ret = ocfs2_inode_lock(inode, &di_bh, 1); 18958c2ecf20Sopenharmony_ci if (ret) { 18968c2ecf20Sopenharmony_ci mlog_errno(ret); 18978c2ecf20Sopenharmony_ci return ret; 18988c2ecf20Sopenharmony_ci } 18998c2ecf20Sopenharmony_ci 19008c2ecf20Sopenharmony_ci /* 19018c2ecf20Sopenharmony_ci * Take alloc sem here to prevent concurrent lookups. That way 19028c2ecf20Sopenharmony_ci * the mapping, zeroing and tree manipulation within 19038c2ecf20Sopenharmony_ci * ocfs2_write() will be safe against ->readpage(). This 19048c2ecf20Sopenharmony_ci * should also serve to lock out allocation from a shared 19058c2ecf20Sopenharmony_ci * writeable region. 19068c2ecf20Sopenharmony_ci */ 19078c2ecf20Sopenharmony_ci down_write(&OCFS2_I(inode)->ip_alloc_sem); 19088c2ecf20Sopenharmony_ci 19098c2ecf20Sopenharmony_ci ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_BUFFER, 19108c2ecf20Sopenharmony_ci pagep, fsdata, di_bh, NULL); 19118c2ecf20Sopenharmony_ci if (ret) { 19128c2ecf20Sopenharmony_ci mlog_errno(ret); 19138c2ecf20Sopenharmony_ci goto out_fail; 19148c2ecf20Sopenharmony_ci } 19158c2ecf20Sopenharmony_ci 19168c2ecf20Sopenharmony_ci brelse(di_bh); 19178c2ecf20Sopenharmony_ci 19188c2ecf20Sopenharmony_ci return 0; 19198c2ecf20Sopenharmony_ci 19208c2ecf20Sopenharmony_ciout_fail: 19218c2ecf20Sopenharmony_ci up_write(&OCFS2_I(inode)->ip_alloc_sem); 19228c2ecf20Sopenharmony_ci 19238c2ecf20Sopenharmony_ci brelse(di_bh); 19248c2ecf20Sopenharmony_ci ocfs2_inode_unlock(inode, 1); 19258c2ecf20Sopenharmony_ci 19268c2ecf20Sopenharmony_ci return ret; 19278c2ecf20Sopenharmony_ci} 19288c2ecf20Sopenharmony_ci 19298c2ecf20Sopenharmony_cistatic void ocfs2_write_end_inline(struct inode *inode, loff_t pos, 19308c2ecf20Sopenharmony_ci unsigned len, unsigned *copied, 19318c2ecf20Sopenharmony_ci struct ocfs2_dinode *di, 19328c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc) 19338c2ecf20Sopenharmony_ci{ 19348c2ecf20Sopenharmony_ci void *kaddr; 19358c2ecf20Sopenharmony_ci 19368c2ecf20Sopenharmony_ci if (unlikely(*copied < len)) { 19378c2ecf20Sopenharmony_ci if (!PageUptodate(wc->w_target_page)) { 19388c2ecf20Sopenharmony_ci *copied = 0; 19398c2ecf20Sopenharmony_ci return; 19408c2ecf20Sopenharmony_ci } 19418c2ecf20Sopenharmony_ci } 19428c2ecf20Sopenharmony_ci 19438c2ecf20Sopenharmony_ci kaddr = kmap_atomic(wc->w_target_page); 19448c2ecf20Sopenharmony_ci memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied); 19458c2ecf20Sopenharmony_ci kunmap_atomic(kaddr); 19468c2ecf20Sopenharmony_ci 19478c2ecf20Sopenharmony_ci trace_ocfs2_write_end_inline( 19488c2ecf20Sopenharmony_ci (unsigned long long)OCFS2_I(inode)->ip_blkno, 19498c2ecf20Sopenharmony_ci (unsigned long long)pos, *copied, 19508c2ecf20Sopenharmony_ci le16_to_cpu(di->id2.i_data.id_count), 19518c2ecf20Sopenharmony_ci le16_to_cpu(di->i_dyn_features)); 19528c2ecf20Sopenharmony_ci} 19538c2ecf20Sopenharmony_ci 19548c2ecf20Sopenharmony_ciint ocfs2_write_end_nolock(struct address_space *mapping, 19558c2ecf20Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, void *fsdata) 19568c2ecf20Sopenharmony_ci{ 19578c2ecf20Sopenharmony_ci int i, ret; 19588c2ecf20Sopenharmony_ci unsigned from, to, start = pos & (PAGE_SIZE - 1); 19598c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 19608c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 19618c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc = fsdata; 19628c2ecf20Sopenharmony_ci struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; 19638c2ecf20Sopenharmony_ci handle_t *handle = wc->w_handle; 19648c2ecf20Sopenharmony_ci struct page *tmppage; 19658c2ecf20Sopenharmony_ci 19668c2ecf20Sopenharmony_ci BUG_ON(!list_empty(&wc->w_unwritten_list)); 19678c2ecf20Sopenharmony_ci 19688c2ecf20Sopenharmony_ci if (handle) { 19698c2ecf20Sopenharmony_ci ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), 19708c2ecf20Sopenharmony_ci wc->w_di_bh, OCFS2_JOURNAL_ACCESS_WRITE); 19718c2ecf20Sopenharmony_ci if (ret) { 19728c2ecf20Sopenharmony_ci copied = ret; 19738c2ecf20Sopenharmony_ci mlog_errno(ret); 19748c2ecf20Sopenharmony_ci goto out; 19758c2ecf20Sopenharmony_ci } 19768c2ecf20Sopenharmony_ci } 19778c2ecf20Sopenharmony_ci 19788c2ecf20Sopenharmony_ci if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 19798c2ecf20Sopenharmony_ci ocfs2_write_end_inline(inode, pos, len, &copied, di, wc); 19808c2ecf20Sopenharmony_ci goto out_write_size; 19818c2ecf20Sopenharmony_ci } 19828c2ecf20Sopenharmony_ci 19838c2ecf20Sopenharmony_ci if (unlikely(copied < len) && wc->w_target_page) { 19848c2ecf20Sopenharmony_ci loff_t new_isize; 19858c2ecf20Sopenharmony_ci 19868c2ecf20Sopenharmony_ci if (!PageUptodate(wc->w_target_page)) 19878c2ecf20Sopenharmony_ci copied = 0; 19888c2ecf20Sopenharmony_ci 19898c2ecf20Sopenharmony_ci new_isize = max_t(loff_t, i_size_read(inode), pos + copied); 19908c2ecf20Sopenharmony_ci if (new_isize > page_offset(wc->w_target_page)) 19918c2ecf20Sopenharmony_ci ocfs2_zero_new_buffers(wc->w_target_page, start+copied, 19928c2ecf20Sopenharmony_ci start+len); 19938c2ecf20Sopenharmony_ci else { 19948c2ecf20Sopenharmony_ci /* 19958c2ecf20Sopenharmony_ci * When page is fully beyond new isize (data copy 19968c2ecf20Sopenharmony_ci * failed), do not bother zeroing the page. Invalidate 19978c2ecf20Sopenharmony_ci * it instead so that writeback does not get confused 19988c2ecf20Sopenharmony_ci * put page & buffer dirty bits into inconsistent 19998c2ecf20Sopenharmony_ci * state. 20008c2ecf20Sopenharmony_ci */ 20018c2ecf20Sopenharmony_ci block_invalidatepage(wc->w_target_page, 0, PAGE_SIZE); 20028c2ecf20Sopenharmony_ci } 20038c2ecf20Sopenharmony_ci } 20048c2ecf20Sopenharmony_ci if (wc->w_target_page) 20058c2ecf20Sopenharmony_ci flush_dcache_page(wc->w_target_page); 20068c2ecf20Sopenharmony_ci 20078c2ecf20Sopenharmony_ci for(i = 0; i < wc->w_num_pages; i++) { 20088c2ecf20Sopenharmony_ci tmppage = wc->w_pages[i]; 20098c2ecf20Sopenharmony_ci 20108c2ecf20Sopenharmony_ci /* This is the direct io target page. */ 20118c2ecf20Sopenharmony_ci if (tmppage == NULL) 20128c2ecf20Sopenharmony_ci continue; 20138c2ecf20Sopenharmony_ci 20148c2ecf20Sopenharmony_ci if (tmppage == wc->w_target_page) { 20158c2ecf20Sopenharmony_ci from = wc->w_target_from; 20168c2ecf20Sopenharmony_ci to = wc->w_target_to; 20178c2ecf20Sopenharmony_ci 20188c2ecf20Sopenharmony_ci BUG_ON(from > PAGE_SIZE || 20198c2ecf20Sopenharmony_ci to > PAGE_SIZE || 20208c2ecf20Sopenharmony_ci to < from); 20218c2ecf20Sopenharmony_ci } else { 20228c2ecf20Sopenharmony_ci /* 20238c2ecf20Sopenharmony_ci * Pages adjacent to the target (if any) imply 20248c2ecf20Sopenharmony_ci * a hole-filling write in which case we want 20258c2ecf20Sopenharmony_ci * to flush their entire range. 20268c2ecf20Sopenharmony_ci */ 20278c2ecf20Sopenharmony_ci from = 0; 20288c2ecf20Sopenharmony_ci to = PAGE_SIZE; 20298c2ecf20Sopenharmony_ci } 20308c2ecf20Sopenharmony_ci 20318c2ecf20Sopenharmony_ci if (page_has_buffers(tmppage)) { 20328c2ecf20Sopenharmony_ci if (handle && ocfs2_should_order_data(inode)) { 20338c2ecf20Sopenharmony_ci loff_t start_byte = 20348c2ecf20Sopenharmony_ci ((loff_t)tmppage->index << PAGE_SHIFT) + 20358c2ecf20Sopenharmony_ci from; 20368c2ecf20Sopenharmony_ci loff_t length = to - from; 20378c2ecf20Sopenharmony_ci ocfs2_jbd2_inode_add_write(handle, inode, 20388c2ecf20Sopenharmony_ci start_byte, length); 20398c2ecf20Sopenharmony_ci } 20408c2ecf20Sopenharmony_ci block_commit_write(tmppage, from, to); 20418c2ecf20Sopenharmony_ci } 20428c2ecf20Sopenharmony_ci } 20438c2ecf20Sopenharmony_ci 20448c2ecf20Sopenharmony_ciout_write_size: 20458c2ecf20Sopenharmony_ci /* Direct io do not update i_size here. */ 20468c2ecf20Sopenharmony_ci if (wc->w_type != OCFS2_WRITE_DIRECT) { 20478c2ecf20Sopenharmony_ci pos += copied; 20488c2ecf20Sopenharmony_ci if (pos > i_size_read(inode)) { 20498c2ecf20Sopenharmony_ci i_size_write(inode, pos); 20508c2ecf20Sopenharmony_ci mark_inode_dirty(inode); 20518c2ecf20Sopenharmony_ci } 20528c2ecf20Sopenharmony_ci inode->i_blocks = ocfs2_inode_sector_count(inode); 20538c2ecf20Sopenharmony_ci di->i_size = cpu_to_le64((u64)i_size_read(inode)); 20548c2ecf20Sopenharmony_ci inode->i_mtime = inode->i_ctime = current_time(inode); 20558c2ecf20Sopenharmony_ci di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); 20568c2ecf20Sopenharmony_ci di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); 20578c2ecf20Sopenharmony_ci if (handle) 20588c2ecf20Sopenharmony_ci ocfs2_update_inode_fsync_trans(handle, inode, 1); 20598c2ecf20Sopenharmony_ci } 20608c2ecf20Sopenharmony_ci if (handle) 20618c2ecf20Sopenharmony_ci ocfs2_journal_dirty(handle, wc->w_di_bh); 20628c2ecf20Sopenharmony_ci 20638c2ecf20Sopenharmony_ciout: 20648c2ecf20Sopenharmony_ci /* unlock pages before dealloc since it needs acquiring j_trans_barrier 20658c2ecf20Sopenharmony_ci * lock, or it will cause a deadlock since journal commit threads holds 20668c2ecf20Sopenharmony_ci * this lock and will ask for the page lock when flushing the data. 20678c2ecf20Sopenharmony_ci * put it here to preserve the unlock order. 20688c2ecf20Sopenharmony_ci */ 20698c2ecf20Sopenharmony_ci ocfs2_unlock_pages(wc); 20708c2ecf20Sopenharmony_ci 20718c2ecf20Sopenharmony_ci if (handle) 20728c2ecf20Sopenharmony_ci ocfs2_commit_trans(osb, handle); 20738c2ecf20Sopenharmony_ci 20748c2ecf20Sopenharmony_ci ocfs2_run_deallocs(osb, &wc->w_dealloc); 20758c2ecf20Sopenharmony_ci 20768c2ecf20Sopenharmony_ci brelse(wc->w_di_bh); 20778c2ecf20Sopenharmony_ci kfree(wc); 20788c2ecf20Sopenharmony_ci 20798c2ecf20Sopenharmony_ci return copied; 20808c2ecf20Sopenharmony_ci} 20818c2ecf20Sopenharmony_ci 20828c2ecf20Sopenharmony_cistatic int ocfs2_write_end(struct file *file, struct address_space *mapping, 20838c2ecf20Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, 20848c2ecf20Sopenharmony_ci struct page *page, void *fsdata) 20858c2ecf20Sopenharmony_ci{ 20868c2ecf20Sopenharmony_ci int ret; 20878c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 20888c2ecf20Sopenharmony_ci 20898c2ecf20Sopenharmony_ci ret = ocfs2_write_end_nolock(mapping, pos, len, copied, fsdata); 20908c2ecf20Sopenharmony_ci 20918c2ecf20Sopenharmony_ci up_write(&OCFS2_I(inode)->ip_alloc_sem); 20928c2ecf20Sopenharmony_ci ocfs2_inode_unlock(inode, 1); 20938c2ecf20Sopenharmony_ci 20948c2ecf20Sopenharmony_ci return ret; 20958c2ecf20Sopenharmony_ci} 20968c2ecf20Sopenharmony_ci 20978c2ecf20Sopenharmony_cistruct ocfs2_dio_write_ctxt { 20988c2ecf20Sopenharmony_ci struct list_head dw_zero_list; 20998c2ecf20Sopenharmony_ci unsigned dw_zero_count; 21008c2ecf20Sopenharmony_ci int dw_orphaned; 21018c2ecf20Sopenharmony_ci pid_t dw_writer_pid; 21028c2ecf20Sopenharmony_ci}; 21038c2ecf20Sopenharmony_ci 21048c2ecf20Sopenharmony_cistatic struct ocfs2_dio_write_ctxt * 21058c2ecf20Sopenharmony_ciocfs2_dio_alloc_write_ctx(struct buffer_head *bh, int *alloc) 21068c2ecf20Sopenharmony_ci{ 21078c2ecf20Sopenharmony_ci struct ocfs2_dio_write_ctxt *dwc = NULL; 21088c2ecf20Sopenharmony_ci 21098c2ecf20Sopenharmony_ci if (bh->b_private) 21108c2ecf20Sopenharmony_ci return bh->b_private; 21118c2ecf20Sopenharmony_ci 21128c2ecf20Sopenharmony_ci dwc = kmalloc(sizeof(struct ocfs2_dio_write_ctxt), GFP_NOFS); 21138c2ecf20Sopenharmony_ci if (dwc == NULL) 21148c2ecf20Sopenharmony_ci return NULL; 21158c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&dwc->dw_zero_list); 21168c2ecf20Sopenharmony_ci dwc->dw_zero_count = 0; 21178c2ecf20Sopenharmony_ci dwc->dw_orphaned = 0; 21188c2ecf20Sopenharmony_ci dwc->dw_writer_pid = task_pid_nr(current); 21198c2ecf20Sopenharmony_ci bh->b_private = dwc; 21208c2ecf20Sopenharmony_ci *alloc = 1; 21218c2ecf20Sopenharmony_ci 21228c2ecf20Sopenharmony_ci return dwc; 21238c2ecf20Sopenharmony_ci} 21248c2ecf20Sopenharmony_ci 21258c2ecf20Sopenharmony_cistatic void ocfs2_dio_free_write_ctx(struct inode *inode, 21268c2ecf20Sopenharmony_ci struct ocfs2_dio_write_ctxt *dwc) 21278c2ecf20Sopenharmony_ci{ 21288c2ecf20Sopenharmony_ci ocfs2_free_unwritten_list(inode, &dwc->dw_zero_list); 21298c2ecf20Sopenharmony_ci kfree(dwc); 21308c2ecf20Sopenharmony_ci} 21318c2ecf20Sopenharmony_ci 21328c2ecf20Sopenharmony_ci/* 21338c2ecf20Sopenharmony_ci * TODO: Make this into a generic get_blocks function. 21348c2ecf20Sopenharmony_ci * 21358c2ecf20Sopenharmony_ci * From do_direct_io in direct-io.c: 21368c2ecf20Sopenharmony_ci * "So what we do is to permit the ->get_blocks function to populate 21378c2ecf20Sopenharmony_ci * bh.b_size with the size of IO which is permitted at this offset and 21388c2ecf20Sopenharmony_ci * this i_blkbits." 21398c2ecf20Sopenharmony_ci * 21408c2ecf20Sopenharmony_ci * This function is called directly from get_more_blocks in direct-io.c. 21418c2ecf20Sopenharmony_ci * 21428c2ecf20Sopenharmony_ci * called like this: dio->get_blocks(dio->inode, fs_startblk, 21438c2ecf20Sopenharmony_ci * fs_count, map_bh, dio->rw == WRITE); 21448c2ecf20Sopenharmony_ci */ 21458c2ecf20Sopenharmony_cistatic int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, 21468c2ecf20Sopenharmony_ci struct buffer_head *bh_result, int create) 21478c2ecf20Sopenharmony_ci{ 21488c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 21498c2ecf20Sopenharmony_ci struct ocfs2_inode_info *oi = OCFS2_I(inode); 21508c2ecf20Sopenharmony_ci struct ocfs2_write_ctxt *wc; 21518c2ecf20Sopenharmony_ci struct ocfs2_write_cluster_desc *desc = NULL; 21528c2ecf20Sopenharmony_ci struct ocfs2_dio_write_ctxt *dwc = NULL; 21538c2ecf20Sopenharmony_ci struct buffer_head *di_bh = NULL; 21548c2ecf20Sopenharmony_ci u64 p_blkno; 21558c2ecf20Sopenharmony_ci unsigned int i_blkbits = inode->i_sb->s_blocksize_bits; 21568c2ecf20Sopenharmony_ci loff_t pos = iblock << i_blkbits; 21578c2ecf20Sopenharmony_ci sector_t endblk = (i_size_read(inode) - 1) >> i_blkbits; 21588c2ecf20Sopenharmony_ci unsigned len, total_len = bh_result->b_size; 21598c2ecf20Sopenharmony_ci int ret = 0, first_get_block = 0; 21608c2ecf20Sopenharmony_ci 21618c2ecf20Sopenharmony_ci len = osb->s_clustersize - (pos & (osb->s_clustersize - 1)); 21628c2ecf20Sopenharmony_ci len = min(total_len, len); 21638c2ecf20Sopenharmony_ci 21648c2ecf20Sopenharmony_ci /* 21658c2ecf20Sopenharmony_ci * bh_result->b_size is count in get_more_blocks according to write 21668c2ecf20Sopenharmony_ci * "pos" and "end", we need map twice to return different buffer state: 21678c2ecf20Sopenharmony_ci * 1. area in file size, not set NEW; 21688c2ecf20Sopenharmony_ci * 2. area out file size, set NEW. 21698c2ecf20Sopenharmony_ci * 21708c2ecf20Sopenharmony_ci * iblock endblk 21718c2ecf20Sopenharmony_ci * |--------|---------|---------|--------- 21728c2ecf20Sopenharmony_ci * |<-------area in file------->| 21738c2ecf20Sopenharmony_ci */ 21748c2ecf20Sopenharmony_ci 21758c2ecf20Sopenharmony_ci if ((iblock <= endblk) && 21768c2ecf20Sopenharmony_ci ((iblock + ((len - 1) >> i_blkbits)) > endblk)) 21778c2ecf20Sopenharmony_ci len = (endblk - iblock + 1) << i_blkbits; 21788c2ecf20Sopenharmony_ci 21798c2ecf20Sopenharmony_ci mlog(0, "get block of %lu at %llu:%u req %u\n", 21808c2ecf20Sopenharmony_ci inode->i_ino, pos, len, total_len); 21818c2ecf20Sopenharmony_ci 21828c2ecf20Sopenharmony_ci /* 21838c2ecf20Sopenharmony_ci * Because we need to change file size in ocfs2_dio_end_io_write(), or 21848c2ecf20Sopenharmony_ci * we may need to add it to orphan dir. So can not fall to fast path 21858c2ecf20Sopenharmony_ci * while file size will be changed. 21868c2ecf20Sopenharmony_ci */ 21878c2ecf20Sopenharmony_ci if (pos + total_len <= i_size_read(inode)) { 21888c2ecf20Sopenharmony_ci 21898c2ecf20Sopenharmony_ci /* This is the fast path for re-write. */ 21908c2ecf20Sopenharmony_ci ret = ocfs2_lock_get_block(inode, iblock, bh_result, create); 21918c2ecf20Sopenharmony_ci if (buffer_mapped(bh_result) && 21928c2ecf20Sopenharmony_ci !buffer_new(bh_result) && 21938c2ecf20Sopenharmony_ci ret == 0) 21948c2ecf20Sopenharmony_ci goto out; 21958c2ecf20Sopenharmony_ci 21968c2ecf20Sopenharmony_ci /* Clear state set by ocfs2_get_block. */ 21978c2ecf20Sopenharmony_ci bh_result->b_state = 0; 21988c2ecf20Sopenharmony_ci } 21998c2ecf20Sopenharmony_ci 22008c2ecf20Sopenharmony_ci dwc = ocfs2_dio_alloc_write_ctx(bh_result, &first_get_block); 22018c2ecf20Sopenharmony_ci if (unlikely(dwc == NULL)) { 22028c2ecf20Sopenharmony_ci ret = -ENOMEM; 22038c2ecf20Sopenharmony_ci mlog_errno(ret); 22048c2ecf20Sopenharmony_ci goto out; 22058c2ecf20Sopenharmony_ci } 22068c2ecf20Sopenharmony_ci 22078c2ecf20Sopenharmony_ci if (ocfs2_clusters_for_bytes(inode->i_sb, pos + total_len) > 22088c2ecf20Sopenharmony_ci ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)) && 22098c2ecf20Sopenharmony_ci !dwc->dw_orphaned) { 22108c2ecf20Sopenharmony_ci /* 22118c2ecf20Sopenharmony_ci * when we are going to alloc extents beyond file size, add the 22128c2ecf20Sopenharmony_ci * inode to orphan dir, so we can recall those spaces when 22138c2ecf20Sopenharmony_ci * system crashed during write. 22148c2ecf20Sopenharmony_ci */ 22158c2ecf20Sopenharmony_ci ret = ocfs2_add_inode_to_orphan(osb, inode); 22168c2ecf20Sopenharmony_ci if (ret < 0) { 22178c2ecf20Sopenharmony_ci mlog_errno(ret); 22188c2ecf20Sopenharmony_ci goto out; 22198c2ecf20Sopenharmony_ci } 22208c2ecf20Sopenharmony_ci dwc->dw_orphaned = 1; 22218c2ecf20Sopenharmony_ci } 22228c2ecf20Sopenharmony_ci 22238c2ecf20Sopenharmony_ci ret = ocfs2_inode_lock(inode, &di_bh, 1); 22248c2ecf20Sopenharmony_ci if (ret) { 22258c2ecf20Sopenharmony_ci mlog_errno(ret); 22268c2ecf20Sopenharmony_ci goto out; 22278c2ecf20Sopenharmony_ci } 22288c2ecf20Sopenharmony_ci 22298c2ecf20Sopenharmony_ci down_write(&oi->ip_alloc_sem); 22308c2ecf20Sopenharmony_ci 22318c2ecf20Sopenharmony_ci if (first_get_block) { 22328c2ecf20Sopenharmony_ci if (ocfs2_sparse_alloc(osb)) 22338c2ecf20Sopenharmony_ci ret = ocfs2_zero_tail(inode, di_bh, pos); 22348c2ecf20Sopenharmony_ci else 22358c2ecf20Sopenharmony_ci ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, 22368c2ecf20Sopenharmony_ci total_len, NULL); 22378c2ecf20Sopenharmony_ci if (ret < 0) { 22388c2ecf20Sopenharmony_ci mlog_errno(ret); 22398c2ecf20Sopenharmony_ci goto unlock; 22408c2ecf20Sopenharmony_ci } 22418c2ecf20Sopenharmony_ci } 22428c2ecf20Sopenharmony_ci 22438c2ecf20Sopenharmony_ci ret = ocfs2_write_begin_nolock(inode->i_mapping, pos, len, 22448c2ecf20Sopenharmony_ci OCFS2_WRITE_DIRECT, NULL, 22458c2ecf20Sopenharmony_ci (void **)&wc, di_bh, NULL); 22468c2ecf20Sopenharmony_ci if (ret) { 22478c2ecf20Sopenharmony_ci mlog_errno(ret); 22488c2ecf20Sopenharmony_ci goto unlock; 22498c2ecf20Sopenharmony_ci } 22508c2ecf20Sopenharmony_ci 22518c2ecf20Sopenharmony_ci desc = &wc->w_desc[0]; 22528c2ecf20Sopenharmony_ci 22538c2ecf20Sopenharmony_ci p_blkno = ocfs2_clusters_to_blocks(inode->i_sb, desc->c_phys); 22548c2ecf20Sopenharmony_ci BUG_ON(p_blkno == 0); 22558c2ecf20Sopenharmony_ci p_blkno += iblock & (u64)(ocfs2_clusters_to_blocks(inode->i_sb, 1) - 1); 22568c2ecf20Sopenharmony_ci 22578c2ecf20Sopenharmony_ci map_bh(bh_result, inode->i_sb, p_blkno); 22588c2ecf20Sopenharmony_ci bh_result->b_size = len; 22598c2ecf20Sopenharmony_ci if (desc->c_needs_zero) 22608c2ecf20Sopenharmony_ci set_buffer_new(bh_result); 22618c2ecf20Sopenharmony_ci 22628c2ecf20Sopenharmony_ci if (iblock > endblk) 22638c2ecf20Sopenharmony_ci set_buffer_new(bh_result); 22648c2ecf20Sopenharmony_ci 22658c2ecf20Sopenharmony_ci /* May sleep in end_io. It should not happen in a irq context. So defer 22668c2ecf20Sopenharmony_ci * it to dio work queue. */ 22678c2ecf20Sopenharmony_ci set_buffer_defer_completion(bh_result); 22688c2ecf20Sopenharmony_ci 22698c2ecf20Sopenharmony_ci if (!list_empty(&wc->w_unwritten_list)) { 22708c2ecf20Sopenharmony_ci struct ocfs2_unwritten_extent *ue = NULL; 22718c2ecf20Sopenharmony_ci 22728c2ecf20Sopenharmony_ci ue = list_first_entry(&wc->w_unwritten_list, 22738c2ecf20Sopenharmony_ci struct ocfs2_unwritten_extent, 22748c2ecf20Sopenharmony_ci ue_node); 22758c2ecf20Sopenharmony_ci BUG_ON(ue->ue_cpos != desc->c_cpos); 22768c2ecf20Sopenharmony_ci /* The physical address may be 0, fill it. */ 22778c2ecf20Sopenharmony_ci ue->ue_phys = desc->c_phys; 22788c2ecf20Sopenharmony_ci 22798c2ecf20Sopenharmony_ci list_splice_tail_init(&wc->w_unwritten_list, &dwc->dw_zero_list); 22808c2ecf20Sopenharmony_ci dwc->dw_zero_count += wc->w_unwritten_count; 22818c2ecf20Sopenharmony_ci } 22828c2ecf20Sopenharmony_ci 22838c2ecf20Sopenharmony_ci ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, wc); 22848c2ecf20Sopenharmony_ci BUG_ON(ret != len); 22858c2ecf20Sopenharmony_ci ret = 0; 22868c2ecf20Sopenharmony_ciunlock: 22878c2ecf20Sopenharmony_ci up_write(&oi->ip_alloc_sem); 22888c2ecf20Sopenharmony_ci ocfs2_inode_unlock(inode, 1); 22898c2ecf20Sopenharmony_ci brelse(di_bh); 22908c2ecf20Sopenharmony_ciout: 22918c2ecf20Sopenharmony_ci if (ret < 0) 22928c2ecf20Sopenharmony_ci ret = -EIO; 22938c2ecf20Sopenharmony_ci return ret; 22948c2ecf20Sopenharmony_ci} 22958c2ecf20Sopenharmony_ci 22968c2ecf20Sopenharmony_cistatic int ocfs2_dio_end_io_write(struct inode *inode, 22978c2ecf20Sopenharmony_ci struct ocfs2_dio_write_ctxt *dwc, 22988c2ecf20Sopenharmony_ci loff_t offset, 22998c2ecf20Sopenharmony_ci ssize_t bytes) 23008c2ecf20Sopenharmony_ci{ 23018c2ecf20Sopenharmony_ci struct ocfs2_cached_dealloc_ctxt dealloc; 23028c2ecf20Sopenharmony_ci struct ocfs2_extent_tree et; 23038c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 23048c2ecf20Sopenharmony_ci struct ocfs2_inode_info *oi = OCFS2_I(inode); 23058c2ecf20Sopenharmony_ci struct ocfs2_unwritten_extent *ue = NULL; 23068c2ecf20Sopenharmony_ci struct buffer_head *di_bh = NULL; 23078c2ecf20Sopenharmony_ci struct ocfs2_dinode *di; 23088c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *data_ac = NULL; 23098c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *meta_ac = NULL; 23108c2ecf20Sopenharmony_ci handle_t *handle = NULL; 23118c2ecf20Sopenharmony_ci loff_t end = offset + bytes; 23128c2ecf20Sopenharmony_ci int ret = 0, credits = 0; 23138c2ecf20Sopenharmony_ci 23148c2ecf20Sopenharmony_ci ocfs2_init_dealloc_ctxt(&dealloc); 23158c2ecf20Sopenharmony_ci 23168c2ecf20Sopenharmony_ci /* We do clear unwritten, delete orphan, change i_size here. If neither 23178c2ecf20Sopenharmony_ci * of these happen, we can skip all this. */ 23188c2ecf20Sopenharmony_ci if (list_empty(&dwc->dw_zero_list) && 23198c2ecf20Sopenharmony_ci end <= i_size_read(inode) && 23208c2ecf20Sopenharmony_ci !dwc->dw_orphaned) 23218c2ecf20Sopenharmony_ci goto out; 23228c2ecf20Sopenharmony_ci 23238c2ecf20Sopenharmony_ci ret = ocfs2_inode_lock(inode, &di_bh, 1); 23248c2ecf20Sopenharmony_ci if (ret < 0) { 23258c2ecf20Sopenharmony_ci mlog_errno(ret); 23268c2ecf20Sopenharmony_ci goto out; 23278c2ecf20Sopenharmony_ci } 23288c2ecf20Sopenharmony_ci 23298c2ecf20Sopenharmony_ci down_write(&oi->ip_alloc_sem); 23308c2ecf20Sopenharmony_ci 23318c2ecf20Sopenharmony_ci /* Delete orphan before acquire i_mutex. */ 23328c2ecf20Sopenharmony_ci if (dwc->dw_orphaned) { 23338c2ecf20Sopenharmony_ci BUG_ON(dwc->dw_writer_pid != task_pid_nr(current)); 23348c2ecf20Sopenharmony_ci 23358c2ecf20Sopenharmony_ci end = end > i_size_read(inode) ? end : 0; 23368c2ecf20Sopenharmony_ci 23378c2ecf20Sopenharmony_ci ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, 23388c2ecf20Sopenharmony_ci !!end, end); 23398c2ecf20Sopenharmony_ci if (ret < 0) 23408c2ecf20Sopenharmony_ci mlog_errno(ret); 23418c2ecf20Sopenharmony_ci } 23428c2ecf20Sopenharmony_ci 23438c2ecf20Sopenharmony_ci di = (struct ocfs2_dinode *)di_bh->b_data; 23448c2ecf20Sopenharmony_ci 23458c2ecf20Sopenharmony_ci ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); 23468c2ecf20Sopenharmony_ci 23478c2ecf20Sopenharmony_ci /* Attach dealloc with extent tree in case that we may reuse extents 23488c2ecf20Sopenharmony_ci * which are already unlinked from current extent tree due to extent 23498c2ecf20Sopenharmony_ci * rotation and merging. 23508c2ecf20Sopenharmony_ci */ 23518c2ecf20Sopenharmony_ci et.et_dealloc = &dealloc; 23528c2ecf20Sopenharmony_ci 23538c2ecf20Sopenharmony_ci ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2, 23548c2ecf20Sopenharmony_ci &data_ac, &meta_ac); 23558c2ecf20Sopenharmony_ci if (ret) { 23568c2ecf20Sopenharmony_ci mlog_errno(ret); 23578c2ecf20Sopenharmony_ci goto unlock; 23588c2ecf20Sopenharmony_ci } 23598c2ecf20Sopenharmony_ci 23608c2ecf20Sopenharmony_ci credits = ocfs2_calc_extend_credits(inode->i_sb, &di->id2.i_list); 23618c2ecf20Sopenharmony_ci 23628c2ecf20Sopenharmony_ci handle = ocfs2_start_trans(osb, credits); 23638c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 23648c2ecf20Sopenharmony_ci ret = PTR_ERR(handle); 23658c2ecf20Sopenharmony_ci mlog_errno(ret); 23668c2ecf20Sopenharmony_ci goto unlock; 23678c2ecf20Sopenharmony_ci } 23688c2ecf20Sopenharmony_ci ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 23698c2ecf20Sopenharmony_ci OCFS2_JOURNAL_ACCESS_WRITE); 23708c2ecf20Sopenharmony_ci if (ret) { 23718c2ecf20Sopenharmony_ci mlog_errno(ret); 23728c2ecf20Sopenharmony_ci goto commit; 23738c2ecf20Sopenharmony_ci } 23748c2ecf20Sopenharmony_ci 23758c2ecf20Sopenharmony_ci list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) { 23768c2ecf20Sopenharmony_ci ret = ocfs2_mark_extent_written(inode, &et, handle, 23778c2ecf20Sopenharmony_ci ue->ue_cpos, 1, 23788c2ecf20Sopenharmony_ci ue->ue_phys, 23798c2ecf20Sopenharmony_ci meta_ac, &dealloc); 23808c2ecf20Sopenharmony_ci if (ret < 0) { 23818c2ecf20Sopenharmony_ci mlog_errno(ret); 23828c2ecf20Sopenharmony_ci break; 23838c2ecf20Sopenharmony_ci } 23848c2ecf20Sopenharmony_ci } 23858c2ecf20Sopenharmony_ci 23868c2ecf20Sopenharmony_ci if (end > i_size_read(inode)) { 23878c2ecf20Sopenharmony_ci ret = ocfs2_set_inode_size(handle, inode, di_bh, end); 23888c2ecf20Sopenharmony_ci if (ret < 0) 23898c2ecf20Sopenharmony_ci mlog_errno(ret); 23908c2ecf20Sopenharmony_ci } 23918c2ecf20Sopenharmony_cicommit: 23928c2ecf20Sopenharmony_ci ocfs2_commit_trans(osb, handle); 23938c2ecf20Sopenharmony_ciunlock: 23948c2ecf20Sopenharmony_ci up_write(&oi->ip_alloc_sem); 23958c2ecf20Sopenharmony_ci ocfs2_inode_unlock(inode, 1); 23968c2ecf20Sopenharmony_ci brelse(di_bh); 23978c2ecf20Sopenharmony_ciout: 23988c2ecf20Sopenharmony_ci if (data_ac) 23998c2ecf20Sopenharmony_ci ocfs2_free_alloc_context(data_ac); 24008c2ecf20Sopenharmony_ci if (meta_ac) 24018c2ecf20Sopenharmony_ci ocfs2_free_alloc_context(meta_ac); 24028c2ecf20Sopenharmony_ci ocfs2_run_deallocs(osb, &dealloc); 24038c2ecf20Sopenharmony_ci ocfs2_dio_free_write_ctx(inode, dwc); 24048c2ecf20Sopenharmony_ci 24058c2ecf20Sopenharmony_ci return ret; 24068c2ecf20Sopenharmony_ci} 24078c2ecf20Sopenharmony_ci 24088c2ecf20Sopenharmony_ci/* 24098c2ecf20Sopenharmony_ci * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're 24108c2ecf20Sopenharmony_ci * particularly interested in the aio/dio case. We use the rw_lock DLM lock 24118c2ecf20Sopenharmony_ci * to protect io on one node from truncation on another. 24128c2ecf20Sopenharmony_ci */ 24138c2ecf20Sopenharmony_cistatic int ocfs2_dio_end_io(struct kiocb *iocb, 24148c2ecf20Sopenharmony_ci loff_t offset, 24158c2ecf20Sopenharmony_ci ssize_t bytes, 24168c2ecf20Sopenharmony_ci void *private) 24178c2ecf20Sopenharmony_ci{ 24188c2ecf20Sopenharmony_ci struct inode *inode = file_inode(iocb->ki_filp); 24198c2ecf20Sopenharmony_ci int level; 24208c2ecf20Sopenharmony_ci int ret = 0; 24218c2ecf20Sopenharmony_ci 24228c2ecf20Sopenharmony_ci /* this io's submitter should not have unlocked this before we could */ 24238c2ecf20Sopenharmony_ci BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); 24248c2ecf20Sopenharmony_ci 24258c2ecf20Sopenharmony_ci if (bytes <= 0) 24268c2ecf20Sopenharmony_ci mlog_ratelimited(ML_ERROR, "Direct IO failed, bytes = %lld", 24278c2ecf20Sopenharmony_ci (long long)bytes); 24288c2ecf20Sopenharmony_ci if (private) { 24298c2ecf20Sopenharmony_ci if (bytes > 0) 24308c2ecf20Sopenharmony_ci ret = ocfs2_dio_end_io_write(inode, private, offset, 24318c2ecf20Sopenharmony_ci bytes); 24328c2ecf20Sopenharmony_ci else 24338c2ecf20Sopenharmony_ci ocfs2_dio_free_write_ctx(inode, private); 24348c2ecf20Sopenharmony_ci } 24358c2ecf20Sopenharmony_ci 24368c2ecf20Sopenharmony_ci ocfs2_iocb_clear_rw_locked(iocb); 24378c2ecf20Sopenharmony_ci 24388c2ecf20Sopenharmony_ci level = ocfs2_iocb_rw_locked_level(iocb); 24398c2ecf20Sopenharmony_ci ocfs2_rw_unlock(inode, level); 24408c2ecf20Sopenharmony_ci return ret; 24418c2ecf20Sopenharmony_ci} 24428c2ecf20Sopenharmony_ci 24438c2ecf20Sopenharmony_cistatic ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 24448c2ecf20Sopenharmony_ci{ 24458c2ecf20Sopenharmony_ci struct file *file = iocb->ki_filp; 24468c2ecf20Sopenharmony_ci struct inode *inode = file->f_mapping->host; 24478c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 24488c2ecf20Sopenharmony_ci get_block_t *get_block; 24498c2ecf20Sopenharmony_ci 24508c2ecf20Sopenharmony_ci /* 24518c2ecf20Sopenharmony_ci * Fallback to buffered I/O if we see an inode without 24528c2ecf20Sopenharmony_ci * extents. 24538c2ecf20Sopenharmony_ci */ 24548c2ecf20Sopenharmony_ci if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 24558c2ecf20Sopenharmony_ci return 0; 24568c2ecf20Sopenharmony_ci 24578c2ecf20Sopenharmony_ci /* Fallback to buffered I/O if we do not support append dio. */ 24588c2ecf20Sopenharmony_ci if (iocb->ki_pos + iter->count > i_size_read(inode) && 24598c2ecf20Sopenharmony_ci !ocfs2_supports_append_dio(osb)) 24608c2ecf20Sopenharmony_ci return 0; 24618c2ecf20Sopenharmony_ci 24628c2ecf20Sopenharmony_ci if (iov_iter_rw(iter) == READ) 24638c2ecf20Sopenharmony_ci get_block = ocfs2_lock_get_block; 24648c2ecf20Sopenharmony_ci else 24658c2ecf20Sopenharmony_ci get_block = ocfs2_dio_wr_get_block; 24668c2ecf20Sopenharmony_ci 24678c2ecf20Sopenharmony_ci return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, 24688c2ecf20Sopenharmony_ci iter, get_block, 24698c2ecf20Sopenharmony_ci ocfs2_dio_end_io, NULL, 0); 24708c2ecf20Sopenharmony_ci} 24718c2ecf20Sopenharmony_ci 24728c2ecf20Sopenharmony_ciconst struct address_space_operations ocfs2_aops = { 24738c2ecf20Sopenharmony_ci .readpage = ocfs2_readpage, 24748c2ecf20Sopenharmony_ci .readahead = ocfs2_readahead, 24758c2ecf20Sopenharmony_ci .writepage = ocfs2_writepage, 24768c2ecf20Sopenharmony_ci .write_begin = ocfs2_write_begin, 24778c2ecf20Sopenharmony_ci .write_end = ocfs2_write_end, 24788c2ecf20Sopenharmony_ci .bmap = ocfs2_bmap, 24798c2ecf20Sopenharmony_ci .direct_IO = ocfs2_direct_IO, 24808c2ecf20Sopenharmony_ci .invalidatepage = block_invalidatepage, 24818c2ecf20Sopenharmony_ci .releasepage = ocfs2_releasepage, 24828c2ecf20Sopenharmony_ci .migratepage = buffer_migrate_page, 24838c2ecf20Sopenharmony_ci .is_partially_uptodate = block_is_partially_uptodate, 24848c2ecf20Sopenharmony_ci .error_remove_page = generic_error_remove_page, 24858c2ecf20Sopenharmony_ci}; 2486