162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (c) 2000-2006 Silicon Graphics, Inc. 462306a36Sopenharmony_ci * All Rights Reserved. 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci#include <linux/iversion.h> 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include "xfs.h" 962306a36Sopenharmony_ci#include "xfs_fs.h" 1062306a36Sopenharmony_ci#include "xfs_shared.h" 1162306a36Sopenharmony_ci#include "xfs_format.h" 1262306a36Sopenharmony_ci#include "xfs_log_format.h" 1362306a36Sopenharmony_ci#include "xfs_trans_resv.h" 1462306a36Sopenharmony_ci#include "xfs_mount.h" 1562306a36Sopenharmony_ci#include "xfs_defer.h" 1662306a36Sopenharmony_ci#include "xfs_inode.h" 1762306a36Sopenharmony_ci#include "xfs_dir2.h" 1862306a36Sopenharmony_ci#include "xfs_attr.h" 1962306a36Sopenharmony_ci#include "xfs_trans_space.h" 2062306a36Sopenharmony_ci#include "xfs_trans.h" 2162306a36Sopenharmony_ci#include "xfs_buf_item.h" 2262306a36Sopenharmony_ci#include "xfs_inode_item.h" 2362306a36Sopenharmony_ci#include "xfs_iunlink_item.h" 2462306a36Sopenharmony_ci#include "xfs_ialloc.h" 2562306a36Sopenharmony_ci#include "xfs_bmap.h" 2662306a36Sopenharmony_ci#include "xfs_bmap_util.h" 2762306a36Sopenharmony_ci#include "xfs_errortag.h" 2862306a36Sopenharmony_ci#include "xfs_error.h" 2962306a36Sopenharmony_ci#include "xfs_quota.h" 3062306a36Sopenharmony_ci#include "xfs_filestream.h" 3162306a36Sopenharmony_ci#include "xfs_trace.h" 3262306a36Sopenharmony_ci#include "xfs_icache.h" 3362306a36Sopenharmony_ci#include "xfs_symlink.h" 3462306a36Sopenharmony_ci#include "xfs_trans_priv.h" 3562306a36Sopenharmony_ci#include "xfs_log.h" 3662306a36Sopenharmony_ci#include "xfs_bmap_btree.h" 3762306a36Sopenharmony_ci#include "xfs_reflink.h" 3862306a36Sopenharmony_ci#include "xfs_ag.h" 3962306a36Sopenharmony_ci#include "xfs_log_priv.h" 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_cistruct kmem_cache *xfs_inode_cache; 
4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci/* 4462306a36Sopenharmony_ci * Used in xfs_itruncate_extents(). This is the maximum number of extents 4562306a36Sopenharmony_ci * freed from a file in a single transaction. 4662306a36Sopenharmony_ci */ 4762306a36Sopenharmony_ci#define XFS_ITRUNC_MAX_EXTENTS 2 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_ciSTATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *); 5062306a36Sopenharmony_ciSTATIC int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag, 5162306a36Sopenharmony_ci struct xfs_inode *); 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_ci/* 5462306a36Sopenharmony_ci * helper function to extract extent size hint from inode 5562306a36Sopenharmony_ci */ 5662306a36Sopenharmony_cixfs_extlen_t 5762306a36Sopenharmony_cixfs_get_extsz_hint( 5862306a36Sopenharmony_ci struct xfs_inode *ip) 5962306a36Sopenharmony_ci{ 6062306a36Sopenharmony_ci /* 6162306a36Sopenharmony_ci * No point in aligning allocations if we need to COW to actually 6262306a36Sopenharmony_ci * write to them. 6362306a36Sopenharmony_ci */ 6462306a36Sopenharmony_ci if (xfs_is_always_cow_inode(ip)) 6562306a36Sopenharmony_ci return 0; 6662306a36Sopenharmony_ci if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize) 6762306a36Sopenharmony_ci return ip->i_extsize; 6862306a36Sopenharmony_ci if (XFS_IS_REALTIME_INODE(ip)) 6962306a36Sopenharmony_ci return ip->i_mount->m_sb.sb_rextsize; 7062306a36Sopenharmony_ci return 0; 7162306a36Sopenharmony_ci} 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_ci/* 7462306a36Sopenharmony_ci * Helper function to extract CoW extent size hint from inode. 7562306a36Sopenharmony_ci * Between the extent size hint and the CoW extent size hint, we 7662306a36Sopenharmony_ci * return the greater of the two. If the value is zero (automatic), 7762306a36Sopenharmony_ci * use the default size. 
7862306a36Sopenharmony_ci */ 7962306a36Sopenharmony_cixfs_extlen_t 8062306a36Sopenharmony_cixfs_get_cowextsz_hint( 8162306a36Sopenharmony_ci struct xfs_inode *ip) 8262306a36Sopenharmony_ci{ 8362306a36Sopenharmony_ci xfs_extlen_t a, b; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci a = 0; 8662306a36Sopenharmony_ci if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) 8762306a36Sopenharmony_ci a = ip->i_cowextsize; 8862306a36Sopenharmony_ci b = xfs_get_extsz_hint(ip); 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci a = max(a, b); 9162306a36Sopenharmony_ci if (a == 0) 9262306a36Sopenharmony_ci return XFS_DEFAULT_COWEXTSZ_HINT; 9362306a36Sopenharmony_ci return a; 9462306a36Sopenharmony_ci} 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci/* 9762306a36Sopenharmony_ci * These two are wrapper routines around the xfs_ilock() routine used to 9862306a36Sopenharmony_ci * centralize some grungy code. They are used in places that wish to lock the 9962306a36Sopenharmony_ci * inode solely for reading the extents. The reason these places can't just 10062306a36Sopenharmony_ci * call xfs_ilock(ip, XFS_ILOCK_SHARED) is that the inode lock also guards to 10162306a36Sopenharmony_ci * bringing in of the extents from disk for a file in b-tree format. If the 10262306a36Sopenharmony_ci * inode is in b-tree format, then we need to lock the inode exclusively until 10362306a36Sopenharmony_ci * the extents are read in. Locking it exclusively all the time would limit 10462306a36Sopenharmony_ci * our parallelism unnecessarily, though. What we do instead is check to see 10562306a36Sopenharmony_ci * if the extents have been read in yet, and only lock the inode exclusively 10662306a36Sopenharmony_ci * if they have not. 10762306a36Sopenharmony_ci * 10862306a36Sopenharmony_ci * The functions return a value which should be given to the corresponding 10962306a36Sopenharmony_ci * xfs_iunlock() call. 
11062306a36Sopenharmony_ci */ 11162306a36Sopenharmony_ciuint 11262306a36Sopenharmony_cixfs_ilock_data_map_shared( 11362306a36Sopenharmony_ci struct xfs_inode *ip) 11462306a36Sopenharmony_ci{ 11562306a36Sopenharmony_ci uint lock_mode = XFS_ILOCK_SHARED; 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci if (xfs_need_iread_extents(&ip->i_df)) 11862306a36Sopenharmony_ci lock_mode = XFS_ILOCK_EXCL; 11962306a36Sopenharmony_ci xfs_ilock(ip, lock_mode); 12062306a36Sopenharmony_ci return lock_mode; 12162306a36Sopenharmony_ci} 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ciuint 12462306a36Sopenharmony_cixfs_ilock_attr_map_shared( 12562306a36Sopenharmony_ci struct xfs_inode *ip) 12662306a36Sopenharmony_ci{ 12762306a36Sopenharmony_ci uint lock_mode = XFS_ILOCK_SHARED; 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af)) 13062306a36Sopenharmony_ci lock_mode = XFS_ILOCK_EXCL; 13162306a36Sopenharmony_ci xfs_ilock(ip, lock_mode); 13262306a36Sopenharmony_ci return lock_mode; 13362306a36Sopenharmony_ci} 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci/* 13662306a36Sopenharmony_ci * You can't set both SHARED and EXCL for the same lock, 13762306a36Sopenharmony_ci * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_MMAPLOCK_SHARED, 13862306a36Sopenharmony_ci * XFS_MMAPLOCK_EXCL, XFS_ILOCK_SHARED, XFS_ILOCK_EXCL are valid values 13962306a36Sopenharmony_ci * to set in lock_flags. 
14062306a36Sopenharmony_ci */ 14162306a36Sopenharmony_cistatic inline void 14262306a36Sopenharmony_cixfs_lock_flags_assert( 14362306a36Sopenharmony_ci uint lock_flags) 14462306a36Sopenharmony_ci{ 14562306a36Sopenharmony_ci ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 14662306a36Sopenharmony_ci (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 14762306a36Sopenharmony_ci ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != 14862306a36Sopenharmony_ci (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); 14962306a36Sopenharmony_ci ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 15062306a36Sopenharmony_ci (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 15162306a36Sopenharmony_ci ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); 15262306a36Sopenharmony_ci ASSERT(lock_flags != 0); 15362306a36Sopenharmony_ci} 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci/* 15662306a36Sopenharmony_ci * In addition to i_rwsem in the VFS inode, the xfs inode contains 2 15762306a36Sopenharmony_ci * multi-reader locks: invalidate_lock and the i_lock. This routine allows 15862306a36Sopenharmony_ci * various combinations of the locks to be obtained. 15962306a36Sopenharmony_ci * 16062306a36Sopenharmony_ci * The 3 locks should always be ordered so that the IO lock is obtained first, 16162306a36Sopenharmony_ci * the mmap lock second and the ilock last in order to prevent deadlock. 
 *
 * Basic locking order:
 *
 * i_rwsem -> invalidate_lock -> page_lock -> i_ilock
 *
 * mmap_lock locking order:
 *
 * i_rwsem -> page lock -> mmap_lock
 * mmap_lock -> invalidate_lock -> page_lock
 *
 * The difference in mmap_lock locking order means that we cannot hold the
 * invalidate_lock over syscall based read(2)/write(2) based IO.  These IO paths
 * can fault in pages during copy in/out (for buffered IO) or require the
 * mmap_lock in get_user_pages() to map the user pages into the kernel address
 * space for direct IO.  Similarly the i_rwsem cannot be taken inside a page
 * fault because page faults already hold the mmap_lock.
 *
 * Hence to serialise fully against both syscall and mmap based IO, we need to
 * take both the i_rwsem and the invalidate_lock.  These locks should *only* be
 * both taken in places where we need to invalidate the page cache in a race
 * free manner (e.g. truncate, hole punch and other extent manipulation
 * functions).
18462306a36Sopenharmony_ci */ 18562306a36Sopenharmony_civoid 18662306a36Sopenharmony_cixfs_ilock( 18762306a36Sopenharmony_ci xfs_inode_t *ip, 18862306a36Sopenharmony_ci uint lock_flags) 18962306a36Sopenharmony_ci{ 19062306a36Sopenharmony_ci trace_xfs_ilock(ip, lock_flags, _RET_IP_); 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci xfs_lock_flags_assert(lock_flags); 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci if (lock_flags & XFS_IOLOCK_EXCL) { 19562306a36Sopenharmony_ci down_write_nested(&VFS_I(ip)->i_rwsem, 19662306a36Sopenharmony_ci XFS_IOLOCK_DEP(lock_flags)); 19762306a36Sopenharmony_ci } else if (lock_flags & XFS_IOLOCK_SHARED) { 19862306a36Sopenharmony_ci down_read_nested(&VFS_I(ip)->i_rwsem, 19962306a36Sopenharmony_ci XFS_IOLOCK_DEP(lock_flags)); 20062306a36Sopenharmony_ci } 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci if (lock_flags & XFS_MMAPLOCK_EXCL) { 20362306a36Sopenharmony_ci down_write_nested(&VFS_I(ip)->i_mapping->invalidate_lock, 20462306a36Sopenharmony_ci XFS_MMAPLOCK_DEP(lock_flags)); 20562306a36Sopenharmony_ci } else if (lock_flags & XFS_MMAPLOCK_SHARED) { 20662306a36Sopenharmony_ci down_read_nested(&VFS_I(ip)->i_mapping->invalidate_lock, 20762306a36Sopenharmony_ci XFS_MMAPLOCK_DEP(lock_flags)); 20862306a36Sopenharmony_ci } 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci if (lock_flags & XFS_ILOCK_EXCL) 21162306a36Sopenharmony_ci mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 21262306a36Sopenharmony_ci else if (lock_flags & XFS_ILOCK_SHARED) 21362306a36Sopenharmony_ci mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 21462306a36Sopenharmony_ci} 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci/* 21762306a36Sopenharmony_ci * This is just like xfs_ilock(), except that the caller 21862306a36Sopenharmony_ci * is guaranteed not to sleep. It returns 1 if it gets 21962306a36Sopenharmony_ci * the requested locks and 0 otherwise. 
If the IO lock is 22062306a36Sopenharmony_ci * obtained but the inode lock cannot be, then the IO lock 22162306a36Sopenharmony_ci * is dropped before returning. 22262306a36Sopenharmony_ci * 22362306a36Sopenharmony_ci * ip -- the inode being locked 22462306a36Sopenharmony_ci * lock_flags -- this parameter indicates the inode's locks to be 22562306a36Sopenharmony_ci * to be locked. See the comment for xfs_ilock() for a list 22662306a36Sopenharmony_ci * of valid values. 22762306a36Sopenharmony_ci */ 22862306a36Sopenharmony_ciint 22962306a36Sopenharmony_cixfs_ilock_nowait( 23062306a36Sopenharmony_ci xfs_inode_t *ip, 23162306a36Sopenharmony_ci uint lock_flags) 23262306a36Sopenharmony_ci{ 23362306a36Sopenharmony_ci trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_); 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci xfs_lock_flags_assert(lock_flags); 23662306a36Sopenharmony_ci 23762306a36Sopenharmony_ci if (lock_flags & XFS_IOLOCK_EXCL) { 23862306a36Sopenharmony_ci if (!down_write_trylock(&VFS_I(ip)->i_rwsem)) 23962306a36Sopenharmony_ci goto out; 24062306a36Sopenharmony_ci } else if (lock_flags & XFS_IOLOCK_SHARED) { 24162306a36Sopenharmony_ci if (!down_read_trylock(&VFS_I(ip)->i_rwsem)) 24262306a36Sopenharmony_ci goto out; 24362306a36Sopenharmony_ci } 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci if (lock_flags & XFS_MMAPLOCK_EXCL) { 24662306a36Sopenharmony_ci if (!down_write_trylock(&VFS_I(ip)->i_mapping->invalidate_lock)) 24762306a36Sopenharmony_ci goto out_undo_iolock; 24862306a36Sopenharmony_ci } else if (lock_flags & XFS_MMAPLOCK_SHARED) { 24962306a36Sopenharmony_ci if (!down_read_trylock(&VFS_I(ip)->i_mapping->invalidate_lock)) 25062306a36Sopenharmony_ci goto out_undo_iolock; 25162306a36Sopenharmony_ci } 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci if (lock_flags & XFS_ILOCK_EXCL) { 25462306a36Sopenharmony_ci if (!mrtryupdate(&ip->i_lock)) 25562306a36Sopenharmony_ci goto out_undo_mmaplock; 25662306a36Sopenharmony_ci } else if (lock_flags & 
XFS_ILOCK_SHARED) { 25762306a36Sopenharmony_ci if (!mrtryaccess(&ip->i_lock)) 25862306a36Sopenharmony_ci goto out_undo_mmaplock; 25962306a36Sopenharmony_ci } 26062306a36Sopenharmony_ci return 1; 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ciout_undo_mmaplock: 26362306a36Sopenharmony_ci if (lock_flags & XFS_MMAPLOCK_EXCL) 26462306a36Sopenharmony_ci up_write(&VFS_I(ip)->i_mapping->invalidate_lock); 26562306a36Sopenharmony_ci else if (lock_flags & XFS_MMAPLOCK_SHARED) 26662306a36Sopenharmony_ci up_read(&VFS_I(ip)->i_mapping->invalidate_lock); 26762306a36Sopenharmony_ciout_undo_iolock: 26862306a36Sopenharmony_ci if (lock_flags & XFS_IOLOCK_EXCL) 26962306a36Sopenharmony_ci up_write(&VFS_I(ip)->i_rwsem); 27062306a36Sopenharmony_ci else if (lock_flags & XFS_IOLOCK_SHARED) 27162306a36Sopenharmony_ci up_read(&VFS_I(ip)->i_rwsem); 27262306a36Sopenharmony_ciout: 27362306a36Sopenharmony_ci return 0; 27462306a36Sopenharmony_ci} 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci/* 27762306a36Sopenharmony_ci * xfs_iunlock() is used to drop the inode locks acquired with 27862306a36Sopenharmony_ci * xfs_ilock() and xfs_ilock_nowait(). The caller must pass 27962306a36Sopenharmony_ci * in the flags given to xfs_ilock() or xfs_ilock_nowait() so 28062306a36Sopenharmony_ci * that we know which locks to drop. 28162306a36Sopenharmony_ci * 28262306a36Sopenharmony_ci * ip -- the inode being unlocked 28362306a36Sopenharmony_ci * lock_flags -- this parameter indicates the inode's locks to be 28462306a36Sopenharmony_ci * to be unlocked. See the comment for xfs_ilock() for a list 28562306a36Sopenharmony_ci * of valid values for this parameter. 
28662306a36Sopenharmony_ci * 28762306a36Sopenharmony_ci */ 28862306a36Sopenharmony_civoid 28962306a36Sopenharmony_cixfs_iunlock( 29062306a36Sopenharmony_ci xfs_inode_t *ip, 29162306a36Sopenharmony_ci uint lock_flags) 29262306a36Sopenharmony_ci{ 29362306a36Sopenharmony_ci xfs_lock_flags_assert(lock_flags); 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_ci if (lock_flags & XFS_IOLOCK_EXCL) 29662306a36Sopenharmony_ci up_write(&VFS_I(ip)->i_rwsem); 29762306a36Sopenharmony_ci else if (lock_flags & XFS_IOLOCK_SHARED) 29862306a36Sopenharmony_ci up_read(&VFS_I(ip)->i_rwsem); 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci if (lock_flags & XFS_MMAPLOCK_EXCL) 30162306a36Sopenharmony_ci up_write(&VFS_I(ip)->i_mapping->invalidate_lock); 30262306a36Sopenharmony_ci else if (lock_flags & XFS_MMAPLOCK_SHARED) 30362306a36Sopenharmony_ci up_read(&VFS_I(ip)->i_mapping->invalidate_lock); 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci if (lock_flags & XFS_ILOCK_EXCL) 30662306a36Sopenharmony_ci mrunlock_excl(&ip->i_lock); 30762306a36Sopenharmony_ci else if (lock_flags & XFS_ILOCK_SHARED) 30862306a36Sopenharmony_ci mrunlock_shared(&ip->i_lock); 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci trace_xfs_iunlock(ip, lock_flags, _RET_IP_); 31162306a36Sopenharmony_ci} 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci/* 31462306a36Sopenharmony_ci * give up write locks. the i/o lock cannot be held nested 31562306a36Sopenharmony_ci * if it is being demoted. 
31662306a36Sopenharmony_ci */ 31762306a36Sopenharmony_civoid 31862306a36Sopenharmony_cixfs_ilock_demote( 31962306a36Sopenharmony_ci xfs_inode_t *ip, 32062306a36Sopenharmony_ci uint lock_flags) 32162306a36Sopenharmony_ci{ 32262306a36Sopenharmony_ci ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)); 32362306a36Sopenharmony_ci ASSERT((lock_flags & 32462306a36Sopenharmony_ci ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci if (lock_flags & XFS_ILOCK_EXCL) 32762306a36Sopenharmony_ci mrdemote(&ip->i_lock); 32862306a36Sopenharmony_ci if (lock_flags & XFS_MMAPLOCK_EXCL) 32962306a36Sopenharmony_ci downgrade_write(&VFS_I(ip)->i_mapping->invalidate_lock); 33062306a36Sopenharmony_ci if (lock_flags & XFS_IOLOCK_EXCL) 33162306a36Sopenharmony_ci downgrade_write(&VFS_I(ip)->i_rwsem); 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_); 33462306a36Sopenharmony_ci} 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci#if defined(DEBUG) || defined(XFS_WARN) 33762306a36Sopenharmony_cistatic inline bool 33862306a36Sopenharmony_ci__xfs_rwsem_islocked( 33962306a36Sopenharmony_ci struct rw_semaphore *rwsem, 34062306a36Sopenharmony_ci bool shared) 34162306a36Sopenharmony_ci{ 34262306a36Sopenharmony_ci if (!debug_locks) 34362306a36Sopenharmony_ci return rwsem_is_locked(rwsem); 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ci if (!shared) 34662306a36Sopenharmony_ci return lockdep_is_held_type(rwsem, 0); 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci /* 34962306a36Sopenharmony_ci * We are checking that the lock is held at least in shared 35062306a36Sopenharmony_ci * mode but don't care that it might be held exclusively 35162306a36Sopenharmony_ci * (i.e. shared | excl). Hence we check if the lock is held 35262306a36Sopenharmony_ci * in any mode rather than an explicit shared mode. 
35362306a36Sopenharmony_ci */ 35462306a36Sopenharmony_ci return lockdep_is_held_type(rwsem, -1); 35562306a36Sopenharmony_ci} 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_cibool 35862306a36Sopenharmony_cixfs_isilocked( 35962306a36Sopenharmony_ci struct xfs_inode *ip, 36062306a36Sopenharmony_ci uint lock_flags) 36162306a36Sopenharmony_ci{ 36262306a36Sopenharmony_ci if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) { 36362306a36Sopenharmony_ci if (!(lock_flags & XFS_ILOCK_SHARED)) 36462306a36Sopenharmony_ci return !!ip->i_lock.mr_writer; 36562306a36Sopenharmony_ci return rwsem_is_locked(&ip->i_lock.mr_lock); 36662306a36Sopenharmony_ci } 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) { 36962306a36Sopenharmony_ci return __xfs_rwsem_islocked(&VFS_I(ip)->i_mapping->invalidate_lock, 37062306a36Sopenharmony_ci (lock_flags & XFS_MMAPLOCK_SHARED)); 37162306a36Sopenharmony_ci } 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) { 37462306a36Sopenharmony_ci return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem, 37562306a36Sopenharmony_ci (lock_flags & XFS_IOLOCK_SHARED)); 37662306a36Sopenharmony_ci } 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci ASSERT(0); 37962306a36Sopenharmony_ci return false; 38062306a36Sopenharmony_ci} 38162306a36Sopenharmony_ci#endif 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci/* 38462306a36Sopenharmony_ci * xfs_lockdep_subclass_ok() is only used in an ASSERT, so is only called when 38562306a36Sopenharmony_ci * DEBUG or XFS_WARN is set. And MAX_LOCKDEP_SUBCLASSES is then only defined 38662306a36Sopenharmony_ci * when CONFIG_LOCKDEP is set. Hence the complex define below to avoid build 38762306a36Sopenharmony_ci * errors and warnings. 
38862306a36Sopenharmony_ci */ 38962306a36Sopenharmony_ci#if (defined(DEBUG) || defined(XFS_WARN)) && defined(CONFIG_LOCKDEP) 39062306a36Sopenharmony_cistatic bool 39162306a36Sopenharmony_cixfs_lockdep_subclass_ok( 39262306a36Sopenharmony_ci int subclass) 39362306a36Sopenharmony_ci{ 39462306a36Sopenharmony_ci return subclass < MAX_LOCKDEP_SUBCLASSES; 39562306a36Sopenharmony_ci} 39662306a36Sopenharmony_ci#else 39762306a36Sopenharmony_ci#define xfs_lockdep_subclass_ok(subclass) (true) 39862306a36Sopenharmony_ci#endif 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci/* 40162306a36Sopenharmony_ci * Bump the subclass so xfs_lock_inodes() acquires each lock with a different 40262306a36Sopenharmony_ci * value. This can be called for any type of inode lock combination, including 40362306a36Sopenharmony_ci * parent locking. Care must be taken to ensure we don't overrun the subclass 40462306a36Sopenharmony_ci * storage fields in the class mask we build. 40562306a36Sopenharmony_ci */ 40662306a36Sopenharmony_cistatic inline uint 40762306a36Sopenharmony_cixfs_lock_inumorder( 40862306a36Sopenharmony_ci uint lock_mode, 40962306a36Sopenharmony_ci uint subclass) 41062306a36Sopenharmony_ci{ 41162306a36Sopenharmony_ci uint class = 0; 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP | 41462306a36Sopenharmony_ci XFS_ILOCK_RTSUM))); 41562306a36Sopenharmony_ci ASSERT(xfs_lockdep_subclass_ok(subclass)); 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { 41862306a36Sopenharmony_ci ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS); 41962306a36Sopenharmony_ci class += subclass << XFS_IOLOCK_SHIFT; 42062306a36Sopenharmony_ci } 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_ci if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { 42362306a36Sopenharmony_ci ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS); 42462306a36Sopenharmony_ci class += subclass << 
XFS_MMAPLOCK_SHIFT; 42562306a36Sopenharmony_ci } 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) { 42862306a36Sopenharmony_ci ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS); 42962306a36Sopenharmony_ci class += subclass << XFS_ILOCK_SHIFT; 43062306a36Sopenharmony_ci } 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class; 43362306a36Sopenharmony_ci} 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci/* 43662306a36Sopenharmony_ci * The following routine will lock n inodes in exclusive mode. We assume the 43762306a36Sopenharmony_ci * caller calls us with the inodes in i_ino order. 43862306a36Sopenharmony_ci * 43962306a36Sopenharmony_ci * We need to detect deadlock where an inode that we lock is in the AIL and we 44062306a36Sopenharmony_ci * start waiting for another inode that is locked by a thread in a long running 44162306a36Sopenharmony_ci * transaction (such as truncate). This can result in deadlock since the long 44262306a36Sopenharmony_ci * running trans might need to wait for the inode we just locked in order to 44362306a36Sopenharmony_ci * push the tail and free space in the log. 44462306a36Sopenharmony_ci * 44562306a36Sopenharmony_ci * xfs_lock_inodes() can only be used to lock one type of lock at a time - 44662306a36Sopenharmony_ci * the iolock, the mmaplock or the ilock, but not more than one at a time. If we 44762306a36Sopenharmony_ci * lock more than one at a time, lockdep will report false positives saying we 44862306a36Sopenharmony_ci * have violated locking orders. 
44962306a36Sopenharmony_ci */ 45062306a36Sopenharmony_cistatic void 45162306a36Sopenharmony_cixfs_lock_inodes( 45262306a36Sopenharmony_ci struct xfs_inode **ips, 45362306a36Sopenharmony_ci int inodes, 45462306a36Sopenharmony_ci uint lock_mode) 45562306a36Sopenharmony_ci{ 45662306a36Sopenharmony_ci int attempts = 0; 45762306a36Sopenharmony_ci uint i; 45862306a36Sopenharmony_ci int j; 45962306a36Sopenharmony_ci bool try_lock; 46062306a36Sopenharmony_ci struct xfs_log_item *lp; 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_ci /* 46362306a36Sopenharmony_ci * Currently supports between 2 and 5 inodes with exclusive locking. We 46462306a36Sopenharmony_ci * support an arbitrary depth of locking here, but absolute limits on 46562306a36Sopenharmony_ci * inodes depend on the type of locking and the limits placed by 46662306a36Sopenharmony_ci * lockdep annotations in xfs_lock_inumorder. These are all checked by 46762306a36Sopenharmony_ci * the asserts. 46862306a36Sopenharmony_ci */ 46962306a36Sopenharmony_ci ASSERT(ips && inodes >= 2 && inodes <= 5); 47062306a36Sopenharmony_ci ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL | 47162306a36Sopenharmony_ci XFS_ILOCK_EXCL)); 47262306a36Sopenharmony_ci ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED | 47362306a36Sopenharmony_ci XFS_ILOCK_SHARED))); 47462306a36Sopenharmony_ci ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) || 47562306a36Sopenharmony_ci inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1); 47662306a36Sopenharmony_ci ASSERT(!(lock_mode & XFS_ILOCK_EXCL) || 47762306a36Sopenharmony_ci inodes <= XFS_ILOCK_MAX_SUBCLASS + 1); 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci if (lock_mode & XFS_IOLOCK_EXCL) { 48062306a36Sopenharmony_ci ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL))); 48162306a36Sopenharmony_ci } else if (lock_mode & XFS_MMAPLOCK_EXCL) 48262306a36Sopenharmony_ci ASSERT(!(lock_mode & XFS_ILOCK_EXCL)); 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ciagain: 
48562306a36Sopenharmony_ci try_lock = false; 48662306a36Sopenharmony_ci i = 0; 48762306a36Sopenharmony_ci for (; i < inodes; i++) { 48862306a36Sopenharmony_ci ASSERT(ips[i]); 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci if (i && (ips[i] == ips[i - 1])) /* Already locked */ 49162306a36Sopenharmony_ci continue; 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_ci /* 49462306a36Sopenharmony_ci * If try_lock is not set yet, make sure all locked inodes are 49562306a36Sopenharmony_ci * not in the AIL. If any are, set try_lock to be used later. 49662306a36Sopenharmony_ci */ 49762306a36Sopenharmony_ci if (!try_lock) { 49862306a36Sopenharmony_ci for (j = (i - 1); j >= 0 && !try_lock; j--) { 49962306a36Sopenharmony_ci lp = &ips[j]->i_itemp->ili_item; 50062306a36Sopenharmony_ci if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) 50162306a36Sopenharmony_ci try_lock = true; 50262306a36Sopenharmony_ci } 50362306a36Sopenharmony_ci } 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci /* 50662306a36Sopenharmony_ci * If any of the previous locks we have locked is in the AIL, 50762306a36Sopenharmony_ci * we must TRY to get the second and subsequent locks. If 50862306a36Sopenharmony_ci * we can't get any, we must release all we have 50962306a36Sopenharmony_ci * and try again. 51062306a36Sopenharmony_ci */ 51162306a36Sopenharmony_ci if (!try_lock) { 51262306a36Sopenharmony_ci xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); 51362306a36Sopenharmony_ci continue; 51462306a36Sopenharmony_ci } 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci /* try_lock means we have an inode locked that is in the AIL. */ 51762306a36Sopenharmony_ci ASSERT(i != 0); 51862306a36Sopenharmony_ci if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) 51962306a36Sopenharmony_ci continue; 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_ci /* 52262306a36Sopenharmony_ci * Unlock all previous guys and try again. 
xfs_iunlock will try 52362306a36Sopenharmony_ci * to push the tail if the inode is in the AIL. 52462306a36Sopenharmony_ci */ 52562306a36Sopenharmony_ci attempts++; 52662306a36Sopenharmony_ci for (j = i - 1; j >= 0; j--) { 52762306a36Sopenharmony_ci /* 52862306a36Sopenharmony_ci * Check to see if we've already unlocked this one. Not 52962306a36Sopenharmony_ci * the first one going back, and the inode ptr is the 53062306a36Sopenharmony_ci * same. 53162306a36Sopenharmony_ci */ 53262306a36Sopenharmony_ci if (j != (i - 1) && ips[j] == ips[j + 1]) 53362306a36Sopenharmony_ci continue; 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_ci xfs_iunlock(ips[j], lock_mode); 53662306a36Sopenharmony_ci } 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci if ((attempts % 5) == 0) { 53962306a36Sopenharmony_ci delay(1); /* Don't just spin the CPU */ 54062306a36Sopenharmony_ci } 54162306a36Sopenharmony_ci goto again; 54262306a36Sopenharmony_ci } 54362306a36Sopenharmony_ci} 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_ci/* 54662306a36Sopenharmony_ci * xfs_lock_two_inodes() can only be used to lock ilock. The iolock and 54762306a36Sopenharmony_ci * mmaplock must be double-locked separately since we use i_rwsem and 54862306a36Sopenharmony_ci * invalidate_lock for that. We now support taking one lock EXCL and the 54962306a36Sopenharmony_ci * other SHARED. 
55062306a36Sopenharmony_ci */ 55162306a36Sopenharmony_civoid 55262306a36Sopenharmony_cixfs_lock_two_inodes( 55362306a36Sopenharmony_ci struct xfs_inode *ip0, 55462306a36Sopenharmony_ci uint ip0_mode, 55562306a36Sopenharmony_ci struct xfs_inode *ip1, 55662306a36Sopenharmony_ci uint ip1_mode) 55762306a36Sopenharmony_ci{ 55862306a36Sopenharmony_ci int attempts = 0; 55962306a36Sopenharmony_ci struct xfs_log_item *lp; 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_ci ASSERT(hweight32(ip0_mode) == 1); 56262306a36Sopenharmony_ci ASSERT(hweight32(ip1_mode) == 1); 56362306a36Sopenharmony_ci ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); 56462306a36Sopenharmony_ci ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); 56562306a36Sopenharmony_ci ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); 56662306a36Sopenharmony_ci ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); 56762306a36Sopenharmony_ci ASSERT(ip0->i_ino != ip1->i_ino); 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci if (ip0->i_ino > ip1->i_ino) { 57062306a36Sopenharmony_ci swap(ip0, ip1); 57162306a36Sopenharmony_ci swap(ip0_mode, ip1_mode); 57262306a36Sopenharmony_ci } 57362306a36Sopenharmony_ci 57462306a36Sopenharmony_ci again: 57562306a36Sopenharmony_ci xfs_ilock(ip0, xfs_lock_inumorder(ip0_mode, 0)); 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci /* 57862306a36Sopenharmony_ci * If the first lock we have locked is in the AIL, we must TRY to get 57962306a36Sopenharmony_ci * the second lock. If we can't get it, we must release the first one 58062306a36Sopenharmony_ci * and try again. 
58162306a36Sopenharmony_ci */ 58262306a36Sopenharmony_ci lp = &ip0->i_itemp->ili_item; 58362306a36Sopenharmony_ci if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) { 58462306a36Sopenharmony_ci if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) { 58562306a36Sopenharmony_ci xfs_iunlock(ip0, ip0_mode); 58662306a36Sopenharmony_ci if ((++attempts % 5) == 0) 58762306a36Sopenharmony_ci delay(1); /* Don't just spin the CPU */ 58862306a36Sopenharmony_ci goto again; 58962306a36Sopenharmony_ci } 59062306a36Sopenharmony_ci } else { 59162306a36Sopenharmony_ci xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 1)); 59262306a36Sopenharmony_ci } 59362306a36Sopenharmony_ci} 59462306a36Sopenharmony_ci 59562306a36Sopenharmony_ciuint 59662306a36Sopenharmony_cixfs_ip2xflags( 59762306a36Sopenharmony_ci struct xfs_inode *ip) 59862306a36Sopenharmony_ci{ 59962306a36Sopenharmony_ci uint flags = 0; 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci if (ip->i_diflags & XFS_DIFLAG_ANY) { 60262306a36Sopenharmony_ci if (ip->i_diflags & XFS_DIFLAG_REALTIME) 60362306a36Sopenharmony_ci flags |= FS_XFLAG_REALTIME; 60462306a36Sopenharmony_ci if (ip->i_diflags & XFS_DIFLAG_PREALLOC) 60562306a36Sopenharmony_ci flags |= FS_XFLAG_PREALLOC; 60662306a36Sopenharmony_ci if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE) 60762306a36Sopenharmony_ci flags |= FS_XFLAG_IMMUTABLE; 60862306a36Sopenharmony_ci if (ip->i_diflags & XFS_DIFLAG_APPEND) 60962306a36Sopenharmony_ci flags |= FS_XFLAG_APPEND; 61062306a36Sopenharmony_ci if (ip->i_diflags & XFS_DIFLAG_SYNC) 61162306a36Sopenharmony_ci flags |= FS_XFLAG_SYNC; 61262306a36Sopenharmony_ci if (ip->i_diflags & XFS_DIFLAG_NOATIME) 61362306a36Sopenharmony_ci flags |= FS_XFLAG_NOATIME; 61462306a36Sopenharmony_ci if (ip->i_diflags & XFS_DIFLAG_NODUMP) 61562306a36Sopenharmony_ci flags |= FS_XFLAG_NODUMP; 61662306a36Sopenharmony_ci if (ip->i_diflags & XFS_DIFLAG_RTINHERIT) 61762306a36Sopenharmony_ci flags |= FS_XFLAG_RTINHERIT; 61862306a36Sopenharmony_ci if (ip->i_diflags & 
XFS_DIFLAG_PROJINHERIT) 61962306a36Sopenharmony_ci flags |= FS_XFLAG_PROJINHERIT; 62062306a36Sopenharmony_ci if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS) 62162306a36Sopenharmony_ci flags |= FS_XFLAG_NOSYMLINKS; 62262306a36Sopenharmony_ci if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) 62362306a36Sopenharmony_ci flags |= FS_XFLAG_EXTSIZE; 62462306a36Sopenharmony_ci if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) 62562306a36Sopenharmony_ci flags |= FS_XFLAG_EXTSZINHERIT; 62662306a36Sopenharmony_ci if (ip->i_diflags & XFS_DIFLAG_NODEFRAG) 62762306a36Sopenharmony_ci flags |= FS_XFLAG_NODEFRAG; 62862306a36Sopenharmony_ci if (ip->i_diflags & XFS_DIFLAG_FILESTREAM) 62962306a36Sopenharmony_ci flags |= FS_XFLAG_FILESTREAM; 63062306a36Sopenharmony_ci } 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_ci if (ip->i_diflags2 & XFS_DIFLAG2_ANY) { 63362306a36Sopenharmony_ci if (ip->i_diflags2 & XFS_DIFLAG2_DAX) 63462306a36Sopenharmony_ci flags |= FS_XFLAG_DAX; 63562306a36Sopenharmony_ci if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) 63662306a36Sopenharmony_ci flags |= FS_XFLAG_COWEXTSIZE; 63762306a36Sopenharmony_ci } 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_ci if (xfs_inode_has_attr_fork(ip)) 64062306a36Sopenharmony_ci flags |= FS_XFLAG_HASATTR; 64162306a36Sopenharmony_ci return flags; 64262306a36Sopenharmony_ci} 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_ci/* 64562306a36Sopenharmony_ci * Lookups up an inode from "name". If ci_name is not NULL, then a CI match 64662306a36Sopenharmony_ci * is allowed, otherwise it has to be an exact match. If a CI match is found, 64762306a36Sopenharmony_ci * ci_name->name will point to a the actual name (caller must free) or 64862306a36Sopenharmony_ci * will be set to NULL if an exact match is found. 
64962306a36Sopenharmony_ci */ 65062306a36Sopenharmony_ciint 65162306a36Sopenharmony_cixfs_lookup( 65262306a36Sopenharmony_ci struct xfs_inode *dp, 65362306a36Sopenharmony_ci const struct xfs_name *name, 65462306a36Sopenharmony_ci struct xfs_inode **ipp, 65562306a36Sopenharmony_ci struct xfs_name *ci_name) 65662306a36Sopenharmony_ci{ 65762306a36Sopenharmony_ci xfs_ino_t inum; 65862306a36Sopenharmony_ci int error; 65962306a36Sopenharmony_ci 66062306a36Sopenharmony_ci trace_xfs_lookup(dp, name); 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci if (xfs_is_shutdown(dp->i_mount)) 66362306a36Sopenharmony_ci return -EIO; 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_ci error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); 66662306a36Sopenharmony_ci if (error) 66762306a36Sopenharmony_ci goto out_unlock; 66862306a36Sopenharmony_ci 66962306a36Sopenharmony_ci error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp); 67062306a36Sopenharmony_ci if (error) 67162306a36Sopenharmony_ci goto out_free_name; 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_ci return 0; 67462306a36Sopenharmony_ci 67562306a36Sopenharmony_ciout_free_name: 67662306a36Sopenharmony_ci if (ci_name) 67762306a36Sopenharmony_ci kmem_free(ci_name->name); 67862306a36Sopenharmony_ciout_unlock: 67962306a36Sopenharmony_ci *ipp = NULL; 68062306a36Sopenharmony_ci return error; 68162306a36Sopenharmony_ci} 68262306a36Sopenharmony_ci 68362306a36Sopenharmony_ci/* Propagate di_flags from a parent inode to a child inode. 
*/ 68462306a36Sopenharmony_cistatic void 68562306a36Sopenharmony_cixfs_inode_inherit_flags( 68662306a36Sopenharmony_ci struct xfs_inode *ip, 68762306a36Sopenharmony_ci const struct xfs_inode *pip) 68862306a36Sopenharmony_ci{ 68962306a36Sopenharmony_ci unsigned int di_flags = 0; 69062306a36Sopenharmony_ci xfs_failaddr_t failaddr; 69162306a36Sopenharmony_ci umode_t mode = VFS_I(ip)->i_mode; 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_ci if (S_ISDIR(mode)) { 69462306a36Sopenharmony_ci if (pip->i_diflags & XFS_DIFLAG_RTINHERIT) 69562306a36Sopenharmony_ci di_flags |= XFS_DIFLAG_RTINHERIT; 69662306a36Sopenharmony_ci if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { 69762306a36Sopenharmony_ci di_flags |= XFS_DIFLAG_EXTSZINHERIT; 69862306a36Sopenharmony_ci ip->i_extsize = pip->i_extsize; 69962306a36Sopenharmony_ci } 70062306a36Sopenharmony_ci if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT) 70162306a36Sopenharmony_ci di_flags |= XFS_DIFLAG_PROJINHERIT; 70262306a36Sopenharmony_ci } else if (S_ISREG(mode)) { 70362306a36Sopenharmony_ci if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) && 70462306a36Sopenharmony_ci xfs_has_realtime(ip->i_mount)) 70562306a36Sopenharmony_ci di_flags |= XFS_DIFLAG_REALTIME; 70662306a36Sopenharmony_ci if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { 70762306a36Sopenharmony_ci di_flags |= XFS_DIFLAG_EXTSIZE; 70862306a36Sopenharmony_ci ip->i_extsize = pip->i_extsize; 70962306a36Sopenharmony_ci } 71062306a36Sopenharmony_ci } 71162306a36Sopenharmony_ci if ((pip->i_diflags & XFS_DIFLAG_NOATIME) && 71262306a36Sopenharmony_ci xfs_inherit_noatime) 71362306a36Sopenharmony_ci di_flags |= XFS_DIFLAG_NOATIME; 71462306a36Sopenharmony_ci if ((pip->i_diflags & XFS_DIFLAG_NODUMP) && 71562306a36Sopenharmony_ci xfs_inherit_nodump) 71662306a36Sopenharmony_ci di_flags |= XFS_DIFLAG_NODUMP; 71762306a36Sopenharmony_ci if ((pip->i_diflags & XFS_DIFLAG_SYNC) && 71862306a36Sopenharmony_ci xfs_inherit_sync) 71962306a36Sopenharmony_ci di_flags |= XFS_DIFLAG_SYNC; 
72062306a36Sopenharmony_ci if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) && 72162306a36Sopenharmony_ci xfs_inherit_nosymlinks) 72262306a36Sopenharmony_ci di_flags |= XFS_DIFLAG_NOSYMLINKS; 72362306a36Sopenharmony_ci if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) && 72462306a36Sopenharmony_ci xfs_inherit_nodefrag) 72562306a36Sopenharmony_ci di_flags |= XFS_DIFLAG_NODEFRAG; 72662306a36Sopenharmony_ci if (pip->i_diflags & XFS_DIFLAG_FILESTREAM) 72762306a36Sopenharmony_ci di_flags |= XFS_DIFLAG_FILESTREAM; 72862306a36Sopenharmony_ci 72962306a36Sopenharmony_ci ip->i_diflags |= di_flags; 73062306a36Sopenharmony_ci 73162306a36Sopenharmony_ci /* 73262306a36Sopenharmony_ci * Inode verifiers on older kernels only check that the extent size 73362306a36Sopenharmony_ci * hint is an integer multiple of the rt extent size on realtime files. 73462306a36Sopenharmony_ci * They did not check the hint alignment on a directory with both 73562306a36Sopenharmony_ci * rtinherit and extszinherit flags set. If the misaligned hint is 73662306a36Sopenharmony_ci * propagated from a directory into a new realtime file, new file 73762306a36Sopenharmony_ci * allocations will fail due to math errors in the rt allocator and/or 73862306a36Sopenharmony_ci * trip the verifiers. Validate the hint settings in the new file so 73962306a36Sopenharmony_ci * that we don't let broken hints propagate. 74062306a36Sopenharmony_ci */ 74162306a36Sopenharmony_ci failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize, 74262306a36Sopenharmony_ci VFS_I(ip)->i_mode, ip->i_diflags); 74362306a36Sopenharmony_ci if (failaddr) { 74462306a36Sopenharmony_ci ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | 74562306a36Sopenharmony_ci XFS_DIFLAG_EXTSZINHERIT); 74662306a36Sopenharmony_ci ip->i_extsize = 0; 74762306a36Sopenharmony_ci } 74862306a36Sopenharmony_ci} 74962306a36Sopenharmony_ci 75062306a36Sopenharmony_ci/* Propagate di_flags2 from a parent inode to a child inode. 
*/ 75162306a36Sopenharmony_cistatic void 75262306a36Sopenharmony_cixfs_inode_inherit_flags2( 75362306a36Sopenharmony_ci struct xfs_inode *ip, 75462306a36Sopenharmony_ci const struct xfs_inode *pip) 75562306a36Sopenharmony_ci{ 75662306a36Sopenharmony_ci xfs_failaddr_t failaddr; 75762306a36Sopenharmony_ci 75862306a36Sopenharmony_ci if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) { 75962306a36Sopenharmony_ci ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE; 76062306a36Sopenharmony_ci ip->i_cowextsize = pip->i_cowextsize; 76162306a36Sopenharmony_ci } 76262306a36Sopenharmony_ci if (pip->i_diflags2 & XFS_DIFLAG2_DAX) 76362306a36Sopenharmony_ci ip->i_diflags2 |= XFS_DIFLAG2_DAX; 76462306a36Sopenharmony_ci 76562306a36Sopenharmony_ci /* Don't let invalid cowextsize hints propagate. */ 76662306a36Sopenharmony_ci failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize, 76762306a36Sopenharmony_ci VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2); 76862306a36Sopenharmony_ci if (failaddr) { 76962306a36Sopenharmony_ci ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE; 77062306a36Sopenharmony_ci ip->i_cowextsize = 0; 77162306a36Sopenharmony_ci } 77262306a36Sopenharmony_ci} 77362306a36Sopenharmony_ci 77462306a36Sopenharmony_ci/* 77562306a36Sopenharmony_ci * Initialise a newly allocated inode and return the in-core inode to the 77662306a36Sopenharmony_ci * caller locked exclusively. 
77762306a36Sopenharmony_ci */ 77862306a36Sopenharmony_ciint 77962306a36Sopenharmony_cixfs_init_new_inode( 78062306a36Sopenharmony_ci struct mnt_idmap *idmap, 78162306a36Sopenharmony_ci struct xfs_trans *tp, 78262306a36Sopenharmony_ci struct xfs_inode *pip, 78362306a36Sopenharmony_ci xfs_ino_t ino, 78462306a36Sopenharmony_ci umode_t mode, 78562306a36Sopenharmony_ci xfs_nlink_t nlink, 78662306a36Sopenharmony_ci dev_t rdev, 78762306a36Sopenharmony_ci prid_t prid, 78862306a36Sopenharmony_ci bool init_xattrs, 78962306a36Sopenharmony_ci struct xfs_inode **ipp) 79062306a36Sopenharmony_ci{ 79162306a36Sopenharmony_ci struct inode *dir = pip ? VFS_I(pip) : NULL; 79262306a36Sopenharmony_ci struct xfs_mount *mp = tp->t_mountp; 79362306a36Sopenharmony_ci struct xfs_inode *ip; 79462306a36Sopenharmony_ci unsigned int flags; 79562306a36Sopenharmony_ci int error; 79662306a36Sopenharmony_ci struct timespec64 tv; 79762306a36Sopenharmony_ci struct inode *inode; 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci /* 80062306a36Sopenharmony_ci * Protect against obviously corrupt allocation btree records. Later 80162306a36Sopenharmony_ci * xfs_iget checks will catch re-allocation of other active in-memory 80262306a36Sopenharmony_ci * and on-disk inodes. If we don't catch reallocating the parent inode 80362306a36Sopenharmony_ci * here we will deadlock in xfs_iget() so we have to do these checks 80462306a36Sopenharmony_ci * first. 80562306a36Sopenharmony_ci */ 80662306a36Sopenharmony_ci if ((pip && ino == pip->i_ino) || !xfs_verify_dir_ino(mp, ino)) { 80762306a36Sopenharmony_ci xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino); 80862306a36Sopenharmony_ci return -EFSCORRUPTED; 80962306a36Sopenharmony_ci } 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci /* 81262306a36Sopenharmony_ci * Get the in-core inode with the lock held exclusively to prevent 81362306a36Sopenharmony_ci * others from looking at until we're done. 
81462306a36Sopenharmony_ci */ 81562306a36Sopenharmony_ci error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); 81662306a36Sopenharmony_ci if (error) 81762306a36Sopenharmony_ci return error; 81862306a36Sopenharmony_ci 81962306a36Sopenharmony_ci ASSERT(ip != NULL); 82062306a36Sopenharmony_ci inode = VFS_I(ip); 82162306a36Sopenharmony_ci set_nlink(inode, nlink); 82262306a36Sopenharmony_ci inode->i_rdev = rdev; 82362306a36Sopenharmony_ci ip->i_projid = prid; 82462306a36Sopenharmony_ci 82562306a36Sopenharmony_ci if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) { 82662306a36Sopenharmony_ci inode_fsuid_set(inode, idmap); 82762306a36Sopenharmony_ci inode->i_gid = dir->i_gid; 82862306a36Sopenharmony_ci inode->i_mode = mode; 82962306a36Sopenharmony_ci } else { 83062306a36Sopenharmony_ci inode_init_owner(idmap, inode, dir, mode); 83162306a36Sopenharmony_ci } 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_ci /* 83462306a36Sopenharmony_ci * If the group ID of the new file does not match the effective group 83562306a36Sopenharmony_ci * ID or one of the supplementary group IDs, the S_ISGID bit is cleared 83662306a36Sopenharmony_ci * (and only if the irix_sgid_inherit compatibility variable is set). 
83762306a36Sopenharmony_ci */ 83862306a36Sopenharmony_ci if (irix_sgid_inherit && (inode->i_mode & S_ISGID) && 83962306a36Sopenharmony_ci !vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode))) 84062306a36Sopenharmony_ci inode->i_mode &= ~S_ISGID; 84162306a36Sopenharmony_ci 84262306a36Sopenharmony_ci ip->i_disk_size = 0; 84362306a36Sopenharmony_ci ip->i_df.if_nextents = 0; 84462306a36Sopenharmony_ci ASSERT(ip->i_nblocks == 0); 84562306a36Sopenharmony_ci 84662306a36Sopenharmony_ci tv = inode_set_ctime_current(inode); 84762306a36Sopenharmony_ci inode->i_mtime = tv; 84862306a36Sopenharmony_ci inode->i_atime = tv; 84962306a36Sopenharmony_ci 85062306a36Sopenharmony_ci ip->i_extsize = 0; 85162306a36Sopenharmony_ci ip->i_diflags = 0; 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci if (xfs_has_v3inodes(mp)) { 85462306a36Sopenharmony_ci inode_set_iversion(inode, 1); 85562306a36Sopenharmony_ci ip->i_cowextsize = 0; 85662306a36Sopenharmony_ci ip->i_crtime = tv; 85762306a36Sopenharmony_ci } 85862306a36Sopenharmony_ci 85962306a36Sopenharmony_ci flags = XFS_ILOG_CORE; 86062306a36Sopenharmony_ci switch (mode & S_IFMT) { 86162306a36Sopenharmony_ci case S_IFIFO: 86262306a36Sopenharmony_ci case S_IFCHR: 86362306a36Sopenharmony_ci case S_IFBLK: 86462306a36Sopenharmony_ci case S_IFSOCK: 86562306a36Sopenharmony_ci ip->i_df.if_format = XFS_DINODE_FMT_DEV; 86662306a36Sopenharmony_ci flags |= XFS_ILOG_DEV; 86762306a36Sopenharmony_ci break; 86862306a36Sopenharmony_ci case S_IFREG: 86962306a36Sopenharmony_ci case S_IFDIR: 87062306a36Sopenharmony_ci if (pip && (pip->i_diflags & XFS_DIFLAG_ANY)) 87162306a36Sopenharmony_ci xfs_inode_inherit_flags(ip, pip); 87262306a36Sopenharmony_ci if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY)) 87362306a36Sopenharmony_ci xfs_inode_inherit_flags2(ip, pip); 87462306a36Sopenharmony_ci fallthrough; 87562306a36Sopenharmony_ci case S_IFLNK: 87662306a36Sopenharmony_ci ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; 87762306a36Sopenharmony_ci ip->i_df.if_bytes = 0; 
87862306a36Sopenharmony_ci ip->i_df.if_u1.if_root = NULL; 87962306a36Sopenharmony_ci break; 88062306a36Sopenharmony_ci default: 88162306a36Sopenharmony_ci ASSERT(0); 88262306a36Sopenharmony_ci } 88362306a36Sopenharmony_ci 88462306a36Sopenharmony_ci /* 88562306a36Sopenharmony_ci * If we need to create attributes immediately after allocating the 88662306a36Sopenharmony_ci * inode, initialise an empty attribute fork right now. We use the 88762306a36Sopenharmony_ci * default fork offset for attributes here as we don't know exactly what 88862306a36Sopenharmony_ci * size or how many attributes we might be adding. We can do this 88962306a36Sopenharmony_ci * safely here because we know the data fork is completely empty and 89062306a36Sopenharmony_ci * this saves us from needing to run a separate transaction to set the 89162306a36Sopenharmony_ci * fork offset in the immediate future. 89262306a36Sopenharmony_ci */ 89362306a36Sopenharmony_ci if (init_xattrs && xfs_has_attr(mp)) { 89462306a36Sopenharmony_ci ip->i_forkoff = xfs_default_attroffset(ip) >> 3; 89562306a36Sopenharmony_ci xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0); 89662306a36Sopenharmony_ci } 89762306a36Sopenharmony_ci 89862306a36Sopenharmony_ci /* 89962306a36Sopenharmony_ci * Log the new values stuffed into the inode. 90062306a36Sopenharmony_ci */ 90162306a36Sopenharmony_ci xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 90262306a36Sopenharmony_ci xfs_trans_log_inode(tp, ip, flags); 90362306a36Sopenharmony_ci 90462306a36Sopenharmony_ci /* now that we have an i_mode we can setup the inode structure */ 90562306a36Sopenharmony_ci xfs_setup_inode(ip); 90662306a36Sopenharmony_ci 90762306a36Sopenharmony_ci *ipp = ip; 90862306a36Sopenharmony_ci return 0; 90962306a36Sopenharmony_ci} 91062306a36Sopenharmony_ci 91162306a36Sopenharmony_ci/* 91262306a36Sopenharmony_ci * Decrement the link count on an inode & log the change. 
If this causes the 91362306a36Sopenharmony_ci * link count to go to zero, move the inode to AGI unlinked list so that it can 91462306a36Sopenharmony_ci * be freed when the last active reference goes away via xfs_inactive(). 91562306a36Sopenharmony_ci */ 91662306a36Sopenharmony_cistatic int /* error */ 91762306a36Sopenharmony_cixfs_droplink( 91862306a36Sopenharmony_ci xfs_trans_t *tp, 91962306a36Sopenharmony_ci xfs_inode_t *ip) 92062306a36Sopenharmony_ci{ 92162306a36Sopenharmony_ci if (VFS_I(ip)->i_nlink == 0) { 92262306a36Sopenharmony_ci xfs_alert(ip->i_mount, 92362306a36Sopenharmony_ci "%s: Attempt to drop inode (%llu) with nlink zero.", 92462306a36Sopenharmony_ci __func__, ip->i_ino); 92562306a36Sopenharmony_ci return -EFSCORRUPTED; 92662306a36Sopenharmony_ci } 92762306a36Sopenharmony_ci 92862306a36Sopenharmony_ci xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 92962306a36Sopenharmony_ci 93062306a36Sopenharmony_ci drop_nlink(VFS_I(ip)); 93162306a36Sopenharmony_ci xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 93262306a36Sopenharmony_ci 93362306a36Sopenharmony_ci if (VFS_I(ip)->i_nlink) 93462306a36Sopenharmony_ci return 0; 93562306a36Sopenharmony_ci 93662306a36Sopenharmony_ci return xfs_iunlink(tp, ip); 93762306a36Sopenharmony_ci} 93862306a36Sopenharmony_ci 93962306a36Sopenharmony_ci/* 94062306a36Sopenharmony_ci * Increment the link count on an inode & log the change. 
94162306a36Sopenharmony_ci */ 94262306a36Sopenharmony_cistatic void 94362306a36Sopenharmony_cixfs_bumplink( 94462306a36Sopenharmony_ci xfs_trans_t *tp, 94562306a36Sopenharmony_ci xfs_inode_t *ip) 94662306a36Sopenharmony_ci{ 94762306a36Sopenharmony_ci xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 94862306a36Sopenharmony_ci 94962306a36Sopenharmony_ci inc_nlink(VFS_I(ip)); 95062306a36Sopenharmony_ci xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 95162306a36Sopenharmony_ci} 95262306a36Sopenharmony_ci 95362306a36Sopenharmony_ciint 95462306a36Sopenharmony_cixfs_create( 95562306a36Sopenharmony_ci struct mnt_idmap *idmap, 95662306a36Sopenharmony_ci xfs_inode_t *dp, 95762306a36Sopenharmony_ci struct xfs_name *name, 95862306a36Sopenharmony_ci umode_t mode, 95962306a36Sopenharmony_ci dev_t rdev, 96062306a36Sopenharmony_ci bool init_xattrs, 96162306a36Sopenharmony_ci xfs_inode_t **ipp) 96262306a36Sopenharmony_ci{ 96362306a36Sopenharmony_ci int is_dir = S_ISDIR(mode); 96462306a36Sopenharmony_ci struct xfs_mount *mp = dp->i_mount; 96562306a36Sopenharmony_ci struct xfs_inode *ip = NULL; 96662306a36Sopenharmony_ci struct xfs_trans *tp = NULL; 96762306a36Sopenharmony_ci int error; 96862306a36Sopenharmony_ci bool unlock_dp_on_error = false; 96962306a36Sopenharmony_ci prid_t prid; 97062306a36Sopenharmony_ci struct xfs_dquot *udqp = NULL; 97162306a36Sopenharmony_ci struct xfs_dquot *gdqp = NULL; 97262306a36Sopenharmony_ci struct xfs_dquot *pdqp = NULL; 97362306a36Sopenharmony_ci struct xfs_trans_res *tres; 97462306a36Sopenharmony_ci uint resblks; 97562306a36Sopenharmony_ci xfs_ino_t ino; 97662306a36Sopenharmony_ci 97762306a36Sopenharmony_ci trace_xfs_create(dp, name); 97862306a36Sopenharmony_ci 97962306a36Sopenharmony_ci if (xfs_is_shutdown(mp)) 98062306a36Sopenharmony_ci return -EIO; 98162306a36Sopenharmony_ci 98262306a36Sopenharmony_ci prid = xfs_get_initial_prid(dp); 98362306a36Sopenharmony_ci 98462306a36Sopenharmony_ci /* 98562306a36Sopenharmony_ci * Make sure that we have allocated 
dquot(s) on disk. 98662306a36Sopenharmony_ci */ 98762306a36Sopenharmony_ci error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), 98862306a36Sopenharmony_ci mapped_fsgid(idmap, &init_user_ns), prid, 98962306a36Sopenharmony_ci XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, 99062306a36Sopenharmony_ci &udqp, &gdqp, &pdqp); 99162306a36Sopenharmony_ci if (error) 99262306a36Sopenharmony_ci return error; 99362306a36Sopenharmony_ci 99462306a36Sopenharmony_ci if (is_dir) { 99562306a36Sopenharmony_ci resblks = XFS_MKDIR_SPACE_RES(mp, name->len); 99662306a36Sopenharmony_ci tres = &M_RES(mp)->tr_mkdir; 99762306a36Sopenharmony_ci } else { 99862306a36Sopenharmony_ci resblks = XFS_CREATE_SPACE_RES(mp, name->len); 99962306a36Sopenharmony_ci tres = &M_RES(mp)->tr_create; 100062306a36Sopenharmony_ci } 100162306a36Sopenharmony_ci 100262306a36Sopenharmony_ci /* 100362306a36Sopenharmony_ci * Initially assume that the file does not exist and 100462306a36Sopenharmony_ci * reserve the resources for that case. If that is not 100562306a36Sopenharmony_ci * the case we'll drop the one we have and get a more 100662306a36Sopenharmony_ci * appropriate transaction later. 
100762306a36Sopenharmony_ci */ 100862306a36Sopenharmony_ci error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks, 100962306a36Sopenharmony_ci &tp); 101062306a36Sopenharmony_ci if (error == -ENOSPC) { 101162306a36Sopenharmony_ci /* flush outstanding delalloc blocks and retry */ 101262306a36Sopenharmony_ci xfs_flush_inodes(mp); 101362306a36Sopenharmony_ci error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, 101462306a36Sopenharmony_ci resblks, &tp); 101562306a36Sopenharmony_ci } 101662306a36Sopenharmony_ci if (error) 101762306a36Sopenharmony_ci goto out_release_dquots; 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 102062306a36Sopenharmony_ci unlock_dp_on_error = true; 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_ci /* 102362306a36Sopenharmony_ci * A newly created regular or special file just has one directory 102462306a36Sopenharmony_ci * entry pointing to them, but a directory also the "." entry 102562306a36Sopenharmony_ci * pointing to itself. 102662306a36Sopenharmony_ci */ 102762306a36Sopenharmony_ci error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); 102862306a36Sopenharmony_ci if (!error) 102962306a36Sopenharmony_ci error = xfs_init_new_inode(idmap, tp, dp, ino, mode, 103062306a36Sopenharmony_ci is_dir ? 2 : 1, rdev, prid, init_xattrs, &ip); 103162306a36Sopenharmony_ci if (error) 103262306a36Sopenharmony_ci goto out_trans_cancel; 103362306a36Sopenharmony_ci 103462306a36Sopenharmony_ci /* 103562306a36Sopenharmony_ci * Now we join the directory inode to the transaction. We do not do it 103662306a36Sopenharmony_ci * earlier because xfs_dialloc might commit the previous transaction 103762306a36Sopenharmony_ci * (and release all the locks). An error from here on will result in 103862306a36Sopenharmony_ci * the transaction cancel unlocking dp so don't do it explicitly in the 103962306a36Sopenharmony_ci * error path. 
104062306a36Sopenharmony_ci */ 104162306a36Sopenharmony_ci xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 104262306a36Sopenharmony_ci unlock_dp_on_error = false; 104362306a36Sopenharmony_ci 104462306a36Sopenharmony_ci error = xfs_dir_createname(tp, dp, name, ip->i_ino, 104562306a36Sopenharmony_ci resblks - XFS_IALLOC_SPACE_RES(mp)); 104662306a36Sopenharmony_ci if (error) { 104762306a36Sopenharmony_ci ASSERT(error != -ENOSPC); 104862306a36Sopenharmony_ci goto out_trans_cancel; 104962306a36Sopenharmony_ci } 105062306a36Sopenharmony_ci xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 105162306a36Sopenharmony_ci xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 105262306a36Sopenharmony_ci 105362306a36Sopenharmony_ci if (is_dir) { 105462306a36Sopenharmony_ci error = xfs_dir_init(tp, ip, dp); 105562306a36Sopenharmony_ci if (error) 105662306a36Sopenharmony_ci goto out_trans_cancel; 105762306a36Sopenharmony_ci 105862306a36Sopenharmony_ci xfs_bumplink(tp, dp); 105962306a36Sopenharmony_ci } 106062306a36Sopenharmony_ci 106162306a36Sopenharmony_ci /* 106262306a36Sopenharmony_ci * If this is a synchronous mount, make sure that the 106362306a36Sopenharmony_ci * create transaction goes to disk before returning to 106462306a36Sopenharmony_ci * the user. 106562306a36Sopenharmony_ci */ 106662306a36Sopenharmony_ci if (xfs_has_wsync(mp) || xfs_has_dirsync(mp)) 106762306a36Sopenharmony_ci xfs_trans_set_sync(tp); 106862306a36Sopenharmony_ci 106962306a36Sopenharmony_ci /* 107062306a36Sopenharmony_ci * Attach the dquot(s) to the inodes and modify them incore. 107162306a36Sopenharmony_ci * These ids of the inode couldn't have changed since the new 107262306a36Sopenharmony_ci * inode has been locked ever since it was created. 
107362306a36Sopenharmony_ci */ 107462306a36Sopenharmony_ci xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); 107562306a36Sopenharmony_ci 107662306a36Sopenharmony_ci error = xfs_trans_commit(tp); 107762306a36Sopenharmony_ci if (error) 107862306a36Sopenharmony_ci goto out_release_inode; 107962306a36Sopenharmony_ci 108062306a36Sopenharmony_ci xfs_qm_dqrele(udqp); 108162306a36Sopenharmony_ci xfs_qm_dqrele(gdqp); 108262306a36Sopenharmony_ci xfs_qm_dqrele(pdqp); 108362306a36Sopenharmony_ci 108462306a36Sopenharmony_ci *ipp = ip; 108562306a36Sopenharmony_ci return 0; 108662306a36Sopenharmony_ci 108762306a36Sopenharmony_ci out_trans_cancel: 108862306a36Sopenharmony_ci xfs_trans_cancel(tp); 108962306a36Sopenharmony_ci out_release_inode: 109062306a36Sopenharmony_ci /* 109162306a36Sopenharmony_ci * Wait until after the current transaction is aborted to finish the 109262306a36Sopenharmony_ci * setup of the inode and release the inode. This prevents recursive 109362306a36Sopenharmony_ci * transactions and deadlocks from xfs_inactive. 
109462306a36Sopenharmony_ci */ 109562306a36Sopenharmony_ci if (ip) { 109662306a36Sopenharmony_ci xfs_finish_inode_setup(ip); 109762306a36Sopenharmony_ci xfs_irele(ip); 109862306a36Sopenharmony_ci } 109962306a36Sopenharmony_ci out_release_dquots: 110062306a36Sopenharmony_ci xfs_qm_dqrele(udqp); 110162306a36Sopenharmony_ci xfs_qm_dqrele(gdqp); 110262306a36Sopenharmony_ci xfs_qm_dqrele(pdqp); 110362306a36Sopenharmony_ci 110462306a36Sopenharmony_ci if (unlock_dp_on_error) 110562306a36Sopenharmony_ci xfs_iunlock(dp, XFS_ILOCK_EXCL); 110662306a36Sopenharmony_ci return error; 110762306a36Sopenharmony_ci} 110862306a36Sopenharmony_ci 110962306a36Sopenharmony_ciint 111062306a36Sopenharmony_cixfs_create_tmpfile( 111162306a36Sopenharmony_ci struct mnt_idmap *idmap, 111262306a36Sopenharmony_ci struct xfs_inode *dp, 111362306a36Sopenharmony_ci umode_t mode, 111462306a36Sopenharmony_ci struct xfs_inode **ipp) 111562306a36Sopenharmony_ci{ 111662306a36Sopenharmony_ci struct xfs_mount *mp = dp->i_mount; 111762306a36Sopenharmony_ci struct xfs_inode *ip = NULL; 111862306a36Sopenharmony_ci struct xfs_trans *tp = NULL; 111962306a36Sopenharmony_ci int error; 112062306a36Sopenharmony_ci prid_t prid; 112162306a36Sopenharmony_ci struct xfs_dquot *udqp = NULL; 112262306a36Sopenharmony_ci struct xfs_dquot *gdqp = NULL; 112362306a36Sopenharmony_ci struct xfs_dquot *pdqp = NULL; 112462306a36Sopenharmony_ci struct xfs_trans_res *tres; 112562306a36Sopenharmony_ci uint resblks; 112662306a36Sopenharmony_ci xfs_ino_t ino; 112762306a36Sopenharmony_ci 112862306a36Sopenharmony_ci if (xfs_is_shutdown(mp)) 112962306a36Sopenharmony_ci return -EIO; 113062306a36Sopenharmony_ci 113162306a36Sopenharmony_ci prid = xfs_get_initial_prid(dp); 113262306a36Sopenharmony_ci 113362306a36Sopenharmony_ci /* 113462306a36Sopenharmony_ci * Make sure that we have allocated dquot(s) on disk. 
113562306a36Sopenharmony_ci */ 113662306a36Sopenharmony_ci error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), 113762306a36Sopenharmony_ci mapped_fsgid(idmap, &init_user_ns), prid, 113862306a36Sopenharmony_ci XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, 113962306a36Sopenharmony_ci &udqp, &gdqp, &pdqp); 114062306a36Sopenharmony_ci if (error) 114162306a36Sopenharmony_ci return error; 114262306a36Sopenharmony_ci 114362306a36Sopenharmony_ci resblks = XFS_IALLOC_SPACE_RES(mp); 114462306a36Sopenharmony_ci tres = &M_RES(mp)->tr_create_tmpfile; 114562306a36Sopenharmony_ci 114662306a36Sopenharmony_ci error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks, 114762306a36Sopenharmony_ci &tp); 114862306a36Sopenharmony_ci if (error) 114962306a36Sopenharmony_ci goto out_release_dquots; 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_ci error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); 115262306a36Sopenharmony_ci if (!error) 115362306a36Sopenharmony_ci error = xfs_init_new_inode(idmap, tp, dp, ino, mode, 115462306a36Sopenharmony_ci 0, 0, prid, false, &ip); 115562306a36Sopenharmony_ci if (error) 115662306a36Sopenharmony_ci goto out_trans_cancel; 115762306a36Sopenharmony_ci 115862306a36Sopenharmony_ci if (xfs_has_wsync(mp)) 115962306a36Sopenharmony_ci xfs_trans_set_sync(tp); 116062306a36Sopenharmony_ci 116162306a36Sopenharmony_ci /* 116262306a36Sopenharmony_ci * Attach the dquot(s) to the inodes and modify them incore. 116362306a36Sopenharmony_ci * These ids of the inode couldn't have changed since the new 116462306a36Sopenharmony_ci * inode has been locked ever since it was created. 
116562306a36Sopenharmony_ci */ 116662306a36Sopenharmony_ci xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); 116762306a36Sopenharmony_ci 116862306a36Sopenharmony_ci error = xfs_iunlink(tp, ip); 116962306a36Sopenharmony_ci if (error) 117062306a36Sopenharmony_ci goto out_trans_cancel; 117162306a36Sopenharmony_ci 117262306a36Sopenharmony_ci error = xfs_trans_commit(tp); 117362306a36Sopenharmony_ci if (error) 117462306a36Sopenharmony_ci goto out_release_inode; 117562306a36Sopenharmony_ci 117662306a36Sopenharmony_ci xfs_qm_dqrele(udqp); 117762306a36Sopenharmony_ci xfs_qm_dqrele(gdqp); 117862306a36Sopenharmony_ci xfs_qm_dqrele(pdqp); 117962306a36Sopenharmony_ci 118062306a36Sopenharmony_ci *ipp = ip; 118162306a36Sopenharmony_ci return 0; 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_ci out_trans_cancel: 118462306a36Sopenharmony_ci xfs_trans_cancel(tp); 118562306a36Sopenharmony_ci out_release_inode: 118662306a36Sopenharmony_ci /* 118762306a36Sopenharmony_ci * Wait until after the current transaction is aborted to finish the 118862306a36Sopenharmony_ci * setup of the inode and release the inode. This prevents recursive 118962306a36Sopenharmony_ci * transactions and deadlocks from xfs_inactive. 
119062306a36Sopenharmony_ci */ 119162306a36Sopenharmony_ci if (ip) { 119262306a36Sopenharmony_ci xfs_finish_inode_setup(ip); 119362306a36Sopenharmony_ci xfs_irele(ip); 119462306a36Sopenharmony_ci } 119562306a36Sopenharmony_ci out_release_dquots: 119662306a36Sopenharmony_ci xfs_qm_dqrele(udqp); 119762306a36Sopenharmony_ci xfs_qm_dqrele(gdqp); 119862306a36Sopenharmony_ci xfs_qm_dqrele(pdqp); 119962306a36Sopenharmony_ci 120062306a36Sopenharmony_ci return error; 120162306a36Sopenharmony_ci} 120262306a36Sopenharmony_ci 120362306a36Sopenharmony_ciint 120462306a36Sopenharmony_cixfs_link( 120562306a36Sopenharmony_ci xfs_inode_t *tdp, 120662306a36Sopenharmony_ci xfs_inode_t *sip, 120762306a36Sopenharmony_ci struct xfs_name *target_name) 120862306a36Sopenharmony_ci{ 120962306a36Sopenharmony_ci xfs_mount_t *mp = tdp->i_mount; 121062306a36Sopenharmony_ci xfs_trans_t *tp; 121162306a36Sopenharmony_ci int error, nospace_error = 0; 121262306a36Sopenharmony_ci int resblks; 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_ci trace_xfs_link(tdp, target_name); 121562306a36Sopenharmony_ci 121662306a36Sopenharmony_ci ASSERT(!S_ISDIR(VFS_I(sip)->i_mode)); 121762306a36Sopenharmony_ci 121862306a36Sopenharmony_ci if (xfs_is_shutdown(mp)) 121962306a36Sopenharmony_ci return -EIO; 122062306a36Sopenharmony_ci 122162306a36Sopenharmony_ci error = xfs_qm_dqattach(sip); 122262306a36Sopenharmony_ci if (error) 122362306a36Sopenharmony_ci goto std_return; 122462306a36Sopenharmony_ci 122562306a36Sopenharmony_ci error = xfs_qm_dqattach(tdp); 122662306a36Sopenharmony_ci if (error) 122762306a36Sopenharmony_ci goto std_return; 122862306a36Sopenharmony_ci 122962306a36Sopenharmony_ci resblks = XFS_LINK_SPACE_RES(mp, target_name->len); 123062306a36Sopenharmony_ci error = xfs_trans_alloc_dir(tdp, &M_RES(mp)->tr_link, sip, &resblks, 123162306a36Sopenharmony_ci &tp, &nospace_error); 123262306a36Sopenharmony_ci if (error) 123362306a36Sopenharmony_ci goto std_return; 123462306a36Sopenharmony_ci 
123562306a36Sopenharmony_ci /* 123662306a36Sopenharmony_ci * If we are using project inheritance, we only allow hard link 123762306a36Sopenharmony_ci * creation in our tree when the project IDs are the same; else 123862306a36Sopenharmony_ci * the tree quota mechanism could be circumvented. 123962306a36Sopenharmony_ci */ 124062306a36Sopenharmony_ci if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) && 124162306a36Sopenharmony_ci tdp->i_projid != sip->i_projid)) { 124262306a36Sopenharmony_ci error = -EXDEV; 124362306a36Sopenharmony_ci goto error_return; 124462306a36Sopenharmony_ci } 124562306a36Sopenharmony_ci 124662306a36Sopenharmony_ci if (!resblks) { 124762306a36Sopenharmony_ci error = xfs_dir_canenter(tp, tdp, target_name); 124862306a36Sopenharmony_ci if (error) 124962306a36Sopenharmony_ci goto error_return; 125062306a36Sopenharmony_ci } 125162306a36Sopenharmony_ci 125262306a36Sopenharmony_ci /* 125362306a36Sopenharmony_ci * Handle initial link state of O_TMPFILE inode 125462306a36Sopenharmony_ci */ 125562306a36Sopenharmony_ci if (VFS_I(sip)->i_nlink == 0) { 125662306a36Sopenharmony_ci struct xfs_perag *pag; 125762306a36Sopenharmony_ci 125862306a36Sopenharmony_ci pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sip->i_ino)); 125962306a36Sopenharmony_ci error = xfs_iunlink_remove(tp, pag, sip); 126062306a36Sopenharmony_ci xfs_perag_put(pag); 126162306a36Sopenharmony_ci if (error) 126262306a36Sopenharmony_ci goto error_return; 126362306a36Sopenharmony_ci } 126462306a36Sopenharmony_ci 126562306a36Sopenharmony_ci error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, 126662306a36Sopenharmony_ci resblks); 126762306a36Sopenharmony_ci if (error) 126862306a36Sopenharmony_ci goto error_return; 126962306a36Sopenharmony_ci xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 127062306a36Sopenharmony_ci xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); 127162306a36Sopenharmony_ci 127262306a36Sopenharmony_ci xfs_bumplink(tp, sip); 127362306a36Sopenharmony_ci 
127462306a36Sopenharmony_ci /* 127562306a36Sopenharmony_ci * If this is a synchronous mount, make sure that the 127662306a36Sopenharmony_ci * link transaction goes to disk before returning to 127762306a36Sopenharmony_ci * the user. 127862306a36Sopenharmony_ci */ 127962306a36Sopenharmony_ci if (xfs_has_wsync(mp) || xfs_has_dirsync(mp)) 128062306a36Sopenharmony_ci xfs_trans_set_sync(tp); 128162306a36Sopenharmony_ci 128262306a36Sopenharmony_ci return xfs_trans_commit(tp); 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci error_return: 128562306a36Sopenharmony_ci xfs_trans_cancel(tp); 128662306a36Sopenharmony_ci std_return: 128762306a36Sopenharmony_ci if (error == -ENOSPC && nospace_error) 128862306a36Sopenharmony_ci error = nospace_error; 128962306a36Sopenharmony_ci return error; 129062306a36Sopenharmony_ci} 129162306a36Sopenharmony_ci 129262306a36Sopenharmony_ci/* Clear the reflink flag and the cowblocks tag if possible. */ 129362306a36Sopenharmony_cistatic void 129462306a36Sopenharmony_cixfs_itruncate_clear_reflink_flags( 129562306a36Sopenharmony_ci struct xfs_inode *ip) 129662306a36Sopenharmony_ci{ 129762306a36Sopenharmony_ci struct xfs_ifork *dfork; 129862306a36Sopenharmony_ci struct xfs_ifork *cfork; 129962306a36Sopenharmony_ci 130062306a36Sopenharmony_ci if (!xfs_is_reflink_inode(ip)) 130162306a36Sopenharmony_ci return; 130262306a36Sopenharmony_ci dfork = xfs_ifork_ptr(ip, XFS_DATA_FORK); 130362306a36Sopenharmony_ci cfork = xfs_ifork_ptr(ip, XFS_COW_FORK); 130462306a36Sopenharmony_ci if (dfork->if_bytes == 0 && cfork->if_bytes == 0) 130562306a36Sopenharmony_ci ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; 130662306a36Sopenharmony_ci if (cfork->if_bytes == 0) 130762306a36Sopenharmony_ci xfs_inode_clear_cowblocks_tag(ip); 130862306a36Sopenharmony_ci} 130962306a36Sopenharmony_ci 131062306a36Sopenharmony_ci/* 131162306a36Sopenharmony_ci * Free up the underlying blocks past new_size. The new size must be smaller 131262306a36Sopenharmony_ci * than the current size. 
This routine can be used both for the attribute and 131362306a36Sopenharmony_ci * data fork, and does not modify the inode size, which is left to the caller. 131462306a36Sopenharmony_ci * 131562306a36Sopenharmony_ci * The transaction passed to this routine must have made a permanent log 131662306a36Sopenharmony_ci * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the 131762306a36Sopenharmony_ci * given transaction and start new ones, so make sure everything involved in 131862306a36Sopenharmony_ci * the transaction is tidy before calling here. Some transaction will be 131962306a36Sopenharmony_ci * returned to the caller to be committed. The incoming transaction must 132062306a36Sopenharmony_ci * already include the inode, and both inode locks must be held exclusively. 132162306a36Sopenharmony_ci * The inode must also be "held" within the transaction. On return the inode 132262306a36Sopenharmony_ci * will be "held" within the returned transaction. This routine does NOT 132362306a36Sopenharmony_ci * require any disk space to be reserved for it within the transaction. 132462306a36Sopenharmony_ci * 132562306a36Sopenharmony_ci * If we get an error, we must return with the inode locked and linked into the 132662306a36Sopenharmony_ci * current transaction. This keeps things simple for the higher level code, 132762306a36Sopenharmony_ci * because it always knows that the inode is locked and held in the transaction 132862306a36Sopenharmony_ci * that returns to it whether errors occur or not. We don't mark the inode 132962306a36Sopenharmony_ci * dirty on error so that transactions can be easily aborted if possible. 
133062306a36Sopenharmony_ci */ 133162306a36Sopenharmony_ciint 133262306a36Sopenharmony_cixfs_itruncate_extents_flags( 133362306a36Sopenharmony_ci struct xfs_trans **tpp, 133462306a36Sopenharmony_ci struct xfs_inode *ip, 133562306a36Sopenharmony_ci int whichfork, 133662306a36Sopenharmony_ci xfs_fsize_t new_size, 133762306a36Sopenharmony_ci int flags) 133862306a36Sopenharmony_ci{ 133962306a36Sopenharmony_ci struct xfs_mount *mp = ip->i_mount; 134062306a36Sopenharmony_ci struct xfs_trans *tp = *tpp; 134162306a36Sopenharmony_ci xfs_fileoff_t first_unmap_block; 134262306a36Sopenharmony_ci xfs_filblks_t unmap_len; 134362306a36Sopenharmony_ci int error = 0; 134462306a36Sopenharmony_ci 134562306a36Sopenharmony_ci ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 134662306a36Sopenharmony_ci ASSERT(!atomic_read(&VFS_I(ip)->i_count) || 134762306a36Sopenharmony_ci xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 134862306a36Sopenharmony_ci ASSERT(new_size <= XFS_ISIZE(ip)); 134962306a36Sopenharmony_ci ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 135062306a36Sopenharmony_ci ASSERT(ip->i_itemp != NULL); 135162306a36Sopenharmony_ci ASSERT(ip->i_itemp->ili_lock_flags == 0); 135262306a36Sopenharmony_ci ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); 135362306a36Sopenharmony_ci 135462306a36Sopenharmony_ci trace_xfs_itruncate_extents_start(ip, new_size); 135562306a36Sopenharmony_ci 135662306a36Sopenharmony_ci flags |= xfs_bmapi_aflag(whichfork); 135762306a36Sopenharmony_ci 135862306a36Sopenharmony_ci /* 135962306a36Sopenharmony_ci * Since it is possible for space to become allocated beyond 136062306a36Sopenharmony_ci * the end of the file (in a crash where the space is allocated 136162306a36Sopenharmony_ci * but the inode size is not yet updated), simply remove any 136262306a36Sopenharmony_ci * blocks which show up between the new EOF and the maximum 136362306a36Sopenharmony_ci * possible file size. 
136462306a36Sopenharmony_ci * 136562306a36Sopenharmony_ci * We have to free all the blocks to the bmbt maximum offset, even if 136662306a36Sopenharmony_ci * the page cache can't scale that far. 136762306a36Sopenharmony_ci */ 136862306a36Sopenharmony_ci first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 136962306a36Sopenharmony_ci if (!xfs_verify_fileoff(mp, first_unmap_block)) { 137062306a36Sopenharmony_ci WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF); 137162306a36Sopenharmony_ci return 0; 137262306a36Sopenharmony_ci } 137362306a36Sopenharmony_ci 137462306a36Sopenharmony_ci unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1; 137562306a36Sopenharmony_ci while (unmap_len > 0) { 137662306a36Sopenharmony_ci ASSERT(tp->t_highest_agno == NULLAGNUMBER); 137762306a36Sopenharmony_ci error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len, 137862306a36Sopenharmony_ci flags, XFS_ITRUNC_MAX_EXTENTS); 137962306a36Sopenharmony_ci if (error) 138062306a36Sopenharmony_ci goto out; 138162306a36Sopenharmony_ci 138262306a36Sopenharmony_ci /* free the just unmapped extents */ 138362306a36Sopenharmony_ci error = xfs_defer_finish(&tp); 138462306a36Sopenharmony_ci if (error) 138562306a36Sopenharmony_ci goto out; 138662306a36Sopenharmony_ci } 138762306a36Sopenharmony_ci 138862306a36Sopenharmony_ci if (whichfork == XFS_DATA_FORK) { 138962306a36Sopenharmony_ci /* Remove all pending CoW reservations. */ 139062306a36Sopenharmony_ci error = xfs_reflink_cancel_cow_blocks(ip, &tp, 139162306a36Sopenharmony_ci first_unmap_block, XFS_MAX_FILEOFF, true); 139262306a36Sopenharmony_ci if (error) 139362306a36Sopenharmony_ci goto out; 139462306a36Sopenharmony_ci 139562306a36Sopenharmony_ci xfs_itruncate_clear_reflink_flags(ip); 139662306a36Sopenharmony_ci } 139762306a36Sopenharmony_ci 139862306a36Sopenharmony_ci /* 139962306a36Sopenharmony_ci * Always re-log the inode so that our permanent transaction can keep 140062306a36Sopenharmony_ci * on rolling it forward in the log. 
140162306a36Sopenharmony_ci */ 140262306a36Sopenharmony_ci xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 140362306a36Sopenharmony_ci 140462306a36Sopenharmony_ci trace_xfs_itruncate_extents_end(ip, new_size); 140562306a36Sopenharmony_ci 140662306a36Sopenharmony_ciout: 140762306a36Sopenharmony_ci *tpp = tp; 140862306a36Sopenharmony_ci return error; 140962306a36Sopenharmony_ci} 141062306a36Sopenharmony_ci 141162306a36Sopenharmony_ciint 141262306a36Sopenharmony_cixfs_release( 141362306a36Sopenharmony_ci xfs_inode_t *ip) 141462306a36Sopenharmony_ci{ 141562306a36Sopenharmony_ci xfs_mount_t *mp = ip->i_mount; 141662306a36Sopenharmony_ci int error = 0; 141762306a36Sopenharmony_ci 141862306a36Sopenharmony_ci if (!S_ISREG(VFS_I(ip)->i_mode) || (VFS_I(ip)->i_mode == 0)) 141962306a36Sopenharmony_ci return 0; 142062306a36Sopenharmony_ci 142162306a36Sopenharmony_ci /* If this is a read-only mount, don't do this (would generate I/O) */ 142262306a36Sopenharmony_ci if (xfs_is_readonly(mp)) 142362306a36Sopenharmony_ci return 0; 142462306a36Sopenharmony_ci 142562306a36Sopenharmony_ci if (!xfs_is_shutdown(mp)) { 142662306a36Sopenharmony_ci int truncated; 142762306a36Sopenharmony_ci 142862306a36Sopenharmony_ci /* 142962306a36Sopenharmony_ci * If we previously truncated this file and removed old data 143062306a36Sopenharmony_ci * in the process, we want to initiate "early" writeout on 143162306a36Sopenharmony_ci * the last close. This is an attempt to combat the notorious 143262306a36Sopenharmony_ci * NULL files problem which is particularly noticeable from a 143362306a36Sopenharmony_ci * truncate down, buffered (re-)write (delalloc), followed by 143462306a36Sopenharmony_ci * a crash. What we are effectively doing here is 143562306a36Sopenharmony_ci * significantly reducing the time window where we'd otherwise 143662306a36Sopenharmony_ci * be exposed to that problem. 
143762306a36Sopenharmony_ci */ 143862306a36Sopenharmony_ci truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); 143962306a36Sopenharmony_ci if (truncated) { 144062306a36Sopenharmony_ci xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); 144162306a36Sopenharmony_ci if (ip->i_delayed_blks > 0) { 144262306a36Sopenharmony_ci error = filemap_flush(VFS_I(ip)->i_mapping); 144362306a36Sopenharmony_ci if (error) 144462306a36Sopenharmony_ci return error; 144562306a36Sopenharmony_ci } 144662306a36Sopenharmony_ci } 144762306a36Sopenharmony_ci } 144862306a36Sopenharmony_ci 144962306a36Sopenharmony_ci if (VFS_I(ip)->i_nlink == 0) 145062306a36Sopenharmony_ci return 0; 145162306a36Sopenharmony_ci 145262306a36Sopenharmony_ci /* 145362306a36Sopenharmony_ci * If we can't get the iolock just skip truncating the blocks past EOF 145462306a36Sopenharmony_ci * because we could deadlock with the mmap_lock otherwise. We'll get 145562306a36Sopenharmony_ci * another chance to drop them once the last reference to the inode is 145662306a36Sopenharmony_ci * dropped, so we'll never leak blocks permanently. 145762306a36Sopenharmony_ci */ 145862306a36Sopenharmony_ci if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) 145962306a36Sopenharmony_ci return 0; 146062306a36Sopenharmony_ci 146162306a36Sopenharmony_ci if (xfs_can_free_eofblocks(ip, false)) { 146262306a36Sopenharmony_ci /* 146362306a36Sopenharmony_ci * Check if the inode is being opened, written and closed 146462306a36Sopenharmony_ci * frequently and we have delayed allocation blocks outstanding 146562306a36Sopenharmony_ci * (e.g. streaming writes from the NFS server), truncating the 146662306a36Sopenharmony_ci * blocks past EOF will cause fragmentation to occur. 146762306a36Sopenharmony_ci * 146862306a36Sopenharmony_ci * In this case don't do the truncation, but we have to be 146962306a36Sopenharmony_ci * careful how we detect this case. 
Blocks beyond EOF show up as 147062306a36Sopenharmony_ci * i_delayed_blks even when the inode is clean, so we need to 147162306a36Sopenharmony_ci * truncate them away first before checking for a dirty release. 147262306a36Sopenharmony_ci * Hence on the first dirty close we will still remove the 147362306a36Sopenharmony_ci * speculative allocation, but after that we will leave it in 147462306a36Sopenharmony_ci * place. 147562306a36Sopenharmony_ci */ 147662306a36Sopenharmony_ci if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) 147762306a36Sopenharmony_ci goto out_unlock; 147862306a36Sopenharmony_ci 147962306a36Sopenharmony_ci error = xfs_free_eofblocks(ip); 148062306a36Sopenharmony_ci if (error) 148162306a36Sopenharmony_ci goto out_unlock; 148262306a36Sopenharmony_ci 148362306a36Sopenharmony_ci /* delalloc blocks after truncation means it really is dirty */ 148462306a36Sopenharmony_ci if (ip->i_delayed_blks) 148562306a36Sopenharmony_ci xfs_iflags_set(ip, XFS_IDIRTY_RELEASE); 148662306a36Sopenharmony_ci } 148762306a36Sopenharmony_ci 148862306a36Sopenharmony_ciout_unlock: 148962306a36Sopenharmony_ci xfs_iunlock(ip, XFS_IOLOCK_EXCL); 149062306a36Sopenharmony_ci return error; 149162306a36Sopenharmony_ci} 149262306a36Sopenharmony_ci 149362306a36Sopenharmony_ci/* 149462306a36Sopenharmony_ci * xfs_inactive_truncate 149562306a36Sopenharmony_ci * 149662306a36Sopenharmony_ci * Called to perform a truncate when an inode becomes unlinked. 
149762306a36Sopenharmony_ci */ 149862306a36Sopenharmony_ciSTATIC int 149962306a36Sopenharmony_cixfs_inactive_truncate( 150062306a36Sopenharmony_ci struct xfs_inode *ip) 150162306a36Sopenharmony_ci{ 150262306a36Sopenharmony_ci struct xfs_mount *mp = ip->i_mount; 150362306a36Sopenharmony_ci struct xfs_trans *tp; 150462306a36Sopenharmony_ci int error; 150562306a36Sopenharmony_ci 150662306a36Sopenharmony_ci error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); 150762306a36Sopenharmony_ci if (error) { 150862306a36Sopenharmony_ci ASSERT(xfs_is_shutdown(mp)); 150962306a36Sopenharmony_ci return error; 151062306a36Sopenharmony_ci } 151162306a36Sopenharmony_ci xfs_ilock(ip, XFS_ILOCK_EXCL); 151262306a36Sopenharmony_ci xfs_trans_ijoin(tp, ip, 0); 151362306a36Sopenharmony_ci 151462306a36Sopenharmony_ci /* 151562306a36Sopenharmony_ci * Log the inode size first to prevent stale data exposure in the event 151662306a36Sopenharmony_ci * of a system crash before the truncate completes. See the related 151762306a36Sopenharmony_ci * comment in xfs_vn_setattr_size() for details. 
151862306a36Sopenharmony_ci */ 151962306a36Sopenharmony_ci ip->i_disk_size = 0; 152062306a36Sopenharmony_ci xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 152162306a36Sopenharmony_ci 152262306a36Sopenharmony_ci error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); 152362306a36Sopenharmony_ci if (error) 152462306a36Sopenharmony_ci goto error_trans_cancel; 152562306a36Sopenharmony_ci 152662306a36Sopenharmony_ci ASSERT(ip->i_df.if_nextents == 0); 152762306a36Sopenharmony_ci 152862306a36Sopenharmony_ci error = xfs_trans_commit(tp); 152962306a36Sopenharmony_ci if (error) 153062306a36Sopenharmony_ci goto error_unlock; 153162306a36Sopenharmony_ci 153262306a36Sopenharmony_ci xfs_iunlock(ip, XFS_ILOCK_EXCL); 153362306a36Sopenharmony_ci return 0; 153462306a36Sopenharmony_ci 153562306a36Sopenharmony_cierror_trans_cancel: 153662306a36Sopenharmony_ci xfs_trans_cancel(tp); 153762306a36Sopenharmony_cierror_unlock: 153862306a36Sopenharmony_ci xfs_iunlock(ip, XFS_ILOCK_EXCL); 153962306a36Sopenharmony_ci return error; 154062306a36Sopenharmony_ci} 154162306a36Sopenharmony_ci 154262306a36Sopenharmony_ci/* 154362306a36Sopenharmony_ci * xfs_inactive_ifree() 154462306a36Sopenharmony_ci * 154562306a36Sopenharmony_ci * Perform the inode free when an inode is unlinked. 
154662306a36Sopenharmony_ci */ 154762306a36Sopenharmony_ciSTATIC int 154862306a36Sopenharmony_cixfs_inactive_ifree( 154962306a36Sopenharmony_ci struct xfs_inode *ip) 155062306a36Sopenharmony_ci{ 155162306a36Sopenharmony_ci struct xfs_mount *mp = ip->i_mount; 155262306a36Sopenharmony_ci struct xfs_trans *tp; 155362306a36Sopenharmony_ci int error; 155462306a36Sopenharmony_ci 155562306a36Sopenharmony_ci /* 155662306a36Sopenharmony_ci * We try to use a per-AG reservation for any block needed by the finobt 155762306a36Sopenharmony_ci * tree, but as the finobt feature predates the per-AG reservation 155862306a36Sopenharmony_ci * support a degraded file system might not have enough space for the 155962306a36Sopenharmony_ci * reservation at mount time. In that case try to dip into the reserved 156062306a36Sopenharmony_ci * pool and pray. 156162306a36Sopenharmony_ci * 156262306a36Sopenharmony_ci * Send a warning if the reservation does happen to fail, as the inode 156362306a36Sopenharmony_ci * now remains allocated and sits on the unlinked list until the fs is 156462306a36Sopenharmony_ci * repaired. 156562306a36Sopenharmony_ci */ 156662306a36Sopenharmony_ci if (unlikely(mp->m_finobt_nores)) { 156762306a36Sopenharmony_ci error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 156862306a36Sopenharmony_ci XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, 156962306a36Sopenharmony_ci &tp); 157062306a36Sopenharmony_ci } else { 157162306a36Sopenharmony_ci error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp); 157262306a36Sopenharmony_ci } 157362306a36Sopenharmony_ci if (error) { 157462306a36Sopenharmony_ci if (error == -ENOSPC) { 157562306a36Sopenharmony_ci xfs_warn_ratelimited(mp, 157662306a36Sopenharmony_ci "Failed to remove inode(s) from unlinked list. 
" 157762306a36Sopenharmony_ci "Please free space, unmount and run xfs_repair."); 157862306a36Sopenharmony_ci } else { 157962306a36Sopenharmony_ci ASSERT(xfs_is_shutdown(mp)); 158062306a36Sopenharmony_ci } 158162306a36Sopenharmony_ci return error; 158262306a36Sopenharmony_ci } 158362306a36Sopenharmony_ci 158462306a36Sopenharmony_ci /* 158562306a36Sopenharmony_ci * We do not hold the inode locked across the entire rolling transaction 158662306a36Sopenharmony_ci * here. We only need to hold it for the first transaction that 158762306a36Sopenharmony_ci * xfs_ifree() builds, which may mark the inode XFS_ISTALE if the 158862306a36Sopenharmony_ci * underlying cluster buffer is freed. Relogging an XFS_ISTALE inode 158962306a36Sopenharmony_ci * here breaks the relationship between cluster buffer invalidation and 159062306a36Sopenharmony_ci * stale inode invalidation on cluster buffer item journal commit 159162306a36Sopenharmony_ci * completion, and can result in leaving dirty stale inodes hanging 159262306a36Sopenharmony_ci * around in memory. 159362306a36Sopenharmony_ci * 159462306a36Sopenharmony_ci * We have no need for serialising this inode operation against other 159562306a36Sopenharmony_ci * operations - we freed the inode and hence reallocation is required 159662306a36Sopenharmony_ci * and that will serialise on reallocating the space the deferops need 159762306a36Sopenharmony_ci * to free. Hence we can unlock the inode on the first commit of 159862306a36Sopenharmony_ci * the transaction rather than roll it right through the deferops. This 159962306a36Sopenharmony_ci * avoids relogging the XFS_ISTALE inode. 160062306a36Sopenharmony_ci * 160162306a36Sopenharmony_ci * We check that xfs_ifree() hasn't grown an internal transaction roll 160262306a36Sopenharmony_ci * by asserting that the inode is still locked when it returns. 
160362306a36Sopenharmony_ci */ 160462306a36Sopenharmony_ci xfs_ilock(ip, XFS_ILOCK_EXCL); 160562306a36Sopenharmony_ci xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 160662306a36Sopenharmony_ci 160762306a36Sopenharmony_ci error = xfs_ifree(tp, ip); 160862306a36Sopenharmony_ci ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 160962306a36Sopenharmony_ci if (error) { 161062306a36Sopenharmony_ci /* 161162306a36Sopenharmony_ci * If we fail to free the inode, shut down. The cancel 161262306a36Sopenharmony_ci * might do that, we need to make sure. Otherwise the 161362306a36Sopenharmony_ci * inode might be lost for a long time or forever. 161462306a36Sopenharmony_ci */ 161562306a36Sopenharmony_ci if (!xfs_is_shutdown(mp)) { 161662306a36Sopenharmony_ci xfs_notice(mp, "%s: xfs_ifree returned error %d", 161762306a36Sopenharmony_ci __func__, error); 161862306a36Sopenharmony_ci xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 161962306a36Sopenharmony_ci } 162062306a36Sopenharmony_ci xfs_trans_cancel(tp); 162162306a36Sopenharmony_ci return error; 162262306a36Sopenharmony_ci } 162362306a36Sopenharmony_ci 162462306a36Sopenharmony_ci /* 162562306a36Sopenharmony_ci * Credit the quota account(s). The inode is gone. 162662306a36Sopenharmony_ci */ 162762306a36Sopenharmony_ci xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); 162862306a36Sopenharmony_ci 162962306a36Sopenharmony_ci return xfs_trans_commit(tp); 163062306a36Sopenharmony_ci} 163162306a36Sopenharmony_ci 163262306a36Sopenharmony_ci/* 163362306a36Sopenharmony_ci * Returns true if we need to update the on-disk metadata before we can free 163462306a36Sopenharmony_ci * the memory used by this inode. Updates include freeing post-eof 163562306a36Sopenharmony_ci * preallocations; freeing COW staging extents; and marking the inode free in 163662306a36Sopenharmony_ci * the inobt if it is on the unlinked list. 
163762306a36Sopenharmony_ci */ 163862306a36Sopenharmony_cibool 163962306a36Sopenharmony_cixfs_inode_needs_inactive( 164062306a36Sopenharmony_ci struct xfs_inode *ip) 164162306a36Sopenharmony_ci{ 164262306a36Sopenharmony_ci struct xfs_mount *mp = ip->i_mount; 164362306a36Sopenharmony_ci struct xfs_ifork *cow_ifp = xfs_ifork_ptr(ip, XFS_COW_FORK); 164462306a36Sopenharmony_ci 164562306a36Sopenharmony_ci /* 164662306a36Sopenharmony_ci * If the inode is already free, then there can be nothing 164762306a36Sopenharmony_ci * to clean up here. 164862306a36Sopenharmony_ci */ 164962306a36Sopenharmony_ci if (VFS_I(ip)->i_mode == 0) 165062306a36Sopenharmony_ci return false; 165162306a36Sopenharmony_ci 165262306a36Sopenharmony_ci /* 165362306a36Sopenharmony_ci * If this is a read-only mount, don't do this (would generate I/O) 165462306a36Sopenharmony_ci * unless we're in log recovery and cleaning the iunlinked list. 165562306a36Sopenharmony_ci */ 165662306a36Sopenharmony_ci if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log)) 165762306a36Sopenharmony_ci return false; 165862306a36Sopenharmony_ci 165962306a36Sopenharmony_ci /* If the log isn't running, push inodes straight to reclaim. */ 166062306a36Sopenharmony_ci if (xfs_is_shutdown(mp) || xfs_has_norecovery(mp)) 166162306a36Sopenharmony_ci return false; 166262306a36Sopenharmony_ci 166362306a36Sopenharmony_ci /* Metadata inodes require explicit resource cleanup. */ 166462306a36Sopenharmony_ci if (xfs_is_metadata_inode(ip)) 166562306a36Sopenharmony_ci return false; 166662306a36Sopenharmony_ci 166762306a36Sopenharmony_ci /* Want to clean out the cow blocks if there are any. */ 166862306a36Sopenharmony_ci if (cow_ifp && cow_ifp->if_bytes > 0) 166962306a36Sopenharmony_ci return true; 167062306a36Sopenharmony_ci 167162306a36Sopenharmony_ci /* Unlinked files must be freed. 
*/ 167262306a36Sopenharmony_ci if (VFS_I(ip)->i_nlink == 0) 167362306a36Sopenharmony_ci return true; 167462306a36Sopenharmony_ci 167562306a36Sopenharmony_ci /* 167662306a36Sopenharmony_ci * This file isn't being freed, so check if there are post-eof blocks 167762306a36Sopenharmony_ci * to free. @force is true because we are evicting an inode from the 167862306a36Sopenharmony_ci * cache. Post-eof blocks must be freed, lest we end up with broken 167962306a36Sopenharmony_ci * free space accounting. 168062306a36Sopenharmony_ci * 168162306a36Sopenharmony_ci * Note: don't bother with iolock here since lockdep complains about 168262306a36Sopenharmony_ci * acquiring it in reclaim context. We have the only reference to the 168362306a36Sopenharmony_ci * inode at this point anyways. 168462306a36Sopenharmony_ci */ 168562306a36Sopenharmony_ci return xfs_can_free_eofblocks(ip, true); 168662306a36Sopenharmony_ci} 168762306a36Sopenharmony_ci 168862306a36Sopenharmony_ci/* 168962306a36Sopenharmony_ci * xfs_inactive 169062306a36Sopenharmony_ci * 169162306a36Sopenharmony_ci * This is called when the vnode reference count for the vnode 169262306a36Sopenharmony_ci * goes to zero. If the file has been unlinked, then it must 169362306a36Sopenharmony_ci * now be truncated. Also, we clear all of the read-ahead state 169462306a36Sopenharmony_ci * kept for the inode here since the file is now closed. 169562306a36Sopenharmony_ci */ 169662306a36Sopenharmony_ciint 169762306a36Sopenharmony_cixfs_inactive( 169862306a36Sopenharmony_ci xfs_inode_t *ip) 169962306a36Sopenharmony_ci{ 170062306a36Sopenharmony_ci struct xfs_mount *mp; 170162306a36Sopenharmony_ci int error = 0; 170262306a36Sopenharmony_ci int truncate = 0; 170362306a36Sopenharmony_ci 170462306a36Sopenharmony_ci /* 170562306a36Sopenharmony_ci * If the inode is already free, then there can be nothing 170662306a36Sopenharmony_ci * to clean up here. 
170762306a36Sopenharmony_ci */ 170862306a36Sopenharmony_ci if (VFS_I(ip)->i_mode == 0) { 170962306a36Sopenharmony_ci ASSERT(ip->i_df.if_broot_bytes == 0); 171062306a36Sopenharmony_ci goto out; 171162306a36Sopenharmony_ci } 171262306a36Sopenharmony_ci 171362306a36Sopenharmony_ci mp = ip->i_mount; 171462306a36Sopenharmony_ci ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY)); 171562306a36Sopenharmony_ci 171662306a36Sopenharmony_ci /* 171762306a36Sopenharmony_ci * If this is a read-only mount, don't do this (would generate I/O) 171862306a36Sopenharmony_ci * unless we're in log recovery and cleaning the iunlinked list. 171962306a36Sopenharmony_ci */ 172062306a36Sopenharmony_ci if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log)) 172162306a36Sopenharmony_ci goto out; 172262306a36Sopenharmony_ci 172362306a36Sopenharmony_ci /* Metadata inodes require explicit resource cleanup. */ 172462306a36Sopenharmony_ci if (xfs_is_metadata_inode(ip)) 172562306a36Sopenharmony_ci goto out; 172662306a36Sopenharmony_ci 172762306a36Sopenharmony_ci /* Try to clean out the cow blocks if there are any. */ 172862306a36Sopenharmony_ci if (xfs_inode_has_cow_data(ip)) 172962306a36Sopenharmony_ci xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); 173062306a36Sopenharmony_ci 173162306a36Sopenharmony_ci if (VFS_I(ip)->i_nlink != 0) { 173262306a36Sopenharmony_ci /* 173362306a36Sopenharmony_ci * force is true because we are evicting an inode from the 173462306a36Sopenharmony_ci * cache. Post-eof blocks must be freed, lest we end up with 173562306a36Sopenharmony_ci * broken free space accounting. 173662306a36Sopenharmony_ci * 173762306a36Sopenharmony_ci * Note: don't bother with iolock here since lockdep complains 173862306a36Sopenharmony_ci * about acquiring it in reclaim context. We have the only 173962306a36Sopenharmony_ci * reference to the inode at this point anyways. 
174062306a36Sopenharmony_ci */ 174162306a36Sopenharmony_ci if (xfs_can_free_eofblocks(ip, true)) 174262306a36Sopenharmony_ci error = xfs_free_eofblocks(ip); 174362306a36Sopenharmony_ci 174462306a36Sopenharmony_ci goto out; 174562306a36Sopenharmony_ci } 174662306a36Sopenharmony_ci 174762306a36Sopenharmony_ci if (S_ISREG(VFS_I(ip)->i_mode) && 174862306a36Sopenharmony_ci (ip->i_disk_size != 0 || XFS_ISIZE(ip) != 0 || 174962306a36Sopenharmony_ci ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0)) 175062306a36Sopenharmony_ci truncate = 1; 175162306a36Sopenharmony_ci 175262306a36Sopenharmony_ci if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) { 175362306a36Sopenharmony_ci /* 175462306a36Sopenharmony_ci * If this inode is being inactivated during a quotacheck and 175562306a36Sopenharmony_ci * has not yet been scanned by quotacheck, we /must/ remove 175662306a36Sopenharmony_ci * the dquots from the inode before inactivation changes the 175762306a36Sopenharmony_ci * block and inode counts. Most probably this is a result of 175862306a36Sopenharmony_ci * reloading the incore iunlinked list to purge unrecovered 175962306a36Sopenharmony_ci * unlinked inodes. 176062306a36Sopenharmony_ci */ 176162306a36Sopenharmony_ci xfs_qm_dqdetach(ip); 176262306a36Sopenharmony_ci } else { 176362306a36Sopenharmony_ci error = xfs_qm_dqattach(ip); 176462306a36Sopenharmony_ci if (error) 176562306a36Sopenharmony_ci goto out; 176662306a36Sopenharmony_ci } 176762306a36Sopenharmony_ci 176862306a36Sopenharmony_ci if (S_ISLNK(VFS_I(ip)->i_mode)) 176962306a36Sopenharmony_ci error = xfs_inactive_symlink(ip); 177062306a36Sopenharmony_ci else if (truncate) 177162306a36Sopenharmony_ci error = xfs_inactive_truncate(ip); 177262306a36Sopenharmony_ci if (error) 177362306a36Sopenharmony_ci goto out; 177462306a36Sopenharmony_ci 177562306a36Sopenharmony_ci /* 177662306a36Sopenharmony_ci * If there are attributes associated with the file then blow them away 177762306a36Sopenharmony_ci * now. 
The code calls a routine that recursively deconstructs the 177862306a36Sopenharmony_ci * attribute fork. If also blows away the in-core attribute fork. 177962306a36Sopenharmony_ci */ 178062306a36Sopenharmony_ci if (xfs_inode_has_attr_fork(ip)) { 178162306a36Sopenharmony_ci error = xfs_attr_inactive(ip); 178262306a36Sopenharmony_ci if (error) 178362306a36Sopenharmony_ci goto out; 178462306a36Sopenharmony_ci } 178562306a36Sopenharmony_ci 178662306a36Sopenharmony_ci ASSERT(ip->i_forkoff == 0); 178762306a36Sopenharmony_ci 178862306a36Sopenharmony_ci /* 178962306a36Sopenharmony_ci * Free the inode. 179062306a36Sopenharmony_ci */ 179162306a36Sopenharmony_ci error = xfs_inactive_ifree(ip); 179262306a36Sopenharmony_ci 179362306a36Sopenharmony_ciout: 179462306a36Sopenharmony_ci /* 179562306a36Sopenharmony_ci * We're done making metadata updates for this inode, so we can release 179662306a36Sopenharmony_ci * the attached dquots. 179762306a36Sopenharmony_ci */ 179862306a36Sopenharmony_ci xfs_qm_dqdetach(ip); 179962306a36Sopenharmony_ci return error; 180062306a36Sopenharmony_ci} 180162306a36Sopenharmony_ci 180262306a36Sopenharmony_ci/* 180362306a36Sopenharmony_ci * In-Core Unlinked List Lookups 180462306a36Sopenharmony_ci * ============================= 180562306a36Sopenharmony_ci * 180662306a36Sopenharmony_ci * Every inode is supposed to be reachable from some other piece of metadata 180762306a36Sopenharmony_ci * with the exception of the root directory. Inodes with a connection to a 180862306a36Sopenharmony_ci * file descriptor but not linked from anywhere in the on-disk directory tree 180962306a36Sopenharmony_ci * are collectively known as unlinked inodes, though the filesystem itself 181062306a36Sopenharmony_ci * maintains links to these inodes so that on-disk metadata are consistent. 181162306a36Sopenharmony_ci * 181262306a36Sopenharmony_ci * XFS implements a per-AG on-disk hash table of unlinked inodes. 
The AGI 181362306a36Sopenharmony_ci * header contains a number of buckets that point to an inode, and each inode 181462306a36Sopenharmony_ci * record has a pointer to the next inode in the hash chain. This 181562306a36Sopenharmony_ci * singly-linked list causes scaling problems in the iunlink remove function 181662306a36Sopenharmony_ci * because we must walk that list to find the inode that points to the inode 181762306a36Sopenharmony_ci * being removed from the unlinked hash bucket list. 181862306a36Sopenharmony_ci * 181962306a36Sopenharmony_ci * Hence we keep an in-memory double linked list to link each inode on an 182062306a36Sopenharmony_ci * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer 182162306a36Sopenharmony_ci * based lists would require having 64 list heads in the perag, one for each 182262306a36Sopenharmony_ci * list. This is expensive in terms of memory (think millions of AGs) and cache 182362306a36Sopenharmony_ci * misses on lookups. Instead, use the fact that inodes on the unlinked list 182462306a36Sopenharmony_ci * must be referenced at the VFS level to keep them on the list and hence we 182562306a36Sopenharmony_ci * have an existence guarantee for inodes on the unlinked list. 182662306a36Sopenharmony_ci * 182762306a36Sopenharmony_ci * Given we have an existence guarantee, we can use lockless inode cache lookups 182862306a36Sopenharmony_ci * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode 182962306a36Sopenharmony_ci * for the double linked unlinked list, and we don't need any extra locking to 183062306a36Sopenharmony_ci * keep the list safe as all manipulations are done under the AGI buffer lock. 183162306a36Sopenharmony_ci * Keeping the list up to date does not require memory allocation, just finding 183262306a36Sopenharmony_ci * the XFS inode and updating the next/prev unlinked list aginos. 
183362306a36Sopenharmony_ci */ 183462306a36Sopenharmony_ci 183562306a36Sopenharmony_ci/* 183662306a36Sopenharmony_ci * Find an inode on the unlinked list. This does not take references to the 183762306a36Sopenharmony_ci * inode as we have existence guarantees by holding the AGI buffer lock and that 183862306a36Sopenharmony_ci * only unlinked, referenced inodes can be on the unlinked inode list. If we 183962306a36Sopenharmony_ci * don't find the inode in cache, then let the caller handle the situation. 184062306a36Sopenharmony_ci */ 184162306a36Sopenharmony_cistatic struct xfs_inode * 184262306a36Sopenharmony_cixfs_iunlink_lookup( 184362306a36Sopenharmony_ci struct xfs_perag *pag, 184462306a36Sopenharmony_ci xfs_agino_t agino) 184562306a36Sopenharmony_ci{ 184662306a36Sopenharmony_ci struct xfs_inode *ip; 184762306a36Sopenharmony_ci 184862306a36Sopenharmony_ci rcu_read_lock(); 184962306a36Sopenharmony_ci ip = radix_tree_lookup(&pag->pag_ici_root, agino); 185062306a36Sopenharmony_ci if (!ip) { 185162306a36Sopenharmony_ci /* Caller can handle inode not being in memory. */ 185262306a36Sopenharmony_ci rcu_read_unlock(); 185362306a36Sopenharmony_ci return NULL; 185462306a36Sopenharmony_ci } 185562306a36Sopenharmony_ci 185662306a36Sopenharmony_ci /* 185762306a36Sopenharmony_ci * Inode in RCU freeing limbo should not happen. Warn about this and 185862306a36Sopenharmony_ci * let the caller handle the failure. 185962306a36Sopenharmony_ci */ 186062306a36Sopenharmony_ci if (WARN_ON_ONCE(!ip->i_ino)) { 186162306a36Sopenharmony_ci rcu_read_unlock(); 186262306a36Sopenharmony_ci return NULL; 186362306a36Sopenharmony_ci } 186462306a36Sopenharmony_ci ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM)); 186562306a36Sopenharmony_ci rcu_read_unlock(); 186662306a36Sopenharmony_ci return ip; 186762306a36Sopenharmony_ci} 186862306a36Sopenharmony_ci 186962306a36Sopenharmony_ci/* 187062306a36Sopenharmony_ci * Update the prev pointer of the next agino. 
Returns -ENOLINK if the inode 187162306a36Sopenharmony_ci * is not in cache. 187262306a36Sopenharmony_ci */ 187362306a36Sopenharmony_cistatic int 187462306a36Sopenharmony_cixfs_iunlink_update_backref( 187562306a36Sopenharmony_ci struct xfs_perag *pag, 187662306a36Sopenharmony_ci xfs_agino_t prev_agino, 187762306a36Sopenharmony_ci xfs_agino_t next_agino) 187862306a36Sopenharmony_ci{ 187962306a36Sopenharmony_ci struct xfs_inode *ip; 188062306a36Sopenharmony_ci 188162306a36Sopenharmony_ci /* No update necessary if we are at the end of the list. */ 188262306a36Sopenharmony_ci if (next_agino == NULLAGINO) 188362306a36Sopenharmony_ci return 0; 188462306a36Sopenharmony_ci 188562306a36Sopenharmony_ci ip = xfs_iunlink_lookup(pag, next_agino); 188662306a36Sopenharmony_ci if (!ip) 188762306a36Sopenharmony_ci return -ENOLINK; 188862306a36Sopenharmony_ci 188962306a36Sopenharmony_ci ip->i_prev_unlinked = prev_agino; 189062306a36Sopenharmony_ci return 0; 189162306a36Sopenharmony_ci} 189262306a36Sopenharmony_ci 189362306a36Sopenharmony_ci/* 189462306a36Sopenharmony_ci * Point the AGI unlinked bucket at an inode and log the results. The caller 189562306a36Sopenharmony_ci * is responsible for validating the old value. 
189662306a36Sopenharmony_ci */ 189762306a36Sopenharmony_ciSTATIC int 189862306a36Sopenharmony_cixfs_iunlink_update_bucket( 189962306a36Sopenharmony_ci struct xfs_trans *tp, 190062306a36Sopenharmony_ci struct xfs_perag *pag, 190162306a36Sopenharmony_ci struct xfs_buf *agibp, 190262306a36Sopenharmony_ci unsigned int bucket_index, 190362306a36Sopenharmony_ci xfs_agino_t new_agino) 190462306a36Sopenharmony_ci{ 190562306a36Sopenharmony_ci struct xfs_agi *agi = agibp->b_addr; 190662306a36Sopenharmony_ci xfs_agino_t old_value; 190762306a36Sopenharmony_ci int offset; 190862306a36Sopenharmony_ci 190962306a36Sopenharmony_ci ASSERT(xfs_verify_agino_or_null(pag, new_agino)); 191062306a36Sopenharmony_ci 191162306a36Sopenharmony_ci old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]); 191262306a36Sopenharmony_ci trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index, 191362306a36Sopenharmony_ci old_value, new_agino); 191462306a36Sopenharmony_ci 191562306a36Sopenharmony_ci /* 191662306a36Sopenharmony_ci * We should never find the head of the list already set to the value 191762306a36Sopenharmony_ci * passed in because either we're adding or removing ourselves from the 191862306a36Sopenharmony_ci * head of the list. 
191962306a36Sopenharmony_ci */ 192062306a36Sopenharmony_ci if (old_value == new_agino) { 192162306a36Sopenharmony_ci xfs_buf_mark_corrupt(agibp); 192262306a36Sopenharmony_ci return -EFSCORRUPTED; 192362306a36Sopenharmony_ci } 192462306a36Sopenharmony_ci 192562306a36Sopenharmony_ci agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino); 192662306a36Sopenharmony_ci offset = offsetof(struct xfs_agi, agi_unlinked) + 192762306a36Sopenharmony_ci (sizeof(xfs_agino_t) * bucket_index); 192862306a36Sopenharmony_ci xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1); 192962306a36Sopenharmony_ci return 0; 193062306a36Sopenharmony_ci} 193162306a36Sopenharmony_ci 193262306a36Sopenharmony_ci/* 193362306a36Sopenharmony_ci * Load the inode @next_agino into the cache and set its prev_unlinked pointer 193462306a36Sopenharmony_ci * to @prev_agino. Caller must hold the AGI to synchronize with other changes 193562306a36Sopenharmony_ci * to the unlinked list. 193662306a36Sopenharmony_ci */ 193762306a36Sopenharmony_ciSTATIC int 193862306a36Sopenharmony_cixfs_iunlink_reload_next( 193962306a36Sopenharmony_ci struct xfs_trans *tp, 194062306a36Sopenharmony_ci struct xfs_buf *agibp, 194162306a36Sopenharmony_ci xfs_agino_t prev_agino, 194262306a36Sopenharmony_ci xfs_agino_t next_agino) 194362306a36Sopenharmony_ci{ 194462306a36Sopenharmony_ci struct xfs_perag *pag = agibp->b_pag; 194562306a36Sopenharmony_ci struct xfs_mount *mp = pag->pag_mount; 194662306a36Sopenharmony_ci struct xfs_inode *next_ip = NULL; 194762306a36Sopenharmony_ci xfs_ino_t ino; 194862306a36Sopenharmony_ci int error; 194962306a36Sopenharmony_ci 195062306a36Sopenharmony_ci ASSERT(next_agino != NULLAGINO); 195162306a36Sopenharmony_ci 195262306a36Sopenharmony_ci#ifdef DEBUG 195362306a36Sopenharmony_ci rcu_read_lock(); 195462306a36Sopenharmony_ci next_ip = radix_tree_lookup(&pag->pag_ici_root, next_agino); 195562306a36Sopenharmony_ci ASSERT(next_ip == NULL); 195662306a36Sopenharmony_ci rcu_read_unlock(); 
195762306a36Sopenharmony_ci#endif 195862306a36Sopenharmony_ci 195962306a36Sopenharmony_ci xfs_info_ratelimited(mp, 196062306a36Sopenharmony_ci "Found unrecovered unlinked inode 0x%x in AG 0x%x. Initiating recovery.", 196162306a36Sopenharmony_ci next_agino, pag->pag_agno); 196262306a36Sopenharmony_ci 196362306a36Sopenharmony_ci /* 196462306a36Sopenharmony_ci * Use an untrusted lookup just to be cautious in case the AGI has been 196562306a36Sopenharmony_ci * corrupted and now points at a free inode. That shouldn't happen, 196662306a36Sopenharmony_ci * but we'd rather shut down now since we're already running in a weird 196762306a36Sopenharmony_ci * situation. 196862306a36Sopenharmony_ci */ 196962306a36Sopenharmony_ci ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, next_agino); 197062306a36Sopenharmony_ci error = xfs_iget(mp, tp, ino, XFS_IGET_UNTRUSTED, 0, &next_ip); 197162306a36Sopenharmony_ci if (error) 197262306a36Sopenharmony_ci return error; 197362306a36Sopenharmony_ci 197462306a36Sopenharmony_ci /* If this is not an unlinked inode, something is very wrong. 
*/ 197562306a36Sopenharmony_ci if (VFS_I(next_ip)->i_nlink != 0) { 197662306a36Sopenharmony_ci error = -EFSCORRUPTED; 197762306a36Sopenharmony_ci goto rele; 197862306a36Sopenharmony_ci } 197962306a36Sopenharmony_ci 198062306a36Sopenharmony_ci next_ip->i_prev_unlinked = prev_agino; 198162306a36Sopenharmony_ci trace_xfs_iunlink_reload_next(next_ip); 198262306a36Sopenharmony_cirele: 198362306a36Sopenharmony_ci ASSERT(!(VFS_I(next_ip)->i_state & I_DONTCACHE)); 198462306a36Sopenharmony_ci if (xfs_is_quotacheck_running(mp) && next_ip) 198562306a36Sopenharmony_ci xfs_iflags_set(next_ip, XFS_IQUOTAUNCHECKED); 198662306a36Sopenharmony_ci xfs_irele(next_ip); 198762306a36Sopenharmony_ci return error; 198862306a36Sopenharmony_ci} 198962306a36Sopenharmony_ci 199062306a36Sopenharmony_cistatic int 199162306a36Sopenharmony_cixfs_iunlink_insert_inode( 199262306a36Sopenharmony_ci struct xfs_trans *tp, 199362306a36Sopenharmony_ci struct xfs_perag *pag, 199462306a36Sopenharmony_ci struct xfs_buf *agibp, 199562306a36Sopenharmony_ci struct xfs_inode *ip) 199662306a36Sopenharmony_ci{ 199762306a36Sopenharmony_ci struct xfs_mount *mp = tp->t_mountp; 199862306a36Sopenharmony_ci struct xfs_agi *agi = agibp->b_addr; 199962306a36Sopenharmony_ci xfs_agino_t next_agino; 200062306a36Sopenharmony_ci xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 200162306a36Sopenharmony_ci short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 200262306a36Sopenharmony_ci int error; 200362306a36Sopenharmony_ci 200462306a36Sopenharmony_ci /* 200562306a36Sopenharmony_ci * Get the index into the agi hash table for the list this inode will 200662306a36Sopenharmony_ci * go on. Make sure the pointer isn't garbage and that this inode 200762306a36Sopenharmony_ci * isn't already on the list. 
200862306a36Sopenharmony_ci */ 200962306a36Sopenharmony_ci next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 201062306a36Sopenharmony_ci if (next_agino == agino || 201162306a36Sopenharmony_ci !xfs_verify_agino_or_null(pag, next_agino)) { 201262306a36Sopenharmony_ci xfs_buf_mark_corrupt(agibp); 201362306a36Sopenharmony_ci return -EFSCORRUPTED; 201462306a36Sopenharmony_ci } 201562306a36Sopenharmony_ci 201662306a36Sopenharmony_ci /* 201762306a36Sopenharmony_ci * Update the prev pointer in the next inode to point back to this 201862306a36Sopenharmony_ci * inode. 201962306a36Sopenharmony_ci */ 202062306a36Sopenharmony_ci error = xfs_iunlink_update_backref(pag, agino, next_agino); 202162306a36Sopenharmony_ci if (error == -ENOLINK) 202262306a36Sopenharmony_ci error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino); 202362306a36Sopenharmony_ci if (error) 202462306a36Sopenharmony_ci return error; 202562306a36Sopenharmony_ci 202662306a36Sopenharmony_ci if (next_agino != NULLAGINO) { 202762306a36Sopenharmony_ci /* 202862306a36Sopenharmony_ci * There is already another inode in the bucket, so point this 202962306a36Sopenharmony_ci * inode to the current head of the list. 203062306a36Sopenharmony_ci */ 203162306a36Sopenharmony_ci error = xfs_iunlink_log_inode(tp, ip, pag, next_agino); 203262306a36Sopenharmony_ci if (error) 203362306a36Sopenharmony_ci return error; 203462306a36Sopenharmony_ci ip->i_next_unlinked = next_agino; 203562306a36Sopenharmony_ci } 203662306a36Sopenharmony_ci 203762306a36Sopenharmony_ci /* Point the head of the list to point to this inode. 
*/ 203862306a36Sopenharmony_ci ip->i_prev_unlinked = NULLAGINO; 203962306a36Sopenharmony_ci return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino); 204062306a36Sopenharmony_ci} 204162306a36Sopenharmony_ci 204262306a36Sopenharmony_ci/* 204362306a36Sopenharmony_ci * This is called when the inode's link count has gone to 0 or we are creating 204462306a36Sopenharmony_ci * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0. 204562306a36Sopenharmony_ci * 204662306a36Sopenharmony_ci * We place the on-disk inode on a list in the AGI. It will be pulled from this 204762306a36Sopenharmony_ci * list when the inode is freed. 204862306a36Sopenharmony_ci */ 204962306a36Sopenharmony_ciSTATIC int 205062306a36Sopenharmony_cixfs_iunlink( 205162306a36Sopenharmony_ci struct xfs_trans *tp, 205262306a36Sopenharmony_ci struct xfs_inode *ip) 205362306a36Sopenharmony_ci{ 205462306a36Sopenharmony_ci struct xfs_mount *mp = tp->t_mountp; 205562306a36Sopenharmony_ci struct xfs_perag *pag; 205662306a36Sopenharmony_ci struct xfs_buf *agibp; 205762306a36Sopenharmony_ci int error; 205862306a36Sopenharmony_ci 205962306a36Sopenharmony_ci ASSERT(VFS_I(ip)->i_nlink == 0); 206062306a36Sopenharmony_ci ASSERT(VFS_I(ip)->i_mode != 0); 206162306a36Sopenharmony_ci trace_xfs_iunlink(ip); 206262306a36Sopenharmony_ci 206362306a36Sopenharmony_ci pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 206462306a36Sopenharmony_ci 206562306a36Sopenharmony_ci /* Get the agi buffer first. It ensures lock ordering on the list. 
*/ 206662306a36Sopenharmony_ci error = xfs_read_agi(pag, tp, &agibp); 206762306a36Sopenharmony_ci if (error) 206862306a36Sopenharmony_ci goto out; 206962306a36Sopenharmony_ci 207062306a36Sopenharmony_ci error = xfs_iunlink_insert_inode(tp, pag, agibp, ip); 207162306a36Sopenharmony_ciout: 207262306a36Sopenharmony_ci xfs_perag_put(pag); 207362306a36Sopenharmony_ci return error; 207462306a36Sopenharmony_ci} 207562306a36Sopenharmony_ci 207662306a36Sopenharmony_cistatic int 207762306a36Sopenharmony_cixfs_iunlink_remove_inode( 207862306a36Sopenharmony_ci struct xfs_trans *tp, 207962306a36Sopenharmony_ci struct xfs_perag *pag, 208062306a36Sopenharmony_ci struct xfs_buf *agibp, 208162306a36Sopenharmony_ci struct xfs_inode *ip) 208262306a36Sopenharmony_ci{ 208362306a36Sopenharmony_ci struct xfs_mount *mp = tp->t_mountp; 208462306a36Sopenharmony_ci struct xfs_agi *agi = agibp->b_addr; 208562306a36Sopenharmony_ci xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 208662306a36Sopenharmony_ci xfs_agino_t head_agino; 208762306a36Sopenharmony_ci short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 208862306a36Sopenharmony_ci int error; 208962306a36Sopenharmony_ci 209062306a36Sopenharmony_ci trace_xfs_iunlink_remove(ip); 209162306a36Sopenharmony_ci 209262306a36Sopenharmony_ci /* 209362306a36Sopenharmony_ci * Get the index into the agi hash table for the list this inode will 209462306a36Sopenharmony_ci * go on. Make sure the head pointer isn't garbage. 
209562306a36Sopenharmony_ci */ 209662306a36Sopenharmony_ci head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 209762306a36Sopenharmony_ci if (!xfs_verify_agino(pag, head_agino)) { 209862306a36Sopenharmony_ci XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 209962306a36Sopenharmony_ci agi, sizeof(*agi)); 210062306a36Sopenharmony_ci return -EFSCORRUPTED; 210162306a36Sopenharmony_ci } 210262306a36Sopenharmony_ci 210362306a36Sopenharmony_ci /* 210462306a36Sopenharmony_ci * Set our inode's next_unlinked pointer to NULL and then return 210562306a36Sopenharmony_ci * the old pointer value so that we can update whatever was previous 210662306a36Sopenharmony_ci * to us in the list to point to whatever was next in the list. 210762306a36Sopenharmony_ci */ 210862306a36Sopenharmony_ci error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO); 210962306a36Sopenharmony_ci if (error) 211062306a36Sopenharmony_ci return error; 211162306a36Sopenharmony_ci 211262306a36Sopenharmony_ci /* 211362306a36Sopenharmony_ci * Update the prev pointer in the next inode to point back to previous 211462306a36Sopenharmony_ci * inode in the chain. 
211562306a36Sopenharmony_ci */ 211662306a36Sopenharmony_ci error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked, 211762306a36Sopenharmony_ci ip->i_next_unlinked); 211862306a36Sopenharmony_ci if (error == -ENOLINK) 211962306a36Sopenharmony_ci error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked, 212062306a36Sopenharmony_ci ip->i_next_unlinked); 212162306a36Sopenharmony_ci if (error) 212262306a36Sopenharmony_ci return error; 212362306a36Sopenharmony_ci 212462306a36Sopenharmony_ci if (head_agino != agino) { 212562306a36Sopenharmony_ci struct xfs_inode *prev_ip; 212662306a36Sopenharmony_ci 212762306a36Sopenharmony_ci prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked); 212862306a36Sopenharmony_ci if (!prev_ip) 212962306a36Sopenharmony_ci return -EFSCORRUPTED; 213062306a36Sopenharmony_ci 213162306a36Sopenharmony_ci error = xfs_iunlink_log_inode(tp, prev_ip, pag, 213262306a36Sopenharmony_ci ip->i_next_unlinked); 213362306a36Sopenharmony_ci prev_ip->i_next_unlinked = ip->i_next_unlinked; 213462306a36Sopenharmony_ci } else { 213562306a36Sopenharmony_ci /* Point the head of the list to the next unlinked inode. */ 213662306a36Sopenharmony_ci error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, 213762306a36Sopenharmony_ci ip->i_next_unlinked); 213862306a36Sopenharmony_ci } 213962306a36Sopenharmony_ci 214062306a36Sopenharmony_ci ip->i_next_unlinked = NULLAGINO; 214162306a36Sopenharmony_ci ip->i_prev_unlinked = 0; 214262306a36Sopenharmony_ci return error; 214362306a36Sopenharmony_ci} 214462306a36Sopenharmony_ci 214562306a36Sopenharmony_ci/* 214662306a36Sopenharmony_ci * Pull the on-disk inode from the AGI unlinked list. 
214762306a36Sopenharmony_ci */ 214862306a36Sopenharmony_ciSTATIC int 214962306a36Sopenharmony_cixfs_iunlink_remove( 215062306a36Sopenharmony_ci struct xfs_trans *tp, 215162306a36Sopenharmony_ci struct xfs_perag *pag, 215262306a36Sopenharmony_ci struct xfs_inode *ip) 215362306a36Sopenharmony_ci{ 215462306a36Sopenharmony_ci struct xfs_buf *agibp; 215562306a36Sopenharmony_ci int error; 215662306a36Sopenharmony_ci 215762306a36Sopenharmony_ci trace_xfs_iunlink_remove(ip); 215862306a36Sopenharmony_ci 215962306a36Sopenharmony_ci /* Get the agi buffer first. It ensures lock ordering on the list. */ 216062306a36Sopenharmony_ci error = xfs_read_agi(pag, tp, &agibp); 216162306a36Sopenharmony_ci if (error) 216262306a36Sopenharmony_ci return error; 216362306a36Sopenharmony_ci 216462306a36Sopenharmony_ci return xfs_iunlink_remove_inode(tp, pag, agibp, ip); 216562306a36Sopenharmony_ci} 216662306a36Sopenharmony_ci 216762306a36Sopenharmony_ci/* 216862306a36Sopenharmony_ci * Look up the inode number specified and if it is not already marked XFS_ISTALE 216962306a36Sopenharmony_ci * mark it stale. We should only find clean inodes in this lookup that aren't 217062306a36Sopenharmony_ci * already stale. 
217162306a36Sopenharmony_ci */ 217262306a36Sopenharmony_cistatic void 217362306a36Sopenharmony_cixfs_ifree_mark_inode_stale( 217462306a36Sopenharmony_ci struct xfs_perag *pag, 217562306a36Sopenharmony_ci struct xfs_inode *free_ip, 217662306a36Sopenharmony_ci xfs_ino_t inum) 217762306a36Sopenharmony_ci{ 217862306a36Sopenharmony_ci struct xfs_mount *mp = pag->pag_mount; 217962306a36Sopenharmony_ci struct xfs_inode_log_item *iip; 218062306a36Sopenharmony_ci struct xfs_inode *ip; 218162306a36Sopenharmony_ci 218262306a36Sopenharmony_ciretry: 218362306a36Sopenharmony_ci rcu_read_lock(); 218462306a36Sopenharmony_ci ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum)); 218562306a36Sopenharmony_ci 218662306a36Sopenharmony_ci /* Inode not in memory, nothing to do */ 218762306a36Sopenharmony_ci if (!ip) { 218862306a36Sopenharmony_ci rcu_read_unlock(); 218962306a36Sopenharmony_ci return; 219062306a36Sopenharmony_ci } 219162306a36Sopenharmony_ci 219262306a36Sopenharmony_ci /* 219362306a36Sopenharmony_ci * because this is an RCU protected lookup, we could find a recently 219462306a36Sopenharmony_ci * freed or even reallocated inode during the lookup. We need to check 219562306a36Sopenharmony_ci * under the i_flags_lock for a valid inode here. Skip it if it is not 219662306a36Sopenharmony_ci * valid, the wrong inode or stale. 219762306a36Sopenharmony_ci */ 219862306a36Sopenharmony_ci spin_lock(&ip->i_flags_lock); 219962306a36Sopenharmony_ci if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE)) 220062306a36Sopenharmony_ci goto out_iflags_unlock; 220162306a36Sopenharmony_ci 220262306a36Sopenharmony_ci /* 220362306a36Sopenharmony_ci * Don't try to lock/unlock the current inode, but we _cannot_ skip the 220462306a36Sopenharmony_ci * other inodes that we did not find in the list attached to the buffer 220562306a36Sopenharmony_ci * and are not already marked stale. If we can't lock it, back off and 220662306a36Sopenharmony_ci * retry. 
220762306a36Sopenharmony_ci */ 220862306a36Sopenharmony_ci if (ip != free_ip) { 220962306a36Sopenharmony_ci if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 221062306a36Sopenharmony_ci spin_unlock(&ip->i_flags_lock); 221162306a36Sopenharmony_ci rcu_read_unlock(); 221262306a36Sopenharmony_ci delay(1); 221362306a36Sopenharmony_ci goto retry; 221462306a36Sopenharmony_ci } 221562306a36Sopenharmony_ci } 221662306a36Sopenharmony_ci ip->i_flags |= XFS_ISTALE; 221762306a36Sopenharmony_ci 221862306a36Sopenharmony_ci /* 221962306a36Sopenharmony_ci * If the inode is flushing, it is already attached to the buffer. All 222062306a36Sopenharmony_ci * we needed to do here is mark the inode stale so buffer IO completion 222162306a36Sopenharmony_ci * will remove it from the AIL. 222262306a36Sopenharmony_ci */ 222362306a36Sopenharmony_ci iip = ip->i_itemp; 222462306a36Sopenharmony_ci if (__xfs_iflags_test(ip, XFS_IFLUSHING)) { 222562306a36Sopenharmony_ci ASSERT(!list_empty(&iip->ili_item.li_bio_list)); 222662306a36Sopenharmony_ci ASSERT(iip->ili_last_fields); 222762306a36Sopenharmony_ci goto out_iunlock; 222862306a36Sopenharmony_ci } 222962306a36Sopenharmony_ci 223062306a36Sopenharmony_ci /* 223162306a36Sopenharmony_ci * Inodes not attached to the buffer can be released immediately. 223262306a36Sopenharmony_ci * Everything else has to go through xfs_iflush_abort() on journal 223362306a36Sopenharmony_ci * commit as the flock synchronises removal of the inode from the 223462306a36Sopenharmony_ci * cluster buffer against inode reclaim. 
223562306a36Sopenharmony_ci */ 223662306a36Sopenharmony_ci if (!iip || list_empty(&iip->ili_item.li_bio_list)) 223762306a36Sopenharmony_ci goto out_iunlock; 223862306a36Sopenharmony_ci 223962306a36Sopenharmony_ci __xfs_iflags_set(ip, XFS_IFLUSHING); 224062306a36Sopenharmony_ci spin_unlock(&ip->i_flags_lock); 224162306a36Sopenharmony_ci rcu_read_unlock(); 224262306a36Sopenharmony_ci 224362306a36Sopenharmony_ci /* we have a dirty inode in memory that has not yet been flushed. */ 224462306a36Sopenharmony_ci spin_lock(&iip->ili_lock); 224562306a36Sopenharmony_ci iip->ili_last_fields = iip->ili_fields; 224662306a36Sopenharmony_ci iip->ili_fields = 0; 224762306a36Sopenharmony_ci iip->ili_fsync_fields = 0; 224862306a36Sopenharmony_ci spin_unlock(&iip->ili_lock); 224962306a36Sopenharmony_ci ASSERT(iip->ili_last_fields); 225062306a36Sopenharmony_ci 225162306a36Sopenharmony_ci if (ip != free_ip) 225262306a36Sopenharmony_ci xfs_iunlock(ip, XFS_ILOCK_EXCL); 225362306a36Sopenharmony_ci return; 225462306a36Sopenharmony_ci 225562306a36Sopenharmony_ciout_iunlock: 225662306a36Sopenharmony_ci if (ip != free_ip) 225762306a36Sopenharmony_ci xfs_iunlock(ip, XFS_ILOCK_EXCL); 225862306a36Sopenharmony_ciout_iflags_unlock: 225962306a36Sopenharmony_ci spin_unlock(&ip->i_flags_lock); 226062306a36Sopenharmony_ci rcu_read_unlock(); 226162306a36Sopenharmony_ci} 226262306a36Sopenharmony_ci 226362306a36Sopenharmony_ci/* 226462306a36Sopenharmony_ci * A big issue when freeing the inode cluster is that we _cannot_ skip any 226562306a36Sopenharmony_ci * inodes that are in memory - they all must be marked stale and attached to 226662306a36Sopenharmony_ci * the cluster buffer. 
226762306a36Sopenharmony_ci */ 226862306a36Sopenharmony_cistatic int 226962306a36Sopenharmony_cixfs_ifree_cluster( 227062306a36Sopenharmony_ci struct xfs_trans *tp, 227162306a36Sopenharmony_ci struct xfs_perag *pag, 227262306a36Sopenharmony_ci struct xfs_inode *free_ip, 227362306a36Sopenharmony_ci struct xfs_icluster *xic) 227462306a36Sopenharmony_ci{ 227562306a36Sopenharmony_ci struct xfs_mount *mp = free_ip->i_mount; 227662306a36Sopenharmony_ci struct xfs_ino_geometry *igeo = M_IGEO(mp); 227762306a36Sopenharmony_ci struct xfs_buf *bp; 227862306a36Sopenharmony_ci xfs_daddr_t blkno; 227962306a36Sopenharmony_ci xfs_ino_t inum = xic->first_ino; 228062306a36Sopenharmony_ci int nbufs; 228162306a36Sopenharmony_ci int i, j; 228262306a36Sopenharmony_ci int ioffset; 228362306a36Sopenharmony_ci int error; 228462306a36Sopenharmony_ci 228562306a36Sopenharmony_ci nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster; 228662306a36Sopenharmony_ci 228762306a36Sopenharmony_ci for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) { 228862306a36Sopenharmony_ci /* 228962306a36Sopenharmony_ci * The allocation bitmap tells us which inodes of the chunk were 229062306a36Sopenharmony_ci * physically allocated. Skip the cluster if an inode falls into 229162306a36Sopenharmony_ci * a sparse region. 
229262306a36Sopenharmony_ci */ 229362306a36Sopenharmony_ci ioffset = inum - xic->first_ino; 229462306a36Sopenharmony_ci if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) { 229562306a36Sopenharmony_ci ASSERT(ioffset % igeo->inodes_per_cluster == 0); 229662306a36Sopenharmony_ci continue; 229762306a36Sopenharmony_ci } 229862306a36Sopenharmony_ci 229962306a36Sopenharmony_ci blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 230062306a36Sopenharmony_ci XFS_INO_TO_AGBNO(mp, inum)); 230162306a36Sopenharmony_ci 230262306a36Sopenharmony_ci /* 230362306a36Sopenharmony_ci * We obtain and lock the backing buffer first in the process 230462306a36Sopenharmony_ci * here to ensure dirty inodes attached to the buffer remain in 230562306a36Sopenharmony_ci * the flushing state while we mark them stale. 230662306a36Sopenharmony_ci * 230762306a36Sopenharmony_ci * If we scan the in-memory inodes first, then buffer IO can 230862306a36Sopenharmony_ci * complete before we get a lock on it, and hence we may fail 230962306a36Sopenharmony_ci * to mark all the active inodes on the buffer stale. 231062306a36Sopenharmony_ci */ 231162306a36Sopenharmony_ci error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 231262306a36Sopenharmony_ci mp->m_bsize * igeo->blocks_per_cluster, 231362306a36Sopenharmony_ci XBF_UNMAPPED, &bp); 231462306a36Sopenharmony_ci if (error) 231562306a36Sopenharmony_ci return error; 231662306a36Sopenharmony_ci 231762306a36Sopenharmony_ci /* 231862306a36Sopenharmony_ci * This buffer may not have been correctly initialised as we 231962306a36Sopenharmony_ci * didn't read it from disk. That's not important because we are 232062306a36Sopenharmony_ci * only using to mark the buffer as stale in the log, and to 232162306a36Sopenharmony_ci * attach stale cached inodes on it. That means it will never be 232262306a36Sopenharmony_ci * dispatched for IO. If it is, we want to know about it, and we 232362306a36Sopenharmony_ci * want it to fail. 
We can acheive this by adding a write 232462306a36Sopenharmony_ci * verifier to the buffer. 232562306a36Sopenharmony_ci */ 232662306a36Sopenharmony_ci bp->b_ops = &xfs_inode_buf_ops; 232762306a36Sopenharmony_ci 232862306a36Sopenharmony_ci /* 232962306a36Sopenharmony_ci * Now we need to set all the cached clean inodes as XFS_ISTALE, 233062306a36Sopenharmony_ci * too. This requires lookups, and will skip inodes that we've 233162306a36Sopenharmony_ci * already marked XFS_ISTALE. 233262306a36Sopenharmony_ci */ 233362306a36Sopenharmony_ci for (i = 0; i < igeo->inodes_per_cluster; i++) 233462306a36Sopenharmony_ci xfs_ifree_mark_inode_stale(pag, free_ip, inum + i); 233562306a36Sopenharmony_ci 233662306a36Sopenharmony_ci xfs_trans_stale_inode_buf(tp, bp); 233762306a36Sopenharmony_ci xfs_trans_binval(tp, bp); 233862306a36Sopenharmony_ci } 233962306a36Sopenharmony_ci return 0; 234062306a36Sopenharmony_ci} 234162306a36Sopenharmony_ci 234262306a36Sopenharmony_ci/* 234362306a36Sopenharmony_ci * This is called to return an inode to the inode free list. The inode should 234462306a36Sopenharmony_ci * already be truncated to 0 length and have no pages associated with it. This 234562306a36Sopenharmony_ci * routine also assumes that the inode is already a part of the transaction. 234662306a36Sopenharmony_ci * 234762306a36Sopenharmony_ci * The on-disk copy of the inode will have been added to the list of unlinked 234862306a36Sopenharmony_ci * inodes in the AGI. We need to remove the inode from that list atomically with 234962306a36Sopenharmony_ci * respect to freeing it here. 
235062306a36Sopenharmony_ci */ 235162306a36Sopenharmony_ciint 235262306a36Sopenharmony_cixfs_ifree( 235362306a36Sopenharmony_ci struct xfs_trans *tp, 235462306a36Sopenharmony_ci struct xfs_inode *ip) 235562306a36Sopenharmony_ci{ 235662306a36Sopenharmony_ci struct xfs_mount *mp = ip->i_mount; 235762306a36Sopenharmony_ci struct xfs_perag *pag; 235862306a36Sopenharmony_ci struct xfs_icluster xic = { 0 }; 235962306a36Sopenharmony_ci struct xfs_inode_log_item *iip = ip->i_itemp; 236062306a36Sopenharmony_ci int error; 236162306a36Sopenharmony_ci 236262306a36Sopenharmony_ci ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 236362306a36Sopenharmony_ci ASSERT(VFS_I(ip)->i_nlink == 0); 236462306a36Sopenharmony_ci ASSERT(ip->i_df.if_nextents == 0); 236562306a36Sopenharmony_ci ASSERT(ip->i_disk_size == 0 || !S_ISREG(VFS_I(ip)->i_mode)); 236662306a36Sopenharmony_ci ASSERT(ip->i_nblocks == 0); 236762306a36Sopenharmony_ci 236862306a36Sopenharmony_ci pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 236962306a36Sopenharmony_ci 237062306a36Sopenharmony_ci /* 237162306a36Sopenharmony_ci * Free the inode first so that we guarantee that the AGI lock is going 237262306a36Sopenharmony_ci * to be taken before we remove the inode from the unlinked list. This 237362306a36Sopenharmony_ci * makes the AGI lock -> unlinked list modification order the same as 237462306a36Sopenharmony_ci * used in O_TMPFILE creation. 237562306a36Sopenharmony_ci */ 237662306a36Sopenharmony_ci error = xfs_difree(tp, pag, ip->i_ino, &xic); 237762306a36Sopenharmony_ci if (error) 237862306a36Sopenharmony_ci goto out; 237962306a36Sopenharmony_ci 238062306a36Sopenharmony_ci error = xfs_iunlink_remove(tp, pag, ip); 238162306a36Sopenharmony_ci if (error) 238262306a36Sopenharmony_ci goto out; 238362306a36Sopenharmony_ci 238462306a36Sopenharmony_ci /* 238562306a36Sopenharmony_ci * Free any local-format data sitting around before we reset the 238662306a36Sopenharmony_ci * data fork to extents format. 
Note that the attr fork data has 238762306a36Sopenharmony_ci * already been freed by xfs_attr_inactive. 238862306a36Sopenharmony_ci */ 238962306a36Sopenharmony_ci if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) { 239062306a36Sopenharmony_ci kmem_free(ip->i_df.if_u1.if_data); 239162306a36Sopenharmony_ci ip->i_df.if_u1.if_data = NULL; 239262306a36Sopenharmony_ci ip->i_df.if_bytes = 0; 239362306a36Sopenharmony_ci } 239462306a36Sopenharmony_ci 239562306a36Sopenharmony_ci VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ 239662306a36Sopenharmony_ci ip->i_diflags = 0; 239762306a36Sopenharmony_ci ip->i_diflags2 = mp->m_ino_geo.new_diflags2; 239862306a36Sopenharmony_ci ip->i_forkoff = 0; /* mark the attr fork not in use */ 239962306a36Sopenharmony_ci ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; 240062306a36Sopenharmony_ci if (xfs_iflags_test(ip, XFS_IPRESERVE_DM_FIELDS)) 240162306a36Sopenharmony_ci xfs_iflags_clear(ip, XFS_IPRESERVE_DM_FIELDS); 240262306a36Sopenharmony_ci 240362306a36Sopenharmony_ci /* Don't attempt to replay owner changes for a deleted inode */ 240462306a36Sopenharmony_ci spin_lock(&iip->ili_lock); 240562306a36Sopenharmony_ci iip->ili_fields &= ~(XFS_ILOG_AOWNER | XFS_ILOG_DOWNER); 240662306a36Sopenharmony_ci spin_unlock(&iip->ili_lock); 240762306a36Sopenharmony_ci 240862306a36Sopenharmony_ci /* 240962306a36Sopenharmony_ci * Bump the generation count so no one will be confused 241062306a36Sopenharmony_ci * by reincarnations of this inode. 
241162306a36Sopenharmony_ci */ 241262306a36Sopenharmony_ci VFS_I(ip)->i_generation++; 241362306a36Sopenharmony_ci xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 241462306a36Sopenharmony_ci 241562306a36Sopenharmony_ci if (xic.deleted) 241662306a36Sopenharmony_ci error = xfs_ifree_cluster(tp, pag, ip, &xic); 241762306a36Sopenharmony_ciout: 241862306a36Sopenharmony_ci xfs_perag_put(pag); 241962306a36Sopenharmony_ci return error; 242062306a36Sopenharmony_ci} 242162306a36Sopenharmony_ci 242262306a36Sopenharmony_ci/* 242362306a36Sopenharmony_ci * This is called to unpin an inode. The caller must have the inode locked 242462306a36Sopenharmony_ci * in at least shared mode so that the buffer cannot be subsequently pinned 242562306a36Sopenharmony_ci * once someone is waiting for it to be unpinned. 242662306a36Sopenharmony_ci */ 242762306a36Sopenharmony_cistatic void 242862306a36Sopenharmony_cixfs_iunpin( 242962306a36Sopenharmony_ci struct xfs_inode *ip) 243062306a36Sopenharmony_ci{ 243162306a36Sopenharmony_ci ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 243262306a36Sopenharmony_ci 243362306a36Sopenharmony_ci trace_xfs_inode_unpin_nowait(ip, _RET_IP_); 243462306a36Sopenharmony_ci 243562306a36Sopenharmony_ci /* Give the log a push to start the unpinning I/O */ 243662306a36Sopenharmony_ci xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL); 243762306a36Sopenharmony_ci 243862306a36Sopenharmony_ci} 243962306a36Sopenharmony_ci 244062306a36Sopenharmony_cistatic void 244162306a36Sopenharmony_ci__xfs_iunpin_wait( 244262306a36Sopenharmony_ci struct xfs_inode *ip) 244362306a36Sopenharmony_ci{ 244462306a36Sopenharmony_ci wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT); 244562306a36Sopenharmony_ci DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT); 244662306a36Sopenharmony_ci 244762306a36Sopenharmony_ci xfs_iunpin(ip); 244862306a36Sopenharmony_ci 244962306a36Sopenharmony_ci do { 245062306a36Sopenharmony_ci prepare_to_wait(wq, 
&wait.wq_entry, TASK_UNINTERRUPTIBLE); 245162306a36Sopenharmony_ci if (xfs_ipincount(ip)) 245262306a36Sopenharmony_ci io_schedule(); 245362306a36Sopenharmony_ci } while (xfs_ipincount(ip)); 245462306a36Sopenharmony_ci finish_wait(wq, &wait.wq_entry); 245562306a36Sopenharmony_ci} 245662306a36Sopenharmony_ci 245762306a36Sopenharmony_civoid 245862306a36Sopenharmony_cixfs_iunpin_wait( 245962306a36Sopenharmony_ci struct xfs_inode *ip) 246062306a36Sopenharmony_ci{ 246162306a36Sopenharmony_ci if (xfs_ipincount(ip)) 246262306a36Sopenharmony_ci __xfs_iunpin_wait(ip); 246362306a36Sopenharmony_ci} 246462306a36Sopenharmony_ci 246562306a36Sopenharmony_ci/* 246662306a36Sopenharmony_ci * Removing an inode from the namespace involves removing the directory entry 246762306a36Sopenharmony_ci * and dropping the link count on the inode. Removing the directory entry can 246862306a36Sopenharmony_ci * result in locking an AGF (directory blocks were freed) and removing a link 246962306a36Sopenharmony_ci * count can result in placing the inode on an unlinked list which results in 247062306a36Sopenharmony_ci * locking an AGI. 247162306a36Sopenharmony_ci * 247262306a36Sopenharmony_ci * The big problem here is that we have an ordering constraint on AGF and AGI 247362306a36Sopenharmony_ci * locking - inode allocation locks the AGI, then can allocate a new extent for 247462306a36Sopenharmony_ci * new inodes, locking the AGF after the AGI. Similarly, freeing the inode 247562306a36Sopenharmony_ci * removes the inode from the unlinked list, requiring that we lock the AGI 247662306a36Sopenharmony_ci * first, and then freeing the inode can result in an inode chunk being freed 247762306a36Sopenharmony_ci * and hence freeing disk space requiring that we lock an AGF. 247862306a36Sopenharmony_ci * 247962306a36Sopenharmony_ci * Hence the ordering that is imposed by other parts of the code is AGI before 248062306a36Sopenharmony_ci * AGF. 
This means we cannot remove the directory entry before we drop the inode 248162306a36Sopenharmony_ci * reference count and put it on the unlinked list as this results in a lock 248262306a36Sopenharmony_ci * order of AGF then AGI, and this can deadlock against inode allocation and 248362306a36Sopenharmony_ci * freeing. Therefore we must drop the link counts before we remove the 248462306a36Sopenharmony_ci * directory entry. 248562306a36Sopenharmony_ci * 248662306a36Sopenharmony_ci * This is still safe from a transactional point of view - it is not until we 248762306a36Sopenharmony_ci * get to xfs_defer_finish() that we have the possibility of multiple 248862306a36Sopenharmony_ci * transactions in this operation. Hence as long as we remove the directory 248962306a36Sopenharmony_ci * entry and drop the link count in the first transaction of the remove 249062306a36Sopenharmony_ci * operation, there are no transactional constraints on the ordering here. 249162306a36Sopenharmony_ci */ 249262306a36Sopenharmony_ciint 249362306a36Sopenharmony_cixfs_remove( 249462306a36Sopenharmony_ci xfs_inode_t *dp, 249562306a36Sopenharmony_ci struct xfs_name *name, 249662306a36Sopenharmony_ci xfs_inode_t *ip) 249762306a36Sopenharmony_ci{ 249862306a36Sopenharmony_ci xfs_mount_t *mp = dp->i_mount; 249962306a36Sopenharmony_ci xfs_trans_t *tp = NULL; 250062306a36Sopenharmony_ci int is_dir = S_ISDIR(VFS_I(ip)->i_mode); 250162306a36Sopenharmony_ci int dontcare; 250262306a36Sopenharmony_ci int error = 0; 250362306a36Sopenharmony_ci uint resblks; 250462306a36Sopenharmony_ci 250562306a36Sopenharmony_ci trace_xfs_remove(dp, name); 250662306a36Sopenharmony_ci 250762306a36Sopenharmony_ci if (xfs_is_shutdown(mp)) 250862306a36Sopenharmony_ci return -EIO; 250962306a36Sopenharmony_ci 251062306a36Sopenharmony_ci error = xfs_qm_dqattach(dp); 251162306a36Sopenharmony_ci if (error) 251262306a36Sopenharmony_ci goto std_return; 251362306a36Sopenharmony_ci 251462306a36Sopenharmony_ci error = 
xfs_qm_dqattach(ip); 251562306a36Sopenharmony_ci if (error) 251662306a36Sopenharmony_ci goto std_return; 251762306a36Sopenharmony_ci 251862306a36Sopenharmony_ci /* 251962306a36Sopenharmony_ci * We try to get the real space reservation first, allowing for 252062306a36Sopenharmony_ci * directory btree deletion(s) implying possible bmap insert(s). If we 252162306a36Sopenharmony_ci * can't get the space reservation then we use 0 instead, and avoid the 252262306a36Sopenharmony_ci * bmap btree insert(s) in the directory code by, if the bmap insert 252362306a36Sopenharmony_ci * tries to happen, instead trimming the LAST block from the directory. 252462306a36Sopenharmony_ci * 252562306a36Sopenharmony_ci * Ignore EDQUOT and ENOSPC being returned via nospace_error because 252662306a36Sopenharmony_ci * the directory code can handle a reservationless update and we don't 252762306a36Sopenharmony_ci * want to prevent a user from trying to free space by deleting things. 252862306a36Sopenharmony_ci */ 252962306a36Sopenharmony_ci resblks = XFS_REMOVE_SPACE_RES(mp); 253062306a36Sopenharmony_ci error = xfs_trans_alloc_dir(dp, &M_RES(mp)->tr_remove, ip, &resblks, 253162306a36Sopenharmony_ci &tp, &dontcare); 253262306a36Sopenharmony_ci if (error) { 253362306a36Sopenharmony_ci ASSERT(error != -ENOSPC); 253462306a36Sopenharmony_ci goto std_return; 253562306a36Sopenharmony_ci } 253662306a36Sopenharmony_ci 253762306a36Sopenharmony_ci /* 253862306a36Sopenharmony_ci * If we're removing a directory perform some additional validation. 
253962306a36Sopenharmony_ci */ 254062306a36Sopenharmony_ci if (is_dir) { 254162306a36Sopenharmony_ci ASSERT(VFS_I(ip)->i_nlink >= 2); 254262306a36Sopenharmony_ci if (VFS_I(ip)->i_nlink != 2) { 254362306a36Sopenharmony_ci error = -ENOTEMPTY; 254462306a36Sopenharmony_ci goto out_trans_cancel; 254562306a36Sopenharmony_ci } 254662306a36Sopenharmony_ci if (!xfs_dir_isempty(ip)) { 254762306a36Sopenharmony_ci error = -ENOTEMPTY; 254862306a36Sopenharmony_ci goto out_trans_cancel; 254962306a36Sopenharmony_ci } 255062306a36Sopenharmony_ci 255162306a36Sopenharmony_ci /* Drop the link from ip's "..". */ 255262306a36Sopenharmony_ci error = xfs_droplink(tp, dp); 255362306a36Sopenharmony_ci if (error) 255462306a36Sopenharmony_ci goto out_trans_cancel; 255562306a36Sopenharmony_ci 255662306a36Sopenharmony_ci /* Drop the "." link from ip to self. */ 255762306a36Sopenharmony_ci error = xfs_droplink(tp, ip); 255862306a36Sopenharmony_ci if (error) 255962306a36Sopenharmony_ci goto out_trans_cancel; 256062306a36Sopenharmony_ci 256162306a36Sopenharmony_ci /* 256262306a36Sopenharmony_ci * Point the unlinked child directory's ".." entry to the root 256362306a36Sopenharmony_ci * directory to eliminate back-references to inodes that may 256462306a36Sopenharmony_ci * get freed before the child directory is closed. If the fs 256562306a36Sopenharmony_ci * gets shrunk, this can lead to dirent inode validation errors. 256662306a36Sopenharmony_ci */ 256762306a36Sopenharmony_ci if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) { 256862306a36Sopenharmony_ci error = xfs_dir_replace(tp, ip, &xfs_name_dotdot, 256962306a36Sopenharmony_ci tp->t_mountp->m_sb.sb_rootino, 0); 257062306a36Sopenharmony_ci if (error) 257162306a36Sopenharmony_ci goto out_trans_cancel; 257262306a36Sopenharmony_ci } 257362306a36Sopenharmony_ci } else { 257462306a36Sopenharmony_ci /* 257562306a36Sopenharmony_ci * When removing a non-directory we need to log the parent 257662306a36Sopenharmony_ci * inode here. 
For a directory this is done implicitly 257762306a36Sopenharmony_ci * by the xfs_droplink call for the ".." entry. 257862306a36Sopenharmony_ci */ 257962306a36Sopenharmony_ci xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 258062306a36Sopenharmony_ci } 258162306a36Sopenharmony_ci xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 258262306a36Sopenharmony_ci 258362306a36Sopenharmony_ci /* Drop the link from dp to ip. */ 258462306a36Sopenharmony_ci error = xfs_droplink(tp, ip); 258562306a36Sopenharmony_ci if (error) 258662306a36Sopenharmony_ci goto out_trans_cancel; 258762306a36Sopenharmony_ci 258862306a36Sopenharmony_ci error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks); 258962306a36Sopenharmony_ci if (error) { 259062306a36Sopenharmony_ci ASSERT(error != -ENOENT); 259162306a36Sopenharmony_ci goto out_trans_cancel; 259262306a36Sopenharmony_ci } 259362306a36Sopenharmony_ci 259462306a36Sopenharmony_ci /* 259562306a36Sopenharmony_ci * If this is a synchronous mount, make sure that the 259662306a36Sopenharmony_ci * remove transaction goes to disk before returning to 259762306a36Sopenharmony_ci * the user. 
259862306a36Sopenharmony_ci */ 259962306a36Sopenharmony_ci if (xfs_has_wsync(mp) || xfs_has_dirsync(mp)) 260062306a36Sopenharmony_ci xfs_trans_set_sync(tp); 260162306a36Sopenharmony_ci 260262306a36Sopenharmony_ci error = xfs_trans_commit(tp); 260362306a36Sopenharmony_ci if (error) 260462306a36Sopenharmony_ci goto std_return; 260562306a36Sopenharmony_ci 260662306a36Sopenharmony_ci if (is_dir && xfs_inode_is_filestream(ip)) 260762306a36Sopenharmony_ci xfs_filestream_deassociate(ip); 260862306a36Sopenharmony_ci 260962306a36Sopenharmony_ci return 0; 261062306a36Sopenharmony_ci 261162306a36Sopenharmony_ci out_trans_cancel: 261262306a36Sopenharmony_ci xfs_trans_cancel(tp); 261362306a36Sopenharmony_ci std_return: 261462306a36Sopenharmony_ci return error; 261562306a36Sopenharmony_ci} 261662306a36Sopenharmony_ci 261762306a36Sopenharmony_ci/* 261862306a36Sopenharmony_ci * Enter all inodes for a rename transaction into a sorted array. 261962306a36Sopenharmony_ci */ 262062306a36Sopenharmony_ci#define __XFS_SORT_INODES 5 262162306a36Sopenharmony_ciSTATIC void 262262306a36Sopenharmony_cixfs_sort_for_rename( 262362306a36Sopenharmony_ci struct xfs_inode *dp1, /* in: old (source) directory inode */ 262462306a36Sopenharmony_ci struct xfs_inode *dp2, /* in: new (target) directory inode */ 262562306a36Sopenharmony_ci struct xfs_inode *ip1, /* in: inode of old entry */ 262662306a36Sopenharmony_ci struct xfs_inode *ip2, /* in: inode of new entry */ 262762306a36Sopenharmony_ci struct xfs_inode *wip, /* in: whiteout inode */ 262862306a36Sopenharmony_ci struct xfs_inode **i_tab,/* out: sorted array of inodes */ 262962306a36Sopenharmony_ci int *num_inodes) /* in/out: inodes in array */ 263062306a36Sopenharmony_ci{ 263162306a36Sopenharmony_ci int i, j; 263262306a36Sopenharmony_ci 263362306a36Sopenharmony_ci ASSERT(*num_inodes == __XFS_SORT_INODES); 263462306a36Sopenharmony_ci memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *)); 263562306a36Sopenharmony_ci 263662306a36Sopenharmony_ci 
/* 263762306a36Sopenharmony_ci * i_tab contains a list of pointers to inodes. We initialize 263862306a36Sopenharmony_ci * the table here & we'll sort it. We will then use it to 263962306a36Sopenharmony_ci * order the acquisition of the inode locks. 264062306a36Sopenharmony_ci * 264162306a36Sopenharmony_ci * Note that the table may contain duplicates. e.g., dp1 == dp2. 264262306a36Sopenharmony_ci */ 264362306a36Sopenharmony_ci i = 0; 264462306a36Sopenharmony_ci i_tab[i++] = dp1; 264562306a36Sopenharmony_ci i_tab[i++] = dp2; 264662306a36Sopenharmony_ci i_tab[i++] = ip1; 264762306a36Sopenharmony_ci if (ip2) 264862306a36Sopenharmony_ci i_tab[i++] = ip2; 264962306a36Sopenharmony_ci if (wip) 265062306a36Sopenharmony_ci i_tab[i++] = wip; 265162306a36Sopenharmony_ci *num_inodes = i; 265262306a36Sopenharmony_ci 265362306a36Sopenharmony_ci /* 265462306a36Sopenharmony_ci * Sort the elements via bubble sort. (Remember, there are at 265562306a36Sopenharmony_ci * most 5 elements to sort, so this is adequate.) 265662306a36Sopenharmony_ci */ 265762306a36Sopenharmony_ci for (i = 0; i < *num_inodes; i++) { 265862306a36Sopenharmony_ci for (j = 1; j < *num_inodes; j++) { 265962306a36Sopenharmony_ci if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { 266062306a36Sopenharmony_ci struct xfs_inode *temp = i_tab[j]; 266162306a36Sopenharmony_ci i_tab[j] = i_tab[j-1]; 266262306a36Sopenharmony_ci i_tab[j-1] = temp; 266362306a36Sopenharmony_ci } 266462306a36Sopenharmony_ci } 266562306a36Sopenharmony_ci } 266662306a36Sopenharmony_ci} 266762306a36Sopenharmony_ci 266862306a36Sopenharmony_cistatic int 266962306a36Sopenharmony_cixfs_finish_rename( 267062306a36Sopenharmony_ci struct xfs_trans *tp) 267162306a36Sopenharmony_ci{ 267262306a36Sopenharmony_ci /* 267362306a36Sopenharmony_ci * If this is a synchronous mount, make sure that the rename transaction 267462306a36Sopenharmony_ci * goes to disk before returning to the user. 
267562306a36Sopenharmony_ci */ 267662306a36Sopenharmony_ci if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp)) 267762306a36Sopenharmony_ci xfs_trans_set_sync(tp); 267862306a36Sopenharmony_ci 267962306a36Sopenharmony_ci return xfs_trans_commit(tp); 268062306a36Sopenharmony_ci} 268162306a36Sopenharmony_ci 268262306a36Sopenharmony_ci/* 268362306a36Sopenharmony_ci * xfs_cross_rename() 268462306a36Sopenharmony_ci * 268562306a36Sopenharmony_ci * responsible for handling RENAME_EXCHANGE flag in renameat2() syscall 268662306a36Sopenharmony_ci */ 268762306a36Sopenharmony_ciSTATIC int 268862306a36Sopenharmony_cixfs_cross_rename( 268962306a36Sopenharmony_ci struct xfs_trans *tp, 269062306a36Sopenharmony_ci struct xfs_inode *dp1, 269162306a36Sopenharmony_ci struct xfs_name *name1, 269262306a36Sopenharmony_ci struct xfs_inode *ip1, 269362306a36Sopenharmony_ci struct xfs_inode *dp2, 269462306a36Sopenharmony_ci struct xfs_name *name2, 269562306a36Sopenharmony_ci struct xfs_inode *ip2, 269662306a36Sopenharmony_ci int spaceres) 269762306a36Sopenharmony_ci{ 269862306a36Sopenharmony_ci int error = 0; 269962306a36Sopenharmony_ci int ip1_flags = 0; 270062306a36Sopenharmony_ci int ip2_flags = 0; 270162306a36Sopenharmony_ci int dp2_flags = 0; 270262306a36Sopenharmony_ci 270362306a36Sopenharmony_ci /* Swap inode number for dirent in first parent */ 270462306a36Sopenharmony_ci error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres); 270562306a36Sopenharmony_ci if (error) 270662306a36Sopenharmony_ci goto out_trans_abort; 270762306a36Sopenharmony_ci 270862306a36Sopenharmony_ci /* Swap inode number for dirent in second parent */ 270962306a36Sopenharmony_ci error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, spaceres); 271062306a36Sopenharmony_ci if (error) 271162306a36Sopenharmony_ci goto out_trans_abort; 271262306a36Sopenharmony_ci 271362306a36Sopenharmony_ci /* 271462306a36Sopenharmony_ci * If we're renaming one or more directories across different parents, 
271562306a36Sopenharmony_ci * update the respective ".." entries (and link counts) to match the new 271662306a36Sopenharmony_ci * parents. 271762306a36Sopenharmony_ci */ 271862306a36Sopenharmony_ci if (dp1 != dp2) { 271962306a36Sopenharmony_ci dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 272062306a36Sopenharmony_ci 272162306a36Sopenharmony_ci if (S_ISDIR(VFS_I(ip2)->i_mode)) { 272262306a36Sopenharmony_ci error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot, 272362306a36Sopenharmony_ci dp1->i_ino, spaceres); 272462306a36Sopenharmony_ci if (error) 272562306a36Sopenharmony_ci goto out_trans_abort; 272662306a36Sopenharmony_ci 272762306a36Sopenharmony_ci /* transfer ip2 ".." reference to dp1 */ 272862306a36Sopenharmony_ci if (!S_ISDIR(VFS_I(ip1)->i_mode)) { 272962306a36Sopenharmony_ci error = xfs_droplink(tp, dp2); 273062306a36Sopenharmony_ci if (error) 273162306a36Sopenharmony_ci goto out_trans_abort; 273262306a36Sopenharmony_ci xfs_bumplink(tp, dp1); 273362306a36Sopenharmony_ci } 273462306a36Sopenharmony_ci 273562306a36Sopenharmony_ci /* 273662306a36Sopenharmony_ci * Although ip1 isn't changed here, userspace needs 273762306a36Sopenharmony_ci * to be warned about the change, so that applications 273862306a36Sopenharmony_ci * relying on it (like backup ones), will properly 273962306a36Sopenharmony_ci * notify the change 274062306a36Sopenharmony_ci */ 274162306a36Sopenharmony_ci ip1_flags |= XFS_ICHGTIME_CHG; 274262306a36Sopenharmony_ci ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 274362306a36Sopenharmony_ci } 274462306a36Sopenharmony_ci 274562306a36Sopenharmony_ci if (S_ISDIR(VFS_I(ip1)->i_mode)) { 274662306a36Sopenharmony_ci error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot, 274762306a36Sopenharmony_ci dp2->i_ino, spaceres); 274862306a36Sopenharmony_ci if (error) 274962306a36Sopenharmony_ci goto out_trans_abort; 275062306a36Sopenharmony_ci 275162306a36Sopenharmony_ci /* transfer ip1 ".." 
reference to dp2 */ 275262306a36Sopenharmony_ci if (!S_ISDIR(VFS_I(ip2)->i_mode)) { 275362306a36Sopenharmony_ci error = xfs_droplink(tp, dp1); 275462306a36Sopenharmony_ci if (error) 275562306a36Sopenharmony_ci goto out_trans_abort; 275662306a36Sopenharmony_ci xfs_bumplink(tp, dp2); 275762306a36Sopenharmony_ci } 275862306a36Sopenharmony_ci 275962306a36Sopenharmony_ci /* 276062306a36Sopenharmony_ci * Although ip2 isn't changed here, userspace needs 276162306a36Sopenharmony_ci * to be warned about the change, so that applications 276262306a36Sopenharmony_ci * relying on it (like backup ones), will properly 276362306a36Sopenharmony_ci * notify the change 276462306a36Sopenharmony_ci */ 276562306a36Sopenharmony_ci ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 276662306a36Sopenharmony_ci ip2_flags |= XFS_ICHGTIME_CHG; 276762306a36Sopenharmony_ci } 276862306a36Sopenharmony_ci } 276962306a36Sopenharmony_ci 277062306a36Sopenharmony_ci if (ip1_flags) { 277162306a36Sopenharmony_ci xfs_trans_ichgtime(tp, ip1, ip1_flags); 277262306a36Sopenharmony_ci xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE); 277362306a36Sopenharmony_ci } 277462306a36Sopenharmony_ci if (ip2_flags) { 277562306a36Sopenharmony_ci xfs_trans_ichgtime(tp, ip2, ip2_flags); 277662306a36Sopenharmony_ci xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE); 277762306a36Sopenharmony_ci } 277862306a36Sopenharmony_ci if (dp2_flags) { 277962306a36Sopenharmony_ci xfs_trans_ichgtime(tp, dp2, dp2_flags); 278062306a36Sopenharmony_ci xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE); 278162306a36Sopenharmony_ci } 278262306a36Sopenharmony_ci xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 278362306a36Sopenharmony_ci xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); 278462306a36Sopenharmony_ci return xfs_finish_rename(tp); 278562306a36Sopenharmony_ci 278662306a36Sopenharmony_ciout_trans_abort: 278762306a36Sopenharmony_ci xfs_trans_cancel(tp); 278862306a36Sopenharmony_ci return error; 278962306a36Sopenharmony_ci} 
279062306a36Sopenharmony_ci 279162306a36Sopenharmony_ci/* 279262306a36Sopenharmony_ci * xfs_rename_alloc_whiteout() 279362306a36Sopenharmony_ci * 279462306a36Sopenharmony_ci * Return a referenced, unlinked, unlocked inode that can be used as a 279562306a36Sopenharmony_ci * whiteout in a rename transaction. We use a tmpfile inode here so that if we 279662306a36Sopenharmony_ci * crash between allocating the inode and linking it into the rename transaction 279762306a36Sopenharmony_ci * recovery will free the inode and we won't leak it. 279862306a36Sopenharmony_ci */ 279962306a36Sopenharmony_cistatic int 280062306a36Sopenharmony_cixfs_rename_alloc_whiteout( 280162306a36Sopenharmony_ci struct mnt_idmap *idmap, 280262306a36Sopenharmony_ci struct xfs_name *src_name, 280362306a36Sopenharmony_ci struct xfs_inode *dp, 280462306a36Sopenharmony_ci struct xfs_inode **wip) 280562306a36Sopenharmony_ci{ 280662306a36Sopenharmony_ci struct xfs_inode *tmpfile; 280762306a36Sopenharmony_ci struct qstr name; 280862306a36Sopenharmony_ci int error; 280962306a36Sopenharmony_ci 281062306a36Sopenharmony_ci error = xfs_create_tmpfile(idmap, dp, S_IFCHR | WHITEOUT_MODE, 281162306a36Sopenharmony_ci &tmpfile); 281262306a36Sopenharmony_ci if (error) 281362306a36Sopenharmony_ci return error; 281462306a36Sopenharmony_ci 281562306a36Sopenharmony_ci name.name = src_name->name; 281662306a36Sopenharmony_ci name.len = src_name->len; 281762306a36Sopenharmony_ci error = xfs_inode_init_security(VFS_I(tmpfile), VFS_I(dp), &name); 281862306a36Sopenharmony_ci if (error) { 281962306a36Sopenharmony_ci xfs_finish_inode_setup(tmpfile); 282062306a36Sopenharmony_ci xfs_irele(tmpfile); 282162306a36Sopenharmony_ci return error; 282262306a36Sopenharmony_ci } 282362306a36Sopenharmony_ci 282462306a36Sopenharmony_ci /* 282562306a36Sopenharmony_ci * Prepare the tmpfile inode as if it were created through the VFS. 282662306a36Sopenharmony_ci * Complete the inode setup and flag it as linkable. 
nlink is already 282762306a36Sopenharmony_ci * zero, so we can skip the drop_nlink. 282862306a36Sopenharmony_ci */ 282962306a36Sopenharmony_ci xfs_setup_iops(tmpfile); 283062306a36Sopenharmony_ci xfs_finish_inode_setup(tmpfile); 283162306a36Sopenharmony_ci VFS_I(tmpfile)->i_state |= I_LINKABLE; 283262306a36Sopenharmony_ci 283362306a36Sopenharmony_ci *wip = tmpfile; 283462306a36Sopenharmony_ci return 0; 283562306a36Sopenharmony_ci} 283662306a36Sopenharmony_ci 283762306a36Sopenharmony_ci/* 283862306a36Sopenharmony_ci * xfs_rename 283962306a36Sopenharmony_ci */ 284062306a36Sopenharmony_ciint 284162306a36Sopenharmony_cixfs_rename( 284262306a36Sopenharmony_ci struct mnt_idmap *idmap, 284362306a36Sopenharmony_ci struct xfs_inode *src_dp, 284462306a36Sopenharmony_ci struct xfs_name *src_name, 284562306a36Sopenharmony_ci struct xfs_inode *src_ip, 284662306a36Sopenharmony_ci struct xfs_inode *target_dp, 284762306a36Sopenharmony_ci struct xfs_name *target_name, 284862306a36Sopenharmony_ci struct xfs_inode *target_ip, 284962306a36Sopenharmony_ci unsigned int flags) 285062306a36Sopenharmony_ci{ 285162306a36Sopenharmony_ci struct xfs_mount *mp = src_dp->i_mount; 285262306a36Sopenharmony_ci struct xfs_trans *tp; 285362306a36Sopenharmony_ci struct xfs_inode *wip = NULL; /* whiteout inode */ 285462306a36Sopenharmony_ci struct xfs_inode *inodes[__XFS_SORT_INODES]; 285562306a36Sopenharmony_ci int i; 285662306a36Sopenharmony_ci int num_inodes = __XFS_SORT_INODES; 285762306a36Sopenharmony_ci bool new_parent = (src_dp != target_dp); 285862306a36Sopenharmony_ci bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode); 285962306a36Sopenharmony_ci int spaceres; 286062306a36Sopenharmony_ci bool retried = false; 286162306a36Sopenharmony_ci int error, nospace_error = 0; 286262306a36Sopenharmony_ci 286362306a36Sopenharmony_ci trace_xfs_rename(src_dp, target_dp, src_name, target_name); 286462306a36Sopenharmony_ci 286562306a36Sopenharmony_ci if ((flags & RENAME_EXCHANGE) && !target_ip) 
286662306a36Sopenharmony_ci return -EINVAL; 286762306a36Sopenharmony_ci 286862306a36Sopenharmony_ci /* 286962306a36Sopenharmony_ci * If we are doing a whiteout operation, allocate the whiteout inode 287062306a36Sopenharmony_ci * we will be placing at the target and ensure the type is set 287162306a36Sopenharmony_ci * appropriately. 287262306a36Sopenharmony_ci */ 287362306a36Sopenharmony_ci if (flags & RENAME_WHITEOUT) { 287462306a36Sopenharmony_ci error = xfs_rename_alloc_whiteout(idmap, src_name, 287562306a36Sopenharmony_ci target_dp, &wip); 287662306a36Sopenharmony_ci if (error) 287762306a36Sopenharmony_ci return error; 287862306a36Sopenharmony_ci 287962306a36Sopenharmony_ci /* setup target dirent info as whiteout */ 288062306a36Sopenharmony_ci src_name->type = XFS_DIR3_FT_CHRDEV; 288162306a36Sopenharmony_ci } 288262306a36Sopenharmony_ci 288362306a36Sopenharmony_ci xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip, 288462306a36Sopenharmony_ci inodes, &num_inodes); 288562306a36Sopenharmony_ci 288662306a36Sopenharmony_ciretry: 288762306a36Sopenharmony_ci nospace_error = 0; 288862306a36Sopenharmony_ci spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); 288962306a36Sopenharmony_ci error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp); 289062306a36Sopenharmony_ci if (error == -ENOSPC) { 289162306a36Sopenharmony_ci nospace_error = error; 289262306a36Sopenharmony_ci spaceres = 0; 289362306a36Sopenharmony_ci error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0, 289462306a36Sopenharmony_ci &tp); 289562306a36Sopenharmony_ci } 289662306a36Sopenharmony_ci if (error) 289762306a36Sopenharmony_ci goto out_release_wip; 289862306a36Sopenharmony_ci 289962306a36Sopenharmony_ci /* 290062306a36Sopenharmony_ci * Attach the dquots to the inodes 290162306a36Sopenharmony_ci */ 290262306a36Sopenharmony_ci error = xfs_qm_vop_rename_dqattach(inodes); 290362306a36Sopenharmony_ci if (error) 290462306a36Sopenharmony_ci goto out_trans_cancel; 
290562306a36Sopenharmony_ci 290662306a36Sopenharmony_ci /* 290762306a36Sopenharmony_ci * Lock all the participating inodes. Depending upon whether 290862306a36Sopenharmony_ci * the target_name exists in the target directory, and 290962306a36Sopenharmony_ci * whether the target directory is the same as the source 291062306a36Sopenharmony_ci * directory, we can lock from 2 to 5 inodes. 291162306a36Sopenharmony_ci */ 291262306a36Sopenharmony_ci xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); 291362306a36Sopenharmony_ci 291462306a36Sopenharmony_ci /* 291562306a36Sopenharmony_ci * Join all the inodes to the transaction. From this point on, 291662306a36Sopenharmony_ci * we can rely on either trans_commit or trans_cancel to unlock 291762306a36Sopenharmony_ci * them. 291862306a36Sopenharmony_ci */ 291962306a36Sopenharmony_ci xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); 292062306a36Sopenharmony_ci if (new_parent) 292162306a36Sopenharmony_ci xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); 292262306a36Sopenharmony_ci xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); 292362306a36Sopenharmony_ci if (target_ip) 292462306a36Sopenharmony_ci xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); 292562306a36Sopenharmony_ci if (wip) 292662306a36Sopenharmony_ci xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL); 292762306a36Sopenharmony_ci 292862306a36Sopenharmony_ci /* 292962306a36Sopenharmony_ci * If we are using project inheritance, we only allow renames 293062306a36Sopenharmony_ci * into our tree when the project IDs are the same; else the 293162306a36Sopenharmony_ci * tree quota mechanism would be circumvented. 
293262306a36Sopenharmony_ci */ 293362306a36Sopenharmony_ci if (unlikely((target_dp->i_diflags & XFS_DIFLAG_PROJINHERIT) && 293462306a36Sopenharmony_ci target_dp->i_projid != src_ip->i_projid)) { 293562306a36Sopenharmony_ci error = -EXDEV; 293662306a36Sopenharmony_ci goto out_trans_cancel; 293762306a36Sopenharmony_ci } 293862306a36Sopenharmony_ci 293962306a36Sopenharmony_ci /* RENAME_EXCHANGE is unique from here on. */ 294062306a36Sopenharmony_ci if (flags & RENAME_EXCHANGE) 294162306a36Sopenharmony_ci return xfs_cross_rename(tp, src_dp, src_name, src_ip, 294262306a36Sopenharmony_ci target_dp, target_name, target_ip, 294362306a36Sopenharmony_ci spaceres); 294462306a36Sopenharmony_ci 294562306a36Sopenharmony_ci /* 294662306a36Sopenharmony_ci * Try to reserve quota to handle an expansion of the target directory. 294762306a36Sopenharmony_ci * We'll allow the rename to continue in reservationless mode if we hit 294862306a36Sopenharmony_ci * a space usage constraint. If we trigger reservationless mode, save 294962306a36Sopenharmony_ci * the errno if there isn't any free space in the target directory. 
295062306a36Sopenharmony_ci */ 295162306a36Sopenharmony_ci if (spaceres != 0) { 295262306a36Sopenharmony_ci error = xfs_trans_reserve_quota_nblks(tp, target_dp, spaceres, 295362306a36Sopenharmony_ci 0, false); 295462306a36Sopenharmony_ci if (error == -EDQUOT || error == -ENOSPC) { 295562306a36Sopenharmony_ci if (!retried) { 295662306a36Sopenharmony_ci xfs_trans_cancel(tp); 295762306a36Sopenharmony_ci xfs_blockgc_free_quota(target_dp, 0); 295862306a36Sopenharmony_ci retried = true; 295962306a36Sopenharmony_ci goto retry; 296062306a36Sopenharmony_ci } 296162306a36Sopenharmony_ci 296262306a36Sopenharmony_ci nospace_error = error; 296362306a36Sopenharmony_ci spaceres = 0; 296462306a36Sopenharmony_ci error = 0; 296562306a36Sopenharmony_ci } 296662306a36Sopenharmony_ci if (error) 296762306a36Sopenharmony_ci goto out_trans_cancel; 296862306a36Sopenharmony_ci } 296962306a36Sopenharmony_ci 297062306a36Sopenharmony_ci /* 297162306a36Sopenharmony_ci * Check for expected errors before we dirty the transaction 297262306a36Sopenharmony_ci * so we can return an error without a transaction abort. 297362306a36Sopenharmony_ci */ 297462306a36Sopenharmony_ci if (target_ip == NULL) { 297562306a36Sopenharmony_ci /* 297662306a36Sopenharmony_ci * If there's no space reservation, check the entry will 297762306a36Sopenharmony_ci * fit before actually inserting it. 297862306a36Sopenharmony_ci */ 297962306a36Sopenharmony_ci if (!spaceres) { 298062306a36Sopenharmony_ci error = xfs_dir_canenter(tp, target_dp, target_name); 298162306a36Sopenharmony_ci if (error) 298262306a36Sopenharmony_ci goto out_trans_cancel; 298362306a36Sopenharmony_ci } 298462306a36Sopenharmony_ci } else { 298562306a36Sopenharmony_ci /* 298662306a36Sopenharmony_ci * If target exists and it's a directory, check that whether 298762306a36Sopenharmony_ci * it can be destroyed. 
298862306a36Sopenharmony_ci */ 298962306a36Sopenharmony_ci if (S_ISDIR(VFS_I(target_ip)->i_mode) && 299062306a36Sopenharmony_ci (!xfs_dir_isempty(target_ip) || 299162306a36Sopenharmony_ci (VFS_I(target_ip)->i_nlink > 2))) { 299262306a36Sopenharmony_ci error = -EEXIST; 299362306a36Sopenharmony_ci goto out_trans_cancel; 299462306a36Sopenharmony_ci } 299562306a36Sopenharmony_ci } 299662306a36Sopenharmony_ci 299762306a36Sopenharmony_ci /* 299862306a36Sopenharmony_ci * Lock the AGI buffers we need to handle bumping the nlink of the 299962306a36Sopenharmony_ci * whiteout inode off the unlinked list and to handle dropping the 300062306a36Sopenharmony_ci * nlink of the target inode. Per locking order rules, do this in 300162306a36Sopenharmony_ci * increasing AG order and before directory block allocation tries to 300262306a36Sopenharmony_ci * grab AGFs because we grab AGIs before AGFs. 300362306a36Sopenharmony_ci * 300462306a36Sopenharmony_ci * The (vfs) caller must ensure that if src is a directory then 300562306a36Sopenharmony_ci * target_ip is either null or an empty directory. 
300662306a36Sopenharmony_ci */ 300762306a36Sopenharmony_ci for (i = 0; i < num_inodes && inodes[i] != NULL; i++) { 300862306a36Sopenharmony_ci if (inodes[i] == wip || 300962306a36Sopenharmony_ci (inodes[i] == target_ip && 301062306a36Sopenharmony_ci (VFS_I(target_ip)->i_nlink == 1 || src_is_directory))) { 301162306a36Sopenharmony_ci struct xfs_perag *pag; 301262306a36Sopenharmony_ci struct xfs_buf *bp; 301362306a36Sopenharmony_ci 301462306a36Sopenharmony_ci pag = xfs_perag_get(mp, 301562306a36Sopenharmony_ci XFS_INO_TO_AGNO(mp, inodes[i]->i_ino)); 301662306a36Sopenharmony_ci error = xfs_read_agi(pag, tp, &bp); 301762306a36Sopenharmony_ci xfs_perag_put(pag); 301862306a36Sopenharmony_ci if (error) 301962306a36Sopenharmony_ci goto out_trans_cancel; 302062306a36Sopenharmony_ci } 302162306a36Sopenharmony_ci } 302262306a36Sopenharmony_ci 302362306a36Sopenharmony_ci /* 302462306a36Sopenharmony_ci * Directory entry creation below may acquire the AGF. Remove 302562306a36Sopenharmony_ci * the whiteout from the unlinked list first to preserve correct 302662306a36Sopenharmony_ci * AGI/AGF locking order. This dirties the transaction so failures 302762306a36Sopenharmony_ci * after this point will abort and log recovery will clean up the 302862306a36Sopenharmony_ci * mess. 302962306a36Sopenharmony_ci * 303062306a36Sopenharmony_ci * For whiteouts, we need to bump the link count on the whiteout 303162306a36Sopenharmony_ci * inode. After this point, we have a real link, clear the tmpfile 303262306a36Sopenharmony_ci * state flag from the inode so it doesn't accidentally get misused 303362306a36Sopenharmony_ci * in future. 
303462306a36Sopenharmony_ci */ 303562306a36Sopenharmony_ci if (wip) { 303662306a36Sopenharmony_ci struct xfs_perag *pag; 303762306a36Sopenharmony_ci 303862306a36Sopenharmony_ci ASSERT(VFS_I(wip)->i_nlink == 0); 303962306a36Sopenharmony_ci 304062306a36Sopenharmony_ci pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, wip->i_ino)); 304162306a36Sopenharmony_ci error = xfs_iunlink_remove(tp, pag, wip); 304262306a36Sopenharmony_ci xfs_perag_put(pag); 304362306a36Sopenharmony_ci if (error) 304462306a36Sopenharmony_ci goto out_trans_cancel; 304562306a36Sopenharmony_ci 304662306a36Sopenharmony_ci xfs_bumplink(tp, wip); 304762306a36Sopenharmony_ci VFS_I(wip)->i_state &= ~I_LINKABLE; 304862306a36Sopenharmony_ci } 304962306a36Sopenharmony_ci 305062306a36Sopenharmony_ci /* 305162306a36Sopenharmony_ci * Set up the target. 305262306a36Sopenharmony_ci */ 305362306a36Sopenharmony_ci if (target_ip == NULL) { 305462306a36Sopenharmony_ci /* 305562306a36Sopenharmony_ci * If target does not exist and the rename crosses 305662306a36Sopenharmony_ci * directories, adjust the target directory link count 305762306a36Sopenharmony_ci * to account for the ".." reference from the new entry. 305862306a36Sopenharmony_ci */ 305962306a36Sopenharmony_ci error = xfs_dir_createname(tp, target_dp, target_name, 306062306a36Sopenharmony_ci src_ip->i_ino, spaceres); 306162306a36Sopenharmony_ci if (error) 306262306a36Sopenharmony_ci goto out_trans_cancel; 306362306a36Sopenharmony_ci 306462306a36Sopenharmony_ci xfs_trans_ichgtime(tp, target_dp, 306562306a36Sopenharmony_ci XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 306662306a36Sopenharmony_ci 306762306a36Sopenharmony_ci if (new_parent && src_is_directory) { 306862306a36Sopenharmony_ci xfs_bumplink(tp, target_dp); 306962306a36Sopenharmony_ci } 307062306a36Sopenharmony_ci } else { /* target_ip != NULL */ 307162306a36Sopenharmony_ci /* 307262306a36Sopenharmony_ci * Link the source inode under the target name. 
307362306a36Sopenharmony_ci * If the source inode is a directory and we are moving 307462306a36Sopenharmony_ci * it across directories, its ".." entry will be 307562306a36Sopenharmony_ci * inconsistent until we replace that down below. 307662306a36Sopenharmony_ci * 307762306a36Sopenharmony_ci * In case there is already an entry with the same 307862306a36Sopenharmony_ci * name at the destination directory, remove it first. 307962306a36Sopenharmony_ci */ 308062306a36Sopenharmony_ci error = xfs_dir_replace(tp, target_dp, target_name, 308162306a36Sopenharmony_ci src_ip->i_ino, spaceres); 308262306a36Sopenharmony_ci if (error) 308362306a36Sopenharmony_ci goto out_trans_cancel; 308462306a36Sopenharmony_ci 308562306a36Sopenharmony_ci xfs_trans_ichgtime(tp, target_dp, 308662306a36Sopenharmony_ci XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 308762306a36Sopenharmony_ci 308862306a36Sopenharmony_ci /* 308962306a36Sopenharmony_ci * Decrement the link count on the target since the target 309062306a36Sopenharmony_ci * dir no longer points to it. 309162306a36Sopenharmony_ci */ 309262306a36Sopenharmony_ci error = xfs_droplink(tp, target_ip); 309362306a36Sopenharmony_ci if (error) 309462306a36Sopenharmony_ci goto out_trans_cancel; 309562306a36Sopenharmony_ci 309662306a36Sopenharmony_ci if (src_is_directory) { 309762306a36Sopenharmony_ci /* 309862306a36Sopenharmony_ci * Drop the link from the old "." entry. 309962306a36Sopenharmony_ci */ 310062306a36Sopenharmony_ci error = xfs_droplink(tp, target_ip); 310162306a36Sopenharmony_ci if (error) 310262306a36Sopenharmony_ci goto out_trans_cancel; 310362306a36Sopenharmony_ci } 310462306a36Sopenharmony_ci } /* target_ip != NULL */ 310562306a36Sopenharmony_ci 310662306a36Sopenharmony_ci /* 310762306a36Sopenharmony_ci * Remove the source. 310862306a36Sopenharmony_ci */ 310962306a36Sopenharmony_ci if (new_parent && src_is_directory) { 311062306a36Sopenharmony_ci /* 311162306a36Sopenharmony_ci * Rewrite the ".." 
entry to point to the new 311262306a36Sopenharmony_ci * directory. 311362306a36Sopenharmony_ci */ 311462306a36Sopenharmony_ci error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, 311562306a36Sopenharmony_ci target_dp->i_ino, spaceres); 311662306a36Sopenharmony_ci ASSERT(error != -EEXIST); 311762306a36Sopenharmony_ci if (error) 311862306a36Sopenharmony_ci goto out_trans_cancel; 311962306a36Sopenharmony_ci } 312062306a36Sopenharmony_ci 312162306a36Sopenharmony_ci /* 312262306a36Sopenharmony_ci * We always want to hit the ctime on the source inode. 312362306a36Sopenharmony_ci * 312462306a36Sopenharmony_ci * This isn't strictly required by the standards since the source 312562306a36Sopenharmony_ci * inode isn't really being changed, but old unix file systems did 312662306a36Sopenharmony_ci * it and some incremental backup programs won't work without it. 312762306a36Sopenharmony_ci */ 312862306a36Sopenharmony_ci xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); 312962306a36Sopenharmony_ci xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE); 313062306a36Sopenharmony_ci 313162306a36Sopenharmony_ci /* 313262306a36Sopenharmony_ci * Adjust the link count on src_dp. This is necessary when 313362306a36Sopenharmony_ci * renaming a directory, either within one parent when 313462306a36Sopenharmony_ci * the target existed, or across two parent directories. 313562306a36Sopenharmony_ci */ 313662306a36Sopenharmony_ci if (src_is_directory && (new_parent || target_ip != NULL)) { 313762306a36Sopenharmony_ci 313862306a36Sopenharmony_ci /* 313962306a36Sopenharmony_ci * Decrement link count on src_directory since the 314062306a36Sopenharmony_ci * entry that's moved no longer points to it. 
314162306a36Sopenharmony_ci */ 314262306a36Sopenharmony_ci error = xfs_droplink(tp, src_dp); 314362306a36Sopenharmony_ci if (error) 314462306a36Sopenharmony_ci goto out_trans_cancel; 314562306a36Sopenharmony_ci } 314662306a36Sopenharmony_ci 314762306a36Sopenharmony_ci /* 314862306a36Sopenharmony_ci * For whiteouts, we only need to update the source dirent with the 314962306a36Sopenharmony_ci * inode number of the whiteout inode rather than removing it 315062306a36Sopenharmony_ci * altogether. 315162306a36Sopenharmony_ci */ 315262306a36Sopenharmony_ci if (wip) 315362306a36Sopenharmony_ci error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino, 315462306a36Sopenharmony_ci spaceres); 315562306a36Sopenharmony_ci else 315662306a36Sopenharmony_ci error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, 315762306a36Sopenharmony_ci spaceres); 315862306a36Sopenharmony_ci 315962306a36Sopenharmony_ci if (error) 316062306a36Sopenharmony_ci goto out_trans_cancel; 316162306a36Sopenharmony_ci 316262306a36Sopenharmony_ci xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 316362306a36Sopenharmony_ci xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); 316462306a36Sopenharmony_ci if (new_parent) 316562306a36Sopenharmony_ci xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); 316662306a36Sopenharmony_ci 316762306a36Sopenharmony_ci error = xfs_finish_rename(tp); 316862306a36Sopenharmony_ci if (wip) 316962306a36Sopenharmony_ci xfs_irele(wip); 317062306a36Sopenharmony_ci return error; 317162306a36Sopenharmony_ci 317262306a36Sopenharmony_ciout_trans_cancel: 317362306a36Sopenharmony_ci xfs_trans_cancel(tp); 317462306a36Sopenharmony_ciout_release_wip: 317562306a36Sopenharmony_ci if (wip) 317662306a36Sopenharmony_ci xfs_irele(wip); 317762306a36Sopenharmony_ci if (error == -ENOSPC && nospace_error) 317862306a36Sopenharmony_ci error = nospace_error; 317962306a36Sopenharmony_ci return error; 318062306a36Sopenharmony_ci} 318162306a36Sopenharmony_ci 
318262306a36Sopenharmony_cistatic int 318362306a36Sopenharmony_cixfs_iflush( 318462306a36Sopenharmony_ci struct xfs_inode *ip, 318562306a36Sopenharmony_ci struct xfs_buf *bp) 318662306a36Sopenharmony_ci{ 318762306a36Sopenharmony_ci struct xfs_inode_log_item *iip = ip->i_itemp; 318862306a36Sopenharmony_ci struct xfs_dinode *dip; 318962306a36Sopenharmony_ci struct xfs_mount *mp = ip->i_mount; 319062306a36Sopenharmony_ci int error; 319162306a36Sopenharmony_ci 319262306a36Sopenharmony_ci ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 319362306a36Sopenharmony_ci ASSERT(xfs_iflags_test(ip, XFS_IFLUSHING)); 319462306a36Sopenharmony_ci ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE || 319562306a36Sopenharmony_ci ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); 319662306a36Sopenharmony_ci ASSERT(iip->ili_item.li_buf == bp); 319762306a36Sopenharmony_ci 319862306a36Sopenharmony_ci dip = xfs_buf_offset(bp, ip->i_imap.im_boffset); 319962306a36Sopenharmony_ci 320062306a36Sopenharmony_ci /* 320162306a36Sopenharmony_ci * We don't flush the inode if any of the following checks fail, but we 320262306a36Sopenharmony_ci * do still update the log item and attach to the backing buffer as if 320362306a36Sopenharmony_ci * the flush happened. This is a formality to facilitate predictable 320462306a36Sopenharmony_ci * error handling as the caller will shutdown and fail the buffer. 
320562306a36Sopenharmony_ci */ 320662306a36Sopenharmony_ci error = -EFSCORRUPTED; 320762306a36Sopenharmony_ci if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), 320862306a36Sopenharmony_ci mp, XFS_ERRTAG_IFLUSH_1)) { 320962306a36Sopenharmony_ci xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 321062306a36Sopenharmony_ci "%s: Bad inode %llu magic number 0x%x, ptr "PTR_FMT, 321162306a36Sopenharmony_ci __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); 321262306a36Sopenharmony_ci goto flush_out; 321362306a36Sopenharmony_ci } 321462306a36Sopenharmony_ci if (S_ISREG(VFS_I(ip)->i_mode)) { 321562306a36Sopenharmony_ci if (XFS_TEST_ERROR( 321662306a36Sopenharmony_ci ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS && 321762306a36Sopenharmony_ci ip->i_df.if_format != XFS_DINODE_FMT_BTREE, 321862306a36Sopenharmony_ci mp, XFS_ERRTAG_IFLUSH_3)) { 321962306a36Sopenharmony_ci xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 322062306a36Sopenharmony_ci "%s: Bad regular inode %llu, ptr "PTR_FMT, 322162306a36Sopenharmony_ci __func__, ip->i_ino, ip); 322262306a36Sopenharmony_ci goto flush_out; 322362306a36Sopenharmony_ci } 322462306a36Sopenharmony_ci } else if (S_ISDIR(VFS_I(ip)->i_mode)) { 322562306a36Sopenharmony_ci if (XFS_TEST_ERROR( 322662306a36Sopenharmony_ci ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS && 322762306a36Sopenharmony_ci ip->i_df.if_format != XFS_DINODE_FMT_BTREE && 322862306a36Sopenharmony_ci ip->i_df.if_format != XFS_DINODE_FMT_LOCAL, 322962306a36Sopenharmony_ci mp, XFS_ERRTAG_IFLUSH_4)) { 323062306a36Sopenharmony_ci xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 323162306a36Sopenharmony_ci "%s: Bad directory inode %llu, ptr "PTR_FMT, 323262306a36Sopenharmony_ci __func__, ip->i_ino, ip); 323362306a36Sopenharmony_ci goto flush_out; 323462306a36Sopenharmony_ci } 323562306a36Sopenharmony_ci } 323662306a36Sopenharmony_ci if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af) > 323762306a36Sopenharmony_ci ip->i_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { 323862306a36Sopenharmony_ci 
xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 323962306a36Sopenharmony_ci "%s: detected corrupt incore inode %llu, " 324062306a36Sopenharmony_ci "total extents = %llu nblocks = %lld, ptr "PTR_FMT, 324162306a36Sopenharmony_ci __func__, ip->i_ino, 324262306a36Sopenharmony_ci ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af), 324362306a36Sopenharmony_ci ip->i_nblocks, ip); 324462306a36Sopenharmony_ci goto flush_out; 324562306a36Sopenharmony_ci } 324662306a36Sopenharmony_ci if (XFS_TEST_ERROR(ip->i_forkoff > mp->m_sb.sb_inodesize, 324762306a36Sopenharmony_ci mp, XFS_ERRTAG_IFLUSH_6)) { 324862306a36Sopenharmony_ci xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 324962306a36Sopenharmony_ci "%s: bad inode %llu, forkoff 0x%x, ptr "PTR_FMT, 325062306a36Sopenharmony_ci __func__, ip->i_ino, ip->i_forkoff, ip); 325162306a36Sopenharmony_ci goto flush_out; 325262306a36Sopenharmony_ci } 325362306a36Sopenharmony_ci 325462306a36Sopenharmony_ci /* 325562306a36Sopenharmony_ci * Inode item log recovery for v2 inodes are dependent on the flushiter 325662306a36Sopenharmony_ci * count for correct sequencing. We bump the flush iteration count so 325762306a36Sopenharmony_ci * we can detect flushes which postdate a log record during recovery. 325862306a36Sopenharmony_ci * This is redundant as we now log every change and hence this can't 325962306a36Sopenharmony_ci * happen but we need to still do it to ensure backwards compatibility 326062306a36Sopenharmony_ci * with old kernels that predate logging all inode changes. 326162306a36Sopenharmony_ci */ 326262306a36Sopenharmony_ci if (!xfs_has_v3inodes(mp)) 326362306a36Sopenharmony_ci ip->i_flushiter++; 326462306a36Sopenharmony_ci 326562306a36Sopenharmony_ci /* 326662306a36Sopenharmony_ci * If there are inline format data / attr forks attached to this inode, 326762306a36Sopenharmony_ci * make sure they are not corrupt. 
326862306a36Sopenharmony_ci */ 326962306a36Sopenharmony_ci if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL && 327062306a36Sopenharmony_ci xfs_ifork_verify_local_data(ip)) 327162306a36Sopenharmony_ci goto flush_out; 327262306a36Sopenharmony_ci if (xfs_inode_has_attr_fork(ip) && 327362306a36Sopenharmony_ci ip->i_af.if_format == XFS_DINODE_FMT_LOCAL && 327462306a36Sopenharmony_ci xfs_ifork_verify_local_attr(ip)) 327562306a36Sopenharmony_ci goto flush_out; 327662306a36Sopenharmony_ci 327762306a36Sopenharmony_ci /* 327862306a36Sopenharmony_ci * Copy the dirty parts of the inode into the on-disk inode. We always 327962306a36Sopenharmony_ci * copy out the core of the inode, because if the inode is dirty at all 328062306a36Sopenharmony_ci * the core must be. 328162306a36Sopenharmony_ci */ 328262306a36Sopenharmony_ci xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn); 328362306a36Sopenharmony_ci 328462306a36Sopenharmony_ci /* Wrap, we never let the log put out DI_MAX_FLUSH */ 328562306a36Sopenharmony_ci if (!xfs_has_v3inodes(mp)) { 328662306a36Sopenharmony_ci if (ip->i_flushiter == DI_MAX_FLUSH) 328762306a36Sopenharmony_ci ip->i_flushiter = 0; 328862306a36Sopenharmony_ci } 328962306a36Sopenharmony_ci 329062306a36Sopenharmony_ci xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); 329162306a36Sopenharmony_ci if (xfs_inode_has_attr_fork(ip)) 329262306a36Sopenharmony_ci xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); 329362306a36Sopenharmony_ci 329462306a36Sopenharmony_ci /* 329562306a36Sopenharmony_ci * We've recorded everything logged in the inode, so we'd like to clear 329662306a36Sopenharmony_ci * the ili_fields bits so we don't log and flush things unnecessarily. 329762306a36Sopenharmony_ci * However, we can't stop logging all this information until the data 329862306a36Sopenharmony_ci * we've copied into the disk buffer is written to disk. 
If we did we 329962306a36Sopenharmony_ci * might overwrite the copy of the inode in the log with all the data 330062306a36Sopenharmony_ci * after re-logging only part of it, and in the face of a crash we 330162306a36Sopenharmony_ci * wouldn't have all the data we need to recover. 330262306a36Sopenharmony_ci * 330362306a36Sopenharmony_ci * What we do is move the bits to the ili_last_fields field. When 330462306a36Sopenharmony_ci * logging the inode, these bits are moved back to the ili_fields field. 330562306a36Sopenharmony_ci * In the xfs_buf_inode_iodone() routine we clear ili_last_fields, since 330662306a36Sopenharmony_ci * we know that the information those bits represent is permanently on 330762306a36Sopenharmony_ci * disk. As long as the flush completes before the inode is logged 330862306a36Sopenharmony_ci * again, then both ili_fields and ili_last_fields will be cleared. 330962306a36Sopenharmony_ci */ 331062306a36Sopenharmony_ci error = 0; 331162306a36Sopenharmony_ciflush_out: 331262306a36Sopenharmony_ci spin_lock(&iip->ili_lock); 331362306a36Sopenharmony_ci iip->ili_last_fields = iip->ili_fields; 331462306a36Sopenharmony_ci iip->ili_fields = 0; 331562306a36Sopenharmony_ci iip->ili_fsync_fields = 0; 331662306a36Sopenharmony_ci spin_unlock(&iip->ili_lock); 331762306a36Sopenharmony_ci 331862306a36Sopenharmony_ci /* 331962306a36Sopenharmony_ci * Store the current LSN of the inode so that we can tell whether the 332062306a36Sopenharmony_ci * item has moved in the AIL from xfs_buf_inode_iodone(). 332162306a36Sopenharmony_ci */ 332262306a36Sopenharmony_ci xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 332362306a36Sopenharmony_ci &iip->ili_item.li_lsn); 332462306a36Sopenharmony_ci 332562306a36Sopenharmony_ci /* generate the checksum. 
*/ 332662306a36Sopenharmony_ci xfs_dinode_calc_crc(mp, dip); 332762306a36Sopenharmony_ci return error; 332862306a36Sopenharmony_ci} 332962306a36Sopenharmony_ci 333062306a36Sopenharmony_ci/* 333162306a36Sopenharmony_ci * Non-blocking flush of dirty inode metadata into the backing buffer. 333262306a36Sopenharmony_ci * 333362306a36Sopenharmony_ci * The caller must have a reference to the inode and hold the cluster buffer 333462306a36Sopenharmony_ci * locked. The function will walk across all the inodes on the cluster buffer it 333562306a36Sopenharmony_ci * can find and lock without blocking, and flush them to the cluster buffer. 333662306a36Sopenharmony_ci * 333762306a36Sopenharmony_ci * On successful flushing of at least one inode, the caller must write out the 333862306a36Sopenharmony_ci * buffer and release it. If no inodes are flushed, -EAGAIN will be returned and 333962306a36Sopenharmony_ci * the caller needs to release the buffer. On failure, the filesystem will be 334062306a36Sopenharmony_ci * shut down, the buffer will have been unlocked and released, and EFSCORRUPTED 334162306a36Sopenharmony_ci * will be returned. 334262306a36Sopenharmony_ci */ 334362306a36Sopenharmony_ciint 334462306a36Sopenharmony_cixfs_iflush_cluster( 334562306a36Sopenharmony_ci struct xfs_buf *bp) 334662306a36Sopenharmony_ci{ 334762306a36Sopenharmony_ci struct xfs_mount *mp = bp->b_mount; 334862306a36Sopenharmony_ci struct xfs_log_item *lip, *n; 334962306a36Sopenharmony_ci struct xfs_inode *ip; 335062306a36Sopenharmony_ci struct xfs_inode_log_item *iip; 335162306a36Sopenharmony_ci int clcount = 0; 335262306a36Sopenharmony_ci int error = 0; 335362306a36Sopenharmony_ci 335462306a36Sopenharmony_ci /* 335562306a36Sopenharmony_ci * We must use the safe variant here as on shutdown xfs_iflush_abort() 335662306a36Sopenharmony_ci * will remove itself from the list. 
335762306a36Sopenharmony_ci */ 335862306a36Sopenharmony_ci list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) { 335962306a36Sopenharmony_ci iip = (struct xfs_inode_log_item *)lip; 336062306a36Sopenharmony_ci ip = iip->ili_inode; 336162306a36Sopenharmony_ci 336262306a36Sopenharmony_ci /* 336362306a36Sopenharmony_ci * Quick and dirty check to avoid locks if possible. 336462306a36Sopenharmony_ci */ 336562306a36Sopenharmony_ci if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING)) 336662306a36Sopenharmony_ci continue; 336762306a36Sopenharmony_ci if (xfs_ipincount(ip)) 336862306a36Sopenharmony_ci continue; 336962306a36Sopenharmony_ci 337062306a36Sopenharmony_ci /* 337162306a36Sopenharmony_ci * The inode is still attached to the buffer, which means it is 337262306a36Sopenharmony_ci * dirty but reclaim might try to grab it. Check carefully for 337362306a36Sopenharmony_ci * that, and grab the ilock while still holding the i_flags_lock 337462306a36Sopenharmony_ci * to guarantee reclaim will not be able to reclaim this inode 337562306a36Sopenharmony_ci * once we drop the i_flags_lock. 337662306a36Sopenharmony_ci */ 337762306a36Sopenharmony_ci spin_lock(&ip->i_flags_lock); 337862306a36Sopenharmony_ci ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE)); 337962306a36Sopenharmony_ci if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING)) { 338062306a36Sopenharmony_ci spin_unlock(&ip->i_flags_lock); 338162306a36Sopenharmony_ci continue; 338262306a36Sopenharmony_ci } 338362306a36Sopenharmony_ci 338462306a36Sopenharmony_ci /* 338562306a36Sopenharmony_ci * ILOCK will pin the inode against reclaim and prevent 338662306a36Sopenharmony_ci * concurrent transactions modifying the inode while we are 338762306a36Sopenharmony_ci * flushing the inode. If we get the lock, set the flushing 338862306a36Sopenharmony_ci * state before we drop the i_flags_lock. 
338962306a36Sopenharmony_ci */ 339062306a36Sopenharmony_ci if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 339162306a36Sopenharmony_ci spin_unlock(&ip->i_flags_lock); 339262306a36Sopenharmony_ci continue; 339362306a36Sopenharmony_ci } 339462306a36Sopenharmony_ci __xfs_iflags_set(ip, XFS_IFLUSHING); 339562306a36Sopenharmony_ci spin_unlock(&ip->i_flags_lock); 339662306a36Sopenharmony_ci 339762306a36Sopenharmony_ci /* 339862306a36Sopenharmony_ci * Abort flushing this inode if we are shut down because the 339962306a36Sopenharmony_ci * inode may not currently be in the AIL. This can occur when 340062306a36Sopenharmony_ci * log I/O failure unpins the inode without inserting into the 340162306a36Sopenharmony_ci * AIL, leaving a dirty/unpinned inode attached to the buffer 340262306a36Sopenharmony_ci * that otherwise looks like it should be flushed. 340362306a36Sopenharmony_ci */ 340462306a36Sopenharmony_ci if (xlog_is_shutdown(mp->m_log)) { 340562306a36Sopenharmony_ci xfs_iunpin_wait(ip); 340662306a36Sopenharmony_ci xfs_iflush_abort(ip); 340762306a36Sopenharmony_ci xfs_iunlock(ip, XFS_ILOCK_SHARED); 340862306a36Sopenharmony_ci error = -EIO; 340962306a36Sopenharmony_ci continue; 341062306a36Sopenharmony_ci } 341162306a36Sopenharmony_ci 341262306a36Sopenharmony_ci /* don't block waiting on a log force to unpin dirty inodes */ 341362306a36Sopenharmony_ci if (xfs_ipincount(ip)) { 341462306a36Sopenharmony_ci xfs_iflags_clear(ip, XFS_IFLUSHING); 341562306a36Sopenharmony_ci xfs_iunlock(ip, XFS_ILOCK_SHARED); 341662306a36Sopenharmony_ci continue; 341762306a36Sopenharmony_ci } 341862306a36Sopenharmony_ci 341962306a36Sopenharmony_ci if (!xfs_inode_clean(ip)) 342062306a36Sopenharmony_ci error = xfs_iflush(ip, bp); 342162306a36Sopenharmony_ci else 342262306a36Sopenharmony_ci xfs_iflags_clear(ip, XFS_IFLUSHING); 342362306a36Sopenharmony_ci xfs_iunlock(ip, XFS_ILOCK_SHARED); 342462306a36Sopenharmony_ci if (error) 342562306a36Sopenharmony_ci break; 342662306a36Sopenharmony_ci clcount++; 
342762306a36Sopenharmony_ci } 342862306a36Sopenharmony_ci 342962306a36Sopenharmony_ci if (error) { 343062306a36Sopenharmony_ci /* 343162306a36Sopenharmony_ci * Shutdown first so we kill the log before we release this 343262306a36Sopenharmony_ci * buffer. If it is an INODE_ALLOC buffer and pins the tail 343362306a36Sopenharmony_ci * of the log, failing it before the _log_ is shut down can 343462306a36Sopenharmony_ci * result in the log tail being moved forward in the journal 343562306a36Sopenharmony_ci * on disk because log writes can still be taking place. Hence 343662306a36Sopenharmony_ci * unpinning the tail will allow the ICREATE intent to be 343762306a36Sopenharmony_ci * removed from the log an recovery will fail with uninitialised 343862306a36Sopenharmony_ci * inode cluster buffers. 343962306a36Sopenharmony_ci */ 344062306a36Sopenharmony_ci xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 344162306a36Sopenharmony_ci bp->b_flags |= XBF_ASYNC; 344262306a36Sopenharmony_ci xfs_buf_ioend_fail(bp); 344362306a36Sopenharmony_ci return error; 344462306a36Sopenharmony_ci } 344562306a36Sopenharmony_ci 344662306a36Sopenharmony_ci if (!clcount) 344762306a36Sopenharmony_ci return -EAGAIN; 344862306a36Sopenharmony_ci 344962306a36Sopenharmony_ci XFS_STATS_INC(mp, xs_icluster_flushcnt); 345062306a36Sopenharmony_ci XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount); 345162306a36Sopenharmony_ci return 0; 345262306a36Sopenharmony_ci 345362306a36Sopenharmony_ci} 345462306a36Sopenharmony_ci 345562306a36Sopenharmony_ci/* Release an inode. */ 345662306a36Sopenharmony_civoid 345762306a36Sopenharmony_cixfs_irele( 345862306a36Sopenharmony_ci struct xfs_inode *ip) 345962306a36Sopenharmony_ci{ 346062306a36Sopenharmony_ci trace_xfs_irele(ip, _RET_IP_); 346162306a36Sopenharmony_ci iput(VFS_I(ip)); 346262306a36Sopenharmony_ci} 346362306a36Sopenharmony_ci 346462306a36Sopenharmony_ci/* 346562306a36Sopenharmony_ci * Ensure all commited transactions touching the inode are written to the log. 
346662306a36Sopenharmony_ci */ 346762306a36Sopenharmony_ciint 346862306a36Sopenharmony_cixfs_log_force_inode( 346962306a36Sopenharmony_ci struct xfs_inode *ip) 347062306a36Sopenharmony_ci{ 347162306a36Sopenharmony_ci xfs_csn_t seq = 0; 347262306a36Sopenharmony_ci 347362306a36Sopenharmony_ci xfs_ilock(ip, XFS_ILOCK_SHARED); 347462306a36Sopenharmony_ci if (xfs_ipincount(ip)) 347562306a36Sopenharmony_ci seq = ip->i_itemp->ili_commit_seq; 347662306a36Sopenharmony_ci xfs_iunlock(ip, XFS_ILOCK_SHARED); 347762306a36Sopenharmony_ci 347862306a36Sopenharmony_ci if (!seq) 347962306a36Sopenharmony_ci return 0; 348062306a36Sopenharmony_ci return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, NULL); 348162306a36Sopenharmony_ci} 348262306a36Sopenharmony_ci 348362306a36Sopenharmony_ci/* 348462306a36Sopenharmony_ci * Grab the exclusive iolock for a data copy from src to dest, making sure to 348562306a36Sopenharmony_ci * abide vfs locking order (lowest pointer value goes first) and breaking the 348662306a36Sopenharmony_ci * layout leases before proceeding. The loop is needed because we cannot call 348762306a36Sopenharmony_ci * the blocking break_layout() with the iolocks held, and therefore have to 348862306a36Sopenharmony_ci * back out both locks. 348962306a36Sopenharmony_ci */ 349062306a36Sopenharmony_cistatic int 349162306a36Sopenharmony_cixfs_iolock_two_inodes_and_break_layout( 349262306a36Sopenharmony_ci struct inode *src, 349362306a36Sopenharmony_ci struct inode *dest) 349462306a36Sopenharmony_ci{ 349562306a36Sopenharmony_ci int error; 349662306a36Sopenharmony_ci 349762306a36Sopenharmony_ci if (src > dest) 349862306a36Sopenharmony_ci swap(src, dest); 349962306a36Sopenharmony_ci 350062306a36Sopenharmony_ciretry: 350162306a36Sopenharmony_ci /* Wait to break both inodes' layouts before we start locking. 
*/ 350262306a36Sopenharmony_ci error = break_layout(src, true); 350362306a36Sopenharmony_ci if (error) 350462306a36Sopenharmony_ci return error; 350562306a36Sopenharmony_ci if (src != dest) { 350662306a36Sopenharmony_ci error = break_layout(dest, true); 350762306a36Sopenharmony_ci if (error) 350862306a36Sopenharmony_ci return error; 350962306a36Sopenharmony_ci } 351062306a36Sopenharmony_ci 351162306a36Sopenharmony_ci /* Lock one inode and make sure nobody got in and leased it. */ 351262306a36Sopenharmony_ci inode_lock(src); 351362306a36Sopenharmony_ci error = break_layout(src, false); 351462306a36Sopenharmony_ci if (error) { 351562306a36Sopenharmony_ci inode_unlock(src); 351662306a36Sopenharmony_ci if (error == -EWOULDBLOCK) 351762306a36Sopenharmony_ci goto retry; 351862306a36Sopenharmony_ci return error; 351962306a36Sopenharmony_ci } 352062306a36Sopenharmony_ci 352162306a36Sopenharmony_ci if (src == dest) 352262306a36Sopenharmony_ci return 0; 352362306a36Sopenharmony_ci 352462306a36Sopenharmony_ci /* Lock the other inode and make sure nobody got in and leased it. 
*/ 352562306a36Sopenharmony_ci inode_lock_nested(dest, I_MUTEX_NONDIR2); 352662306a36Sopenharmony_ci error = break_layout(dest, false); 352762306a36Sopenharmony_ci if (error) { 352862306a36Sopenharmony_ci inode_unlock(src); 352962306a36Sopenharmony_ci inode_unlock(dest); 353062306a36Sopenharmony_ci if (error == -EWOULDBLOCK) 353162306a36Sopenharmony_ci goto retry; 353262306a36Sopenharmony_ci return error; 353362306a36Sopenharmony_ci } 353462306a36Sopenharmony_ci 353562306a36Sopenharmony_ci return 0; 353662306a36Sopenharmony_ci} 353762306a36Sopenharmony_ci 353862306a36Sopenharmony_cistatic int 353962306a36Sopenharmony_cixfs_mmaplock_two_inodes_and_break_dax_layout( 354062306a36Sopenharmony_ci struct xfs_inode *ip1, 354162306a36Sopenharmony_ci struct xfs_inode *ip2) 354262306a36Sopenharmony_ci{ 354362306a36Sopenharmony_ci int error; 354462306a36Sopenharmony_ci bool retry; 354562306a36Sopenharmony_ci struct page *page; 354662306a36Sopenharmony_ci 354762306a36Sopenharmony_ci if (ip1->i_ino > ip2->i_ino) 354862306a36Sopenharmony_ci swap(ip1, ip2); 354962306a36Sopenharmony_ci 355062306a36Sopenharmony_ciagain: 355162306a36Sopenharmony_ci retry = false; 355262306a36Sopenharmony_ci /* Lock the first inode */ 355362306a36Sopenharmony_ci xfs_ilock(ip1, XFS_MMAPLOCK_EXCL); 355462306a36Sopenharmony_ci error = xfs_break_dax_layouts(VFS_I(ip1), &retry); 355562306a36Sopenharmony_ci if (error || retry) { 355662306a36Sopenharmony_ci xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); 355762306a36Sopenharmony_ci if (error == 0 && retry) 355862306a36Sopenharmony_ci goto again; 355962306a36Sopenharmony_ci return error; 356062306a36Sopenharmony_ci } 356162306a36Sopenharmony_ci 356262306a36Sopenharmony_ci if (ip1 == ip2) 356362306a36Sopenharmony_ci return 0; 356462306a36Sopenharmony_ci 356562306a36Sopenharmony_ci /* Nested lock the second inode */ 356662306a36Sopenharmony_ci xfs_ilock(ip2, xfs_lock_inumorder(XFS_MMAPLOCK_EXCL, 1)); 356762306a36Sopenharmony_ci /* 356862306a36Sopenharmony_ci * We cannot 
use xfs_break_dax_layouts() directly here because it may 356962306a36Sopenharmony_ci * need to unlock & lock the XFS_MMAPLOCK_EXCL which is not suitable 357062306a36Sopenharmony_ci * for this nested lock case. 357162306a36Sopenharmony_ci */ 357262306a36Sopenharmony_ci page = dax_layout_busy_page(VFS_I(ip2)->i_mapping); 357362306a36Sopenharmony_ci if (page && page_ref_count(page) != 1) { 357462306a36Sopenharmony_ci xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); 357562306a36Sopenharmony_ci xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); 357662306a36Sopenharmony_ci goto again; 357762306a36Sopenharmony_ci } 357862306a36Sopenharmony_ci 357962306a36Sopenharmony_ci return 0; 358062306a36Sopenharmony_ci} 358162306a36Sopenharmony_ci 358262306a36Sopenharmony_ci/* 358362306a36Sopenharmony_ci * Lock two inodes so that userspace cannot initiate I/O via file syscalls or 358462306a36Sopenharmony_ci * mmap activity. 358562306a36Sopenharmony_ci */ 358662306a36Sopenharmony_ciint 358762306a36Sopenharmony_cixfs_ilock2_io_mmap( 358862306a36Sopenharmony_ci struct xfs_inode *ip1, 358962306a36Sopenharmony_ci struct xfs_inode *ip2) 359062306a36Sopenharmony_ci{ 359162306a36Sopenharmony_ci int ret; 359262306a36Sopenharmony_ci 359362306a36Sopenharmony_ci ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2)); 359462306a36Sopenharmony_ci if (ret) 359562306a36Sopenharmony_ci return ret; 359662306a36Sopenharmony_ci 359762306a36Sopenharmony_ci if (IS_DAX(VFS_I(ip1)) && IS_DAX(VFS_I(ip2))) { 359862306a36Sopenharmony_ci ret = xfs_mmaplock_two_inodes_and_break_dax_layout(ip1, ip2); 359962306a36Sopenharmony_ci if (ret) { 360062306a36Sopenharmony_ci inode_unlock(VFS_I(ip2)); 360162306a36Sopenharmony_ci if (ip1 != ip2) 360262306a36Sopenharmony_ci inode_unlock(VFS_I(ip1)); 360362306a36Sopenharmony_ci return ret; 360462306a36Sopenharmony_ci } 360562306a36Sopenharmony_ci } else 360662306a36Sopenharmony_ci filemap_invalidate_lock_two(VFS_I(ip1)->i_mapping, 360762306a36Sopenharmony_ci VFS_I(ip2)->i_mapping); 
360862306a36Sopenharmony_ci 360962306a36Sopenharmony_ci return 0; 361062306a36Sopenharmony_ci} 361162306a36Sopenharmony_ci 361262306a36Sopenharmony_ci/* Unlock both inodes to allow IO and mmap activity. */ 361362306a36Sopenharmony_civoid 361462306a36Sopenharmony_cixfs_iunlock2_io_mmap( 361562306a36Sopenharmony_ci struct xfs_inode *ip1, 361662306a36Sopenharmony_ci struct xfs_inode *ip2) 361762306a36Sopenharmony_ci{ 361862306a36Sopenharmony_ci if (IS_DAX(VFS_I(ip1)) && IS_DAX(VFS_I(ip2))) { 361962306a36Sopenharmony_ci xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); 362062306a36Sopenharmony_ci if (ip1 != ip2) 362162306a36Sopenharmony_ci xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); 362262306a36Sopenharmony_ci } else 362362306a36Sopenharmony_ci filemap_invalidate_unlock_two(VFS_I(ip1)->i_mapping, 362462306a36Sopenharmony_ci VFS_I(ip2)->i_mapping); 362562306a36Sopenharmony_ci 362662306a36Sopenharmony_ci inode_unlock(VFS_I(ip2)); 362762306a36Sopenharmony_ci if (ip1 != ip2) 362862306a36Sopenharmony_ci inode_unlock(VFS_I(ip1)); 362962306a36Sopenharmony_ci} 363062306a36Sopenharmony_ci 363162306a36Sopenharmony_ci/* Drop the MMAPLOCK and the IOLOCK after a remap completes. */ 363262306a36Sopenharmony_civoid 363362306a36Sopenharmony_cixfs_iunlock2_remapping( 363462306a36Sopenharmony_ci struct xfs_inode *ip1, 363562306a36Sopenharmony_ci struct xfs_inode *ip2) 363662306a36Sopenharmony_ci{ 363762306a36Sopenharmony_ci xfs_iflags_clear(ip1, XFS_IREMAPPING); 363862306a36Sopenharmony_ci 363962306a36Sopenharmony_ci if (ip1 != ip2) 364062306a36Sopenharmony_ci xfs_iunlock(ip1, XFS_MMAPLOCK_SHARED); 364162306a36Sopenharmony_ci xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); 364262306a36Sopenharmony_ci 364362306a36Sopenharmony_ci if (ip1 != ip2) 364462306a36Sopenharmony_ci inode_unlock_shared(VFS_I(ip1)); 364562306a36Sopenharmony_ci inode_unlock(VFS_I(ip2)); 364662306a36Sopenharmony_ci} 364762306a36Sopenharmony_ci 364862306a36Sopenharmony_ci/* 364962306a36Sopenharmony_ci * Reload the incore inode list for this inode. 
Caller should ensure that 365062306a36Sopenharmony_ci * the link count cannot change, either by taking ILOCK_SHARED or otherwise 365162306a36Sopenharmony_ci * preventing other threads from executing. 365262306a36Sopenharmony_ci */ 365362306a36Sopenharmony_ciint 365462306a36Sopenharmony_cixfs_inode_reload_unlinked_bucket( 365562306a36Sopenharmony_ci struct xfs_trans *tp, 365662306a36Sopenharmony_ci struct xfs_inode *ip) 365762306a36Sopenharmony_ci{ 365862306a36Sopenharmony_ci struct xfs_mount *mp = tp->t_mountp; 365962306a36Sopenharmony_ci struct xfs_buf *agibp; 366062306a36Sopenharmony_ci struct xfs_agi *agi; 366162306a36Sopenharmony_ci struct xfs_perag *pag; 366262306a36Sopenharmony_ci xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); 366362306a36Sopenharmony_ci xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 366462306a36Sopenharmony_ci xfs_agino_t prev_agino, next_agino; 366562306a36Sopenharmony_ci unsigned int bucket; 366662306a36Sopenharmony_ci bool foundit = false; 366762306a36Sopenharmony_ci int error; 366862306a36Sopenharmony_ci 366962306a36Sopenharmony_ci /* Grab the first inode in the list */ 367062306a36Sopenharmony_ci pag = xfs_perag_get(mp, agno); 367162306a36Sopenharmony_ci error = xfs_ialloc_read_agi(pag, tp, &agibp); 367262306a36Sopenharmony_ci xfs_perag_put(pag); 367362306a36Sopenharmony_ci if (error) 367462306a36Sopenharmony_ci return error; 367562306a36Sopenharmony_ci 367662306a36Sopenharmony_ci /* 367762306a36Sopenharmony_ci * We've taken ILOCK_SHARED and the AGI buffer lock to stabilize the 367862306a36Sopenharmony_ci * incore unlinked list pointers for this inode. Check once more to 367962306a36Sopenharmony_ci * see if we raced with anyone else to reload the unlinked list. 
368062306a36Sopenharmony_ci */ 368162306a36Sopenharmony_ci if (!xfs_inode_unlinked_incomplete(ip)) { 368262306a36Sopenharmony_ci foundit = true; 368362306a36Sopenharmony_ci goto out_agibp; 368462306a36Sopenharmony_ci } 368562306a36Sopenharmony_ci 368662306a36Sopenharmony_ci bucket = agino % XFS_AGI_UNLINKED_BUCKETS; 368762306a36Sopenharmony_ci agi = agibp->b_addr; 368862306a36Sopenharmony_ci 368962306a36Sopenharmony_ci trace_xfs_inode_reload_unlinked_bucket(ip); 369062306a36Sopenharmony_ci 369162306a36Sopenharmony_ci xfs_info_ratelimited(mp, 369262306a36Sopenharmony_ci "Found unrecovered unlinked inode 0x%x in AG 0x%x. Initiating list recovery.", 369362306a36Sopenharmony_ci agino, agno); 369462306a36Sopenharmony_ci 369562306a36Sopenharmony_ci prev_agino = NULLAGINO; 369662306a36Sopenharmony_ci next_agino = be32_to_cpu(agi->agi_unlinked[bucket]); 369762306a36Sopenharmony_ci while (next_agino != NULLAGINO) { 369862306a36Sopenharmony_ci struct xfs_inode *next_ip = NULL; 369962306a36Sopenharmony_ci 370062306a36Sopenharmony_ci /* Found this caller's inode, set its backlink. */ 370162306a36Sopenharmony_ci if (next_agino == agino) { 370262306a36Sopenharmony_ci next_ip = ip; 370362306a36Sopenharmony_ci next_ip->i_prev_unlinked = prev_agino; 370462306a36Sopenharmony_ci foundit = true; 370562306a36Sopenharmony_ci goto next_inode; 370662306a36Sopenharmony_ci } 370762306a36Sopenharmony_ci 370862306a36Sopenharmony_ci /* Try in-memory lookup first. */ 370962306a36Sopenharmony_ci next_ip = xfs_iunlink_lookup(pag, next_agino); 371062306a36Sopenharmony_ci if (next_ip) 371162306a36Sopenharmony_ci goto next_inode; 371262306a36Sopenharmony_ci 371362306a36Sopenharmony_ci /* Inode not in memory, try reloading it. 
*/ 371462306a36Sopenharmony_ci error = xfs_iunlink_reload_next(tp, agibp, prev_agino, 371562306a36Sopenharmony_ci next_agino); 371662306a36Sopenharmony_ci if (error) 371762306a36Sopenharmony_ci break; 371862306a36Sopenharmony_ci 371962306a36Sopenharmony_ci /* Grab the reloaded inode. */ 372062306a36Sopenharmony_ci next_ip = xfs_iunlink_lookup(pag, next_agino); 372162306a36Sopenharmony_ci if (!next_ip) { 372262306a36Sopenharmony_ci /* No incore inode at all? We reloaded it... */ 372362306a36Sopenharmony_ci ASSERT(next_ip != NULL); 372462306a36Sopenharmony_ci error = -EFSCORRUPTED; 372562306a36Sopenharmony_ci break; 372662306a36Sopenharmony_ci } 372762306a36Sopenharmony_ci 372862306a36Sopenharmony_cinext_inode: 372962306a36Sopenharmony_ci prev_agino = next_agino; 373062306a36Sopenharmony_ci next_agino = next_ip->i_next_unlinked; 373162306a36Sopenharmony_ci } 373262306a36Sopenharmony_ci 373362306a36Sopenharmony_ciout_agibp: 373462306a36Sopenharmony_ci xfs_trans_brelse(tp, agibp); 373562306a36Sopenharmony_ci /* Should have found this inode somewhere in the iunlinked bucket. */ 373662306a36Sopenharmony_ci if (!error && !foundit) 373762306a36Sopenharmony_ci error = -EFSCORRUPTED; 373862306a36Sopenharmony_ci return error; 373962306a36Sopenharmony_ci} 374062306a36Sopenharmony_ci 374162306a36Sopenharmony_ci/* Decide if this inode is missing its unlinked list and reload it. 
*/
int
xfs_inode_reload_unlinked(
	struct xfs_inode	*ip)
{
	struct xfs_trans	*empty_tp;
	int			ret;

	/* The bucket reload takes a transaction; an empty one is used. */
	ret = xfs_trans_alloc_empty(ip->i_mount, &empty_tp);
	if (ret != 0)
		return ret;

	/*
	 * Hold ILOCK_SHARED across both the check and the reload.  ret is
	 * zero here, so it stays zero when no reload is needed.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_inode_unlinked_incomplete(ip))
		ret = xfs_inode_reload_unlinked_bucket(empty_tp, ip);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	/* The transaction is cancelled whether or not the reload ran. */
	xfs_trans_cancel(empty_tp);
	return ret;
}