162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. 662306a36Sopenharmony_ci * Copyright (c) 2002 Richard Russon 762306a36Sopenharmony_ci */ 862306a36Sopenharmony_ci 962306a36Sopenharmony_ci#include <linux/buffer_head.h> 1062306a36Sopenharmony_ci#include <linux/slab.h> 1162306a36Sopenharmony_ci#include <linux/swap.h> 1262306a36Sopenharmony_ci#include <linux/bio.h> 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include "attrib.h" 1562306a36Sopenharmony_ci#include "aops.h" 1662306a36Sopenharmony_ci#include "bitmap.h" 1762306a36Sopenharmony_ci#include "debug.h" 1862306a36Sopenharmony_ci#include "dir.h" 1962306a36Sopenharmony_ci#include "lcnalloc.h" 2062306a36Sopenharmony_ci#include "malloc.h" 2162306a36Sopenharmony_ci#include "mft.h" 2262306a36Sopenharmony_ci#include "ntfs.h" 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci#define MAX_BHS (PAGE_SIZE / NTFS_BLOCK_SIZE) 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci/** 2762306a36Sopenharmony_ci * map_mft_record_page - map the page in which a specific mft record resides 2862306a36Sopenharmony_ci * @ni: ntfs inode whose mft record page to map 2962306a36Sopenharmony_ci * 3062306a36Sopenharmony_ci * This maps the page in which the mft record of the ntfs inode @ni is situated 3162306a36Sopenharmony_ci * and returns a pointer to the mft record within the mapped page. 3262306a36Sopenharmony_ci * 3362306a36Sopenharmony_ci * Return value needs to be checked with IS_ERR() and if that is true PTR_ERR() 3462306a36Sopenharmony_ci * contains the negative error code returned. 
3562306a36Sopenharmony_ci */ 3662306a36Sopenharmony_cistatic inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) 3762306a36Sopenharmony_ci{ 3862306a36Sopenharmony_ci loff_t i_size; 3962306a36Sopenharmony_ci ntfs_volume *vol = ni->vol; 4062306a36Sopenharmony_ci struct inode *mft_vi = vol->mft_ino; 4162306a36Sopenharmony_ci struct page *page; 4262306a36Sopenharmony_ci unsigned long index, end_index; 4362306a36Sopenharmony_ci unsigned ofs; 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci BUG_ON(ni->page); 4662306a36Sopenharmony_ci /* 4762306a36Sopenharmony_ci * The index into the page cache and the offset within the page cache 4862306a36Sopenharmony_ci * page of the wanted mft record. FIXME: We need to check for 4962306a36Sopenharmony_ci * overflowing the unsigned long, but I don't think we would ever get 5062306a36Sopenharmony_ci * here if the volume was that big... 5162306a36Sopenharmony_ci */ 5262306a36Sopenharmony_ci index = (u64)ni->mft_no << vol->mft_record_size_bits >> 5362306a36Sopenharmony_ci PAGE_SHIFT; 5462306a36Sopenharmony_ci ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_MASK; 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci i_size = i_size_read(mft_vi); 5762306a36Sopenharmony_ci /* The maximum valid index into the page cache for $MFT's data. */ 5862306a36Sopenharmony_ci end_index = i_size >> PAGE_SHIFT; 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci /* If the wanted index is out of bounds the mft record doesn't exist. */ 6162306a36Sopenharmony_ci if (unlikely(index >= end_index)) { 6262306a36Sopenharmony_ci if (index > end_index || (i_size & ~PAGE_MASK) < ofs + 6362306a36Sopenharmony_ci vol->mft_record_size) { 6462306a36Sopenharmony_ci page = ERR_PTR(-ENOENT); 6562306a36Sopenharmony_ci ntfs_error(vol->sb, "Attempt to read mft record 0x%lx, " 6662306a36Sopenharmony_ci "which is beyond the end of the mft. 
" 6762306a36Sopenharmony_ci "This is probably a bug in the ntfs " 6862306a36Sopenharmony_ci "driver.", ni->mft_no); 6962306a36Sopenharmony_ci goto err_out; 7062306a36Sopenharmony_ci } 7162306a36Sopenharmony_ci } 7262306a36Sopenharmony_ci /* Read, map, and pin the page. */ 7362306a36Sopenharmony_ci page = ntfs_map_page(mft_vi->i_mapping, index); 7462306a36Sopenharmony_ci if (!IS_ERR(page)) { 7562306a36Sopenharmony_ci /* Catch multi sector transfer fixup errors. */ 7662306a36Sopenharmony_ci if (likely(ntfs_is_mft_recordp((le32*)(page_address(page) + 7762306a36Sopenharmony_ci ofs)))) { 7862306a36Sopenharmony_ci ni->page = page; 7962306a36Sopenharmony_ci ni->page_ofs = ofs; 8062306a36Sopenharmony_ci return page_address(page) + ofs; 8162306a36Sopenharmony_ci } 8262306a36Sopenharmony_ci ntfs_error(vol->sb, "Mft record 0x%lx is corrupt. " 8362306a36Sopenharmony_ci "Run chkdsk.", ni->mft_no); 8462306a36Sopenharmony_ci ntfs_unmap_page(page); 8562306a36Sopenharmony_ci page = ERR_PTR(-EIO); 8662306a36Sopenharmony_ci NVolSetErrors(vol); 8762306a36Sopenharmony_ci } 8862306a36Sopenharmony_cierr_out: 8962306a36Sopenharmony_ci ni->page = NULL; 9062306a36Sopenharmony_ci ni->page_ofs = 0; 9162306a36Sopenharmony_ci return (void*)page; 9262306a36Sopenharmony_ci} 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci/** 9562306a36Sopenharmony_ci * map_mft_record - map, pin and lock an mft record 9662306a36Sopenharmony_ci * @ni: ntfs inode whose MFT record to map 9762306a36Sopenharmony_ci * 9862306a36Sopenharmony_ci * First, take the mrec_lock mutex. We might now be sleeping, while waiting 9962306a36Sopenharmony_ci * for the mutex if it was already locked by someone else. 10062306a36Sopenharmony_ci * 10162306a36Sopenharmony_ci * The page of the record is mapped using map_mft_record_page() before being 10262306a36Sopenharmony_ci * returned to the caller. 
10362306a36Sopenharmony_ci * 10462306a36Sopenharmony_ci * This in turn uses ntfs_map_page() to get the page containing the wanted mft 10562306a36Sopenharmony_ci * record (it in turn calls read_cache_page() which reads it in from disk if 10662306a36Sopenharmony_ci * necessary, increments the use count on the page so that it cannot disappear 10762306a36Sopenharmony_ci * under us and returns a reference to the page cache page). 10862306a36Sopenharmony_ci * 10962306a36Sopenharmony_ci * If read_cache_page() invokes ntfs_readpage() to load the page from disk, it 11062306a36Sopenharmony_ci * sets PG_locked and clears PG_uptodate on the page. Once I/O has completed 11162306a36Sopenharmony_ci * and the post-read mst fixups on each mft record in the page have been 11262306a36Sopenharmony_ci * performed, the page gets PG_uptodate set and PG_locked cleared (this is done 11362306a36Sopenharmony_ci * in our asynchronous I/O completion handler end_buffer_read_mft_async()). 11462306a36Sopenharmony_ci * ntfs_map_page() waits for PG_locked to become clear and checks if 11562306a36Sopenharmony_ci * PG_uptodate is set and returns an error code if not. This provides 11662306a36Sopenharmony_ci * sufficient protection against races when reading/using the page. 11762306a36Sopenharmony_ci * 11862306a36Sopenharmony_ci * However there is the write mapping to think about. Doing the above described 11962306a36Sopenharmony_ci * checking here will be fine, because when initiating the write we will set 12062306a36Sopenharmony_ci * PG_locked and clear PG_uptodate making sure nobody is touching the page 12162306a36Sopenharmony_ci * contents. Doing the locking this way means that the commit to disk code in 12262306a36Sopenharmony_ci * the page cache code paths is automatically sufficiently locked with us as 12362306a36Sopenharmony_ci * we will not touch a page that has been locked or is not uptodate. 
The only 12462306a36Sopenharmony_ci * locking problem then is them locking the page while we are accessing it. 12562306a36Sopenharmony_ci * 12662306a36Sopenharmony_ci * So that code will end up having to own the mrec_lock of all mft 12762306a36Sopenharmony_ci * records/inodes present in the page before I/O can proceed. In that case we 12862306a36Sopenharmony_ci * wouldn't need to bother with PG_locked and PG_uptodate as nobody will be 12962306a36Sopenharmony_ci * accessing anything without owning the mrec_lock mutex. But we do need to 13062306a36Sopenharmony_ci * use them because of the read_cache_page() invocation and the code becomes so 13162306a36Sopenharmony_ci * much simpler this way that it is well worth it. 13262306a36Sopenharmony_ci * 13362306a36Sopenharmony_ci * The mft record is now ours and we return a pointer to it. You need to check 13462306a36Sopenharmony_ci * the returned pointer with IS_ERR() and if that is true, PTR_ERR() will return 13562306a36Sopenharmony_ci * the error code. 13662306a36Sopenharmony_ci * 13762306a36Sopenharmony_ci * NOTE: Caller is responsible for setting the mft record dirty before calling 13862306a36Sopenharmony_ci * unmap_mft_record(). This is obviously only necessary if the caller really 13962306a36Sopenharmony_ci * modified the mft record... 14062306a36Sopenharmony_ci * Q: Do we want to recycle one of the VFS inode state bits instead? 14162306a36Sopenharmony_ci * A: No, the inode ones mean we want to change the mft record, not we want to 14262306a36Sopenharmony_ci * write it out. 14362306a36Sopenharmony_ci */ 14462306a36Sopenharmony_ciMFT_RECORD *map_mft_record(ntfs_inode *ni) 14562306a36Sopenharmony_ci{ 14662306a36Sopenharmony_ci MFT_RECORD *m; 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no); 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci /* Make sure the ntfs inode doesn't go away. 
*/ 15162306a36Sopenharmony_ci atomic_inc(&ni->count); 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci /* Serialize access to this mft record. */ 15462306a36Sopenharmony_ci mutex_lock(&ni->mrec_lock); 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci m = map_mft_record_page(ni); 15762306a36Sopenharmony_ci if (!IS_ERR(m)) 15862306a36Sopenharmony_ci return m; 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci mutex_unlock(&ni->mrec_lock); 16162306a36Sopenharmony_ci atomic_dec(&ni->count); 16262306a36Sopenharmony_ci ntfs_error(ni->vol->sb, "Failed with error code %lu.", -PTR_ERR(m)); 16362306a36Sopenharmony_ci return m; 16462306a36Sopenharmony_ci} 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci/** 16762306a36Sopenharmony_ci * unmap_mft_record_page - unmap the page in which a specific mft record resides 16862306a36Sopenharmony_ci * @ni: ntfs inode whose mft record page to unmap 16962306a36Sopenharmony_ci * 17062306a36Sopenharmony_ci * This unmaps the page in which the mft record of the ntfs inode @ni is 17162306a36Sopenharmony_ci * situated and returns. This is a NOOP if highmem is not configured. 17262306a36Sopenharmony_ci * 17362306a36Sopenharmony_ci * The unmap happens via ntfs_unmap_page() which in turn decrements the use 17462306a36Sopenharmony_ci * count on the page thus releasing it from the pinned state. 17562306a36Sopenharmony_ci * 17662306a36Sopenharmony_ci * We do not actually unmap the page from memory of course, as that will be 17762306a36Sopenharmony_ci * done by the page cache code itself when memory pressure increases or 17862306a36Sopenharmony_ci * whatever. 17962306a36Sopenharmony_ci */ 18062306a36Sopenharmony_cistatic inline void unmap_mft_record_page(ntfs_inode *ni) 18162306a36Sopenharmony_ci{ 18262306a36Sopenharmony_ci BUG_ON(!ni->page); 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci // TODO: If dirty, blah... 
18562306a36Sopenharmony_ci ntfs_unmap_page(ni->page); 18662306a36Sopenharmony_ci ni->page = NULL; 18762306a36Sopenharmony_ci ni->page_ofs = 0; 18862306a36Sopenharmony_ci return; 18962306a36Sopenharmony_ci} 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci/** 19262306a36Sopenharmony_ci * unmap_mft_record - release a mapped mft record 19362306a36Sopenharmony_ci * @ni: ntfs inode whose MFT record to unmap 19462306a36Sopenharmony_ci * 19562306a36Sopenharmony_ci * We release the page mapping and the mrec_lock mutex which unmaps the mft 19662306a36Sopenharmony_ci * record and releases it for others to get hold of. We also release the ntfs 19762306a36Sopenharmony_ci * inode by decrementing the ntfs inode reference count. 19862306a36Sopenharmony_ci * 19962306a36Sopenharmony_ci * NOTE: If caller has modified the mft record, it is imperative to set the mft 20062306a36Sopenharmony_ci * record dirty BEFORE calling unmap_mft_record(). 20162306a36Sopenharmony_ci */ 20262306a36Sopenharmony_civoid unmap_mft_record(ntfs_inode *ni) 20362306a36Sopenharmony_ci{ 20462306a36Sopenharmony_ci struct page *page = ni->page; 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci BUG_ON(!page); 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no); 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci unmap_mft_record_page(ni); 21162306a36Sopenharmony_ci mutex_unlock(&ni->mrec_lock); 21262306a36Sopenharmony_ci atomic_dec(&ni->count); 21362306a36Sopenharmony_ci /* 21462306a36Sopenharmony_ci * If pure ntfs_inode, i.e. no vfs inode attached, we leave it to 21562306a36Sopenharmony_ci * ntfs_clear_extent_inode() in the extent inode case, and to the 21662306a36Sopenharmony_ci * caller in the non-extent, yet pure ntfs inode case, to do the actual 21762306a36Sopenharmony_ci * tear down of all structures and freeing of all allocated memory. 
21862306a36Sopenharmony_ci */ 21962306a36Sopenharmony_ci return; 22062306a36Sopenharmony_ci} 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci/** 22362306a36Sopenharmony_ci * map_extent_mft_record - load an extent inode and attach it to its base 22462306a36Sopenharmony_ci * @base_ni: base ntfs inode 22562306a36Sopenharmony_ci * @mref: mft reference of the extent inode to load 22662306a36Sopenharmony_ci * @ntfs_ino: on successful return, pointer to the ntfs_inode structure 22762306a36Sopenharmony_ci * 22862306a36Sopenharmony_ci * Load the extent mft record @mref and attach it to its base inode @base_ni. 22962306a36Sopenharmony_ci * Return the mapped extent mft record if IS_ERR(result) is false. Otherwise 23062306a36Sopenharmony_ci * PTR_ERR(result) gives the negative error code. 23162306a36Sopenharmony_ci * 23262306a36Sopenharmony_ci * On successful return, @ntfs_ino contains a pointer to the ntfs_inode 23362306a36Sopenharmony_ci * structure of the mapped extent inode. 23462306a36Sopenharmony_ci */ 23562306a36Sopenharmony_ciMFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, 23662306a36Sopenharmony_ci ntfs_inode **ntfs_ino) 23762306a36Sopenharmony_ci{ 23862306a36Sopenharmony_ci MFT_RECORD *m; 23962306a36Sopenharmony_ci ntfs_inode *ni = NULL; 24062306a36Sopenharmony_ci ntfs_inode **extent_nis = NULL; 24162306a36Sopenharmony_ci int i; 24262306a36Sopenharmony_ci unsigned long mft_no = MREF(mref); 24362306a36Sopenharmony_ci u16 seq_no = MSEQNO(mref); 24462306a36Sopenharmony_ci bool destroy_ni = false; 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci ntfs_debug("Mapping extent mft record 0x%lx (base mft record 0x%lx).", 24762306a36Sopenharmony_ci mft_no, base_ni->mft_no); 24862306a36Sopenharmony_ci /* Make sure the base ntfs inode doesn't go away. 
*/ 24962306a36Sopenharmony_ci atomic_inc(&base_ni->count); 25062306a36Sopenharmony_ci /* 25162306a36Sopenharmony_ci * Check if this extent inode has already been added to the base inode, 25262306a36Sopenharmony_ci * in which case just return it. If not found, add it to the base 25362306a36Sopenharmony_ci * inode before returning it. 25462306a36Sopenharmony_ci */ 25562306a36Sopenharmony_ci mutex_lock(&base_ni->extent_lock); 25662306a36Sopenharmony_ci if (base_ni->nr_extents > 0) { 25762306a36Sopenharmony_ci extent_nis = base_ni->ext.extent_ntfs_inos; 25862306a36Sopenharmony_ci for (i = 0; i < base_ni->nr_extents; i++) { 25962306a36Sopenharmony_ci if (mft_no != extent_nis[i]->mft_no) 26062306a36Sopenharmony_ci continue; 26162306a36Sopenharmony_ci ni = extent_nis[i]; 26262306a36Sopenharmony_ci /* Make sure the ntfs inode doesn't go away. */ 26362306a36Sopenharmony_ci atomic_inc(&ni->count); 26462306a36Sopenharmony_ci break; 26562306a36Sopenharmony_ci } 26662306a36Sopenharmony_ci } 26762306a36Sopenharmony_ci if (likely(ni != NULL)) { 26862306a36Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 26962306a36Sopenharmony_ci atomic_dec(&base_ni->count); 27062306a36Sopenharmony_ci /* We found the record; just have to map and return it. */ 27162306a36Sopenharmony_ci m = map_mft_record(ni); 27262306a36Sopenharmony_ci /* map_mft_record() has incremented this on success. */ 27362306a36Sopenharmony_ci atomic_dec(&ni->count); 27462306a36Sopenharmony_ci if (!IS_ERR(m)) { 27562306a36Sopenharmony_ci /* Verify the sequence number. */ 27662306a36Sopenharmony_ci if (likely(le16_to_cpu(m->sequence_number) == seq_no)) { 27762306a36Sopenharmony_ci ntfs_debug("Done 1."); 27862306a36Sopenharmony_ci *ntfs_ino = ni; 27962306a36Sopenharmony_ci return m; 28062306a36Sopenharmony_ci } 28162306a36Sopenharmony_ci unmap_mft_record(ni); 28262306a36Sopenharmony_ci ntfs_error(base_ni->vol->sb, "Found stale extent mft " 28362306a36Sopenharmony_ci "reference! Corrupt filesystem. 
" 28462306a36Sopenharmony_ci "Run chkdsk."); 28562306a36Sopenharmony_ci return ERR_PTR(-EIO); 28662306a36Sopenharmony_ci } 28762306a36Sopenharmony_cimap_err_out: 28862306a36Sopenharmony_ci ntfs_error(base_ni->vol->sb, "Failed to map extent " 28962306a36Sopenharmony_ci "mft record, error code %ld.", -PTR_ERR(m)); 29062306a36Sopenharmony_ci return m; 29162306a36Sopenharmony_ci } 29262306a36Sopenharmony_ci /* Record wasn't there. Get a new ntfs inode and initialize it. */ 29362306a36Sopenharmony_ci ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no); 29462306a36Sopenharmony_ci if (unlikely(!ni)) { 29562306a36Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 29662306a36Sopenharmony_ci atomic_dec(&base_ni->count); 29762306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 29862306a36Sopenharmony_ci } 29962306a36Sopenharmony_ci ni->vol = base_ni->vol; 30062306a36Sopenharmony_ci ni->seq_no = seq_no; 30162306a36Sopenharmony_ci ni->nr_extents = -1; 30262306a36Sopenharmony_ci ni->ext.base_ntfs_ino = base_ni; 30362306a36Sopenharmony_ci /* Now map the record. */ 30462306a36Sopenharmony_ci m = map_mft_record(ni); 30562306a36Sopenharmony_ci if (IS_ERR(m)) { 30662306a36Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 30762306a36Sopenharmony_ci atomic_dec(&base_ni->count); 30862306a36Sopenharmony_ci ntfs_clear_extent_inode(ni); 30962306a36Sopenharmony_ci goto map_err_out; 31062306a36Sopenharmony_ci } 31162306a36Sopenharmony_ci /* Verify the sequence number if it is present. */ 31262306a36Sopenharmony_ci if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) { 31362306a36Sopenharmony_ci ntfs_error(base_ni->vol->sb, "Found stale extent mft " 31462306a36Sopenharmony_ci "reference! Corrupt filesystem. Run chkdsk."); 31562306a36Sopenharmony_ci destroy_ni = true; 31662306a36Sopenharmony_ci m = ERR_PTR(-EIO); 31762306a36Sopenharmony_ci goto unm_err_out; 31862306a36Sopenharmony_ci } 31962306a36Sopenharmony_ci /* Attach extent inode to base inode, reallocating memory if needed. 
*/ 32062306a36Sopenharmony_ci if (!(base_ni->nr_extents & 3)) { 32162306a36Sopenharmony_ci ntfs_inode **tmp; 32262306a36Sopenharmony_ci int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *); 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci tmp = kmalloc(new_size, GFP_NOFS); 32562306a36Sopenharmony_ci if (unlikely(!tmp)) { 32662306a36Sopenharmony_ci ntfs_error(base_ni->vol->sb, "Failed to allocate " 32762306a36Sopenharmony_ci "internal buffer."); 32862306a36Sopenharmony_ci destroy_ni = true; 32962306a36Sopenharmony_ci m = ERR_PTR(-ENOMEM); 33062306a36Sopenharmony_ci goto unm_err_out; 33162306a36Sopenharmony_ci } 33262306a36Sopenharmony_ci if (base_ni->nr_extents) { 33362306a36Sopenharmony_ci BUG_ON(!base_ni->ext.extent_ntfs_inos); 33462306a36Sopenharmony_ci memcpy(tmp, base_ni->ext.extent_ntfs_inos, new_size - 33562306a36Sopenharmony_ci 4 * sizeof(ntfs_inode *)); 33662306a36Sopenharmony_ci kfree(base_ni->ext.extent_ntfs_inos); 33762306a36Sopenharmony_ci } 33862306a36Sopenharmony_ci base_ni->ext.extent_ntfs_inos = tmp; 33962306a36Sopenharmony_ci } 34062306a36Sopenharmony_ci base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni; 34162306a36Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 34262306a36Sopenharmony_ci atomic_dec(&base_ni->count); 34362306a36Sopenharmony_ci ntfs_debug("Done 2."); 34462306a36Sopenharmony_ci *ntfs_ino = ni; 34562306a36Sopenharmony_ci return m; 34662306a36Sopenharmony_ciunm_err_out: 34762306a36Sopenharmony_ci unmap_mft_record(ni); 34862306a36Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 34962306a36Sopenharmony_ci atomic_dec(&base_ni->count); 35062306a36Sopenharmony_ci /* 35162306a36Sopenharmony_ci * If the extent inode was not attached to the base inode we need to 35262306a36Sopenharmony_ci * release it or we will leak memory. 
35362306a36Sopenharmony_ci */ 35462306a36Sopenharmony_ci if (destroy_ni) 35562306a36Sopenharmony_ci ntfs_clear_extent_inode(ni); 35662306a36Sopenharmony_ci return m; 35762306a36Sopenharmony_ci} 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_ci#ifdef NTFS_RW 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_ci/** 36262306a36Sopenharmony_ci * __mark_mft_record_dirty - set the mft record and the page containing it dirty 36362306a36Sopenharmony_ci * @ni: ntfs inode describing the mapped mft record 36462306a36Sopenharmony_ci * 36562306a36Sopenharmony_ci * Internal function. Users should call mark_mft_record_dirty() instead. 36662306a36Sopenharmony_ci * 36762306a36Sopenharmony_ci * Set the mapped (extent) mft record of the (base or extent) ntfs inode @ni, 36862306a36Sopenharmony_ci * as well as the page containing the mft record, dirty. Also, mark the base 36962306a36Sopenharmony_ci * vfs inode dirty. This ensures that any changes to the mft record are 37062306a36Sopenharmony_ci * written out to disk. 37162306a36Sopenharmony_ci * 37262306a36Sopenharmony_ci * NOTE: We only set I_DIRTY_DATASYNC (and not I_DIRTY_PAGES) 37362306a36Sopenharmony_ci * on the base vfs inode, because even though file data may have been modified, 37462306a36Sopenharmony_ci * it is dirty in the inode meta data rather than the data page cache of the 37562306a36Sopenharmony_ci * inode, and thus there are no data pages that need writing out. Therefore, a 37662306a36Sopenharmony_ci * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the 37762306a36Sopenharmony_ci * other hand, is not sufficient, because ->write_inode needs to be called even 37862306a36Sopenharmony_ci * in case of fdatasync. This needs to happen or the file data would not 37962306a36Sopenharmony_ci * necessarily hit the device synchronously, even though the vfs inode has the 38062306a36Sopenharmony_ci * O_SYNC flag set. 
Also, I_DIRTY_DATASYNC simply "feels" better than just 38162306a36Sopenharmony_ci * I_DIRTY_SYNC, since the file data has not actually hit the block device yet, 38262306a36Sopenharmony_ci * which is not what I_DIRTY_SYNC on its own would suggest. 38362306a36Sopenharmony_ci */ 38462306a36Sopenharmony_civoid __mark_mft_record_dirty(ntfs_inode *ni) 38562306a36Sopenharmony_ci{ 38662306a36Sopenharmony_ci ntfs_inode *base_ni; 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ci ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); 38962306a36Sopenharmony_ci BUG_ON(NInoAttr(ni)); 39062306a36Sopenharmony_ci mark_ntfs_record_dirty(ni->page, ni->page_ofs); 39162306a36Sopenharmony_ci /* Determine the base vfs inode and mark it dirty, too. */ 39262306a36Sopenharmony_ci mutex_lock(&ni->extent_lock); 39362306a36Sopenharmony_ci if (likely(ni->nr_extents >= 0)) 39462306a36Sopenharmony_ci base_ni = ni; 39562306a36Sopenharmony_ci else 39662306a36Sopenharmony_ci base_ni = ni->ext.base_ntfs_ino; 39762306a36Sopenharmony_ci mutex_unlock(&ni->extent_lock); 39862306a36Sopenharmony_ci __mark_inode_dirty(VFS_I(base_ni), I_DIRTY_DATASYNC); 39962306a36Sopenharmony_ci} 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_cistatic const char *ntfs_please_email = "Please email " 40262306a36Sopenharmony_ci "linux-ntfs-dev@lists.sourceforge.net and say that you saw " 40362306a36Sopenharmony_ci "this message. 
Thank you."; 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci/** 40662306a36Sopenharmony_ci * ntfs_sync_mft_mirror_umount - synchronise an mft record to the mft mirror 40762306a36Sopenharmony_ci * @vol: ntfs volume on which the mft record to synchronize resides 40862306a36Sopenharmony_ci * @mft_no: mft record number of mft record to synchronize 40962306a36Sopenharmony_ci * @m: mapped, mst protected (extent) mft record to synchronize 41062306a36Sopenharmony_ci * 41162306a36Sopenharmony_ci * Write the mapped, mst protected (extent) mft record @m with mft record 41262306a36Sopenharmony_ci * number @mft_no to the mft mirror ($MFTMirr) of the ntfs volume @vol, 41362306a36Sopenharmony_ci * bypassing the page cache and the $MFTMirr inode itself. 41462306a36Sopenharmony_ci * 41562306a36Sopenharmony_ci * This function is only for use at umount time when the mft mirror inode has 41662306a36Sopenharmony_ci * already been disposed off. We BUG() if we are called while the mft mirror 41762306a36Sopenharmony_ci * inode is still attached to the volume. 41862306a36Sopenharmony_ci * 41962306a36Sopenharmony_ci * On success return 0. On error return -errno. 42062306a36Sopenharmony_ci * 42162306a36Sopenharmony_ci * NOTE: This function is not implemented yet as I am not convinced it can 42262306a36Sopenharmony_ci * actually be triggered considering the sequence of commits we do in super.c:: 42362306a36Sopenharmony_ci * ntfs_put_super(). But just in case we provide this place holder as the 42462306a36Sopenharmony_ci * alternative would be either to BUG() or to get a NULL pointer dereference 42562306a36Sopenharmony_ci * and Oops. 
42662306a36Sopenharmony_ci */ 42762306a36Sopenharmony_cistatic int ntfs_sync_mft_mirror_umount(ntfs_volume *vol, 42862306a36Sopenharmony_ci const unsigned long mft_no, MFT_RECORD *m) 42962306a36Sopenharmony_ci{ 43062306a36Sopenharmony_ci BUG_ON(vol->mftmirr_ino); 43162306a36Sopenharmony_ci ntfs_error(vol->sb, "Umount time mft mirror syncing is not " 43262306a36Sopenharmony_ci "implemented yet. %s", ntfs_please_email); 43362306a36Sopenharmony_ci return -EOPNOTSUPP; 43462306a36Sopenharmony_ci} 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci/** 43762306a36Sopenharmony_ci * ntfs_sync_mft_mirror - synchronize an mft record to the mft mirror 43862306a36Sopenharmony_ci * @vol: ntfs volume on which the mft record to synchronize resides 43962306a36Sopenharmony_ci * @mft_no: mft record number of mft record to synchronize 44062306a36Sopenharmony_ci * @m: mapped, mst protected (extent) mft record to synchronize 44162306a36Sopenharmony_ci * @sync: if true, wait for i/o completion 44262306a36Sopenharmony_ci * 44362306a36Sopenharmony_ci * Write the mapped, mst protected (extent) mft record @m with mft record 44462306a36Sopenharmony_ci * number @mft_no to the mft mirror ($MFTMirr) of the ntfs volume @vol. 44562306a36Sopenharmony_ci * 44662306a36Sopenharmony_ci * On success return 0. On error return -errno and set the volume errors flag 44762306a36Sopenharmony_ci * in the ntfs volume @vol. 44862306a36Sopenharmony_ci * 44962306a36Sopenharmony_ci * NOTE: We always perform synchronous i/o and ignore the @sync parameter. 45062306a36Sopenharmony_ci * 45162306a36Sopenharmony_ci * TODO: If @sync is false, want to do truly asynchronous i/o, i.e. just 45262306a36Sopenharmony_ci * schedule i/o via ->writepage or do it via kntfsd or whatever. 
45362306a36Sopenharmony_ci */ 45462306a36Sopenharmony_ciint ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, 45562306a36Sopenharmony_ci MFT_RECORD *m, int sync) 45662306a36Sopenharmony_ci{ 45762306a36Sopenharmony_ci struct page *page; 45862306a36Sopenharmony_ci unsigned int blocksize = vol->sb->s_blocksize; 45962306a36Sopenharmony_ci int max_bhs = vol->mft_record_size / blocksize; 46062306a36Sopenharmony_ci struct buffer_head *bhs[MAX_BHS]; 46162306a36Sopenharmony_ci struct buffer_head *bh, *head; 46262306a36Sopenharmony_ci u8 *kmirr; 46362306a36Sopenharmony_ci runlist_element *rl; 46462306a36Sopenharmony_ci unsigned int block_start, block_end, m_start, m_end, page_ofs; 46562306a36Sopenharmony_ci int i_bhs, nr_bhs, err = 0; 46662306a36Sopenharmony_ci unsigned char blocksize_bits = vol->sb->s_blocksize_bits; 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci ntfs_debug("Entering for inode 0x%lx.", mft_no); 46962306a36Sopenharmony_ci BUG_ON(!max_bhs); 47062306a36Sopenharmony_ci if (WARN_ON(max_bhs > MAX_BHS)) 47162306a36Sopenharmony_ci return -EINVAL; 47262306a36Sopenharmony_ci if (unlikely(!vol->mftmirr_ino)) { 47362306a36Sopenharmony_ci /* This could happen during umount... */ 47462306a36Sopenharmony_ci err = ntfs_sync_mft_mirror_umount(vol, mft_no, m); 47562306a36Sopenharmony_ci if (likely(!err)) 47662306a36Sopenharmony_ci return err; 47762306a36Sopenharmony_ci goto err_out; 47862306a36Sopenharmony_ci } 47962306a36Sopenharmony_ci /* Get the page containing the mirror copy of the mft record @m. 
*/ 48062306a36Sopenharmony_ci page = ntfs_map_page(vol->mftmirr_ino->i_mapping, mft_no >> 48162306a36Sopenharmony_ci (PAGE_SHIFT - vol->mft_record_size_bits)); 48262306a36Sopenharmony_ci if (IS_ERR(page)) { 48362306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to map mft mirror page."); 48462306a36Sopenharmony_ci err = PTR_ERR(page); 48562306a36Sopenharmony_ci goto err_out; 48662306a36Sopenharmony_ci } 48762306a36Sopenharmony_ci lock_page(page); 48862306a36Sopenharmony_ci BUG_ON(!PageUptodate(page)); 48962306a36Sopenharmony_ci ClearPageUptodate(page); 49062306a36Sopenharmony_ci /* Offset of the mft mirror record inside the page. */ 49162306a36Sopenharmony_ci page_ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_MASK; 49262306a36Sopenharmony_ci /* The address in the page of the mirror copy of the mft record @m. */ 49362306a36Sopenharmony_ci kmirr = page_address(page) + page_ofs; 49462306a36Sopenharmony_ci /* Copy the mst protected mft record to the mirror. */ 49562306a36Sopenharmony_ci memcpy(kmirr, m, vol->mft_record_size); 49662306a36Sopenharmony_ci /* Create uptodate buffers if not present. 
*/ 49762306a36Sopenharmony_ci if (unlikely(!page_has_buffers(page))) { 49862306a36Sopenharmony_ci struct buffer_head *tail; 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci bh = head = alloc_page_buffers(page, blocksize, true); 50162306a36Sopenharmony_ci do { 50262306a36Sopenharmony_ci set_buffer_uptodate(bh); 50362306a36Sopenharmony_ci tail = bh; 50462306a36Sopenharmony_ci bh = bh->b_this_page; 50562306a36Sopenharmony_ci } while (bh); 50662306a36Sopenharmony_ci tail->b_this_page = head; 50762306a36Sopenharmony_ci attach_page_private(page, head); 50862306a36Sopenharmony_ci } 50962306a36Sopenharmony_ci bh = head = page_buffers(page); 51062306a36Sopenharmony_ci BUG_ON(!bh); 51162306a36Sopenharmony_ci rl = NULL; 51262306a36Sopenharmony_ci nr_bhs = 0; 51362306a36Sopenharmony_ci block_start = 0; 51462306a36Sopenharmony_ci m_start = kmirr - (u8*)page_address(page); 51562306a36Sopenharmony_ci m_end = m_start + vol->mft_record_size; 51662306a36Sopenharmony_ci do { 51762306a36Sopenharmony_ci block_end = block_start + blocksize; 51862306a36Sopenharmony_ci /* If the buffer is outside the mft record, skip it. */ 51962306a36Sopenharmony_ci if (block_end <= m_start) 52062306a36Sopenharmony_ci continue; 52162306a36Sopenharmony_ci if (unlikely(block_start >= m_end)) 52262306a36Sopenharmony_ci break; 52362306a36Sopenharmony_ci /* Need to map the buffer if it is not mapped already. */ 52462306a36Sopenharmony_ci if (unlikely(!buffer_mapped(bh))) { 52562306a36Sopenharmony_ci VCN vcn; 52662306a36Sopenharmony_ci LCN lcn; 52762306a36Sopenharmony_ci unsigned int vcn_ofs; 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci bh->b_bdev = vol->sb->s_bdev; 53062306a36Sopenharmony_ci /* Obtain the vcn and offset of the current block. 
*/ 53162306a36Sopenharmony_ci vcn = ((VCN)mft_no << vol->mft_record_size_bits) + 53262306a36Sopenharmony_ci (block_start - m_start); 53362306a36Sopenharmony_ci vcn_ofs = vcn & vol->cluster_size_mask; 53462306a36Sopenharmony_ci vcn >>= vol->cluster_size_bits; 53562306a36Sopenharmony_ci if (!rl) { 53662306a36Sopenharmony_ci down_read(&NTFS_I(vol->mftmirr_ino)-> 53762306a36Sopenharmony_ci runlist.lock); 53862306a36Sopenharmony_ci rl = NTFS_I(vol->mftmirr_ino)->runlist.rl; 53962306a36Sopenharmony_ci /* 54062306a36Sopenharmony_ci * $MFTMirr always has the whole of its runlist 54162306a36Sopenharmony_ci * in memory. 54262306a36Sopenharmony_ci */ 54362306a36Sopenharmony_ci BUG_ON(!rl); 54462306a36Sopenharmony_ci } 54562306a36Sopenharmony_ci /* Seek to element containing target vcn. */ 54662306a36Sopenharmony_ci while (rl->length && rl[1].vcn <= vcn) 54762306a36Sopenharmony_ci rl++; 54862306a36Sopenharmony_ci lcn = ntfs_rl_vcn_to_lcn(rl, vcn); 54962306a36Sopenharmony_ci /* For $MFTMirr, only lcn >= 0 is a successful remap. */ 55062306a36Sopenharmony_ci if (likely(lcn >= 0)) { 55162306a36Sopenharmony_ci /* Setup buffer head to correct block. 
*/ 55262306a36Sopenharmony_ci bh->b_blocknr = ((lcn << 55362306a36Sopenharmony_ci vol->cluster_size_bits) + 55462306a36Sopenharmony_ci vcn_ofs) >> blocksize_bits; 55562306a36Sopenharmony_ci set_buffer_mapped(bh); 55662306a36Sopenharmony_ci } else { 55762306a36Sopenharmony_ci bh->b_blocknr = -1; 55862306a36Sopenharmony_ci ntfs_error(vol->sb, "Cannot write mft mirror " 55962306a36Sopenharmony_ci "record 0x%lx because its " 56062306a36Sopenharmony_ci "location on disk could not " 56162306a36Sopenharmony_ci "be determined (error code " 56262306a36Sopenharmony_ci "%lli).", mft_no, 56362306a36Sopenharmony_ci (long long)lcn); 56462306a36Sopenharmony_ci err = -EIO; 56562306a36Sopenharmony_ci } 56662306a36Sopenharmony_ci } 56762306a36Sopenharmony_ci BUG_ON(!buffer_uptodate(bh)); 56862306a36Sopenharmony_ci BUG_ON(!nr_bhs && (m_start != block_start)); 56962306a36Sopenharmony_ci BUG_ON(nr_bhs >= max_bhs); 57062306a36Sopenharmony_ci bhs[nr_bhs++] = bh; 57162306a36Sopenharmony_ci BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end)); 57262306a36Sopenharmony_ci } while (block_start = block_end, (bh = bh->b_this_page) != head); 57362306a36Sopenharmony_ci if (unlikely(rl)) 57462306a36Sopenharmony_ci up_read(&NTFS_I(vol->mftmirr_ino)->runlist.lock); 57562306a36Sopenharmony_ci if (likely(!err)) { 57662306a36Sopenharmony_ci /* Lock buffers and start synchronous write i/o on them. */ 57762306a36Sopenharmony_ci for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { 57862306a36Sopenharmony_ci struct buffer_head *tbh = bhs[i_bhs]; 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci if (!trylock_buffer(tbh)) 58162306a36Sopenharmony_ci BUG(); 58262306a36Sopenharmony_ci BUG_ON(!buffer_uptodate(tbh)); 58362306a36Sopenharmony_ci clear_buffer_dirty(tbh); 58462306a36Sopenharmony_ci get_bh(tbh); 58562306a36Sopenharmony_ci tbh->b_end_io = end_buffer_write_sync; 58662306a36Sopenharmony_ci submit_bh(REQ_OP_WRITE, tbh); 58762306a36Sopenharmony_ci } 58862306a36Sopenharmony_ci /* Wait on i/o completion of buffers. 
*/ 58962306a36Sopenharmony_ci for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { 59062306a36Sopenharmony_ci struct buffer_head *tbh = bhs[i_bhs]; 59162306a36Sopenharmony_ci 59262306a36Sopenharmony_ci wait_on_buffer(tbh); 59362306a36Sopenharmony_ci if (unlikely(!buffer_uptodate(tbh))) { 59462306a36Sopenharmony_ci err = -EIO; 59562306a36Sopenharmony_ci /* 59662306a36Sopenharmony_ci * Set the buffer uptodate so the page and 59762306a36Sopenharmony_ci * buffer states do not become out of sync. 59862306a36Sopenharmony_ci */ 59962306a36Sopenharmony_ci set_buffer_uptodate(tbh); 60062306a36Sopenharmony_ci } 60162306a36Sopenharmony_ci } 60262306a36Sopenharmony_ci } else /* if (unlikely(err)) */ { 60362306a36Sopenharmony_ci /* Clean the buffers. */ 60462306a36Sopenharmony_ci for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) 60562306a36Sopenharmony_ci clear_buffer_dirty(bhs[i_bhs]); 60662306a36Sopenharmony_ci } 60762306a36Sopenharmony_ci /* Current state: all buffers are clean, unlocked, and uptodate. */ 60862306a36Sopenharmony_ci /* Remove the mst protection fixups again. */ 60962306a36Sopenharmony_ci post_write_mst_fixup((NTFS_RECORD*)kmirr); 61062306a36Sopenharmony_ci flush_dcache_page(page); 61162306a36Sopenharmony_ci SetPageUptodate(page); 61262306a36Sopenharmony_ci unlock_page(page); 61362306a36Sopenharmony_ci ntfs_unmap_page(page); 61462306a36Sopenharmony_ci if (likely(!err)) { 61562306a36Sopenharmony_ci ntfs_debug("Done."); 61662306a36Sopenharmony_ci } else { 61762306a36Sopenharmony_ci ntfs_error(vol->sb, "I/O error while writing mft mirror " 61862306a36Sopenharmony_ci "record 0x%lx!", mft_no); 61962306a36Sopenharmony_cierr_out: 62062306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to synchronize $MFTMirr (error " 62162306a36Sopenharmony_ci "code %i). Volume will be left marked dirty " 62262306a36Sopenharmony_ci "on umount. 
Run ntfsfix on the partition " 62362306a36Sopenharmony_ci "after umounting to correct this.", -err); 62462306a36Sopenharmony_ci NVolSetErrors(vol); 62562306a36Sopenharmony_ci } 62662306a36Sopenharmony_ci return err; 62762306a36Sopenharmony_ci} 62862306a36Sopenharmony_ci 62962306a36Sopenharmony_ci/** 63062306a36Sopenharmony_ci * write_mft_record_nolock - write out a mapped (extent) mft record 63162306a36Sopenharmony_ci * @ni: ntfs inode describing the mapped (extent) mft record 63262306a36Sopenharmony_ci * @m: mapped (extent) mft record to write 63362306a36Sopenharmony_ci * @sync: if true, wait for i/o completion 63462306a36Sopenharmony_ci * 63562306a36Sopenharmony_ci * Write the mapped (extent) mft record @m described by the (regular or extent) 63662306a36Sopenharmony_ci * ntfs inode @ni to backing store. If the mft record @m has a counterpart in 63762306a36Sopenharmony_ci * the mft mirror, that is also updated. 63862306a36Sopenharmony_ci * 63962306a36Sopenharmony_ci * We only write the mft record if the ntfs inode @ni is dirty and the first 64062306a36Sopenharmony_ci * buffer belonging to its mft record is dirty, too. We ignore the dirty state 64162306a36Sopenharmony_ci * of subsequent buffers because we could have raced with 64262306a36Sopenharmony_ci * fs/ntfs/aops.c::mark_ntfs_record_dirty(). 64362306a36Sopenharmony_ci * 64462306a36Sopenharmony_ci * On success, clean the mft record and return 0. On error, leave the mft 64562306a36Sopenharmony_ci * record dirty and return -errno. 64662306a36Sopenharmony_ci * 64762306a36Sopenharmony_ci * NOTE: We always perform synchronous i/o and ignore the @sync parameter. 64862306a36Sopenharmony_ci * However, if the mft record has a counterpart in the mft mirror and @sync is 64962306a36Sopenharmony_ci * true, we write the mft record, wait for i/o completion, and only then write 65062306a36Sopenharmony_ci * the mft mirror copy. 
This ensures that if the system crashes either the mft 65162306a36Sopenharmony_ci * or the mft mirror will contain a self-consistent mft record @m. If @sync is 65262306a36Sopenharmony_ci * false on the other hand, we start i/o on both and then wait for completion 65362306a36Sopenharmony_ci * on them. This provides a speedup but no longer guarantees that you will end 65462306a36Sopenharmony_ci * up with a self-consistent mft record in the case of a crash but if you asked 65562306a36Sopenharmony_ci * for asynchronous writing you probably do not care about that anyway. 65662306a36Sopenharmony_ci * 65762306a36Sopenharmony_ci * TODO: If @sync is false, want to do truly asynchronous i/o, i.e. just 65862306a36Sopenharmony_ci * schedule i/o via ->writepage or do it via kntfsd or whatever. 65962306a36Sopenharmony_ci */ 66062306a36Sopenharmony_ciint write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) 66162306a36Sopenharmony_ci{ 66262306a36Sopenharmony_ci ntfs_volume *vol = ni->vol; 66362306a36Sopenharmony_ci struct page *page = ni->page; 66462306a36Sopenharmony_ci unsigned int blocksize = vol->sb->s_blocksize; 66562306a36Sopenharmony_ci unsigned char blocksize_bits = vol->sb->s_blocksize_bits; 66662306a36Sopenharmony_ci int max_bhs = vol->mft_record_size / blocksize; 66762306a36Sopenharmony_ci struct buffer_head *bhs[MAX_BHS]; 66862306a36Sopenharmony_ci struct buffer_head *bh, *head; 66962306a36Sopenharmony_ci runlist_element *rl; 67062306a36Sopenharmony_ci unsigned int block_start, block_end, m_start, m_end; 67162306a36Sopenharmony_ci int i_bhs, nr_bhs, err = 0; 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_ci ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); 67462306a36Sopenharmony_ci BUG_ON(NInoAttr(ni)); 67562306a36Sopenharmony_ci BUG_ON(!max_bhs); 67662306a36Sopenharmony_ci BUG_ON(!PageLocked(page)); 67762306a36Sopenharmony_ci if (WARN_ON(max_bhs > MAX_BHS)) { 67862306a36Sopenharmony_ci err = -EINVAL; 67962306a36Sopenharmony_ci goto err_out; 
68062306a36Sopenharmony_ci } 68162306a36Sopenharmony_ci /* 68262306a36Sopenharmony_ci * If the ntfs_inode is clean no need to do anything. If it is dirty, 68362306a36Sopenharmony_ci * mark it as clean now so that it can be redirtied later on if needed. 68462306a36Sopenharmony_ci * There is no danger of races since the caller is holding the locks 68562306a36Sopenharmony_ci * for the mft record @m and the page it is in. 68662306a36Sopenharmony_ci */ 68762306a36Sopenharmony_ci if (!NInoTestClearDirty(ni)) 68862306a36Sopenharmony_ci goto done; 68962306a36Sopenharmony_ci bh = head = page_buffers(page); 69062306a36Sopenharmony_ci BUG_ON(!bh); 69162306a36Sopenharmony_ci rl = NULL; 69262306a36Sopenharmony_ci nr_bhs = 0; 69362306a36Sopenharmony_ci block_start = 0; 69462306a36Sopenharmony_ci m_start = ni->page_ofs; 69562306a36Sopenharmony_ci m_end = m_start + vol->mft_record_size; 69662306a36Sopenharmony_ci do { 69762306a36Sopenharmony_ci block_end = block_start + blocksize; 69862306a36Sopenharmony_ci /* If the buffer is outside the mft record, skip it. */ 69962306a36Sopenharmony_ci if (block_end <= m_start) 70062306a36Sopenharmony_ci continue; 70162306a36Sopenharmony_ci if (unlikely(block_start >= m_end)) 70262306a36Sopenharmony_ci break; 70362306a36Sopenharmony_ci /* 70462306a36Sopenharmony_ci * If this block is not the first one in the record, we ignore 70562306a36Sopenharmony_ci * the buffer's dirty state because we could have raced with a 70662306a36Sopenharmony_ci * parallel mark_ntfs_record_dirty(). 70762306a36Sopenharmony_ci */ 70862306a36Sopenharmony_ci if (block_start == m_start) { 70962306a36Sopenharmony_ci /* This block is the first one in the record. */ 71062306a36Sopenharmony_ci if (!buffer_dirty(bh)) { 71162306a36Sopenharmony_ci BUG_ON(nr_bhs); 71262306a36Sopenharmony_ci /* Clean records are not written out. 
*/ 71362306a36Sopenharmony_ci break; 71462306a36Sopenharmony_ci } 71562306a36Sopenharmony_ci } 71662306a36Sopenharmony_ci /* Need to map the buffer if it is not mapped already. */ 71762306a36Sopenharmony_ci if (unlikely(!buffer_mapped(bh))) { 71862306a36Sopenharmony_ci VCN vcn; 71962306a36Sopenharmony_ci LCN lcn; 72062306a36Sopenharmony_ci unsigned int vcn_ofs; 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci bh->b_bdev = vol->sb->s_bdev; 72362306a36Sopenharmony_ci /* Obtain the vcn and offset of the current block. */ 72462306a36Sopenharmony_ci vcn = ((VCN)ni->mft_no << vol->mft_record_size_bits) + 72562306a36Sopenharmony_ci (block_start - m_start); 72662306a36Sopenharmony_ci vcn_ofs = vcn & vol->cluster_size_mask; 72762306a36Sopenharmony_ci vcn >>= vol->cluster_size_bits; 72862306a36Sopenharmony_ci if (!rl) { 72962306a36Sopenharmony_ci down_read(&NTFS_I(vol->mft_ino)->runlist.lock); 73062306a36Sopenharmony_ci rl = NTFS_I(vol->mft_ino)->runlist.rl; 73162306a36Sopenharmony_ci BUG_ON(!rl); 73262306a36Sopenharmony_ci } 73362306a36Sopenharmony_ci /* Seek to element containing target vcn. */ 73462306a36Sopenharmony_ci while (rl->length && rl[1].vcn <= vcn) 73562306a36Sopenharmony_ci rl++; 73662306a36Sopenharmony_ci lcn = ntfs_rl_vcn_to_lcn(rl, vcn); 73762306a36Sopenharmony_ci /* For $MFT, only lcn >= 0 is a successful remap. */ 73862306a36Sopenharmony_ci if (likely(lcn >= 0)) { 73962306a36Sopenharmony_ci /* Setup buffer head to correct block. 
*/ 74062306a36Sopenharmony_ci bh->b_blocknr = ((lcn << 74162306a36Sopenharmony_ci vol->cluster_size_bits) + 74262306a36Sopenharmony_ci vcn_ofs) >> blocksize_bits; 74362306a36Sopenharmony_ci set_buffer_mapped(bh); 74462306a36Sopenharmony_ci } else { 74562306a36Sopenharmony_ci bh->b_blocknr = -1; 74662306a36Sopenharmony_ci ntfs_error(vol->sb, "Cannot write mft record " 74762306a36Sopenharmony_ci "0x%lx because its location " 74862306a36Sopenharmony_ci "on disk could not be " 74962306a36Sopenharmony_ci "determined (error code %lli).", 75062306a36Sopenharmony_ci ni->mft_no, (long long)lcn); 75162306a36Sopenharmony_ci err = -EIO; 75262306a36Sopenharmony_ci } 75362306a36Sopenharmony_ci } 75462306a36Sopenharmony_ci BUG_ON(!buffer_uptodate(bh)); 75562306a36Sopenharmony_ci BUG_ON(!nr_bhs && (m_start != block_start)); 75662306a36Sopenharmony_ci BUG_ON(nr_bhs >= max_bhs); 75762306a36Sopenharmony_ci bhs[nr_bhs++] = bh; 75862306a36Sopenharmony_ci BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end)); 75962306a36Sopenharmony_ci } while (block_start = block_end, (bh = bh->b_this_page) != head); 76062306a36Sopenharmony_ci if (unlikely(rl)) 76162306a36Sopenharmony_ci up_read(&NTFS_I(vol->mft_ino)->runlist.lock); 76262306a36Sopenharmony_ci if (!nr_bhs) 76362306a36Sopenharmony_ci goto done; 76462306a36Sopenharmony_ci if (unlikely(err)) 76562306a36Sopenharmony_ci goto cleanup_out; 76662306a36Sopenharmony_ci /* Apply the mst protection fixups. */ 76762306a36Sopenharmony_ci err = pre_write_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size); 76862306a36Sopenharmony_ci if (err) { 76962306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to apply mst fixups!"); 77062306a36Sopenharmony_ci goto cleanup_out; 77162306a36Sopenharmony_ci } 77262306a36Sopenharmony_ci flush_dcache_mft_record_page(ni); 77362306a36Sopenharmony_ci /* Lock buffers and start synchronous write i/o on them. 
*/ 77462306a36Sopenharmony_ci for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { 77562306a36Sopenharmony_ci struct buffer_head *tbh = bhs[i_bhs]; 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ci if (!trylock_buffer(tbh)) 77862306a36Sopenharmony_ci BUG(); 77962306a36Sopenharmony_ci BUG_ON(!buffer_uptodate(tbh)); 78062306a36Sopenharmony_ci clear_buffer_dirty(tbh); 78162306a36Sopenharmony_ci get_bh(tbh); 78262306a36Sopenharmony_ci tbh->b_end_io = end_buffer_write_sync; 78362306a36Sopenharmony_ci submit_bh(REQ_OP_WRITE, tbh); 78462306a36Sopenharmony_ci } 78562306a36Sopenharmony_ci /* Synchronize the mft mirror now if not @sync. */ 78662306a36Sopenharmony_ci if (!sync && ni->mft_no < vol->mftmirr_size) 78762306a36Sopenharmony_ci ntfs_sync_mft_mirror(vol, ni->mft_no, m, sync); 78862306a36Sopenharmony_ci /* Wait on i/o completion of buffers. */ 78962306a36Sopenharmony_ci for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { 79062306a36Sopenharmony_ci struct buffer_head *tbh = bhs[i_bhs]; 79162306a36Sopenharmony_ci 79262306a36Sopenharmony_ci wait_on_buffer(tbh); 79362306a36Sopenharmony_ci if (unlikely(!buffer_uptodate(tbh))) { 79462306a36Sopenharmony_ci err = -EIO; 79562306a36Sopenharmony_ci /* 79662306a36Sopenharmony_ci * Set the buffer uptodate so the page and buffer 79762306a36Sopenharmony_ci * states do not become out of sync. 79862306a36Sopenharmony_ci */ 79962306a36Sopenharmony_ci if (PageUptodate(page)) 80062306a36Sopenharmony_ci set_buffer_uptodate(tbh); 80162306a36Sopenharmony_ci } 80262306a36Sopenharmony_ci } 80362306a36Sopenharmony_ci /* If @sync, now synchronize the mft mirror. */ 80462306a36Sopenharmony_ci if (sync && ni->mft_no < vol->mftmirr_size) 80562306a36Sopenharmony_ci ntfs_sync_mft_mirror(vol, ni->mft_no, m, sync); 80662306a36Sopenharmony_ci /* Remove the mst protection fixups again. 
*/ 80762306a36Sopenharmony_ci post_write_mst_fixup((NTFS_RECORD*)m); 80862306a36Sopenharmony_ci flush_dcache_mft_record_page(ni); 80962306a36Sopenharmony_ci if (unlikely(err)) { 81062306a36Sopenharmony_ci /* I/O error during writing. This is really bad! */ 81162306a36Sopenharmony_ci ntfs_error(vol->sb, "I/O error while writing mft record " 81262306a36Sopenharmony_ci "0x%lx! Marking base inode as bad. You " 81362306a36Sopenharmony_ci "should unmount the volume and run chkdsk.", 81462306a36Sopenharmony_ci ni->mft_no); 81562306a36Sopenharmony_ci goto err_out; 81662306a36Sopenharmony_ci } 81762306a36Sopenharmony_cidone: 81862306a36Sopenharmony_ci ntfs_debug("Done."); 81962306a36Sopenharmony_ci return 0; 82062306a36Sopenharmony_cicleanup_out: 82162306a36Sopenharmony_ci /* Clean the buffers. */ 82262306a36Sopenharmony_ci for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) 82362306a36Sopenharmony_ci clear_buffer_dirty(bhs[i_bhs]); 82462306a36Sopenharmony_cierr_out: 82562306a36Sopenharmony_ci /* 82662306a36Sopenharmony_ci * Current state: all buffers are clean, unlocked, and uptodate. 82762306a36Sopenharmony_ci * The caller should mark the base inode as bad so that no more i/o 82862306a36Sopenharmony_ci * happens. ->clear_inode() will still be invoked so all extent inodes 82962306a36Sopenharmony_ci * and other allocated memory will be freed. 83062306a36Sopenharmony_ci */ 83162306a36Sopenharmony_ci if (err == -ENOMEM) { 83262306a36Sopenharmony_ci ntfs_error(vol->sb, "Not enough memory to write mft record. 
" 83362306a36Sopenharmony_ci "Redirtying so the write is retried later."); 83462306a36Sopenharmony_ci mark_mft_record_dirty(ni); 83562306a36Sopenharmony_ci err = 0; 83662306a36Sopenharmony_ci } else 83762306a36Sopenharmony_ci NVolSetErrors(vol); 83862306a36Sopenharmony_ci return err; 83962306a36Sopenharmony_ci} 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_ci/** 84262306a36Sopenharmony_ci * ntfs_may_write_mft_record - check if an mft record may be written out 84362306a36Sopenharmony_ci * @vol: [IN] ntfs volume on which the mft record to check resides 84462306a36Sopenharmony_ci * @mft_no: [IN] mft record number of the mft record to check 84562306a36Sopenharmony_ci * @m: [IN] mapped mft record to check 84662306a36Sopenharmony_ci * @locked_ni: [OUT] caller has to unlock this ntfs inode if one is returned 84762306a36Sopenharmony_ci * 84862306a36Sopenharmony_ci * Check if the mapped (base or extent) mft record @m with mft record number 84962306a36Sopenharmony_ci * @mft_no belonging to the ntfs volume @vol may be written out. If necessary 85062306a36Sopenharmony_ci * and possible the ntfs inode of the mft record is locked and the base vfs 85162306a36Sopenharmony_ci * inode is pinned. The locked ntfs inode is then returned in @locked_ni. The 85262306a36Sopenharmony_ci * caller is responsible for unlocking the ntfs inode and unpinning the base 85362306a36Sopenharmony_ci * vfs inode. 85462306a36Sopenharmony_ci * 85562306a36Sopenharmony_ci * Return 'true' if the mft record may be written out and 'false' if not. 
85662306a36Sopenharmony_ci * 85762306a36Sopenharmony_ci * The caller has locked the page and cleared the uptodate flag on it which 85862306a36Sopenharmony_ci * means that we can safely write out any dirty mft records that do not have 85962306a36Sopenharmony_ci * their inodes in icache as determined by ilookup5() as anyone 86062306a36Sopenharmony_ci * opening/creating such an inode would block when attempting to map the mft 86162306a36Sopenharmony_ci * record in read_cache_page() until we are finished with the write out. 86262306a36Sopenharmony_ci * 86362306a36Sopenharmony_ci * Here is a description of the tests we perform: 86462306a36Sopenharmony_ci * 86562306a36Sopenharmony_ci * If the inode is found in icache we know the mft record must be a base mft 86662306a36Sopenharmony_ci * record. If it is dirty, we do not write it and return 'false' as the vfs 86762306a36Sopenharmony_ci * inode write paths will result in the access times being updated which would 86862306a36Sopenharmony_ci * cause the base mft record to be redirtied and written out again. (We know 86962306a36Sopenharmony_ci * the access time update will modify the base mft record because Windows 87062306a36Sopenharmony_ci * chkdsk complains if the standard information attribute is not in the base 87162306a36Sopenharmony_ci * mft record.) 87262306a36Sopenharmony_ci * 87362306a36Sopenharmony_ci * If the inode is in icache and not dirty, we attempt to lock the mft record 87462306a36Sopenharmony_ci * and if we find the lock was already taken, it is not safe to write the mft 87562306a36Sopenharmony_ci * record and we return 'false'. 87662306a36Sopenharmony_ci * 87762306a36Sopenharmony_ci * If we manage to obtain the lock we have exclusive access to the mft record, 87862306a36Sopenharmony_ci * which also allows us safe writeout of the mft record. We then set 87962306a36Sopenharmony_ci * @locked_ni to the locked ntfs inode and return 'true'. 
88062306a36Sopenharmony_ci * 88162306a36Sopenharmony_ci * Note we cannot just lock the mft record and sleep while waiting for the lock 88262306a36Sopenharmony_ci * because this would deadlock due to lock reversal (normally the mft record is 88362306a36Sopenharmony_ci * locked before the page is locked but we already have the page locked here 88462306a36Sopenharmony_ci * when we try to lock the mft record). 88562306a36Sopenharmony_ci * 88662306a36Sopenharmony_ci * If the inode is not in icache we need to perform further checks. 88762306a36Sopenharmony_ci * 88862306a36Sopenharmony_ci * If the mft record is not a FILE record or it is a base mft record, we can 88962306a36Sopenharmony_ci * safely write it and return 'true'. 89062306a36Sopenharmony_ci * 89162306a36Sopenharmony_ci * We now know the mft record is an extent mft record. We check if the inode 89262306a36Sopenharmony_ci * corresponding to its base mft record is in icache and obtain a reference to 89362306a36Sopenharmony_ci * it if it is. If it is not, we can safely write it and return 'true'. 89462306a36Sopenharmony_ci * 89562306a36Sopenharmony_ci * We now have the base inode for the extent mft record. We check if it has an 89662306a36Sopenharmony_ci * ntfs inode for the extent mft record attached and if not it is safe to write 89762306a36Sopenharmony_ci * the extent mft record and we return 'true'. 89862306a36Sopenharmony_ci * 89962306a36Sopenharmony_ci * The ntfs inode for the extent mft record is attached to the base inode so we 90062306a36Sopenharmony_ci * attempt to lock the extent mft record and if we find the lock was already 90162306a36Sopenharmony_ci * taken, it is not safe to write the extent mft record and we return 'false'. 90262306a36Sopenharmony_ci * 90362306a36Sopenharmony_ci * If we manage to obtain the lock we have exclusive access to the extent mft 90462306a36Sopenharmony_ci * record, which also allows us safe writeout of the extent mft record. 
We 90562306a36Sopenharmony_ci * set the ntfs inode of the extent mft record clean and then set @locked_ni to 90662306a36Sopenharmony_ci * the now locked ntfs inode and return 'true'. 90762306a36Sopenharmony_ci * 90862306a36Sopenharmony_ci * Note, the reason for actually writing dirty mft records here and not just 90962306a36Sopenharmony_ci * relying on the vfs inode dirty code paths is that we can have mft records 91062306a36Sopenharmony_ci * modified without them ever having actual inodes in memory. Also we can have 91162306a36Sopenharmony_ci * dirty mft records with clean ntfs inodes in memory. None of the described 91262306a36Sopenharmony_ci * cases would result in the dirty mft records being written out if we only 91362306a36Sopenharmony_ci * relied on the vfs inode dirty code paths. And these cases can really occur 91462306a36Sopenharmony_ci * during allocation of new mft records and in particular when the 91562306a36Sopenharmony_ci * initialized_size of the $MFT/$DATA attribute is extended and the new space 91662306a36Sopenharmony_ci * is initialized using ntfs_mft_record_format(). The clean inode can then 91762306a36Sopenharmony_ci * appear if the mft record is reused for a new inode before it got written 91862306a36Sopenharmony_ci * out. 
91962306a36Sopenharmony_ci */ 92062306a36Sopenharmony_cibool ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, 92162306a36Sopenharmony_ci const MFT_RECORD *m, ntfs_inode **locked_ni) 92262306a36Sopenharmony_ci{ 92362306a36Sopenharmony_ci struct super_block *sb = vol->sb; 92462306a36Sopenharmony_ci struct inode *mft_vi = vol->mft_ino; 92562306a36Sopenharmony_ci struct inode *vi; 92662306a36Sopenharmony_ci ntfs_inode *ni, *eni, **extent_nis; 92762306a36Sopenharmony_ci int i; 92862306a36Sopenharmony_ci ntfs_attr na; 92962306a36Sopenharmony_ci 93062306a36Sopenharmony_ci ntfs_debug("Entering for inode 0x%lx.", mft_no); 93162306a36Sopenharmony_ci /* 93262306a36Sopenharmony_ci * Normally we do not return a locked inode so set @locked_ni to NULL. 93362306a36Sopenharmony_ci */ 93462306a36Sopenharmony_ci BUG_ON(!locked_ni); 93562306a36Sopenharmony_ci *locked_ni = NULL; 93662306a36Sopenharmony_ci /* 93762306a36Sopenharmony_ci * Check if the inode corresponding to this mft record is in the VFS 93862306a36Sopenharmony_ci * inode cache and obtain a reference to it if it is. 93962306a36Sopenharmony_ci */ 94062306a36Sopenharmony_ci ntfs_debug("Looking for inode 0x%lx in icache.", mft_no); 94162306a36Sopenharmony_ci na.mft_no = mft_no; 94262306a36Sopenharmony_ci na.name = NULL; 94362306a36Sopenharmony_ci na.name_len = 0; 94462306a36Sopenharmony_ci na.type = AT_UNUSED; 94562306a36Sopenharmony_ci /* 94662306a36Sopenharmony_ci * Optimize inode 0, i.e. $MFT itself, since we have it in memory and 94762306a36Sopenharmony_ci * we get here for it rather often. 94862306a36Sopenharmony_ci */ 94962306a36Sopenharmony_ci if (!mft_no) { 95062306a36Sopenharmony_ci /* Balance the below iput(). 
*/ 95162306a36Sopenharmony_ci vi = igrab(mft_vi); 95262306a36Sopenharmony_ci BUG_ON(vi != mft_vi); 95362306a36Sopenharmony_ci } else { 95462306a36Sopenharmony_ci /* 95562306a36Sopenharmony_ci * Have to use ilookup5_nowait() since ilookup5() waits for the 95662306a36Sopenharmony_ci * inode lock which causes ntfs to deadlock when a concurrent 95762306a36Sopenharmony_ci * inode write via the inode dirty code paths and the page 95862306a36Sopenharmony_ci * dirty code path of the inode dirty code path when writing 95962306a36Sopenharmony_ci * $MFT occurs. 96062306a36Sopenharmony_ci */ 96162306a36Sopenharmony_ci vi = ilookup5_nowait(sb, mft_no, ntfs_test_inode, &na); 96262306a36Sopenharmony_ci } 96362306a36Sopenharmony_ci if (vi) { 96462306a36Sopenharmony_ci ntfs_debug("Base inode 0x%lx is in icache.", mft_no); 96562306a36Sopenharmony_ci /* The inode is in icache. */ 96662306a36Sopenharmony_ci ni = NTFS_I(vi); 96762306a36Sopenharmony_ci /* Take a reference to the ntfs inode. */ 96862306a36Sopenharmony_ci atomic_inc(&ni->count); 96962306a36Sopenharmony_ci /* If the inode is dirty, do not write this record. */ 97062306a36Sopenharmony_ci if (NInoDirty(ni)) { 97162306a36Sopenharmony_ci ntfs_debug("Inode 0x%lx is dirty, do not write it.", 97262306a36Sopenharmony_ci mft_no); 97362306a36Sopenharmony_ci atomic_dec(&ni->count); 97462306a36Sopenharmony_ci iput(vi); 97562306a36Sopenharmony_ci return false; 97662306a36Sopenharmony_ci } 97762306a36Sopenharmony_ci ntfs_debug("Inode 0x%lx is not dirty.", mft_no); 97862306a36Sopenharmony_ci /* The inode is not dirty, try to take the mft record lock. 
*/ 97962306a36Sopenharmony_ci if (unlikely(!mutex_trylock(&ni->mrec_lock))) { 98062306a36Sopenharmony_ci ntfs_debug("Mft record 0x%lx is already locked, do " 98162306a36Sopenharmony_ci "not write it.", mft_no); 98262306a36Sopenharmony_ci atomic_dec(&ni->count); 98362306a36Sopenharmony_ci iput(vi); 98462306a36Sopenharmony_ci return false; 98562306a36Sopenharmony_ci } 98662306a36Sopenharmony_ci ntfs_debug("Managed to lock mft record 0x%lx, write it.", 98762306a36Sopenharmony_ci mft_no); 98862306a36Sopenharmony_ci /* 98962306a36Sopenharmony_ci * The write has to occur while we hold the mft record lock so 99062306a36Sopenharmony_ci * return the locked ntfs inode. 99162306a36Sopenharmony_ci */ 99262306a36Sopenharmony_ci *locked_ni = ni; 99362306a36Sopenharmony_ci return true; 99462306a36Sopenharmony_ci } 99562306a36Sopenharmony_ci ntfs_debug("Inode 0x%lx is not in icache.", mft_no); 99662306a36Sopenharmony_ci /* The inode is not in icache. */ 99762306a36Sopenharmony_ci /* Write the record if it is not a mft record (type "FILE"). */ 99862306a36Sopenharmony_ci if (!ntfs_is_mft_record(m->magic)) { 99962306a36Sopenharmony_ci ntfs_debug("Mft record 0x%lx is not a FILE record, write it.", 100062306a36Sopenharmony_ci mft_no); 100162306a36Sopenharmony_ci return true; 100262306a36Sopenharmony_ci } 100362306a36Sopenharmony_ci /* Write the mft record if it is a base inode. */ 100462306a36Sopenharmony_ci if (!m->base_mft_record) { 100562306a36Sopenharmony_ci ntfs_debug("Mft record 0x%lx is a base record, write it.", 100662306a36Sopenharmony_ci mft_no); 100762306a36Sopenharmony_ci return true; 100862306a36Sopenharmony_ci } 100962306a36Sopenharmony_ci /* 101062306a36Sopenharmony_ci * This is an extent mft record. Check if the inode corresponding to 101162306a36Sopenharmony_ci * its base mft record is in icache and obtain a reference to it if it 101262306a36Sopenharmony_ci * is. 
101362306a36Sopenharmony_ci */ 101462306a36Sopenharmony_ci na.mft_no = MREF_LE(m->base_mft_record); 101562306a36Sopenharmony_ci ntfs_debug("Mft record 0x%lx is an extent record. Looking for base " 101662306a36Sopenharmony_ci "inode 0x%lx in icache.", mft_no, na.mft_no); 101762306a36Sopenharmony_ci if (!na.mft_no) { 101862306a36Sopenharmony_ci /* Balance the below iput(). */ 101962306a36Sopenharmony_ci vi = igrab(mft_vi); 102062306a36Sopenharmony_ci BUG_ON(vi != mft_vi); 102162306a36Sopenharmony_ci } else 102262306a36Sopenharmony_ci vi = ilookup5_nowait(sb, na.mft_no, ntfs_test_inode, 102362306a36Sopenharmony_ci &na); 102462306a36Sopenharmony_ci if (!vi) { 102562306a36Sopenharmony_ci /* 102662306a36Sopenharmony_ci * The base inode is not in icache, write this extent mft 102762306a36Sopenharmony_ci * record. 102862306a36Sopenharmony_ci */ 102962306a36Sopenharmony_ci ntfs_debug("Base inode 0x%lx is not in icache, write the " 103062306a36Sopenharmony_ci "extent record.", na.mft_no); 103162306a36Sopenharmony_ci return true; 103262306a36Sopenharmony_ci } 103362306a36Sopenharmony_ci ntfs_debug("Base inode 0x%lx is in icache.", na.mft_no); 103462306a36Sopenharmony_ci /* 103562306a36Sopenharmony_ci * The base inode is in icache. Check if it has the extent inode 103662306a36Sopenharmony_ci * corresponding to this extent mft record attached. 103762306a36Sopenharmony_ci */ 103862306a36Sopenharmony_ci ni = NTFS_I(vi); 103962306a36Sopenharmony_ci mutex_lock(&ni->extent_lock); 104062306a36Sopenharmony_ci if (ni->nr_extents <= 0) { 104162306a36Sopenharmony_ci /* 104262306a36Sopenharmony_ci * The base inode has no attached extent inodes, write this 104362306a36Sopenharmony_ci * extent mft record. 
104462306a36Sopenharmony_ci */ 104562306a36Sopenharmony_ci mutex_unlock(&ni->extent_lock); 104662306a36Sopenharmony_ci iput(vi); 104762306a36Sopenharmony_ci ntfs_debug("Base inode 0x%lx has no attached extent inodes, " 104862306a36Sopenharmony_ci "write the extent record.", na.mft_no); 104962306a36Sopenharmony_ci return true; 105062306a36Sopenharmony_ci } 105162306a36Sopenharmony_ci /* Iterate over the attached extent inodes. */ 105262306a36Sopenharmony_ci extent_nis = ni->ext.extent_ntfs_inos; 105362306a36Sopenharmony_ci for (eni = NULL, i = 0; i < ni->nr_extents; ++i) { 105462306a36Sopenharmony_ci if (mft_no == extent_nis[i]->mft_no) { 105562306a36Sopenharmony_ci /* 105662306a36Sopenharmony_ci * Found the extent inode corresponding to this extent 105762306a36Sopenharmony_ci * mft record. 105862306a36Sopenharmony_ci */ 105962306a36Sopenharmony_ci eni = extent_nis[i]; 106062306a36Sopenharmony_ci break; 106162306a36Sopenharmony_ci } 106262306a36Sopenharmony_ci } 106362306a36Sopenharmony_ci /* 106462306a36Sopenharmony_ci * If the extent inode was not attached to the base inode, write this 106562306a36Sopenharmony_ci * extent mft record. 106662306a36Sopenharmony_ci */ 106762306a36Sopenharmony_ci if (!eni) { 106862306a36Sopenharmony_ci mutex_unlock(&ni->extent_lock); 106962306a36Sopenharmony_ci iput(vi); 107062306a36Sopenharmony_ci ntfs_debug("Extent inode 0x%lx is not attached to its base " 107162306a36Sopenharmony_ci "inode 0x%lx, write the extent record.", 107262306a36Sopenharmony_ci mft_no, na.mft_no); 107362306a36Sopenharmony_ci return true; 107462306a36Sopenharmony_ci } 107562306a36Sopenharmony_ci ntfs_debug("Extent inode 0x%lx is attached to its base inode 0x%lx.", 107662306a36Sopenharmony_ci mft_no, na.mft_no); 107762306a36Sopenharmony_ci /* Take a reference to the extent ntfs inode. 
*/ 107862306a36Sopenharmony_ci atomic_inc(&eni->count); 107962306a36Sopenharmony_ci mutex_unlock(&ni->extent_lock); 108062306a36Sopenharmony_ci /* 108162306a36Sopenharmony_ci * Found the extent inode coresponding to this extent mft record. 108262306a36Sopenharmony_ci * Try to take the mft record lock. 108362306a36Sopenharmony_ci */ 108462306a36Sopenharmony_ci if (unlikely(!mutex_trylock(&eni->mrec_lock))) { 108562306a36Sopenharmony_ci atomic_dec(&eni->count); 108662306a36Sopenharmony_ci iput(vi); 108762306a36Sopenharmony_ci ntfs_debug("Extent mft record 0x%lx is already locked, do " 108862306a36Sopenharmony_ci "not write it.", mft_no); 108962306a36Sopenharmony_ci return false; 109062306a36Sopenharmony_ci } 109162306a36Sopenharmony_ci ntfs_debug("Managed to lock extent mft record 0x%lx, write it.", 109262306a36Sopenharmony_ci mft_no); 109362306a36Sopenharmony_ci if (NInoTestClearDirty(eni)) 109462306a36Sopenharmony_ci ntfs_debug("Extent inode 0x%lx is dirty, marking it clean.", 109562306a36Sopenharmony_ci mft_no); 109662306a36Sopenharmony_ci /* 109762306a36Sopenharmony_ci * The write has to occur while we hold the mft record lock so return 109862306a36Sopenharmony_ci * the locked extent ntfs inode. 109962306a36Sopenharmony_ci */ 110062306a36Sopenharmony_ci *locked_ni = eni; 110162306a36Sopenharmony_ci return true; 110262306a36Sopenharmony_ci} 110362306a36Sopenharmony_ci 110462306a36Sopenharmony_cistatic const char *es = " Leaving inconsistent metadata. 
Unmount and run " 110562306a36Sopenharmony_ci "chkdsk."; 110662306a36Sopenharmony_ci 110762306a36Sopenharmony_ci/** 110862306a36Sopenharmony_ci * ntfs_mft_bitmap_find_and_alloc_free_rec_nolock - see name 110962306a36Sopenharmony_ci * @vol: volume on which to search for a free mft record 111062306a36Sopenharmony_ci * @base_ni: open base inode if allocating an extent mft record or NULL 111162306a36Sopenharmony_ci * 111262306a36Sopenharmony_ci * Search for a free mft record in the mft bitmap attribute on the ntfs volume 111362306a36Sopenharmony_ci * @vol. 111462306a36Sopenharmony_ci * 111562306a36Sopenharmony_ci * If @base_ni is NULL start the search at the default allocator position. 111662306a36Sopenharmony_ci * 111762306a36Sopenharmony_ci * If @base_ni is not NULL start the search at the mft record after the base 111862306a36Sopenharmony_ci * mft record @base_ni. 111962306a36Sopenharmony_ci * 112062306a36Sopenharmony_ci * Return the free mft record on success and -errno on error. An error code of 112162306a36Sopenharmony_ci * -ENOSPC means that there are no free mft records in the currently 112262306a36Sopenharmony_ci * initialized mft bitmap. 112362306a36Sopenharmony_ci * 112462306a36Sopenharmony_ci * Locking: Caller must hold vol->mftbmp_lock for writing. 
112562306a36Sopenharmony_ci */ 112662306a36Sopenharmony_cistatic int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol, 112762306a36Sopenharmony_ci ntfs_inode *base_ni) 112862306a36Sopenharmony_ci{ 112962306a36Sopenharmony_ci s64 pass_end, ll, data_pos, pass_start, ofs, bit; 113062306a36Sopenharmony_ci unsigned long flags; 113162306a36Sopenharmony_ci struct address_space *mftbmp_mapping; 113262306a36Sopenharmony_ci u8 *buf, *byte; 113362306a36Sopenharmony_ci struct page *page; 113462306a36Sopenharmony_ci unsigned int page_ofs, size; 113562306a36Sopenharmony_ci u8 pass, b; 113662306a36Sopenharmony_ci 113762306a36Sopenharmony_ci ntfs_debug("Searching for free mft record in the currently " 113862306a36Sopenharmony_ci "initialized mft bitmap."); 113962306a36Sopenharmony_ci mftbmp_mapping = vol->mftbmp_ino->i_mapping; 114062306a36Sopenharmony_ci /* 114162306a36Sopenharmony_ci * Set the end of the pass making sure we do not overflow the mft 114262306a36Sopenharmony_ci * bitmap. 114362306a36Sopenharmony_ci */ 114462306a36Sopenharmony_ci read_lock_irqsave(&NTFS_I(vol->mft_ino)->size_lock, flags); 114562306a36Sopenharmony_ci pass_end = NTFS_I(vol->mft_ino)->allocated_size >> 114662306a36Sopenharmony_ci vol->mft_record_size_bits; 114762306a36Sopenharmony_ci read_unlock_irqrestore(&NTFS_I(vol->mft_ino)->size_lock, flags); 114862306a36Sopenharmony_ci read_lock_irqsave(&NTFS_I(vol->mftbmp_ino)->size_lock, flags); 114962306a36Sopenharmony_ci ll = NTFS_I(vol->mftbmp_ino)->initialized_size << 3; 115062306a36Sopenharmony_ci read_unlock_irqrestore(&NTFS_I(vol->mftbmp_ino)->size_lock, flags); 115162306a36Sopenharmony_ci if (pass_end > ll) 115262306a36Sopenharmony_ci pass_end = ll; 115362306a36Sopenharmony_ci pass = 1; 115462306a36Sopenharmony_ci if (!base_ni) 115562306a36Sopenharmony_ci data_pos = vol->mft_data_pos; 115662306a36Sopenharmony_ci else 115762306a36Sopenharmony_ci data_pos = base_ni->mft_no + 1; 115862306a36Sopenharmony_ci if (data_pos < 24) 
115962306a36Sopenharmony_ci data_pos = 24; 116062306a36Sopenharmony_ci if (data_pos >= pass_end) { 116162306a36Sopenharmony_ci data_pos = 24; 116262306a36Sopenharmony_ci pass = 2; 116362306a36Sopenharmony_ci /* This happens on a freshly formatted volume. */ 116462306a36Sopenharmony_ci if (data_pos >= pass_end) 116562306a36Sopenharmony_ci return -ENOSPC; 116662306a36Sopenharmony_ci } 116762306a36Sopenharmony_ci pass_start = data_pos; 116862306a36Sopenharmony_ci ntfs_debug("Starting bitmap search: pass %u, pass_start 0x%llx, " 116962306a36Sopenharmony_ci "pass_end 0x%llx, data_pos 0x%llx.", pass, 117062306a36Sopenharmony_ci (long long)pass_start, (long long)pass_end, 117162306a36Sopenharmony_ci (long long)data_pos); 117262306a36Sopenharmony_ci /* Loop until a free mft record is found. */ 117362306a36Sopenharmony_ci for (; pass <= 2;) { 117462306a36Sopenharmony_ci /* Cap size to pass_end. */ 117562306a36Sopenharmony_ci ofs = data_pos >> 3; 117662306a36Sopenharmony_ci page_ofs = ofs & ~PAGE_MASK; 117762306a36Sopenharmony_ci size = PAGE_SIZE - page_ofs; 117862306a36Sopenharmony_ci ll = ((pass_end + 7) >> 3) - ofs; 117962306a36Sopenharmony_ci if (size > ll) 118062306a36Sopenharmony_ci size = ll; 118162306a36Sopenharmony_ci size <<= 3; 118262306a36Sopenharmony_ci /* 118362306a36Sopenharmony_ci * If we are still within the active pass, search the next page 118462306a36Sopenharmony_ci * for a zero bit. 
118562306a36Sopenharmony_ci */ 118662306a36Sopenharmony_ci if (size) { 118762306a36Sopenharmony_ci page = ntfs_map_page(mftbmp_mapping, 118862306a36Sopenharmony_ci ofs >> PAGE_SHIFT); 118962306a36Sopenharmony_ci if (IS_ERR(page)) { 119062306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to read mft " 119162306a36Sopenharmony_ci "bitmap, aborting."); 119262306a36Sopenharmony_ci return PTR_ERR(page); 119362306a36Sopenharmony_ci } 119462306a36Sopenharmony_ci buf = (u8*)page_address(page) + page_ofs; 119562306a36Sopenharmony_ci bit = data_pos & 7; 119662306a36Sopenharmony_ci data_pos &= ~7ull; 119762306a36Sopenharmony_ci ntfs_debug("Before inner for loop: size 0x%x, " 119862306a36Sopenharmony_ci "data_pos 0x%llx, bit 0x%llx", size, 119962306a36Sopenharmony_ci (long long)data_pos, (long long)bit); 120062306a36Sopenharmony_ci for (; bit < size && data_pos + bit < pass_end; 120162306a36Sopenharmony_ci bit &= ~7ull, bit += 8) { 120262306a36Sopenharmony_ci byte = buf + (bit >> 3); 120362306a36Sopenharmony_ci if (*byte == 0xff) 120462306a36Sopenharmony_ci continue; 120562306a36Sopenharmony_ci b = ffz((unsigned long)*byte); 120662306a36Sopenharmony_ci if (b < 8 && b >= (bit & 7)) { 120762306a36Sopenharmony_ci ll = data_pos + (bit & ~7ull) + b; 120862306a36Sopenharmony_ci if (unlikely(ll > (1ll << 32))) { 120962306a36Sopenharmony_ci ntfs_unmap_page(page); 121062306a36Sopenharmony_ci return -ENOSPC; 121162306a36Sopenharmony_ci } 121262306a36Sopenharmony_ci *byte |= 1 << b; 121362306a36Sopenharmony_ci flush_dcache_page(page); 121462306a36Sopenharmony_ci set_page_dirty(page); 121562306a36Sopenharmony_ci ntfs_unmap_page(page); 121662306a36Sopenharmony_ci ntfs_debug("Done. 
(Found and " 121762306a36Sopenharmony_ci "allocated mft record " 121862306a36Sopenharmony_ci "0x%llx.)", 121962306a36Sopenharmony_ci (long long)ll); 122062306a36Sopenharmony_ci return ll; 122162306a36Sopenharmony_ci } 122262306a36Sopenharmony_ci } 122362306a36Sopenharmony_ci ntfs_debug("After inner for loop: size 0x%x, " 122462306a36Sopenharmony_ci "data_pos 0x%llx, bit 0x%llx", size, 122562306a36Sopenharmony_ci (long long)data_pos, (long long)bit); 122662306a36Sopenharmony_ci data_pos += size; 122762306a36Sopenharmony_ci ntfs_unmap_page(page); 122862306a36Sopenharmony_ci /* 122962306a36Sopenharmony_ci * If the end of the pass has not been reached yet, 123062306a36Sopenharmony_ci * continue searching the mft bitmap for a zero bit. 123162306a36Sopenharmony_ci */ 123262306a36Sopenharmony_ci if (data_pos < pass_end) 123362306a36Sopenharmony_ci continue; 123462306a36Sopenharmony_ci } 123562306a36Sopenharmony_ci /* Do the next pass. */ 123662306a36Sopenharmony_ci if (++pass == 2) { 123762306a36Sopenharmony_ci /* 123862306a36Sopenharmony_ci * Starting the second pass, in which we scan the first 123962306a36Sopenharmony_ci * part of the zone which we omitted earlier. 124062306a36Sopenharmony_ci */ 124162306a36Sopenharmony_ci pass_end = pass_start; 124262306a36Sopenharmony_ci data_pos = pass_start = 24; 124362306a36Sopenharmony_ci ntfs_debug("pass %i, pass_start 0x%llx, pass_end " 124462306a36Sopenharmony_ci "0x%llx.", pass, (long long)pass_start, 124562306a36Sopenharmony_ci (long long)pass_end); 124662306a36Sopenharmony_ci if (data_pos >= pass_end) 124762306a36Sopenharmony_ci break; 124862306a36Sopenharmony_ci } 124962306a36Sopenharmony_ci } 125062306a36Sopenharmony_ci /* No free mft records in currently initialized mft bitmap. */ 125162306a36Sopenharmony_ci ntfs_debug("Done. 
(No free mft records left in currently initialized " 125262306a36Sopenharmony_ci "mft bitmap.)"); 125362306a36Sopenharmony_ci return -ENOSPC; 125462306a36Sopenharmony_ci} 125562306a36Sopenharmony_ci 125662306a36Sopenharmony_ci/** 125762306a36Sopenharmony_ci * ntfs_mft_bitmap_extend_allocation_nolock - extend mft bitmap by a cluster 125862306a36Sopenharmony_ci * @vol: volume on which to extend the mft bitmap attribute 125962306a36Sopenharmony_ci * 126062306a36Sopenharmony_ci * Extend the mft bitmap attribute on the ntfs volume @vol by one cluster. 126162306a36Sopenharmony_ci * 126262306a36Sopenharmony_ci * Note: Only changes allocated_size, i.e. does not touch initialized_size or 126362306a36Sopenharmony_ci * data_size. 126462306a36Sopenharmony_ci * 126562306a36Sopenharmony_ci * Return 0 on success and -errno on error. 126662306a36Sopenharmony_ci * 126762306a36Sopenharmony_ci * Locking: - Caller must hold vol->mftbmp_lock for writing. 126862306a36Sopenharmony_ci * - This function takes NTFS_I(vol->mftbmp_ino)->runlist.lock for 126962306a36Sopenharmony_ci * writing and releases it before returning. 127062306a36Sopenharmony_ci * - This function takes vol->lcnbmp_lock for writing and releases it 127162306a36Sopenharmony_ci * before returning. 
127262306a36Sopenharmony_ci */ 127362306a36Sopenharmony_cistatic int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) 127462306a36Sopenharmony_ci{ 127562306a36Sopenharmony_ci LCN lcn; 127662306a36Sopenharmony_ci s64 ll; 127762306a36Sopenharmony_ci unsigned long flags; 127862306a36Sopenharmony_ci struct page *page; 127962306a36Sopenharmony_ci ntfs_inode *mft_ni, *mftbmp_ni; 128062306a36Sopenharmony_ci runlist_element *rl, *rl2 = NULL; 128162306a36Sopenharmony_ci ntfs_attr_search_ctx *ctx = NULL; 128262306a36Sopenharmony_ci MFT_RECORD *mrec; 128362306a36Sopenharmony_ci ATTR_RECORD *a = NULL; 128462306a36Sopenharmony_ci int ret, mp_size; 128562306a36Sopenharmony_ci u32 old_alen = 0; 128662306a36Sopenharmony_ci u8 *b, tb; 128762306a36Sopenharmony_ci struct { 128862306a36Sopenharmony_ci u8 added_cluster:1; 128962306a36Sopenharmony_ci u8 added_run:1; 129062306a36Sopenharmony_ci u8 mp_rebuilt:1; 129162306a36Sopenharmony_ci } status = { 0, 0, 0 }; 129262306a36Sopenharmony_ci 129362306a36Sopenharmony_ci ntfs_debug("Extending mft bitmap allocation."); 129462306a36Sopenharmony_ci mft_ni = NTFS_I(vol->mft_ino); 129562306a36Sopenharmony_ci mftbmp_ni = NTFS_I(vol->mftbmp_ino); 129662306a36Sopenharmony_ci /* 129762306a36Sopenharmony_ci * Determine the last lcn of the mft bitmap. The allocated size of the 129862306a36Sopenharmony_ci * mft bitmap cannot be zero so we are ok to do this. 
129962306a36Sopenharmony_ci */ 130062306a36Sopenharmony_ci down_write(&mftbmp_ni->runlist.lock); 130162306a36Sopenharmony_ci read_lock_irqsave(&mftbmp_ni->size_lock, flags); 130262306a36Sopenharmony_ci ll = mftbmp_ni->allocated_size; 130362306a36Sopenharmony_ci read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 130462306a36Sopenharmony_ci rl = ntfs_attr_find_vcn_nolock(mftbmp_ni, 130562306a36Sopenharmony_ci (ll - 1) >> vol->cluster_size_bits, NULL); 130662306a36Sopenharmony_ci if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) { 130762306a36Sopenharmony_ci up_write(&mftbmp_ni->runlist.lock); 130862306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to determine last allocated " 130962306a36Sopenharmony_ci "cluster of mft bitmap attribute."); 131062306a36Sopenharmony_ci if (!IS_ERR(rl)) 131162306a36Sopenharmony_ci ret = -EIO; 131262306a36Sopenharmony_ci else 131362306a36Sopenharmony_ci ret = PTR_ERR(rl); 131462306a36Sopenharmony_ci return ret; 131562306a36Sopenharmony_ci } 131662306a36Sopenharmony_ci lcn = rl->lcn + rl->length; 131762306a36Sopenharmony_ci ntfs_debug("Last lcn of mft bitmap attribute is 0x%llx.", 131862306a36Sopenharmony_ci (long long)lcn); 131962306a36Sopenharmony_ci /* 132062306a36Sopenharmony_ci * Attempt to get the cluster following the last allocated cluster by 132162306a36Sopenharmony_ci * hand as it may be in the MFT zone so the allocator would not give it 132262306a36Sopenharmony_ci * to us. 
132362306a36Sopenharmony_ci */ 132462306a36Sopenharmony_ci ll = lcn >> 3; 132562306a36Sopenharmony_ci page = ntfs_map_page(vol->lcnbmp_ino->i_mapping, 132662306a36Sopenharmony_ci ll >> PAGE_SHIFT); 132762306a36Sopenharmony_ci if (IS_ERR(page)) { 132862306a36Sopenharmony_ci up_write(&mftbmp_ni->runlist.lock); 132962306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to read from lcn bitmap."); 133062306a36Sopenharmony_ci return PTR_ERR(page); 133162306a36Sopenharmony_ci } 133262306a36Sopenharmony_ci b = (u8*)page_address(page) + (ll & ~PAGE_MASK); 133362306a36Sopenharmony_ci tb = 1 << (lcn & 7ull); 133462306a36Sopenharmony_ci down_write(&vol->lcnbmp_lock); 133562306a36Sopenharmony_ci if (*b != 0xff && !(*b & tb)) { 133662306a36Sopenharmony_ci /* Next cluster is free, allocate it. */ 133762306a36Sopenharmony_ci *b |= tb; 133862306a36Sopenharmony_ci flush_dcache_page(page); 133962306a36Sopenharmony_ci set_page_dirty(page); 134062306a36Sopenharmony_ci up_write(&vol->lcnbmp_lock); 134162306a36Sopenharmony_ci ntfs_unmap_page(page); 134262306a36Sopenharmony_ci /* Update the mft bitmap runlist. */ 134362306a36Sopenharmony_ci rl->length++; 134462306a36Sopenharmony_ci rl[1].vcn++; 134562306a36Sopenharmony_ci status.added_cluster = 1; 134662306a36Sopenharmony_ci ntfs_debug("Appending one cluster to mft bitmap."); 134762306a36Sopenharmony_ci } else { 134862306a36Sopenharmony_ci up_write(&vol->lcnbmp_lock); 134962306a36Sopenharmony_ci ntfs_unmap_page(page); 135062306a36Sopenharmony_ci /* Allocate a cluster from the DATA_ZONE. 
*/ 135162306a36Sopenharmony_ci rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE, 135262306a36Sopenharmony_ci true); 135362306a36Sopenharmony_ci if (IS_ERR(rl2)) { 135462306a36Sopenharmony_ci up_write(&mftbmp_ni->runlist.lock); 135562306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to allocate a cluster for " 135662306a36Sopenharmony_ci "the mft bitmap."); 135762306a36Sopenharmony_ci return PTR_ERR(rl2); 135862306a36Sopenharmony_ci } 135962306a36Sopenharmony_ci rl = ntfs_runlists_merge(mftbmp_ni->runlist.rl, rl2); 136062306a36Sopenharmony_ci if (IS_ERR(rl)) { 136162306a36Sopenharmony_ci up_write(&mftbmp_ni->runlist.lock); 136262306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to merge runlists for mft " 136362306a36Sopenharmony_ci "bitmap."); 136462306a36Sopenharmony_ci if (ntfs_cluster_free_from_rl(vol, rl2)) { 136562306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to deallocate " 136662306a36Sopenharmony_ci "allocated cluster.%s", es); 136762306a36Sopenharmony_ci NVolSetErrors(vol); 136862306a36Sopenharmony_ci } 136962306a36Sopenharmony_ci ntfs_free(rl2); 137062306a36Sopenharmony_ci return PTR_ERR(rl); 137162306a36Sopenharmony_ci } 137262306a36Sopenharmony_ci mftbmp_ni->runlist.rl = rl; 137362306a36Sopenharmony_ci status.added_run = 1; 137462306a36Sopenharmony_ci ntfs_debug("Adding one run to mft bitmap."); 137562306a36Sopenharmony_ci /* Find the last run in the new runlist. */ 137662306a36Sopenharmony_ci for (; rl[1].length; rl++) 137762306a36Sopenharmony_ci ; 137862306a36Sopenharmony_ci } 137962306a36Sopenharmony_ci /* 138062306a36Sopenharmony_ci * Update the attribute record as well. Note: @rl is the last 138162306a36Sopenharmony_ci * (non-terminator) runlist element of mft bitmap. 
138262306a36Sopenharmony_ci */ 138362306a36Sopenharmony_ci mrec = map_mft_record(mft_ni); 138462306a36Sopenharmony_ci if (IS_ERR(mrec)) { 138562306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to map mft record."); 138662306a36Sopenharmony_ci ret = PTR_ERR(mrec); 138762306a36Sopenharmony_ci goto undo_alloc; 138862306a36Sopenharmony_ci } 138962306a36Sopenharmony_ci ctx = ntfs_attr_get_search_ctx(mft_ni, mrec); 139062306a36Sopenharmony_ci if (unlikely(!ctx)) { 139162306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to get search context."); 139262306a36Sopenharmony_ci ret = -ENOMEM; 139362306a36Sopenharmony_ci goto undo_alloc; 139462306a36Sopenharmony_ci } 139562306a36Sopenharmony_ci ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 139662306a36Sopenharmony_ci mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL, 139762306a36Sopenharmony_ci 0, ctx); 139862306a36Sopenharmony_ci if (unlikely(ret)) { 139962306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to find last attribute extent of " 140062306a36Sopenharmony_ci "mft bitmap attribute."); 140162306a36Sopenharmony_ci if (ret == -ENOENT) 140262306a36Sopenharmony_ci ret = -EIO; 140362306a36Sopenharmony_ci goto undo_alloc; 140462306a36Sopenharmony_ci } 140562306a36Sopenharmony_ci a = ctx->attr; 140662306a36Sopenharmony_ci ll = sle64_to_cpu(a->data.non_resident.lowest_vcn); 140762306a36Sopenharmony_ci /* Search back for the previous last allocated cluster of mft bitmap. */ 140862306a36Sopenharmony_ci for (rl2 = rl; rl2 > mftbmp_ni->runlist.rl; rl2--) { 140962306a36Sopenharmony_ci if (ll >= rl2->vcn) 141062306a36Sopenharmony_ci break; 141162306a36Sopenharmony_ci } 141262306a36Sopenharmony_ci BUG_ON(ll < rl2->vcn); 141362306a36Sopenharmony_ci BUG_ON(ll >= rl2->vcn + rl2->length); 141462306a36Sopenharmony_ci /* Get the size for the new mapping pairs array for this extent. 
*/ 141562306a36Sopenharmony_ci mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1); 141662306a36Sopenharmony_ci if (unlikely(mp_size <= 0)) { 141762306a36Sopenharmony_ci ntfs_error(vol->sb, "Get size for mapping pairs failed for " 141862306a36Sopenharmony_ci "mft bitmap attribute extent."); 141962306a36Sopenharmony_ci ret = mp_size; 142062306a36Sopenharmony_ci if (!ret) 142162306a36Sopenharmony_ci ret = -EIO; 142262306a36Sopenharmony_ci goto undo_alloc; 142362306a36Sopenharmony_ci } 142462306a36Sopenharmony_ci /* Expand the attribute record if necessary. */ 142562306a36Sopenharmony_ci old_alen = le32_to_cpu(a->length); 142662306a36Sopenharmony_ci ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size + 142762306a36Sopenharmony_ci le16_to_cpu(a->data.non_resident.mapping_pairs_offset)); 142862306a36Sopenharmony_ci if (unlikely(ret)) { 142962306a36Sopenharmony_ci if (ret != -ENOSPC) { 143062306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to resize attribute " 143162306a36Sopenharmony_ci "record for mft bitmap attribute."); 143262306a36Sopenharmony_ci goto undo_alloc; 143362306a36Sopenharmony_ci } 143462306a36Sopenharmony_ci // TODO: Deal with this by moving this extent to a new mft 143562306a36Sopenharmony_ci // record or by starting a new extent in a new mft record or by 143662306a36Sopenharmony_ci // moving other attributes out of this mft record. 143762306a36Sopenharmony_ci // Note: It will need to be a special mft record and if none of 143862306a36Sopenharmony_ci // those are available it gets rather complicated... 143962306a36Sopenharmony_ci ntfs_error(vol->sb, "Not enough space in this mft record to " 144062306a36Sopenharmony_ci "accommodate extended mft bitmap attribute " 144162306a36Sopenharmony_ci "extent. 
Cannot handle this yet."); 144262306a36Sopenharmony_ci ret = -EOPNOTSUPP; 144362306a36Sopenharmony_ci goto undo_alloc; 144462306a36Sopenharmony_ci } 144562306a36Sopenharmony_ci status.mp_rebuilt = 1; 144662306a36Sopenharmony_ci /* Generate the mapping pairs array directly into the attr record. */ 144762306a36Sopenharmony_ci ret = ntfs_mapping_pairs_build(vol, (u8*)a + 144862306a36Sopenharmony_ci le16_to_cpu(a->data.non_resident.mapping_pairs_offset), 144962306a36Sopenharmony_ci mp_size, rl2, ll, -1, NULL); 145062306a36Sopenharmony_ci if (unlikely(ret)) { 145162306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to build mapping pairs array for " 145262306a36Sopenharmony_ci "mft bitmap attribute."); 145362306a36Sopenharmony_ci goto undo_alloc; 145462306a36Sopenharmony_ci } 145562306a36Sopenharmony_ci /* Update the highest_vcn. */ 145662306a36Sopenharmony_ci a->data.non_resident.highest_vcn = cpu_to_sle64(rl[1].vcn - 1); 145762306a36Sopenharmony_ci /* 145862306a36Sopenharmony_ci * We now have extended the mft bitmap allocated_size by one cluster. 145962306a36Sopenharmony_ci * Reflect this in the ntfs_inode structure and the attribute record. 146062306a36Sopenharmony_ci */ 146162306a36Sopenharmony_ci if (a->data.non_resident.lowest_vcn) { 146262306a36Sopenharmony_ci /* 146362306a36Sopenharmony_ci * We are not in the first attribute extent, switch to it, but 146462306a36Sopenharmony_ci * first ensure the changes will make it to disk later. 
146562306a36Sopenharmony_ci */ 146662306a36Sopenharmony_ci flush_dcache_mft_record_page(ctx->ntfs_ino); 146762306a36Sopenharmony_ci mark_mft_record_dirty(ctx->ntfs_ino); 146862306a36Sopenharmony_ci ntfs_attr_reinit_search_ctx(ctx); 146962306a36Sopenharmony_ci ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 147062306a36Sopenharmony_ci mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 147162306a36Sopenharmony_ci 0, ctx); 147262306a36Sopenharmony_ci if (unlikely(ret)) { 147362306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to find first attribute " 147462306a36Sopenharmony_ci "extent of mft bitmap attribute."); 147562306a36Sopenharmony_ci goto restore_undo_alloc; 147662306a36Sopenharmony_ci } 147762306a36Sopenharmony_ci a = ctx->attr; 147862306a36Sopenharmony_ci } 147962306a36Sopenharmony_ci write_lock_irqsave(&mftbmp_ni->size_lock, flags); 148062306a36Sopenharmony_ci mftbmp_ni->allocated_size += vol->cluster_size; 148162306a36Sopenharmony_ci a->data.non_resident.allocated_size = 148262306a36Sopenharmony_ci cpu_to_sle64(mftbmp_ni->allocated_size); 148362306a36Sopenharmony_ci write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 148462306a36Sopenharmony_ci /* Ensure the changes make it to disk. 
*/ 148562306a36Sopenharmony_ci flush_dcache_mft_record_page(ctx->ntfs_ino); 148662306a36Sopenharmony_ci mark_mft_record_dirty(ctx->ntfs_ino); 148762306a36Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 148862306a36Sopenharmony_ci unmap_mft_record(mft_ni); 148962306a36Sopenharmony_ci up_write(&mftbmp_ni->runlist.lock); 149062306a36Sopenharmony_ci ntfs_debug("Done."); 149162306a36Sopenharmony_ci return 0; 149262306a36Sopenharmony_cirestore_undo_alloc: 149362306a36Sopenharmony_ci ntfs_attr_reinit_search_ctx(ctx); 149462306a36Sopenharmony_ci if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 149562306a36Sopenharmony_ci mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL, 149662306a36Sopenharmony_ci 0, ctx)) { 149762306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to find last attribute extent of " 149862306a36Sopenharmony_ci "mft bitmap attribute.%s", es); 149962306a36Sopenharmony_ci write_lock_irqsave(&mftbmp_ni->size_lock, flags); 150062306a36Sopenharmony_ci mftbmp_ni->allocated_size += vol->cluster_size; 150162306a36Sopenharmony_ci write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 150262306a36Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 150362306a36Sopenharmony_ci unmap_mft_record(mft_ni); 150462306a36Sopenharmony_ci up_write(&mftbmp_ni->runlist.lock); 150562306a36Sopenharmony_ci /* 150662306a36Sopenharmony_ci * The only thing that is now wrong is ->allocated_size of the 150762306a36Sopenharmony_ci * base attribute extent which chkdsk should be able to fix. 150862306a36Sopenharmony_ci */ 150962306a36Sopenharmony_ci NVolSetErrors(vol); 151062306a36Sopenharmony_ci return ret; 151162306a36Sopenharmony_ci } 151262306a36Sopenharmony_ci a = ctx->attr; 151362306a36Sopenharmony_ci a->data.non_resident.highest_vcn = cpu_to_sle64(rl[1].vcn - 2); 151462306a36Sopenharmony_ciundo_alloc: 151562306a36Sopenharmony_ci if (status.added_cluster) { 151662306a36Sopenharmony_ci /* Truncate the last run in the runlist by one cluster. 
*/ 151762306a36Sopenharmony_ci rl->length--; 151862306a36Sopenharmony_ci rl[1].vcn--; 151962306a36Sopenharmony_ci } else if (status.added_run) { 152062306a36Sopenharmony_ci lcn = rl->lcn; 152162306a36Sopenharmony_ci /* Remove the last run from the runlist. */ 152262306a36Sopenharmony_ci rl->lcn = rl[1].lcn; 152362306a36Sopenharmony_ci rl->length = 0; 152462306a36Sopenharmony_ci } 152562306a36Sopenharmony_ci /* Deallocate the cluster. */ 152662306a36Sopenharmony_ci down_write(&vol->lcnbmp_lock); 152762306a36Sopenharmony_ci if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) { 152862306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to free allocated cluster.%s", es); 152962306a36Sopenharmony_ci NVolSetErrors(vol); 153062306a36Sopenharmony_ci } 153162306a36Sopenharmony_ci up_write(&vol->lcnbmp_lock); 153262306a36Sopenharmony_ci if (status.mp_rebuilt) { 153362306a36Sopenharmony_ci if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu( 153462306a36Sopenharmony_ci a->data.non_resident.mapping_pairs_offset), 153562306a36Sopenharmony_ci old_alen - le16_to_cpu( 153662306a36Sopenharmony_ci a->data.non_resident.mapping_pairs_offset), 153762306a36Sopenharmony_ci rl2, ll, -1, NULL)) { 153862306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to restore mapping pairs " 153962306a36Sopenharmony_ci "array.%s", es); 154062306a36Sopenharmony_ci NVolSetErrors(vol); 154162306a36Sopenharmony_ci } 154262306a36Sopenharmony_ci if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) { 154362306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to restore attribute " 154462306a36Sopenharmony_ci "record.%s", es); 154562306a36Sopenharmony_ci NVolSetErrors(vol); 154662306a36Sopenharmony_ci } 154762306a36Sopenharmony_ci flush_dcache_mft_record_page(ctx->ntfs_ino); 154862306a36Sopenharmony_ci mark_mft_record_dirty(ctx->ntfs_ino); 154962306a36Sopenharmony_ci } 155062306a36Sopenharmony_ci if (ctx) 155162306a36Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 155262306a36Sopenharmony_ci if (!IS_ERR(mrec)) 
155362306a36Sopenharmony_ci unmap_mft_record(mft_ni); 155462306a36Sopenharmony_ci up_write(&mftbmp_ni->runlist.lock); 155562306a36Sopenharmony_ci return ret; 155662306a36Sopenharmony_ci} 155762306a36Sopenharmony_ci 155862306a36Sopenharmony_ci/** 155962306a36Sopenharmony_ci * ntfs_mft_bitmap_extend_initialized_nolock - extend mftbmp initialized data 156062306a36Sopenharmony_ci * @vol: volume on which to extend the mft bitmap attribute 156162306a36Sopenharmony_ci * 156262306a36Sopenharmony_ci * Extend the initialized portion of the mft bitmap attribute on the ntfs 156362306a36Sopenharmony_ci * volume @vol by 8 bytes. 156462306a36Sopenharmony_ci * 156562306a36Sopenharmony_ci * Note: Only changes initialized_size and data_size, i.e. requires that 156662306a36Sopenharmony_ci * allocated_size is big enough to fit the new initialized_size. 156762306a36Sopenharmony_ci * 156862306a36Sopenharmony_ci * Return 0 on success and -error on error. 156962306a36Sopenharmony_ci * 157062306a36Sopenharmony_ci * Locking: Caller must hold vol->mftbmp_lock for writing. 157162306a36Sopenharmony_ci */ 157262306a36Sopenharmony_cistatic int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol) 157362306a36Sopenharmony_ci{ 157462306a36Sopenharmony_ci s64 old_data_size, old_initialized_size; 157562306a36Sopenharmony_ci unsigned long flags; 157662306a36Sopenharmony_ci struct inode *mftbmp_vi; 157762306a36Sopenharmony_ci ntfs_inode *mft_ni, *mftbmp_ni; 157862306a36Sopenharmony_ci ntfs_attr_search_ctx *ctx; 157962306a36Sopenharmony_ci MFT_RECORD *mrec; 158062306a36Sopenharmony_ci ATTR_RECORD *a; 158162306a36Sopenharmony_ci int ret; 158262306a36Sopenharmony_ci 158362306a36Sopenharmony_ci ntfs_debug("Extending mft bitmap initiailized (and data) size."); 158462306a36Sopenharmony_ci mft_ni = NTFS_I(vol->mft_ino); 158562306a36Sopenharmony_ci mftbmp_vi = vol->mftbmp_ino; 158662306a36Sopenharmony_ci mftbmp_ni = NTFS_I(mftbmp_vi); 158762306a36Sopenharmony_ci /* Get the attribute record. 
*/ 158862306a36Sopenharmony_ci mrec = map_mft_record(mft_ni); 158962306a36Sopenharmony_ci if (IS_ERR(mrec)) { 159062306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to map mft record."); 159162306a36Sopenharmony_ci return PTR_ERR(mrec); 159262306a36Sopenharmony_ci } 159362306a36Sopenharmony_ci ctx = ntfs_attr_get_search_ctx(mft_ni, mrec); 159462306a36Sopenharmony_ci if (unlikely(!ctx)) { 159562306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to get search context."); 159662306a36Sopenharmony_ci ret = -ENOMEM; 159762306a36Sopenharmony_ci goto unm_err_out; 159862306a36Sopenharmony_ci } 159962306a36Sopenharmony_ci ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 160062306a36Sopenharmony_ci mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx); 160162306a36Sopenharmony_ci if (unlikely(ret)) { 160262306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to find first attribute extent of " 160362306a36Sopenharmony_ci "mft bitmap attribute."); 160462306a36Sopenharmony_ci if (ret == -ENOENT) 160562306a36Sopenharmony_ci ret = -EIO; 160662306a36Sopenharmony_ci goto put_err_out; 160762306a36Sopenharmony_ci } 160862306a36Sopenharmony_ci a = ctx->attr; 160962306a36Sopenharmony_ci write_lock_irqsave(&mftbmp_ni->size_lock, flags); 161062306a36Sopenharmony_ci old_data_size = i_size_read(mftbmp_vi); 161162306a36Sopenharmony_ci old_initialized_size = mftbmp_ni->initialized_size; 161262306a36Sopenharmony_ci /* 161362306a36Sopenharmony_ci * We can simply update the initialized_size before filling the space 161462306a36Sopenharmony_ci * with zeroes because the caller is holding the mft bitmap lock for 161562306a36Sopenharmony_ci * writing which ensures that no one else is trying to access the data. 
161662306a36Sopenharmony_ci */ 161762306a36Sopenharmony_ci mftbmp_ni->initialized_size += 8; 161862306a36Sopenharmony_ci a->data.non_resident.initialized_size = 161962306a36Sopenharmony_ci cpu_to_sle64(mftbmp_ni->initialized_size); 162062306a36Sopenharmony_ci if (mftbmp_ni->initialized_size > old_data_size) { 162162306a36Sopenharmony_ci i_size_write(mftbmp_vi, mftbmp_ni->initialized_size); 162262306a36Sopenharmony_ci a->data.non_resident.data_size = 162362306a36Sopenharmony_ci cpu_to_sle64(mftbmp_ni->initialized_size); 162462306a36Sopenharmony_ci } 162562306a36Sopenharmony_ci write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 162662306a36Sopenharmony_ci /* Ensure the changes make it to disk. */ 162762306a36Sopenharmony_ci flush_dcache_mft_record_page(ctx->ntfs_ino); 162862306a36Sopenharmony_ci mark_mft_record_dirty(ctx->ntfs_ino); 162962306a36Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 163062306a36Sopenharmony_ci unmap_mft_record(mft_ni); 163162306a36Sopenharmony_ci /* Initialize the mft bitmap attribute value with zeroes. */ 163262306a36Sopenharmony_ci ret = ntfs_attr_set(mftbmp_ni, old_initialized_size, 8, 0); 163362306a36Sopenharmony_ci if (likely(!ret)) { 163462306a36Sopenharmony_ci ntfs_debug("Done. (Wrote eight initialized bytes to mft " 163562306a36Sopenharmony_ci "bitmap."); 163662306a36Sopenharmony_ci return 0; 163762306a36Sopenharmony_ci } 163862306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to write to mft bitmap."); 163962306a36Sopenharmony_ci /* Try to recover from the error. 
*/ 164062306a36Sopenharmony_ci mrec = map_mft_record(mft_ni); 164162306a36Sopenharmony_ci if (IS_ERR(mrec)) { 164262306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to map mft record.%s", es); 164362306a36Sopenharmony_ci NVolSetErrors(vol); 164462306a36Sopenharmony_ci return ret; 164562306a36Sopenharmony_ci } 164662306a36Sopenharmony_ci ctx = ntfs_attr_get_search_ctx(mft_ni, mrec); 164762306a36Sopenharmony_ci if (unlikely(!ctx)) { 164862306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to get search context.%s", es); 164962306a36Sopenharmony_ci NVolSetErrors(vol); 165062306a36Sopenharmony_ci goto unm_err_out; 165162306a36Sopenharmony_ci } 165262306a36Sopenharmony_ci if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 165362306a36Sopenharmony_ci mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx)) { 165462306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to find first attribute extent of " 165562306a36Sopenharmony_ci "mft bitmap attribute.%s", es); 165662306a36Sopenharmony_ci NVolSetErrors(vol); 165762306a36Sopenharmony_ciput_err_out: 165862306a36Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 165962306a36Sopenharmony_ciunm_err_out: 166062306a36Sopenharmony_ci unmap_mft_record(mft_ni); 166162306a36Sopenharmony_ci goto err_out; 166262306a36Sopenharmony_ci } 166362306a36Sopenharmony_ci a = ctx->attr; 166462306a36Sopenharmony_ci write_lock_irqsave(&mftbmp_ni->size_lock, flags); 166562306a36Sopenharmony_ci mftbmp_ni->initialized_size = old_initialized_size; 166662306a36Sopenharmony_ci a->data.non_resident.initialized_size = 166762306a36Sopenharmony_ci cpu_to_sle64(old_initialized_size); 166862306a36Sopenharmony_ci if (i_size_read(mftbmp_vi) != old_data_size) { 166962306a36Sopenharmony_ci i_size_write(mftbmp_vi, old_data_size); 167062306a36Sopenharmony_ci a->data.non_resident.data_size = cpu_to_sle64(old_data_size); 167162306a36Sopenharmony_ci } 167262306a36Sopenharmony_ci write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 167362306a36Sopenharmony_ci 
flush_dcache_mft_record_page(ctx->ntfs_ino); 167462306a36Sopenharmony_ci mark_mft_record_dirty(ctx->ntfs_ino); 167562306a36Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 167662306a36Sopenharmony_ci unmap_mft_record(mft_ni); 167762306a36Sopenharmony_ci#ifdef DEBUG 167862306a36Sopenharmony_ci read_lock_irqsave(&mftbmp_ni->size_lock, flags); 167962306a36Sopenharmony_ci ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, " 168062306a36Sopenharmony_ci "data_size 0x%llx, initialized_size 0x%llx.", 168162306a36Sopenharmony_ci (long long)mftbmp_ni->allocated_size, 168262306a36Sopenharmony_ci (long long)i_size_read(mftbmp_vi), 168362306a36Sopenharmony_ci (long long)mftbmp_ni->initialized_size); 168462306a36Sopenharmony_ci read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 168562306a36Sopenharmony_ci#endif /* DEBUG */ 168662306a36Sopenharmony_cierr_out: 168762306a36Sopenharmony_ci return ret; 168862306a36Sopenharmony_ci} 168962306a36Sopenharmony_ci 169062306a36Sopenharmony_ci/** 169162306a36Sopenharmony_ci * ntfs_mft_data_extend_allocation_nolock - extend mft data attribute 169262306a36Sopenharmony_ci * @vol: volume on which to extend the mft data attribute 169362306a36Sopenharmony_ci * 169462306a36Sopenharmony_ci * Extend the mft data attribute on the ntfs volume @vol by 16 mft records 169562306a36Sopenharmony_ci * worth of clusters or if not enough space for this by one mft record worth 169662306a36Sopenharmony_ci * of clusters. 169762306a36Sopenharmony_ci * 169862306a36Sopenharmony_ci * Note: Only changes allocated_size, i.e. does not touch initialized_size or 169962306a36Sopenharmony_ci * data_size. 170062306a36Sopenharmony_ci * 170162306a36Sopenharmony_ci * Return 0 on success and -errno on error. 170262306a36Sopenharmony_ci * 170362306a36Sopenharmony_ci * Locking: - Caller must hold vol->mftbmp_lock for writing. 
170462306a36Sopenharmony_ci * - This function takes NTFS_I(vol->mft_ino)->runlist.lock for 170562306a36Sopenharmony_ci * writing and releases it before returning. 170662306a36Sopenharmony_ci * - This function calls functions which take vol->lcnbmp_lock for 170762306a36Sopenharmony_ci * writing and release it before returning. 170862306a36Sopenharmony_ci */ 170962306a36Sopenharmony_cistatic int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) 171062306a36Sopenharmony_ci{ 171162306a36Sopenharmony_ci LCN lcn; 171262306a36Sopenharmony_ci VCN old_last_vcn; 171362306a36Sopenharmony_ci s64 min_nr, nr, ll; 171462306a36Sopenharmony_ci unsigned long flags; 171562306a36Sopenharmony_ci ntfs_inode *mft_ni; 171662306a36Sopenharmony_ci runlist_element *rl, *rl2; 171762306a36Sopenharmony_ci ntfs_attr_search_ctx *ctx = NULL; 171862306a36Sopenharmony_ci MFT_RECORD *mrec; 171962306a36Sopenharmony_ci ATTR_RECORD *a = NULL; 172062306a36Sopenharmony_ci int ret, mp_size; 172162306a36Sopenharmony_ci u32 old_alen = 0; 172262306a36Sopenharmony_ci bool mp_rebuilt = false; 172362306a36Sopenharmony_ci 172462306a36Sopenharmony_ci ntfs_debug("Extending mft data allocation."); 172562306a36Sopenharmony_ci mft_ni = NTFS_I(vol->mft_ino); 172662306a36Sopenharmony_ci /* 172762306a36Sopenharmony_ci * Determine the preferred allocation location, i.e. the last lcn of 172862306a36Sopenharmony_ci * the mft data attribute. The allocated size of the mft data 172962306a36Sopenharmony_ci * attribute cannot be zero so we are ok to do this. 
173062306a36Sopenharmony_ci */ 173162306a36Sopenharmony_ci down_write(&mft_ni->runlist.lock); 173262306a36Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 173362306a36Sopenharmony_ci ll = mft_ni->allocated_size; 173462306a36Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 173562306a36Sopenharmony_ci rl = ntfs_attr_find_vcn_nolock(mft_ni, 173662306a36Sopenharmony_ci (ll - 1) >> vol->cluster_size_bits, NULL); 173762306a36Sopenharmony_ci if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) { 173862306a36Sopenharmony_ci up_write(&mft_ni->runlist.lock); 173962306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to determine last allocated " 174062306a36Sopenharmony_ci "cluster of mft data attribute."); 174162306a36Sopenharmony_ci if (!IS_ERR(rl)) 174262306a36Sopenharmony_ci ret = -EIO; 174362306a36Sopenharmony_ci else 174462306a36Sopenharmony_ci ret = PTR_ERR(rl); 174562306a36Sopenharmony_ci return ret; 174662306a36Sopenharmony_ci } 174762306a36Sopenharmony_ci lcn = rl->lcn + rl->length; 174862306a36Sopenharmony_ci ntfs_debug("Last lcn of mft data attribute is 0x%llx.", (long long)lcn); 174962306a36Sopenharmony_ci /* Minimum allocation is one mft record worth of clusters. */ 175062306a36Sopenharmony_ci min_nr = vol->mft_record_size >> vol->cluster_size_bits; 175162306a36Sopenharmony_ci if (!min_nr) 175262306a36Sopenharmony_ci min_nr = 1; 175362306a36Sopenharmony_ci /* Want to allocate 16 mft records worth of clusters. */ 175462306a36Sopenharmony_ci nr = vol->mft_record_size << 4 >> vol->cluster_size_bits; 175562306a36Sopenharmony_ci if (!nr) 175662306a36Sopenharmony_ci nr = min_nr; 175762306a36Sopenharmony_ci /* Ensure we do not go above 2^32-1 mft records. 
*/ 175862306a36Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 175962306a36Sopenharmony_ci ll = mft_ni->allocated_size; 176062306a36Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 176162306a36Sopenharmony_ci if (unlikely((ll + (nr << vol->cluster_size_bits)) >> 176262306a36Sopenharmony_ci vol->mft_record_size_bits >= (1ll << 32))) { 176362306a36Sopenharmony_ci nr = min_nr; 176462306a36Sopenharmony_ci if (unlikely((ll + (nr << vol->cluster_size_bits)) >> 176562306a36Sopenharmony_ci vol->mft_record_size_bits >= (1ll << 32))) { 176662306a36Sopenharmony_ci ntfs_warning(vol->sb, "Cannot allocate mft record " 176762306a36Sopenharmony_ci "because the maximum number of inodes " 176862306a36Sopenharmony_ci "(2^32) has already been reached."); 176962306a36Sopenharmony_ci up_write(&mft_ni->runlist.lock); 177062306a36Sopenharmony_ci return -ENOSPC; 177162306a36Sopenharmony_ci } 177262306a36Sopenharmony_ci } 177362306a36Sopenharmony_ci ntfs_debug("Trying mft data allocation with %s cluster count %lli.", 177462306a36Sopenharmony_ci nr > min_nr ? 
"default" : "minimal", (long long)nr); 177562306a36Sopenharmony_ci old_last_vcn = rl[1].vcn; 177662306a36Sopenharmony_ci do { 177762306a36Sopenharmony_ci rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE, 177862306a36Sopenharmony_ci true); 177962306a36Sopenharmony_ci if (!IS_ERR(rl2)) 178062306a36Sopenharmony_ci break; 178162306a36Sopenharmony_ci if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) { 178262306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to allocate the minimal " 178362306a36Sopenharmony_ci "number of clusters (%lli) for the " 178462306a36Sopenharmony_ci "mft data attribute.", (long long)nr); 178562306a36Sopenharmony_ci up_write(&mft_ni->runlist.lock); 178662306a36Sopenharmony_ci return PTR_ERR(rl2); 178762306a36Sopenharmony_ci } 178862306a36Sopenharmony_ci /* 178962306a36Sopenharmony_ci * There is not enough space to do the allocation, but there 179062306a36Sopenharmony_ci * might be enough space to do a minimal allocation so try that 179162306a36Sopenharmony_ci * before failing. 
179262306a36Sopenharmony_ci */ 179362306a36Sopenharmony_ci nr = min_nr; 179462306a36Sopenharmony_ci ntfs_debug("Retrying mft data allocation with minimal cluster " 179562306a36Sopenharmony_ci "count %lli.", (long long)nr); 179662306a36Sopenharmony_ci } while (1); 179762306a36Sopenharmony_ci rl = ntfs_runlists_merge(mft_ni->runlist.rl, rl2); 179862306a36Sopenharmony_ci if (IS_ERR(rl)) { 179962306a36Sopenharmony_ci up_write(&mft_ni->runlist.lock); 180062306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to merge runlists for mft data " 180162306a36Sopenharmony_ci "attribute."); 180262306a36Sopenharmony_ci if (ntfs_cluster_free_from_rl(vol, rl2)) { 180362306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to deallocate clusters " 180462306a36Sopenharmony_ci "from the mft data attribute.%s", es); 180562306a36Sopenharmony_ci NVolSetErrors(vol); 180662306a36Sopenharmony_ci } 180762306a36Sopenharmony_ci ntfs_free(rl2); 180862306a36Sopenharmony_ci return PTR_ERR(rl); 180962306a36Sopenharmony_ci } 181062306a36Sopenharmony_ci mft_ni->runlist.rl = rl; 181162306a36Sopenharmony_ci ntfs_debug("Allocated %lli clusters.", (long long)nr); 181262306a36Sopenharmony_ci /* Find the last run in the new runlist. */ 181362306a36Sopenharmony_ci for (; rl[1].length; rl++) 181462306a36Sopenharmony_ci ; 181562306a36Sopenharmony_ci /* Update the attribute record as well. 
*/ 181662306a36Sopenharmony_ci mrec = map_mft_record(mft_ni); 181762306a36Sopenharmony_ci if (IS_ERR(mrec)) { 181862306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to map mft record."); 181962306a36Sopenharmony_ci ret = PTR_ERR(mrec); 182062306a36Sopenharmony_ci goto undo_alloc; 182162306a36Sopenharmony_ci } 182262306a36Sopenharmony_ci ctx = ntfs_attr_get_search_ctx(mft_ni, mrec); 182362306a36Sopenharmony_ci if (unlikely(!ctx)) { 182462306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to get search context."); 182562306a36Sopenharmony_ci ret = -ENOMEM; 182662306a36Sopenharmony_ci goto undo_alloc; 182762306a36Sopenharmony_ci } 182862306a36Sopenharmony_ci ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, 182962306a36Sopenharmony_ci CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx); 183062306a36Sopenharmony_ci if (unlikely(ret)) { 183162306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to find last attribute extent of " 183262306a36Sopenharmony_ci "mft data attribute."); 183362306a36Sopenharmony_ci if (ret == -ENOENT) 183462306a36Sopenharmony_ci ret = -EIO; 183562306a36Sopenharmony_ci goto undo_alloc; 183662306a36Sopenharmony_ci } 183762306a36Sopenharmony_ci a = ctx->attr; 183862306a36Sopenharmony_ci ll = sle64_to_cpu(a->data.non_resident.lowest_vcn); 183962306a36Sopenharmony_ci /* Search back for the previous last allocated cluster of mft bitmap. */ 184062306a36Sopenharmony_ci for (rl2 = rl; rl2 > mft_ni->runlist.rl; rl2--) { 184162306a36Sopenharmony_ci if (ll >= rl2->vcn) 184262306a36Sopenharmony_ci break; 184362306a36Sopenharmony_ci } 184462306a36Sopenharmony_ci BUG_ON(ll < rl2->vcn); 184562306a36Sopenharmony_ci BUG_ON(ll >= rl2->vcn + rl2->length); 184662306a36Sopenharmony_ci /* Get the size for the new mapping pairs array for this extent. 
*/ 184762306a36Sopenharmony_ci mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1); 184862306a36Sopenharmony_ci if (unlikely(mp_size <= 0)) { 184962306a36Sopenharmony_ci ntfs_error(vol->sb, "Get size for mapping pairs failed for " 185062306a36Sopenharmony_ci "mft data attribute extent."); 185162306a36Sopenharmony_ci ret = mp_size; 185262306a36Sopenharmony_ci if (!ret) 185362306a36Sopenharmony_ci ret = -EIO; 185462306a36Sopenharmony_ci goto undo_alloc; 185562306a36Sopenharmony_ci } 185662306a36Sopenharmony_ci /* Expand the attribute record if necessary. */ 185762306a36Sopenharmony_ci old_alen = le32_to_cpu(a->length); 185862306a36Sopenharmony_ci ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size + 185962306a36Sopenharmony_ci le16_to_cpu(a->data.non_resident.mapping_pairs_offset)); 186062306a36Sopenharmony_ci if (unlikely(ret)) { 186162306a36Sopenharmony_ci if (ret != -ENOSPC) { 186262306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to resize attribute " 186362306a36Sopenharmony_ci "record for mft data attribute."); 186462306a36Sopenharmony_ci goto undo_alloc; 186562306a36Sopenharmony_ci } 186662306a36Sopenharmony_ci // TODO: Deal with this by moving this extent to a new mft 186762306a36Sopenharmony_ci // record or by starting a new extent in a new mft record or by 186862306a36Sopenharmony_ci // moving other attributes out of this mft record. 186962306a36Sopenharmony_ci // Note: Use the special reserved mft records and ensure that 187062306a36Sopenharmony_ci // this extent is not required to find the mft record in 187162306a36Sopenharmony_ci // question. If no free special records left we would need to 187262306a36Sopenharmony_ci // move an existing record away, insert ours in its place, and 187362306a36Sopenharmony_ci // then place the moved record into the newly allocated space 187462306a36Sopenharmony_ci // and we would then need to update all references to this mft 187562306a36Sopenharmony_ci // record appropriately. This is rather complicated... 
187662306a36Sopenharmony_ci ntfs_error(vol->sb, "Not enough space in this mft record to " 187762306a36Sopenharmony_ci "accommodate extended mft data attribute " 187862306a36Sopenharmony_ci "extent. Cannot handle this yet."); 187962306a36Sopenharmony_ci ret = -EOPNOTSUPP; 188062306a36Sopenharmony_ci goto undo_alloc; 188162306a36Sopenharmony_ci } 188262306a36Sopenharmony_ci mp_rebuilt = true; 188362306a36Sopenharmony_ci /* Generate the mapping pairs array directly into the attr record. */ 188462306a36Sopenharmony_ci ret = ntfs_mapping_pairs_build(vol, (u8*)a + 188562306a36Sopenharmony_ci le16_to_cpu(a->data.non_resident.mapping_pairs_offset), 188662306a36Sopenharmony_ci mp_size, rl2, ll, -1, NULL); 188762306a36Sopenharmony_ci if (unlikely(ret)) { 188862306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to build mapping pairs array of " 188962306a36Sopenharmony_ci "mft data attribute."); 189062306a36Sopenharmony_ci goto undo_alloc; 189162306a36Sopenharmony_ci } 189262306a36Sopenharmony_ci /* Update the highest_vcn. */ 189362306a36Sopenharmony_ci a->data.non_resident.highest_vcn = cpu_to_sle64(rl[1].vcn - 1); 189462306a36Sopenharmony_ci /* 189562306a36Sopenharmony_ci * We now have extended the mft data allocated_size by nr clusters. 189662306a36Sopenharmony_ci * Reflect this in the ntfs_inode structure and the attribute record. 189762306a36Sopenharmony_ci * @rl is the last (non-terminator) runlist element of mft data 189862306a36Sopenharmony_ci * attribute. 189962306a36Sopenharmony_ci */ 190062306a36Sopenharmony_ci if (a->data.non_resident.lowest_vcn) { 190162306a36Sopenharmony_ci /* 190262306a36Sopenharmony_ci * We are not in the first attribute extent, switch to it, but 190362306a36Sopenharmony_ci * first ensure the changes will make it to disk later. 
190462306a36Sopenharmony_ci */ 190562306a36Sopenharmony_ci flush_dcache_mft_record_page(ctx->ntfs_ino); 190662306a36Sopenharmony_ci mark_mft_record_dirty(ctx->ntfs_ino); 190762306a36Sopenharmony_ci ntfs_attr_reinit_search_ctx(ctx); 190862306a36Sopenharmony_ci ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name, 190962306a36Sopenharmony_ci mft_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, 191062306a36Sopenharmony_ci ctx); 191162306a36Sopenharmony_ci if (unlikely(ret)) { 191262306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to find first attribute " 191362306a36Sopenharmony_ci "extent of mft data attribute."); 191462306a36Sopenharmony_ci goto restore_undo_alloc; 191562306a36Sopenharmony_ci } 191662306a36Sopenharmony_ci a = ctx->attr; 191762306a36Sopenharmony_ci } 191862306a36Sopenharmony_ci write_lock_irqsave(&mft_ni->size_lock, flags); 191962306a36Sopenharmony_ci mft_ni->allocated_size += nr << vol->cluster_size_bits; 192062306a36Sopenharmony_ci a->data.non_resident.allocated_size = 192162306a36Sopenharmony_ci cpu_to_sle64(mft_ni->allocated_size); 192262306a36Sopenharmony_ci write_unlock_irqrestore(&mft_ni->size_lock, flags); 192362306a36Sopenharmony_ci /* Ensure the changes make it to disk. 
*/ 192462306a36Sopenharmony_ci flush_dcache_mft_record_page(ctx->ntfs_ino); 192562306a36Sopenharmony_ci mark_mft_record_dirty(ctx->ntfs_ino); 192662306a36Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 192762306a36Sopenharmony_ci unmap_mft_record(mft_ni); 192862306a36Sopenharmony_ci up_write(&mft_ni->runlist.lock); 192962306a36Sopenharmony_ci ntfs_debug("Done."); 193062306a36Sopenharmony_ci return 0; 193162306a36Sopenharmony_cirestore_undo_alloc: 193262306a36Sopenharmony_ci ntfs_attr_reinit_search_ctx(ctx); 193362306a36Sopenharmony_ci if (ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, 193462306a36Sopenharmony_ci CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx)) { 193562306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to find last attribute extent of " 193662306a36Sopenharmony_ci "mft data attribute.%s", es); 193762306a36Sopenharmony_ci write_lock_irqsave(&mft_ni->size_lock, flags); 193862306a36Sopenharmony_ci mft_ni->allocated_size += nr << vol->cluster_size_bits; 193962306a36Sopenharmony_ci write_unlock_irqrestore(&mft_ni->size_lock, flags); 194062306a36Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 194162306a36Sopenharmony_ci unmap_mft_record(mft_ni); 194262306a36Sopenharmony_ci up_write(&mft_ni->runlist.lock); 194362306a36Sopenharmony_ci /* 194462306a36Sopenharmony_ci * The only thing that is now wrong is ->allocated_size of the 194562306a36Sopenharmony_ci * base attribute extent which chkdsk should be able to fix. 
194662306a36Sopenharmony_ci */ 194762306a36Sopenharmony_ci NVolSetErrors(vol); 194862306a36Sopenharmony_ci return ret; 194962306a36Sopenharmony_ci } 195062306a36Sopenharmony_ci ctx->attr->data.non_resident.highest_vcn = 195162306a36Sopenharmony_ci cpu_to_sle64(old_last_vcn - 1); 195262306a36Sopenharmony_ciundo_alloc: 195362306a36Sopenharmony_ci if (ntfs_cluster_free(mft_ni, old_last_vcn, -1, ctx) < 0) { 195462306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to free clusters from mft data " 195562306a36Sopenharmony_ci "attribute.%s", es); 195662306a36Sopenharmony_ci NVolSetErrors(vol); 195762306a36Sopenharmony_ci } 195862306a36Sopenharmony_ci 195962306a36Sopenharmony_ci if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) { 196062306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to truncate mft data attribute " 196162306a36Sopenharmony_ci "runlist.%s", es); 196262306a36Sopenharmony_ci NVolSetErrors(vol); 196362306a36Sopenharmony_ci } 196462306a36Sopenharmony_ci if (ctx) { 196562306a36Sopenharmony_ci a = ctx->attr; 196662306a36Sopenharmony_ci if (mp_rebuilt && !IS_ERR(ctx->mrec)) { 196762306a36Sopenharmony_ci if (ntfs_mapping_pairs_build(vol, (u8 *)a + le16_to_cpu( 196862306a36Sopenharmony_ci a->data.non_resident.mapping_pairs_offset), 196962306a36Sopenharmony_ci old_alen - le16_to_cpu( 197062306a36Sopenharmony_ci a->data.non_resident.mapping_pairs_offset), 197162306a36Sopenharmony_ci rl2, ll, -1, NULL)) { 197262306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to restore mapping pairs " 197362306a36Sopenharmony_ci "array.%s", es); 197462306a36Sopenharmony_ci NVolSetErrors(vol); 197562306a36Sopenharmony_ci } 197662306a36Sopenharmony_ci if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) { 197762306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to restore attribute " 197862306a36Sopenharmony_ci "record.%s", es); 197962306a36Sopenharmony_ci NVolSetErrors(vol); 198062306a36Sopenharmony_ci } 198162306a36Sopenharmony_ci 
flush_dcache_mft_record_page(ctx->ntfs_ino); 198262306a36Sopenharmony_ci mark_mft_record_dirty(ctx->ntfs_ino); 198362306a36Sopenharmony_ci } else if (IS_ERR(ctx->mrec)) { 198462306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to restore attribute search " 198562306a36Sopenharmony_ci "context.%s", es); 198662306a36Sopenharmony_ci NVolSetErrors(vol); 198762306a36Sopenharmony_ci } 198862306a36Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 198962306a36Sopenharmony_ci } 199062306a36Sopenharmony_ci if (!IS_ERR(mrec)) 199162306a36Sopenharmony_ci unmap_mft_record(mft_ni); 199262306a36Sopenharmony_ci up_write(&mft_ni->runlist.lock); 199362306a36Sopenharmony_ci return ret; 199462306a36Sopenharmony_ci} 199562306a36Sopenharmony_ci 199662306a36Sopenharmony_ci/** 199762306a36Sopenharmony_ci * ntfs_mft_record_layout - layout an mft record into a memory buffer 199862306a36Sopenharmony_ci * @vol: volume to which the mft record will belong 199962306a36Sopenharmony_ci * @mft_no: mft reference specifying the mft record number 200062306a36Sopenharmony_ci * @m: destination buffer of size >= @vol->mft_record_size bytes 200162306a36Sopenharmony_ci * 200262306a36Sopenharmony_ci * Layout an empty, unused mft record with the mft record number @mft_no into 200362306a36Sopenharmony_ci * the buffer @m. The volume @vol is needed because the mft record structure 200462306a36Sopenharmony_ci * was modified in NTFS 3.1 so we need to know which volume version this mft 200562306a36Sopenharmony_ci * record will be used on. 200662306a36Sopenharmony_ci * 200762306a36Sopenharmony_ci * Return 0 on success and -errno on error. 
200862306a36Sopenharmony_ci */ 200962306a36Sopenharmony_cistatic int ntfs_mft_record_layout(const ntfs_volume *vol, const s64 mft_no, 201062306a36Sopenharmony_ci MFT_RECORD *m) 201162306a36Sopenharmony_ci{ 201262306a36Sopenharmony_ci ATTR_RECORD *a; 201362306a36Sopenharmony_ci 201462306a36Sopenharmony_ci ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no); 201562306a36Sopenharmony_ci if (mft_no >= (1ll << 32)) { 201662306a36Sopenharmony_ci ntfs_error(vol->sb, "Mft record number 0x%llx exceeds " 201762306a36Sopenharmony_ci "maximum of 2^32.", (long long)mft_no); 201862306a36Sopenharmony_ci return -ERANGE; 201962306a36Sopenharmony_ci } 202062306a36Sopenharmony_ci /* Start by clearing the whole mft record to gives us a clean slate. */ 202162306a36Sopenharmony_ci memset(m, 0, vol->mft_record_size); 202262306a36Sopenharmony_ci /* Aligned to 2-byte boundary. */ 202362306a36Sopenharmony_ci if (vol->major_ver < 3 || (vol->major_ver == 3 && !vol->minor_ver)) 202462306a36Sopenharmony_ci m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD_OLD) + 1) & ~1); 202562306a36Sopenharmony_ci else { 202662306a36Sopenharmony_ci m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD) + 1) & ~1); 202762306a36Sopenharmony_ci /* 202862306a36Sopenharmony_ci * Set the NTFS 3.1+ specific fields while we know that the 202962306a36Sopenharmony_ci * volume version is 3.1+. 
203062306a36Sopenharmony_ci */ 203162306a36Sopenharmony_ci m->reserved = 0; 203262306a36Sopenharmony_ci m->mft_record_number = cpu_to_le32((u32)mft_no); 203362306a36Sopenharmony_ci } 203462306a36Sopenharmony_ci m->magic = magic_FILE; 203562306a36Sopenharmony_ci if (vol->mft_record_size >= NTFS_BLOCK_SIZE) 203662306a36Sopenharmony_ci m->usa_count = cpu_to_le16(vol->mft_record_size / 203762306a36Sopenharmony_ci NTFS_BLOCK_SIZE + 1); 203862306a36Sopenharmony_ci else { 203962306a36Sopenharmony_ci m->usa_count = cpu_to_le16(1); 204062306a36Sopenharmony_ci ntfs_warning(vol->sb, "Sector size is bigger than mft record " 204162306a36Sopenharmony_ci "size. Setting usa_count to 1. If chkdsk " 204262306a36Sopenharmony_ci "reports this as corruption, please email " 204362306a36Sopenharmony_ci "linux-ntfs-dev@lists.sourceforge.net stating " 204462306a36Sopenharmony_ci "that you saw this message and that the " 204562306a36Sopenharmony_ci "modified filesystem created was corrupt. " 204662306a36Sopenharmony_ci "Thank you."); 204762306a36Sopenharmony_ci } 204862306a36Sopenharmony_ci /* Set the update sequence number to 1. */ 204962306a36Sopenharmony_ci *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = cpu_to_le16(1); 205062306a36Sopenharmony_ci m->lsn = 0; 205162306a36Sopenharmony_ci m->sequence_number = cpu_to_le16(1); 205262306a36Sopenharmony_ci m->link_count = 0; 205362306a36Sopenharmony_ci /* 205462306a36Sopenharmony_ci * Place the attributes straight after the update sequence array, 205562306a36Sopenharmony_ci * aligned to 8-byte boundary. 205662306a36Sopenharmony_ci */ 205762306a36Sopenharmony_ci m->attrs_offset = cpu_to_le16((le16_to_cpu(m->usa_ofs) + 205862306a36Sopenharmony_ci (le16_to_cpu(m->usa_count) << 1) + 7) & ~7); 205962306a36Sopenharmony_ci m->flags = 0; 206062306a36Sopenharmony_ci /* 206162306a36Sopenharmony_ci * Using attrs_offset plus eight bytes (for the termination attribute). 
206262306a36Sopenharmony_ci * attrs_offset is already aligned to 8-byte boundary, so no need to 206362306a36Sopenharmony_ci * align again. 206462306a36Sopenharmony_ci */ 206562306a36Sopenharmony_ci m->bytes_in_use = cpu_to_le32(le16_to_cpu(m->attrs_offset) + 8); 206662306a36Sopenharmony_ci m->bytes_allocated = cpu_to_le32(vol->mft_record_size); 206762306a36Sopenharmony_ci m->base_mft_record = 0; 206862306a36Sopenharmony_ci m->next_attr_instance = 0; 206962306a36Sopenharmony_ci /* Add the termination attribute. */ 207062306a36Sopenharmony_ci a = (ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset)); 207162306a36Sopenharmony_ci a->type = AT_END; 207262306a36Sopenharmony_ci a->length = 0; 207362306a36Sopenharmony_ci ntfs_debug("Done."); 207462306a36Sopenharmony_ci return 0; 207562306a36Sopenharmony_ci} 207662306a36Sopenharmony_ci 207762306a36Sopenharmony_ci/** 207862306a36Sopenharmony_ci * ntfs_mft_record_format - format an mft record on an ntfs volume 207962306a36Sopenharmony_ci * @vol: volume on which to format the mft record 208062306a36Sopenharmony_ci * @mft_no: mft record number to format 208162306a36Sopenharmony_ci * 208262306a36Sopenharmony_ci * Format the mft record @mft_no in $MFT/$DATA, i.e. lay out an empty, unused 208362306a36Sopenharmony_ci * mft record into the appropriate place of the mft data attribute. This is 208462306a36Sopenharmony_ci * used when extending the mft data attribute. 208562306a36Sopenharmony_ci * 208662306a36Sopenharmony_ci * Return 0 on success and -errno on error. 
208762306a36Sopenharmony_ci */ 208862306a36Sopenharmony_cistatic int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no) 208962306a36Sopenharmony_ci{ 209062306a36Sopenharmony_ci loff_t i_size; 209162306a36Sopenharmony_ci struct inode *mft_vi = vol->mft_ino; 209262306a36Sopenharmony_ci struct page *page; 209362306a36Sopenharmony_ci MFT_RECORD *m; 209462306a36Sopenharmony_ci pgoff_t index, end_index; 209562306a36Sopenharmony_ci unsigned int ofs; 209662306a36Sopenharmony_ci int err; 209762306a36Sopenharmony_ci 209862306a36Sopenharmony_ci ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no); 209962306a36Sopenharmony_ci /* 210062306a36Sopenharmony_ci * The index into the page cache and the offset within the page cache 210162306a36Sopenharmony_ci * page of the wanted mft record. 210262306a36Sopenharmony_ci */ 210362306a36Sopenharmony_ci index = mft_no << vol->mft_record_size_bits >> PAGE_SHIFT; 210462306a36Sopenharmony_ci ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_MASK; 210562306a36Sopenharmony_ci /* The maximum valid index into the page cache for $MFT's data. */ 210662306a36Sopenharmony_ci i_size = i_size_read(mft_vi); 210762306a36Sopenharmony_ci end_index = i_size >> PAGE_SHIFT; 210862306a36Sopenharmony_ci if (unlikely(index >= end_index)) { 210962306a36Sopenharmony_ci if (unlikely(index > end_index || ofs + vol->mft_record_size >= 211062306a36Sopenharmony_ci (i_size & ~PAGE_MASK))) { 211162306a36Sopenharmony_ci ntfs_error(vol->sb, "Tried to format non-existing mft " 211262306a36Sopenharmony_ci "record 0x%llx.", (long long)mft_no); 211362306a36Sopenharmony_ci return -ENOENT; 211462306a36Sopenharmony_ci } 211562306a36Sopenharmony_ci } 211662306a36Sopenharmony_ci /* Read, map, and pin the page containing the mft record. 
*/ 211762306a36Sopenharmony_ci page = ntfs_map_page(mft_vi->i_mapping, index); 211862306a36Sopenharmony_ci if (IS_ERR(page)) { 211962306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to map page containing mft record " 212062306a36Sopenharmony_ci "to format 0x%llx.", (long long)mft_no); 212162306a36Sopenharmony_ci return PTR_ERR(page); 212262306a36Sopenharmony_ci } 212362306a36Sopenharmony_ci lock_page(page); 212462306a36Sopenharmony_ci BUG_ON(!PageUptodate(page)); 212562306a36Sopenharmony_ci ClearPageUptodate(page); 212662306a36Sopenharmony_ci m = (MFT_RECORD*)((u8*)page_address(page) + ofs); 212762306a36Sopenharmony_ci err = ntfs_mft_record_layout(vol, mft_no, m); 212862306a36Sopenharmony_ci if (unlikely(err)) { 212962306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to layout mft record 0x%llx.", 213062306a36Sopenharmony_ci (long long)mft_no); 213162306a36Sopenharmony_ci SetPageUptodate(page); 213262306a36Sopenharmony_ci unlock_page(page); 213362306a36Sopenharmony_ci ntfs_unmap_page(page); 213462306a36Sopenharmony_ci return err; 213562306a36Sopenharmony_ci } 213662306a36Sopenharmony_ci flush_dcache_page(page); 213762306a36Sopenharmony_ci SetPageUptodate(page); 213862306a36Sopenharmony_ci unlock_page(page); 213962306a36Sopenharmony_ci /* 214062306a36Sopenharmony_ci * Make sure the mft record is written out to disk. We could use 214162306a36Sopenharmony_ci * ilookup5() to check if an inode is in icache and so on but this is 214262306a36Sopenharmony_ci * unnecessary as ntfs_writepage() will write the dirty record anyway. 
214362306a36Sopenharmony_ci */ 214462306a36Sopenharmony_ci mark_ntfs_record_dirty(page, ofs); 214562306a36Sopenharmony_ci ntfs_unmap_page(page); 214662306a36Sopenharmony_ci ntfs_debug("Done."); 214762306a36Sopenharmony_ci return 0; 214862306a36Sopenharmony_ci} 214962306a36Sopenharmony_ci 215062306a36Sopenharmony_ci/** 215162306a36Sopenharmony_ci * ntfs_mft_record_alloc - allocate an mft record on an ntfs volume 215262306a36Sopenharmony_ci * @vol: [IN] volume on which to allocate the mft record 215362306a36Sopenharmony_ci * @mode: [IN] mode if want a file or directory, i.e. base inode or 0 215462306a36Sopenharmony_ci * @base_ni: [IN] open base inode if allocating an extent mft record or NULL 215562306a36Sopenharmony_ci * @mrec: [OUT] on successful return this is the mapped mft record 215662306a36Sopenharmony_ci * 215762306a36Sopenharmony_ci * Allocate an mft record in $MFT/$DATA of an open ntfs volume @vol. 215862306a36Sopenharmony_ci * 215962306a36Sopenharmony_ci * If @base_ni is NULL make the mft record a base mft record, i.e. a file or 216062306a36Sopenharmony_ci * directory inode, and allocate it at the default allocator position. In 216162306a36Sopenharmony_ci * this case @mode is the file mode as given to us by the caller. We in 216262306a36Sopenharmony_ci * particular use @mode to distinguish whether a file or a directory is being 216362306a36Sopenharmony_ci * created (S_ISREG(mode) and S_ISDIR(mode), respectively). 216462306a36Sopenharmony_ci * 216562306a36Sopenharmony_ci * If @base_ni is not NULL make the allocated mft record an extent record, 216662306a36Sopenharmony_ci * allocate it starting at the mft record after the base mft record and attach 216762306a36Sopenharmony_ci * the allocated and opened ntfs inode to the base inode @base_ni. In this 216862306a36Sopenharmony_ci * case @mode must be 0 as it is meaningless for extent inodes. 216962306a36Sopenharmony_ci * 217062306a36Sopenharmony_ci * You need to check the return value with IS_ERR().
If false, the function 217162306a36Sopenharmony_ci * was successful and the return value is the now opened ntfs inode of the 217262306a36Sopenharmony_ci * allocated mft record. *@mrec is then set to the allocated, mapped, pinned, 217362306a36Sopenharmony_ci * and locked mft record. If IS_ERR() is true, the function failed and the 217462306a36Sopenharmony_ci * error code is obtained from PTR_ERR(return value). *@mrec is undefined in 217562306a36Sopenharmony_ci * this case. 217662306a36Sopenharmony_ci * 217762306a36Sopenharmony_ci * Allocation strategy: 217862306a36Sopenharmony_ci * 217962306a36Sopenharmony_ci * To find a free mft record, we scan the mft bitmap for a zero bit. To 218062306a36Sopenharmony_ci * optimize this we start scanning at the place specified by @base_ni or if 218162306a36Sopenharmony_ci * @base_ni is NULL we start where we last stopped and we perform wrap around 218262306a36Sopenharmony_ci * when we reach the end. Note, we do not try to allocate mft records below 218362306a36Sopenharmony_ci * number 24 because numbers 0 to 15 are the defined system files anyway and 16 218462306a36Sopenharmony_ci * to 23 are special in that they are used for storing extension mft records 218562306a36Sopenharmony_ci * for the $DATA attribute of $MFT. This is required to avoid the possibility 218662306a36Sopenharmony_ci * of creating a runlist with a circular dependency which once written to disk 218762306a36Sopenharmony_ci * can never be read in again. Windows will only use records 16 to 23 for 218862306a36Sopenharmony_ci * normal files if the volume is completely out of space. We never use them 218962306a36Sopenharmony_ci * which means that when the volume is really out of space we cannot create any 219062306a36Sopenharmony_ci * more files while Windows can still create up to 8 small files. We can start 219162306a36Sopenharmony_ci * doing this at some later time, it does not matter much for now.
219262306a36Sopenharmony_ci * 219362306a36Sopenharmony_ci * When scanning the mft bitmap, we only search up to the last allocated mft 219462306a36Sopenharmony_ci * record. If there are no free records left in the range 24 to number of 219562306a36Sopenharmony_ci * allocated mft records, then we extend the $MFT/$DATA attribute in order to 219662306a36Sopenharmony_ci * create free mft records. We extend the allocated size of $MFT/$DATA by 16 219762306a36Sopenharmony_ci * records at a time or one cluster, if cluster size is above 16kiB. If there 219862306a36Sopenharmony_ci * is not sufficient space to do this, we try to extend by a single mft record 219962306a36Sopenharmony_ci * or one cluster, if cluster size is above the mft record size. 220062306a36Sopenharmony_ci * 220162306a36Sopenharmony_ci * No matter how many mft records we allocate, we initialize only the first 220262306a36Sopenharmony_ci * allocated mft record, incrementing mft data size and initialized size 220362306a36Sopenharmony_ci * accordingly, open an ntfs_inode for it and return it to the caller, unless 220462306a36Sopenharmony_ci * there are less than 24 mft records, in which case we allocate and initialize 220562306a36Sopenharmony_ci * mft records until we reach record 24 which we consider as the first free mft 220662306a36Sopenharmony_ci * record for use by normal files. 220762306a36Sopenharmony_ci * 220862306a36Sopenharmony_ci * If during any stage we overflow the initialized data in the mft bitmap, we 220962306a36Sopenharmony_ci * extend the initialized size (and data size) by 8 bytes, allocating another 221062306a36Sopenharmony_ci * cluster if required. The bitmap data size has to be at least equal to the 221162306a36Sopenharmony_ci * number of mft records in the mft, but it can be bigger, in which case the 221262306a36Sopenharmony_ci * superfluous bits are padded with zeroes.
221362306a36Sopenharmony_ci * 221462306a36Sopenharmony_ci * Thus, when we return successfully (IS_ERR() is false), we will have: 221562306a36Sopenharmony_ci * - initialized / extended the mft bitmap if necessary, 221662306a36Sopenharmony_ci * - initialized / extended the mft data if necessary, 221762306a36Sopenharmony_ci * - set the bit corresponding to the mft record being allocated in the 221862306a36Sopenharmony_ci * mft bitmap, 221962306a36Sopenharmony_ci * - opened an ntfs_inode for the allocated mft record, and we will have 222062306a36Sopenharmony_ci * - returned the ntfs_inode as well as the allocated mapped, pinned, and 222162306a36Sopenharmony_ci * locked mft record. 222262306a36Sopenharmony_ci * 222362306a36Sopenharmony_ci * On error, the volume will be left in a consistent state and no record will 222462306a36Sopenharmony_ci * be allocated. If rolling back a partial operation fails, we may leave some 222562306a36Sopenharmony_ci * inconsistent metadata in which case we set NVolErrors() so the volume is 222662306a36Sopenharmony_ci * left dirty when unmounted. 222762306a36Sopenharmony_ci * 222862306a36Sopenharmony_ci * Note, this function cannot make use of most of the normal functions, like 222962306a36Sopenharmony_ci * for example for attribute resizing, etc, because when the run list overflows 223062306a36Sopenharmony_ci * the base mft record and an attribute list is used, it is very important that 223162306a36Sopenharmony_ci * the extension mft records used to store the $DATA attribute of $MFT can be 223262306a36Sopenharmony_ci * reached without having to read the information contained inside them, as 223362306a36Sopenharmony_ci * this would make it impossible to find them in the first place after the 223462306a36Sopenharmony_ci * volume is unmounted.
$MFT/$BITMAP probably does not need to follow this 223562306a36Sopenharmony_ci * rule because the bitmap is not essential for finding the mft records, but on 223662306a36Sopenharmony_ci * the other hand, handling the bitmap in this special way would make life 223762306a36Sopenharmony_ci * easier because otherwise there might be circular invocations of functions 223862306a36Sopenharmony_ci * when reading the bitmap. 223962306a36Sopenharmony_ci */ 224062306a36Sopenharmony_cintfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode, 224162306a36Sopenharmony_ci ntfs_inode *base_ni, MFT_RECORD **mrec) 224262306a36Sopenharmony_ci{ 224362306a36Sopenharmony_ci s64 ll, bit, old_data_initialized, old_data_size; 224462306a36Sopenharmony_ci unsigned long flags; 224562306a36Sopenharmony_ci struct inode *vi; 224662306a36Sopenharmony_ci struct page *page; 224762306a36Sopenharmony_ci ntfs_inode *mft_ni, *mftbmp_ni, *ni; 224862306a36Sopenharmony_ci ntfs_attr_search_ctx *ctx; 224962306a36Sopenharmony_ci MFT_RECORD *m; 225062306a36Sopenharmony_ci ATTR_RECORD *a; 225162306a36Sopenharmony_ci pgoff_t index; 225262306a36Sopenharmony_ci unsigned int ofs; 225362306a36Sopenharmony_ci int err; 225462306a36Sopenharmony_ci le16 seq_no, usn; 225562306a36Sopenharmony_ci bool record_formatted = false; 225662306a36Sopenharmony_ci 225762306a36Sopenharmony_ci if (base_ni) { 225862306a36Sopenharmony_ci ntfs_debug("Entering (allocating an extent mft record for " 225962306a36Sopenharmony_ci "base mft record 0x%llx).", 226062306a36Sopenharmony_ci (long long)base_ni->mft_no); 226162306a36Sopenharmony_ci /* @mode and @base_ni are mutually exclusive. */ 226262306a36Sopenharmony_ci BUG_ON(mode); 226362306a36Sopenharmony_ci } else 226462306a36Sopenharmony_ci ntfs_debug("Entering (allocating a base mft record)."); 226562306a36Sopenharmony_ci if (mode) { 226662306a36Sopenharmony_ci /* @mode and @base_ni are mutually exclusive.
*/ 226762306a36Sopenharmony_ci BUG_ON(base_ni); 226862306a36Sopenharmony_ci /* We only support creation of normal files and directories. */ 226962306a36Sopenharmony_ci if (!S_ISREG(mode) && !S_ISDIR(mode)) 227062306a36Sopenharmony_ci return ERR_PTR(-EOPNOTSUPP); 227162306a36Sopenharmony_ci } 227262306a36Sopenharmony_ci BUG_ON(!mrec); 227362306a36Sopenharmony_ci mft_ni = NTFS_I(vol->mft_ino); 227462306a36Sopenharmony_ci mftbmp_ni = NTFS_I(vol->mftbmp_ino); 227562306a36Sopenharmony_ci down_write(&vol->mftbmp_lock); 227662306a36Sopenharmony_ci bit = ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(vol, base_ni); 227762306a36Sopenharmony_ci if (bit >= 0) { 227862306a36Sopenharmony_ci ntfs_debug("Found and allocated free record (#1), bit 0x%llx.", 227962306a36Sopenharmony_ci (long long)bit); 228062306a36Sopenharmony_ci goto have_alloc_rec; 228162306a36Sopenharmony_ci } 228262306a36Sopenharmony_ci if (bit != -ENOSPC) { 228362306a36Sopenharmony_ci up_write(&vol->mftbmp_lock); 228462306a36Sopenharmony_ci return ERR_PTR(bit); 228562306a36Sopenharmony_ci } 228662306a36Sopenharmony_ci /* 228762306a36Sopenharmony_ci * No free mft records left. If the mft bitmap already covers more 228862306a36Sopenharmony_ci * than the currently used mft records, the next records are all free, 228962306a36Sopenharmony_ci * so we can simply allocate the first unused mft record. 229062306a36Sopenharmony_ci * Note: We also have to make sure that the mft bitmap at least covers 229162306a36Sopenharmony_ci * the first 24 mft records as they are special and whilst they may not 229262306a36Sopenharmony_ci * be in use, we do not allocate from them.
229362306a36Sopenharmony_ci */ 229462306a36Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 229562306a36Sopenharmony_ci ll = mft_ni->initialized_size >> vol->mft_record_size_bits; 229662306a36Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 229762306a36Sopenharmony_ci read_lock_irqsave(&mftbmp_ni->size_lock, flags); 229862306a36Sopenharmony_ci old_data_initialized = mftbmp_ni->initialized_size; 229962306a36Sopenharmony_ci read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 230062306a36Sopenharmony_ci if (old_data_initialized << 3 > ll && old_data_initialized > 3) { 230162306a36Sopenharmony_ci bit = ll; 230262306a36Sopenharmony_ci if (bit < 24) 230362306a36Sopenharmony_ci bit = 24; 230462306a36Sopenharmony_ci if (unlikely(bit >= (1ll << 32))) 230562306a36Sopenharmony_ci goto max_err_out; 230662306a36Sopenharmony_ci ntfs_debug("Found free record (#2), bit 0x%llx.", 230762306a36Sopenharmony_ci (long long)bit); 230862306a36Sopenharmony_ci goto found_free_rec; 230962306a36Sopenharmony_ci } 231062306a36Sopenharmony_ci /* 231162306a36Sopenharmony_ci * The mft bitmap needs to be expanded until it covers the first unused 231262306a36Sopenharmony_ci * mft record that we can allocate. 231362306a36Sopenharmony_ci * Note: The smallest mft record we allocate is mft record 24.
231462306a36Sopenharmony_ci */ 231562306a36Sopenharmony_ci bit = old_data_initialized << 3; 231662306a36Sopenharmony_ci if (unlikely(bit >= (1ll << 32))) 231762306a36Sopenharmony_ci goto max_err_out; 231862306a36Sopenharmony_ci read_lock_irqsave(&mftbmp_ni->size_lock, flags); 231962306a36Sopenharmony_ci old_data_size = mftbmp_ni->allocated_size; 232062306a36Sopenharmony_ci ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, " 232162306a36Sopenharmony_ci "data_size 0x%llx, initialized_size 0x%llx.", 232262306a36Sopenharmony_ci (long long)old_data_size, 232362306a36Sopenharmony_ci (long long)i_size_read(vol->mftbmp_ino), 232462306a36Sopenharmony_ci (long long)old_data_initialized); 232562306a36Sopenharmony_ci read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 232662306a36Sopenharmony_ci if (old_data_initialized + 8 > old_data_size) { 232762306a36Sopenharmony_ci /* Need to extend bitmap by one more cluster. */ 232862306a36Sopenharmony_ci ntfs_debug("mftbmp: initialized_size + 8 > allocated_size."); 232962306a36Sopenharmony_ci err = ntfs_mft_bitmap_extend_allocation_nolock(vol); 233062306a36Sopenharmony_ci if (unlikely(err)) { 233162306a36Sopenharmony_ci up_write(&vol->mftbmp_lock); 233262306a36Sopenharmony_ci goto err_out; 233362306a36Sopenharmony_ci } 233462306a36Sopenharmony_ci#ifdef DEBUG 233562306a36Sopenharmony_ci read_lock_irqsave(&mftbmp_ni->size_lock, flags); 233662306a36Sopenharmony_ci ntfs_debug("Status of mftbmp after allocation extension: " 233762306a36Sopenharmony_ci "allocated_size 0x%llx, data_size 0x%llx, " 233862306a36Sopenharmony_ci "initialized_size 0x%llx.", 233962306a36Sopenharmony_ci (long long)mftbmp_ni->allocated_size, 234062306a36Sopenharmony_ci (long long)i_size_read(vol->mftbmp_ino), 234162306a36Sopenharmony_ci (long long)mftbmp_ni->initialized_size); 234262306a36Sopenharmony_ci read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 234362306a36Sopenharmony_ci#endif /* DEBUG */ 234462306a36Sopenharmony_ci }
234562306a36Sopenharmony_ci /* 234662306a36Sopenharmony_ci * We now have sufficient allocated space, extend the initialized_size 234762306a36Sopenharmony_ci * as well as the data_size if necessary and fill the new space with 234862306a36Sopenharmony_ci * zeroes. 234962306a36Sopenharmony_ci */ 235062306a36Sopenharmony_ci err = ntfs_mft_bitmap_extend_initialized_nolock(vol); 235162306a36Sopenharmony_ci if (unlikely(err)) { 235262306a36Sopenharmony_ci up_write(&vol->mftbmp_lock); 235362306a36Sopenharmony_ci goto err_out; 235462306a36Sopenharmony_ci } 235562306a36Sopenharmony_ci#ifdef DEBUG 235662306a36Sopenharmony_ci read_lock_irqsave(&mftbmp_ni->size_lock, flags); 235762306a36Sopenharmony_ci ntfs_debug("Status of mftbmp after initialized extension: " 235862306a36Sopenharmony_ci "allocated_size 0x%llx, data_size 0x%llx, " 235962306a36Sopenharmony_ci "initialized_size 0x%llx.", 236062306a36Sopenharmony_ci (long long)mftbmp_ni->allocated_size, 236162306a36Sopenharmony_ci (long long)i_size_read(vol->mftbmp_ino), 236262306a36Sopenharmony_ci (long long)mftbmp_ni->initialized_size); 236362306a36Sopenharmony_ci read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 236462306a36Sopenharmony_ci#endif /* DEBUG */ 236562306a36Sopenharmony_ci ntfs_debug("Found free record (#3), bit 0x%llx.", (long long)bit); 236662306a36Sopenharmony_cifound_free_rec: 236762306a36Sopenharmony_ci /* @bit is the found free mft record, allocate it in the mft bitmap.
*/ 236862306a36Sopenharmony_ci ntfs_debug("At found_free_rec."); 236962306a36Sopenharmony_ci err = ntfs_bitmap_set_bit(vol->mftbmp_ino, bit); 237062306a36Sopenharmony_ci if (unlikely(err)) { 237162306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to allocate bit in mft bitmap."); 237262306a36Sopenharmony_ci up_write(&vol->mftbmp_lock); 237362306a36Sopenharmony_ci goto err_out; 237462306a36Sopenharmony_ci } 237562306a36Sopenharmony_ci ntfs_debug("Set bit 0x%llx in mft bitmap.", (long long)bit); 237662306a36Sopenharmony_cihave_alloc_rec: 237762306a36Sopenharmony_ci /* 237862306a36Sopenharmony_ci * The mft bitmap is now uptodate. Deal with mft data attribute now. 237962306a36Sopenharmony_ci * Note, we keep hold of the mft bitmap lock for writing until all 238062306a36Sopenharmony_ci * modifications to the mft data attribute are complete, too, as they 238162306a36Sopenharmony_ci * will impact decisions for mft bitmap and mft record allocation done 238262306a36Sopenharmony_ci * by a parallel allocation and if the lock is not maintained a 238362306a36Sopenharmony_ci * parallel allocation could allocate the same mft record as this one. 238462306a36Sopenharmony_ci */ 238562306a36Sopenharmony_ci ll = (bit + 1) << vol->mft_record_size_bits; /* ll is the byte offset just past mft record @bit in the mft data. */ 238662306a36Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 238762306a36Sopenharmony_ci old_data_initialized = mft_ni->initialized_size; 238862306a36Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 238962306a36Sopenharmony_ci if (ll <= old_data_initialized) { 239062306a36Sopenharmony_ci ntfs_debug("Allocated mft record already initialized."); 239162306a36Sopenharmony_ci goto mft_rec_already_initialized; 239262306a36Sopenharmony_ci } 239362306a36Sopenharmony_ci ntfs_debug("Initializing allocated mft record."); 239462306a36Sopenharmony_ci /* 239562306a36Sopenharmony_ci * The mft record is outside the initialized data.
Extend the mft data 239662306a36Sopenharmony_ci * attribute until it covers the allocated record. The loop is only 239762306a36Sopenharmony_ci * actually traversed more than once when a freshly formatted volume is 239862306a36Sopenharmony_ci * first written to so it optimizes away nicely in the common case. 239962306a36Sopenharmony_ci */ 240062306a36Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 240162306a36Sopenharmony_ci ntfs_debug("Status of mft data before extension: " 240262306a36Sopenharmony_ci "allocated_size 0x%llx, data_size 0x%llx, " 240362306a36Sopenharmony_ci "initialized_size 0x%llx.", 240462306a36Sopenharmony_ci (long long)mft_ni->allocated_size, 240562306a36Sopenharmony_ci (long long)i_size_read(vol->mft_ino), 240662306a36Sopenharmony_ci (long long)mft_ni->initialized_size); 240762306a36Sopenharmony_ci while (ll > mft_ni->allocated_size) { 240862306a36Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 240962306a36Sopenharmony_ci err = ntfs_mft_data_extend_allocation_nolock(vol); 241062306a36Sopenharmony_ci if (unlikely(err)) { 241162306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to extend mft data " 241262306a36Sopenharmony_ci "allocation."); 241362306a36Sopenharmony_ci goto undo_mftbmp_alloc_nolock; 241462306a36Sopenharmony_ci } 241562306a36Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 241662306a36Sopenharmony_ci ntfs_debug("Status of mft data after allocation extension: " 241762306a36Sopenharmony_ci "allocated_size 0x%llx, data_size 0x%llx, " 241862306a36Sopenharmony_ci "initialized_size 0x%llx.", 241962306a36Sopenharmony_ci (long long)mft_ni->allocated_size, 242062306a36Sopenharmony_ci (long long)i_size_read(vol->mft_ino), 242162306a36Sopenharmony_ci (long long)mft_ni->initialized_size); 242262306a36Sopenharmony_ci } 242362306a36Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 242462306a36Sopenharmony_ci /* 242562306a36Sopenharmony_ci * Extend mft data initialized size (and data
size of course) to reach 242662306a36Sopenharmony_ci * the allocated mft record, formatting the mft records along the way. 242762306a36Sopenharmony_ci * Note: We only modify the ntfs_inode structure as that is all that is 242862306a36Sopenharmony_ci * needed by ntfs_mft_record_format(). We will update the attribute 242962306a36Sopenharmony_ci * record itself in one fell swoop later on. 243062306a36Sopenharmony_ci */ 243162306a36Sopenharmony_ci write_lock_irqsave(&mft_ni->size_lock, flags); 243262306a36Sopenharmony_ci old_data_initialized = mft_ni->initialized_size; 243362306a36Sopenharmony_ci old_data_size = vol->mft_ino->i_size; 243462306a36Sopenharmony_ci while (ll > mft_ni->initialized_size) { 243562306a36Sopenharmony_ci s64 new_initialized_size, mft_no; 243662306a36Sopenharmony_ci 243762306a36Sopenharmony_ci new_initialized_size = mft_ni->initialized_size + 243862306a36Sopenharmony_ci vol->mft_record_size; 243962306a36Sopenharmony_ci mft_no = mft_ni->initialized_size >> vol->mft_record_size_bits; 244062306a36Sopenharmony_ci if (new_initialized_size > i_size_read(vol->mft_ino)) 244162306a36Sopenharmony_ci i_size_write(vol->mft_ino, new_initialized_size); 244262306a36Sopenharmony_ci write_unlock_irqrestore(&mft_ni->size_lock, flags); 244362306a36Sopenharmony_ci ntfs_debug("Initializing mft record 0x%llx.", 244462306a36Sopenharmony_ci (long long)mft_no); 244562306a36Sopenharmony_ci err = ntfs_mft_record_format(vol, mft_no); 244662306a36Sopenharmony_ci if (unlikely(err)) { 244762306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to format mft record."); 244862306a36Sopenharmony_ci goto undo_data_init; 244962306a36Sopenharmony_ci } 245062306a36Sopenharmony_ci write_lock_irqsave(&mft_ni->size_lock, flags); 245162306a36Sopenharmony_ci mft_ni->initialized_size = new_initialized_size; 245262306a36Sopenharmony_ci } 245362306a36Sopenharmony_ci write_unlock_irqrestore(&mft_ni->size_lock, flags); 245462306a36Sopenharmony_ci record_formatted = true;
245562306a36Sopenharmony_ci /* Update the mft data attribute record to reflect the new sizes. */ 245662306a36Sopenharmony_ci m = map_mft_record(mft_ni); 245762306a36Sopenharmony_ci if (IS_ERR(m)) { 245862306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to map mft record."); 245962306a36Sopenharmony_ci err = PTR_ERR(m); 246062306a36Sopenharmony_ci goto undo_data_init; 246162306a36Sopenharmony_ci } 246262306a36Sopenharmony_ci ctx = ntfs_attr_get_search_ctx(mft_ni, m); 246362306a36Sopenharmony_ci if (unlikely(!ctx)) { 246462306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to get search context."); 246562306a36Sopenharmony_ci err = -ENOMEM; 246662306a36Sopenharmony_ci unmap_mft_record(mft_ni); 246762306a36Sopenharmony_ci goto undo_data_init; 246862306a36Sopenharmony_ci } 246962306a36Sopenharmony_ci err = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, 247062306a36Sopenharmony_ci CASE_SENSITIVE, 0, NULL, 0, ctx); 247162306a36Sopenharmony_ci if (unlikely(err)) { 247262306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to find first attribute extent of " 247362306a36Sopenharmony_ci "mft data attribute."); 247462306a36Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 247562306a36Sopenharmony_ci unmap_mft_record(mft_ni); 247662306a36Sopenharmony_ci goto undo_data_init; 247762306a36Sopenharmony_ci } 247862306a36Sopenharmony_ci a = ctx->attr; 247962306a36Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 248062306a36Sopenharmony_ci a->data.non_resident.initialized_size = 248162306a36Sopenharmony_ci cpu_to_sle64(mft_ni->initialized_size); 248262306a36Sopenharmony_ci a->data.non_resident.data_size = 248362306a36Sopenharmony_ci cpu_to_sle64(i_size_read(vol->mft_ino)); 248462306a36Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 248562306a36Sopenharmony_ci /* Ensure the changes make it to disk.
*/ 248662306a36Sopenharmony_ci flush_dcache_mft_record_page(ctx->ntfs_ino); 248762306a36Sopenharmony_ci mark_mft_record_dirty(ctx->ntfs_ino); 248862306a36Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 248962306a36Sopenharmony_ci unmap_mft_record(mft_ni); 249062306a36Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 249162306a36Sopenharmony_ci ntfs_debug("Status of mft data after mft record initialization: " 249262306a36Sopenharmony_ci "allocated_size 0x%llx, data_size 0x%llx, " 249362306a36Sopenharmony_ci "initialized_size 0x%llx.", 249462306a36Sopenharmony_ci (long long)mft_ni->allocated_size, 249562306a36Sopenharmony_ci (long long)i_size_read(vol->mft_ino), 249662306a36Sopenharmony_ci (long long)mft_ni->initialized_size); 249762306a36Sopenharmony_ci BUG_ON(i_size_read(vol->mft_ino) > mft_ni->allocated_size); 249862306a36Sopenharmony_ci BUG_ON(mft_ni->initialized_size > i_size_read(vol->mft_ino)); 249962306a36Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 250062306a36Sopenharmony_cimft_rec_already_initialized: 250162306a36Sopenharmony_ci /* 250262306a36Sopenharmony_ci * We can finally drop the mft bitmap lock as the mft data attribute 250362306a36Sopenharmony_ci * has been fully updated. The only disparity left is that the 250462306a36Sopenharmony_ci * allocated mft record still needs to be marked as in use to match the 250562306a36Sopenharmony_ci * set bit in the mft bitmap but this is actually not a problem since 250662306a36Sopenharmony_ci * this mft record is not referenced from anywhere yet and the fact 250762306a36Sopenharmony_ci * that it is allocated in the mft bitmap means that no-one will try to 250862306a36Sopenharmony_ci * allocate it either. 250962306a36Sopenharmony_ci */ 251062306a36Sopenharmony_ci up_write(&vol->mftbmp_lock); 251162306a36Sopenharmony_ci /* 251262306a36Sopenharmony_ci * We now have allocated and initialized the mft record.
Calculate the 251362306a36Sopenharmony_ci * index of and the offset within the page cache page the record is in. 251462306a36Sopenharmony_ci */ 251562306a36Sopenharmony_ci index = bit << vol->mft_record_size_bits >> PAGE_SHIFT; 251662306a36Sopenharmony_ci ofs = (bit << vol->mft_record_size_bits) & ~PAGE_MASK; 251762306a36Sopenharmony_ci /* Read, map, and pin the page containing the mft record. */ 251862306a36Sopenharmony_ci page = ntfs_map_page(vol->mft_ino->i_mapping, index); 251962306a36Sopenharmony_ci if (IS_ERR(page)) { 252062306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to map page containing allocated " 252162306a36Sopenharmony_ci "mft record 0x%llx.", (long long)bit); 252262306a36Sopenharmony_ci err = PTR_ERR(page); 252362306a36Sopenharmony_ci goto undo_mftbmp_alloc; 252462306a36Sopenharmony_ci } 252562306a36Sopenharmony_ci lock_page(page); 252662306a36Sopenharmony_ci BUG_ON(!PageUptodate(page)); 252762306a36Sopenharmony_ci ClearPageUptodate(page); 252862306a36Sopenharmony_ci m = (MFT_RECORD*)((u8*)page_address(page) + ofs); 252962306a36Sopenharmony_ci /* If we just formatted the mft record no need to do it again. */ 253062306a36Sopenharmony_ci if (!record_formatted) { 253162306a36Sopenharmony_ci /* Sanity check that the mft record is really not in use. */ 253262306a36Sopenharmony_ci if (ntfs_is_file_record(m->magic) && 253362306a36Sopenharmony_ci (m->flags & MFT_RECORD_IN_USE)) { 253462306a36Sopenharmony_ci ntfs_error(vol->sb, "Mft record 0x%llx was marked " 253562306a36Sopenharmony_ci "free in mft bitmap but is marked " 253662306a36Sopenharmony_ci "used itself. Corrupt filesystem.
" 253762306a36Sopenharmony_ci "Unmount and run chkdsk.", 253862306a36Sopenharmony_ci (long long)bit); 253962306a36Sopenharmony_ci err = -EIO; 254062306a36Sopenharmony_ci SetPageUptodate(page); 254162306a36Sopenharmony_ci unlock_page(page); 254262306a36Sopenharmony_ci ntfs_unmap_page(page); 254362306a36Sopenharmony_ci NVolSetErrors(vol); 254462306a36Sopenharmony_ci goto undo_mftbmp_alloc; 254562306a36Sopenharmony_ci } 254662306a36Sopenharmony_ci /* 254762306a36Sopenharmony_ci * We need to (re-)format the mft record, preserving the 254862306a36Sopenharmony_ci * sequence number if it is not zero as well as the update 254962306a36Sopenharmony_ci * sequence number if it is not zero or -1 (0xffff). This 255062306a36Sopenharmony_ci * means we do not need to care whether or not something went 255162306a36Sopenharmony_ci * wrong with the previous mft record. 255262306a36Sopenharmony_ci */ 255362306a36Sopenharmony_ci seq_no = m->sequence_number; 255462306a36Sopenharmony_ci usn = *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)); 255562306a36Sopenharmony_ci err = ntfs_mft_record_layout(vol, bit, m); 255662306a36Sopenharmony_ci if (unlikely(err)) { 255762306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to layout allocated mft " 255862306a36Sopenharmony_ci "record 0x%llx.", (long long)bit); 255962306a36Sopenharmony_ci SetPageUptodate(page); 256062306a36Sopenharmony_ci unlock_page(page); 256162306a36Sopenharmony_ci ntfs_unmap_page(page); 256262306a36Sopenharmony_ci goto undo_mftbmp_alloc; 256362306a36Sopenharmony_ci } 256462306a36Sopenharmony_ci if (seq_no) 256562306a36Sopenharmony_ci m->sequence_number = seq_no; 256662306a36Sopenharmony_ci if (usn && le16_to_cpu(usn) != 0xffff) 256762306a36Sopenharmony_ci *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = usn; 256862306a36Sopenharmony_ci } 256962306a36Sopenharmony_ci /* Set the mft record itself in use.
*/ 257062306a36Sopenharmony_ci m->flags |= MFT_RECORD_IN_USE; 257162306a36Sopenharmony_ci if (S_ISDIR(mode)) 257262306a36Sopenharmony_ci m->flags |= MFT_RECORD_IS_DIRECTORY; 257362306a36Sopenharmony_ci flush_dcache_page(page); 257462306a36Sopenharmony_ci SetPageUptodate(page); 257562306a36Sopenharmony_ci if (base_ni) { 257662306a36Sopenharmony_ci MFT_RECORD *m_tmp; 257762306a36Sopenharmony_ci 257862306a36Sopenharmony_ci /* 257962306a36Sopenharmony_ci * Setup the base mft record in the extent mft record. This 258062306a36Sopenharmony_ci * completes initialization of the allocated extent mft record 258162306a36Sopenharmony_ci * and we can simply use it with map_extent_mft_record(). 258262306a36Sopenharmony_ci */ 258362306a36Sopenharmony_ci m->base_mft_record = MK_LE_MREF(base_ni->mft_no, 258462306a36Sopenharmony_ci base_ni->seq_no); 258562306a36Sopenharmony_ci /* 258662306a36Sopenharmony_ci * Allocate an extent inode structure for the new mft record, 258762306a36Sopenharmony_ci * attach it to the base inode @base_ni and map, pin, and lock 258862306a36Sopenharmony_ci * its, i.e. the allocated, mft record. 258962306a36Sopenharmony_ci */ 259062306a36Sopenharmony_ci m_tmp = map_extent_mft_record(base_ni, bit, &ni); 259162306a36Sopenharmony_ci if (IS_ERR(m_tmp)) { 259262306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to map allocated extent " 259362306a36Sopenharmony_ci "mft record 0x%llx.", (long long)bit); 259462306a36Sopenharmony_ci err = PTR_ERR(m_tmp); 259562306a36Sopenharmony_ci /* Set the mft record itself not in use. */ 259662306a36Sopenharmony_ci m->flags &= cpu_to_le16( 259762306a36Sopenharmony_ci ~le16_to_cpu(MFT_RECORD_IN_USE)); 259862306a36Sopenharmony_ci flush_dcache_page(page); 259962306a36Sopenharmony_ci /* Make sure the mft record is written out to disk.
*/ 260062306a36Sopenharmony_ci mark_ntfs_record_dirty(page, ofs); 260162306a36Sopenharmony_ci unlock_page(page); 260262306a36Sopenharmony_ci ntfs_unmap_page(page); 260362306a36Sopenharmony_ci goto undo_mftbmp_alloc; 260462306a36Sopenharmony_ci } 260562306a36Sopenharmony_ci BUG_ON(m != m_tmp); 260662306a36Sopenharmony_ci /* 260762306a36Sopenharmony_ci * Make sure the allocated mft record is written out to disk. 260862306a36Sopenharmony_ci * No need to set the inode dirty because the caller is going 260962306a36Sopenharmony_ci * to do that anyway after finishing with the new extent mft 261062306a36Sopenharmony_ci * record (e.g. at a minimum a new attribute will be added to 261162306a36Sopenharmony_ci * the mft record). 261262306a36Sopenharmony_ci */ 261362306a36Sopenharmony_ci mark_ntfs_record_dirty(page, ofs); 261462306a36Sopenharmony_ci unlock_page(page); 261562306a36Sopenharmony_ci /* 261662306a36Sopenharmony_ci * Need to unmap the page since map_extent_mft_record() mapped 261762306a36Sopenharmony_ci * it as well so we have it mapped twice at the moment. 261862306a36Sopenharmony_ci */ 261962306a36Sopenharmony_ci ntfs_unmap_page(page); 262062306a36Sopenharmony_ci } else { 262162306a36Sopenharmony_ci /* 262262306a36Sopenharmony_ci * Allocate a new VFS inode and set it up. NOTE: @vi->i_nlink 262362306a36Sopenharmony_ci * is set to 1 but the mft record->link_count is 0. The caller 262462306a36Sopenharmony_ci * needs to bear this in mind. 262562306a36Sopenharmony_ci */ 262662306a36Sopenharmony_ci vi = new_inode(vol->sb); 262762306a36Sopenharmony_ci if (unlikely(!vi)) { 262862306a36Sopenharmony_ci err = -ENOMEM; 262962306a36Sopenharmony_ci /* Set the mft record itself not in use. */ 263062306a36Sopenharmony_ci m->flags &= cpu_to_le16( 263162306a36Sopenharmony_ci ~le16_to_cpu(MFT_RECORD_IN_USE)); 263262306a36Sopenharmony_ci flush_dcache_page(page); 263362306a36Sopenharmony_ci /* Make sure the mft record is written out to disk.
*/ 263462306a36Sopenharmony_ci mark_ntfs_record_dirty(page, ofs); 263562306a36Sopenharmony_ci unlock_page(page); 263662306a36Sopenharmony_ci ntfs_unmap_page(page); 263762306a36Sopenharmony_ci goto undo_mftbmp_alloc; 263862306a36Sopenharmony_ci } 263962306a36Sopenharmony_ci vi->i_ino = bit; 264062306a36Sopenharmony_ci 264162306a36Sopenharmony_ci /* The owner and group come from the ntfs volume. */ 264262306a36Sopenharmony_ci vi->i_uid = vol->uid; 264362306a36Sopenharmony_ci vi->i_gid = vol->gid; 264462306a36Sopenharmony_ci 264562306a36Sopenharmony_ci /* Initialize the ntfs specific part of @vi. */ 264662306a36Sopenharmony_ci ntfs_init_big_inode(vi); 264762306a36Sopenharmony_ci ni = NTFS_I(vi); 264862306a36Sopenharmony_ci /* 264962306a36Sopenharmony_ci * Set the appropriate mode, attribute type, and name. For 265062306a36Sopenharmony_ci * directories, also setup the index values to the defaults. 265162306a36Sopenharmony_ci */ 265262306a36Sopenharmony_ci if (S_ISDIR(mode)) { 265362306a36Sopenharmony_ci vi->i_mode = S_IFDIR | S_IRWXUGO; 265462306a36Sopenharmony_ci vi->i_mode &= ~vol->dmask; 265562306a36Sopenharmony_ci 265662306a36Sopenharmony_ci NInoSetMstProtected(ni); 265762306a36Sopenharmony_ci ni->type = AT_INDEX_ALLOCATION; 265862306a36Sopenharmony_ci ni->name = I30; 265962306a36Sopenharmony_ci ni->name_len = 4; 266062306a36Sopenharmony_ci 266162306a36Sopenharmony_ci ni->itype.index.block_size = 4096; 266262306a36Sopenharmony_ci ni->itype.index.block_size_bits = ntfs_ffs(4096) - 1; 266362306a36Sopenharmony_ci ni->itype.index.collation_rule = COLLATION_FILE_NAME; 266462306a36Sopenharmony_ci if (vol->cluster_size <= ni->itype.index.block_size) { 266562306a36Sopenharmony_ci ni->itype.index.vcn_size = vol->cluster_size; 266662306a36Sopenharmony_ci ni->itype.index.vcn_size_bits = 266762306a36Sopenharmony_ci vol->cluster_size_bits; 266862306a36Sopenharmony_ci } else { 266962306a36Sopenharmony_ci ni->itype.index.vcn_size = vol->sector_size; 267062306a36Sopenharmony_ci
ni->itype.index.vcn_size_bits = 267162306a36Sopenharmony_ci vol->sector_size_bits; 267262306a36Sopenharmony_ci } 267362306a36Sopenharmony_ci } else { 267462306a36Sopenharmony_ci vi->i_mode = S_IFREG | S_IRWXUGO; 267562306a36Sopenharmony_ci vi->i_mode &= ~vol->fmask; 267662306a36Sopenharmony_ci 267762306a36Sopenharmony_ci ni->type = AT_DATA; 267862306a36Sopenharmony_ci ni->name = NULL; 267962306a36Sopenharmony_ci ni->name_len = 0; 268062306a36Sopenharmony_ci } 268162306a36Sopenharmony_ci if (IS_RDONLY(vi)) 268262306a36Sopenharmony_ci vi->i_mode &= ~S_IWUGO; 268362306a36Sopenharmony_ci 268462306a36Sopenharmony_ci /* Set the inode times to the current time. */ 268562306a36Sopenharmony_ci vi->i_atime = vi->i_mtime = inode_set_ctime_current(vi); 268662306a36Sopenharmony_ci /* 268762306a36Sopenharmony_ci * Set the file size to 0, the ntfs inode sizes are set to 0 by 268862306a36Sopenharmony_ci * the call to ntfs_init_big_inode() above. 268962306a36Sopenharmony_ci */ 269062306a36Sopenharmony_ci vi->i_size = 0; 269162306a36Sopenharmony_ci vi->i_blocks = 0; 269262306a36Sopenharmony_ci 269362306a36Sopenharmony_ci /* Set the sequence number. */ 269462306a36Sopenharmony_ci vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number); 269562306a36Sopenharmony_ci /* 269662306a36Sopenharmony_ci * Manually map, pin, and lock the mft record as we already 269762306a36Sopenharmony_ci * have its page mapped and it is very easy to do. 269862306a36Sopenharmony_ci */ 269962306a36Sopenharmony_ci atomic_inc(&ni->count); 270062306a36Sopenharmony_ci mutex_lock(&ni->mrec_lock); 270162306a36Sopenharmony_ci ni->page = page; 270262306a36Sopenharmony_ci ni->page_ofs = ofs; 270362306a36Sopenharmony_ci /* 270462306a36Sopenharmony_ci * Make sure the allocated mft record is written out to disk.
270562306a36Sopenharmony_ci * NOTE: We do not set the ntfs inode dirty because this would 270662306a36Sopenharmony_ci * fail in ntfs_write_inode() because the inode does not have a 270762306a36Sopenharmony_ci * standard information attribute yet. Also, there is no need 270862306a36Sopenharmony_ci * to set the inode dirty because the caller is going to do 270962306a36Sopenharmony_ci * that anyway after finishing with the new mft record (e.g. at 271062306a36Sopenharmony_ci * a minimum some new attributes will be added to the mft 271162306a36Sopenharmony_ci * record). 271262306a36Sopenharmony_ci */ 271362306a36Sopenharmony_ci mark_ntfs_record_dirty(page, ofs); 271462306a36Sopenharmony_ci unlock_page(page); 271562306a36Sopenharmony_ci 271662306a36Sopenharmony_ci /* Add the inode to the inode hash for the superblock. */ 271762306a36Sopenharmony_ci insert_inode_hash(vi); 271862306a36Sopenharmony_ci 271962306a36Sopenharmony_ci /* Update the default mft allocation position. */ 272062306a36Sopenharmony_ci vol->mft_data_pos = bit + 1; 272162306a36Sopenharmony_ci } 272262306a36Sopenharmony_ci /* 272362306a36Sopenharmony_ci * Return the opened, allocated inode of the allocated mft record as 272462306a36Sopenharmony_ci * well as the mapped, pinned, and locked mft record. 272562306a36Sopenharmony_ci */ 272662306a36Sopenharmony_ci ntfs_debug("Returning opened, allocated %sinode 0x%llx.", 272762306a36Sopenharmony_ci base_ni ?
"extent " : "", (long long)bit); 272862306a36Sopenharmony_ci *mrec = m; 272962306a36Sopenharmony_ci return ni; 273062306a36Sopenharmony_ciundo_data_init: /* Undo the in-memory size changes made by the formatting loop; the mft bitmap lock is still held on every path that jumps here. */ 273162306a36Sopenharmony_ci write_lock_irqsave(&mft_ni->size_lock, flags); 273262306a36Sopenharmony_ci mft_ni->initialized_size = old_data_initialized; 273362306a36Sopenharmony_ci i_size_write(vol->mft_ino, old_data_size); 273462306a36Sopenharmony_ci write_unlock_irqrestore(&mft_ni->size_lock, flags); 273562306a36Sopenharmony_ci goto undo_mftbmp_alloc_nolock; 273662306a36Sopenharmony_ciundo_mftbmp_alloc: /* The mft bitmap lock was already dropped on the paths that jump here, so take it again before clearing the bit. */ 273762306a36Sopenharmony_ci down_write(&vol->mftbmp_lock); 273862306a36Sopenharmony_ciundo_mftbmp_alloc_nolock: 273962306a36Sopenharmony_ci if (ntfs_bitmap_clear_bit(vol->mftbmp_ino, bit)) { 274062306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to clear bit in mft bitmap.%s", es); 274162306a36Sopenharmony_ci NVolSetErrors(vol); 274262306a36Sopenharmony_ci } 274362306a36Sopenharmony_ci up_write(&vol->mftbmp_lock); 274462306a36Sopenharmony_cierr_out: 274562306a36Sopenharmony_ci return ERR_PTR(err); 274662306a36Sopenharmony_cimax_err_out: /* Reached with the mft bitmap lock still held; it is dropped below before returning. 
*/ 274762306a36Sopenharmony_ci ntfs_warning(vol->sb, "Cannot allocate mft record because the maximum " 274862306a36Sopenharmony_ci "number of inodes (2^32) has already been reached."); 274962306a36Sopenharmony_ci up_write(&vol->mftbmp_lock); 275062306a36Sopenharmony_ci return ERR_PTR(-ENOSPC); 275162306a36Sopenharmony_ci} 275262306a36Sopenharmony_ci 275362306a36Sopenharmony_ci/** 275462306a36Sopenharmony_ci * ntfs_extent_mft_record_free - free an extent mft record on an ntfs volume 275562306a36Sopenharmony_ci * @ni: ntfs inode of the mapped extent mft record to free 275662306a36Sopenharmony_ci * @m: mapped extent mft record of the ntfs inode @ni 275762306a36Sopenharmony_ci * 275862306a36Sopenharmony_ci * Free the mapped extent mft record @m of the extent ntfs inode @ni.
275962306a36Sopenharmony_ci * 276062306a36Sopenharmony_ci * Note that this function unmaps the mft record and closes and destroys @ni 276162306a36Sopenharmony_ci * internally and hence you cannot use either @ni or @m any more after this 276262306a36Sopenharmony_ci * function returns success. 276362306a36Sopenharmony_ci * 276462306a36Sopenharmony_ci * On success return 0 and on error return -errno. @ni and @m are still valid 276562306a36Sopenharmony_ci * in this case and have not been freed. 276662306a36Sopenharmony_ci * 276762306a36Sopenharmony_ci * For some errors an error message is displayed and the success code 0 is 276862306a36Sopenharmony_ci * returned and the volume is then left dirty on umount. This makes sense in 276962306a36Sopenharmony_ci * case we could not roll back the changes that were already done since the 277062306a36Sopenharmony_ci * caller no longer wants to reference this mft record so it does not matter to 277162306a36Sopenharmony_ci * the caller if something is wrong with it as long as it is properly detached 277262306a36Sopenharmony_ci * from the base inode. 
277362306a36Sopenharmony_ci */ 277462306a36Sopenharmony_ciint ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m) 277562306a36Sopenharmony_ci{ 277662306a36Sopenharmony_ci unsigned long mft_no = ni->mft_no; 277762306a36Sopenharmony_ci ntfs_volume *vol = ni->vol; 277862306a36Sopenharmony_ci ntfs_inode *base_ni; 277962306a36Sopenharmony_ci ntfs_inode **extent_nis; 278062306a36Sopenharmony_ci int i, err; 278162306a36Sopenharmony_ci le16 old_seq_no; 278262306a36Sopenharmony_ci u16 seq_no; 278362306a36Sopenharmony_ci 278462306a36Sopenharmony_ci BUG_ON(NInoAttr(ni)); 278562306a36Sopenharmony_ci BUG_ON(ni->nr_extents != -1); 278662306a36Sopenharmony_ci 278762306a36Sopenharmony_ci mutex_lock(&ni->extent_lock); 278862306a36Sopenharmony_ci base_ni = ni->ext.base_ntfs_ino; 278962306a36Sopenharmony_ci mutex_unlock(&ni->extent_lock); 279062306a36Sopenharmony_ci 279162306a36Sopenharmony_ci BUG_ON(base_ni->nr_extents <= 0); 279262306a36Sopenharmony_ci 279362306a36Sopenharmony_ci ntfs_debug("Entering for extent inode 0x%lx, base inode 0x%lx.\n", 279462306a36Sopenharmony_ci mft_no, base_ni->mft_no); 279562306a36Sopenharmony_ci 279662306a36Sopenharmony_ci mutex_lock(&base_ni->extent_lock); 279762306a36Sopenharmony_ci 279862306a36Sopenharmony_ci /* Make sure we are holding the only reference to the extent inode. */ 279962306a36Sopenharmony_ci if (atomic_read(&ni->count) > 2) { 280062306a36Sopenharmony_ci ntfs_error(vol->sb, "Tried to free busy extent inode 0x%lx, " 280162306a36Sopenharmony_ci "not freeing.", base_ni->mft_no); 280262306a36Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 280362306a36Sopenharmony_ci return -EBUSY; 280462306a36Sopenharmony_ci } 280562306a36Sopenharmony_ci 280662306a36Sopenharmony_ci /* Dissociate the ntfs inode from the base inode. 
*/ 280762306a36Sopenharmony_ci extent_nis = base_ni->ext.extent_ntfs_inos; 280862306a36Sopenharmony_ci err = -ENOENT; 280962306a36Sopenharmony_ci for (i = 0; i < base_ni->nr_extents; i++) { 281062306a36Sopenharmony_ci if (ni != extent_nis[i]) 281162306a36Sopenharmony_ci continue; 281262306a36Sopenharmony_ci extent_nis += i; 281362306a36Sopenharmony_ci base_ni->nr_extents--; 281462306a36Sopenharmony_ci memmove(extent_nis, extent_nis + 1, (base_ni->nr_extents - i) * 281562306a36Sopenharmony_ci sizeof(ntfs_inode*)); 281662306a36Sopenharmony_ci err = 0; 281762306a36Sopenharmony_ci break; 281862306a36Sopenharmony_ci } 281962306a36Sopenharmony_ci 282062306a36Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 282162306a36Sopenharmony_ci 282262306a36Sopenharmony_ci if (unlikely(err)) { 282362306a36Sopenharmony_ci ntfs_error(vol->sb, "Extent inode 0x%lx is not attached to " 282462306a36Sopenharmony_ci "its base inode 0x%lx.", mft_no, 282562306a36Sopenharmony_ci base_ni->mft_no); 282662306a36Sopenharmony_ci BUG(); 282762306a36Sopenharmony_ci } 282862306a36Sopenharmony_ci 282962306a36Sopenharmony_ci /* 283062306a36Sopenharmony_ci * The extent inode is no longer attached to the base inode so no one 283162306a36Sopenharmony_ci * can get a reference to it any more. 283262306a36Sopenharmony_ci */ 283362306a36Sopenharmony_ci 283462306a36Sopenharmony_ci /* Mark the mft record as not in use. */ 283562306a36Sopenharmony_ci m->flags &= ~MFT_RECORD_IN_USE; 283662306a36Sopenharmony_ci 283762306a36Sopenharmony_ci /* Increment the sequence number, skipping zero, if it is not zero. 
*/ 283862306a36Sopenharmony_ci old_seq_no = m->sequence_number; 283962306a36Sopenharmony_ci seq_no = le16_to_cpu(old_seq_no); 284062306a36Sopenharmony_ci if (seq_no == 0xffff) 284162306a36Sopenharmony_ci seq_no = 1; 284262306a36Sopenharmony_ci else if (seq_no) 284362306a36Sopenharmony_ci seq_no++; 284462306a36Sopenharmony_ci m->sequence_number = cpu_to_le16(seq_no); 284562306a36Sopenharmony_ci 284662306a36Sopenharmony_ci /* 284762306a36Sopenharmony_ci * Set the ntfs inode dirty and write it out. We do not need to worry 284862306a36Sopenharmony_ci * about the base inode here since whatever caused the extent mft 284962306a36Sopenharmony_ci * record to be freed is guaranteed to do it already. 285062306a36Sopenharmony_ci */ 285162306a36Sopenharmony_ci NInoSetDirty(ni); 285262306a36Sopenharmony_ci err = write_mft_record(ni, m, 0); 285362306a36Sopenharmony_ci if (unlikely(err)) { 285462306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to write mft record 0x%lx, not " 285562306a36Sopenharmony_ci "freeing.", mft_no); 285662306a36Sopenharmony_ci goto rollback; 285762306a36Sopenharmony_ci } 285862306a36Sopenharmony_cirollback_error: 285962306a36Sopenharmony_ci /* Unmap and throw away the now freed extent inode. */ 286062306a36Sopenharmony_ci unmap_extent_mft_record(ni); 286162306a36Sopenharmony_ci ntfs_clear_extent_inode(ni); 286262306a36Sopenharmony_ci 286362306a36Sopenharmony_ci /* Clear the bit in the $MFT/$BITMAP corresponding to this record. */ 286462306a36Sopenharmony_ci down_write(&vol->mftbmp_lock); 286562306a36Sopenharmony_ci err = ntfs_bitmap_clear_bit(vol->mftbmp_ino, mft_no); 286662306a36Sopenharmony_ci up_write(&vol->mftbmp_lock); 286762306a36Sopenharmony_ci if (unlikely(err)) { 286862306a36Sopenharmony_ci /* 286962306a36Sopenharmony_ci * The extent inode is gone but we failed to deallocate it in 287062306a36Sopenharmony_ci * the mft bitmap. Just emit a warning and leave the volume 287162306a36Sopenharmony_ci * dirty on umount. 
287262306a36Sopenharmony_ci */ 287362306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to clear bit in mft bitmap.%s", es); 287462306a36Sopenharmony_ci NVolSetErrors(vol); 287562306a36Sopenharmony_ci } 287662306a36Sopenharmony_ci return 0; 287762306a36Sopenharmony_cirollback: 287862306a36Sopenharmony_ci /* Rollback what we did... */ 287962306a36Sopenharmony_ci mutex_lock(&base_ni->extent_lock); 288062306a36Sopenharmony_ci extent_nis = base_ni->ext.extent_ntfs_inos; 288162306a36Sopenharmony_ci if (!(base_ni->nr_extents & 3)) { 288262306a36Sopenharmony_ci int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode*); 288362306a36Sopenharmony_ci 288462306a36Sopenharmony_ci extent_nis = kmalloc(new_size, GFP_NOFS); 288562306a36Sopenharmony_ci if (unlikely(!extent_nis)) { 288662306a36Sopenharmony_ci ntfs_error(vol->sb, "Failed to allocate internal " 288762306a36Sopenharmony_ci "buffer during rollback.%s", es); 288862306a36Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 288962306a36Sopenharmony_ci NVolSetErrors(vol); 289062306a36Sopenharmony_ci goto rollback_error; 289162306a36Sopenharmony_ci } 289262306a36Sopenharmony_ci if (base_ni->nr_extents) { 289362306a36Sopenharmony_ci BUG_ON(!base_ni->ext.extent_ntfs_inos); 289462306a36Sopenharmony_ci memcpy(extent_nis, base_ni->ext.extent_ntfs_inos, 289562306a36Sopenharmony_ci new_size - 4 * sizeof(ntfs_inode*)); 289662306a36Sopenharmony_ci kfree(base_ni->ext.extent_ntfs_inos); 289762306a36Sopenharmony_ci } 289862306a36Sopenharmony_ci base_ni->ext.extent_ntfs_inos = extent_nis; 289962306a36Sopenharmony_ci } 290062306a36Sopenharmony_ci m->flags |= MFT_RECORD_IN_USE; 290162306a36Sopenharmony_ci m->sequence_number = old_seq_no; 290262306a36Sopenharmony_ci extent_nis[base_ni->nr_extents++] = ni; 290362306a36Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 290462306a36Sopenharmony_ci mark_mft_record_dirty(ni); 290562306a36Sopenharmony_ci return err; 290662306a36Sopenharmony_ci} 290762306a36Sopenharmony_ci#endif /* NTFS_RW 
*/ 2908