18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/** 38c2ecf20Sopenharmony_ci * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. 68c2ecf20Sopenharmony_ci * Copyright (c) 2002 Richard Russon 78c2ecf20Sopenharmony_ci */ 88c2ecf20Sopenharmony_ci 98c2ecf20Sopenharmony_ci#include <linux/buffer_head.h> 108c2ecf20Sopenharmony_ci#include <linux/slab.h> 118c2ecf20Sopenharmony_ci#include <linux/swap.h> 128c2ecf20Sopenharmony_ci#include <linux/bio.h> 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ci#include "attrib.h" 158c2ecf20Sopenharmony_ci#include "aops.h" 168c2ecf20Sopenharmony_ci#include "bitmap.h" 178c2ecf20Sopenharmony_ci#include "debug.h" 188c2ecf20Sopenharmony_ci#include "dir.h" 198c2ecf20Sopenharmony_ci#include "lcnalloc.h" 208c2ecf20Sopenharmony_ci#include "malloc.h" 218c2ecf20Sopenharmony_ci#include "mft.h" 228c2ecf20Sopenharmony_ci#include "ntfs.h" 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci#define MAX_BHS (PAGE_SIZE / NTFS_BLOCK_SIZE) 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_ci/** 278c2ecf20Sopenharmony_ci * map_mft_record_page - map the page in which a specific mft record resides 288c2ecf20Sopenharmony_ci * @ni: ntfs inode whose mft record page to map 298c2ecf20Sopenharmony_ci * 308c2ecf20Sopenharmony_ci * This maps the page in which the mft record of the ntfs inode @ni is situated 318c2ecf20Sopenharmony_ci * and returns a pointer to the mft record within the mapped page. 328c2ecf20Sopenharmony_ci * 338c2ecf20Sopenharmony_ci * Return value needs to be checked with IS_ERR() and if that is true PTR_ERR() 348c2ecf20Sopenharmony_ci * contains the negative error code returned. 
358c2ecf20Sopenharmony_ci */ 368c2ecf20Sopenharmony_cistatic inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) 378c2ecf20Sopenharmony_ci{ 388c2ecf20Sopenharmony_ci loff_t i_size; 398c2ecf20Sopenharmony_ci ntfs_volume *vol = ni->vol; 408c2ecf20Sopenharmony_ci struct inode *mft_vi = vol->mft_ino; 418c2ecf20Sopenharmony_ci struct page *page; 428c2ecf20Sopenharmony_ci unsigned long index, end_index; 438c2ecf20Sopenharmony_ci unsigned ofs; 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci BUG_ON(ni->page); 468c2ecf20Sopenharmony_ci /* 478c2ecf20Sopenharmony_ci * The index into the page cache and the offset within the page cache 488c2ecf20Sopenharmony_ci * page of the wanted mft record. FIXME: We need to check for 498c2ecf20Sopenharmony_ci * overflowing the unsigned long, but I don't think we would ever get 508c2ecf20Sopenharmony_ci * here if the volume was that big... 518c2ecf20Sopenharmony_ci */ 528c2ecf20Sopenharmony_ci index = (u64)ni->mft_no << vol->mft_record_size_bits >> 538c2ecf20Sopenharmony_ci PAGE_SHIFT; 548c2ecf20Sopenharmony_ci ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_MASK; 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci i_size = i_size_read(mft_vi); 578c2ecf20Sopenharmony_ci /* The maximum valid index into the page cache for $MFT's data. */ 588c2ecf20Sopenharmony_ci end_index = i_size >> PAGE_SHIFT; 598c2ecf20Sopenharmony_ci 608c2ecf20Sopenharmony_ci /* If the wanted index is out of bounds the mft record doesn't exist. */ 618c2ecf20Sopenharmony_ci if (unlikely(index >= end_index)) { 628c2ecf20Sopenharmony_ci if (index > end_index || (i_size & ~PAGE_MASK) < ofs + 638c2ecf20Sopenharmony_ci vol->mft_record_size) { 648c2ecf20Sopenharmony_ci page = ERR_PTR(-ENOENT); 658c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Attempt to read mft record 0x%lx, " 668c2ecf20Sopenharmony_ci "which is beyond the end of the mft. 
" 678c2ecf20Sopenharmony_ci "This is probably a bug in the ntfs " 688c2ecf20Sopenharmony_ci "driver.", ni->mft_no); 698c2ecf20Sopenharmony_ci goto err_out; 708c2ecf20Sopenharmony_ci } 718c2ecf20Sopenharmony_ci } 728c2ecf20Sopenharmony_ci /* Read, map, and pin the page. */ 738c2ecf20Sopenharmony_ci page = ntfs_map_page(mft_vi->i_mapping, index); 748c2ecf20Sopenharmony_ci if (!IS_ERR(page)) { 758c2ecf20Sopenharmony_ci /* Catch multi sector transfer fixup errors. */ 768c2ecf20Sopenharmony_ci if (likely(ntfs_is_mft_recordp((le32*)(page_address(page) + 778c2ecf20Sopenharmony_ci ofs)))) { 788c2ecf20Sopenharmony_ci ni->page = page; 798c2ecf20Sopenharmony_ci ni->page_ofs = ofs; 808c2ecf20Sopenharmony_ci return page_address(page) + ofs; 818c2ecf20Sopenharmony_ci } 828c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Mft record 0x%lx is corrupt. " 838c2ecf20Sopenharmony_ci "Run chkdsk.", ni->mft_no); 848c2ecf20Sopenharmony_ci ntfs_unmap_page(page); 858c2ecf20Sopenharmony_ci page = ERR_PTR(-EIO); 868c2ecf20Sopenharmony_ci NVolSetErrors(vol); 878c2ecf20Sopenharmony_ci } 888c2ecf20Sopenharmony_cierr_out: 898c2ecf20Sopenharmony_ci ni->page = NULL; 908c2ecf20Sopenharmony_ci ni->page_ofs = 0; 918c2ecf20Sopenharmony_ci return (void*)page; 928c2ecf20Sopenharmony_ci} 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci/** 958c2ecf20Sopenharmony_ci * map_mft_record - map, pin and lock an mft record 968c2ecf20Sopenharmony_ci * @ni: ntfs inode whose MFT record to map 978c2ecf20Sopenharmony_ci * 988c2ecf20Sopenharmony_ci * First, take the mrec_lock mutex. We might now be sleeping, while waiting 998c2ecf20Sopenharmony_ci * for the mutex if it was already locked by someone else. 1008c2ecf20Sopenharmony_ci * 1018c2ecf20Sopenharmony_ci * The page of the record is mapped using map_mft_record_page() before being 1028c2ecf20Sopenharmony_ci * returned to the caller. 
1038c2ecf20Sopenharmony_ci * 1048c2ecf20Sopenharmony_ci * This in turn uses ntfs_map_page() to get the page containing the wanted mft 1058c2ecf20Sopenharmony_ci * record (it in turn calls read_cache_page() which reads it in from disk if 1068c2ecf20Sopenharmony_ci * necessary, increments the use count on the page so that it cannot disappear 1078c2ecf20Sopenharmony_ci * under us and returns a reference to the page cache page). 1088c2ecf20Sopenharmony_ci * 1098c2ecf20Sopenharmony_ci * If read_cache_page() invokes ntfs_readpage() to load the page from disk, it 1108c2ecf20Sopenharmony_ci * sets PG_locked and clears PG_uptodate on the page. Once I/O has completed 1118c2ecf20Sopenharmony_ci * and the post-read mst fixups on each mft record in the page have been 1128c2ecf20Sopenharmony_ci * performed, the page gets PG_uptodate set and PG_locked cleared (this is done 1138c2ecf20Sopenharmony_ci * in our asynchronous I/O completion handler end_buffer_read_mft_async()). 1148c2ecf20Sopenharmony_ci * ntfs_map_page() waits for PG_locked to become clear and checks if 1158c2ecf20Sopenharmony_ci * PG_uptodate is set and returns an error code if not. This provides 1168c2ecf20Sopenharmony_ci * sufficient protection against races when reading/using the page. 1178c2ecf20Sopenharmony_ci * 1188c2ecf20Sopenharmony_ci * However there is the write mapping to think about. Doing the above described 1198c2ecf20Sopenharmony_ci * checking here will be fine, because when initiating the write we will set 1208c2ecf20Sopenharmony_ci * PG_locked and clear PG_uptodate making sure nobody is touching the page 1218c2ecf20Sopenharmony_ci * contents. Doing the locking this way means that the commit to disk code in 1228c2ecf20Sopenharmony_ci * the page cache code paths is automatically sufficiently locked with us as 1238c2ecf20Sopenharmony_ci * we will not touch a page that has been locked or is not uptodate. 
The only 1248c2ecf20Sopenharmony_ci * locking problem then is them locking the page while we are accessing it. 1258c2ecf20Sopenharmony_ci * 1268c2ecf20Sopenharmony_ci * So that code will end up having to own the mrec_lock of all mft 1278c2ecf20Sopenharmony_ci * records/inodes present in the page before I/O can proceed. In that case we 1288c2ecf20Sopenharmony_ci * wouldn't need to bother with PG_locked and PG_uptodate as nobody will be 1298c2ecf20Sopenharmony_ci * accessing anything without owning the mrec_lock mutex. But we do need to 1308c2ecf20Sopenharmony_ci * use them because of the read_cache_page() invocation and the code becomes so 1318c2ecf20Sopenharmony_ci * much simpler this way that it is well worth it. 1328c2ecf20Sopenharmony_ci * 1338c2ecf20Sopenharmony_ci * The mft record is now ours and we return a pointer to it. You need to check 1348c2ecf20Sopenharmony_ci * the returned pointer with IS_ERR() and if that is true, PTR_ERR() will return 1358c2ecf20Sopenharmony_ci * the error code. 1368c2ecf20Sopenharmony_ci * 1378c2ecf20Sopenharmony_ci * NOTE: Caller is responsible for setting the mft record dirty before calling 1388c2ecf20Sopenharmony_ci * unmap_mft_record(). This is obviously only necessary if the caller really 1398c2ecf20Sopenharmony_ci * modified the mft record... 1408c2ecf20Sopenharmony_ci * Q: Do we want to recycle one of the VFS inode state bits instead? 1418c2ecf20Sopenharmony_ci * A: No, the inode ones mean we want to change the mft record, not we want to 1428c2ecf20Sopenharmony_ci * write it out. 1438c2ecf20Sopenharmony_ci */ 1448c2ecf20Sopenharmony_ciMFT_RECORD *map_mft_record(ntfs_inode *ni) 1458c2ecf20Sopenharmony_ci{ 1468c2ecf20Sopenharmony_ci MFT_RECORD *m; 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_ci ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no); 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_ci /* Make sure the ntfs inode doesn't go away. 
*/ 1518c2ecf20Sopenharmony_ci atomic_inc(&ni->count); 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci /* Serialize access to this mft record. */ 1548c2ecf20Sopenharmony_ci mutex_lock(&ni->mrec_lock); 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci m = map_mft_record_page(ni); 1578c2ecf20Sopenharmony_ci if (!IS_ERR(m)) 1588c2ecf20Sopenharmony_ci return m; 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci mutex_unlock(&ni->mrec_lock); 1618c2ecf20Sopenharmony_ci atomic_dec(&ni->count); 1628c2ecf20Sopenharmony_ci ntfs_error(ni->vol->sb, "Failed with error code %lu.", -PTR_ERR(m)); 1638c2ecf20Sopenharmony_ci return m; 1648c2ecf20Sopenharmony_ci} 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ci/** 1678c2ecf20Sopenharmony_ci * unmap_mft_record_page - unmap the page in which a specific mft record resides 1688c2ecf20Sopenharmony_ci * @ni: ntfs inode whose mft record page to unmap 1698c2ecf20Sopenharmony_ci * 1708c2ecf20Sopenharmony_ci * This unmaps the page in which the mft record of the ntfs inode @ni is 1718c2ecf20Sopenharmony_ci * situated and returns. This is a NOOP if highmem is not configured. 1728c2ecf20Sopenharmony_ci * 1738c2ecf20Sopenharmony_ci * The unmap happens via ntfs_unmap_page() which in turn decrements the use 1748c2ecf20Sopenharmony_ci * count on the page thus releasing it from the pinned state. 1758c2ecf20Sopenharmony_ci * 1768c2ecf20Sopenharmony_ci * We do not actually unmap the page from memory of course, as that will be 1778c2ecf20Sopenharmony_ci * done by the page cache code itself when memory pressure increases or 1788c2ecf20Sopenharmony_ci * whatever. 1798c2ecf20Sopenharmony_ci */ 1808c2ecf20Sopenharmony_cistatic inline void unmap_mft_record_page(ntfs_inode *ni) 1818c2ecf20Sopenharmony_ci{ 1828c2ecf20Sopenharmony_ci BUG_ON(!ni->page); 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_ci // TODO: If dirty, blah... 
1858c2ecf20Sopenharmony_ci ntfs_unmap_page(ni->page); 1868c2ecf20Sopenharmony_ci ni->page = NULL; 1878c2ecf20Sopenharmony_ci ni->page_ofs = 0; 1888c2ecf20Sopenharmony_ci return; 1898c2ecf20Sopenharmony_ci} 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_ci/** 1928c2ecf20Sopenharmony_ci * unmap_mft_record - release a mapped mft record 1938c2ecf20Sopenharmony_ci * @ni: ntfs inode whose MFT record to unmap 1948c2ecf20Sopenharmony_ci * 1958c2ecf20Sopenharmony_ci * We release the page mapping and the mrec_lock mutex which unmaps the mft 1968c2ecf20Sopenharmony_ci * record and releases it for others to get hold of. We also release the ntfs 1978c2ecf20Sopenharmony_ci * inode by decrementing the ntfs inode reference count. 1988c2ecf20Sopenharmony_ci * 1998c2ecf20Sopenharmony_ci * NOTE: If caller has modified the mft record, it is imperative to set the mft 2008c2ecf20Sopenharmony_ci * record dirty BEFORE calling unmap_mft_record(). 2018c2ecf20Sopenharmony_ci */ 2028c2ecf20Sopenharmony_civoid unmap_mft_record(ntfs_inode *ni) 2038c2ecf20Sopenharmony_ci{ 2048c2ecf20Sopenharmony_ci struct page *page = ni->page; 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci BUG_ON(!page); 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_ci ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no); 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci unmap_mft_record_page(ni); 2118c2ecf20Sopenharmony_ci mutex_unlock(&ni->mrec_lock); 2128c2ecf20Sopenharmony_ci atomic_dec(&ni->count); 2138c2ecf20Sopenharmony_ci /* 2148c2ecf20Sopenharmony_ci * If pure ntfs_inode, i.e. no vfs inode attached, we leave it to 2158c2ecf20Sopenharmony_ci * ntfs_clear_extent_inode() in the extent inode case, and to the 2168c2ecf20Sopenharmony_ci * caller in the non-extent, yet pure ntfs inode case, to do the actual 2178c2ecf20Sopenharmony_ci * tear down of all structures and freeing of all allocated memory. 
2188c2ecf20Sopenharmony_ci */ 2198c2ecf20Sopenharmony_ci return; 2208c2ecf20Sopenharmony_ci} 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_ci/** 2238c2ecf20Sopenharmony_ci * map_extent_mft_record - load an extent inode and attach it to its base 2248c2ecf20Sopenharmony_ci * @base_ni: base ntfs inode 2258c2ecf20Sopenharmony_ci * @mref: mft reference of the extent inode to load 2268c2ecf20Sopenharmony_ci * @ntfs_ino: on successful return, pointer to the ntfs_inode structure 2278c2ecf20Sopenharmony_ci * 2288c2ecf20Sopenharmony_ci * Load the extent mft record @mref and attach it to its base inode @base_ni. 2298c2ecf20Sopenharmony_ci * Return the mapped extent mft record if IS_ERR(result) is false. Otherwise 2308c2ecf20Sopenharmony_ci * PTR_ERR(result) gives the negative error code. 2318c2ecf20Sopenharmony_ci * 2328c2ecf20Sopenharmony_ci * On successful return, @ntfs_ino contains a pointer to the ntfs_inode 2338c2ecf20Sopenharmony_ci * structure of the mapped extent inode. 2348c2ecf20Sopenharmony_ci */ 2358c2ecf20Sopenharmony_ciMFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, 2368c2ecf20Sopenharmony_ci ntfs_inode **ntfs_ino) 2378c2ecf20Sopenharmony_ci{ 2388c2ecf20Sopenharmony_ci MFT_RECORD *m; 2398c2ecf20Sopenharmony_ci ntfs_inode *ni = NULL; 2408c2ecf20Sopenharmony_ci ntfs_inode **extent_nis = NULL; 2418c2ecf20Sopenharmony_ci int i; 2428c2ecf20Sopenharmony_ci unsigned long mft_no = MREF(mref); 2438c2ecf20Sopenharmony_ci u16 seq_no = MSEQNO(mref); 2448c2ecf20Sopenharmony_ci bool destroy_ni = false; 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci ntfs_debug("Mapping extent mft record 0x%lx (base mft record 0x%lx).", 2478c2ecf20Sopenharmony_ci mft_no, base_ni->mft_no); 2488c2ecf20Sopenharmony_ci /* Make sure the base ntfs inode doesn't go away. 
*/ 2498c2ecf20Sopenharmony_ci atomic_inc(&base_ni->count); 2508c2ecf20Sopenharmony_ci /* 2518c2ecf20Sopenharmony_ci * Check if this extent inode has already been added to the base inode, 2528c2ecf20Sopenharmony_ci * in which case just return it. If not found, add it to the base 2538c2ecf20Sopenharmony_ci * inode before returning it. 2548c2ecf20Sopenharmony_ci */ 2558c2ecf20Sopenharmony_ci mutex_lock(&base_ni->extent_lock); 2568c2ecf20Sopenharmony_ci if (base_ni->nr_extents > 0) { 2578c2ecf20Sopenharmony_ci extent_nis = base_ni->ext.extent_ntfs_inos; 2588c2ecf20Sopenharmony_ci for (i = 0; i < base_ni->nr_extents; i++) { 2598c2ecf20Sopenharmony_ci if (mft_no != extent_nis[i]->mft_no) 2608c2ecf20Sopenharmony_ci continue; 2618c2ecf20Sopenharmony_ci ni = extent_nis[i]; 2628c2ecf20Sopenharmony_ci /* Make sure the ntfs inode doesn't go away. */ 2638c2ecf20Sopenharmony_ci atomic_inc(&ni->count); 2648c2ecf20Sopenharmony_ci break; 2658c2ecf20Sopenharmony_ci } 2668c2ecf20Sopenharmony_ci } 2678c2ecf20Sopenharmony_ci if (likely(ni != NULL)) { 2688c2ecf20Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 2698c2ecf20Sopenharmony_ci atomic_dec(&base_ni->count); 2708c2ecf20Sopenharmony_ci /* We found the record; just have to map and return it. */ 2718c2ecf20Sopenharmony_ci m = map_mft_record(ni); 2728c2ecf20Sopenharmony_ci /* map_mft_record() has incremented this on success. */ 2738c2ecf20Sopenharmony_ci atomic_dec(&ni->count); 2748c2ecf20Sopenharmony_ci if (!IS_ERR(m)) { 2758c2ecf20Sopenharmony_ci /* Verify the sequence number. */ 2768c2ecf20Sopenharmony_ci if (likely(le16_to_cpu(m->sequence_number) == seq_no)) { 2778c2ecf20Sopenharmony_ci ntfs_debug("Done 1."); 2788c2ecf20Sopenharmony_ci *ntfs_ino = ni; 2798c2ecf20Sopenharmony_ci return m; 2808c2ecf20Sopenharmony_ci } 2818c2ecf20Sopenharmony_ci unmap_mft_record(ni); 2828c2ecf20Sopenharmony_ci ntfs_error(base_ni->vol->sb, "Found stale extent mft " 2838c2ecf20Sopenharmony_ci "reference! Corrupt filesystem. 
" 2848c2ecf20Sopenharmony_ci "Run chkdsk."); 2858c2ecf20Sopenharmony_ci return ERR_PTR(-EIO); 2868c2ecf20Sopenharmony_ci } 2878c2ecf20Sopenharmony_cimap_err_out: 2888c2ecf20Sopenharmony_ci ntfs_error(base_ni->vol->sb, "Failed to map extent " 2898c2ecf20Sopenharmony_ci "mft record, error code %ld.", -PTR_ERR(m)); 2908c2ecf20Sopenharmony_ci return m; 2918c2ecf20Sopenharmony_ci } 2928c2ecf20Sopenharmony_ci /* Record wasn't there. Get a new ntfs inode and initialize it. */ 2938c2ecf20Sopenharmony_ci ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no); 2948c2ecf20Sopenharmony_ci if (unlikely(!ni)) { 2958c2ecf20Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 2968c2ecf20Sopenharmony_ci atomic_dec(&base_ni->count); 2978c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 2988c2ecf20Sopenharmony_ci } 2998c2ecf20Sopenharmony_ci ni->vol = base_ni->vol; 3008c2ecf20Sopenharmony_ci ni->seq_no = seq_no; 3018c2ecf20Sopenharmony_ci ni->nr_extents = -1; 3028c2ecf20Sopenharmony_ci ni->ext.base_ntfs_ino = base_ni; 3038c2ecf20Sopenharmony_ci /* Now map the record. */ 3048c2ecf20Sopenharmony_ci m = map_mft_record(ni); 3058c2ecf20Sopenharmony_ci if (IS_ERR(m)) { 3068c2ecf20Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 3078c2ecf20Sopenharmony_ci atomic_dec(&base_ni->count); 3088c2ecf20Sopenharmony_ci ntfs_clear_extent_inode(ni); 3098c2ecf20Sopenharmony_ci goto map_err_out; 3108c2ecf20Sopenharmony_ci } 3118c2ecf20Sopenharmony_ci /* Verify the sequence number if it is present. */ 3128c2ecf20Sopenharmony_ci if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) { 3138c2ecf20Sopenharmony_ci ntfs_error(base_ni->vol->sb, "Found stale extent mft " 3148c2ecf20Sopenharmony_ci "reference! Corrupt filesystem. Run chkdsk."); 3158c2ecf20Sopenharmony_ci destroy_ni = true; 3168c2ecf20Sopenharmony_ci m = ERR_PTR(-EIO); 3178c2ecf20Sopenharmony_ci goto unm_err_out; 3188c2ecf20Sopenharmony_ci } 3198c2ecf20Sopenharmony_ci /* Attach extent inode to base inode, reallocating memory if needed. 
*/ 3208c2ecf20Sopenharmony_ci if (!(base_ni->nr_extents & 3)) { 3218c2ecf20Sopenharmony_ci ntfs_inode **tmp; 3228c2ecf20Sopenharmony_ci int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *); 3238c2ecf20Sopenharmony_ci 3248c2ecf20Sopenharmony_ci tmp = kmalloc(new_size, GFP_NOFS); 3258c2ecf20Sopenharmony_ci if (unlikely(!tmp)) { 3268c2ecf20Sopenharmony_ci ntfs_error(base_ni->vol->sb, "Failed to allocate " 3278c2ecf20Sopenharmony_ci "internal buffer."); 3288c2ecf20Sopenharmony_ci destroy_ni = true; 3298c2ecf20Sopenharmony_ci m = ERR_PTR(-ENOMEM); 3308c2ecf20Sopenharmony_ci goto unm_err_out; 3318c2ecf20Sopenharmony_ci } 3328c2ecf20Sopenharmony_ci if (base_ni->nr_extents) { 3338c2ecf20Sopenharmony_ci BUG_ON(!base_ni->ext.extent_ntfs_inos); 3348c2ecf20Sopenharmony_ci memcpy(tmp, base_ni->ext.extent_ntfs_inos, new_size - 3358c2ecf20Sopenharmony_ci 4 * sizeof(ntfs_inode *)); 3368c2ecf20Sopenharmony_ci kfree(base_ni->ext.extent_ntfs_inos); 3378c2ecf20Sopenharmony_ci } 3388c2ecf20Sopenharmony_ci base_ni->ext.extent_ntfs_inos = tmp; 3398c2ecf20Sopenharmony_ci } 3408c2ecf20Sopenharmony_ci base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni; 3418c2ecf20Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 3428c2ecf20Sopenharmony_ci atomic_dec(&base_ni->count); 3438c2ecf20Sopenharmony_ci ntfs_debug("Done 2."); 3448c2ecf20Sopenharmony_ci *ntfs_ino = ni; 3458c2ecf20Sopenharmony_ci return m; 3468c2ecf20Sopenharmony_ciunm_err_out: 3478c2ecf20Sopenharmony_ci unmap_mft_record(ni); 3488c2ecf20Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 3498c2ecf20Sopenharmony_ci atomic_dec(&base_ni->count); 3508c2ecf20Sopenharmony_ci /* 3518c2ecf20Sopenharmony_ci * If the extent inode was not attached to the base inode we need to 3528c2ecf20Sopenharmony_ci * release it or we will leak memory. 
3538c2ecf20Sopenharmony_ci */ 3548c2ecf20Sopenharmony_ci if (destroy_ni) 3558c2ecf20Sopenharmony_ci ntfs_clear_extent_inode(ni); 3568c2ecf20Sopenharmony_ci return m; 3578c2ecf20Sopenharmony_ci} 3588c2ecf20Sopenharmony_ci 3598c2ecf20Sopenharmony_ci#ifdef NTFS_RW 3608c2ecf20Sopenharmony_ci 3618c2ecf20Sopenharmony_ci/** 3628c2ecf20Sopenharmony_ci * __mark_mft_record_dirty - set the mft record and the page containing it dirty 3638c2ecf20Sopenharmony_ci * @ni: ntfs inode describing the mapped mft record 3648c2ecf20Sopenharmony_ci * 3658c2ecf20Sopenharmony_ci * Internal function. Users should call mark_mft_record_dirty() instead. 3668c2ecf20Sopenharmony_ci * 3678c2ecf20Sopenharmony_ci * Set the mapped (extent) mft record of the (base or extent) ntfs inode @ni, 3688c2ecf20Sopenharmony_ci * as well as the page containing the mft record, dirty. Also, mark the base 3698c2ecf20Sopenharmony_ci * vfs inode dirty. This ensures that any changes to the mft record are 3708c2ecf20Sopenharmony_ci * written out to disk. 3718c2ecf20Sopenharmony_ci * 3728c2ecf20Sopenharmony_ci * NOTE: We only set I_DIRTY_DATASYNC (and not I_DIRTY_PAGES) 3738c2ecf20Sopenharmony_ci * on the base vfs inode, because even though file data may have been modified, 3748c2ecf20Sopenharmony_ci * it is dirty in the inode meta data rather than the data page cache of the 3758c2ecf20Sopenharmony_ci * inode, and thus there are no data pages that need writing out. Therefore, a 3768c2ecf20Sopenharmony_ci * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the 3778c2ecf20Sopenharmony_ci * other hand, is not sufficient, because ->write_inode needs to be called even 3788c2ecf20Sopenharmony_ci * in case of fdatasync. This needs to happen or the file data would not 3798c2ecf20Sopenharmony_ci * necessarily hit the device synchronously, even though the vfs inode has the 3808c2ecf20Sopenharmony_ci * O_SYNC flag set. 
Also, I_DIRTY_DATASYNC simply "feels" better than just 3818c2ecf20Sopenharmony_ci * I_DIRTY_SYNC, since the file data has not actually hit the block device yet, 3828c2ecf20Sopenharmony_ci * which is not what I_DIRTY_SYNC on its own would suggest. 3838c2ecf20Sopenharmony_ci */ 3848c2ecf20Sopenharmony_civoid __mark_mft_record_dirty(ntfs_inode *ni) 3858c2ecf20Sopenharmony_ci{ 3868c2ecf20Sopenharmony_ci ntfs_inode *base_ni; 3878c2ecf20Sopenharmony_ci 3888c2ecf20Sopenharmony_ci ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); 3898c2ecf20Sopenharmony_ci BUG_ON(NInoAttr(ni)); 3908c2ecf20Sopenharmony_ci mark_ntfs_record_dirty(ni->page, ni->page_ofs); 3918c2ecf20Sopenharmony_ci /* Determine the base vfs inode and mark it dirty, too. */ 3928c2ecf20Sopenharmony_ci mutex_lock(&ni->extent_lock); 3938c2ecf20Sopenharmony_ci if (likely(ni->nr_extents >= 0)) 3948c2ecf20Sopenharmony_ci base_ni = ni; 3958c2ecf20Sopenharmony_ci else 3968c2ecf20Sopenharmony_ci base_ni = ni->ext.base_ntfs_ino; 3978c2ecf20Sopenharmony_ci mutex_unlock(&ni->extent_lock); 3988c2ecf20Sopenharmony_ci __mark_inode_dirty(VFS_I(base_ni), I_DIRTY_DATASYNC); 3998c2ecf20Sopenharmony_ci} 4008c2ecf20Sopenharmony_ci 4018c2ecf20Sopenharmony_cistatic const char *ntfs_please_email = "Please email " 4028c2ecf20Sopenharmony_ci "linux-ntfs-dev@lists.sourceforge.net and say that you saw " 4038c2ecf20Sopenharmony_ci "this message. 
Thank you."; 4048c2ecf20Sopenharmony_ci 4058c2ecf20Sopenharmony_ci/** 4068c2ecf20Sopenharmony_ci * ntfs_sync_mft_mirror_umount - synchronise an mft record to the mft mirror 4078c2ecf20Sopenharmony_ci * @vol: ntfs volume on which the mft record to synchronize resides 4088c2ecf20Sopenharmony_ci * @mft_no: mft record number of mft record to synchronize 4098c2ecf20Sopenharmony_ci * @m: mapped, mst protected (extent) mft record to synchronize 4108c2ecf20Sopenharmony_ci * 4118c2ecf20Sopenharmony_ci * Write the mapped, mst protected (extent) mft record @m with mft record 4128c2ecf20Sopenharmony_ci * number @mft_no to the mft mirror ($MFTMirr) of the ntfs volume @vol, 4138c2ecf20Sopenharmony_ci * bypassing the page cache and the $MFTMirr inode itself. 4148c2ecf20Sopenharmony_ci * 4158c2ecf20Sopenharmony_ci * This function is only for use at umount time when the mft mirror inode has 4168c2ecf20Sopenharmony_ci * already been disposed off. We BUG() if we are called while the mft mirror 4178c2ecf20Sopenharmony_ci * inode is still attached to the volume. 4188c2ecf20Sopenharmony_ci * 4198c2ecf20Sopenharmony_ci * On success return 0. On error return -errno. 4208c2ecf20Sopenharmony_ci * 4218c2ecf20Sopenharmony_ci * NOTE: This function is not implemented yet as I am not convinced it can 4228c2ecf20Sopenharmony_ci * actually be triggered considering the sequence of commits we do in super.c:: 4238c2ecf20Sopenharmony_ci * ntfs_put_super(). But just in case we provide this place holder as the 4248c2ecf20Sopenharmony_ci * alternative would be either to BUG() or to get a NULL pointer dereference 4258c2ecf20Sopenharmony_ci * and Oops. 
4268c2ecf20Sopenharmony_ci */ 4278c2ecf20Sopenharmony_cistatic int ntfs_sync_mft_mirror_umount(ntfs_volume *vol, 4288c2ecf20Sopenharmony_ci const unsigned long mft_no, MFT_RECORD *m) 4298c2ecf20Sopenharmony_ci{ 4308c2ecf20Sopenharmony_ci BUG_ON(vol->mftmirr_ino); 4318c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Umount time mft mirror syncing is not " 4328c2ecf20Sopenharmony_ci "implemented yet. %s", ntfs_please_email); 4338c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 4348c2ecf20Sopenharmony_ci} 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci/** 4378c2ecf20Sopenharmony_ci * ntfs_sync_mft_mirror - synchronize an mft record to the mft mirror 4388c2ecf20Sopenharmony_ci * @vol: ntfs volume on which the mft record to synchronize resides 4398c2ecf20Sopenharmony_ci * @mft_no: mft record number of mft record to synchronize 4408c2ecf20Sopenharmony_ci * @m: mapped, mst protected (extent) mft record to synchronize 4418c2ecf20Sopenharmony_ci * @sync: if true, wait for i/o completion 4428c2ecf20Sopenharmony_ci * 4438c2ecf20Sopenharmony_ci * Write the mapped, mst protected (extent) mft record @m with mft record 4448c2ecf20Sopenharmony_ci * number @mft_no to the mft mirror ($MFTMirr) of the ntfs volume @vol. 4458c2ecf20Sopenharmony_ci * 4468c2ecf20Sopenharmony_ci * On success return 0. On error return -errno and set the volume errors flag 4478c2ecf20Sopenharmony_ci * in the ntfs volume @vol. 4488c2ecf20Sopenharmony_ci * 4498c2ecf20Sopenharmony_ci * NOTE: We always perform synchronous i/o and ignore the @sync parameter. 4508c2ecf20Sopenharmony_ci * 4518c2ecf20Sopenharmony_ci * TODO: If @sync is false, want to do truly asynchronous i/o, i.e. just 4528c2ecf20Sopenharmony_ci * schedule i/o via ->writepage or do it via kntfsd or whatever. 
4538c2ecf20Sopenharmony_ci */ 4548c2ecf20Sopenharmony_ciint ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, 4558c2ecf20Sopenharmony_ci MFT_RECORD *m, int sync) 4568c2ecf20Sopenharmony_ci{ 4578c2ecf20Sopenharmony_ci struct page *page; 4588c2ecf20Sopenharmony_ci unsigned int blocksize = vol->sb->s_blocksize; 4598c2ecf20Sopenharmony_ci int max_bhs = vol->mft_record_size / blocksize; 4608c2ecf20Sopenharmony_ci struct buffer_head *bhs[MAX_BHS]; 4618c2ecf20Sopenharmony_ci struct buffer_head *bh, *head; 4628c2ecf20Sopenharmony_ci u8 *kmirr; 4638c2ecf20Sopenharmony_ci runlist_element *rl; 4648c2ecf20Sopenharmony_ci unsigned int block_start, block_end, m_start, m_end, page_ofs; 4658c2ecf20Sopenharmony_ci int i_bhs, nr_bhs, err = 0; 4668c2ecf20Sopenharmony_ci unsigned char blocksize_bits = vol->sb->s_blocksize_bits; 4678c2ecf20Sopenharmony_ci 4688c2ecf20Sopenharmony_ci ntfs_debug("Entering for inode 0x%lx.", mft_no); 4698c2ecf20Sopenharmony_ci BUG_ON(!max_bhs); 4708c2ecf20Sopenharmony_ci if (WARN_ON(max_bhs > MAX_BHS)) 4718c2ecf20Sopenharmony_ci return -EINVAL; 4728c2ecf20Sopenharmony_ci if (unlikely(!vol->mftmirr_ino)) { 4738c2ecf20Sopenharmony_ci /* This could happen during umount... */ 4748c2ecf20Sopenharmony_ci err = ntfs_sync_mft_mirror_umount(vol, mft_no, m); 4758c2ecf20Sopenharmony_ci if (likely(!err)) 4768c2ecf20Sopenharmony_ci return err; 4778c2ecf20Sopenharmony_ci goto err_out; 4788c2ecf20Sopenharmony_ci } 4798c2ecf20Sopenharmony_ci /* Get the page containing the mirror copy of the mft record @m. 
*/ 4808c2ecf20Sopenharmony_ci page = ntfs_map_page(vol->mftmirr_ino->i_mapping, mft_no >> 4818c2ecf20Sopenharmony_ci (PAGE_SHIFT - vol->mft_record_size_bits)); 4828c2ecf20Sopenharmony_ci if (IS_ERR(page)) { 4838c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to map mft mirror page."); 4848c2ecf20Sopenharmony_ci err = PTR_ERR(page); 4858c2ecf20Sopenharmony_ci goto err_out; 4868c2ecf20Sopenharmony_ci } 4878c2ecf20Sopenharmony_ci lock_page(page); 4888c2ecf20Sopenharmony_ci BUG_ON(!PageUptodate(page)); 4898c2ecf20Sopenharmony_ci ClearPageUptodate(page); 4908c2ecf20Sopenharmony_ci /* Offset of the mft mirror record inside the page. */ 4918c2ecf20Sopenharmony_ci page_ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_MASK; 4928c2ecf20Sopenharmony_ci /* The address in the page of the mirror copy of the mft record @m. */ 4938c2ecf20Sopenharmony_ci kmirr = page_address(page) + page_ofs; 4948c2ecf20Sopenharmony_ci /* Copy the mst protected mft record to the mirror. */ 4958c2ecf20Sopenharmony_ci memcpy(kmirr, m, vol->mft_record_size); 4968c2ecf20Sopenharmony_ci /* Create uptodate buffers if not present. 
*/ 4978c2ecf20Sopenharmony_ci if (unlikely(!page_has_buffers(page))) { 4988c2ecf20Sopenharmony_ci struct buffer_head *tail; 4998c2ecf20Sopenharmony_ci 5008c2ecf20Sopenharmony_ci bh = head = alloc_page_buffers(page, blocksize, true); 5018c2ecf20Sopenharmony_ci do { 5028c2ecf20Sopenharmony_ci set_buffer_uptodate(bh); 5038c2ecf20Sopenharmony_ci tail = bh; 5048c2ecf20Sopenharmony_ci bh = bh->b_this_page; 5058c2ecf20Sopenharmony_ci } while (bh); 5068c2ecf20Sopenharmony_ci tail->b_this_page = head; 5078c2ecf20Sopenharmony_ci attach_page_private(page, head); 5088c2ecf20Sopenharmony_ci } 5098c2ecf20Sopenharmony_ci bh = head = page_buffers(page); 5108c2ecf20Sopenharmony_ci BUG_ON(!bh); 5118c2ecf20Sopenharmony_ci rl = NULL; 5128c2ecf20Sopenharmony_ci nr_bhs = 0; 5138c2ecf20Sopenharmony_ci block_start = 0; 5148c2ecf20Sopenharmony_ci m_start = kmirr - (u8*)page_address(page); 5158c2ecf20Sopenharmony_ci m_end = m_start + vol->mft_record_size; 5168c2ecf20Sopenharmony_ci do { 5178c2ecf20Sopenharmony_ci block_end = block_start + blocksize; 5188c2ecf20Sopenharmony_ci /* If the buffer is outside the mft record, skip it. */ 5198c2ecf20Sopenharmony_ci if (block_end <= m_start) 5208c2ecf20Sopenharmony_ci continue; 5218c2ecf20Sopenharmony_ci if (unlikely(block_start >= m_end)) 5228c2ecf20Sopenharmony_ci break; 5238c2ecf20Sopenharmony_ci /* Need to map the buffer if it is not mapped already. */ 5248c2ecf20Sopenharmony_ci if (unlikely(!buffer_mapped(bh))) { 5258c2ecf20Sopenharmony_ci VCN vcn; 5268c2ecf20Sopenharmony_ci LCN lcn; 5278c2ecf20Sopenharmony_ci unsigned int vcn_ofs; 5288c2ecf20Sopenharmony_ci 5298c2ecf20Sopenharmony_ci bh->b_bdev = vol->sb->s_bdev; 5308c2ecf20Sopenharmony_ci /* Obtain the vcn and offset of the current block. 
*/ 5318c2ecf20Sopenharmony_ci vcn = ((VCN)mft_no << vol->mft_record_size_bits) + 5328c2ecf20Sopenharmony_ci (block_start - m_start); 5338c2ecf20Sopenharmony_ci vcn_ofs = vcn & vol->cluster_size_mask; 5348c2ecf20Sopenharmony_ci vcn >>= vol->cluster_size_bits; 5358c2ecf20Sopenharmony_ci if (!rl) { 5368c2ecf20Sopenharmony_ci down_read(&NTFS_I(vol->mftmirr_ino)-> 5378c2ecf20Sopenharmony_ci runlist.lock); 5388c2ecf20Sopenharmony_ci rl = NTFS_I(vol->mftmirr_ino)->runlist.rl; 5398c2ecf20Sopenharmony_ci /* 5408c2ecf20Sopenharmony_ci * $MFTMirr always has the whole of its runlist 5418c2ecf20Sopenharmony_ci * in memory. 5428c2ecf20Sopenharmony_ci */ 5438c2ecf20Sopenharmony_ci BUG_ON(!rl); 5448c2ecf20Sopenharmony_ci } 5458c2ecf20Sopenharmony_ci /* Seek to element containing target vcn. */ 5468c2ecf20Sopenharmony_ci while (rl->length && rl[1].vcn <= vcn) 5478c2ecf20Sopenharmony_ci rl++; 5488c2ecf20Sopenharmony_ci lcn = ntfs_rl_vcn_to_lcn(rl, vcn); 5498c2ecf20Sopenharmony_ci /* For $MFTMirr, only lcn >= 0 is a successful remap. */ 5508c2ecf20Sopenharmony_ci if (likely(lcn >= 0)) { 5518c2ecf20Sopenharmony_ci /* Setup buffer head to correct block. 
*/ 5528c2ecf20Sopenharmony_ci bh->b_blocknr = ((lcn << 5538c2ecf20Sopenharmony_ci vol->cluster_size_bits) + 5548c2ecf20Sopenharmony_ci vcn_ofs) >> blocksize_bits; 5558c2ecf20Sopenharmony_ci set_buffer_mapped(bh); 5568c2ecf20Sopenharmony_ci } else { 5578c2ecf20Sopenharmony_ci bh->b_blocknr = -1; 5588c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Cannot write mft mirror " 5598c2ecf20Sopenharmony_ci "record 0x%lx because its " 5608c2ecf20Sopenharmony_ci "location on disk could not " 5618c2ecf20Sopenharmony_ci "be determined (error code " 5628c2ecf20Sopenharmony_ci "%lli).", mft_no, 5638c2ecf20Sopenharmony_ci (long long)lcn); 5648c2ecf20Sopenharmony_ci err = -EIO; 5658c2ecf20Sopenharmony_ci } 5668c2ecf20Sopenharmony_ci } 5678c2ecf20Sopenharmony_ci BUG_ON(!buffer_uptodate(bh)); 5688c2ecf20Sopenharmony_ci BUG_ON(!nr_bhs && (m_start != block_start)); 5698c2ecf20Sopenharmony_ci BUG_ON(nr_bhs >= max_bhs); 5708c2ecf20Sopenharmony_ci bhs[nr_bhs++] = bh; 5718c2ecf20Sopenharmony_ci BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end)); 5728c2ecf20Sopenharmony_ci } while (block_start = block_end, (bh = bh->b_this_page) != head); 5738c2ecf20Sopenharmony_ci if (unlikely(rl)) 5748c2ecf20Sopenharmony_ci up_read(&NTFS_I(vol->mftmirr_ino)->runlist.lock); 5758c2ecf20Sopenharmony_ci if (likely(!err)) { 5768c2ecf20Sopenharmony_ci /* Lock buffers and start synchronous write i/o on them. 
*/ 5778c2ecf20Sopenharmony_ci for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { 5788c2ecf20Sopenharmony_ci struct buffer_head *tbh = bhs[i_bhs]; 5798c2ecf20Sopenharmony_ci 5808c2ecf20Sopenharmony_ci if (!trylock_buffer(tbh)) 5818c2ecf20Sopenharmony_ci BUG(); 5828c2ecf20Sopenharmony_ci BUG_ON(!buffer_uptodate(tbh)); 5838c2ecf20Sopenharmony_ci clear_buffer_dirty(tbh); 5848c2ecf20Sopenharmony_ci get_bh(tbh); 5858c2ecf20Sopenharmony_ci tbh->b_end_io = end_buffer_write_sync; 5868c2ecf20Sopenharmony_ci submit_bh(REQ_OP_WRITE, 0, tbh); 5878c2ecf20Sopenharmony_ci } 5888c2ecf20Sopenharmony_ci /* Wait on i/o completion of buffers. */ 5898c2ecf20Sopenharmony_ci for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { 5908c2ecf20Sopenharmony_ci struct buffer_head *tbh = bhs[i_bhs]; 5918c2ecf20Sopenharmony_ci 5928c2ecf20Sopenharmony_ci wait_on_buffer(tbh); 5938c2ecf20Sopenharmony_ci if (unlikely(!buffer_uptodate(tbh))) { 5948c2ecf20Sopenharmony_ci err = -EIO; 5958c2ecf20Sopenharmony_ci /* 5968c2ecf20Sopenharmony_ci * Set the buffer uptodate so the page and 5978c2ecf20Sopenharmony_ci * buffer states do not become out of sync. 5988c2ecf20Sopenharmony_ci */ 5998c2ecf20Sopenharmony_ci set_buffer_uptodate(tbh); 6008c2ecf20Sopenharmony_ci } 6018c2ecf20Sopenharmony_ci } 6028c2ecf20Sopenharmony_ci } else /* if (unlikely(err)) */ { 6038c2ecf20Sopenharmony_ci /* Clean the buffers. */ 6048c2ecf20Sopenharmony_ci for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) 6058c2ecf20Sopenharmony_ci clear_buffer_dirty(bhs[i_bhs]); 6068c2ecf20Sopenharmony_ci } 6078c2ecf20Sopenharmony_ci /* Current state: all buffers are clean, unlocked, and uptodate. */ 6088c2ecf20Sopenharmony_ci /* Remove the mst protection fixups again. 
*/ 6098c2ecf20Sopenharmony_ci post_write_mst_fixup((NTFS_RECORD*)kmirr); 6108c2ecf20Sopenharmony_ci flush_dcache_page(page); 6118c2ecf20Sopenharmony_ci SetPageUptodate(page); 6128c2ecf20Sopenharmony_ci unlock_page(page); 6138c2ecf20Sopenharmony_ci ntfs_unmap_page(page); 6148c2ecf20Sopenharmony_ci if (likely(!err)) { 6158c2ecf20Sopenharmony_ci ntfs_debug("Done."); 6168c2ecf20Sopenharmony_ci } else { 6178c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "I/O error while writing mft mirror " 6188c2ecf20Sopenharmony_ci "record 0x%lx!", mft_no); 6198c2ecf20Sopenharmony_cierr_out: 6208c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to synchronize $MFTMirr (error " 6218c2ecf20Sopenharmony_ci "code %i). Volume will be left marked dirty " 6228c2ecf20Sopenharmony_ci "on umount. Run ntfsfix on the partition " 6238c2ecf20Sopenharmony_ci "after umounting to correct this.", -err); 6248c2ecf20Sopenharmony_ci NVolSetErrors(vol); 6258c2ecf20Sopenharmony_ci } 6268c2ecf20Sopenharmony_ci return err; 6278c2ecf20Sopenharmony_ci} 6288c2ecf20Sopenharmony_ci 6298c2ecf20Sopenharmony_ci/** 6308c2ecf20Sopenharmony_ci * write_mft_record_nolock - write out a mapped (extent) mft record 6318c2ecf20Sopenharmony_ci * @ni: ntfs inode describing the mapped (extent) mft record 6328c2ecf20Sopenharmony_ci * @m: mapped (extent) mft record to write 6338c2ecf20Sopenharmony_ci * @sync: if true, wait for i/o completion 6348c2ecf20Sopenharmony_ci * 6358c2ecf20Sopenharmony_ci * Write the mapped (extent) mft record @m described by the (regular or extent) 6368c2ecf20Sopenharmony_ci * ntfs inode @ni to backing store. If the mft record @m has a counterpart in 6378c2ecf20Sopenharmony_ci * the mft mirror, that is also updated. 6388c2ecf20Sopenharmony_ci * 6398c2ecf20Sopenharmony_ci * We only write the mft record if the ntfs inode @ni is dirty and the first 6408c2ecf20Sopenharmony_ci * buffer belonging to its mft record is dirty, too. 
 * We ignore the dirty state of subsequent buffers because we could have raced
 * with fs/ntfs/aops.c::mark_ntfs_record_dirty().
 *
 * On success, clean the mft record and return 0. On error, leave the mft
 * record dirty and return -errno.
 *
 * NOTE: We always perform synchronous i/o and ignore the @sync parameter.
 * However, if the mft record has a counterpart in the mft mirror and @sync is
 * true, we write the mft record, wait for i/o completion, and only then write
 * the mft mirror copy. This ensures that if the system crashes either the mft
 * or the mft mirror will contain a self-consistent mft record @m. If @sync is
 * false on the other hand, we start i/o on both and then wait for completion
 * on them. This provides a speedup but no longer guarantees that you will end
 * up with a self-consistent mft record in the case of a crash but if you asked
 * for asynchronous writing you probably do not care about that anyway.
 *
 * TODO: If @sync is false, want to do truly asynchronous i/o, i.e. just
 * schedule i/o via ->writepage or do it via kntfsd or whatever.
6598c2ecf20Sopenharmony_ci */ 6608c2ecf20Sopenharmony_ciint write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) 6618c2ecf20Sopenharmony_ci{ 6628c2ecf20Sopenharmony_ci ntfs_volume *vol = ni->vol; 6638c2ecf20Sopenharmony_ci struct page *page = ni->page; 6648c2ecf20Sopenharmony_ci unsigned int blocksize = vol->sb->s_blocksize; 6658c2ecf20Sopenharmony_ci unsigned char blocksize_bits = vol->sb->s_blocksize_bits; 6668c2ecf20Sopenharmony_ci int max_bhs = vol->mft_record_size / blocksize; 6678c2ecf20Sopenharmony_ci struct buffer_head *bhs[MAX_BHS]; 6688c2ecf20Sopenharmony_ci struct buffer_head *bh, *head; 6698c2ecf20Sopenharmony_ci runlist_element *rl; 6708c2ecf20Sopenharmony_ci unsigned int block_start, block_end, m_start, m_end; 6718c2ecf20Sopenharmony_ci int i_bhs, nr_bhs, err = 0; 6728c2ecf20Sopenharmony_ci 6738c2ecf20Sopenharmony_ci ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); 6748c2ecf20Sopenharmony_ci BUG_ON(NInoAttr(ni)); 6758c2ecf20Sopenharmony_ci BUG_ON(!max_bhs); 6768c2ecf20Sopenharmony_ci BUG_ON(!PageLocked(page)); 6778c2ecf20Sopenharmony_ci if (WARN_ON(max_bhs > MAX_BHS)) { 6788c2ecf20Sopenharmony_ci err = -EINVAL; 6798c2ecf20Sopenharmony_ci goto err_out; 6808c2ecf20Sopenharmony_ci } 6818c2ecf20Sopenharmony_ci /* 6828c2ecf20Sopenharmony_ci * If the ntfs_inode is clean no need to do anything. If it is dirty, 6838c2ecf20Sopenharmony_ci * mark it as clean now so that it can be redirtied later on if needed. 6848c2ecf20Sopenharmony_ci * There is no danger of races since the caller is holding the locks 6858c2ecf20Sopenharmony_ci * for the mft record @m and the page it is in. 
6868c2ecf20Sopenharmony_ci */ 6878c2ecf20Sopenharmony_ci if (!NInoTestClearDirty(ni)) 6888c2ecf20Sopenharmony_ci goto done; 6898c2ecf20Sopenharmony_ci bh = head = page_buffers(page); 6908c2ecf20Sopenharmony_ci BUG_ON(!bh); 6918c2ecf20Sopenharmony_ci rl = NULL; 6928c2ecf20Sopenharmony_ci nr_bhs = 0; 6938c2ecf20Sopenharmony_ci block_start = 0; 6948c2ecf20Sopenharmony_ci m_start = ni->page_ofs; 6958c2ecf20Sopenharmony_ci m_end = m_start + vol->mft_record_size; 6968c2ecf20Sopenharmony_ci do { 6978c2ecf20Sopenharmony_ci block_end = block_start + blocksize; 6988c2ecf20Sopenharmony_ci /* If the buffer is outside the mft record, skip it. */ 6998c2ecf20Sopenharmony_ci if (block_end <= m_start) 7008c2ecf20Sopenharmony_ci continue; 7018c2ecf20Sopenharmony_ci if (unlikely(block_start >= m_end)) 7028c2ecf20Sopenharmony_ci break; 7038c2ecf20Sopenharmony_ci /* 7048c2ecf20Sopenharmony_ci * If this block is not the first one in the record, we ignore 7058c2ecf20Sopenharmony_ci * the buffer's dirty state because we could have raced with a 7068c2ecf20Sopenharmony_ci * parallel mark_ntfs_record_dirty(). 7078c2ecf20Sopenharmony_ci */ 7088c2ecf20Sopenharmony_ci if (block_start == m_start) { 7098c2ecf20Sopenharmony_ci /* This block is the first one in the record. */ 7108c2ecf20Sopenharmony_ci if (!buffer_dirty(bh)) { 7118c2ecf20Sopenharmony_ci BUG_ON(nr_bhs); 7128c2ecf20Sopenharmony_ci /* Clean records are not written out. */ 7138c2ecf20Sopenharmony_ci break; 7148c2ecf20Sopenharmony_ci } 7158c2ecf20Sopenharmony_ci } 7168c2ecf20Sopenharmony_ci /* Need to map the buffer if it is not mapped already. */ 7178c2ecf20Sopenharmony_ci if (unlikely(!buffer_mapped(bh))) { 7188c2ecf20Sopenharmony_ci VCN vcn; 7198c2ecf20Sopenharmony_ci LCN lcn; 7208c2ecf20Sopenharmony_ci unsigned int vcn_ofs; 7218c2ecf20Sopenharmony_ci 7228c2ecf20Sopenharmony_ci bh->b_bdev = vol->sb->s_bdev; 7238c2ecf20Sopenharmony_ci /* Obtain the vcn and offset of the current block. 
*/ 7248c2ecf20Sopenharmony_ci vcn = ((VCN)ni->mft_no << vol->mft_record_size_bits) + 7258c2ecf20Sopenharmony_ci (block_start - m_start); 7268c2ecf20Sopenharmony_ci vcn_ofs = vcn & vol->cluster_size_mask; 7278c2ecf20Sopenharmony_ci vcn >>= vol->cluster_size_bits; 7288c2ecf20Sopenharmony_ci if (!rl) { 7298c2ecf20Sopenharmony_ci down_read(&NTFS_I(vol->mft_ino)->runlist.lock); 7308c2ecf20Sopenharmony_ci rl = NTFS_I(vol->mft_ino)->runlist.rl; 7318c2ecf20Sopenharmony_ci BUG_ON(!rl); 7328c2ecf20Sopenharmony_ci } 7338c2ecf20Sopenharmony_ci /* Seek to element containing target vcn. */ 7348c2ecf20Sopenharmony_ci while (rl->length && rl[1].vcn <= vcn) 7358c2ecf20Sopenharmony_ci rl++; 7368c2ecf20Sopenharmony_ci lcn = ntfs_rl_vcn_to_lcn(rl, vcn); 7378c2ecf20Sopenharmony_ci /* For $MFT, only lcn >= 0 is a successful remap. */ 7388c2ecf20Sopenharmony_ci if (likely(lcn >= 0)) { 7398c2ecf20Sopenharmony_ci /* Setup buffer head to correct block. */ 7408c2ecf20Sopenharmony_ci bh->b_blocknr = ((lcn << 7418c2ecf20Sopenharmony_ci vol->cluster_size_bits) + 7428c2ecf20Sopenharmony_ci vcn_ofs) >> blocksize_bits; 7438c2ecf20Sopenharmony_ci set_buffer_mapped(bh); 7448c2ecf20Sopenharmony_ci } else { 7458c2ecf20Sopenharmony_ci bh->b_blocknr = -1; 7468c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Cannot write mft record " 7478c2ecf20Sopenharmony_ci "0x%lx because its location " 7488c2ecf20Sopenharmony_ci "on disk could not be " 7498c2ecf20Sopenharmony_ci "determined (error code %lli).", 7508c2ecf20Sopenharmony_ci ni->mft_no, (long long)lcn); 7518c2ecf20Sopenharmony_ci err = -EIO; 7528c2ecf20Sopenharmony_ci } 7538c2ecf20Sopenharmony_ci } 7548c2ecf20Sopenharmony_ci BUG_ON(!buffer_uptodate(bh)); 7558c2ecf20Sopenharmony_ci BUG_ON(!nr_bhs && (m_start != block_start)); 7568c2ecf20Sopenharmony_ci BUG_ON(nr_bhs >= max_bhs); 7578c2ecf20Sopenharmony_ci bhs[nr_bhs++] = bh; 7588c2ecf20Sopenharmony_ci BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end)); 7598c2ecf20Sopenharmony_ci } while (block_start = 
block_end, (bh = bh->b_this_page) != head); 7608c2ecf20Sopenharmony_ci if (unlikely(rl)) 7618c2ecf20Sopenharmony_ci up_read(&NTFS_I(vol->mft_ino)->runlist.lock); 7628c2ecf20Sopenharmony_ci if (!nr_bhs) 7638c2ecf20Sopenharmony_ci goto done; 7648c2ecf20Sopenharmony_ci if (unlikely(err)) 7658c2ecf20Sopenharmony_ci goto cleanup_out; 7668c2ecf20Sopenharmony_ci /* Apply the mst protection fixups. */ 7678c2ecf20Sopenharmony_ci err = pre_write_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size); 7688c2ecf20Sopenharmony_ci if (err) { 7698c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to apply mst fixups!"); 7708c2ecf20Sopenharmony_ci goto cleanup_out; 7718c2ecf20Sopenharmony_ci } 7728c2ecf20Sopenharmony_ci flush_dcache_mft_record_page(ni); 7738c2ecf20Sopenharmony_ci /* Lock buffers and start synchronous write i/o on them. */ 7748c2ecf20Sopenharmony_ci for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { 7758c2ecf20Sopenharmony_ci struct buffer_head *tbh = bhs[i_bhs]; 7768c2ecf20Sopenharmony_ci 7778c2ecf20Sopenharmony_ci if (!trylock_buffer(tbh)) 7788c2ecf20Sopenharmony_ci BUG(); 7798c2ecf20Sopenharmony_ci BUG_ON(!buffer_uptodate(tbh)); 7808c2ecf20Sopenharmony_ci clear_buffer_dirty(tbh); 7818c2ecf20Sopenharmony_ci get_bh(tbh); 7828c2ecf20Sopenharmony_ci tbh->b_end_io = end_buffer_write_sync; 7838c2ecf20Sopenharmony_ci submit_bh(REQ_OP_WRITE, 0, tbh); 7848c2ecf20Sopenharmony_ci } 7858c2ecf20Sopenharmony_ci /* Synchronize the mft mirror now if not @sync. */ 7868c2ecf20Sopenharmony_ci if (!sync && ni->mft_no < vol->mftmirr_size) 7878c2ecf20Sopenharmony_ci ntfs_sync_mft_mirror(vol, ni->mft_no, m, sync); 7888c2ecf20Sopenharmony_ci /* Wait on i/o completion of buffers. 
*/ 7898c2ecf20Sopenharmony_ci for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { 7908c2ecf20Sopenharmony_ci struct buffer_head *tbh = bhs[i_bhs]; 7918c2ecf20Sopenharmony_ci 7928c2ecf20Sopenharmony_ci wait_on_buffer(tbh); 7938c2ecf20Sopenharmony_ci if (unlikely(!buffer_uptodate(tbh))) { 7948c2ecf20Sopenharmony_ci err = -EIO; 7958c2ecf20Sopenharmony_ci /* 7968c2ecf20Sopenharmony_ci * Set the buffer uptodate so the page and buffer 7978c2ecf20Sopenharmony_ci * states do not become out of sync. 7988c2ecf20Sopenharmony_ci */ 7998c2ecf20Sopenharmony_ci if (PageUptodate(page)) 8008c2ecf20Sopenharmony_ci set_buffer_uptodate(tbh); 8018c2ecf20Sopenharmony_ci } 8028c2ecf20Sopenharmony_ci } 8038c2ecf20Sopenharmony_ci /* If @sync, now synchronize the mft mirror. */ 8048c2ecf20Sopenharmony_ci if (sync && ni->mft_no < vol->mftmirr_size) 8058c2ecf20Sopenharmony_ci ntfs_sync_mft_mirror(vol, ni->mft_no, m, sync); 8068c2ecf20Sopenharmony_ci /* Remove the mst protection fixups again. */ 8078c2ecf20Sopenharmony_ci post_write_mst_fixup((NTFS_RECORD*)m); 8088c2ecf20Sopenharmony_ci flush_dcache_mft_record_page(ni); 8098c2ecf20Sopenharmony_ci if (unlikely(err)) { 8108c2ecf20Sopenharmony_ci /* I/O error during writing. This is really bad! */ 8118c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "I/O error while writing mft record " 8128c2ecf20Sopenharmony_ci "0x%lx! Marking base inode as bad. You " 8138c2ecf20Sopenharmony_ci "should unmount the volume and run chkdsk.", 8148c2ecf20Sopenharmony_ci ni->mft_no); 8158c2ecf20Sopenharmony_ci goto err_out; 8168c2ecf20Sopenharmony_ci } 8178c2ecf20Sopenharmony_cidone: 8188c2ecf20Sopenharmony_ci ntfs_debug("Done."); 8198c2ecf20Sopenharmony_ci return 0; 8208c2ecf20Sopenharmony_cicleanup_out: 8218c2ecf20Sopenharmony_ci /* Clean the buffers. 
*/ 8228c2ecf20Sopenharmony_ci for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) 8238c2ecf20Sopenharmony_ci clear_buffer_dirty(bhs[i_bhs]); 8248c2ecf20Sopenharmony_cierr_out: 8258c2ecf20Sopenharmony_ci /* 8268c2ecf20Sopenharmony_ci * Current state: all buffers are clean, unlocked, and uptodate. 8278c2ecf20Sopenharmony_ci * The caller should mark the base inode as bad so that no more i/o 8288c2ecf20Sopenharmony_ci * happens. ->clear_inode() will still be invoked so all extent inodes 8298c2ecf20Sopenharmony_ci * and other allocated memory will be freed. 8308c2ecf20Sopenharmony_ci */ 8318c2ecf20Sopenharmony_ci if (err == -ENOMEM) { 8328c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Not enough memory to write mft record. " 8338c2ecf20Sopenharmony_ci "Redirtying so the write is retried later."); 8348c2ecf20Sopenharmony_ci mark_mft_record_dirty(ni); 8358c2ecf20Sopenharmony_ci err = 0; 8368c2ecf20Sopenharmony_ci } else 8378c2ecf20Sopenharmony_ci NVolSetErrors(vol); 8388c2ecf20Sopenharmony_ci return err; 8398c2ecf20Sopenharmony_ci} 8408c2ecf20Sopenharmony_ci 8418c2ecf20Sopenharmony_ci/** 8428c2ecf20Sopenharmony_ci * ntfs_may_write_mft_record - check if an mft record may be written out 8438c2ecf20Sopenharmony_ci * @vol: [IN] ntfs volume on which the mft record to check resides 8448c2ecf20Sopenharmony_ci * @mft_no: [IN] mft record number of the mft record to check 8458c2ecf20Sopenharmony_ci * @m: [IN] mapped mft record to check 8468c2ecf20Sopenharmony_ci * @locked_ni: [OUT] caller has to unlock this ntfs inode if one is returned 8478c2ecf20Sopenharmony_ci * 8488c2ecf20Sopenharmony_ci * Check if the mapped (base or extent) mft record @m with mft record number 8498c2ecf20Sopenharmony_ci * @mft_no belonging to the ntfs volume @vol may be written out. If necessary 8508c2ecf20Sopenharmony_ci * and possible the ntfs inode of the mft record is locked and the base vfs 8518c2ecf20Sopenharmony_ci * inode is pinned. The locked ntfs inode is then returned in @locked_ni. 
 * The caller is responsible for unlocking the ntfs inode and unpinning the
 * base vfs inode.
 *
 * Return 'true' if the mft record may be written out and 'false' if not.
 *
 * The caller has locked the page and cleared the uptodate flag on it which
 * means that we can safely write out any dirty mft records that do not have
 * their inodes in icache as determined by ilookup5_nowait() as anyone
 * opening/creating such an inode would block when attempting to map the mft
 * record in read_cache_page() until we are finished with the write out.
 *
 * Here is a description of the tests we perform:
 *
 * If the inode is found in icache we know the mft record must be a base mft
 * record. If it is dirty, we do not write it and return 'false' as the vfs
 * inode write paths will result in the access times being updated which would
 * cause the base mft record to be redirtied and written out again. (We know
 * the access time update will modify the base mft record because Windows
 * chkdsk complains if the standard information attribute is not in the base
 * mft record.)
 *
 * If the inode is in icache and not dirty, we attempt to lock the mft record
 * and if we find the lock was already taken, it is not safe to write the mft
 * record and we return 'false'.
 *
 * If we manage to obtain the lock we have exclusive access to the mft record,
 * which also allows us safe writeout of the mft record. We then set
 * @locked_ni to the locked ntfs inode and return 'true'.
 *
 * Note we cannot just lock the mft record and sleep while waiting for the lock
 * because this would deadlock due to lock reversal (normally the mft record is
 * locked before the page is locked but we already have the page locked here
 * when we try to lock the mft record).
 *
 * If the inode is not in icache we need to perform further checks.
 *
 * If the mft record is not a FILE record or it is a base mft record, we can
 * safely write it and return 'true'.
 *
 * We now know the mft record is an extent mft record. We check if the inode
 * corresponding to its base mft record is in icache and obtain a reference to
 * it if it is. If it is not, we can safely write it and return 'true'.
 *
 * We now have the base inode for the extent mft record. We check if it has an
 * ntfs inode for the extent mft record attached and if not it is safe to write
 * the extent mft record and we return 'true'.
 *
 * The ntfs inode for the extent mft record is attached to the base inode so we
 * attempt to lock the extent mft record and if we find the lock was already
 * taken, it is not safe to write the extent mft record and we return 'false'.
 *
 * If we manage to obtain the lock we have exclusive access to the extent mft
 * record, which also allows us safe writeout of the extent mft record. We
 * set the ntfs inode of the extent mft record clean and then set @locked_ni to
 * the now locked ntfs inode and return 'true'.
 *
 * Note, the reason for actually writing dirty mft records here and not just
 * relying on the vfs inode dirty code paths is that we can have mft records
 * modified without them ever having actual inodes in memory. Also we can have
 * dirty mft records with clean ntfs inodes in memory. None of the described
 * cases would result in the dirty mft records being written out if we only
 * relied on the vfs inode dirty code paths. And these cases can really occur
 * during allocation of new mft records and in particular when the
 * initialized_size of the $MFT/$DATA attribute is extended and the new space
 * is initialized using ntfs_mft_record_format(). The clean inode can then
 * appear if the mft record is reused for a new inode before it got written
 * out.
9198c2ecf20Sopenharmony_ci */ 9208c2ecf20Sopenharmony_cibool ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, 9218c2ecf20Sopenharmony_ci const MFT_RECORD *m, ntfs_inode **locked_ni) 9228c2ecf20Sopenharmony_ci{ 9238c2ecf20Sopenharmony_ci struct super_block *sb = vol->sb; 9248c2ecf20Sopenharmony_ci struct inode *mft_vi = vol->mft_ino; 9258c2ecf20Sopenharmony_ci struct inode *vi; 9268c2ecf20Sopenharmony_ci ntfs_inode *ni, *eni, **extent_nis; 9278c2ecf20Sopenharmony_ci int i; 9288c2ecf20Sopenharmony_ci ntfs_attr na; 9298c2ecf20Sopenharmony_ci 9308c2ecf20Sopenharmony_ci ntfs_debug("Entering for inode 0x%lx.", mft_no); 9318c2ecf20Sopenharmony_ci /* 9328c2ecf20Sopenharmony_ci * Normally we do not return a locked inode so set @locked_ni to NULL. 9338c2ecf20Sopenharmony_ci */ 9348c2ecf20Sopenharmony_ci BUG_ON(!locked_ni); 9358c2ecf20Sopenharmony_ci *locked_ni = NULL; 9368c2ecf20Sopenharmony_ci /* 9378c2ecf20Sopenharmony_ci * Check if the inode corresponding to this mft record is in the VFS 9388c2ecf20Sopenharmony_ci * inode cache and obtain a reference to it if it is. 9398c2ecf20Sopenharmony_ci */ 9408c2ecf20Sopenharmony_ci ntfs_debug("Looking for inode 0x%lx in icache.", mft_no); 9418c2ecf20Sopenharmony_ci na.mft_no = mft_no; 9428c2ecf20Sopenharmony_ci na.name = NULL; 9438c2ecf20Sopenharmony_ci na.name_len = 0; 9448c2ecf20Sopenharmony_ci na.type = AT_UNUSED; 9458c2ecf20Sopenharmony_ci /* 9468c2ecf20Sopenharmony_ci * Optimize inode 0, i.e. $MFT itself, since we have it in memory and 9478c2ecf20Sopenharmony_ci * we get here for it rather often. 9488c2ecf20Sopenharmony_ci */ 9498c2ecf20Sopenharmony_ci if (!mft_no) { 9508c2ecf20Sopenharmony_ci /* Balance the below iput(). 
*/ 9518c2ecf20Sopenharmony_ci vi = igrab(mft_vi); 9528c2ecf20Sopenharmony_ci BUG_ON(vi != mft_vi); 9538c2ecf20Sopenharmony_ci } else { 9548c2ecf20Sopenharmony_ci /* 9558c2ecf20Sopenharmony_ci * Have to use ilookup5_nowait() since ilookup5() waits for the 9568c2ecf20Sopenharmony_ci * inode lock which causes ntfs to deadlock when a concurrent 9578c2ecf20Sopenharmony_ci * inode write via the inode dirty code paths and the page 9588c2ecf20Sopenharmony_ci * dirty code path of the inode dirty code path when writing 9598c2ecf20Sopenharmony_ci * $MFT occurs. 9608c2ecf20Sopenharmony_ci */ 9618c2ecf20Sopenharmony_ci vi = ilookup5_nowait(sb, mft_no, ntfs_test_inode, &na); 9628c2ecf20Sopenharmony_ci } 9638c2ecf20Sopenharmony_ci if (vi) { 9648c2ecf20Sopenharmony_ci ntfs_debug("Base inode 0x%lx is in icache.", mft_no); 9658c2ecf20Sopenharmony_ci /* The inode is in icache. */ 9668c2ecf20Sopenharmony_ci ni = NTFS_I(vi); 9678c2ecf20Sopenharmony_ci /* Take a reference to the ntfs inode. */ 9688c2ecf20Sopenharmony_ci atomic_inc(&ni->count); 9698c2ecf20Sopenharmony_ci /* If the inode is dirty, do not write this record. */ 9708c2ecf20Sopenharmony_ci if (NInoDirty(ni)) { 9718c2ecf20Sopenharmony_ci ntfs_debug("Inode 0x%lx is dirty, do not write it.", 9728c2ecf20Sopenharmony_ci mft_no); 9738c2ecf20Sopenharmony_ci atomic_dec(&ni->count); 9748c2ecf20Sopenharmony_ci iput(vi); 9758c2ecf20Sopenharmony_ci return false; 9768c2ecf20Sopenharmony_ci } 9778c2ecf20Sopenharmony_ci ntfs_debug("Inode 0x%lx is not dirty.", mft_no); 9788c2ecf20Sopenharmony_ci /* The inode is not dirty, try to take the mft record lock. 
*/ 9798c2ecf20Sopenharmony_ci if (unlikely(!mutex_trylock(&ni->mrec_lock))) { 9808c2ecf20Sopenharmony_ci ntfs_debug("Mft record 0x%lx is already locked, do " 9818c2ecf20Sopenharmony_ci "not write it.", mft_no); 9828c2ecf20Sopenharmony_ci atomic_dec(&ni->count); 9838c2ecf20Sopenharmony_ci iput(vi); 9848c2ecf20Sopenharmony_ci return false; 9858c2ecf20Sopenharmony_ci } 9868c2ecf20Sopenharmony_ci ntfs_debug("Managed to lock mft record 0x%lx, write it.", 9878c2ecf20Sopenharmony_ci mft_no); 9888c2ecf20Sopenharmony_ci /* 9898c2ecf20Sopenharmony_ci * The write has to occur while we hold the mft record lock so 9908c2ecf20Sopenharmony_ci * return the locked ntfs inode. 9918c2ecf20Sopenharmony_ci */ 9928c2ecf20Sopenharmony_ci *locked_ni = ni; 9938c2ecf20Sopenharmony_ci return true; 9948c2ecf20Sopenharmony_ci } 9958c2ecf20Sopenharmony_ci ntfs_debug("Inode 0x%lx is not in icache.", mft_no); 9968c2ecf20Sopenharmony_ci /* The inode is not in icache. */ 9978c2ecf20Sopenharmony_ci /* Write the record if it is not a mft record (type "FILE"). */ 9988c2ecf20Sopenharmony_ci if (!ntfs_is_mft_record(m->magic)) { 9998c2ecf20Sopenharmony_ci ntfs_debug("Mft record 0x%lx is not a FILE record, write it.", 10008c2ecf20Sopenharmony_ci mft_no); 10018c2ecf20Sopenharmony_ci return true; 10028c2ecf20Sopenharmony_ci } 10038c2ecf20Sopenharmony_ci /* Write the mft record if it is a base inode. */ 10048c2ecf20Sopenharmony_ci if (!m->base_mft_record) { 10058c2ecf20Sopenharmony_ci ntfs_debug("Mft record 0x%lx is a base record, write it.", 10068c2ecf20Sopenharmony_ci mft_no); 10078c2ecf20Sopenharmony_ci return true; 10088c2ecf20Sopenharmony_ci } 10098c2ecf20Sopenharmony_ci /* 10108c2ecf20Sopenharmony_ci * This is an extent mft record. Check if the inode corresponding to 10118c2ecf20Sopenharmony_ci * its base mft record is in icache and obtain a reference to it if it 10128c2ecf20Sopenharmony_ci * is. 
10138c2ecf20Sopenharmony_ci */ 10148c2ecf20Sopenharmony_ci na.mft_no = MREF_LE(m->base_mft_record); 10158c2ecf20Sopenharmony_ci ntfs_debug("Mft record 0x%lx is an extent record. Looking for base " 10168c2ecf20Sopenharmony_ci "inode 0x%lx in icache.", mft_no, na.mft_no); 10178c2ecf20Sopenharmony_ci if (!na.mft_no) { 10188c2ecf20Sopenharmony_ci /* Balance the below iput(). */ 10198c2ecf20Sopenharmony_ci vi = igrab(mft_vi); 10208c2ecf20Sopenharmony_ci BUG_ON(vi != mft_vi); 10218c2ecf20Sopenharmony_ci } else 10228c2ecf20Sopenharmony_ci vi = ilookup5_nowait(sb, na.mft_no, ntfs_test_inode, 10238c2ecf20Sopenharmony_ci &na); 10248c2ecf20Sopenharmony_ci if (!vi) { 10258c2ecf20Sopenharmony_ci /* 10268c2ecf20Sopenharmony_ci * The base inode is not in icache, write this extent mft 10278c2ecf20Sopenharmony_ci * record. 10288c2ecf20Sopenharmony_ci */ 10298c2ecf20Sopenharmony_ci ntfs_debug("Base inode 0x%lx is not in icache, write the " 10308c2ecf20Sopenharmony_ci "extent record.", na.mft_no); 10318c2ecf20Sopenharmony_ci return true; 10328c2ecf20Sopenharmony_ci } 10338c2ecf20Sopenharmony_ci ntfs_debug("Base inode 0x%lx is in icache.", na.mft_no); 10348c2ecf20Sopenharmony_ci /* 10358c2ecf20Sopenharmony_ci * The base inode is in icache. Check if it has the extent inode 10368c2ecf20Sopenharmony_ci * corresponding to this extent mft record attached. 10378c2ecf20Sopenharmony_ci */ 10388c2ecf20Sopenharmony_ci ni = NTFS_I(vi); 10398c2ecf20Sopenharmony_ci mutex_lock(&ni->extent_lock); 10408c2ecf20Sopenharmony_ci if (ni->nr_extents <= 0) { 10418c2ecf20Sopenharmony_ci /* 10428c2ecf20Sopenharmony_ci * The base inode has no attached extent inodes, write this 10438c2ecf20Sopenharmony_ci * extent mft record. 
10448c2ecf20Sopenharmony_ci */ 10458c2ecf20Sopenharmony_ci mutex_unlock(&ni->extent_lock); 10468c2ecf20Sopenharmony_ci iput(vi); 10478c2ecf20Sopenharmony_ci ntfs_debug("Base inode 0x%lx has no attached extent inodes, " 10488c2ecf20Sopenharmony_ci "write the extent record.", na.mft_no); 10498c2ecf20Sopenharmony_ci return true; 10508c2ecf20Sopenharmony_ci } 10518c2ecf20Sopenharmony_ci /* Iterate over the attached extent inodes. */ 10528c2ecf20Sopenharmony_ci extent_nis = ni->ext.extent_ntfs_inos; 10538c2ecf20Sopenharmony_ci for (eni = NULL, i = 0; i < ni->nr_extents; ++i) { 10548c2ecf20Sopenharmony_ci if (mft_no == extent_nis[i]->mft_no) { 10558c2ecf20Sopenharmony_ci /* 10568c2ecf20Sopenharmony_ci * Found the extent inode corresponding to this extent 10578c2ecf20Sopenharmony_ci * mft record. 10588c2ecf20Sopenharmony_ci */ 10598c2ecf20Sopenharmony_ci eni = extent_nis[i]; 10608c2ecf20Sopenharmony_ci break; 10618c2ecf20Sopenharmony_ci } 10628c2ecf20Sopenharmony_ci } 10638c2ecf20Sopenharmony_ci /* 10648c2ecf20Sopenharmony_ci * If the extent inode was not attached to the base inode, write this 10658c2ecf20Sopenharmony_ci * extent mft record. 10668c2ecf20Sopenharmony_ci */ 10678c2ecf20Sopenharmony_ci if (!eni) { 10688c2ecf20Sopenharmony_ci mutex_unlock(&ni->extent_lock); 10698c2ecf20Sopenharmony_ci iput(vi); 10708c2ecf20Sopenharmony_ci ntfs_debug("Extent inode 0x%lx is not attached to its base " 10718c2ecf20Sopenharmony_ci "inode 0x%lx, write the extent record.", 10728c2ecf20Sopenharmony_ci mft_no, na.mft_no); 10738c2ecf20Sopenharmony_ci return true; 10748c2ecf20Sopenharmony_ci } 10758c2ecf20Sopenharmony_ci ntfs_debug("Extent inode 0x%lx is attached to its base inode 0x%lx.", 10768c2ecf20Sopenharmony_ci mft_no, na.mft_no); 10778c2ecf20Sopenharmony_ci /* Take a reference to the extent ntfs inode. 
*/ 10788c2ecf20Sopenharmony_ci atomic_inc(&eni->count); 10798c2ecf20Sopenharmony_ci mutex_unlock(&ni->extent_lock); 10808c2ecf20Sopenharmony_ci /* 10818c2ecf20Sopenharmony_ci * Found the extent inode coresponding to this extent mft record. 10828c2ecf20Sopenharmony_ci * Try to take the mft record lock. 10838c2ecf20Sopenharmony_ci */ 10848c2ecf20Sopenharmony_ci if (unlikely(!mutex_trylock(&eni->mrec_lock))) { 10858c2ecf20Sopenharmony_ci atomic_dec(&eni->count); 10868c2ecf20Sopenharmony_ci iput(vi); 10878c2ecf20Sopenharmony_ci ntfs_debug("Extent mft record 0x%lx is already locked, do " 10888c2ecf20Sopenharmony_ci "not write it.", mft_no); 10898c2ecf20Sopenharmony_ci return false; 10908c2ecf20Sopenharmony_ci } 10918c2ecf20Sopenharmony_ci ntfs_debug("Managed to lock extent mft record 0x%lx, write it.", 10928c2ecf20Sopenharmony_ci mft_no); 10938c2ecf20Sopenharmony_ci if (NInoTestClearDirty(eni)) 10948c2ecf20Sopenharmony_ci ntfs_debug("Extent inode 0x%lx is dirty, marking it clean.", 10958c2ecf20Sopenharmony_ci mft_no); 10968c2ecf20Sopenharmony_ci /* 10978c2ecf20Sopenharmony_ci * The write has to occur while we hold the mft record lock so return 10988c2ecf20Sopenharmony_ci * the locked extent ntfs inode. 10998c2ecf20Sopenharmony_ci */ 11008c2ecf20Sopenharmony_ci *locked_ni = eni; 11018c2ecf20Sopenharmony_ci return true; 11028c2ecf20Sopenharmony_ci} 11038c2ecf20Sopenharmony_ci 11048c2ecf20Sopenharmony_cistatic const char *es = " Leaving inconsistent metadata. 
Unmount and run " 11058c2ecf20Sopenharmony_ci "chkdsk."; 11068c2ecf20Sopenharmony_ci 11078c2ecf20Sopenharmony_ci/** 11088c2ecf20Sopenharmony_ci * ntfs_mft_bitmap_find_and_alloc_free_rec_nolock - see name 11098c2ecf20Sopenharmony_ci * @vol: volume on which to search for a free mft record 11108c2ecf20Sopenharmony_ci * @base_ni: open base inode if allocating an extent mft record or NULL 11118c2ecf20Sopenharmony_ci * 11128c2ecf20Sopenharmony_ci * Search for a free mft record in the mft bitmap attribute on the ntfs volume 11138c2ecf20Sopenharmony_ci * @vol. 11148c2ecf20Sopenharmony_ci * 11158c2ecf20Sopenharmony_ci * If @base_ni is NULL start the search at the default allocator position. 11168c2ecf20Sopenharmony_ci * 11178c2ecf20Sopenharmony_ci * If @base_ni is not NULL start the search at the mft record after the base 11188c2ecf20Sopenharmony_ci * mft record @base_ni. 11198c2ecf20Sopenharmony_ci * 11208c2ecf20Sopenharmony_ci * Return the free mft record on success and -errno on error. An error code of 11218c2ecf20Sopenharmony_ci * -ENOSPC means that there are no free mft records in the currently 11228c2ecf20Sopenharmony_ci * initialized mft bitmap. 11238c2ecf20Sopenharmony_ci * 11248c2ecf20Sopenharmony_ci * Locking: Caller must hold vol->mftbmp_lock for writing. 
11258c2ecf20Sopenharmony_ci */ 11268c2ecf20Sopenharmony_cistatic int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol, 11278c2ecf20Sopenharmony_ci ntfs_inode *base_ni) 11288c2ecf20Sopenharmony_ci{ 11298c2ecf20Sopenharmony_ci s64 pass_end, ll, data_pos, pass_start, ofs, bit; 11308c2ecf20Sopenharmony_ci unsigned long flags; 11318c2ecf20Sopenharmony_ci struct address_space *mftbmp_mapping; 11328c2ecf20Sopenharmony_ci u8 *buf, *byte; 11338c2ecf20Sopenharmony_ci struct page *page; 11348c2ecf20Sopenharmony_ci unsigned int page_ofs, size; 11358c2ecf20Sopenharmony_ci u8 pass, b; 11368c2ecf20Sopenharmony_ci 11378c2ecf20Sopenharmony_ci ntfs_debug("Searching for free mft record in the currently " 11388c2ecf20Sopenharmony_ci "initialized mft bitmap."); 11398c2ecf20Sopenharmony_ci mftbmp_mapping = vol->mftbmp_ino->i_mapping; 11408c2ecf20Sopenharmony_ci /* 11418c2ecf20Sopenharmony_ci * Set the end of the pass making sure we do not overflow the mft 11428c2ecf20Sopenharmony_ci * bitmap. 11438c2ecf20Sopenharmony_ci */ 11448c2ecf20Sopenharmony_ci read_lock_irqsave(&NTFS_I(vol->mft_ino)->size_lock, flags); 11458c2ecf20Sopenharmony_ci pass_end = NTFS_I(vol->mft_ino)->allocated_size >> 11468c2ecf20Sopenharmony_ci vol->mft_record_size_bits; 11478c2ecf20Sopenharmony_ci read_unlock_irqrestore(&NTFS_I(vol->mft_ino)->size_lock, flags); 11488c2ecf20Sopenharmony_ci read_lock_irqsave(&NTFS_I(vol->mftbmp_ino)->size_lock, flags); 11498c2ecf20Sopenharmony_ci ll = NTFS_I(vol->mftbmp_ino)->initialized_size << 3; 11508c2ecf20Sopenharmony_ci read_unlock_irqrestore(&NTFS_I(vol->mftbmp_ino)->size_lock, flags); 11518c2ecf20Sopenharmony_ci if (pass_end > ll) 11528c2ecf20Sopenharmony_ci pass_end = ll; 11538c2ecf20Sopenharmony_ci pass = 1; 11548c2ecf20Sopenharmony_ci if (!base_ni) 11558c2ecf20Sopenharmony_ci data_pos = vol->mft_data_pos; 11568c2ecf20Sopenharmony_ci else 11578c2ecf20Sopenharmony_ci data_pos = base_ni->mft_no + 1; 11588c2ecf20Sopenharmony_ci if (data_pos < 24) 
11598c2ecf20Sopenharmony_ci data_pos = 24; 11608c2ecf20Sopenharmony_ci if (data_pos >= pass_end) { 11618c2ecf20Sopenharmony_ci data_pos = 24; 11628c2ecf20Sopenharmony_ci pass = 2; 11638c2ecf20Sopenharmony_ci /* This happens on a freshly formatted volume. */ 11648c2ecf20Sopenharmony_ci if (data_pos >= pass_end) 11658c2ecf20Sopenharmony_ci return -ENOSPC; 11668c2ecf20Sopenharmony_ci } 11678c2ecf20Sopenharmony_ci pass_start = data_pos; 11688c2ecf20Sopenharmony_ci ntfs_debug("Starting bitmap search: pass %u, pass_start 0x%llx, " 11698c2ecf20Sopenharmony_ci "pass_end 0x%llx, data_pos 0x%llx.", pass, 11708c2ecf20Sopenharmony_ci (long long)pass_start, (long long)pass_end, 11718c2ecf20Sopenharmony_ci (long long)data_pos); 11728c2ecf20Sopenharmony_ci /* Loop until a free mft record is found. */ 11738c2ecf20Sopenharmony_ci for (; pass <= 2;) { 11748c2ecf20Sopenharmony_ci /* Cap size to pass_end. */ 11758c2ecf20Sopenharmony_ci ofs = data_pos >> 3; 11768c2ecf20Sopenharmony_ci page_ofs = ofs & ~PAGE_MASK; 11778c2ecf20Sopenharmony_ci size = PAGE_SIZE - page_ofs; 11788c2ecf20Sopenharmony_ci ll = ((pass_end + 7) >> 3) - ofs; 11798c2ecf20Sopenharmony_ci if (size > ll) 11808c2ecf20Sopenharmony_ci size = ll; 11818c2ecf20Sopenharmony_ci size <<= 3; 11828c2ecf20Sopenharmony_ci /* 11838c2ecf20Sopenharmony_ci * If we are still within the active pass, search the next page 11848c2ecf20Sopenharmony_ci * for a zero bit. 
11858c2ecf20Sopenharmony_ci */ 11868c2ecf20Sopenharmony_ci if (size) { 11878c2ecf20Sopenharmony_ci page = ntfs_map_page(mftbmp_mapping, 11888c2ecf20Sopenharmony_ci ofs >> PAGE_SHIFT); 11898c2ecf20Sopenharmony_ci if (IS_ERR(page)) { 11908c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to read mft " 11918c2ecf20Sopenharmony_ci "bitmap, aborting."); 11928c2ecf20Sopenharmony_ci return PTR_ERR(page); 11938c2ecf20Sopenharmony_ci } 11948c2ecf20Sopenharmony_ci buf = (u8*)page_address(page) + page_ofs; 11958c2ecf20Sopenharmony_ci bit = data_pos & 7; 11968c2ecf20Sopenharmony_ci data_pos &= ~7ull; 11978c2ecf20Sopenharmony_ci ntfs_debug("Before inner for loop: size 0x%x, " 11988c2ecf20Sopenharmony_ci "data_pos 0x%llx, bit 0x%llx", size, 11998c2ecf20Sopenharmony_ci (long long)data_pos, (long long)bit); 12008c2ecf20Sopenharmony_ci for (; bit < size && data_pos + bit < pass_end; 12018c2ecf20Sopenharmony_ci bit &= ~7ull, bit += 8) { 12028c2ecf20Sopenharmony_ci byte = buf + (bit >> 3); 12038c2ecf20Sopenharmony_ci if (*byte == 0xff) 12048c2ecf20Sopenharmony_ci continue; 12058c2ecf20Sopenharmony_ci b = ffz((unsigned long)*byte); 12068c2ecf20Sopenharmony_ci if (b < 8 && b >= (bit & 7)) { 12078c2ecf20Sopenharmony_ci ll = data_pos + (bit & ~7ull) + b; 12088c2ecf20Sopenharmony_ci if (unlikely(ll > (1ll << 32))) { 12098c2ecf20Sopenharmony_ci ntfs_unmap_page(page); 12108c2ecf20Sopenharmony_ci return -ENOSPC; 12118c2ecf20Sopenharmony_ci } 12128c2ecf20Sopenharmony_ci *byte |= 1 << b; 12138c2ecf20Sopenharmony_ci flush_dcache_page(page); 12148c2ecf20Sopenharmony_ci set_page_dirty(page); 12158c2ecf20Sopenharmony_ci ntfs_unmap_page(page); 12168c2ecf20Sopenharmony_ci ntfs_debug("Done. 
(Found and " 12178c2ecf20Sopenharmony_ci "allocated mft record " 12188c2ecf20Sopenharmony_ci "0x%llx.)", 12198c2ecf20Sopenharmony_ci (long long)ll); 12208c2ecf20Sopenharmony_ci return ll; 12218c2ecf20Sopenharmony_ci } 12228c2ecf20Sopenharmony_ci } 12238c2ecf20Sopenharmony_ci ntfs_debug("After inner for loop: size 0x%x, " 12248c2ecf20Sopenharmony_ci "data_pos 0x%llx, bit 0x%llx", size, 12258c2ecf20Sopenharmony_ci (long long)data_pos, (long long)bit); 12268c2ecf20Sopenharmony_ci data_pos += size; 12278c2ecf20Sopenharmony_ci ntfs_unmap_page(page); 12288c2ecf20Sopenharmony_ci /* 12298c2ecf20Sopenharmony_ci * If the end of the pass has not been reached yet, 12308c2ecf20Sopenharmony_ci * continue searching the mft bitmap for a zero bit. 12318c2ecf20Sopenharmony_ci */ 12328c2ecf20Sopenharmony_ci if (data_pos < pass_end) 12338c2ecf20Sopenharmony_ci continue; 12348c2ecf20Sopenharmony_ci } 12358c2ecf20Sopenharmony_ci /* Do the next pass. */ 12368c2ecf20Sopenharmony_ci if (++pass == 2) { 12378c2ecf20Sopenharmony_ci /* 12388c2ecf20Sopenharmony_ci * Starting the second pass, in which we scan the first 12398c2ecf20Sopenharmony_ci * part of the zone which we omitted earlier. 12408c2ecf20Sopenharmony_ci */ 12418c2ecf20Sopenharmony_ci pass_end = pass_start; 12428c2ecf20Sopenharmony_ci data_pos = pass_start = 24; 12438c2ecf20Sopenharmony_ci ntfs_debug("pass %i, pass_start 0x%llx, pass_end " 12448c2ecf20Sopenharmony_ci "0x%llx.", pass, (long long)pass_start, 12458c2ecf20Sopenharmony_ci (long long)pass_end); 12468c2ecf20Sopenharmony_ci if (data_pos >= pass_end) 12478c2ecf20Sopenharmony_ci break; 12488c2ecf20Sopenharmony_ci } 12498c2ecf20Sopenharmony_ci } 12508c2ecf20Sopenharmony_ci /* No free mft records in currently initialized mft bitmap. */ 12518c2ecf20Sopenharmony_ci ntfs_debug("Done. 
(No free mft records left in currently initialized " 12528c2ecf20Sopenharmony_ci "mft bitmap.)"); 12538c2ecf20Sopenharmony_ci return -ENOSPC; 12548c2ecf20Sopenharmony_ci} 12558c2ecf20Sopenharmony_ci 12568c2ecf20Sopenharmony_ci/** 12578c2ecf20Sopenharmony_ci * ntfs_mft_bitmap_extend_allocation_nolock - extend mft bitmap by a cluster 12588c2ecf20Sopenharmony_ci * @vol: volume on which to extend the mft bitmap attribute 12598c2ecf20Sopenharmony_ci * 12608c2ecf20Sopenharmony_ci * Extend the mft bitmap attribute on the ntfs volume @vol by one cluster. 12618c2ecf20Sopenharmony_ci * 12628c2ecf20Sopenharmony_ci * Note: Only changes allocated_size, i.e. does not touch initialized_size or 12638c2ecf20Sopenharmony_ci * data_size. 12648c2ecf20Sopenharmony_ci * 12658c2ecf20Sopenharmony_ci * Return 0 on success and -errno on error. 12668c2ecf20Sopenharmony_ci * 12678c2ecf20Sopenharmony_ci * Locking: - Caller must hold vol->mftbmp_lock for writing. 12688c2ecf20Sopenharmony_ci * - This function takes NTFS_I(vol->mftbmp_ino)->runlist.lock for 12698c2ecf20Sopenharmony_ci * writing and releases it before returning. 12708c2ecf20Sopenharmony_ci * - This function takes vol->lcnbmp_lock for writing and releases it 12718c2ecf20Sopenharmony_ci * before returning. 
 *
 * If a step fails after the cluster has been taken, the undo_alloc path
 * clears the cluster in the lcn bitmap again, reverts the runlist change and,
 * if the mapping pairs array was already rewritten, restores the attribute
 * record to its old length.  The one exception is when the last attribute
 * extent cannot be found again in restore_undo_alloc: the new cluster is then
 * kept, the in-memory allocated_size is updated and the volume is flagged as
 * having errors.
 */
static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
{
	LCN lcn;
	s64 ll;
	unsigned long flags;
	struct page *page;
	ntfs_inode *mft_ni, *mftbmp_ni;
	runlist_element *rl, *rl2 = NULL;
	ntfs_attr_search_ctx *ctx = NULL;
	MFT_RECORD *mrec;
	ATTR_RECORD *a = NULL;
	int ret, mp_size;
	u32 old_alen = 0;
	u8 *b, tb;
	/* Records which steps have been done so undo_alloc can revert them. */
	struct {
		u8 added_cluster:1;
		u8 added_run:1;
		u8 mp_rebuilt:1;
	} status = { 0, 0, 0 };

	ntfs_debug("Extending mft bitmap allocation.");
	mft_ni = NTFS_I(vol->mft_ino);
	mftbmp_ni = NTFS_I(vol->mftbmp_ino);
	/*
	 * Determine the last lcn of the mft bitmap. The allocated size of the
	 * mft bitmap cannot be zero so we are ok to do this.
	 */
	down_write(&mftbmp_ni->runlist.lock);
	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
	ll = mftbmp_ni->allocated_size;
	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
	/* Look up the run containing the last allocated vcn. */
	rl = ntfs_attr_find_vcn_nolock(mftbmp_ni,
			(ll - 1) >> vol->cluster_size_bits, NULL);
	if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) {
		up_write(&mftbmp_ni->runlist.lock);
		ntfs_error(vol->sb, "Failed to determine last allocated "
				"cluster of mft bitmap attribute.");
		/* A valid pointer to an unusable run is reported as -EIO. */
		if (!IS_ERR(rl))
			ret = -EIO;
		else
			ret = PTR_ERR(rl);
		return ret;
	}
	/* @lcn is the cluster immediately following the last run. */
	lcn = rl->lcn + rl->length;
	ntfs_debug("Last lcn of mft bitmap attribute is 0x%llx.",
			(long long)lcn);
	/*
	 * Attempt to get the cluster following the last allocated cluster by
	 * hand as it may be in the MFT zone so the allocator would not give it
	 * to us.
	 */
	/* Byte offset in the lcn bitmap of the bit describing @lcn. */
	ll = lcn >> 3;
	page = ntfs_map_page(vol->lcnbmp_ino->i_mapping,
			ll >> PAGE_SHIFT);
	if (IS_ERR(page)) {
		up_write(&mftbmp_ni->runlist.lock);
		ntfs_error(vol->sb, "Failed to read from lcn bitmap.");
		return PTR_ERR(page);
	}
	/* @b points at that byte, @tb is the mask of the bit for @lcn. */
	b = (u8*)page_address(page) + (ll & ~PAGE_MASK);
	tb = 1 << (lcn & 7ull);
	down_write(&vol->lcnbmp_lock);
	if (*b != 0xff && !(*b & tb)) {
		/* Next cluster is free, allocate it. */
		*b |= tb;
		flush_dcache_page(page);
		set_page_dirty(page);
		up_write(&vol->lcnbmp_lock);
		ntfs_unmap_page(page);
		/* Update the mft bitmap runlist. */
		rl->length++;
		rl[1].vcn++;
		status.added_cluster = 1;
		ntfs_debug("Appending one cluster to mft bitmap.");
	} else {
		up_write(&vol->lcnbmp_lock);
		ntfs_unmap_page(page);
		/* Allocate a cluster from the DATA_ZONE. */
		rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE,
				true);
		if (IS_ERR(rl2)) {
			up_write(&mftbmp_ni->runlist.lock);
			ntfs_error(vol->sb, "Failed to allocate a cluster for "
					"the mft bitmap.");
			return PTR_ERR(rl2);
		}
		rl = ntfs_runlists_merge(mftbmp_ni->runlist.rl, rl2);
		if (IS_ERR(rl)) {
			up_write(&mftbmp_ni->runlist.lock);
			ntfs_error(vol->sb, "Failed to merge runlists for mft "
					"bitmap.");
			/* Give the freshly allocated cluster back. */
			if (ntfs_cluster_free_from_rl(vol, rl2)) {
				ntfs_error(vol->sb, "Failed to deallocate "
						"allocated cluster.%s", es);
				NVolSetErrors(vol);
			}
			ntfs_free(rl2);
			return PTR_ERR(rl);
		}
		mftbmp_ni->runlist.rl = rl;
		status.added_run = 1;
		ntfs_debug("Adding one run to mft bitmap.");
		/* Find the last run in the new runlist. */
		for (; rl[1].length; rl++)
			;
	}
	/*
	 * Update the attribute record as well. Note: @rl is the last
	 * (non-terminator) runlist element of mft bitmap.
	 */
	mrec = map_mft_record(mft_ni);
	if (IS_ERR(mrec)) {
		ntfs_error(vol->sb, "Failed to map mft record.");
		ret = PTR_ERR(mrec);
		goto undo_alloc;
	}
	ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
	if (unlikely(!ctx)) {
		ntfs_error(vol->sb, "Failed to get search context.");
		ret = -ENOMEM;
		goto undo_alloc;
	}
	ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
			mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL,
			0, ctx);
	if (unlikely(ret)) {
		ntfs_error(vol->sb, "Failed to find last attribute extent of "
				"mft bitmap attribute.");
		if (ret == -ENOENT)
			ret = -EIO;
		goto undo_alloc;
	}
	a = ctx->attr;
	/* From here on @ll is the lowest vcn of the extent that was found. */
	ll = sle64_to_cpu(a->data.non_resident.lowest_vcn);
	/* Search back for the previous last allocated cluster of mft bitmap. */
	for (rl2 = rl; rl2 > mftbmp_ni->runlist.rl; rl2--) {
		if (ll >= rl2->vcn)
			break;
	}
	/* @rl2 must now be the run that contains vcn @ll. */
	BUG_ON(ll < rl2->vcn);
	BUG_ON(ll >= rl2->vcn + rl2->length);
	/* Get the size for the new mapping pairs array for this extent. */
	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
	if (unlikely(mp_size <= 0)) {
		ntfs_error(vol->sb, "Get size for mapping pairs failed for "
				"mft bitmap attribute extent.");
		ret = mp_size;
		if (!ret)
			ret = -EIO;
		goto undo_alloc;
	}
	/* Expand the attribute record if necessary. */
	/* Remember the old record length so undo_alloc can restore it. */
	old_alen = le32_to_cpu(a->length);
	ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size +
			le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
	if (unlikely(ret)) {
		if (ret != -ENOSPC) {
			ntfs_error(vol->sb, "Failed to resize attribute "
					"record for mft bitmap attribute.");
			goto undo_alloc;
		}
		// TODO: Deal with this by moving this extent to a new mft
		// record or by starting a new extent in a new mft record or by
		// moving other attributes out of this mft record.
		// Note: It will need to be a special mft record and if none of
		// those are available it gets rather complicated...
		ntfs_error(vol->sb, "Not enough space in this mft record to "
				"accommodate extended mft bitmap attribute "
				"extent. Cannot handle this yet.");
		ret = -EOPNOTSUPP;
		goto undo_alloc;
	}
	/* The record has been resized; undo_alloc must now restore it. */
	status.mp_rebuilt = 1;
	/* Generate the mapping pairs array directly into the attr record. */
	ret = ntfs_mapping_pairs_build(vol, (u8*)a +
			le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
			mp_size, rl2, ll, -1, NULL);
	if (unlikely(ret)) {
		ntfs_error(vol->sb, "Failed to build mapping pairs array for "
				"mft bitmap attribute.");
		goto undo_alloc;
	}
	/* Update the highest_vcn. */
	a->data.non_resident.highest_vcn = cpu_to_sle64(rl[1].vcn - 1);
	/*
	 * We now have extended the mft bitmap allocated_size by one cluster.
	 * Reflect this in the ntfs_inode structure and the attribute record.
	 */
	if (a->data.non_resident.lowest_vcn) {
		/*
		 * We are not in the first attribute extent, switch to it, but
		 * first ensure the changes will make it to disk later.
		 */
		flush_dcache_mft_record_page(ctx->ntfs_ino);
		mark_mft_record_dirty(ctx->ntfs_ino);
		ntfs_attr_reinit_search_ctx(ctx);
		ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
				mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL,
				0, ctx);
		if (unlikely(ret)) {
			ntfs_error(vol->sb, "Failed to find first attribute "
					"extent of mft bitmap attribute.");
			goto restore_undo_alloc;
		}
		a = ctx->attr;
	}
	/* Update the in-memory and the on-disk allocated_size together. */
	write_lock_irqsave(&mftbmp_ni->size_lock, flags);
	mftbmp_ni->allocated_size += vol->cluster_size;
	a->data.non_resident.allocated_size =
			cpu_to_sle64(mftbmp_ni->allocated_size);
	write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
	/* Ensure the changes make it to disk. */
	flush_dcache_mft_record_page(ctx->ntfs_ino);
	mark_mft_record_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(mft_ni);
	up_write(&mftbmp_ni->runlist.lock);
	ntfs_debug("Done.");
	return 0;
restore_undo_alloc:
	/*
	 * The first attribute extent could not be found.  Go back to the last
	 * extent, which was already modified above, so it can be reverted.
	 */
	ntfs_attr_reinit_search_ctx(ctx);
	if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
			mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL,
			0, ctx)) {
		ntfs_error(vol->sb, "Failed to find last attribute extent of "
				"mft bitmap attribute.%s", es);
		/* Cannot revert: keep the new cluster and account for it. */
		write_lock_irqsave(&mftbmp_ni->size_lock, flags);
		mftbmp_ni->allocated_size += vol->cluster_size;
		write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
		ntfs_attr_put_search_ctx(ctx);
		unmap_mft_record(mft_ni);
		up_write(&mftbmp_ni->runlist.lock);
		/*
		 * The only thing that is now wrong is ->allocated_size of the
		 * base attribute extent which chkdsk should be able to fix.
		 */
		NVolSetErrors(vol);
		return ret;
	}
	a = ctx->attr;
	/*
	 * highest_vcn was set to rl[1].vcn - 1 above after growing by one
	 * cluster, so the value before the extension is one less than that.
	 */
	a->data.non_resident.highest_vcn = cpu_to_sle64(rl[1].vcn - 2);
undo_alloc:
	if (status.added_cluster) {
		/* Truncate the last run in the runlist by one cluster. */
		rl->length--;
		rl[1].vcn--;
		/* @lcn still holds the cluster that was appended by hand. */
	} else if (status.added_run) {
		/* The cluster to free is the one described by the new run. */
		lcn = rl->lcn;
		/* Remove the last run from the runlist. */
		rl->lcn = rl[1].lcn;
		rl->length = 0;
	}
	/* Deallocate the cluster. */
	down_write(&vol->lcnbmp_lock);
	if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) {
		ntfs_error(vol->sb, "Failed to free allocated cluster.%s", es);
		NVolSetErrors(vol);
	}
	up_write(&vol->lcnbmp_lock);
	if (status.mp_rebuilt) {
		/*
		 * Rebuild the mapping pairs array from the reverted runlist
		 * into the space it occupied before, then shrink the attribute
		 * record back to its old length.
		 */
		if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
				a->data.non_resident.mapping_pairs_offset),
				old_alen - le16_to_cpu(
				a->data.non_resident.mapping_pairs_offset),
				rl2, ll, -1, NULL)) {
			ntfs_error(vol->sb, "Failed to restore mapping pairs "
					"array.%s", es);
			NVolSetErrors(vol);
		}
		if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) {
			ntfs_error(vol->sb, "Failed to restore attribute "
					"record.%s", es);
			NVolSetErrors(vol);
		}
		flush_dcache_mft_record_page(ctx->ntfs_ino);
		mark_mft_record_dirty(ctx->ntfs_ino);
	}
	/* @ctx is NULL and @mrec an error pointer on the earliest failures. */
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (!IS_ERR(mrec))
		unmap_mft_record(mft_ni);
	up_write(&mftbmp_ni->runlist.lock);
	return ret;
}

/**
 * ntfs_mft_bitmap_extend_initialized_nolock - extend mftbmp initialized data
 * @vol:	volume on which to extend the mft bitmap attribute
 *
 * Extend the initialized portion of the mft bitmap attribute on the ntfs
 * volume @vol by 8 bytes.
 *
 * The sizes are updated first and the new 8 bytes are zeroed afterwards.  If
 * zeroing fails, the function tries to put the old initialized_size and
 * data_size back, both in memory and in the attribute record, and returns the
 * error from the failed write.
 *
 * Note: Only changes initialized_size and data_size, i.e. requires that
 * allocated_size is big enough to fit the new initialized_size.
 *
 * Return 0 on success and -errno on error.
 *
 * Locking: Caller must hold vol->mftbmp_lock for writing.
 */
static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol)
{
	s64 old_data_size, old_initialized_size;
	unsigned long flags;
	struct inode *mftbmp_vi;
	ntfs_inode *mft_ni, *mftbmp_ni;
	ntfs_attr_search_ctx *ctx;
	MFT_RECORD *mrec;
	ATTR_RECORD *a;
	int ret;

	ntfs_debug("Extending mft bitmap initiailized (and data) size.");
	mft_ni = NTFS_I(vol->mft_ino);
	mftbmp_vi = vol->mftbmp_ino;
	mftbmp_ni = NTFS_I(mftbmp_vi);
	/* Get the attribute record. */
	mrec = map_mft_record(mft_ni);
	if (IS_ERR(mrec)) {
		ntfs_error(vol->sb, "Failed to map mft record.");
		return PTR_ERR(mrec);
	}
	ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
	if (unlikely(!ctx)) {
		ntfs_error(vol->sb, "Failed to get search context.");
		ret = -ENOMEM;
		goto unm_err_out;
	}
	ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
			mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(ret)) {
		ntfs_error(vol->sb, "Failed to find first attribute extent of "
				"mft bitmap attribute.");
		if (ret == -ENOENT)
			ret = -EIO;
		goto put_err_out;
	}
	a = ctx->attr;
	write_lock_irqsave(&mftbmp_ni->size_lock, flags);
	/* Remember the old sizes so they can be restored on failure. */
	old_data_size = i_size_read(mftbmp_vi);
	old_initialized_size = mftbmp_ni->initialized_size;
	/*
	 * We can simply update the initialized_size before filling the space
	 * with zeroes because the caller is holding the mft bitmap lock for
	 * writing which ensures that no one else is trying to access the data.
	 */
	mftbmp_ni->initialized_size += 8;
	a->data.non_resident.initialized_size =
			cpu_to_sle64(mftbmp_ni->initialized_size);
	/* Grow data_size as well if it is now below initialized_size. */
	if (mftbmp_ni->initialized_size > old_data_size) {
		i_size_write(mftbmp_vi, mftbmp_ni->initialized_size);
		a->data.non_resident.data_size =
				cpu_to_sle64(mftbmp_ni->initialized_size);
	}
	write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
	/* Ensure the changes make it to disk. */
	flush_dcache_mft_record_page(ctx->ntfs_ino);
	mark_mft_record_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(mft_ni);
	/* Initialize the mft bitmap attribute value with zeroes. */
	ret = ntfs_attr_set(mftbmp_ni, old_initialized_size, 8, 0);
	if (likely(!ret)) {
		ntfs_debug("Done. (Wrote eight initialized bytes to mft "
				"bitmap.");
		return 0;
	}
	ntfs_error(vol->sb, "Failed to write to mft bitmap.");
	/* Try to recover from the error. */
	/* @ret keeps the write error; it is what every path below returns. */
	mrec = map_mft_record(mft_ni);
	if (IS_ERR(mrec)) {
		ntfs_error(vol->sb, "Failed to map mft record.%s", es);
		NVolSetErrors(vol);
		return ret;
	}
	ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
	if (unlikely(!ctx)) {
		ntfs_error(vol->sb, "Failed to get search context.%s", es);
		NVolSetErrors(vol);
		goto unm_err_out;
	}
	if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
			mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx)) {
		ntfs_error(vol->sb, "Failed to find first attribute extent of "
				"mft bitmap attribute.%s", es);
		NVolSetErrors(vol);
		/*
		 * The two labels below sit inside this branch so that the
		 * earlier error paths of this function share its cleanup.
		 */
put_err_out:
		ntfs_attr_put_search_ctx(ctx);
unm_err_out:
		unmap_mft_record(mft_ni);
		goto err_out;
	}
	a = ctx->attr;
	/* Put the old sizes back in memory and in the attribute record. */
	write_lock_irqsave(&mftbmp_ni->size_lock, flags);
	mftbmp_ni->initialized_size = old_initialized_size;
	a->data.non_resident.initialized_size =
			cpu_to_sle64(old_initialized_size);
	if (i_size_read(mftbmp_vi) != old_data_size) {
		i_size_write(mftbmp_vi, old_data_size);
		a->data.non_resident.data_size = cpu_to_sle64(old_data_size);
	}
	write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
	flush_dcache_mft_record_page(ctx->ntfs_ino);
	mark_mft_record_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(mft_ni);
#ifdef DEBUG
	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
	ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, "
			"data_size 0x%llx, initialized_size 0x%llx.",
			(long long)mftbmp_ni->allocated_size,
			(long long)i_size_read(mftbmp_vi),
			(long long)mftbmp_ni->initialized_size);
	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
#endif /* DEBUG */
err_out:
	return ret;
}

/**
 * ntfs_mft_data_extend_allocation_nolock - extend mft data attribute
 * @vol:	volume on which to extend the mft data attribute
 *
 * Extend the mft data attribute on the ntfs volume @vol by 16 mft records
 * worth of clusters or if not enough space for this by one mft record worth
 * of clusters.
 *
 * Note: Only changes allocated_size, i.e. does not touch initialized_size or
 * data_size.
 *
 * Return 0 on success and -errno on error.
 *
 * Locking: - Caller must hold vol->mftbmp_lock for writing.
17048c2ecf20Sopenharmony_ci * - This function takes NTFS_I(vol->mft_ino)->runlist.lock for 17058c2ecf20Sopenharmony_ci * writing and releases it before returning. 17068c2ecf20Sopenharmony_ci * - This function calls functions which take vol->lcnbmp_lock for 17078c2ecf20Sopenharmony_ci * writing and release it before returning. 17088c2ecf20Sopenharmony_ci */ 17098c2ecf20Sopenharmony_cistatic int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) 17108c2ecf20Sopenharmony_ci{ 17118c2ecf20Sopenharmony_ci LCN lcn; 17128c2ecf20Sopenharmony_ci VCN old_last_vcn; 17138c2ecf20Sopenharmony_ci s64 min_nr, nr, ll; 17148c2ecf20Sopenharmony_ci unsigned long flags; 17158c2ecf20Sopenharmony_ci ntfs_inode *mft_ni; 17168c2ecf20Sopenharmony_ci runlist_element *rl, *rl2; 17178c2ecf20Sopenharmony_ci ntfs_attr_search_ctx *ctx = NULL; 17188c2ecf20Sopenharmony_ci MFT_RECORD *mrec; 17198c2ecf20Sopenharmony_ci ATTR_RECORD *a = NULL; 17208c2ecf20Sopenharmony_ci int ret, mp_size; 17218c2ecf20Sopenharmony_ci u32 old_alen = 0; 17228c2ecf20Sopenharmony_ci bool mp_rebuilt = false; 17238c2ecf20Sopenharmony_ci 17248c2ecf20Sopenharmony_ci ntfs_debug("Extending mft data allocation."); 17258c2ecf20Sopenharmony_ci mft_ni = NTFS_I(vol->mft_ino); 17268c2ecf20Sopenharmony_ci /* 17278c2ecf20Sopenharmony_ci * Determine the preferred allocation location, i.e. the last lcn of 17288c2ecf20Sopenharmony_ci * the mft data attribute. The allocated size of the mft data 17298c2ecf20Sopenharmony_ci * attribute cannot be zero so we are ok to do this. 
17308c2ecf20Sopenharmony_ci */ 17318c2ecf20Sopenharmony_ci down_write(&mft_ni->runlist.lock); 17328c2ecf20Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 17338c2ecf20Sopenharmony_ci ll = mft_ni->allocated_size; 17348c2ecf20Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 17358c2ecf20Sopenharmony_ci rl = ntfs_attr_find_vcn_nolock(mft_ni, 17368c2ecf20Sopenharmony_ci (ll - 1) >> vol->cluster_size_bits, NULL); 17378c2ecf20Sopenharmony_ci if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) { 17388c2ecf20Sopenharmony_ci up_write(&mft_ni->runlist.lock); 17398c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to determine last allocated " 17408c2ecf20Sopenharmony_ci "cluster of mft data attribute."); 17418c2ecf20Sopenharmony_ci if (!IS_ERR(rl)) 17428c2ecf20Sopenharmony_ci ret = -EIO; 17438c2ecf20Sopenharmony_ci else 17448c2ecf20Sopenharmony_ci ret = PTR_ERR(rl); 17458c2ecf20Sopenharmony_ci return ret; 17468c2ecf20Sopenharmony_ci } 17478c2ecf20Sopenharmony_ci lcn = rl->lcn + rl->length; 17488c2ecf20Sopenharmony_ci ntfs_debug("Last lcn of mft data attribute is 0x%llx.", (long long)lcn); 17498c2ecf20Sopenharmony_ci /* Minimum allocation is one mft record worth of clusters. */ 17508c2ecf20Sopenharmony_ci min_nr = vol->mft_record_size >> vol->cluster_size_bits; 17518c2ecf20Sopenharmony_ci if (!min_nr) 17528c2ecf20Sopenharmony_ci min_nr = 1; 17538c2ecf20Sopenharmony_ci /* Want to allocate 16 mft records worth of clusters. */ 17548c2ecf20Sopenharmony_ci nr = vol->mft_record_size << 4 >> vol->cluster_size_bits; 17558c2ecf20Sopenharmony_ci if (!nr) 17568c2ecf20Sopenharmony_ci nr = min_nr; 17578c2ecf20Sopenharmony_ci /* Ensure we do not go above 2^32-1 mft records. 
*/ 17588c2ecf20Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 17598c2ecf20Sopenharmony_ci ll = mft_ni->allocated_size; 17608c2ecf20Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 17618c2ecf20Sopenharmony_ci if (unlikely((ll + (nr << vol->cluster_size_bits)) >> 17628c2ecf20Sopenharmony_ci vol->mft_record_size_bits >= (1ll << 32))) { 17638c2ecf20Sopenharmony_ci nr = min_nr; 17648c2ecf20Sopenharmony_ci if (unlikely((ll + (nr << vol->cluster_size_bits)) >> 17658c2ecf20Sopenharmony_ci vol->mft_record_size_bits >= (1ll << 32))) { 17668c2ecf20Sopenharmony_ci ntfs_warning(vol->sb, "Cannot allocate mft record " 17678c2ecf20Sopenharmony_ci "because the maximum number of inodes " 17688c2ecf20Sopenharmony_ci "(2^32) has already been reached."); 17698c2ecf20Sopenharmony_ci up_write(&mft_ni->runlist.lock); 17708c2ecf20Sopenharmony_ci return -ENOSPC; 17718c2ecf20Sopenharmony_ci } 17728c2ecf20Sopenharmony_ci } 17738c2ecf20Sopenharmony_ci ntfs_debug("Trying mft data allocation with %s cluster count %lli.", 17748c2ecf20Sopenharmony_ci nr > min_nr ? 
"default" : "minimal", (long long)nr); 17758c2ecf20Sopenharmony_ci old_last_vcn = rl[1].vcn; 17768c2ecf20Sopenharmony_ci do { 17778c2ecf20Sopenharmony_ci rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE, 17788c2ecf20Sopenharmony_ci true); 17798c2ecf20Sopenharmony_ci if (!IS_ERR(rl2)) 17808c2ecf20Sopenharmony_ci break; 17818c2ecf20Sopenharmony_ci if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) { 17828c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to allocate the minimal " 17838c2ecf20Sopenharmony_ci "number of clusters (%lli) for the " 17848c2ecf20Sopenharmony_ci "mft data attribute.", (long long)nr); 17858c2ecf20Sopenharmony_ci up_write(&mft_ni->runlist.lock); 17868c2ecf20Sopenharmony_ci return PTR_ERR(rl2); 17878c2ecf20Sopenharmony_ci } 17888c2ecf20Sopenharmony_ci /* 17898c2ecf20Sopenharmony_ci * There is not enough space to do the allocation, but there 17908c2ecf20Sopenharmony_ci * might be enough space to do a minimal allocation so try that 17918c2ecf20Sopenharmony_ci * before failing. 
17928c2ecf20Sopenharmony_ci */ 17938c2ecf20Sopenharmony_ci nr = min_nr; 17948c2ecf20Sopenharmony_ci ntfs_debug("Retrying mft data allocation with minimal cluster " 17958c2ecf20Sopenharmony_ci "count %lli.", (long long)nr); 17968c2ecf20Sopenharmony_ci } while (1); 17978c2ecf20Sopenharmony_ci rl = ntfs_runlists_merge(mft_ni->runlist.rl, rl2); 17988c2ecf20Sopenharmony_ci if (IS_ERR(rl)) { 17998c2ecf20Sopenharmony_ci up_write(&mft_ni->runlist.lock); 18008c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to merge runlists for mft data " 18018c2ecf20Sopenharmony_ci "attribute."); 18028c2ecf20Sopenharmony_ci if (ntfs_cluster_free_from_rl(vol, rl2)) { 18038c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to deallocate clusters " 18048c2ecf20Sopenharmony_ci "from the mft data attribute.%s", es); 18058c2ecf20Sopenharmony_ci NVolSetErrors(vol); 18068c2ecf20Sopenharmony_ci } 18078c2ecf20Sopenharmony_ci ntfs_free(rl2); 18088c2ecf20Sopenharmony_ci return PTR_ERR(rl); 18098c2ecf20Sopenharmony_ci } 18108c2ecf20Sopenharmony_ci mft_ni->runlist.rl = rl; 18118c2ecf20Sopenharmony_ci ntfs_debug("Allocated %lli clusters.", (long long)nr); 18128c2ecf20Sopenharmony_ci /* Find the last run in the new runlist. */ 18138c2ecf20Sopenharmony_ci for (; rl[1].length; rl++) 18148c2ecf20Sopenharmony_ci ; 18158c2ecf20Sopenharmony_ci /* Update the attribute record as well. 
*/ 18168c2ecf20Sopenharmony_ci mrec = map_mft_record(mft_ni); 18178c2ecf20Sopenharmony_ci if (IS_ERR(mrec)) { 18188c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to map mft record."); 18198c2ecf20Sopenharmony_ci ret = PTR_ERR(mrec); 18208c2ecf20Sopenharmony_ci goto undo_alloc; 18218c2ecf20Sopenharmony_ci } 18228c2ecf20Sopenharmony_ci ctx = ntfs_attr_get_search_ctx(mft_ni, mrec); 18238c2ecf20Sopenharmony_ci if (unlikely(!ctx)) { 18248c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to get search context."); 18258c2ecf20Sopenharmony_ci ret = -ENOMEM; 18268c2ecf20Sopenharmony_ci goto undo_alloc; 18278c2ecf20Sopenharmony_ci } 18288c2ecf20Sopenharmony_ci ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, 18298c2ecf20Sopenharmony_ci CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx); 18308c2ecf20Sopenharmony_ci if (unlikely(ret)) { 18318c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to find last attribute extent of " 18328c2ecf20Sopenharmony_ci "mft data attribute."); 18338c2ecf20Sopenharmony_ci if (ret == -ENOENT) 18348c2ecf20Sopenharmony_ci ret = -EIO; 18358c2ecf20Sopenharmony_ci goto undo_alloc; 18368c2ecf20Sopenharmony_ci } 18378c2ecf20Sopenharmony_ci a = ctx->attr; 18388c2ecf20Sopenharmony_ci ll = sle64_to_cpu(a->data.non_resident.lowest_vcn); 18398c2ecf20Sopenharmony_ci /* Search back for the previous last allocated cluster of mft bitmap. */ 18408c2ecf20Sopenharmony_ci for (rl2 = rl; rl2 > mft_ni->runlist.rl; rl2--) { 18418c2ecf20Sopenharmony_ci if (ll >= rl2->vcn) 18428c2ecf20Sopenharmony_ci break; 18438c2ecf20Sopenharmony_ci } 18448c2ecf20Sopenharmony_ci BUG_ON(ll < rl2->vcn); 18458c2ecf20Sopenharmony_ci BUG_ON(ll >= rl2->vcn + rl2->length); 18468c2ecf20Sopenharmony_ci /* Get the size for the new mapping pairs array for this extent. 
*/ 18478c2ecf20Sopenharmony_ci mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1); 18488c2ecf20Sopenharmony_ci if (unlikely(mp_size <= 0)) { 18498c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Get size for mapping pairs failed for " 18508c2ecf20Sopenharmony_ci "mft data attribute extent."); 18518c2ecf20Sopenharmony_ci ret = mp_size; 18528c2ecf20Sopenharmony_ci if (!ret) 18538c2ecf20Sopenharmony_ci ret = -EIO; 18548c2ecf20Sopenharmony_ci goto undo_alloc; 18558c2ecf20Sopenharmony_ci } 18568c2ecf20Sopenharmony_ci /* Expand the attribute record if necessary. */ 18578c2ecf20Sopenharmony_ci old_alen = le32_to_cpu(a->length); 18588c2ecf20Sopenharmony_ci ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size + 18598c2ecf20Sopenharmony_ci le16_to_cpu(a->data.non_resident.mapping_pairs_offset)); 18608c2ecf20Sopenharmony_ci if (unlikely(ret)) { 18618c2ecf20Sopenharmony_ci if (ret != -ENOSPC) { 18628c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to resize attribute " 18638c2ecf20Sopenharmony_ci "record for mft data attribute."); 18648c2ecf20Sopenharmony_ci goto undo_alloc; 18658c2ecf20Sopenharmony_ci } 18668c2ecf20Sopenharmony_ci // TODO: Deal with this by moving this extent to a new mft 18678c2ecf20Sopenharmony_ci // record or by starting a new extent in a new mft record or by 18688c2ecf20Sopenharmony_ci // moving other attributes out of this mft record. 18698c2ecf20Sopenharmony_ci // Note: Use the special reserved mft records and ensure that 18708c2ecf20Sopenharmony_ci // this extent is not required to find the mft record in 18718c2ecf20Sopenharmony_ci // question. If no free special records left we would need to 18728c2ecf20Sopenharmony_ci // move an existing record away, insert ours in its place, and 18738c2ecf20Sopenharmony_ci // then place the moved record into the newly allocated space 18748c2ecf20Sopenharmony_ci // and we would then need to update all references to this mft 18758c2ecf20Sopenharmony_ci // record appropriately. This is rather complicated... 
18768c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Not enough space in this mft record to " 18778c2ecf20Sopenharmony_ci "accommodate extended mft data attribute " 18788c2ecf20Sopenharmony_ci "extent. Cannot handle this yet."); 18798c2ecf20Sopenharmony_ci ret = -EOPNOTSUPP; 18808c2ecf20Sopenharmony_ci goto undo_alloc; 18818c2ecf20Sopenharmony_ci } 18828c2ecf20Sopenharmony_ci mp_rebuilt = true; 18838c2ecf20Sopenharmony_ci /* Generate the mapping pairs array directly into the attr record. */ 18848c2ecf20Sopenharmony_ci ret = ntfs_mapping_pairs_build(vol, (u8*)a + 18858c2ecf20Sopenharmony_ci le16_to_cpu(a->data.non_resident.mapping_pairs_offset), 18868c2ecf20Sopenharmony_ci mp_size, rl2, ll, -1, NULL); 18878c2ecf20Sopenharmony_ci if (unlikely(ret)) { 18888c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to build mapping pairs array of " 18898c2ecf20Sopenharmony_ci "mft data attribute."); 18908c2ecf20Sopenharmony_ci goto undo_alloc; 18918c2ecf20Sopenharmony_ci } 18928c2ecf20Sopenharmony_ci /* Update the highest_vcn. */ 18938c2ecf20Sopenharmony_ci a->data.non_resident.highest_vcn = cpu_to_sle64(rl[1].vcn - 1); 18948c2ecf20Sopenharmony_ci /* 18958c2ecf20Sopenharmony_ci * We now have extended the mft data allocated_size by nr clusters. 18968c2ecf20Sopenharmony_ci * Reflect this in the ntfs_inode structure and the attribute record. 18978c2ecf20Sopenharmony_ci * @rl is the last (non-terminator) runlist element of mft data 18988c2ecf20Sopenharmony_ci * attribute. 18998c2ecf20Sopenharmony_ci */ 19008c2ecf20Sopenharmony_ci if (a->data.non_resident.lowest_vcn) { 19018c2ecf20Sopenharmony_ci /* 19028c2ecf20Sopenharmony_ci * We are not in the first attribute extent, switch to it, but 19038c2ecf20Sopenharmony_ci * first ensure the changes will make it to disk later. 
19048c2ecf20Sopenharmony_ci */ 19058c2ecf20Sopenharmony_ci flush_dcache_mft_record_page(ctx->ntfs_ino); 19068c2ecf20Sopenharmony_ci mark_mft_record_dirty(ctx->ntfs_ino); 19078c2ecf20Sopenharmony_ci ntfs_attr_reinit_search_ctx(ctx); 19088c2ecf20Sopenharmony_ci ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name, 19098c2ecf20Sopenharmony_ci mft_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, 19108c2ecf20Sopenharmony_ci ctx); 19118c2ecf20Sopenharmony_ci if (unlikely(ret)) { 19128c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to find first attribute " 19138c2ecf20Sopenharmony_ci "extent of mft data attribute."); 19148c2ecf20Sopenharmony_ci goto restore_undo_alloc; 19158c2ecf20Sopenharmony_ci } 19168c2ecf20Sopenharmony_ci a = ctx->attr; 19178c2ecf20Sopenharmony_ci } 19188c2ecf20Sopenharmony_ci write_lock_irqsave(&mft_ni->size_lock, flags); 19198c2ecf20Sopenharmony_ci mft_ni->allocated_size += nr << vol->cluster_size_bits; 19208c2ecf20Sopenharmony_ci a->data.non_resident.allocated_size = 19218c2ecf20Sopenharmony_ci cpu_to_sle64(mft_ni->allocated_size); 19228c2ecf20Sopenharmony_ci write_unlock_irqrestore(&mft_ni->size_lock, flags); 19238c2ecf20Sopenharmony_ci /* Ensure the changes make it to disk. 
*/ 19248c2ecf20Sopenharmony_ci flush_dcache_mft_record_page(ctx->ntfs_ino); 19258c2ecf20Sopenharmony_ci mark_mft_record_dirty(ctx->ntfs_ino); 19268c2ecf20Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 19278c2ecf20Sopenharmony_ci unmap_mft_record(mft_ni); 19288c2ecf20Sopenharmony_ci up_write(&mft_ni->runlist.lock); 19298c2ecf20Sopenharmony_ci ntfs_debug("Done."); 19308c2ecf20Sopenharmony_ci return 0; 19318c2ecf20Sopenharmony_cirestore_undo_alloc: 19328c2ecf20Sopenharmony_ci ntfs_attr_reinit_search_ctx(ctx); 19338c2ecf20Sopenharmony_ci if (ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, 19348c2ecf20Sopenharmony_ci CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx)) { 19358c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to find last attribute extent of " 19368c2ecf20Sopenharmony_ci "mft data attribute.%s", es); 19378c2ecf20Sopenharmony_ci write_lock_irqsave(&mft_ni->size_lock, flags); 19388c2ecf20Sopenharmony_ci mft_ni->allocated_size += nr << vol->cluster_size_bits; 19398c2ecf20Sopenharmony_ci write_unlock_irqrestore(&mft_ni->size_lock, flags); 19408c2ecf20Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 19418c2ecf20Sopenharmony_ci unmap_mft_record(mft_ni); 19428c2ecf20Sopenharmony_ci up_write(&mft_ni->runlist.lock); 19438c2ecf20Sopenharmony_ci /* 19448c2ecf20Sopenharmony_ci * The only thing that is now wrong is ->allocated_size of the 19458c2ecf20Sopenharmony_ci * base attribute extent which chkdsk should be able to fix. 
19468c2ecf20Sopenharmony_ci */ 19478c2ecf20Sopenharmony_ci NVolSetErrors(vol); 19488c2ecf20Sopenharmony_ci return ret; 19498c2ecf20Sopenharmony_ci } 19508c2ecf20Sopenharmony_ci ctx->attr->data.non_resident.highest_vcn = 19518c2ecf20Sopenharmony_ci cpu_to_sle64(old_last_vcn - 1); 19528c2ecf20Sopenharmony_ciundo_alloc: 19538c2ecf20Sopenharmony_ci if (ntfs_cluster_free(mft_ni, old_last_vcn, -1, ctx) < 0) { 19548c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to free clusters from mft data " 19558c2ecf20Sopenharmony_ci "attribute.%s", es); 19568c2ecf20Sopenharmony_ci NVolSetErrors(vol); 19578c2ecf20Sopenharmony_ci } 19588c2ecf20Sopenharmony_ci a = ctx->attr; 19598c2ecf20Sopenharmony_ci if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) { 19608c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to truncate mft data attribute " 19618c2ecf20Sopenharmony_ci "runlist.%s", es); 19628c2ecf20Sopenharmony_ci NVolSetErrors(vol); 19638c2ecf20Sopenharmony_ci } 19648c2ecf20Sopenharmony_ci if (mp_rebuilt && !IS_ERR(ctx->mrec)) { 19658c2ecf20Sopenharmony_ci if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu( 19668c2ecf20Sopenharmony_ci a->data.non_resident.mapping_pairs_offset), 19678c2ecf20Sopenharmony_ci old_alen - le16_to_cpu( 19688c2ecf20Sopenharmony_ci a->data.non_resident.mapping_pairs_offset), 19698c2ecf20Sopenharmony_ci rl2, ll, -1, NULL)) { 19708c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to restore mapping pairs " 19718c2ecf20Sopenharmony_ci "array.%s", es); 19728c2ecf20Sopenharmony_ci NVolSetErrors(vol); 19738c2ecf20Sopenharmony_ci } 19748c2ecf20Sopenharmony_ci if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) { 19758c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to restore attribute " 19768c2ecf20Sopenharmony_ci "record.%s", es); 19778c2ecf20Sopenharmony_ci NVolSetErrors(vol); 19788c2ecf20Sopenharmony_ci } 19798c2ecf20Sopenharmony_ci flush_dcache_mft_record_page(ctx->ntfs_ino); 19808c2ecf20Sopenharmony_ci 
mark_mft_record_dirty(ctx->ntfs_ino); 19818c2ecf20Sopenharmony_ci } else if (IS_ERR(ctx->mrec)) { 19828c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to restore attribute search " 19838c2ecf20Sopenharmony_ci "context.%s", es); 19848c2ecf20Sopenharmony_ci NVolSetErrors(vol); 19858c2ecf20Sopenharmony_ci } 19868c2ecf20Sopenharmony_ci if (ctx) 19878c2ecf20Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 19888c2ecf20Sopenharmony_ci if (!IS_ERR(mrec)) 19898c2ecf20Sopenharmony_ci unmap_mft_record(mft_ni); 19908c2ecf20Sopenharmony_ci up_write(&mft_ni->runlist.lock); 19918c2ecf20Sopenharmony_ci return ret; 19928c2ecf20Sopenharmony_ci} 19938c2ecf20Sopenharmony_ci 19948c2ecf20Sopenharmony_ci/** 19958c2ecf20Sopenharmony_ci * ntfs_mft_record_layout - layout an mft record into a memory buffer 19968c2ecf20Sopenharmony_ci * @vol: volume to which the mft record will belong 19978c2ecf20Sopenharmony_ci * @mft_no: mft reference specifying the mft record number 19988c2ecf20Sopenharmony_ci * @m: destination buffer of size >= @vol->mft_record_size bytes 19998c2ecf20Sopenharmony_ci * 20008c2ecf20Sopenharmony_ci * Layout an empty, unused mft record with the mft record number @mft_no into 20018c2ecf20Sopenharmony_ci * the buffer @m. The volume @vol is needed because the mft record structure 20028c2ecf20Sopenharmony_ci * was modified in NTFS 3.1 so we need to know which volume version this mft 20038c2ecf20Sopenharmony_ci * record will be used on. 20048c2ecf20Sopenharmony_ci * 20058c2ecf20Sopenharmony_ci * Return 0 on success and -errno on error. 
20068c2ecf20Sopenharmony_ci */ 20078c2ecf20Sopenharmony_cistatic int ntfs_mft_record_layout(const ntfs_volume *vol, const s64 mft_no, 20088c2ecf20Sopenharmony_ci MFT_RECORD *m) 20098c2ecf20Sopenharmony_ci{ 20108c2ecf20Sopenharmony_ci ATTR_RECORD *a; 20118c2ecf20Sopenharmony_ci 20128c2ecf20Sopenharmony_ci ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no); 20138c2ecf20Sopenharmony_ci if (mft_no >= (1ll << 32)) { 20148c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Mft record number 0x%llx exceeds " 20158c2ecf20Sopenharmony_ci "maximum of 2^32.", (long long)mft_no); 20168c2ecf20Sopenharmony_ci return -ERANGE; 20178c2ecf20Sopenharmony_ci } 20188c2ecf20Sopenharmony_ci /* Start by clearing the whole mft record to gives us a clean slate. */ 20198c2ecf20Sopenharmony_ci memset(m, 0, vol->mft_record_size); 20208c2ecf20Sopenharmony_ci /* Aligned to 2-byte boundary. */ 20218c2ecf20Sopenharmony_ci if (vol->major_ver < 3 || (vol->major_ver == 3 && !vol->minor_ver)) 20228c2ecf20Sopenharmony_ci m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD_OLD) + 1) & ~1); 20238c2ecf20Sopenharmony_ci else { 20248c2ecf20Sopenharmony_ci m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD) + 1) & ~1); 20258c2ecf20Sopenharmony_ci /* 20268c2ecf20Sopenharmony_ci * Set the NTFS 3.1+ specific fields while we know that the 20278c2ecf20Sopenharmony_ci * volume version is 3.1+. 
20288c2ecf20Sopenharmony_ci */ 20298c2ecf20Sopenharmony_ci m->reserved = 0; 20308c2ecf20Sopenharmony_ci m->mft_record_number = cpu_to_le32((u32)mft_no); 20318c2ecf20Sopenharmony_ci } 20328c2ecf20Sopenharmony_ci m->magic = magic_FILE; 20338c2ecf20Sopenharmony_ci if (vol->mft_record_size >= NTFS_BLOCK_SIZE) 20348c2ecf20Sopenharmony_ci m->usa_count = cpu_to_le16(vol->mft_record_size / 20358c2ecf20Sopenharmony_ci NTFS_BLOCK_SIZE + 1); 20368c2ecf20Sopenharmony_ci else { 20378c2ecf20Sopenharmony_ci m->usa_count = cpu_to_le16(1); 20388c2ecf20Sopenharmony_ci ntfs_warning(vol->sb, "Sector size is bigger than mft record " 20398c2ecf20Sopenharmony_ci "size. Setting usa_count to 1. If chkdsk " 20408c2ecf20Sopenharmony_ci "reports this as corruption, please email " 20418c2ecf20Sopenharmony_ci "linux-ntfs-dev@lists.sourceforge.net stating " 20428c2ecf20Sopenharmony_ci "that you saw this message and that the " 20438c2ecf20Sopenharmony_ci "modified filesystem created was corrupt. " 20448c2ecf20Sopenharmony_ci "Thank you."); 20458c2ecf20Sopenharmony_ci } 20468c2ecf20Sopenharmony_ci /* Set the update sequence number to 1. */ 20478c2ecf20Sopenharmony_ci *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = cpu_to_le16(1); 20488c2ecf20Sopenharmony_ci m->lsn = 0; 20498c2ecf20Sopenharmony_ci m->sequence_number = cpu_to_le16(1); 20508c2ecf20Sopenharmony_ci m->link_count = 0; 20518c2ecf20Sopenharmony_ci /* 20528c2ecf20Sopenharmony_ci * Place the attributes straight after the update sequence array, 20538c2ecf20Sopenharmony_ci * aligned to 8-byte boundary. 20548c2ecf20Sopenharmony_ci */ 20558c2ecf20Sopenharmony_ci m->attrs_offset = cpu_to_le16((le16_to_cpu(m->usa_ofs) + 20568c2ecf20Sopenharmony_ci (le16_to_cpu(m->usa_count) << 1) + 7) & ~7); 20578c2ecf20Sopenharmony_ci m->flags = 0; 20588c2ecf20Sopenharmony_ci /* 20598c2ecf20Sopenharmony_ci * Using attrs_offset plus eight bytes (for the termination attribute). 
20608c2ecf20Sopenharmony_ci * attrs_offset is already aligned to 8-byte boundary, so no need to 20618c2ecf20Sopenharmony_ci * align again. 20628c2ecf20Sopenharmony_ci */ 20638c2ecf20Sopenharmony_ci m->bytes_in_use = cpu_to_le32(le16_to_cpu(m->attrs_offset) + 8); 20648c2ecf20Sopenharmony_ci m->bytes_allocated = cpu_to_le32(vol->mft_record_size); 20658c2ecf20Sopenharmony_ci m->base_mft_record = 0; 20668c2ecf20Sopenharmony_ci m->next_attr_instance = 0; 20678c2ecf20Sopenharmony_ci /* Add the termination attribute. */ 20688c2ecf20Sopenharmony_ci a = (ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset)); 20698c2ecf20Sopenharmony_ci a->type = AT_END; 20708c2ecf20Sopenharmony_ci a->length = 0; 20718c2ecf20Sopenharmony_ci ntfs_debug("Done."); 20728c2ecf20Sopenharmony_ci return 0; 20738c2ecf20Sopenharmony_ci} 20748c2ecf20Sopenharmony_ci 20758c2ecf20Sopenharmony_ci/** 20768c2ecf20Sopenharmony_ci * ntfs_mft_record_format - format an mft record on an ntfs volume 20778c2ecf20Sopenharmony_ci * @vol: volume on which to format the mft record 20788c2ecf20Sopenharmony_ci * @mft_no: mft record number to format 20798c2ecf20Sopenharmony_ci * 20808c2ecf20Sopenharmony_ci * Format the mft record @mft_no in $MFT/$DATA, i.e. lay out an empty, unused 20818c2ecf20Sopenharmony_ci * mft record into the appropriate place of the mft data attribute. This is 20828c2ecf20Sopenharmony_ci * used when extending the mft data attribute. 20838c2ecf20Sopenharmony_ci * 20848c2ecf20Sopenharmony_ci * Return 0 on success and -errno on error. 
20858c2ecf20Sopenharmony_ci */ 20868c2ecf20Sopenharmony_cistatic int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no) 20878c2ecf20Sopenharmony_ci{ 20888c2ecf20Sopenharmony_ci loff_t i_size; 20898c2ecf20Sopenharmony_ci struct inode *mft_vi = vol->mft_ino; 20908c2ecf20Sopenharmony_ci struct page *page; 20918c2ecf20Sopenharmony_ci MFT_RECORD *m; 20928c2ecf20Sopenharmony_ci pgoff_t index, end_index; 20938c2ecf20Sopenharmony_ci unsigned int ofs; 20948c2ecf20Sopenharmony_ci int err; 20958c2ecf20Sopenharmony_ci 20968c2ecf20Sopenharmony_ci ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no); 20978c2ecf20Sopenharmony_ci /* 20988c2ecf20Sopenharmony_ci * The index into the page cache and the offset within the page cache 20998c2ecf20Sopenharmony_ci * page of the wanted mft record. 21008c2ecf20Sopenharmony_ci */ 21018c2ecf20Sopenharmony_ci index = mft_no << vol->mft_record_size_bits >> PAGE_SHIFT; 21028c2ecf20Sopenharmony_ci ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_MASK; 21038c2ecf20Sopenharmony_ci /* The maximum valid index into the page cache for $MFT's data. */ 21048c2ecf20Sopenharmony_ci i_size = i_size_read(mft_vi); 21058c2ecf20Sopenharmony_ci end_index = i_size >> PAGE_SHIFT; 21068c2ecf20Sopenharmony_ci if (unlikely(index >= end_index)) { 21078c2ecf20Sopenharmony_ci if (unlikely(index > end_index || ofs + vol->mft_record_size >= 21088c2ecf20Sopenharmony_ci (i_size & ~PAGE_MASK))) { 21098c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Tried to format non-existing mft " 21108c2ecf20Sopenharmony_ci "record 0x%llx.", (long long)mft_no); 21118c2ecf20Sopenharmony_ci return -ENOENT; 21128c2ecf20Sopenharmony_ci } 21138c2ecf20Sopenharmony_ci } 21148c2ecf20Sopenharmony_ci /* Read, map, and pin the page containing the mft record. 
*/ 21158c2ecf20Sopenharmony_ci page = ntfs_map_page(mft_vi->i_mapping, index); 21168c2ecf20Sopenharmony_ci if (IS_ERR(page)) { 21178c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to map page containing mft record " 21188c2ecf20Sopenharmony_ci "to format 0x%llx.", (long long)mft_no); 21198c2ecf20Sopenharmony_ci return PTR_ERR(page); 21208c2ecf20Sopenharmony_ci } 21218c2ecf20Sopenharmony_ci lock_page(page); 21228c2ecf20Sopenharmony_ci BUG_ON(!PageUptodate(page)); 21238c2ecf20Sopenharmony_ci ClearPageUptodate(page); 21248c2ecf20Sopenharmony_ci m = (MFT_RECORD*)((u8*)page_address(page) + ofs); 21258c2ecf20Sopenharmony_ci err = ntfs_mft_record_layout(vol, mft_no, m); 21268c2ecf20Sopenharmony_ci if (unlikely(err)) { 21278c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to layout mft record 0x%llx.", 21288c2ecf20Sopenharmony_ci (long long)mft_no); 21298c2ecf20Sopenharmony_ci SetPageUptodate(page); 21308c2ecf20Sopenharmony_ci unlock_page(page); 21318c2ecf20Sopenharmony_ci ntfs_unmap_page(page); 21328c2ecf20Sopenharmony_ci return err; 21338c2ecf20Sopenharmony_ci } 21348c2ecf20Sopenharmony_ci flush_dcache_page(page); 21358c2ecf20Sopenharmony_ci SetPageUptodate(page); 21368c2ecf20Sopenharmony_ci unlock_page(page); 21378c2ecf20Sopenharmony_ci /* 21388c2ecf20Sopenharmony_ci * Make sure the mft record is written out to disk. We could use 21398c2ecf20Sopenharmony_ci * ilookup5() to check if an inode is in icache and so on but this is 21408c2ecf20Sopenharmony_ci * unnecessary as ntfs_writepage() will write the dirty record anyway. 
21418c2ecf20Sopenharmony_ci */ 21428c2ecf20Sopenharmony_ci mark_ntfs_record_dirty(page, ofs); 21438c2ecf20Sopenharmony_ci ntfs_unmap_page(page); 21448c2ecf20Sopenharmony_ci ntfs_debug("Done."); 21458c2ecf20Sopenharmony_ci return 0; 21468c2ecf20Sopenharmony_ci} 21478c2ecf20Sopenharmony_ci 21488c2ecf20Sopenharmony_ci/** 21498c2ecf20Sopenharmony_ci * ntfs_mft_record_alloc - allocate an mft record on an ntfs volume 21508c2ecf20Sopenharmony_ci * @vol: [IN] volume on which to allocate the mft record 21518c2ecf20Sopenharmony_ci * @mode: [IN] mode if want a file or directory, i.e. base inode or 0 21528c2ecf20Sopenharmony_ci * @base_ni: [IN] open base inode if allocating an extent mft record or NULL 21538c2ecf20Sopenharmony_ci * @mrec: [OUT] on successful return this is the mapped mft record 21548c2ecf20Sopenharmony_ci * 21558c2ecf20Sopenharmony_ci * Allocate an mft record in $MFT/$DATA of an open ntfs volume @vol. 21568c2ecf20Sopenharmony_ci * 21578c2ecf20Sopenharmony_ci * If @base_ni is NULL make the mft record a base mft record, i.e. a file or 21588c2ecf20Sopenharmony_ci * direvctory inode, and allocate it at the default allocator position. In 21598c2ecf20Sopenharmony_ci * this case @mode is the file mode as given to us by the caller. We in 21608c2ecf20Sopenharmony_ci * particular use @mode to distinguish whether a file or a directory is being 21618c2ecf20Sopenharmony_ci * created (S_IFDIR(mode) and S_IFREG(mode), respectively). 21628c2ecf20Sopenharmony_ci * 21638c2ecf20Sopenharmony_ci * If @base_ni is not NULL make the allocated mft record an extent record, 21648c2ecf20Sopenharmony_ci * allocate it starting at the mft record after the base mft record and attach 21658c2ecf20Sopenharmony_ci * the allocated and opened ntfs inode to the base inode @base_ni. In this 21668c2ecf20Sopenharmony_ci * case @mode must be 0 as it is meaningless for extent inodes. 21678c2ecf20Sopenharmony_ci * 21688c2ecf20Sopenharmony_ci * You need to check the return value with IS_ERR(). 
If false, the function 21698c2ecf20Sopenharmony_ci * was successful and the return value is the now opened ntfs inode of the 21708c2ecf20Sopenharmony_ci * allocated mft record. *@mrec is then set to the allocated, mapped, pinned, 21718c2ecf20Sopenharmony_ci * and locked mft record. If IS_ERR() is true, the function failed and the 21728c2ecf20Sopenharmony_ci * error code is obtained from PTR_ERR(return value). *@mrec is undefined in 21738c2ecf20Sopenharmony_ci * this case. 21748c2ecf20Sopenharmony_ci * 21758c2ecf20Sopenharmony_ci * Allocation strategy: 21768c2ecf20Sopenharmony_ci * 21778c2ecf20Sopenharmony_ci * To find a free mft record, we scan the mft bitmap for a zero bit. To 21788c2ecf20Sopenharmony_ci * optimize this we start scanning at the place specified by @base_ni or if 21798c2ecf20Sopenharmony_ci * @base_ni is NULL we start where we last stopped and we perform wrap around 21808c2ecf20Sopenharmony_ci * when we reach the end. Note, we do not try to allocate mft records below 21818c2ecf20Sopenharmony_ci * number 24 because numbers 0 to 15 are the defined system files anyway and 16 21828c2ecf20Sopenharmony_ci * to 24 are special in that they are used for storing extension mft records 21838c2ecf20Sopenharmony_ci * for the $DATA attribute of $MFT. This is required to avoid the possibility 21848c2ecf20Sopenharmony_ci * of creating a runlist with a circular dependency which once written to disk 21858c2ecf20Sopenharmony_ci * can never be read in again. Windows will only use records 16 to 24 for 21868c2ecf20Sopenharmony_ci * normal files if the volume is completely out of space. We never use them 21878c2ecf20Sopenharmony_ci * which means that when the volume is really out of space we cannot create any 21888c2ecf20Sopenharmony_ci * more files while Windows can still create up to 8 small files. We can start 21898c2ecf20Sopenharmony_ci * doing this at some later time, it does not matter much for now. 
21908c2ecf20Sopenharmony_ci * 21918c2ecf20Sopenharmony_ci * When scanning the mft bitmap, we only search up to the last allocated mft 21928c2ecf20Sopenharmony_ci * record. If there are no free records left in the range 24 to number of 21938c2ecf20Sopenharmony_ci * allocated mft records, then we extend the $MFT/$DATA attribute in order to 21948c2ecf20Sopenharmony_ci * create free mft records. We extend the allocated size of $MFT/$DATA by 16 21958c2ecf20Sopenharmony_ci * records at a time or one cluster, if cluster size is above 16kiB. If there 21968c2ecf20Sopenharmony_ci * is not sufficient space to do this, we try to extend by a single mft record 21978c2ecf20Sopenharmony_ci * or one cluster, if cluster size is above the mft record size. 21988c2ecf20Sopenharmony_ci * 21998c2ecf20Sopenharmony_ci * No matter how many mft records we allocate, we initialize only the first 22008c2ecf20Sopenharmony_ci * allocated mft record, incrementing mft data size and initialized size 22018c2ecf20Sopenharmony_ci * accordingly, open an ntfs_inode for it and return it to the caller, unless 22028c2ecf20Sopenharmony_ci * there are less than 24 mft records, in which case we allocate and initialize 22038c2ecf20Sopenharmony_ci * mft records until we reach record 24 which we consider as the first free mft 22048c2ecf20Sopenharmony_ci * record for use by normal files. 22058c2ecf20Sopenharmony_ci * 22068c2ecf20Sopenharmony_ci * If during any stage we overflow the initialized data in the mft bitmap, we 22078c2ecf20Sopenharmony_ci * extend the initialized size (and data size) by 8 bytes, allocating another 22088c2ecf20Sopenharmony_ci * cluster if required. The bitmap data size has to be at least equal to the 22098c2ecf20Sopenharmony_ci * number of mft records in the mft, but it can be bigger, in which case the 22108c2ecf20Sopenharmony_ci * superflous bits are padded with zeroes. 
22118c2ecf20Sopenharmony_ci * 22128c2ecf20Sopenharmony_ci * Thus, when we return successfully (IS_ERR() is false), we will have: 22138c2ecf20Sopenharmony_ci * - initialized / extended the mft bitmap if necessary, 22148c2ecf20Sopenharmony_ci * - initialized / extended the mft data if necessary, 22158c2ecf20Sopenharmony_ci * - set the bit corresponding to the mft record being allocated in the 22168c2ecf20Sopenharmony_ci * mft bitmap, 22178c2ecf20Sopenharmony_ci * - opened an ntfs_inode for the allocated mft record, and we will have 22188c2ecf20Sopenharmony_ci * - returned the ntfs_inode as well as the allocated mapped, pinned, and 22198c2ecf20Sopenharmony_ci * locked mft record. 22208c2ecf20Sopenharmony_ci * 22218c2ecf20Sopenharmony_ci * On error, the volume will be left in a consistent state and no record will 22228c2ecf20Sopenharmony_ci * be allocated. If rolling back a partial operation fails, we may leave some 22238c2ecf20Sopenharmony_ci * inconsistent metadata in which case we set NVolErrors() so the volume is 22248c2ecf20Sopenharmony_ci * left dirty when unmounted. 22258c2ecf20Sopenharmony_ci * 22268c2ecf20Sopenharmony_ci * Note, this function cannot make use of most of the normal functions, like 22278c2ecf20Sopenharmony_ci * for example for attribute resizing, etc, because when the run list overflows 22288c2ecf20Sopenharmony_ci * the base mft record and an attribute list is used, it is very important that 22298c2ecf20Sopenharmony_ci * the extension mft records used to store the $DATA attribute of $MFT can be 22308c2ecf20Sopenharmony_ci * reached without having to read the information contained inside them, as 22318c2ecf20Sopenharmony_ci * this would make it impossible to find them in the first place after the 22328c2ecf20Sopenharmony_ci * volume is unmounted. 
$MFT/$BITMAP probably does not need to follow this 22338c2ecf20Sopenharmony_ci * rule because the bitmap is not essential for finding the mft records, but on 22348c2ecf20Sopenharmony_ci * the other hand, handling the bitmap in this special way would make life 22358c2ecf20Sopenharmony_ci * easier because otherwise there might be circular invocations of functions 22368c2ecf20Sopenharmony_ci * when reading the bitmap. 22378c2ecf20Sopenharmony_ci */ 22388c2ecf20Sopenharmony_cintfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode, 22398c2ecf20Sopenharmony_ci ntfs_inode *base_ni, MFT_RECORD **mrec) 22408c2ecf20Sopenharmony_ci{ 22418c2ecf20Sopenharmony_ci s64 ll, bit, old_data_initialized, old_data_size; 22428c2ecf20Sopenharmony_ci unsigned long flags; 22438c2ecf20Sopenharmony_ci struct inode *vi; 22448c2ecf20Sopenharmony_ci struct page *page; 22458c2ecf20Sopenharmony_ci ntfs_inode *mft_ni, *mftbmp_ni, *ni; 22468c2ecf20Sopenharmony_ci ntfs_attr_search_ctx *ctx; 22478c2ecf20Sopenharmony_ci MFT_RECORD *m; 22488c2ecf20Sopenharmony_ci ATTR_RECORD *a; 22498c2ecf20Sopenharmony_ci pgoff_t index; 22508c2ecf20Sopenharmony_ci unsigned int ofs; 22518c2ecf20Sopenharmony_ci int err; 22528c2ecf20Sopenharmony_ci le16 seq_no, usn; 22538c2ecf20Sopenharmony_ci bool record_formatted = false; 22548c2ecf20Sopenharmony_ci 22558c2ecf20Sopenharmony_ci if (base_ni) { 22568c2ecf20Sopenharmony_ci ntfs_debug("Entering (allocating an extent mft record for " 22578c2ecf20Sopenharmony_ci "base mft record 0x%llx).", 22588c2ecf20Sopenharmony_ci (long long)base_ni->mft_no); 22598c2ecf20Sopenharmony_ci /* @mode and @base_ni are mutually exclusive. */ 22608c2ecf20Sopenharmony_ci BUG_ON(mode); 22618c2ecf20Sopenharmony_ci } else 22628c2ecf20Sopenharmony_ci ntfs_debug("Entering (allocating a base mft record)."); 22638c2ecf20Sopenharmony_ci if (mode) { 22648c2ecf20Sopenharmony_ci /* @mode and @base_ni are mutually exclusive. 
*/ 22658c2ecf20Sopenharmony_ci BUG_ON(base_ni); 22668c2ecf20Sopenharmony_ci /* We only support creation of normal files and directories. */ 22678c2ecf20Sopenharmony_ci if (!S_ISREG(mode) && !S_ISDIR(mode)) 22688c2ecf20Sopenharmony_ci return ERR_PTR(-EOPNOTSUPP); 22698c2ecf20Sopenharmony_ci } 22708c2ecf20Sopenharmony_ci BUG_ON(!mrec); 22718c2ecf20Sopenharmony_ci mft_ni = NTFS_I(vol->mft_ino); 22728c2ecf20Sopenharmony_ci mftbmp_ni = NTFS_I(vol->mftbmp_ino); 22738c2ecf20Sopenharmony_ci down_write(&vol->mftbmp_lock); 22748c2ecf20Sopenharmony_ci bit = ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(vol, base_ni); 22758c2ecf20Sopenharmony_ci if (bit >= 0) { 22768c2ecf20Sopenharmony_ci ntfs_debug("Found and allocated free record (#1), bit 0x%llx.", 22778c2ecf20Sopenharmony_ci (long long)bit); 22788c2ecf20Sopenharmony_ci goto have_alloc_rec; 22798c2ecf20Sopenharmony_ci } 22808c2ecf20Sopenharmony_ci if (bit != -ENOSPC) { 22818c2ecf20Sopenharmony_ci up_write(&vol->mftbmp_lock); 22828c2ecf20Sopenharmony_ci return ERR_PTR(bit); 22838c2ecf20Sopenharmony_ci } 22848c2ecf20Sopenharmony_ci /* 22858c2ecf20Sopenharmony_ci * No free mft records left. If the mft bitmap already covers more 22868c2ecf20Sopenharmony_ci * than the currently used mft records, the next records are all free, 22878c2ecf20Sopenharmony_ci * so we can simply allocate the first unused mft record. 22888c2ecf20Sopenharmony_ci * Note: We also have to make sure that the mft bitmap at least covers 22898c2ecf20Sopenharmony_ci * the first 24 mft records as they are special and whilst they may not 22908c2ecf20Sopenharmony_ci * be in use, we do not allocate from them. 
22918c2ecf20Sopenharmony_ci */ 22928c2ecf20Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 22938c2ecf20Sopenharmony_ci ll = mft_ni->initialized_size >> vol->mft_record_size_bits; 22948c2ecf20Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 22958c2ecf20Sopenharmony_ci read_lock_irqsave(&mftbmp_ni->size_lock, flags); 22968c2ecf20Sopenharmony_ci old_data_initialized = mftbmp_ni->initialized_size; 22978c2ecf20Sopenharmony_ci read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 22988c2ecf20Sopenharmony_ci if (old_data_initialized << 3 > ll && old_data_initialized > 3) { 22998c2ecf20Sopenharmony_ci bit = ll; 23008c2ecf20Sopenharmony_ci if (bit < 24) 23018c2ecf20Sopenharmony_ci bit = 24; 23028c2ecf20Sopenharmony_ci if (unlikely(bit >= (1ll << 32))) 23038c2ecf20Sopenharmony_ci goto max_err_out; 23048c2ecf20Sopenharmony_ci ntfs_debug("Found free record (#2), bit 0x%llx.", 23058c2ecf20Sopenharmony_ci (long long)bit); 23068c2ecf20Sopenharmony_ci goto found_free_rec; 23078c2ecf20Sopenharmony_ci } 23088c2ecf20Sopenharmony_ci /* 23098c2ecf20Sopenharmony_ci * The mft bitmap needs to be expanded until it covers the first unused 23108c2ecf20Sopenharmony_ci * mft record that we can allocate. 23118c2ecf20Sopenharmony_ci * Note: The smallest mft record we allocate is mft record 24. 
23128c2ecf20Sopenharmony_ci */ 23138c2ecf20Sopenharmony_ci bit = old_data_initialized << 3; 23148c2ecf20Sopenharmony_ci if (unlikely(bit >= (1ll << 32))) 23158c2ecf20Sopenharmony_ci goto max_err_out; 23168c2ecf20Sopenharmony_ci read_lock_irqsave(&mftbmp_ni->size_lock, flags); 23178c2ecf20Sopenharmony_ci old_data_size = mftbmp_ni->allocated_size; 23188c2ecf20Sopenharmony_ci ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, " 23198c2ecf20Sopenharmony_ci "data_size 0x%llx, initialized_size 0x%llx.", 23208c2ecf20Sopenharmony_ci (long long)old_data_size, 23218c2ecf20Sopenharmony_ci (long long)i_size_read(vol->mftbmp_ino), 23228c2ecf20Sopenharmony_ci (long long)old_data_initialized); 23238c2ecf20Sopenharmony_ci read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 23248c2ecf20Sopenharmony_ci if (old_data_initialized + 8 > old_data_size) { 23258c2ecf20Sopenharmony_ci /* Need to extend bitmap by one more cluster. */ 23268c2ecf20Sopenharmony_ci ntfs_debug("mftbmp: initialized_size + 8 > allocated_size."); 23278c2ecf20Sopenharmony_ci err = ntfs_mft_bitmap_extend_allocation_nolock(vol); 23288c2ecf20Sopenharmony_ci if (unlikely(err)) { 23298c2ecf20Sopenharmony_ci up_write(&vol->mftbmp_lock); 23308c2ecf20Sopenharmony_ci goto err_out; 23318c2ecf20Sopenharmony_ci } 23328c2ecf20Sopenharmony_ci#ifdef DEBUG 23338c2ecf20Sopenharmony_ci read_lock_irqsave(&mftbmp_ni->size_lock, flags); 23348c2ecf20Sopenharmony_ci ntfs_debug("Status of mftbmp after allocation extension: " 23358c2ecf20Sopenharmony_ci "allocated_size 0x%llx, data_size 0x%llx, " 23368c2ecf20Sopenharmony_ci "initialized_size 0x%llx.", 23378c2ecf20Sopenharmony_ci (long long)mftbmp_ni->allocated_size, 23388c2ecf20Sopenharmony_ci (long long)i_size_read(vol->mftbmp_ino), 23398c2ecf20Sopenharmony_ci (long long)mftbmp_ni->initialized_size); 23408c2ecf20Sopenharmony_ci read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 23418c2ecf20Sopenharmony_ci#endif /* DEBUG */ 23428c2ecf20Sopenharmony_ci } 
23438c2ecf20Sopenharmony_ci /* 23448c2ecf20Sopenharmony_ci * We now have sufficient allocated space, extend the initialized_size 23458c2ecf20Sopenharmony_ci * as well as the data_size if necessary and fill the new space with 23468c2ecf20Sopenharmony_ci * zeroes. 23478c2ecf20Sopenharmony_ci */ 23488c2ecf20Sopenharmony_ci err = ntfs_mft_bitmap_extend_initialized_nolock(vol); 23498c2ecf20Sopenharmony_ci if (unlikely(err)) { 23508c2ecf20Sopenharmony_ci up_write(&vol->mftbmp_lock); 23518c2ecf20Sopenharmony_ci goto err_out; 23528c2ecf20Sopenharmony_ci } 23538c2ecf20Sopenharmony_ci#ifdef DEBUG 23548c2ecf20Sopenharmony_ci read_lock_irqsave(&mftbmp_ni->size_lock, flags); 23558c2ecf20Sopenharmony_ci ntfs_debug("Status of mftbmp after initialized extension: " 23568c2ecf20Sopenharmony_ci "allocated_size 0x%llx, data_size 0x%llx, " 23578c2ecf20Sopenharmony_ci "initialized_size 0x%llx.", 23588c2ecf20Sopenharmony_ci (long long)mftbmp_ni->allocated_size, 23598c2ecf20Sopenharmony_ci (long long)i_size_read(vol->mftbmp_ino), 23608c2ecf20Sopenharmony_ci (long long)mftbmp_ni->initialized_size); 23618c2ecf20Sopenharmony_ci read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 23628c2ecf20Sopenharmony_ci#endif /* DEBUG */ 23638c2ecf20Sopenharmony_ci ntfs_debug("Found free record (#3), bit 0x%llx.", (long long)bit); 23648c2ecf20Sopenharmony_cifound_free_rec: 23658c2ecf20Sopenharmony_ci /* @bit is the found free mft record, allocate it in the mft bitmap. 
*/ 23668c2ecf20Sopenharmony_ci ntfs_debug("At found_free_rec."); 23678c2ecf20Sopenharmony_ci err = ntfs_bitmap_set_bit(vol->mftbmp_ino, bit); 23688c2ecf20Sopenharmony_ci if (unlikely(err)) { 23698c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to allocate bit in mft bitmap."); 23708c2ecf20Sopenharmony_ci up_write(&vol->mftbmp_lock); 23718c2ecf20Sopenharmony_ci goto err_out; 23728c2ecf20Sopenharmony_ci } 23738c2ecf20Sopenharmony_ci ntfs_debug("Set bit 0x%llx in mft bitmap.", (long long)bit); 23748c2ecf20Sopenharmony_cihave_alloc_rec: 23758c2ecf20Sopenharmony_ci /* 23768c2ecf20Sopenharmony_ci * The mft bitmap is now uptodate. Deal with mft data attribute now. 23778c2ecf20Sopenharmony_ci * Note, we keep hold of the mft bitmap lock for writing until all 23788c2ecf20Sopenharmony_ci * modifications to the mft data attribute are complete, too, as they 23798c2ecf20Sopenharmony_ci * will impact decisions for mft bitmap and mft record allocation done 23808c2ecf20Sopenharmony_ci * by a parallel allocation and if the lock is not maintained a 23818c2ecf20Sopenharmony_ci * parallel allocation could allocate the same mft record as this one. 23828c2ecf20Sopenharmony_ci */ 23838c2ecf20Sopenharmony_ci ll = (bit + 1) << vol->mft_record_size_bits; 23848c2ecf20Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 23858c2ecf20Sopenharmony_ci old_data_initialized = mft_ni->initialized_size; 23868c2ecf20Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 23878c2ecf20Sopenharmony_ci if (ll <= old_data_initialized) { 23888c2ecf20Sopenharmony_ci ntfs_debug("Allocated mft record already initialized."); 23898c2ecf20Sopenharmony_ci goto mft_rec_already_initialized; 23908c2ecf20Sopenharmony_ci } 23918c2ecf20Sopenharmony_ci ntfs_debug("Initializing allocated mft record."); 23928c2ecf20Sopenharmony_ci /* 23938c2ecf20Sopenharmony_ci * The mft record is outside the initialized data. 
Extend the mft data 23948c2ecf20Sopenharmony_ci * attribute until it covers the allocated record. The loop is only 23958c2ecf20Sopenharmony_ci * actually traversed more than once when a freshly formatted volume is 23968c2ecf20Sopenharmony_ci * first written to so it optimizes away nicely in the common case. 23978c2ecf20Sopenharmony_ci */ 23988c2ecf20Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 23998c2ecf20Sopenharmony_ci ntfs_debug("Status of mft data before extension: " 24008c2ecf20Sopenharmony_ci "allocated_size 0x%llx, data_size 0x%llx, " 24018c2ecf20Sopenharmony_ci "initialized_size 0x%llx.", 24028c2ecf20Sopenharmony_ci (long long)mft_ni->allocated_size, 24038c2ecf20Sopenharmony_ci (long long)i_size_read(vol->mft_ino), 24048c2ecf20Sopenharmony_ci (long long)mft_ni->initialized_size); 24058c2ecf20Sopenharmony_ci while (ll > mft_ni->allocated_size) { 24068c2ecf20Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 24078c2ecf20Sopenharmony_ci err = ntfs_mft_data_extend_allocation_nolock(vol); 24088c2ecf20Sopenharmony_ci if (unlikely(err)) { 24098c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to extend mft data " 24108c2ecf20Sopenharmony_ci "allocation."); 24118c2ecf20Sopenharmony_ci goto undo_mftbmp_alloc_nolock; 24128c2ecf20Sopenharmony_ci } 24138c2ecf20Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 24148c2ecf20Sopenharmony_ci ntfs_debug("Status of mft data after allocation extension: " 24158c2ecf20Sopenharmony_ci "allocated_size 0x%llx, data_size 0x%llx, " 24168c2ecf20Sopenharmony_ci "initialized_size 0x%llx.", 24178c2ecf20Sopenharmony_ci (long long)mft_ni->allocated_size, 24188c2ecf20Sopenharmony_ci (long long)i_size_read(vol->mft_ino), 24198c2ecf20Sopenharmony_ci (long long)mft_ni->initialized_size); 24208c2ecf20Sopenharmony_ci } 24218c2ecf20Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 24228c2ecf20Sopenharmony_ci /* 24238c2ecf20Sopenharmony_ci * Extend mft data initialized size (and data 
size of course) to reach 24248c2ecf20Sopenharmony_ci * the allocated mft record, formatting the mft records allong the way. 24258c2ecf20Sopenharmony_ci * Note: We only modify the ntfs_inode structure as that is all that is 24268c2ecf20Sopenharmony_ci * needed by ntfs_mft_record_format(). We will update the attribute 24278c2ecf20Sopenharmony_ci * record itself in one fell swoop later on. 24288c2ecf20Sopenharmony_ci */ 24298c2ecf20Sopenharmony_ci write_lock_irqsave(&mft_ni->size_lock, flags); 24308c2ecf20Sopenharmony_ci old_data_initialized = mft_ni->initialized_size; 24318c2ecf20Sopenharmony_ci old_data_size = vol->mft_ino->i_size; 24328c2ecf20Sopenharmony_ci while (ll > mft_ni->initialized_size) { 24338c2ecf20Sopenharmony_ci s64 new_initialized_size, mft_no; 24348c2ecf20Sopenharmony_ci 24358c2ecf20Sopenharmony_ci new_initialized_size = mft_ni->initialized_size + 24368c2ecf20Sopenharmony_ci vol->mft_record_size; 24378c2ecf20Sopenharmony_ci mft_no = mft_ni->initialized_size >> vol->mft_record_size_bits; 24388c2ecf20Sopenharmony_ci if (new_initialized_size > i_size_read(vol->mft_ino)) 24398c2ecf20Sopenharmony_ci i_size_write(vol->mft_ino, new_initialized_size); 24408c2ecf20Sopenharmony_ci write_unlock_irqrestore(&mft_ni->size_lock, flags); 24418c2ecf20Sopenharmony_ci ntfs_debug("Initializing mft record 0x%llx.", 24428c2ecf20Sopenharmony_ci (long long)mft_no); 24438c2ecf20Sopenharmony_ci err = ntfs_mft_record_format(vol, mft_no); 24448c2ecf20Sopenharmony_ci if (unlikely(err)) { 24458c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to format mft record."); 24468c2ecf20Sopenharmony_ci goto undo_data_init; 24478c2ecf20Sopenharmony_ci } 24488c2ecf20Sopenharmony_ci write_lock_irqsave(&mft_ni->size_lock, flags); 24498c2ecf20Sopenharmony_ci mft_ni->initialized_size = new_initialized_size; 24508c2ecf20Sopenharmony_ci } 24518c2ecf20Sopenharmony_ci write_unlock_irqrestore(&mft_ni->size_lock, flags); 24528c2ecf20Sopenharmony_ci record_formatted = true; 
24538c2ecf20Sopenharmony_ci /* Update the mft data attribute record to reflect the new sizes. */ 24548c2ecf20Sopenharmony_ci m = map_mft_record(mft_ni); 24558c2ecf20Sopenharmony_ci if (IS_ERR(m)) { 24568c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to map mft record."); 24578c2ecf20Sopenharmony_ci err = PTR_ERR(m); 24588c2ecf20Sopenharmony_ci goto undo_data_init; 24598c2ecf20Sopenharmony_ci } 24608c2ecf20Sopenharmony_ci ctx = ntfs_attr_get_search_ctx(mft_ni, m); 24618c2ecf20Sopenharmony_ci if (unlikely(!ctx)) { 24628c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to get search context."); 24638c2ecf20Sopenharmony_ci err = -ENOMEM; 24648c2ecf20Sopenharmony_ci unmap_mft_record(mft_ni); 24658c2ecf20Sopenharmony_ci goto undo_data_init; 24668c2ecf20Sopenharmony_ci } 24678c2ecf20Sopenharmony_ci err = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, 24688c2ecf20Sopenharmony_ci CASE_SENSITIVE, 0, NULL, 0, ctx); 24698c2ecf20Sopenharmony_ci if (unlikely(err)) { 24708c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to find first attribute extent of " 24718c2ecf20Sopenharmony_ci "mft data attribute."); 24728c2ecf20Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 24738c2ecf20Sopenharmony_ci unmap_mft_record(mft_ni); 24748c2ecf20Sopenharmony_ci goto undo_data_init; 24758c2ecf20Sopenharmony_ci } 24768c2ecf20Sopenharmony_ci a = ctx->attr; 24778c2ecf20Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 24788c2ecf20Sopenharmony_ci a->data.non_resident.initialized_size = 24798c2ecf20Sopenharmony_ci cpu_to_sle64(mft_ni->initialized_size); 24808c2ecf20Sopenharmony_ci a->data.non_resident.data_size = 24818c2ecf20Sopenharmony_ci cpu_to_sle64(i_size_read(vol->mft_ino)); 24828c2ecf20Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 24838c2ecf20Sopenharmony_ci /* Ensure the changes make it to disk. 
*/ 24848c2ecf20Sopenharmony_ci flush_dcache_mft_record_page(ctx->ntfs_ino); 24858c2ecf20Sopenharmony_ci mark_mft_record_dirty(ctx->ntfs_ino); 24868c2ecf20Sopenharmony_ci ntfs_attr_put_search_ctx(ctx); 24878c2ecf20Sopenharmony_ci unmap_mft_record(mft_ni); 24888c2ecf20Sopenharmony_ci read_lock_irqsave(&mft_ni->size_lock, flags); 24898c2ecf20Sopenharmony_ci ntfs_debug("Status of mft data after mft record initialization: " 24908c2ecf20Sopenharmony_ci "allocated_size 0x%llx, data_size 0x%llx, " 24918c2ecf20Sopenharmony_ci "initialized_size 0x%llx.", 24928c2ecf20Sopenharmony_ci (long long)mft_ni->allocated_size, 24938c2ecf20Sopenharmony_ci (long long)i_size_read(vol->mft_ino), 24948c2ecf20Sopenharmony_ci (long long)mft_ni->initialized_size); 24958c2ecf20Sopenharmony_ci BUG_ON(i_size_read(vol->mft_ino) > mft_ni->allocated_size); 24968c2ecf20Sopenharmony_ci BUG_ON(mft_ni->initialized_size > i_size_read(vol->mft_ino)); 24978c2ecf20Sopenharmony_ci read_unlock_irqrestore(&mft_ni->size_lock, flags); 24988c2ecf20Sopenharmony_cimft_rec_already_initialized: 24998c2ecf20Sopenharmony_ci /* 25008c2ecf20Sopenharmony_ci * We can finally drop the mft bitmap lock as the mft data attribute 25018c2ecf20Sopenharmony_ci * has been fully updated. The only disparity left is that the 25028c2ecf20Sopenharmony_ci * allocated mft record still needs to be marked as in use to match the 25038c2ecf20Sopenharmony_ci * set bit in the mft bitmap but this is actually not a problem since 25048c2ecf20Sopenharmony_ci * this mft record is not referenced from anywhere yet and the fact 25058c2ecf20Sopenharmony_ci * that it is allocated in the mft bitmap means that no-one will try to 25068c2ecf20Sopenharmony_ci * allocate it either. 25078c2ecf20Sopenharmony_ci */ 25088c2ecf20Sopenharmony_ci up_write(&vol->mftbmp_lock); 25098c2ecf20Sopenharmony_ci /* 25108c2ecf20Sopenharmony_ci * We now have allocated and initialized the mft record. 
Calculate the 25118c2ecf20Sopenharmony_ci * index of and the offset within the page cache page the record is in. 25128c2ecf20Sopenharmony_ci */ 25138c2ecf20Sopenharmony_ci index = bit << vol->mft_record_size_bits >> PAGE_SHIFT; 25148c2ecf20Sopenharmony_ci ofs = (bit << vol->mft_record_size_bits) & ~PAGE_MASK; 25158c2ecf20Sopenharmony_ci /* Read, map, and pin the page containing the mft record. */ 25168c2ecf20Sopenharmony_ci page = ntfs_map_page(vol->mft_ino->i_mapping, index); 25178c2ecf20Sopenharmony_ci if (IS_ERR(page)) { 25188c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to map page containing allocated " 25198c2ecf20Sopenharmony_ci "mft record 0x%llx.", (long long)bit); 25208c2ecf20Sopenharmony_ci err = PTR_ERR(page); 25218c2ecf20Sopenharmony_ci goto undo_mftbmp_alloc; 25228c2ecf20Sopenharmony_ci } 25238c2ecf20Sopenharmony_ci lock_page(page); 25248c2ecf20Sopenharmony_ci BUG_ON(!PageUptodate(page)); 25258c2ecf20Sopenharmony_ci ClearPageUptodate(page); 25268c2ecf20Sopenharmony_ci m = (MFT_RECORD*)((u8*)page_address(page) + ofs); 25278c2ecf20Sopenharmony_ci /* If we just formatted the mft record no need to do it again. */ 25288c2ecf20Sopenharmony_ci if (!record_formatted) { 25298c2ecf20Sopenharmony_ci /* Sanity check that the mft record is really not in use. */ 25308c2ecf20Sopenharmony_ci if (ntfs_is_file_record(m->magic) && 25318c2ecf20Sopenharmony_ci (m->flags & MFT_RECORD_IN_USE)) { 25328c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Mft record 0x%llx was marked " 25338c2ecf20Sopenharmony_ci "free in mft bitmap but is marked " 25348c2ecf20Sopenharmony_ci "used itself. Corrupt filesystem. 
" 25358c2ecf20Sopenharmony_ci "Unmount and run chkdsk.", 25368c2ecf20Sopenharmony_ci (long long)bit); 25378c2ecf20Sopenharmony_ci err = -EIO; 25388c2ecf20Sopenharmony_ci SetPageUptodate(page); 25398c2ecf20Sopenharmony_ci unlock_page(page); 25408c2ecf20Sopenharmony_ci ntfs_unmap_page(page); 25418c2ecf20Sopenharmony_ci NVolSetErrors(vol); 25428c2ecf20Sopenharmony_ci goto undo_mftbmp_alloc; 25438c2ecf20Sopenharmony_ci } 25448c2ecf20Sopenharmony_ci /* 25458c2ecf20Sopenharmony_ci * We need to (re-)format the mft record, preserving the 25468c2ecf20Sopenharmony_ci * sequence number if it is not zero as well as the update 25478c2ecf20Sopenharmony_ci * sequence number if it is not zero or -1 (0xffff). This 25488c2ecf20Sopenharmony_ci * means we do not need to care whether or not something went 25498c2ecf20Sopenharmony_ci * wrong with the previous mft record. 25508c2ecf20Sopenharmony_ci */ 25518c2ecf20Sopenharmony_ci seq_no = m->sequence_number; 25528c2ecf20Sopenharmony_ci usn = *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)); 25538c2ecf20Sopenharmony_ci err = ntfs_mft_record_layout(vol, bit, m); 25548c2ecf20Sopenharmony_ci if (unlikely(err)) { 25558c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to layout allocated mft " 25568c2ecf20Sopenharmony_ci "record 0x%llx.", (long long)bit); 25578c2ecf20Sopenharmony_ci SetPageUptodate(page); 25588c2ecf20Sopenharmony_ci unlock_page(page); 25598c2ecf20Sopenharmony_ci ntfs_unmap_page(page); 25608c2ecf20Sopenharmony_ci goto undo_mftbmp_alloc; 25618c2ecf20Sopenharmony_ci } 25628c2ecf20Sopenharmony_ci if (seq_no) 25638c2ecf20Sopenharmony_ci m->sequence_number = seq_no; 25648c2ecf20Sopenharmony_ci if (usn && le16_to_cpu(usn) != 0xffff) 25658c2ecf20Sopenharmony_ci *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = usn; 25668c2ecf20Sopenharmony_ci } 25678c2ecf20Sopenharmony_ci /* Set the mft record itself in use. 
*/ 25688c2ecf20Sopenharmony_ci m->flags |= MFT_RECORD_IN_USE; 25698c2ecf20Sopenharmony_ci if (S_ISDIR(mode)) 25708c2ecf20Sopenharmony_ci m->flags |= MFT_RECORD_IS_DIRECTORY; 25718c2ecf20Sopenharmony_ci flush_dcache_page(page); 25728c2ecf20Sopenharmony_ci SetPageUptodate(page); 25738c2ecf20Sopenharmony_ci if (base_ni) { 25748c2ecf20Sopenharmony_ci MFT_RECORD *m_tmp; 25758c2ecf20Sopenharmony_ci 25768c2ecf20Sopenharmony_ci /* 25778c2ecf20Sopenharmony_ci * Setup the base mft record in the extent mft record. This 25788c2ecf20Sopenharmony_ci * completes initialization of the allocated extent mft record 25798c2ecf20Sopenharmony_ci * and we can simply use it with map_extent_mft_record(). 25808c2ecf20Sopenharmony_ci */ 25818c2ecf20Sopenharmony_ci m->base_mft_record = MK_LE_MREF(base_ni->mft_no, 25828c2ecf20Sopenharmony_ci base_ni->seq_no); 25838c2ecf20Sopenharmony_ci /* 25848c2ecf20Sopenharmony_ci * Allocate an extent inode structure for the new mft record, 25858c2ecf20Sopenharmony_ci * attach it to the base inode @base_ni and map, pin, and lock 25868c2ecf20Sopenharmony_ci * its, i.e. the allocated, mft record. 25878c2ecf20Sopenharmony_ci */ 25888c2ecf20Sopenharmony_ci m_tmp = map_extent_mft_record(base_ni, bit, &ni); 25898c2ecf20Sopenharmony_ci if (IS_ERR(m_tmp)) { 25908c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to map allocated extent " 25918c2ecf20Sopenharmony_ci "mft record 0x%llx.", (long long)bit); 25928c2ecf20Sopenharmony_ci err = PTR_ERR(m_tmp); 25938c2ecf20Sopenharmony_ci /* Set the mft record itself not in use. */ 25948c2ecf20Sopenharmony_ci m->flags &= cpu_to_le16( 25958c2ecf20Sopenharmony_ci ~le16_to_cpu(MFT_RECORD_IN_USE)); 25968c2ecf20Sopenharmony_ci flush_dcache_page(page); 25978c2ecf20Sopenharmony_ci /* Make sure the mft record is written out to disk. 
*/ 25988c2ecf20Sopenharmony_ci mark_ntfs_record_dirty(page, ofs); 25998c2ecf20Sopenharmony_ci unlock_page(page); 26008c2ecf20Sopenharmony_ci ntfs_unmap_page(page); 26018c2ecf20Sopenharmony_ci goto undo_mftbmp_alloc; 26028c2ecf20Sopenharmony_ci } 26038c2ecf20Sopenharmony_ci BUG_ON(m != m_tmp); 26048c2ecf20Sopenharmony_ci /* 26058c2ecf20Sopenharmony_ci * Make sure the allocated mft record is written out to disk. 26068c2ecf20Sopenharmony_ci * No need to set the inode dirty because the caller is going 26078c2ecf20Sopenharmony_ci * to do that anyway after finishing with the new extent mft 26088c2ecf20Sopenharmony_ci * record (e.g. at a minimum a new attribute will be added to 26098c2ecf20Sopenharmony_ci * the mft record. 26108c2ecf20Sopenharmony_ci */ 26118c2ecf20Sopenharmony_ci mark_ntfs_record_dirty(page, ofs); 26128c2ecf20Sopenharmony_ci unlock_page(page); 26138c2ecf20Sopenharmony_ci /* 26148c2ecf20Sopenharmony_ci * Need to unmap the page since map_extent_mft_record() mapped 26158c2ecf20Sopenharmony_ci * it as well so we have it mapped twice at the moment. 26168c2ecf20Sopenharmony_ci */ 26178c2ecf20Sopenharmony_ci ntfs_unmap_page(page); 26188c2ecf20Sopenharmony_ci } else { 26198c2ecf20Sopenharmony_ci /* 26208c2ecf20Sopenharmony_ci * Allocate a new VFS inode and set it up. NOTE: @vi->i_nlink 26218c2ecf20Sopenharmony_ci * is set to 1 but the mft record->link_count is 0. The caller 26228c2ecf20Sopenharmony_ci * needs to bear this in mind. 26238c2ecf20Sopenharmony_ci */ 26248c2ecf20Sopenharmony_ci vi = new_inode(vol->sb); 26258c2ecf20Sopenharmony_ci if (unlikely(!vi)) { 26268c2ecf20Sopenharmony_ci err = -ENOMEM; 26278c2ecf20Sopenharmony_ci /* Set the mft record itself not in use. */ 26288c2ecf20Sopenharmony_ci m->flags &= cpu_to_le16( 26298c2ecf20Sopenharmony_ci ~le16_to_cpu(MFT_RECORD_IN_USE)); 26308c2ecf20Sopenharmony_ci flush_dcache_page(page); 26318c2ecf20Sopenharmony_ci /* Make sure the mft record is written out to disk. 
*/ 26328c2ecf20Sopenharmony_ci mark_ntfs_record_dirty(page, ofs); 26338c2ecf20Sopenharmony_ci unlock_page(page); 26348c2ecf20Sopenharmony_ci ntfs_unmap_page(page); 26358c2ecf20Sopenharmony_ci goto undo_mftbmp_alloc; 26368c2ecf20Sopenharmony_ci } 26378c2ecf20Sopenharmony_ci vi->i_ino = bit; 26388c2ecf20Sopenharmony_ci 26398c2ecf20Sopenharmony_ci /* The owner and group come from the ntfs volume. */ 26408c2ecf20Sopenharmony_ci vi->i_uid = vol->uid; 26418c2ecf20Sopenharmony_ci vi->i_gid = vol->gid; 26428c2ecf20Sopenharmony_ci 26438c2ecf20Sopenharmony_ci /* Initialize the ntfs specific part of @vi. */ 26448c2ecf20Sopenharmony_ci ntfs_init_big_inode(vi); 26458c2ecf20Sopenharmony_ci ni = NTFS_I(vi); 26468c2ecf20Sopenharmony_ci /* 26478c2ecf20Sopenharmony_ci * Set the appropriate mode, attribute type, and name. For 26488c2ecf20Sopenharmony_ci * directories, also setup the index values to the defaults. 26498c2ecf20Sopenharmony_ci */ 26508c2ecf20Sopenharmony_ci if (S_ISDIR(mode)) { 26518c2ecf20Sopenharmony_ci vi->i_mode = S_IFDIR | S_IRWXUGO; 26528c2ecf20Sopenharmony_ci vi->i_mode &= ~vol->dmask; 26538c2ecf20Sopenharmony_ci 26548c2ecf20Sopenharmony_ci NInoSetMstProtected(ni); 26558c2ecf20Sopenharmony_ci ni->type = AT_INDEX_ALLOCATION; 26568c2ecf20Sopenharmony_ci ni->name = I30; 26578c2ecf20Sopenharmony_ci ni->name_len = 4; 26588c2ecf20Sopenharmony_ci 26598c2ecf20Sopenharmony_ci ni->itype.index.block_size = 4096; 26608c2ecf20Sopenharmony_ci ni->itype.index.block_size_bits = ntfs_ffs(4096) - 1; 26618c2ecf20Sopenharmony_ci ni->itype.index.collation_rule = COLLATION_FILE_NAME; 26628c2ecf20Sopenharmony_ci if (vol->cluster_size <= ni->itype.index.block_size) { 26638c2ecf20Sopenharmony_ci ni->itype.index.vcn_size = vol->cluster_size; 26648c2ecf20Sopenharmony_ci ni->itype.index.vcn_size_bits = 26658c2ecf20Sopenharmony_ci vol->cluster_size_bits; 26668c2ecf20Sopenharmony_ci } else { 26678c2ecf20Sopenharmony_ci ni->itype.index.vcn_size = vol->sector_size; 26688c2ecf20Sopenharmony_ci 
ni->itype.index.vcn_size_bits = 26698c2ecf20Sopenharmony_ci vol->sector_size_bits; 26708c2ecf20Sopenharmony_ci } 26718c2ecf20Sopenharmony_ci } else { 26728c2ecf20Sopenharmony_ci vi->i_mode = S_IFREG | S_IRWXUGO; 26738c2ecf20Sopenharmony_ci vi->i_mode &= ~vol->fmask; 26748c2ecf20Sopenharmony_ci 26758c2ecf20Sopenharmony_ci ni->type = AT_DATA; 26768c2ecf20Sopenharmony_ci ni->name = NULL; 26778c2ecf20Sopenharmony_ci ni->name_len = 0; 26788c2ecf20Sopenharmony_ci } 26798c2ecf20Sopenharmony_ci if (IS_RDONLY(vi)) 26808c2ecf20Sopenharmony_ci vi->i_mode &= ~S_IWUGO; 26818c2ecf20Sopenharmony_ci 26828c2ecf20Sopenharmony_ci /* Set the inode times to the current time. */ 26838c2ecf20Sopenharmony_ci vi->i_atime = vi->i_mtime = vi->i_ctime = 26848c2ecf20Sopenharmony_ci current_time(vi); 26858c2ecf20Sopenharmony_ci /* 26868c2ecf20Sopenharmony_ci * Set the file size to 0, the ntfs inode sizes are set to 0 by 26878c2ecf20Sopenharmony_ci * the call to ntfs_init_big_inode() below. 26888c2ecf20Sopenharmony_ci */ 26898c2ecf20Sopenharmony_ci vi->i_size = 0; 26908c2ecf20Sopenharmony_ci vi->i_blocks = 0; 26918c2ecf20Sopenharmony_ci 26928c2ecf20Sopenharmony_ci /* Set the sequence number. */ 26938c2ecf20Sopenharmony_ci vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number); 26948c2ecf20Sopenharmony_ci /* 26958c2ecf20Sopenharmony_ci * Manually map, pin, and lock the mft record as we already 26968c2ecf20Sopenharmony_ci * have its page mapped and it is very easy to do. 26978c2ecf20Sopenharmony_ci */ 26988c2ecf20Sopenharmony_ci atomic_inc(&ni->count); 26998c2ecf20Sopenharmony_ci mutex_lock(&ni->mrec_lock); 27008c2ecf20Sopenharmony_ci ni->page = page; 27018c2ecf20Sopenharmony_ci ni->page_ofs = ofs; 27028c2ecf20Sopenharmony_ci /* 27038c2ecf20Sopenharmony_ci * Make sure the allocated mft record is written out to disk. 
27048c2ecf20Sopenharmony_ci * NOTE: We do not set the ntfs inode dirty because this would 27058c2ecf20Sopenharmony_ci * fail in ntfs_write_inode() because the inode does not have a 27068c2ecf20Sopenharmony_ci * standard information attribute yet. Also, there is no need 27078c2ecf20Sopenharmony_ci * to set the inode dirty because the caller is going to do 27088c2ecf20Sopenharmony_ci * that anyway after finishing with the new mft record (e.g. at 27098c2ecf20Sopenharmony_ci * a minimum some new attributes will be added to the mft 27108c2ecf20Sopenharmony_ci * record. 27118c2ecf20Sopenharmony_ci */ 27128c2ecf20Sopenharmony_ci mark_ntfs_record_dirty(page, ofs); 27138c2ecf20Sopenharmony_ci unlock_page(page); 27148c2ecf20Sopenharmony_ci 27158c2ecf20Sopenharmony_ci /* Add the inode to the inode hash for the superblock. */ 27168c2ecf20Sopenharmony_ci insert_inode_hash(vi); 27178c2ecf20Sopenharmony_ci 27188c2ecf20Sopenharmony_ci /* Update the default mft allocation position. */ 27198c2ecf20Sopenharmony_ci vol->mft_data_pos = bit + 1; 27208c2ecf20Sopenharmony_ci } 27218c2ecf20Sopenharmony_ci /* 27228c2ecf20Sopenharmony_ci * Return the opened, allocated inode of the allocated mft record as 27238c2ecf20Sopenharmony_ci * well as the mapped, pinned, and locked mft record. 27248c2ecf20Sopenharmony_ci */ 27258c2ecf20Sopenharmony_ci ntfs_debug("Returning opened, allocated %sinode 0x%llx.", 27268c2ecf20Sopenharmony_ci base_ni ? 
"extent " : "", (long long)bit); 27278c2ecf20Sopenharmony_ci *mrec = m; 27288c2ecf20Sopenharmony_ci return ni; 27298c2ecf20Sopenharmony_ciundo_data_init: 27308c2ecf20Sopenharmony_ci write_lock_irqsave(&mft_ni->size_lock, flags); 27318c2ecf20Sopenharmony_ci mft_ni->initialized_size = old_data_initialized; 27328c2ecf20Sopenharmony_ci i_size_write(vol->mft_ino, old_data_size); 27338c2ecf20Sopenharmony_ci write_unlock_irqrestore(&mft_ni->size_lock, flags); 27348c2ecf20Sopenharmony_ci goto undo_mftbmp_alloc_nolock; 27358c2ecf20Sopenharmony_ciundo_mftbmp_alloc: 27368c2ecf20Sopenharmony_ci down_write(&vol->mftbmp_lock); 27378c2ecf20Sopenharmony_ciundo_mftbmp_alloc_nolock: 27388c2ecf20Sopenharmony_ci if (ntfs_bitmap_clear_bit(vol->mftbmp_ino, bit)) { 27398c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to clear bit in mft bitmap.%s", es); 27408c2ecf20Sopenharmony_ci NVolSetErrors(vol); 27418c2ecf20Sopenharmony_ci } 27428c2ecf20Sopenharmony_ci up_write(&vol->mftbmp_lock); 27438c2ecf20Sopenharmony_cierr_out: 27448c2ecf20Sopenharmony_ci return ERR_PTR(err); 27458c2ecf20Sopenharmony_cimax_err_out: 27468c2ecf20Sopenharmony_ci ntfs_warning(vol->sb, "Cannot allocate mft record because the maximum " 27478c2ecf20Sopenharmony_ci "number of inodes (2^32) has already been reached."); 27488c2ecf20Sopenharmony_ci up_write(&vol->mftbmp_lock); 27498c2ecf20Sopenharmony_ci return ERR_PTR(-ENOSPC); 27508c2ecf20Sopenharmony_ci} 27518c2ecf20Sopenharmony_ci 27528c2ecf20Sopenharmony_ci/** 27538c2ecf20Sopenharmony_ci * ntfs_extent_mft_record_free - free an extent mft record on an ntfs volume 27548c2ecf20Sopenharmony_ci * @ni: ntfs inode of the mapped extent mft record to free 27558c2ecf20Sopenharmony_ci * @m: mapped extent mft record of the ntfs inode @ni 27568c2ecf20Sopenharmony_ci * 27578c2ecf20Sopenharmony_ci * Free the mapped extent mft record @m of the extent ntfs inode @ni. 
 *
 * Note that this function unmaps the mft record and closes and destroys @ni
 * internally and hence you cannot use either @ni or @m any more after this
 * function returns success.
 *
 * On success return 0 and on error return -errno.  On error, @ni and @m are
 * still valid and have not been freed.
 *
 * For some errors an error message is displayed and the success code 0 is
 * returned and the volume is then left dirty on umount.  This makes sense in
 * case we could not roll back the changes that were already done since the
 * caller no longer wants to reference this mft record so it does not matter to
 * the caller if something is wrong with it as long as it is properly detached
 * from the base inode.
27728c2ecf20Sopenharmony_ci */ 27738c2ecf20Sopenharmony_ciint ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m) 27748c2ecf20Sopenharmony_ci{ 27758c2ecf20Sopenharmony_ci unsigned long mft_no = ni->mft_no; 27768c2ecf20Sopenharmony_ci ntfs_volume *vol = ni->vol; 27778c2ecf20Sopenharmony_ci ntfs_inode *base_ni; 27788c2ecf20Sopenharmony_ci ntfs_inode **extent_nis; 27798c2ecf20Sopenharmony_ci int i, err; 27808c2ecf20Sopenharmony_ci le16 old_seq_no; 27818c2ecf20Sopenharmony_ci u16 seq_no; 27828c2ecf20Sopenharmony_ci 27838c2ecf20Sopenharmony_ci BUG_ON(NInoAttr(ni)); 27848c2ecf20Sopenharmony_ci BUG_ON(ni->nr_extents != -1); 27858c2ecf20Sopenharmony_ci 27868c2ecf20Sopenharmony_ci mutex_lock(&ni->extent_lock); 27878c2ecf20Sopenharmony_ci base_ni = ni->ext.base_ntfs_ino; 27888c2ecf20Sopenharmony_ci mutex_unlock(&ni->extent_lock); 27898c2ecf20Sopenharmony_ci 27908c2ecf20Sopenharmony_ci BUG_ON(base_ni->nr_extents <= 0); 27918c2ecf20Sopenharmony_ci 27928c2ecf20Sopenharmony_ci ntfs_debug("Entering for extent inode 0x%lx, base inode 0x%lx.\n", 27938c2ecf20Sopenharmony_ci mft_no, base_ni->mft_no); 27948c2ecf20Sopenharmony_ci 27958c2ecf20Sopenharmony_ci mutex_lock(&base_ni->extent_lock); 27968c2ecf20Sopenharmony_ci 27978c2ecf20Sopenharmony_ci /* Make sure we are holding the only reference to the extent inode. */ 27988c2ecf20Sopenharmony_ci if (atomic_read(&ni->count) > 2) { 27998c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Tried to free busy extent inode 0x%lx, " 28008c2ecf20Sopenharmony_ci "not freeing.", base_ni->mft_no); 28018c2ecf20Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 28028c2ecf20Sopenharmony_ci return -EBUSY; 28038c2ecf20Sopenharmony_ci } 28048c2ecf20Sopenharmony_ci 28058c2ecf20Sopenharmony_ci /* Dissociate the ntfs inode from the base inode. 
*/ 28068c2ecf20Sopenharmony_ci extent_nis = base_ni->ext.extent_ntfs_inos; 28078c2ecf20Sopenharmony_ci err = -ENOENT; 28088c2ecf20Sopenharmony_ci for (i = 0; i < base_ni->nr_extents; i++) { 28098c2ecf20Sopenharmony_ci if (ni != extent_nis[i]) 28108c2ecf20Sopenharmony_ci continue; 28118c2ecf20Sopenharmony_ci extent_nis += i; 28128c2ecf20Sopenharmony_ci base_ni->nr_extents--; 28138c2ecf20Sopenharmony_ci memmove(extent_nis, extent_nis + 1, (base_ni->nr_extents - i) * 28148c2ecf20Sopenharmony_ci sizeof(ntfs_inode*)); 28158c2ecf20Sopenharmony_ci err = 0; 28168c2ecf20Sopenharmony_ci break; 28178c2ecf20Sopenharmony_ci } 28188c2ecf20Sopenharmony_ci 28198c2ecf20Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 28208c2ecf20Sopenharmony_ci 28218c2ecf20Sopenharmony_ci if (unlikely(err)) { 28228c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Extent inode 0x%lx is not attached to " 28238c2ecf20Sopenharmony_ci "its base inode 0x%lx.", mft_no, 28248c2ecf20Sopenharmony_ci base_ni->mft_no); 28258c2ecf20Sopenharmony_ci BUG(); 28268c2ecf20Sopenharmony_ci } 28278c2ecf20Sopenharmony_ci 28288c2ecf20Sopenharmony_ci /* 28298c2ecf20Sopenharmony_ci * The extent inode is no longer attached to the base inode so no one 28308c2ecf20Sopenharmony_ci * can get a reference to it any more. 28318c2ecf20Sopenharmony_ci */ 28328c2ecf20Sopenharmony_ci 28338c2ecf20Sopenharmony_ci /* Mark the mft record as not in use. */ 28348c2ecf20Sopenharmony_ci m->flags &= ~MFT_RECORD_IN_USE; 28358c2ecf20Sopenharmony_ci 28368c2ecf20Sopenharmony_ci /* Increment the sequence number, skipping zero, if it is not zero. 
*/ 28378c2ecf20Sopenharmony_ci old_seq_no = m->sequence_number; 28388c2ecf20Sopenharmony_ci seq_no = le16_to_cpu(old_seq_no); 28398c2ecf20Sopenharmony_ci if (seq_no == 0xffff) 28408c2ecf20Sopenharmony_ci seq_no = 1; 28418c2ecf20Sopenharmony_ci else if (seq_no) 28428c2ecf20Sopenharmony_ci seq_no++; 28438c2ecf20Sopenharmony_ci m->sequence_number = cpu_to_le16(seq_no); 28448c2ecf20Sopenharmony_ci 28458c2ecf20Sopenharmony_ci /* 28468c2ecf20Sopenharmony_ci * Set the ntfs inode dirty and write it out. We do not need to worry 28478c2ecf20Sopenharmony_ci * about the base inode here since whatever caused the extent mft 28488c2ecf20Sopenharmony_ci * record to be freed is guaranteed to do it already. 28498c2ecf20Sopenharmony_ci */ 28508c2ecf20Sopenharmony_ci NInoSetDirty(ni); 28518c2ecf20Sopenharmony_ci err = write_mft_record(ni, m, 0); 28528c2ecf20Sopenharmony_ci if (unlikely(err)) { 28538c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to write mft record 0x%lx, not " 28548c2ecf20Sopenharmony_ci "freeing.", mft_no); 28558c2ecf20Sopenharmony_ci goto rollback; 28568c2ecf20Sopenharmony_ci } 28578c2ecf20Sopenharmony_cirollback_error: 28588c2ecf20Sopenharmony_ci /* Unmap and throw away the now freed extent inode. */ 28598c2ecf20Sopenharmony_ci unmap_extent_mft_record(ni); 28608c2ecf20Sopenharmony_ci ntfs_clear_extent_inode(ni); 28618c2ecf20Sopenharmony_ci 28628c2ecf20Sopenharmony_ci /* Clear the bit in the $MFT/$BITMAP corresponding to this record. */ 28638c2ecf20Sopenharmony_ci down_write(&vol->mftbmp_lock); 28648c2ecf20Sopenharmony_ci err = ntfs_bitmap_clear_bit(vol->mftbmp_ino, mft_no); 28658c2ecf20Sopenharmony_ci up_write(&vol->mftbmp_lock); 28668c2ecf20Sopenharmony_ci if (unlikely(err)) { 28678c2ecf20Sopenharmony_ci /* 28688c2ecf20Sopenharmony_ci * The extent inode is gone but we failed to deallocate it in 28698c2ecf20Sopenharmony_ci * the mft bitmap. Just emit a warning and leave the volume 28708c2ecf20Sopenharmony_ci * dirty on umount. 
28718c2ecf20Sopenharmony_ci */ 28728c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to clear bit in mft bitmap.%s", es); 28738c2ecf20Sopenharmony_ci NVolSetErrors(vol); 28748c2ecf20Sopenharmony_ci } 28758c2ecf20Sopenharmony_ci return 0; 28768c2ecf20Sopenharmony_cirollback: 28778c2ecf20Sopenharmony_ci /* Rollback what we did... */ 28788c2ecf20Sopenharmony_ci mutex_lock(&base_ni->extent_lock); 28798c2ecf20Sopenharmony_ci extent_nis = base_ni->ext.extent_ntfs_inos; 28808c2ecf20Sopenharmony_ci if (!(base_ni->nr_extents & 3)) { 28818c2ecf20Sopenharmony_ci int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode*); 28828c2ecf20Sopenharmony_ci 28838c2ecf20Sopenharmony_ci extent_nis = kmalloc(new_size, GFP_NOFS); 28848c2ecf20Sopenharmony_ci if (unlikely(!extent_nis)) { 28858c2ecf20Sopenharmony_ci ntfs_error(vol->sb, "Failed to allocate internal " 28868c2ecf20Sopenharmony_ci "buffer during rollback.%s", es); 28878c2ecf20Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 28888c2ecf20Sopenharmony_ci NVolSetErrors(vol); 28898c2ecf20Sopenharmony_ci goto rollback_error; 28908c2ecf20Sopenharmony_ci } 28918c2ecf20Sopenharmony_ci if (base_ni->nr_extents) { 28928c2ecf20Sopenharmony_ci BUG_ON(!base_ni->ext.extent_ntfs_inos); 28938c2ecf20Sopenharmony_ci memcpy(extent_nis, base_ni->ext.extent_ntfs_inos, 28948c2ecf20Sopenharmony_ci new_size - 4 * sizeof(ntfs_inode*)); 28958c2ecf20Sopenharmony_ci kfree(base_ni->ext.extent_ntfs_inos); 28968c2ecf20Sopenharmony_ci } 28978c2ecf20Sopenharmony_ci base_ni->ext.extent_ntfs_inos = extent_nis; 28988c2ecf20Sopenharmony_ci } 28998c2ecf20Sopenharmony_ci m->flags |= MFT_RECORD_IN_USE; 29008c2ecf20Sopenharmony_ci m->sequence_number = old_seq_no; 29018c2ecf20Sopenharmony_ci extent_nis[base_ni->nr_extents++] = ni; 29028c2ecf20Sopenharmony_ci mutex_unlock(&base_ni->extent_lock); 29038c2ecf20Sopenharmony_ci mark_mft_record_dirty(ni); 29048c2ecf20Sopenharmony_ci return err; 29058c2ecf20Sopenharmony_ci} 29068c2ecf20Sopenharmony_ci#endif /* NTFS_RW 
*/ 2907