xref: /kernel/linux/linux-5.10/fs/ntfs/mft.c (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/**
38c2ecf20Sopenharmony_ci * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
68c2ecf20Sopenharmony_ci * Copyright (c) 2002 Richard Russon
78c2ecf20Sopenharmony_ci */
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci#include <linux/buffer_head.h>
108c2ecf20Sopenharmony_ci#include <linux/slab.h>
118c2ecf20Sopenharmony_ci#include <linux/swap.h>
128c2ecf20Sopenharmony_ci#include <linux/bio.h>
138c2ecf20Sopenharmony_ci
148c2ecf20Sopenharmony_ci#include "attrib.h"
158c2ecf20Sopenharmony_ci#include "aops.h"
168c2ecf20Sopenharmony_ci#include "bitmap.h"
178c2ecf20Sopenharmony_ci#include "debug.h"
188c2ecf20Sopenharmony_ci#include "dir.h"
198c2ecf20Sopenharmony_ci#include "lcnalloc.h"
208c2ecf20Sopenharmony_ci#include "malloc.h"
218c2ecf20Sopenharmony_ci#include "mft.h"
228c2ecf20Sopenharmony_ci#include "ntfs.h"
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_ci#define MAX_BHS	(PAGE_SIZE / NTFS_BLOCK_SIZE)
258c2ecf20Sopenharmony_ci
268c2ecf20Sopenharmony_ci/**
278c2ecf20Sopenharmony_ci * map_mft_record_page - map the page in which a specific mft record resides
288c2ecf20Sopenharmony_ci * @ni:		ntfs inode whose mft record page to map
298c2ecf20Sopenharmony_ci *
308c2ecf20Sopenharmony_ci * This maps the page in which the mft record of the ntfs inode @ni is situated
318c2ecf20Sopenharmony_ci * and returns a pointer to the mft record within the mapped page.
328c2ecf20Sopenharmony_ci *
338c2ecf20Sopenharmony_ci * Return value needs to be checked with IS_ERR() and if that is true PTR_ERR()
348c2ecf20Sopenharmony_ci * contains the negative error code returned.
358c2ecf20Sopenharmony_ci */
368c2ecf20Sopenharmony_cistatic inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
378c2ecf20Sopenharmony_ci{
388c2ecf20Sopenharmony_ci	loff_t i_size;
398c2ecf20Sopenharmony_ci	ntfs_volume *vol = ni->vol;
408c2ecf20Sopenharmony_ci	struct inode *mft_vi = vol->mft_ino;
418c2ecf20Sopenharmony_ci	struct page *page;
428c2ecf20Sopenharmony_ci	unsigned long index, end_index;
438c2ecf20Sopenharmony_ci	unsigned ofs;
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci	BUG_ON(ni->page);
468c2ecf20Sopenharmony_ci	/*
478c2ecf20Sopenharmony_ci	 * The index into the page cache and the offset within the page cache
488c2ecf20Sopenharmony_ci	 * page of the wanted mft record. FIXME: We need to check for
498c2ecf20Sopenharmony_ci	 * overflowing the unsigned long, but I don't think we would ever get
508c2ecf20Sopenharmony_ci	 * here if the volume was that big...
518c2ecf20Sopenharmony_ci	 */
528c2ecf20Sopenharmony_ci	index = (u64)ni->mft_no << vol->mft_record_size_bits >>
538c2ecf20Sopenharmony_ci			PAGE_SHIFT;
548c2ecf20Sopenharmony_ci	ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_MASK;
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_ci	i_size = i_size_read(mft_vi);
578c2ecf20Sopenharmony_ci	/* The maximum valid index into the page cache for $MFT's data. */
588c2ecf20Sopenharmony_ci	end_index = i_size >> PAGE_SHIFT;
598c2ecf20Sopenharmony_ci
608c2ecf20Sopenharmony_ci	/* If the wanted index is out of bounds the mft record doesn't exist. */
618c2ecf20Sopenharmony_ci	if (unlikely(index >= end_index)) {
628c2ecf20Sopenharmony_ci		if (index > end_index || (i_size & ~PAGE_MASK) < ofs +
638c2ecf20Sopenharmony_ci				vol->mft_record_size) {
648c2ecf20Sopenharmony_ci			page = ERR_PTR(-ENOENT);
658c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Attempt to read mft record 0x%lx, "
668c2ecf20Sopenharmony_ci					"which is beyond the end of the mft.  "
678c2ecf20Sopenharmony_ci					"This is probably a bug in the ntfs "
688c2ecf20Sopenharmony_ci					"driver.", ni->mft_no);
698c2ecf20Sopenharmony_ci			goto err_out;
708c2ecf20Sopenharmony_ci		}
718c2ecf20Sopenharmony_ci	}
728c2ecf20Sopenharmony_ci	/* Read, map, and pin the page. */
738c2ecf20Sopenharmony_ci	page = ntfs_map_page(mft_vi->i_mapping, index);
748c2ecf20Sopenharmony_ci	if (!IS_ERR(page)) {
758c2ecf20Sopenharmony_ci		/* Catch multi sector transfer fixup errors. */
768c2ecf20Sopenharmony_ci		if (likely(ntfs_is_mft_recordp((le32*)(page_address(page) +
778c2ecf20Sopenharmony_ci				ofs)))) {
788c2ecf20Sopenharmony_ci			ni->page = page;
798c2ecf20Sopenharmony_ci			ni->page_ofs = ofs;
808c2ecf20Sopenharmony_ci			return page_address(page) + ofs;
818c2ecf20Sopenharmony_ci		}
828c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Mft record 0x%lx is corrupt.  "
838c2ecf20Sopenharmony_ci				"Run chkdsk.", ni->mft_no);
848c2ecf20Sopenharmony_ci		ntfs_unmap_page(page);
858c2ecf20Sopenharmony_ci		page = ERR_PTR(-EIO);
868c2ecf20Sopenharmony_ci		NVolSetErrors(vol);
878c2ecf20Sopenharmony_ci	}
888c2ecf20Sopenharmony_cierr_out:
898c2ecf20Sopenharmony_ci	ni->page = NULL;
908c2ecf20Sopenharmony_ci	ni->page_ofs = 0;
918c2ecf20Sopenharmony_ci	return (void*)page;
928c2ecf20Sopenharmony_ci}
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci/**
958c2ecf20Sopenharmony_ci * map_mft_record - map, pin and lock an mft record
968c2ecf20Sopenharmony_ci * @ni:		ntfs inode whose MFT record to map
978c2ecf20Sopenharmony_ci *
988c2ecf20Sopenharmony_ci * First, take the mrec_lock mutex.  We might now be sleeping, while waiting
998c2ecf20Sopenharmony_ci * for the mutex if it was already locked by someone else.
1008c2ecf20Sopenharmony_ci *
1018c2ecf20Sopenharmony_ci * The page of the record is mapped using map_mft_record_page() before being
1028c2ecf20Sopenharmony_ci * returned to the caller.
1038c2ecf20Sopenharmony_ci *
1048c2ecf20Sopenharmony_ci * This in turn uses ntfs_map_page() to get the page containing the wanted mft
1058c2ecf20Sopenharmony_ci * record (it in turn calls read_cache_page() which reads it in from disk if
1068c2ecf20Sopenharmony_ci * necessary, increments the use count on the page so that it cannot disappear
1078c2ecf20Sopenharmony_ci * under us and returns a reference to the page cache page).
1088c2ecf20Sopenharmony_ci *
1098c2ecf20Sopenharmony_ci * If read_cache_page() invokes ntfs_readpage() to load the page from disk, it
1108c2ecf20Sopenharmony_ci * sets PG_locked and clears PG_uptodate on the page. Once I/O has completed
1118c2ecf20Sopenharmony_ci * and the post-read mst fixups on each mft record in the page have been
1128c2ecf20Sopenharmony_ci * performed, the page gets PG_uptodate set and PG_locked cleared (this is done
1138c2ecf20Sopenharmony_ci * in our asynchronous I/O completion handler end_buffer_read_mft_async()).
1148c2ecf20Sopenharmony_ci * ntfs_map_page() waits for PG_locked to become clear and checks if
1158c2ecf20Sopenharmony_ci * PG_uptodate is set and returns an error code if not. This provides
1168c2ecf20Sopenharmony_ci * sufficient protection against races when reading/using the page.
1178c2ecf20Sopenharmony_ci *
1188c2ecf20Sopenharmony_ci * However there is the write mapping to think about. Doing the above described
1198c2ecf20Sopenharmony_ci * checking here will be fine, because when initiating the write we will set
1208c2ecf20Sopenharmony_ci * PG_locked and clear PG_uptodate making sure nobody is touching the page
1218c2ecf20Sopenharmony_ci * contents. Doing the locking this way means that the commit to disk code in
1228c2ecf20Sopenharmony_ci * the page cache code paths is automatically sufficiently locked with us as
1238c2ecf20Sopenharmony_ci * we will not touch a page that has been locked or is not uptodate. The only
1248c2ecf20Sopenharmony_ci * locking problem then is them locking the page while we are accessing it.
1258c2ecf20Sopenharmony_ci *
1268c2ecf20Sopenharmony_ci * So that code will end up having to own the mrec_lock of all mft
1278c2ecf20Sopenharmony_ci * records/inodes present in the page before I/O can proceed. In that case we
1288c2ecf20Sopenharmony_ci * wouldn't need to bother with PG_locked and PG_uptodate as nobody will be
1298c2ecf20Sopenharmony_ci * accessing anything without owning the mrec_lock mutex.  But we do need to
1308c2ecf20Sopenharmony_ci * use them because of the read_cache_page() invocation and the code becomes so
1318c2ecf20Sopenharmony_ci * much simpler this way that it is well worth it.
1328c2ecf20Sopenharmony_ci *
1338c2ecf20Sopenharmony_ci * The mft record is now ours and we return a pointer to it. You need to check
1348c2ecf20Sopenharmony_ci * the returned pointer with IS_ERR() and if that is true, PTR_ERR() will return
1358c2ecf20Sopenharmony_ci * the error code.
1368c2ecf20Sopenharmony_ci *
1378c2ecf20Sopenharmony_ci * NOTE: Caller is responsible for setting the mft record dirty before calling
1388c2ecf20Sopenharmony_ci * unmap_mft_record(). This is obviously only necessary if the caller really
1398c2ecf20Sopenharmony_ci * modified the mft record...
1408c2ecf20Sopenharmony_ci * Q: Do we want to recycle one of the VFS inode state bits instead?
1418c2ecf20Sopenharmony_ci * A: No, the inode ones mean we want to change the mft record, not we want to
1428c2ecf20Sopenharmony_ci * write it out.
1438c2ecf20Sopenharmony_ci */
1448c2ecf20Sopenharmony_ciMFT_RECORD *map_mft_record(ntfs_inode *ni)
1458c2ecf20Sopenharmony_ci{
1468c2ecf20Sopenharmony_ci	MFT_RECORD *m;
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_ci	ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no);
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci	/* Make sure the ntfs inode doesn't go away. */
1518c2ecf20Sopenharmony_ci	atomic_inc(&ni->count);
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci	/* Serialize access to this mft record. */
1548c2ecf20Sopenharmony_ci	mutex_lock(&ni->mrec_lock);
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_ci	m = map_mft_record_page(ni);
1578c2ecf20Sopenharmony_ci	if (!IS_ERR(m))
1588c2ecf20Sopenharmony_ci		return m;
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci	mutex_unlock(&ni->mrec_lock);
1618c2ecf20Sopenharmony_ci	atomic_dec(&ni->count);
1628c2ecf20Sopenharmony_ci	ntfs_error(ni->vol->sb, "Failed with error code %lu.", -PTR_ERR(m));
1638c2ecf20Sopenharmony_ci	return m;
1648c2ecf20Sopenharmony_ci}
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_ci/**
1678c2ecf20Sopenharmony_ci * unmap_mft_record_page - unmap the page in which a specific mft record resides
1688c2ecf20Sopenharmony_ci * @ni:		ntfs inode whose mft record page to unmap
1698c2ecf20Sopenharmony_ci *
1708c2ecf20Sopenharmony_ci * This unmaps the page in which the mft record of the ntfs inode @ni is
1718c2ecf20Sopenharmony_ci * situated and returns. This is a NOOP if highmem is not configured.
1728c2ecf20Sopenharmony_ci *
1738c2ecf20Sopenharmony_ci * The unmap happens via ntfs_unmap_page() which in turn decrements the use
1748c2ecf20Sopenharmony_ci * count on the page thus releasing it from the pinned state.
1758c2ecf20Sopenharmony_ci *
1768c2ecf20Sopenharmony_ci * We do not actually unmap the page from memory of course, as that will be
1778c2ecf20Sopenharmony_ci * done by the page cache code itself when memory pressure increases or
1788c2ecf20Sopenharmony_ci * whatever.
1798c2ecf20Sopenharmony_ci */
1808c2ecf20Sopenharmony_cistatic inline void unmap_mft_record_page(ntfs_inode *ni)
1818c2ecf20Sopenharmony_ci{
1828c2ecf20Sopenharmony_ci	BUG_ON(!ni->page);
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci	// TODO: If dirty, blah...
1858c2ecf20Sopenharmony_ci	ntfs_unmap_page(ni->page);
1868c2ecf20Sopenharmony_ci	ni->page = NULL;
1878c2ecf20Sopenharmony_ci	ni->page_ofs = 0;
1888c2ecf20Sopenharmony_ci	return;
1898c2ecf20Sopenharmony_ci}
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci/**
1928c2ecf20Sopenharmony_ci * unmap_mft_record - release a mapped mft record
1938c2ecf20Sopenharmony_ci * @ni:		ntfs inode whose MFT record to unmap
1948c2ecf20Sopenharmony_ci *
1958c2ecf20Sopenharmony_ci * We release the page mapping and the mrec_lock mutex which unmaps the mft
1968c2ecf20Sopenharmony_ci * record and releases it for others to get hold of. We also release the ntfs
1978c2ecf20Sopenharmony_ci * inode by decrementing the ntfs inode reference count.
1988c2ecf20Sopenharmony_ci *
1998c2ecf20Sopenharmony_ci * NOTE: If caller has modified the mft record, it is imperative to set the mft
2008c2ecf20Sopenharmony_ci * record dirty BEFORE calling unmap_mft_record().
2018c2ecf20Sopenharmony_ci */
2028c2ecf20Sopenharmony_civoid unmap_mft_record(ntfs_inode *ni)
2038c2ecf20Sopenharmony_ci{
2048c2ecf20Sopenharmony_ci	struct page *page = ni->page;
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci	BUG_ON(!page);
2078c2ecf20Sopenharmony_ci
2088c2ecf20Sopenharmony_ci	ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no);
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	unmap_mft_record_page(ni);
2118c2ecf20Sopenharmony_ci	mutex_unlock(&ni->mrec_lock);
2128c2ecf20Sopenharmony_ci	atomic_dec(&ni->count);
2138c2ecf20Sopenharmony_ci	/*
2148c2ecf20Sopenharmony_ci	 * If pure ntfs_inode, i.e. no vfs inode attached, we leave it to
2158c2ecf20Sopenharmony_ci	 * ntfs_clear_extent_inode() in the extent inode case, and to the
2168c2ecf20Sopenharmony_ci	 * caller in the non-extent, yet pure ntfs inode case, to do the actual
2178c2ecf20Sopenharmony_ci	 * tear down of all structures and freeing of all allocated memory.
2188c2ecf20Sopenharmony_ci	 */
2198c2ecf20Sopenharmony_ci	return;
2208c2ecf20Sopenharmony_ci}
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_ci/**
2238c2ecf20Sopenharmony_ci * map_extent_mft_record - load an extent inode and attach it to its base
2248c2ecf20Sopenharmony_ci * @base_ni:	base ntfs inode
2258c2ecf20Sopenharmony_ci * @mref:	mft reference of the extent inode to load
2268c2ecf20Sopenharmony_ci * @ntfs_ino:	on successful return, pointer to the ntfs_inode structure
2278c2ecf20Sopenharmony_ci *
2288c2ecf20Sopenharmony_ci * Load the extent mft record @mref and attach it to its base inode @base_ni.
2298c2ecf20Sopenharmony_ci * Return the mapped extent mft record if IS_ERR(result) is false.  Otherwise
2308c2ecf20Sopenharmony_ci * PTR_ERR(result) gives the negative error code.
2318c2ecf20Sopenharmony_ci *
2328c2ecf20Sopenharmony_ci * On successful return, @ntfs_ino contains a pointer to the ntfs_inode
2338c2ecf20Sopenharmony_ci * structure of the mapped extent inode.
2348c2ecf20Sopenharmony_ci */
2358c2ecf20Sopenharmony_ciMFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
2368c2ecf20Sopenharmony_ci		ntfs_inode **ntfs_ino)
2378c2ecf20Sopenharmony_ci{
2388c2ecf20Sopenharmony_ci	MFT_RECORD *m;
2398c2ecf20Sopenharmony_ci	ntfs_inode *ni = NULL;
2408c2ecf20Sopenharmony_ci	ntfs_inode **extent_nis = NULL;
2418c2ecf20Sopenharmony_ci	int i;
2428c2ecf20Sopenharmony_ci	unsigned long mft_no = MREF(mref);
2438c2ecf20Sopenharmony_ci	u16 seq_no = MSEQNO(mref);
2448c2ecf20Sopenharmony_ci	bool destroy_ni = false;
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_ci	ntfs_debug("Mapping extent mft record 0x%lx (base mft record 0x%lx).",
2478c2ecf20Sopenharmony_ci			mft_no, base_ni->mft_no);
2488c2ecf20Sopenharmony_ci	/* Make sure the base ntfs inode doesn't go away. */
2498c2ecf20Sopenharmony_ci	atomic_inc(&base_ni->count);
2508c2ecf20Sopenharmony_ci	/*
2518c2ecf20Sopenharmony_ci	 * Check if this extent inode has already been added to the base inode,
2528c2ecf20Sopenharmony_ci	 * in which case just return it. If not found, add it to the base
2538c2ecf20Sopenharmony_ci	 * inode before returning it.
2548c2ecf20Sopenharmony_ci	 */
2558c2ecf20Sopenharmony_ci	mutex_lock(&base_ni->extent_lock);
2568c2ecf20Sopenharmony_ci	if (base_ni->nr_extents > 0) {
2578c2ecf20Sopenharmony_ci		extent_nis = base_ni->ext.extent_ntfs_inos;
2588c2ecf20Sopenharmony_ci		for (i = 0; i < base_ni->nr_extents; i++) {
2598c2ecf20Sopenharmony_ci			if (mft_no != extent_nis[i]->mft_no)
2608c2ecf20Sopenharmony_ci				continue;
2618c2ecf20Sopenharmony_ci			ni = extent_nis[i];
2628c2ecf20Sopenharmony_ci			/* Make sure the ntfs inode doesn't go away. */
2638c2ecf20Sopenharmony_ci			atomic_inc(&ni->count);
2648c2ecf20Sopenharmony_ci			break;
2658c2ecf20Sopenharmony_ci		}
2668c2ecf20Sopenharmony_ci	}
2678c2ecf20Sopenharmony_ci	if (likely(ni != NULL)) {
2688c2ecf20Sopenharmony_ci		mutex_unlock(&base_ni->extent_lock);
2698c2ecf20Sopenharmony_ci		atomic_dec(&base_ni->count);
2708c2ecf20Sopenharmony_ci		/* We found the record; just have to map and return it. */
2718c2ecf20Sopenharmony_ci		m = map_mft_record(ni);
2728c2ecf20Sopenharmony_ci		/* map_mft_record() has incremented this on success. */
2738c2ecf20Sopenharmony_ci		atomic_dec(&ni->count);
2748c2ecf20Sopenharmony_ci		if (!IS_ERR(m)) {
2758c2ecf20Sopenharmony_ci			/* Verify the sequence number. */
2768c2ecf20Sopenharmony_ci			if (likely(le16_to_cpu(m->sequence_number) == seq_no)) {
2778c2ecf20Sopenharmony_ci				ntfs_debug("Done 1.");
2788c2ecf20Sopenharmony_ci				*ntfs_ino = ni;
2798c2ecf20Sopenharmony_ci				return m;
2808c2ecf20Sopenharmony_ci			}
2818c2ecf20Sopenharmony_ci			unmap_mft_record(ni);
2828c2ecf20Sopenharmony_ci			ntfs_error(base_ni->vol->sb, "Found stale extent mft "
2838c2ecf20Sopenharmony_ci					"reference! Corrupt filesystem. "
2848c2ecf20Sopenharmony_ci					"Run chkdsk.");
2858c2ecf20Sopenharmony_ci			return ERR_PTR(-EIO);
2868c2ecf20Sopenharmony_ci		}
2878c2ecf20Sopenharmony_cimap_err_out:
2888c2ecf20Sopenharmony_ci		ntfs_error(base_ni->vol->sb, "Failed to map extent "
2898c2ecf20Sopenharmony_ci				"mft record, error code %ld.", -PTR_ERR(m));
2908c2ecf20Sopenharmony_ci		return m;
2918c2ecf20Sopenharmony_ci	}
2928c2ecf20Sopenharmony_ci	/* Record wasn't there. Get a new ntfs inode and initialize it. */
2938c2ecf20Sopenharmony_ci	ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no);
2948c2ecf20Sopenharmony_ci	if (unlikely(!ni)) {
2958c2ecf20Sopenharmony_ci		mutex_unlock(&base_ni->extent_lock);
2968c2ecf20Sopenharmony_ci		atomic_dec(&base_ni->count);
2978c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
2988c2ecf20Sopenharmony_ci	}
2998c2ecf20Sopenharmony_ci	ni->vol = base_ni->vol;
3008c2ecf20Sopenharmony_ci	ni->seq_no = seq_no;
3018c2ecf20Sopenharmony_ci	ni->nr_extents = -1;
3028c2ecf20Sopenharmony_ci	ni->ext.base_ntfs_ino = base_ni;
3038c2ecf20Sopenharmony_ci	/* Now map the record. */
3048c2ecf20Sopenharmony_ci	m = map_mft_record(ni);
3058c2ecf20Sopenharmony_ci	if (IS_ERR(m)) {
3068c2ecf20Sopenharmony_ci		mutex_unlock(&base_ni->extent_lock);
3078c2ecf20Sopenharmony_ci		atomic_dec(&base_ni->count);
3088c2ecf20Sopenharmony_ci		ntfs_clear_extent_inode(ni);
3098c2ecf20Sopenharmony_ci		goto map_err_out;
3108c2ecf20Sopenharmony_ci	}
3118c2ecf20Sopenharmony_ci	/* Verify the sequence number if it is present. */
3128c2ecf20Sopenharmony_ci	if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) {
3138c2ecf20Sopenharmony_ci		ntfs_error(base_ni->vol->sb, "Found stale extent mft "
3148c2ecf20Sopenharmony_ci				"reference! Corrupt filesystem. Run chkdsk.");
3158c2ecf20Sopenharmony_ci		destroy_ni = true;
3168c2ecf20Sopenharmony_ci		m = ERR_PTR(-EIO);
3178c2ecf20Sopenharmony_ci		goto unm_err_out;
3188c2ecf20Sopenharmony_ci	}
3198c2ecf20Sopenharmony_ci	/* Attach extent inode to base inode, reallocating memory if needed. */
3208c2ecf20Sopenharmony_ci	if (!(base_ni->nr_extents & 3)) {
3218c2ecf20Sopenharmony_ci		ntfs_inode **tmp;
3228c2ecf20Sopenharmony_ci		int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *);
3238c2ecf20Sopenharmony_ci
3248c2ecf20Sopenharmony_ci		tmp = kmalloc(new_size, GFP_NOFS);
3258c2ecf20Sopenharmony_ci		if (unlikely(!tmp)) {
3268c2ecf20Sopenharmony_ci			ntfs_error(base_ni->vol->sb, "Failed to allocate "
3278c2ecf20Sopenharmony_ci					"internal buffer.");
3288c2ecf20Sopenharmony_ci			destroy_ni = true;
3298c2ecf20Sopenharmony_ci			m = ERR_PTR(-ENOMEM);
3308c2ecf20Sopenharmony_ci			goto unm_err_out;
3318c2ecf20Sopenharmony_ci		}
3328c2ecf20Sopenharmony_ci		if (base_ni->nr_extents) {
3338c2ecf20Sopenharmony_ci			BUG_ON(!base_ni->ext.extent_ntfs_inos);
3348c2ecf20Sopenharmony_ci			memcpy(tmp, base_ni->ext.extent_ntfs_inos, new_size -
3358c2ecf20Sopenharmony_ci					4 * sizeof(ntfs_inode *));
3368c2ecf20Sopenharmony_ci			kfree(base_ni->ext.extent_ntfs_inos);
3378c2ecf20Sopenharmony_ci		}
3388c2ecf20Sopenharmony_ci		base_ni->ext.extent_ntfs_inos = tmp;
3398c2ecf20Sopenharmony_ci	}
3408c2ecf20Sopenharmony_ci	base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni;
3418c2ecf20Sopenharmony_ci	mutex_unlock(&base_ni->extent_lock);
3428c2ecf20Sopenharmony_ci	atomic_dec(&base_ni->count);
3438c2ecf20Sopenharmony_ci	ntfs_debug("Done 2.");
3448c2ecf20Sopenharmony_ci	*ntfs_ino = ni;
3458c2ecf20Sopenharmony_ci	return m;
3468c2ecf20Sopenharmony_ciunm_err_out:
3478c2ecf20Sopenharmony_ci	unmap_mft_record(ni);
3488c2ecf20Sopenharmony_ci	mutex_unlock(&base_ni->extent_lock);
3498c2ecf20Sopenharmony_ci	atomic_dec(&base_ni->count);
3508c2ecf20Sopenharmony_ci	/*
3518c2ecf20Sopenharmony_ci	 * If the extent inode was not attached to the base inode we need to
3528c2ecf20Sopenharmony_ci	 * release it or we will leak memory.
3538c2ecf20Sopenharmony_ci	 */
3548c2ecf20Sopenharmony_ci	if (destroy_ni)
3558c2ecf20Sopenharmony_ci		ntfs_clear_extent_inode(ni);
3568c2ecf20Sopenharmony_ci	return m;
3578c2ecf20Sopenharmony_ci}
3588c2ecf20Sopenharmony_ci
3598c2ecf20Sopenharmony_ci#ifdef NTFS_RW
3608c2ecf20Sopenharmony_ci
3618c2ecf20Sopenharmony_ci/**
3628c2ecf20Sopenharmony_ci * __mark_mft_record_dirty - set the mft record and the page containing it dirty
3638c2ecf20Sopenharmony_ci * @ni:		ntfs inode describing the mapped mft record
3648c2ecf20Sopenharmony_ci *
3658c2ecf20Sopenharmony_ci * Internal function.  Users should call mark_mft_record_dirty() instead.
3668c2ecf20Sopenharmony_ci *
3678c2ecf20Sopenharmony_ci * Set the mapped (extent) mft record of the (base or extent) ntfs inode @ni,
3688c2ecf20Sopenharmony_ci * as well as the page containing the mft record, dirty.  Also, mark the base
3698c2ecf20Sopenharmony_ci * vfs inode dirty.  This ensures that any changes to the mft record are
3708c2ecf20Sopenharmony_ci * written out to disk.
3718c2ecf20Sopenharmony_ci *
3728c2ecf20Sopenharmony_ci * NOTE:  We only set I_DIRTY_DATASYNC (and not I_DIRTY_PAGES)
3738c2ecf20Sopenharmony_ci * on the base vfs inode, because even though file data may have been modified,
3748c2ecf20Sopenharmony_ci * it is dirty in the inode meta data rather than the data page cache of the
3758c2ecf20Sopenharmony_ci * inode, and thus there are no data pages that need writing out.  Therefore, a
3768c2ecf20Sopenharmony_ci * full mark_inode_dirty() is overkill.  A mark_inode_dirty_sync(), on the
3778c2ecf20Sopenharmony_ci * other hand, is not sufficient, because ->write_inode needs to be called even
3788c2ecf20Sopenharmony_ci * in case of fdatasync. This needs to happen or the file data would not
3798c2ecf20Sopenharmony_ci * necessarily hit the device synchronously, even though the vfs inode has the
3808c2ecf20Sopenharmony_ci * O_SYNC flag set.  Also, I_DIRTY_DATASYNC simply "feels" better than just
3818c2ecf20Sopenharmony_ci * I_DIRTY_SYNC, since the file data has not actually hit the block device yet,
3828c2ecf20Sopenharmony_ci * which is not what I_DIRTY_SYNC on its own would suggest.
3838c2ecf20Sopenharmony_ci */
3848c2ecf20Sopenharmony_civoid __mark_mft_record_dirty(ntfs_inode *ni)
3858c2ecf20Sopenharmony_ci{
3868c2ecf20Sopenharmony_ci	ntfs_inode *base_ni;
3878c2ecf20Sopenharmony_ci
3888c2ecf20Sopenharmony_ci	ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
3898c2ecf20Sopenharmony_ci	BUG_ON(NInoAttr(ni));
3908c2ecf20Sopenharmony_ci	mark_ntfs_record_dirty(ni->page, ni->page_ofs);
3918c2ecf20Sopenharmony_ci	/* Determine the base vfs inode and mark it dirty, too. */
3928c2ecf20Sopenharmony_ci	mutex_lock(&ni->extent_lock);
3938c2ecf20Sopenharmony_ci	if (likely(ni->nr_extents >= 0))
3948c2ecf20Sopenharmony_ci		base_ni = ni;
3958c2ecf20Sopenharmony_ci	else
3968c2ecf20Sopenharmony_ci		base_ni = ni->ext.base_ntfs_ino;
3978c2ecf20Sopenharmony_ci	mutex_unlock(&ni->extent_lock);
3988c2ecf20Sopenharmony_ci	__mark_inode_dirty(VFS_I(base_ni), I_DIRTY_DATASYNC);
3998c2ecf20Sopenharmony_ci}
4008c2ecf20Sopenharmony_ci
/*
 * Request appended (via "%s") to error messages for situations the driver
 * authors would like to hear about.  Both the text and the pointer are
 * read-only.
 */
static const char * const ntfs_please_email = "Please email "
		"linux-ntfs-dev@lists.sourceforge.net and say that you saw "
		"this message.  Thank you.";
4048c2ecf20Sopenharmony_ci
4058c2ecf20Sopenharmony_ci/**
4068c2ecf20Sopenharmony_ci * ntfs_sync_mft_mirror_umount - synchronise an mft record to the mft mirror
4078c2ecf20Sopenharmony_ci * @vol:	ntfs volume on which the mft record to synchronize resides
4088c2ecf20Sopenharmony_ci * @mft_no:	mft record number of mft record to synchronize
4098c2ecf20Sopenharmony_ci * @m:		mapped, mst protected (extent) mft record to synchronize
4108c2ecf20Sopenharmony_ci *
4118c2ecf20Sopenharmony_ci * Write the mapped, mst protected (extent) mft record @m with mft record
4128c2ecf20Sopenharmony_ci * number @mft_no to the mft mirror ($MFTMirr) of the ntfs volume @vol,
4138c2ecf20Sopenharmony_ci * bypassing the page cache and the $MFTMirr inode itself.
4148c2ecf20Sopenharmony_ci *
4158c2ecf20Sopenharmony_ci * This function is only for use at umount time when the mft mirror inode has
4168c2ecf20Sopenharmony_ci * already been disposed off.  We BUG() if we are called while the mft mirror
4178c2ecf20Sopenharmony_ci * inode is still attached to the volume.
4188c2ecf20Sopenharmony_ci *
4198c2ecf20Sopenharmony_ci * On success return 0.  On error return -errno.
4208c2ecf20Sopenharmony_ci *
4218c2ecf20Sopenharmony_ci * NOTE:  This function is not implemented yet as I am not convinced it can
4228c2ecf20Sopenharmony_ci * actually be triggered considering the sequence of commits we do in super.c::
4238c2ecf20Sopenharmony_ci * ntfs_put_super().  But just in case we provide this place holder as the
4248c2ecf20Sopenharmony_ci * alternative would be either to BUG() or to get a NULL pointer dereference
4258c2ecf20Sopenharmony_ci * and Oops.
4268c2ecf20Sopenharmony_ci */
4278c2ecf20Sopenharmony_cistatic int ntfs_sync_mft_mirror_umount(ntfs_volume *vol,
4288c2ecf20Sopenharmony_ci		const unsigned long mft_no, MFT_RECORD *m)
4298c2ecf20Sopenharmony_ci{
4308c2ecf20Sopenharmony_ci	BUG_ON(vol->mftmirr_ino);
4318c2ecf20Sopenharmony_ci	ntfs_error(vol->sb, "Umount time mft mirror syncing is not "
4328c2ecf20Sopenharmony_ci			"implemented yet.  %s", ntfs_please_email);
4338c2ecf20Sopenharmony_ci	return -EOPNOTSUPP;
4348c2ecf20Sopenharmony_ci}
4358c2ecf20Sopenharmony_ci
4368c2ecf20Sopenharmony_ci/**
4378c2ecf20Sopenharmony_ci * ntfs_sync_mft_mirror - synchronize an mft record to the mft mirror
4388c2ecf20Sopenharmony_ci * @vol:	ntfs volume on which the mft record to synchronize resides
4398c2ecf20Sopenharmony_ci * @mft_no:	mft record number of mft record to synchronize
4408c2ecf20Sopenharmony_ci * @m:		mapped, mst protected (extent) mft record to synchronize
4418c2ecf20Sopenharmony_ci * @sync:	if true, wait for i/o completion
4428c2ecf20Sopenharmony_ci *
4438c2ecf20Sopenharmony_ci * Write the mapped, mst protected (extent) mft record @m with mft record
4448c2ecf20Sopenharmony_ci * number @mft_no to the mft mirror ($MFTMirr) of the ntfs volume @vol.
4458c2ecf20Sopenharmony_ci *
4468c2ecf20Sopenharmony_ci * On success return 0.  On error return -errno and set the volume errors flag
4478c2ecf20Sopenharmony_ci * in the ntfs volume @vol.
4488c2ecf20Sopenharmony_ci *
4498c2ecf20Sopenharmony_ci * NOTE:  We always perform synchronous i/o and ignore the @sync parameter.
4508c2ecf20Sopenharmony_ci *
4518c2ecf20Sopenharmony_ci * TODO:  If @sync is false, want to do truly asynchronous i/o, i.e. just
4528c2ecf20Sopenharmony_ci * schedule i/o via ->writepage or do it via kntfsd or whatever.
4538c2ecf20Sopenharmony_ci */
4548c2ecf20Sopenharmony_ciint ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
4558c2ecf20Sopenharmony_ci		MFT_RECORD *m, int sync)
4568c2ecf20Sopenharmony_ci{
4578c2ecf20Sopenharmony_ci	struct page *page;
4588c2ecf20Sopenharmony_ci	unsigned int blocksize = vol->sb->s_blocksize;
4598c2ecf20Sopenharmony_ci	int max_bhs = vol->mft_record_size / blocksize;
4608c2ecf20Sopenharmony_ci	struct buffer_head *bhs[MAX_BHS];
4618c2ecf20Sopenharmony_ci	struct buffer_head *bh, *head;
4628c2ecf20Sopenharmony_ci	u8 *kmirr;
4638c2ecf20Sopenharmony_ci	runlist_element *rl;
4648c2ecf20Sopenharmony_ci	unsigned int block_start, block_end, m_start, m_end, page_ofs;
4658c2ecf20Sopenharmony_ci	int i_bhs, nr_bhs, err = 0;
4668c2ecf20Sopenharmony_ci	unsigned char blocksize_bits = vol->sb->s_blocksize_bits;
4678c2ecf20Sopenharmony_ci
4688c2ecf20Sopenharmony_ci	ntfs_debug("Entering for inode 0x%lx.", mft_no);
4698c2ecf20Sopenharmony_ci	BUG_ON(!max_bhs);
4708c2ecf20Sopenharmony_ci	if (WARN_ON(max_bhs > MAX_BHS))
4718c2ecf20Sopenharmony_ci		return -EINVAL;
4728c2ecf20Sopenharmony_ci	if (unlikely(!vol->mftmirr_ino)) {
4738c2ecf20Sopenharmony_ci		/* This could happen during umount... */
4748c2ecf20Sopenharmony_ci		err = ntfs_sync_mft_mirror_umount(vol, mft_no, m);
4758c2ecf20Sopenharmony_ci		if (likely(!err))
4768c2ecf20Sopenharmony_ci			return err;
4778c2ecf20Sopenharmony_ci		goto err_out;
4788c2ecf20Sopenharmony_ci	}
4798c2ecf20Sopenharmony_ci	/* Get the page containing the mirror copy of the mft record @m. */
4808c2ecf20Sopenharmony_ci	page = ntfs_map_page(vol->mftmirr_ino->i_mapping, mft_no >>
4818c2ecf20Sopenharmony_ci			(PAGE_SHIFT - vol->mft_record_size_bits));
4828c2ecf20Sopenharmony_ci	if (IS_ERR(page)) {
4838c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to map mft mirror page.");
4848c2ecf20Sopenharmony_ci		err = PTR_ERR(page);
4858c2ecf20Sopenharmony_ci		goto err_out;
4868c2ecf20Sopenharmony_ci	}
4878c2ecf20Sopenharmony_ci	lock_page(page);
4888c2ecf20Sopenharmony_ci	BUG_ON(!PageUptodate(page));
4898c2ecf20Sopenharmony_ci	ClearPageUptodate(page);
4908c2ecf20Sopenharmony_ci	/* Offset of the mft mirror record inside the page. */
4918c2ecf20Sopenharmony_ci	page_ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_MASK;
4928c2ecf20Sopenharmony_ci	/* The address in the page of the mirror copy of the mft record @m. */
4938c2ecf20Sopenharmony_ci	kmirr = page_address(page) + page_ofs;
4948c2ecf20Sopenharmony_ci	/* Copy the mst protected mft record to the mirror. */
4958c2ecf20Sopenharmony_ci	memcpy(kmirr, m, vol->mft_record_size);
4968c2ecf20Sopenharmony_ci	/* Create uptodate buffers if not present. */
4978c2ecf20Sopenharmony_ci	if (unlikely(!page_has_buffers(page))) {
4988c2ecf20Sopenharmony_ci		struct buffer_head *tail;
4998c2ecf20Sopenharmony_ci
5008c2ecf20Sopenharmony_ci		bh = head = alloc_page_buffers(page, blocksize, true);
5018c2ecf20Sopenharmony_ci		do {
5028c2ecf20Sopenharmony_ci			set_buffer_uptodate(bh);
5038c2ecf20Sopenharmony_ci			tail = bh;
5048c2ecf20Sopenharmony_ci			bh = bh->b_this_page;
5058c2ecf20Sopenharmony_ci		} while (bh);
5068c2ecf20Sopenharmony_ci		tail->b_this_page = head;
5078c2ecf20Sopenharmony_ci		attach_page_private(page, head);
5088c2ecf20Sopenharmony_ci	}
5098c2ecf20Sopenharmony_ci	bh = head = page_buffers(page);
5108c2ecf20Sopenharmony_ci	BUG_ON(!bh);
5118c2ecf20Sopenharmony_ci	rl = NULL;
5128c2ecf20Sopenharmony_ci	nr_bhs = 0;
5138c2ecf20Sopenharmony_ci	block_start = 0;
5148c2ecf20Sopenharmony_ci	m_start = kmirr - (u8*)page_address(page);
5158c2ecf20Sopenharmony_ci	m_end = m_start + vol->mft_record_size;
5168c2ecf20Sopenharmony_ci	do {
5178c2ecf20Sopenharmony_ci		block_end = block_start + blocksize;
5188c2ecf20Sopenharmony_ci		/* If the buffer is outside the mft record, skip it. */
5198c2ecf20Sopenharmony_ci		if (block_end <= m_start)
5208c2ecf20Sopenharmony_ci			continue;
5218c2ecf20Sopenharmony_ci		if (unlikely(block_start >= m_end))
5228c2ecf20Sopenharmony_ci			break;
5238c2ecf20Sopenharmony_ci		/* Need to map the buffer if it is not mapped already. */
5248c2ecf20Sopenharmony_ci		if (unlikely(!buffer_mapped(bh))) {
5258c2ecf20Sopenharmony_ci			VCN vcn;
5268c2ecf20Sopenharmony_ci			LCN lcn;
5278c2ecf20Sopenharmony_ci			unsigned int vcn_ofs;
5288c2ecf20Sopenharmony_ci
5298c2ecf20Sopenharmony_ci			bh->b_bdev = vol->sb->s_bdev;
5308c2ecf20Sopenharmony_ci			/* Obtain the vcn and offset of the current block. */
5318c2ecf20Sopenharmony_ci			vcn = ((VCN)mft_no << vol->mft_record_size_bits) +
5328c2ecf20Sopenharmony_ci					(block_start - m_start);
5338c2ecf20Sopenharmony_ci			vcn_ofs = vcn & vol->cluster_size_mask;
5348c2ecf20Sopenharmony_ci			vcn >>= vol->cluster_size_bits;
5358c2ecf20Sopenharmony_ci			if (!rl) {
5368c2ecf20Sopenharmony_ci				down_read(&NTFS_I(vol->mftmirr_ino)->
5378c2ecf20Sopenharmony_ci						runlist.lock);
5388c2ecf20Sopenharmony_ci				rl = NTFS_I(vol->mftmirr_ino)->runlist.rl;
5398c2ecf20Sopenharmony_ci				/*
5408c2ecf20Sopenharmony_ci				 * $MFTMirr always has the whole of its runlist
5418c2ecf20Sopenharmony_ci				 * in memory.
5428c2ecf20Sopenharmony_ci				 */
5438c2ecf20Sopenharmony_ci				BUG_ON(!rl);
5448c2ecf20Sopenharmony_ci			}
5458c2ecf20Sopenharmony_ci			/* Seek to element containing target vcn. */
5468c2ecf20Sopenharmony_ci			while (rl->length && rl[1].vcn <= vcn)
5478c2ecf20Sopenharmony_ci				rl++;
5488c2ecf20Sopenharmony_ci			lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
5498c2ecf20Sopenharmony_ci			/* For $MFTMirr, only lcn >= 0 is a successful remap. */
5508c2ecf20Sopenharmony_ci			if (likely(lcn >= 0)) {
5518c2ecf20Sopenharmony_ci				/* Setup buffer head to correct block. */
5528c2ecf20Sopenharmony_ci				bh->b_blocknr = ((lcn <<
5538c2ecf20Sopenharmony_ci						vol->cluster_size_bits) +
5548c2ecf20Sopenharmony_ci						vcn_ofs) >> blocksize_bits;
5558c2ecf20Sopenharmony_ci				set_buffer_mapped(bh);
5568c2ecf20Sopenharmony_ci			} else {
5578c2ecf20Sopenharmony_ci				bh->b_blocknr = -1;
5588c2ecf20Sopenharmony_ci				ntfs_error(vol->sb, "Cannot write mft mirror "
5598c2ecf20Sopenharmony_ci						"record 0x%lx because its "
5608c2ecf20Sopenharmony_ci						"location on disk could not "
5618c2ecf20Sopenharmony_ci						"be determined (error code "
5628c2ecf20Sopenharmony_ci						"%lli).", mft_no,
5638c2ecf20Sopenharmony_ci						(long long)lcn);
5648c2ecf20Sopenharmony_ci				err = -EIO;
5658c2ecf20Sopenharmony_ci			}
5668c2ecf20Sopenharmony_ci		}
5678c2ecf20Sopenharmony_ci		BUG_ON(!buffer_uptodate(bh));
5688c2ecf20Sopenharmony_ci		BUG_ON(!nr_bhs && (m_start != block_start));
5698c2ecf20Sopenharmony_ci		BUG_ON(nr_bhs >= max_bhs);
5708c2ecf20Sopenharmony_ci		bhs[nr_bhs++] = bh;
5718c2ecf20Sopenharmony_ci		BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end));
5728c2ecf20Sopenharmony_ci	} while (block_start = block_end, (bh = bh->b_this_page) != head);
5738c2ecf20Sopenharmony_ci	if (unlikely(rl))
5748c2ecf20Sopenharmony_ci		up_read(&NTFS_I(vol->mftmirr_ino)->runlist.lock);
5758c2ecf20Sopenharmony_ci	if (likely(!err)) {
5768c2ecf20Sopenharmony_ci		/* Lock buffers and start synchronous write i/o on them. */
5778c2ecf20Sopenharmony_ci		for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
5788c2ecf20Sopenharmony_ci			struct buffer_head *tbh = bhs[i_bhs];
5798c2ecf20Sopenharmony_ci
5808c2ecf20Sopenharmony_ci			if (!trylock_buffer(tbh))
5818c2ecf20Sopenharmony_ci				BUG();
5828c2ecf20Sopenharmony_ci			BUG_ON(!buffer_uptodate(tbh));
5838c2ecf20Sopenharmony_ci			clear_buffer_dirty(tbh);
5848c2ecf20Sopenharmony_ci			get_bh(tbh);
5858c2ecf20Sopenharmony_ci			tbh->b_end_io = end_buffer_write_sync;
5868c2ecf20Sopenharmony_ci			submit_bh(REQ_OP_WRITE, 0, tbh);
5878c2ecf20Sopenharmony_ci		}
5888c2ecf20Sopenharmony_ci		/* Wait on i/o completion of buffers. */
5898c2ecf20Sopenharmony_ci		for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
5908c2ecf20Sopenharmony_ci			struct buffer_head *tbh = bhs[i_bhs];
5918c2ecf20Sopenharmony_ci
5928c2ecf20Sopenharmony_ci			wait_on_buffer(tbh);
5938c2ecf20Sopenharmony_ci			if (unlikely(!buffer_uptodate(tbh))) {
5948c2ecf20Sopenharmony_ci				err = -EIO;
5958c2ecf20Sopenharmony_ci				/*
5968c2ecf20Sopenharmony_ci				 * Set the buffer uptodate so the page and
5978c2ecf20Sopenharmony_ci				 * buffer states do not become out of sync.
5988c2ecf20Sopenharmony_ci				 */
5998c2ecf20Sopenharmony_ci				set_buffer_uptodate(tbh);
6008c2ecf20Sopenharmony_ci			}
6018c2ecf20Sopenharmony_ci		}
6028c2ecf20Sopenharmony_ci	} else /* if (unlikely(err)) */ {
6038c2ecf20Sopenharmony_ci		/* Clean the buffers. */
6048c2ecf20Sopenharmony_ci		for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++)
6058c2ecf20Sopenharmony_ci			clear_buffer_dirty(bhs[i_bhs]);
6068c2ecf20Sopenharmony_ci	}
6078c2ecf20Sopenharmony_ci	/* Current state: all buffers are clean, unlocked, and uptodate. */
6088c2ecf20Sopenharmony_ci	/* Remove the mst protection fixups again. */
6098c2ecf20Sopenharmony_ci	post_write_mst_fixup((NTFS_RECORD*)kmirr);
6108c2ecf20Sopenharmony_ci	flush_dcache_page(page);
6118c2ecf20Sopenharmony_ci	SetPageUptodate(page);
6128c2ecf20Sopenharmony_ci	unlock_page(page);
6138c2ecf20Sopenharmony_ci	ntfs_unmap_page(page);
6148c2ecf20Sopenharmony_ci	if (likely(!err)) {
6158c2ecf20Sopenharmony_ci		ntfs_debug("Done.");
6168c2ecf20Sopenharmony_ci	} else {
6178c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "I/O error while writing mft mirror "
6188c2ecf20Sopenharmony_ci				"record 0x%lx!", mft_no);
6198c2ecf20Sopenharmony_cierr_out:
6208c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to synchronize $MFTMirr (error "
6218c2ecf20Sopenharmony_ci				"code %i).  Volume will be left marked dirty "
6228c2ecf20Sopenharmony_ci				"on umount.  Run ntfsfix on the partition "
6238c2ecf20Sopenharmony_ci				"after umounting to correct this.", -err);
6248c2ecf20Sopenharmony_ci		NVolSetErrors(vol);
6258c2ecf20Sopenharmony_ci	}
6268c2ecf20Sopenharmony_ci	return err;
6278c2ecf20Sopenharmony_ci}
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_ci/**
6308c2ecf20Sopenharmony_ci * write_mft_record_nolock - write out a mapped (extent) mft record
6318c2ecf20Sopenharmony_ci * @ni:		ntfs inode describing the mapped (extent) mft record
6328c2ecf20Sopenharmony_ci * @m:		mapped (extent) mft record to write
6338c2ecf20Sopenharmony_ci * @sync:	if true, wait for i/o completion
6348c2ecf20Sopenharmony_ci *
6358c2ecf20Sopenharmony_ci * Write the mapped (extent) mft record @m described by the (regular or extent)
6368c2ecf20Sopenharmony_ci * ntfs inode @ni to backing store.  If the mft record @m has a counterpart in
6378c2ecf20Sopenharmony_ci * the mft mirror, that is also updated.
6388c2ecf20Sopenharmony_ci *
6398c2ecf20Sopenharmony_ci * We only write the mft record if the ntfs inode @ni is dirty and the first
6408c2ecf20Sopenharmony_ci * buffer belonging to its mft record is dirty, too.  We ignore the dirty state
6418c2ecf20Sopenharmony_ci * of subsequent buffers because we could have raced with
6428c2ecf20Sopenharmony_ci * fs/ntfs/aops.c::mark_ntfs_record_dirty().
6438c2ecf20Sopenharmony_ci *
6448c2ecf20Sopenharmony_ci * On success, clean the mft record and return 0.  On error, leave the mft
6458c2ecf20Sopenharmony_ci * record dirty and return -errno.
6468c2ecf20Sopenharmony_ci *
6478c2ecf20Sopenharmony_ci * NOTE:  We always perform synchronous i/o and ignore the @sync parameter.
6488c2ecf20Sopenharmony_ci * However, if the mft record has a counterpart in the mft mirror and @sync is
6498c2ecf20Sopenharmony_ci * true, we write the mft record, wait for i/o completion, and only then write
6508c2ecf20Sopenharmony_ci * the mft mirror copy.  This ensures that if the system crashes either the mft
6518c2ecf20Sopenharmony_ci * or the mft mirror will contain a self-consistent mft record @m.  If @sync is
6528c2ecf20Sopenharmony_ci * false on the other hand, we start i/o on both and then wait for completion
6538c2ecf20Sopenharmony_ci * on them.  This provides a speedup but no longer guarantees that you will end
6548c2ecf20Sopenharmony_ci * up with a self-consistent mft record in the case of a crash but if you asked
6558c2ecf20Sopenharmony_ci * for asynchronous writing you probably do not care about that anyway.
6568c2ecf20Sopenharmony_ci *
6578c2ecf20Sopenharmony_ci * TODO:  If @sync is false, want to do truly asynchronous i/o, i.e. just
6588c2ecf20Sopenharmony_ci * schedule i/o via ->writepage or do it via kntfsd or whatever.
6598c2ecf20Sopenharmony_ci */
6608c2ecf20Sopenharmony_ciint write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
6618c2ecf20Sopenharmony_ci{
6628c2ecf20Sopenharmony_ci	ntfs_volume *vol = ni->vol;
6638c2ecf20Sopenharmony_ci	struct page *page = ni->page;
6648c2ecf20Sopenharmony_ci	unsigned int blocksize = vol->sb->s_blocksize;
6658c2ecf20Sopenharmony_ci	unsigned char blocksize_bits = vol->sb->s_blocksize_bits;
6668c2ecf20Sopenharmony_ci	int max_bhs = vol->mft_record_size / blocksize;
6678c2ecf20Sopenharmony_ci	struct buffer_head *bhs[MAX_BHS];
6688c2ecf20Sopenharmony_ci	struct buffer_head *bh, *head;
6698c2ecf20Sopenharmony_ci	runlist_element *rl;
6708c2ecf20Sopenharmony_ci	unsigned int block_start, block_end, m_start, m_end;
6718c2ecf20Sopenharmony_ci	int i_bhs, nr_bhs, err = 0;
6728c2ecf20Sopenharmony_ci
6738c2ecf20Sopenharmony_ci	ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
6748c2ecf20Sopenharmony_ci	BUG_ON(NInoAttr(ni));
6758c2ecf20Sopenharmony_ci	BUG_ON(!max_bhs);
6768c2ecf20Sopenharmony_ci	BUG_ON(!PageLocked(page));
6778c2ecf20Sopenharmony_ci	if (WARN_ON(max_bhs > MAX_BHS)) {
6788c2ecf20Sopenharmony_ci		err = -EINVAL;
6798c2ecf20Sopenharmony_ci		goto err_out;
6808c2ecf20Sopenharmony_ci	}
6818c2ecf20Sopenharmony_ci	/*
6828c2ecf20Sopenharmony_ci	 * If the ntfs_inode is clean no need to do anything.  If it is dirty,
6838c2ecf20Sopenharmony_ci	 * mark it as clean now so that it can be redirtied later on if needed.
6848c2ecf20Sopenharmony_ci	 * There is no danger of races since the caller is holding the locks
6858c2ecf20Sopenharmony_ci	 * for the mft record @m and the page it is in.
6868c2ecf20Sopenharmony_ci	 */
6878c2ecf20Sopenharmony_ci	if (!NInoTestClearDirty(ni))
6888c2ecf20Sopenharmony_ci		goto done;
6898c2ecf20Sopenharmony_ci	bh = head = page_buffers(page);
6908c2ecf20Sopenharmony_ci	BUG_ON(!bh);
6918c2ecf20Sopenharmony_ci	rl = NULL;
6928c2ecf20Sopenharmony_ci	nr_bhs = 0;
6938c2ecf20Sopenharmony_ci	block_start = 0;
6948c2ecf20Sopenharmony_ci	m_start = ni->page_ofs;
6958c2ecf20Sopenharmony_ci	m_end = m_start + vol->mft_record_size;
6968c2ecf20Sopenharmony_ci	do {
6978c2ecf20Sopenharmony_ci		block_end = block_start + blocksize;
6988c2ecf20Sopenharmony_ci		/* If the buffer is outside the mft record, skip it. */
6998c2ecf20Sopenharmony_ci		if (block_end <= m_start)
7008c2ecf20Sopenharmony_ci			continue;
7018c2ecf20Sopenharmony_ci		if (unlikely(block_start >= m_end))
7028c2ecf20Sopenharmony_ci			break;
7038c2ecf20Sopenharmony_ci		/*
7048c2ecf20Sopenharmony_ci		 * If this block is not the first one in the record, we ignore
7058c2ecf20Sopenharmony_ci		 * the buffer's dirty state because we could have raced with a
7068c2ecf20Sopenharmony_ci		 * parallel mark_ntfs_record_dirty().
7078c2ecf20Sopenharmony_ci		 */
7088c2ecf20Sopenharmony_ci		if (block_start == m_start) {
7098c2ecf20Sopenharmony_ci			/* This block is the first one in the record. */
7108c2ecf20Sopenharmony_ci			if (!buffer_dirty(bh)) {
7118c2ecf20Sopenharmony_ci				BUG_ON(nr_bhs);
7128c2ecf20Sopenharmony_ci				/* Clean records are not written out. */
7138c2ecf20Sopenharmony_ci				break;
7148c2ecf20Sopenharmony_ci			}
7158c2ecf20Sopenharmony_ci		}
7168c2ecf20Sopenharmony_ci		/* Need to map the buffer if it is not mapped already. */
7178c2ecf20Sopenharmony_ci		if (unlikely(!buffer_mapped(bh))) {
7188c2ecf20Sopenharmony_ci			VCN vcn;
7198c2ecf20Sopenharmony_ci			LCN lcn;
7208c2ecf20Sopenharmony_ci			unsigned int vcn_ofs;
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_ci			bh->b_bdev = vol->sb->s_bdev;
7238c2ecf20Sopenharmony_ci			/* Obtain the vcn and offset of the current block. */
7248c2ecf20Sopenharmony_ci			vcn = ((VCN)ni->mft_no << vol->mft_record_size_bits) +
7258c2ecf20Sopenharmony_ci					(block_start - m_start);
7268c2ecf20Sopenharmony_ci			vcn_ofs = vcn & vol->cluster_size_mask;
7278c2ecf20Sopenharmony_ci			vcn >>= vol->cluster_size_bits;
7288c2ecf20Sopenharmony_ci			if (!rl) {
7298c2ecf20Sopenharmony_ci				down_read(&NTFS_I(vol->mft_ino)->runlist.lock);
7308c2ecf20Sopenharmony_ci				rl = NTFS_I(vol->mft_ino)->runlist.rl;
7318c2ecf20Sopenharmony_ci				BUG_ON(!rl);
7328c2ecf20Sopenharmony_ci			}
7338c2ecf20Sopenharmony_ci			/* Seek to element containing target vcn. */
7348c2ecf20Sopenharmony_ci			while (rl->length && rl[1].vcn <= vcn)
7358c2ecf20Sopenharmony_ci				rl++;
7368c2ecf20Sopenharmony_ci			lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
7378c2ecf20Sopenharmony_ci			/* For $MFT, only lcn >= 0 is a successful remap. */
7388c2ecf20Sopenharmony_ci			if (likely(lcn >= 0)) {
7398c2ecf20Sopenharmony_ci				/* Setup buffer head to correct block. */
7408c2ecf20Sopenharmony_ci				bh->b_blocknr = ((lcn <<
7418c2ecf20Sopenharmony_ci						vol->cluster_size_bits) +
7428c2ecf20Sopenharmony_ci						vcn_ofs) >> blocksize_bits;
7438c2ecf20Sopenharmony_ci				set_buffer_mapped(bh);
7448c2ecf20Sopenharmony_ci			} else {
7458c2ecf20Sopenharmony_ci				bh->b_blocknr = -1;
7468c2ecf20Sopenharmony_ci				ntfs_error(vol->sb, "Cannot write mft record "
7478c2ecf20Sopenharmony_ci						"0x%lx because its location "
7488c2ecf20Sopenharmony_ci						"on disk could not be "
7498c2ecf20Sopenharmony_ci						"determined (error code %lli).",
7508c2ecf20Sopenharmony_ci						ni->mft_no, (long long)lcn);
7518c2ecf20Sopenharmony_ci				err = -EIO;
7528c2ecf20Sopenharmony_ci			}
7538c2ecf20Sopenharmony_ci		}
7548c2ecf20Sopenharmony_ci		BUG_ON(!buffer_uptodate(bh));
7558c2ecf20Sopenharmony_ci		BUG_ON(!nr_bhs && (m_start != block_start));
7568c2ecf20Sopenharmony_ci		BUG_ON(nr_bhs >= max_bhs);
7578c2ecf20Sopenharmony_ci		bhs[nr_bhs++] = bh;
7588c2ecf20Sopenharmony_ci		BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end));
7598c2ecf20Sopenharmony_ci	} while (block_start = block_end, (bh = bh->b_this_page) != head);
7608c2ecf20Sopenharmony_ci	if (unlikely(rl))
7618c2ecf20Sopenharmony_ci		up_read(&NTFS_I(vol->mft_ino)->runlist.lock);
7628c2ecf20Sopenharmony_ci	if (!nr_bhs)
7638c2ecf20Sopenharmony_ci		goto done;
7648c2ecf20Sopenharmony_ci	if (unlikely(err))
7658c2ecf20Sopenharmony_ci		goto cleanup_out;
7668c2ecf20Sopenharmony_ci	/* Apply the mst protection fixups. */
7678c2ecf20Sopenharmony_ci	err = pre_write_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size);
7688c2ecf20Sopenharmony_ci	if (err) {
7698c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to apply mst fixups!");
7708c2ecf20Sopenharmony_ci		goto cleanup_out;
7718c2ecf20Sopenharmony_ci	}
7728c2ecf20Sopenharmony_ci	flush_dcache_mft_record_page(ni);
7738c2ecf20Sopenharmony_ci	/* Lock buffers and start synchronous write i/o on them. */
7748c2ecf20Sopenharmony_ci	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
7758c2ecf20Sopenharmony_ci		struct buffer_head *tbh = bhs[i_bhs];
7768c2ecf20Sopenharmony_ci
7778c2ecf20Sopenharmony_ci		if (!trylock_buffer(tbh))
7788c2ecf20Sopenharmony_ci			BUG();
7798c2ecf20Sopenharmony_ci		BUG_ON(!buffer_uptodate(tbh));
7808c2ecf20Sopenharmony_ci		clear_buffer_dirty(tbh);
7818c2ecf20Sopenharmony_ci		get_bh(tbh);
7828c2ecf20Sopenharmony_ci		tbh->b_end_io = end_buffer_write_sync;
7838c2ecf20Sopenharmony_ci		submit_bh(REQ_OP_WRITE, 0, tbh);
7848c2ecf20Sopenharmony_ci	}
7858c2ecf20Sopenharmony_ci	/* Synchronize the mft mirror now if not @sync. */
7868c2ecf20Sopenharmony_ci	if (!sync && ni->mft_no < vol->mftmirr_size)
7878c2ecf20Sopenharmony_ci		ntfs_sync_mft_mirror(vol, ni->mft_no, m, sync);
7888c2ecf20Sopenharmony_ci	/* Wait on i/o completion of buffers. */
7898c2ecf20Sopenharmony_ci	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
7908c2ecf20Sopenharmony_ci		struct buffer_head *tbh = bhs[i_bhs];
7918c2ecf20Sopenharmony_ci
7928c2ecf20Sopenharmony_ci		wait_on_buffer(tbh);
7938c2ecf20Sopenharmony_ci		if (unlikely(!buffer_uptodate(tbh))) {
7948c2ecf20Sopenharmony_ci			err = -EIO;
7958c2ecf20Sopenharmony_ci			/*
7968c2ecf20Sopenharmony_ci			 * Set the buffer uptodate so the page and buffer
7978c2ecf20Sopenharmony_ci			 * states do not become out of sync.
7988c2ecf20Sopenharmony_ci			 */
7998c2ecf20Sopenharmony_ci			if (PageUptodate(page))
8008c2ecf20Sopenharmony_ci				set_buffer_uptodate(tbh);
8018c2ecf20Sopenharmony_ci		}
8028c2ecf20Sopenharmony_ci	}
8038c2ecf20Sopenharmony_ci	/* If @sync, now synchronize the mft mirror. */
8048c2ecf20Sopenharmony_ci	if (sync && ni->mft_no < vol->mftmirr_size)
8058c2ecf20Sopenharmony_ci		ntfs_sync_mft_mirror(vol, ni->mft_no, m, sync);
8068c2ecf20Sopenharmony_ci	/* Remove the mst protection fixups again. */
8078c2ecf20Sopenharmony_ci	post_write_mst_fixup((NTFS_RECORD*)m);
8088c2ecf20Sopenharmony_ci	flush_dcache_mft_record_page(ni);
8098c2ecf20Sopenharmony_ci	if (unlikely(err)) {
8108c2ecf20Sopenharmony_ci		/* I/O error during writing.  This is really bad! */
8118c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "I/O error while writing mft record "
8128c2ecf20Sopenharmony_ci				"0x%lx!  Marking base inode as bad.  You "
8138c2ecf20Sopenharmony_ci				"should unmount the volume and run chkdsk.",
8148c2ecf20Sopenharmony_ci				ni->mft_no);
8158c2ecf20Sopenharmony_ci		goto err_out;
8168c2ecf20Sopenharmony_ci	}
8178c2ecf20Sopenharmony_cidone:
8188c2ecf20Sopenharmony_ci	ntfs_debug("Done.");
8198c2ecf20Sopenharmony_ci	return 0;
8208c2ecf20Sopenharmony_cicleanup_out:
8218c2ecf20Sopenharmony_ci	/* Clean the buffers. */
8228c2ecf20Sopenharmony_ci	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++)
8238c2ecf20Sopenharmony_ci		clear_buffer_dirty(bhs[i_bhs]);
8248c2ecf20Sopenharmony_cierr_out:
8258c2ecf20Sopenharmony_ci	/*
8268c2ecf20Sopenharmony_ci	 * Current state: all buffers are clean, unlocked, and uptodate.
8278c2ecf20Sopenharmony_ci	 * The caller should mark the base inode as bad so that no more i/o
8288c2ecf20Sopenharmony_ci	 * happens.  ->clear_inode() will still be invoked so all extent inodes
8298c2ecf20Sopenharmony_ci	 * and other allocated memory will be freed.
8308c2ecf20Sopenharmony_ci	 */
8318c2ecf20Sopenharmony_ci	if (err == -ENOMEM) {
8328c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Not enough memory to write mft record.  "
8338c2ecf20Sopenharmony_ci				"Redirtying so the write is retried later.");
8348c2ecf20Sopenharmony_ci		mark_mft_record_dirty(ni);
8358c2ecf20Sopenharmony_ci		err = 0;
8368c2ecf20Sopenharmony_ci	} else
8378c2ecf20Sopenharmony_ci		NVolSetErrors(vol);
8388c2ecf20Sopenharmony_ci	return err;
8398c2ecf20Sopenharmony_ci}
8408c2ecf20Sopenharmony_ci
8418c2ecf20Sopenharmony_ci/**
8428c2ecf20Sopenharmony_ci * ntfs_may_write_mft_record - check if an mft record may be written out
8438c2ecf20Sopenharmony_ci * @vol:	[IN]  ntfs volume on which the mft record to check resides
8448c2ecf20Sopenharmony_ci * @mft_no:	[IN]  mft record number of the mft record to check
8458c2ecf20Sopenharmony_ci * @m:		[IN]  mapped mft record to check
8468c2ecf20Sopenharmony_ci * @locked_ni:	[OUT] caller has to unlock this ntfs inode if one is returned
8478c2ecf20Sopenharmony_ci *
8488c2ecf20Sopenharmony_ci * Check if the mapped (base or extent) mft record @m with mft record number
8498c2ecf20Sopenharmony_ci * @mft_no belonging to the ntfs volume @vol may be written out.  If necessary
8508c2ecf20Sopenharmony_ci * and possible the ntfs inode of the mft record is locked and the base vfs
8518c2ecf20Sopenharmony_ci * inode is pinned.  The locked ntfs inode is then returned in @locked_ni.  The
8528c2ecf20Sopenharmony_ci * caller is responsible for unlocking the ntfs inode and unpinning the base
8538c2ecf20Sopenharmony_ci * vfs inode.
8548c2ecf20Sopenharmony_ci *
8558c2ecf20Sopenharmony_ci * Return 'true' if the mft record may be written out and 'false' if not.
8568c2ecf20Sopenharmony_ci *
8578c2ecf20Sopenharmony_ci * The caller has locked the page and cleared the uptodate flag on it which
8588c2ecf20Sopenharmony_ci * means that we can safely write out any dirty mft records that do not have
8598c2ecf20Sopenharmony_ci * their inodes in icache as determined by ilookup5() as anyone
8608c2ecf20Sopenharmony_ci * opening/creating such an inode would block when attempting to map the mft
8618c2ecf20Sopenharmony_ci * record in read_cache_page() until we are finished with the write out.
8628c2ecf20Sopenharmony_ci *
8638c2ecf20Sopenharmony_ci * Here is a description of the tests we perform:
8648c2ecf20Sopenharmony_ci *
8658c2ecf20Sopenharmony_ci * If the inode is found in icache we know the mft record must be a base mft
8668c2ecf20Sopenharmony_ci * record.  If it is dirty, we do not write it and return 'false' as the vfs
8678c2ecf20Sopenharmony_ci * inode write paths will result in the access times being updated which would
8688c2ecf20Sopenharmony_ci * cause the base mft record to be redirtied and written out again.  (We know
8698c2ecf20Sopenharmony_ci * the access time update will modify the base mft record because Windows
8708c2ecf20Sopenharmony_ci * chkdsk complains if the standard information attribute is not in the base
8718c2ecf20Sopenharmony_ci * mft record.)
8728c2ecf20Sopenharmony_ci *
8738c2ecf20Sopenharmony_ci * If the inode is in icache and not dirty, we attempt to lock the mft record
8748c2ecf20Sopenharmony_ci * and if we find the lock was already taken, it is not safe to write the mft
8758c2ecf20Sopenharmony_ci * record and we return 'false'.
8768c2ecf20Sopenharmony_ci *
8778c2ecf20Sopenharmony_ci * If we manage to obtain the lock we have exclusive access to the mft record,
8788c2ecf20Sopenharmony_ci * which also allows us safe writeout of the mft record.  We then set
8798c2ecf20Sopenharmony_ci * @locked_ni to the locked ntfs inode and return 'true'.
8808c2ecf20Sopenharmony_ci *
8818c2ecf20Sopenharmony_ci * Note we cannot just lock the mft record and sleep while waiting for the lock
8828c2ecf20Sopenharmony_ci * because this would deadlock due to lock reversal (normally the mft record is
8838c2ecf20Sopenharmony_ci * locked before the page is locked but we already have the page locked here
8848c2ecf20Sopenharmony_ci * when we try to lock the mft record).
8858c2ecf20Sopenharmony_ci *
8868c2ecf20Sopenharmony_ci * If the inode is not in icache we need to perform further checks.
8878c2ecf20Sopenharmony_ci *
8888c2ecf20Sopenharmony_ci * If the mft record is not a FILE record or it is a base mft record, we can
8898c2ecf20Sopenharmony_ci * safely write it and return 'true'.
8908c2ecf20Sopenharmony_ci *
8918c2ecf20Sopenharmony_ci * We now know the mft record is an extent mft record.  We check if the inode
8928c2ecf20Sopenharmony_ci * corresponding to its base mft record is in icache and obtain a reference to
8938c2ecf20Sopenharmony_ci * it if it is.  If it is not, we can safely write it and return 'true'.
8948c2ecf20Sopenharmony_ci *
8958c2ecf20Sopenharmony_ci * We now have the base inode for the extent mft record.  We check if it has an
8968c2ecf20Sopenharmony_ci * ntfs inode for the extent mft record attached and if not it is safe to write
8978c2ecf20Sopenharmony_ci * the extent mft record and we return 'true'.
8988c2ecf20Sopenharmony_ci *
8998c2ecf20Sopenharmony_ci * The ntfs inode for the extent mft record is attached to the base inode so we
9008c2ecf20Sopenharmony_ci * attempt to lock the extent mft record and if we find the lock was already
9018c2ecf20Sopenharmony_ci * taken, it is not safe to write the extent mft record and we return 'false'.
9028c2ecf20Sopenharmony_ci *
9038c2ecf20Sopenharmony_ci * If we manage to obtain the lock we have exclusive access to the extent mft
9048c2ecf20Sopenharmony_ci * record, which also allows us safe writeout of the extent mft record.  We
9058c2ecf20Sopenharmony_ci * set the ntfs inode of the extent mft record clean and then set @locked_ni to
9068c2ecf20Sopenharmony_ci * the now locked ntfs inode and return 'true'.
9078c2ecf20Sopenharmony_ci *
9088c2ecf20Sopenharmony_ci * Note, the reason for actually writing dirty mft records here and not just
9098c2ecf20Sopenharmony_ci * relying on the vfs inode dirty code paths is that we can have mft records
9108c2ecf20Sopenharmony_ci * modified without them ever having actual inodes in memory.  Also we can have
9118c2ecf20Sopenharmony_ci * dirty mft records with clean ntfs inodes in memory.  None of the described
9128c2ecf20Sopenharmony_ci * cases would result in the dirty mft records being written out if we only
9138c2ecf20Sopenharmony_ci * relied on the vfs inode dirty code paths.  And these cases can really occur
9148c2ecf20Sopenharmony_ci * during allocation of new mft records and in particular when the
9158c2ecf20Sopenharmony_ci * initialized_size of the $MFT/$DATA attribute is extended and the new space
9168c2ecf20Sopenharmony_ci * is initialized using ntfs_mft_record_format().  The clean inode can then
9178c2ecf20Sopenharmony_ci * appear if the mft record is reused for a new inode before it got written
9188c2ecf20Sopenharmony_ci * out.
9198c2ecf20Sopenharmony_ci */
9208c2ecf20Sopenharmony_cibool ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
9218c2ecf20Sopenharmony_ci		const MFT_RECORD *m, ntfs_inode **locked_ni)
9228c2ecf20Sopenharmony_ci{
9238c2ecf20Sopenharmony_ci	struct super_block *sb = vol->sb;
9248c2ecf20Sopenharmony_ci	struct inode *mft_vi = vol->mft_ino;
9258c2ecf20Sopenharmony_ci	struct inode *vi;
9268c2ecf20Sopenharmony_ci	ntfs_inode *ni, *eni, **extent_nis;
9278c2ecf20Sopenharmony_ci	int i;
9288c2ecf20Sopenharmony_ci	ntfs_attr na;
9298c2ecf20Sopenharmony_ci
9308c2ecf20Sopenharmony_ci	ntfs_debug("Entering for inode 0x%lx.", mft_no);
9318c2ecf20Sopenharmony_ci	/*
9328c2ecf20Sopenharmony_ci	 * Normally we do not return a locked inode so set @locked_ni to NULL.
9338c2ecf20Sopenharmony_ci	 */
9348c2ecf20Sopenharmony_ci	BUG_ON(!locked_ni);
9358c2ecf20Sopenharmony_ci	*locked_ni = NULL;
9368c2ecf20Sopenharmony_ci	/*
9378c2ecf20Sopenharmony_ci	 * Check if the inode corresponding to this mft record is in the VFS
9388c2ecf20Sopenharmony_ci	 * inode cache and obtain a reference to it if it is.
9398c2ecf20Sopenharmony_ci	 */
9408c2ecf20Sopenharmony_ci	ntfs_debug("Looking for inode 0x%lx in icache.", mft_no);
9418c2ecf20Sopenharmony_ci	na.mft_no = mft_no;
9428c2ecf20Sopenharmony_ci	na.name = NULL;
9438c2ecf20Sopenharmony_ci	na.name_len = 0;
9448c2ecf20Sopenharmony_ci	na.type = AT_UNUSED;
9458c2ecf20Sopenharmony_ci	/*
9468c2ecf20Sopenharmony_ci	 * Optimize inode 0, i.e. $MFT itself, since we have it in memory and
9478c2ecf20Sopenharmony_ci	 * we get here for it rather often.
9488c2ecf20Sopenharmony_ci	 */
9498c2ecf20Sopenharmony_ci	if (!mft_no) {
9508c2ecf20Sopenharmony_ci		/* Balance the below iput(). */
9518c2ecf20Sopenharmony_ci		vi = igrab(mft_vi);
9528c2ecf20Sopenharmony_ci		BUG_ON(vi != mft_vi);
9538c2ecf20Sopenharmony_ci	} else {
9548c2ecf20Sopenharmony_ci		/*
9558c2ecf20Sopenharmony_ci		 * Have to use ilookup5_nowait() since ilookup5() waits for the
9568c2ecf20Sopenharmony_ci		 * inode lock which causes ntfs to deadlock when a concurrent
9578c2ecf20Sopenharmony_ci		 * inode write via the inode dirty code paths and the page
9588c2ecf20Sopenharmony_ci		 * dirty code path of the inode dirty code path when writing
9598c2ecf20Sopenharmony_ci		 * $MFT occurs.
9608c2ecf20Sopenharmony_ci		 */
9618c2ecf20Sopenharmony_ci		vi = ilookup5_nowait(sb, mft_no, ntfs_test_inode, &na);
9628c2ecf20Sopenharmony_ci	}
9638c2ecf20Sopenharmony_ci	if (vi) {
9648c2ecf20Sopenharmony_ci		ntfs_debug("Base inode 0x%lx is in icache.", mft_no);
9658c2ecf20Sopenharmony_ci		/* The inode is in icache. */
9668c2ecf20Sopenharmony_ci		ni = NTFS_I(vi);
9678c2ecf20Sopenharmony_ci		/* Take a reference to the ntfs inode. */
9688c2ecf20Sopenharmony_ci		atomic_inc(&ni->count);
9698c2ecf20Sopenharmony_ci		/* If the inode is dirty, do not write this record. */
9708c2ecf20Sopenharmony_ci		if (NInoDirty(ni)) {
9718c2ecf20Sopenharmony_ci			ntfs_debug("Inode 0x%lx is dirty, do not write it.",
9728c2ecf20Sopenharmony_ci					mft_no);
9738c2ecf20Sopenharmony_ci			atomic_dec(&ni->count);
9748c2ecf20Sopenharmony_ci			iput(vi);
9758c2ecf20Sopenharmony_ci			return false;
9768c2ecf20Sopenharmony_ci		}
9778c2ecf20Sopenharmony_ci		ntfs_debug("Inode 0x%lx is not dirty.", mft_no);
9788c2ecf20Sopenharmony_ci		/* The inode is not dirty, try to take the mft record lock. */
9798c2ecf20Sopenharmony_ci		if (unlikely(!mutex_trylock(&ni->mrec_lock))) {
9808c2ecf20Sopenharmony_ci			ntfs_debug("Mft record 0x%lx is already locked, do "
9818c2ecf20Sopenharmony_ci					"not write it.", mft_no);
9828c2ecf20Sopenharmony_ci			atomic_dec(&ni->count);
9838c2ecf20Sopenharmony_ci			iput(vi);
9848c2ecf20Sopenharmony_ci			return false;
9858c2ecf20Sopenharmony_ci		}
9868c2ecf20Sopenharmony_ci		ntfs_debug("Managed to lock mft record 0x%lx, write it.",
9878c2ecf20Sopenharmony_ci				mft_no);
9888c2ecf20Sopenharmony_ci		/*
9898c2ecf20Sopenharmony_ci		 * The write has to occur while we hold the mft record lock so
9908c2ecf20Sopenharmony_ci		 * return the locked ntfs inode.
9918c2ecf20Sopenharmony_ci		 */
9928c2ecf20Sopenharmony_ci		*locked_ni = ni;
9938c2ecf20Sopenharmony_ci		return true;
9948c2ecf20Sopenharmony_ci	}
9958c2ecf20Sopenharmony_ci	ntfs_debug("Inode 0x%lx is not in icache.", mft_no);
9968c2ecf20Sopenharmony_ci	/* The inode is not in icache. */
9978c2ecf20Sopenharmony_ci	/* Write the record if it is not a mft record (type "FILE"). */
9988c2ecf20Sopenharmony_ci	if (!ntfs_is_mft_record(m->magic)) {
9998c2ecf20Sopenharmony_ci		ntfs_debug("Mft record 0x%lx is not a FILE record, write it.",
10008c2ecf20Sopenharmony_ci				mft_no);
10018c2ecf20Sopenharmony_ci		return true;
10028c2ecf20Sopenharmony_ci	}
10038c2ecf20Sopenharmony_ci	/* Write the mft record if it is a base inode. */
10048c2ecf20Sopenharmony_ci	if (!m->base_mft_record) {
10058c2ecf20Sopenharmony_ci		ntfs_debug("Mft record 0x%lx is a base record, write it.",
10068c2ecf20Sopenharmony_ci				mft_no);
10078c2ecf20Sopenharmony_ci		return true;
10088c2ecf20Sopenharmony_ci	}
10098c2ecf20Sopenharmony_ci	/*
10108c2ecf20Sopenharmony_ci	 * This is an extent mft record.  Check if the inode corresponding to
10118c2ecf20Sopenharmony_ci	 * its base mft record is in icache and obtain a reference to it if it
10128c2ecf20Sopenharmony_ci	 * is.
10138c2ecf20Sopenharmony_ci	 */
10148c2ecf20Sopenharmony_ci	na.mft_no = MREF_LE(m->base_mft_record);
10158c2ecf20Sopenharmony_ci	ntfs_debug("Mft record 0x%lx is an extent record.  Looking for base "
10168c2ecf20Sopenharmony_ci			"inode 0x%lx in icache.", mft_no, na.mft_no);
10178c2ecf20Sopenharmony_ci	if (!na.mft_no) {
10188c2ecf20Sopenharmony_ci		/* Balance the below iput(). */
10198c2ecf20Sopenharmony_ci		vi = igrab(mft_vi);
10208c2ecf20Sopenharmony_ci		BUG_ON(vi != mft_vi);
10218c2ecf20Sopenharmony_ci	} else
10228c2ecf20Sopenharmony_ci		vi = ilookup5_nowait(sb, na.mft_no, ntfs_test_inode,
10238c2ecf20Sopenharmony_ci				&na);
10248c2ecf20Sopenharmony_ci	if (!vi) {
10258c2ecf20Sopenharmony_ci		/*
10268c2ecf20Sopenharmony_ci		 * The base inode is not in icache, write this extent mft
10278c2ecf20Sopenharmony_ci		 * record.
10288c2ecf20Sopenharmony_ci		 */
10298c2ecf20Sopenharmony_ci		ntfs_debug("Base inode 0x%lx is not in icache, write the "
10308c2ecf20Sopenharmony_ci				"extent record.", na.mft_no);
10318c2ecf20Sopenharmony_ci		return true;
10328c2ecf20Sopenharmony_ci	}
10338c2ecf20Sopenharmony_ci	ntfs_debug("Base inode 0x%lx is in icache.", na.mft_no);
10348c2ecf20Sopenharmony_ci	/*
10358c2ecf20Sopenharmony_ci	 * The base inode is in icache.  Check if it has the extent inode
10368c2ecf20Sopenharmony_ci	 * corresponding to this extent mft record attached.
10378c2ecf20Sopenharmony_ci	 */
10388c2ecf20Sopenharmony_ci	ni = NTFS_I(vi);
10398c2ecf20Sopenharmony_ci	mutex_lock(&ni->extent_lock);
10408c2ecf20Sopenharmony_ci	if (ni->nr_extents <= 0) {
10418c2ecf20Sopenharmony_ci		/*
10428c2ecf20Sopenharmony_ci		 * The base inode has no attached extent inodes, write this
10438c2ecf20Sopenharmony_ci		 * extent mft record.
10448c2ecf20Sopenharmony_ci		 */
10458c2ecf20Sopenharmony_ci		mutex_unlock(&ni->extent_lock);
10468c2ecf20Sopenharmony_ci		iput(vi);
10478c2ecf20Sopenharmony_ci		ntfs_debug("Base inode 0x%lx has no attached extent inodes, "
10488c2ecf20Sopenharmony_ci				"write the extent record.", na.mft_no);
10498c2ecf20Sopenharmony_ci		return true;
10508c2ecf20Sopenharmony_ci	}
10518c2ecf20Sopenharmony_ci	/* Iterate over the attached extent inodes. */
10528c2ecf20Sopenharmony_ci	extent_nis = ni->ext.extent_ntfs_inos;
10538c2ecf20Sopenharmony_ci	for (eni = NULL, i = 0; i < ni->nr_extents; ++i) {
10548c2ecf20Sopenharmony_ci		if (mft_no == extent_nis[i]->mft_no) {
10558c2ecf20Sopenharmony_ci			/*
10568c2ecf20Sopenharmony_ci			 * Found the extent inode corresponding to this extent
10578c2ecf20Sopenharmony_ci			 * mft record.
10588c2ecf20Sopenharmony_ci			 */
10598c2ecf20Sopenharmony_ci			eni = extent_nis[i];
10608c2ecf20Sopenharmony_ci			break;
10618c2ecf20Sopenharmony_ci		}
10628c2ecf20Sopenharmony_ci	}
10638c2ecf20Sopenharmony_ci	/*
10648c2ecf20Sopenharmony_ci	 * If the extent inode was not attached to the base inode, write this
10658c2ecf20Sopenharmony_ci	 * extent mft record.
10668c2ecf20Sopenharmony_ci	 */
10678c2ecf20Sopenharmony_ci	if (!eni) {
10688c2ecf20Sopenharmony_ci		mutex_unlock(&ni->extent_lock);
10698c2ecf20Sopenharmony_ci		iput(vi);
10708c2ecf20Sopenharmony_ci		ntfs_debug("Extent inode 0x%lx is not attached to its base "
10718c2ecf20Sopenharmony_ci				"inode 0x%lx, write the extent record.",
10728c2ecf20Sopenharmony_ci				mft_no, na.mft_no);
10738c2ecf20Sopenharmony_ci		return true;
10748c2ecf20Sopenharmony_ci	}
10758c2ecf20Sopenharmony_ci	ntfs_debug("Extent inode 0x%lx is attached to its base inode 0x%lx.",
10768c2ecf20Sopenharmony_ci			mft_no, na.mft_no);
10778c2ecf20Sopenharmony_ci	/* Take a reference to the extent ntfs inode. */
10788c2ecf20Sopenharmony_ci	atomic_inc(&eni->count);
10798c2ecf20Sopenharmony_ci	mutex_unlock(&ni->extent_lock);
10808c2ecf20Sopenharmony_ci	/*
10818c2ecf20Sopenharmony_ci	 * Found the extent inode coresponding to this extent mft record.
10828c2ecf20Sopenharmony_ci	 * Try to take the mft record lock.
10838c2ecf20Sopenharmony_ci	 */
10848c2ecf20Sopenharmony_ci	if (unlikely(!mutex_trylock(&eni->mrec_lock))) {
10858c2ecf20Sopenharmony_ci		atomic_dec(&eni->count);
10868c2ecf20Sopenharmony_ci		iput(vi);
10878c2ecf20Sopenharmony_ci		ntfs_debug("Extent mft record 0x%lx is already locked, do "
10888c2ecf20Sopenharmony_ci				"not write it.", mft_no);
10898c2ecf20Sopenharmony_ci		return false;
10908c2ecf20Sopenharmony_ci	}
10918c2ecf20Sopenharmony_ci	ntfs_debug("Managed to lock extent mft record 0x%lx, write it.",
10928c2ecf20Sopenharmony_ci			mft_no);
10938c2ecf20Sopenharmony_ci	if (NInoTestClearDirty(eni))
10948c2ecf20Sopenharmony_ci		ntfs_debug("Extent inode 0x%lx is dirty, marking it clean.",
10958c2ecf20Sopenharmony_ci				mft_no);
10968c2ecf20Sopenharmony_ci	/*
10978c2ecf20Sopenharmony_ci	 * The write has to occur while we hold the mft record lock so return
10988c2ecf20Sopenharmony_ci	 * the locked extent ntfs inode.
10998c2ecf20Sopenharmony_ci	 */
11008c2ecf20Sopenharmony_ci	*locked_ni = eni;
11018c2ecf20Sopenharmony_ci	return true;
11028c2ecf20Sopenharmony_ci}
11038c2ecf20Sopenharmony_ci
/*
 * Suffix appended (via "%s") to error messages in this file when a failed
 * operation could not be rolled back.
 */
static const char *es = "  Leaving inconsistent metadata.  Unmount and run chkdsk.";
11068c2ecf20Sopenharmony_ci
11078c2ecf20Sopenharmony_ci/**
11088c2ecf20Sopenharmony_ci * ntfs_mft_bitmap_find_and_alloc_free_rec_nolock - see name
11098c2ecf20Sopenharmony_ci * @vol:	volume on which to search for a free mft record
11108c2ecf20Sopenharmony_ci * @base_ni:	open base inode if allocating an extent mft record or NULL
11118c2ecf20Sopenharmony_ci *
11128c2ecf20Sopenharmony_ci * Search for a free mft record in the mft bitmap attribute on the ntfs volume
11138c2ecf20Sopenharmony_ci * @vol.
11148c2ecf20Sopenharmony_ci *
11158c2ecf20Sopenharmony_ci * If @base_ni is NULL start the search at the default allocator position.
11168c2ecf20Sopenharmony_ci *
11178c2ecf20Sopenharmony_ci * If @base_ni is not NULL start the search at the mft record after the base
11188c2ecf20Sopenharmony_ci * mft record @base_ni.
11198c2ecf20Sopenharmony_ci *
11208c2ecf20Sopenharmony_ci * Return the free mft record on success and -errno on error.  An error code of
11218c2ecf20Sopenharmony_ci * -ENOSPC means that there are no free mft records in the currently
11228c2ecf20Sopenharmony_ci * initialized mft bitmap.
11238c2ecf20Sopenharmony_ci *
11248c2ecf20Sopenharmony_ci * Locking: Caller must hold vol->mftbmp_lock for writing.
11258c2ecf20Sopenharmony_ci */
11268c2ecf20Sopenharmony_cistatic int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
11278c2ecf20Sopenharmony_ci		ntfs_inode *base_ni)
11288c2ecf20Sopenharmony_ci{
11298c2ecf20Sopenharmony_ci	s64 pass_end, ll, data_pos, pass_start, ofs, bit;
11308c2ecf20Sopenharmony_ci	unsigned long flags;
11318c2ecf20Sopenharmony_ci	struct address_space *mftbmp_mapping;
11328c2ecf20Sopenharmony_ci	u8 *buf, *byte;
11338c2ecf20Sopenharmony_ci	struct page *page;
11348c2ecf20Sopenharmony_ci	unsigned int page_ofs, size;
11358c2ecf20Sopenharmony_ci	u8 pass, b;
11368c2ecf20Sopenharmony_ci
11378c2ecf20Sopenharmony_ci	ntfs_debug("Searching for free mft record in the currently "
11388c2ecf20Sopenharmony_ci			"initialized mft bitmap.");
11398c2ecf20Sopenharmony_ci	mftbmp_mapping = vol->mftbmp_ino->i_mapping;
11408c2ecf20Sopenharmony_ci	/*
11418c2ecf20Sopenharmony_ci	 * Set the end of the pass making sure we do not overflow the mft
11428c2ecf20Sopenharmony_ci	 * bitmap.
11438c2ecf20Sopenharmony_ci	 */
11448c2ecf20Sopenharmony_ci	read_lock_irqsave(&NTFS_I(vol->mft_ino)->size_lock, flags);
11458c2ecf20Sopenharmony_ci	pass_end = NTFS_I(vol->mft_ino)->allocated_size >>
11468c2ecf20Sopenharmony_ci			vol->mft_record_size_bits;
11478c2ecf20Sopenharmony_ci	read_unlock_irqrestore(&NTFS_I(vol->mft_ino)->size_lock, flags);
11488c2ecf20Sopenharmony_ci	read_lock_irqsave(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
11498c2ecf20Sopenharmony_ci	ll = NTFS_I(vol->mftbmp_ino)->initialized_size << 3;
11508c2ecf20Sopenharmony_ci	read_unlock_irqrestore(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
11518c2ecf20Sopenharmony_ci	if (pass_end > ll)
11528c2ecf20Sopenharmony_ci		pass_end = ll;
11538c2ecf20Sopenharmony_ci	pass = 1;
11548c2ecf20Sopenharmony_ci	if (!base_ni)
11558c2ecf20Sopenharmony_ci		data_pos = vol->mft_data_pos;
11568c2ecf20Sopenharmony_ci	else
11578c2ecf20Sopenharmony_ci		data_pos = base_ni->mft_no + 1;
11588c2ecf20Sopenharmony_ci	if (data_pos < 24)
11598c2ecf20Sopenharmony_ci		data_pos = 24;
11608c2ecf20Sopenharmony_ci	if (data_pos >= pass_end) {
11618c2ecf20Sopenharmony_ci		data_pos = 24;
11628c2ecf20Sopenharmony_ci		pass = 2;
11638c2ecf20Sopenharmony_ci		/* This happens on a freshly formatted volume. */
11648c2ecf20Sopenharmony_ci		if (data_pos >= pass_end)
11658c2ecf20Sopenharmony_ci			return -ENOSPC;
11668c2ecf20Sopenharmony_ci	}
11678c2ecf20Sopenharmony_ci	pass_start = data_pos;
11688c2ecf20Sopenharmony_ci	ntfs_debug("Starting bitmap search: pass %u, pass_start 0x%llx, "
11698c2ecf20Sopenharmony_ci			"pass_end 0x%llx, data_pos 0x%llx.", pass,
11708c2ecf20Sopenharmony_ci			(long long)pass_start, (long long)pass_end,
11718c2ecf20Sopenharmony_ci			(long long)data_pos);
11728c2ecf20Sopenharmony_ci	/* Loop until a free mft record is found. */
11738c2ecf20Sopenharmony_ci	for (; pass <= 2;) {
11748c2ecf20Sopenharmony_ci		/* Cap size to pass_end. */
11758c2ecf20Sopenharmony_ci		ofs = data_pos >> 3;
11768c2ecf20Sopenharmony_ci		page_ofs = ofs & ~PAGE_MASK;
11778c2ecf20Sopenharmony_ci		size = PAGE_SIZE - page_ofs;
11788c2ecf20Sopenharmony_ci		ll = ((pass_end + 7) >> 3) - ofs;
11798c2ecf20Sopenharmony_ci		if (size > ll)
11808c2ecf20Sopenharmony_ci			size = ll;
11818c2ecf20Sopenharmony_ci		size <<= 3;
11828c2ecf20Sopenharmony_ci		/*
11838c2ecf20Sopenharmony_ci		 * If we are still within the active pass, search the next page
11848c2ecf20Sopenharmony_ci		 * for a zero bit.
11858c2ecf20Sopenharmony_ci		 */
11868c2ecf20Sopenharmony_ci		if (size) {
11878c2ecf20Sopenharmony_ci			page = ntfs_map_page(mftbmp_mapping,
11888c2ecf20Sopenharmony_ci					ofs >> PAGE_SHIFT);
11898c2ecf20Sopenharmony_ci			if (IS_ERR(page)) {
11908c2ecf20Sopenharmony_ci				ntfs_error(vol->sb, "Failed to read mft "
11918c2ecf20Sopenharmony_ci						"bitmap, aborting.");
11928c2ecf20Sopenharmony_ci				return PTR_ERR(page);
11938c2ecf20Sopenharmony_ci			}
11948c2ecf20Sopenharmony_ci			buf = (u8*)page_address(page) + page_ofs;
11958c2ecf20Sopenharmony_ci			bit = data_pos & 7;
11968c2ecf20Sopenharmony_ci			data_pos &= ~7ull;
11978c2ecf20Sopenharmony_ci			ntfs_debug("Before inner for loop: size 0x%x, "
11988c2ecf20Sopenharmony_ci					"data_pos 0x%llx, bit 0x%llx", size,
11998c2ecf20Sopenharmony_ci					(long long)data_pos, (long long)bit);
12008c2ecf20Sopenharmony_ci			for (; bit < size && data_pos + bit < pass_end;
12018c2ecf20Sopenharmony_ci					bit &= ~7ull, bit += 8) {
12028c2ecf20Sopenharmony_ci				byte = buf + (bit >> 3);
12038c2ecf20Sopenharmony_ci				if (*byte == 0xff)
12048c2ecf20Sopenharmony_ci					continue;
12058c2ecf20Sopenharmony_ci				b = ffz((unsigned long)*byte);
12068c2ecf20Sopenharmony_ci				if (b < 8 && b >= (bit & 7)) {
12078c2ecf20Sopenharmony_ci					ll = data_pos + (bit & ~7ull) + b;
12088c2ecf20Sopenharmony_ci					if (unlikely(ll > (1ll << 32))) {
12098c2ecf20Sopenharmony_ci						ntfs_unmap_page(page);
12108c2ecf20Sopenharmony_ci						return -ENOSPC;
12118c2ecf20Sopenharmony_ci					}
12128c2ecf20Sopenharmony_ci					*byte |= 1 << b;
12138c2ecf20Sopenharmony_ci					flush_dcache_page(page);
12148c2ecf20Sopenharmony_ci					set_page_dirty(page);
12158c2ecf20Sopenharmony_ci					ntfs_unmap_page(page);
12168c2ecf20Sopenharmony_ci					ntfs_debug("Done.  (Found and "
12178c2ecf20Sopenharmony_ci							"allocated mft record "
12188c2ecf20Sopenharmony_ci							"0x%llx.)",
12198c2ecf20Sopenharmony_ci							(long long)ll);
12208c2ecf20Sopenharmony_ci					return ll;
12218c2ecf20Sopenharmony_ci				}
12228c2ecf20Sopenharmony_ci			}
12238c2ecf20Sopenharmony_ci			ntfs_debug("After inner for loop: size 0x%x, "
12248c2ecf20Sopenharmony_ci					"data_pos 0x%llx, bit 0x%llx", size,
12258c2ecf20Sopenharmony_ci					(long long)data_pos, (long long)bit);
12268c2ecf20Sopenharmony_ci			data_pos += size;
12278c2ecf20Sopenharmony_ci			ntfs_unmap_page(page);
12288c2ecf20Sopenharmony_ci			/*
12298c2ecf20Sopenharmony_ci			 * If the end of the pass has not been reached yet,
12308c2ecf20Sopenharmony_ci			 * continue searching the mft bitmap for a zero bit.
12318c2ecf20Sopenharmony_ci			 */
12328c2ecf20Sopenharmony_ci			if (data_pos < pass_end)
12338c2ecf20Sopenharmony_ci				continue;
12348c2ecf20Sopenharmony_ci		}
12358c2ecf20Sopenharmony_ci		/* Do the next pass. */
12368c2ecf20Sopenharmony_ci		if (++pass == 2) {
12378c2ecf20Sopenharmony_ci			/*
12388c2ecf20Sopenharmony_ci			 * Starting the second pass, in which we scan the first
12398c2ecf20Sopenharmony_ci			 * part of the zone which we omitted earlier.
12408c2ecf20Sopenharmony_ci			 */
12418c2ecf20Sopenharmony_ci			pass_end = pass_start;
12428c2ecf20Sopenharmony_ci			data_pos = pass_start = 24;
12438c2ecf20Sopenharmony_ci			ntfs_debug("pass %i, pass_start 0x%llx, pass_end "
12448c2ecf20Sopenharmony_ci					"0x%llx.", pass, (long long)pass_start,
12458c2ecf20Sopenharmony_ci					(long long)pass_end);
12468c2ecf20Sopenharmony_ci			if (data_pos >= pass_end)
12478c2ecf20Sopenharmony_ci				break;
12488c2ecf20Sopenharmony_ci		}
12498c2ecf20Sopenharmony_ci	}
12508c2ecf20Sopenharmony_ci	/* No free mft records in currently initialized mft bitmap. */
12518c2ecf20Sopenharmony_ci	ntfs_debug("Done.  (No free mft records left in currently initialized "
12528c2ecf20Sopenharmony_ci			"mft bitmap.)");
12538c2ecf20Sopenharmony_ci	return -ENOSPC;
12548c2ecf20Sopenharmony_ci}
12558c2ecf20Sopenharmony_ci
12568c2ecf20Sopenharmony_ci/**
12578c2ecf20Sopenharmony_ci * ntfs_mft_bitmap_extend_allocation_nolock - extend mft bitmap by a cluster
12588c2ecf20Sopenharmony_ci * @vol:	volume on which to extend the mft bitmap attribute
12598c2ecf20Sopenharmony_ci *
12608c2ecf20Sopenharmony_ci * Extend the mft bitmap attribute on the ntfs volume @vol by one cluster.
12618c2ecf20Sopenharmony_ci *
 * The cluster directly following the last cluster of the mft bitmap is claimed
 * by hand in the cluster bitmap if it is free, in which case the last run of
 * the mft bitmap runlist simply grows by one cluster.  Otherwise one cluster
 * is requested from ntfs_cluster_alloc() in the DATA_ZONE and merged into the
 * runlist as an additional run.  Afterwards the mapping pairs array and the
 * highest_vcn of the last attribute extent as well as the allocated_size of
 * the first attribute extent are updated to match the new runlist.  If a step
 * fails, the steps already done are undone as far as possible.
 *
12628c2ecf20Sopenharmony_ci * Note: Only changes allocated_size, i.e. does not touch initialized_size or
12638c2ecf20Sopenharmony_ci * data_size.
12648c2ecf20Sopenharmony_ci *
12658c2ecf20Sopenharmony_ci * Return 0 on success and -errno on error.
 * -EOPNOTSUPP is returned if the attribute record cannot be enlarged because
 * there is not enough space left in its mft record.
12668c2ecf20Sopenharmony_ci *
12678c2ecf20Sopenharmony_ci * Locking: - Caller must hold vol->mftbmp_lock for writing.
12688c2ecf20Sopenharmony_ci *	    - This function takes NTFS_I(vol->mftbmp_ino)->runlist.lock for
12698c2ecf20Sopenharmony_ci *	      writing and releases it before returning.
12708c2ecf20Sopenharmony_ci *	    - This function takes vol->lcnbmp_lock for writing and releases it
12718c2ecf20Sopenharmony_ci *	      before returning.
12728c2ecf20Sopenharmony_ci */
12738c2ecf20Sopenharmony_cistatic int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
12748c2ecf20Sopenharmony_ci{
12758c2ecf20Sopenharmony_ci	LCN lcn;
12768c2ecf20Sopenharmony_ci	s64 ll;
12778c2ecf20Sopenharmony_ci	unsigned long flags;
12788c2ecf20Sopenharmony_ci	struct page *page;
12798c2ecf20Sopenharmony_ci	ntfs_inode *mft_ni, *mftbmp_ni;
12808c2ecf20Sopenharmony_ci	runlist_element *rl, *rl2 = NULL;
12818c2ecf20Sopenharmony_ci	ntfs_attr_search_ctx *ctx = NULL;
12828c2ecf20Sopenharmony_ci	MFT_RECORD *mrec;
12838c2ecf20Sopenharmony_ci	ATTR_RECORD *a = NULL;
12848c2ecf20Sopenharmony_ci	int ret, mp_size;
12858c2ecf20Sopenharmony_ci	u32 old_alen = 0;
12868c2ecf20Sopenharmony_ci	u8 *b, tb;
	/*
	 * Record of the steps completed so far.  The error path at undo_alloc
	 * consults these flags to decide what has to be rolled back.
	 */
12878c2ecf20Sopenharmony_ci	struct {
12888c2ecf20Sopenharmony_ci		u8 added_cluster:1;
12898c2ecf20Sopenharmony_ci		u8 added_run:1;
12908c2ecf20Sopenharmony_ci		u8 mp_rebuilt:1;
12918c2ecf20Sopenharmony_ci	} status = { 0, 0, 0 };
12928c2ecf20Sopenharmony_ci
12938c2ecf20Sopenharmony_ci	ntfs_debug("Extending mft bitmap allocation.");
12948c2ecf20Sopenharmony_ci	mft_ni = NTFS_I(vol->mft_ino);
12958c2ecf20Sopenharmony_ci	mftbmp_ni = NTFS_I(vol->mftbmp_ino);
12968c2ecf20Sopenharmony_ci	/*
12978c2ecf20Sopenharmony_ci	 * Determine the last lcn of the mft bitmap.  The allocated size of the
12988c2ecf20Sopenharmony_ci	 * mft bitmap cannot be zero so we are ok to do this.
12998c2ecf20Sopenharmony_ci	 */
13008c2ecf20Sopenharmony_ci	down_write(&mftbmp_ni->runlist.lock);
13018c2ecf20Sopenharmony_ci	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
13028c2ecf20Sopenharmony_ci	ll = mftbmp_ni->allocated_size;
13038c2ecf20Sopenharmony_ci	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
	/* Look up the run containing the last allocated vcn. */
13048c2ecf20Sopenharmony_ci	rl = ntfs_attr_find_vcn_nolock(mftbmp_ni,
13058c2ecf20Sopenharmony_ci			(ll - 1) >> vol->cluster_size_bits, NULL);
13068c2ecf20Sopenharmony_ci	if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) {
13078c2ecf20Sopenharmony_ci		up_write(&mftbmp_ni->runlist.lock);
13088c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to determine last allocated "
13098c2ecf20Sopenharmony_ci				"cluster of mft bitmap attribute.");
		/* A run that is empty or has a negative lcn is reported as -EIO. */
13108c2ecf20Sopenharmony_ci		if (!IS_ERR(rl))
13118c2ecf20Sopenharmony_ci			ret = -EIO;
13128c2ecf20Sopenharmony_ci		else
13138c2ecf20Sopenharmony_ci			ret = PTR_ERR(rl);
13148c2ecf20Sopenharmony_ci		return ret;
13158c2ecf20Sopenharmony_ci	}
	/* First lcn past the end of the run, i.e. the cluster we would like. */
13168c2ecf20Sopenharmony_ci	lcn = rl->lcn + rl->length;
13178c2ecf20Sopenharmony_ci	ntfs_debug("Last lcn of mft bitmap attribute is 0x%llx.",
13188c2ecf20Sopenharmony_ci			(long long)lcn);
13198c2ecf20Sopenharmony_ci	/*
13208c2ecf20Sopenharmony_ci	 * Attempt to get the cluster following the last allocated cluster by
13218c2ecf20Sopenharmony_ci	 * hand as it may be in the MFT zone so the allocator would not give it
13228c2ecf20Sopenharmony_ci	 * to us.
13238c2ecf20Sopenharmony_ci	 */
	/* ll is reused here as the byte offset of @lcn's bit in the lcn bitmap. */
13248c2ecf20Sopenharmony_ci	ll = lcn >> 3;
13258c2ecf20Sopenharmony_ci	page = ntfs_map_page(vol->lcnbmp_ino->i_mapping,
13268c2ecf20Sopenharmony_ci			ll >> PAGE_SHIFT);
13278c2ecf20Sopenharmony_ci	if (IS_ERR(page)) {
13288c2ecf20Sopenharmony_ci		up_write(&mftbmp_ni->runlist.lock);
13298c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to read from lcn bitmap.");
13308c2ecf20Sopenharmony_ci		return PTR_ERR(page);
13318c2ecf20Sopenharmony_ci	}
	/* @b points at the bitmap byte holding @lcn's bit, @tb is its mask. */
13328c2ecf20Sopenharmony_ci	b = (u8*)page_address(page) + (ll & ~PAGE_MASK);
13338c2ecf20Sopenharmony_ci	tb = 1 << (lcn & 7ull);
13348c2ecf20Sopenharmony_ci	down_write(&vol->lcnbmp_lock);
13358c2ecf20Sopenharmony_ci	if (*b != 0xff && !(*b & tb)) {
13368c2ecf20Sopenharmony_ci		/* Next cluster is free, allocate it. */
13378c2ecf20Sopenharmony_ci		*b |= tb;
13388c2ecf20Sopenharmony_ci		flush_dcache_page(page);
13398c2ecf20Sopenharmony_ci		set_page_dirty(page);
13408c2ecf20Sopenharmony_ci		up_write(&vol->lcnbmp_lock);
13418c2ecf20Sopenharmony_ci		ntfs_unmap_page(page);
13428c2ecf20Sopenharmony_ci		/* Update the mft bitmap runlist. */
		/*
		 * Grow the last run in place.  rl[1] is the element following
		 * it (presumably the runlist terminator - confirm) whose vcn
		 * has to move up by one as well.
		 */
13438c2ecf20Sopenharmony_ci		rl->length++;
13448c2ecf20Sopenharmony_ci		rl[1].vcn++;
13458c2ecf20Sopenharmony_ci		status.added_cluster = 1;
13468c2ecf20Sopenharmony_ci		ntfs_debug("Appending one cluster to mft bitmap.");
13478c2ecf20Sopenharmony_ci	} else {
13488c2ecf20Sopenharmony_ci		up_write(&vol->lcnbmp_lock);
13498c2ecf20Sopenharmony_ci		ntfs_unmap_page(page);
13508c2ecf20Sopenharmony_ci		/* Allocate a cluster from the DATA_ZONE. */
13518c2ecf20Sopenharmony_ci		rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE,
13528c2ecf20Sopenharmony_ci				true);
13538c2ecf20Sopenharmony_ci		if (IS_ERR(rl2)) {
13548c2ecf20Sopenharmony_ci			up_write(&mftbmp_ni->runlist.lock);
13558c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to allocate a cluster for "
13568c2ecf20Sopenharmony_ci					"the mft bitmap.");
13578c2ecf20Sopenharmony_ci			return PTR_ERR(rl2);
13588c2ecf20Sopenharmony_ci		}
13598c2ecf20Sopenharmony_ci		rl = ntfs_runlists_merge(mftbmp_ni->runlist.rl, rl2);
13608c2ecf20Sopenharmony_ci		if (IS_ERR(rl)) {
13618c2ecf20Sopenharmony_ci			up_write(&mftbmp_ni->runlist.lock);
13628c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to merge runlists for mft "
13638c2ecf20Sopenharmony_ci					"bitmap.");
			/* Give the freshly allocated cluster back. */
13648c2ecf20Sopenharmony_ci			if (ntfs_cluster_free_from_rl(vol, rl2)) {
13658c2ecf20Sopenharmony_ci				ntfs_error(vol->sb, "Failed to deallocate "
13668c2ecf20Sopenharmony_ci						"allocated cluster.%s", es);
13678c2ecf20Sopenharmony_ci				NVolSetErrors(vol);
13688c2ecf20Sopenharmony_ci			}
13698c2ecf20Sopenharmony_ci			ntfs_free(rl2);
13708c2ecf20Sopenharmony_ci			return PTR_ERR(rl);
13718c2ecf20Sopenharmony_ci		}
13728c2ecf20Sopenharmony_ci		mftbmp_ni->runlist.rl = rl;
13738c2ecf20Sopenharmony_ci		status.added_run = 1;
13748c2ecf20Sopenharmony_ci		ntfs_debug("Adding one run to mft bitmap.");
13758c2ecf20Sopenharmony_ci		/* Find the last run in the new runlist. */
13768c2ecf20Sopenharmony_ci		for (; rl[1].length; rl++)
13778c2ecf20Sopenharmony_ci			;
13788c2ecf20Sopenharmony_ci	}
13798c2ecf20Sopenharmony_ci	/*
13808c2ecf20Sopenharmony_ci	 * Update the attribute record as well.  Note: @rl is the last
13818c2ecf20Sopenharmony_ci	 * (non-terminator) runlist element of mft bitmap.
13828c2ecf20Sopenharmony_ci	 */
	/* From here on every failure has to go through the undo path. */
13838c2ecf20Sopenharmony_ci	mrec = map_mft_record(mft_ni);
13848c2ecf20Sopenharmony_ci	if (IS_ERR(mrec)) {
13858c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to map mft record.");
13868c2ecf20Sopenharmony_ci		ret = PTR_ERR(mrec);
13878c2ecf20Sopenharmony_ci		goto undo_alloc;
13888c2ecf20Sopenharmony_ci	}
13898c2ecf20Sopenharmony_ci	ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
13908c2ecf20Sopenharmony_ci	if (unlikely(!ctx)) {
13918c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to get search context.");
13928c2ecf20Sopenharmony_ci		ret = -ENOMEM;
13938c2ecf20Sopenharmony_ci		goto undo_alloc;
13948c2ecf20Sopenharmony_ci	}
13958c2ecf20Sopenharmony_ci	ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
13968c2ecf20Sopenharmony_ci			mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL,
13978c2ecf20Sopenharmony_ci			0, ctx);
13988c2ecf20Sopenharmony_ci	if (unlikely(ret)) {
13998c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to find last attribute extent of "
14008c2ecf20Sopenharmony_ci				"mft bitmap attribute.");
		/* The attribute not being found at all is reported as -EIO. */
14018c2ecf20Sopenharmony_ci		if (ret == -ENOENT)
14028c2ecf20Sopenharmony_ci			ret = -EIO;
14038c2ecf20Sopenharmony_ci		goto undo_alloc;
14048c2ecf20Sopenharmony_ci	}
14058c2ecf20Sopenharmony_ci	a = ctx->attr;
	/*
	 * ll is reused once more, now holding the lowest vcn of the attribute
	 * extent just found.  It keeps this value for the rest of the
	 * function, including the undo path.
	 */
14068c2ecf20Sopenharmony_ci	ll = sle64_to_cpu(a->data.non_resident.lowest_vcn);
14078c2ecf20Sopenharmony_ci	/* Search back for the previous last allocated cluster of mft bitmap. */
14088c2ecf20Sopenharmony_ci	for (rl2 = rl; rl2 > mftbmp_ni->runlist.rl; rl2--) {
14098c2ecf20Sopenharmony_ci		if (ll >= rl2->vcn)
14108c2ecf20Sopenharmony_ci			break;
14118c2ecf20Sopenharmony_ci	}
	/* @rl2 must now be the run which contains the extent's lowest vcn. */
14128c2ecf20Sopenharmony_ci	BUG_ON(ll < rl2->vcn);
14138c2ecf20Sopenharmony_ci	BUG_ON(ll >= rl2->vcn + rl2->length);
14148c2ecf20Sopenharmony_ci	/* Get the size for the new mapping pairs array for this extent. */
14158c2ecf20Sopenharmony_ci	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
14168c2ecf20Sopenharmony_ci	if (unlikely(mp_size <= 0)) {
14178c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Get size for mapping pairs failed for "
14188c2ecf20Sopenharmony_ci				"mft bitmap attribute extent.");
		/* A size of zero is treated as an i/o error. */
14198c2ecf20Sopenharmony_ci		ret = mp_size;
14208c2ecf20Sopenharmony_ci		if (!ret)
14218c2ecf20Sopenharmony_ci			ret = -EIO;
14228c2ecf20Sopenharmony_ci		goto undo_alloc;
14238c2ecf20Sopenharmony_ci	}
14248c2ecf20Sopenharmony_ci	/* Expand the attribute record if necessary. */
	/* Remember the current record length so the undo path can restore it. */
14258c2ecf20Sopenharmony_ci	old_alen = le32_to_cpu(a->length);
14268c2ecf20Sopenharmony_ci	ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size +
14278c2ecf20Sopenharmony_ci			le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
14288c2ecf20Sopenharmony_ci	if (unlikely(ret)) {
14298c2ecf20Sopenharmony_ci		if (ret != -ENOSPC) {
14308c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to resize attribute "
14318c2ecf20Sopenharmony_ci					"record for mft bitmap attribute.");
14328c2ecf20Sopenharmony_ci			goto undo_alloc;
14338c2ecf20Sopenharmony_ci		}
14348c2ecf20Sopenharmony_ci		// TODO: Deal with this by moving this extent to a new mft
14358c2ecf20Sopenharmony_ci		// record or by starting a new extent in a new mft record or by
14368c2ecf20Sopenharmony_ci		// moving other attributes out of this mft record.
14378c2ecf20Sopenharmony_ci		// Note: It will need to be a special mft record and if none of
14388c2ecf20Sopenharmony_ci		// those are available it gets rather complicated...
14398c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Not enough space in this mft record to "
14408c2ecf20Sopenharmony_ci				"accommodate extended mft bitmap attribute "
14418c2ecf20Sopenharmony_ci				"extent.  Cannot handle this yet.");
		/* -ENOSPC from the resize is reported to the caller as -EOPNOTSUPP. */
14428c2ecf20Sopenharmony_ci		ret = -EOPNOTSUPP;
14438c2ecf20Sopenharmony_ci		goto undo_alloc;
14448c2ecf20Sopenharmony_ci	}
	/* The attribute record has been modified, the undo path must restore it. */
14458c2ecf20Sopenharmony_ci	status.mp_rebuilt = 1;
14468c2ecf20Sopenharmony_ci	/* Generate the mapping pairs array directly into the attr record. */
14478c2ecf20Sopenharmony_ci	ret = ntfs_mapping_pairs_build(vol, (u8*)a +
14488c2ecf20Sopenharmony_ci			le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
14498c2ecf20Sopenharmony_ci			mp_size, rl2, ll, -1, NULL);
14508c2ecf20Sopenharmony_ci	if (unlikely(ret)) {
14518c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to build mapping pairs array for "
14528c2ecf20Sopenharmony_ci				"mft bitmap attribute.");
14538c2ecf20Sopenharmony_ci		goto undo_alloc;
14548c2ecf20Sopenharmony_ci	}
14558c2ecf20Sopenharmony_ci	/* Update the highest_vcn. */
14568c2ecf20Sopenharmony_ci	a->data.non_resident.highest_vcn = cpu_to_sle64(rl[1].vcn - 1);
14578c2ecf20Sopenharmony_ci	/*
14588c2ecf20Sopenharmony_ci	 * We now have extended the mft bitmap allocated_size by one cluster.
14598c2ecf20Sopenharmony_ci	 * Reflect this in the ntfs_inode structure and the attribute record.
14608c2ecf20Sopenharmony_ci	 */
14618c2ecf20Sopenharmony_ci	if (a->data.non_resident.lowest_vcn) {
14628c2ecf20Sopenharmony_ci		/*
14638c2ecf20Sopenharmony_ci		 * We are not in the first attribute extent, switch to it, but
14648c2ecf20Sopenharmony_ci		 * first ensure the changes will make it to disk later.
14658c2ecf20Sopenharmony_ci		 */
14668c2ecf20Sopenharmony_ci		flush_dcache_mft_record_page(ctx->ntfs_ino);
14678c2ecf20Sopenharmony_ci		mark_mft_record_dirty(ctx->ntfs_ino);
14688c2ecf20Sopenharmony_ci		ntfs_attr_reinit_search_ctx(ctx);
14698c2ecf20Sopenharmony_ci		ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
14708c2ecf20Sopenharmony_ci				mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL,
14718c2ecf20Sopenharmony_ci				0, ctx);
14728c2ecf20Sopenharmony_ci		if (unlikely(ret)) {
14738c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to find first attribute "
14748c2ecf20Sopenharmony_ci					"extent of mft bitmap attribute.");
14758c2ecf20Sopenharmony_ci			goto restore_undo_alloc;
14768c2ecf20Sopenharmony_ci		}
14778c2ecf20Sopenharmony_ci		a = ctx->attr;
14788c2ecf20Sopenharmony_ci	}
	/* Update the in-memory and the on-disk allocated_size together. */
14798c2ecf20Sopenharmony_ci	write_lock_irqsave(&mftbmp_ni->size_lock, flags);
14808c2ecf20Sopenharmony_ci	mftbmp_ni->allocated_size += vol->cluster_size;
14818c2ecf20Sopenharmony_ci	a->data.non_resident.allocated_size =
14828c2ecf20Sopenharmony_ci			cpu_to_sle64(mftbmp_ni->allocated_size);
14838c2ecf20Sopenharmony_ci	write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
14848c2ecf20Sopenharmony_ci	/* Ensure the changes make it to disk. */
14858c2ecf20Sopenharmony_ci	flush_dcache_mft_record_page(ctx->ntfs_ino);
14868c2ecf20Sopenharmony_ci	mark_mft_record_dirty(ctx->ntfs_ino);
14878c2ecf20Sopenharmony_ci	ntfs_attr_put_search_ctx(ctx);
14888c2ecf20Sopenharmony_ci	unmap_mft_record(mft_ni);
14898c2ecf20Sopenharmony_ci	up_write(&mftbmp_ni->runlist.lock);
14908c2ecf20Sopenharmony_ci	ntfs_debug("Done.");
14918c2ecf20Sopenharmony_ci	return 0;
	/*
	 * Reached when the first attribute extent could not be found after the
	 * last attribute extent had already been updated.  Look the last extent
	 * up again so that the undo code below can restore it.
	 */
14928c2ecf20Sopenharmony_cirestore_undo_alloc:
14938c2ecf20Sopenharmony_ci	ntfs_attr_reinit_search_ctx(ctx);
14948c2ecf20Sopenharmony_ci	if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
14958c2ecf20Sopenharmony_ci			mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL,
14968c2ecf20Sopenharmony_ci			0, ctx)) {
14978c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to find last attribute extent of "
14988c2ecf20Sopenharmony_ci				"mft bitmap attribute.%s", es);
		/*
		 * The last extent cannot be restored, so the extension is kept
		 * and only the in-memory allocated_size is brought up to date.
		 */
14998c2ecf20Sopenharmony_ci		write_lock_irqsave(&mftbmp_ni->size_lock, flags);
15008c2ecf20Sopenharmony_ci		mftbmp_ni->allocated_size += vol->cluster_size;
15018c2ecf20Sopenharmony_ci		write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
15028c2ecf20Sopenharmony_ci		ntfs_attr_put_search_ctx(ctx);
15038c2ecf20Sopenharmony_ci		unmap_mft_record(mft_ni);
15048c2ecf20Sopenharmony_ci		up_write(&mftbmp_ni->runlist.lock);
15058c2ecf20Sopenharmony_ci		/*
15068c2ecf20Sopenharmony_ci		 * The only thing that is now wrong is ->allocated_size of the
15078c2ecf20Sopenharmony_ci		 * base attribute extent which chkdsk should be able to fix.
15088c2ecf20Sopenharmony_ci		 */
15098c2ecf20Sopenharmony_ci		NVolSetErrors(vol);
15108c2ecf20Sopenharmony_ci		return ret;
15118c2ecf20Sopenharmony_ci	}
15128c2ecf20Sopenharmony_ci	a = ctx->attr;
	/*
	 * highest_vcn was set to rl[1].vcn - 1 above and the runlist has not
	 * been truncated yet, hence - 2 gives the value from before the
	 * extension.
	 */
15138c2ecf20Sopenharmony_ci	a->data.non_resident.highest_vcn = cpu_to_sle64(rl[1].vcn - 2);
	/*
	 * Common error exit: undo the runlist change, free the cluster, restore
	 * the attribute record if it was modified and release everything.
	 */
15148c2ecf20Sopenharmony_ciundo_alloc:
15158c2ecf20Sopenharmony_ci	if (status.added_cluster) {
15168c2ecf20Sopenharmony_ci		/* Truncate the last run in the runlist by one cluster. */
15178c2ecf20Sopenharmony_ci		rl->length--;
15188c2ecf20Sopenharmony_ci		rl[1].vcn--;
15198c2ecf20Sopenharmony_ci	} else if (status.added_run) {
		/* Remember which cluster the added run occupies so it can be freed. */
15208c2ecf20Sopenharmony_ci		lcn = rl->lcn;
15218c2ecf20Sopenharmony_ci		/* Remove the last run from the runlist. */
15228c2ecf20Sopenharmony_ci		rl->lcn = rl[1].lcn;
15238c2ecf20Sopenharmony_ci		rl->length = 0;
15248c2ecf20Sopenharmony_ci	}
15258c2ecf20Sopenharmony_ci	/* Deallocate the cluster. */
15268c2ecf20Sopenharmony_ci	down_write(&vol->lcnbmp_lock);
15278c2ecf20Sopenharmony_ci	if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) {
15288c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to free allocated cluster.%s", es);
15298c2ecf20Sopenharmony_ci		NVolSetErrors(vol);
15308c2ecf20Sopenharmony_ci	}
15318c2ecf20Sopenharmony_ci	up_write(&vol->lcnbmp_lock);
15328c2ecf20Sopenharmony_ci	if (status.mp_rebuilt) {
		/*
		 * Rebuild the mapping pairs array from the (now truncated)
		 * runlist into the space the record had before it was resized,
		 * then shrink the record back to its old length.
		 */
15338c2ecf20Sopenharmony_ci		if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
15348c2ecf20Sopenharmony_ci				a->data.non_resident.mapping_pairs_offset),
15358c2ecf20Sopenharmony_ci				old_alen - le16_to_cpu(
15368c2ecf20Sopenharmony_ci				a->data.non_resident.mapping_pairs_offset),
15378c2ecf20Sopenharmony_ci				rl2, ll, -1, NULL)) {
15388c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to restore mapping pairs "
15398c2ecf20Sopenharmony_ci					"array.%s", es);
15408c2ecf20Sopenharmony_ci			NVolSetErrors(vol);
15418c2ecf20Sopenharmony_ci		}
15428c2ecf20Sopenharmony_ci		if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) {
15438c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to restore attribute "
15448c2ecf20Sopenharmony_ci					"record.%s", es);
15458c2ecf20Sopenharmony_ci			NVolSetErrors(vol);
15468c2ecf20Sopenharmony_ci		}
15478c2ecf20Sopenharmony_ci		flush_dcache_mft_record_page(ctx->ntfs_ino);
15488c2ecf20Sopenharmony_ci		mark_mft_record_dirty(ctx->ntfs_ino);
15498c2ecf20Sopenharmony_ci	}
	/* @ctx is still NULL if the failure happened before it was obtained. */
15508c2ecf20Sopenharmony_ci	if (ctx)
15518c2ecf20Sopenharmony_ci		ntfs_attr_put_search_ctx(ctx);
	/* @mrec is an error pointer if map_mft_record() itself failed. */
15528c2ecf20Sopenharmony_ci	if (!IS_ERR(mrec))
15538c2ecf20Sopenharmony_ci		unmap_mft_record(mft_ni);
15548c2ecf20Sopenharmony_ci	up_write(&mftbmp_ni->runlist.lock);
15558c2ecf20Sopenharmony_ci	return ret;
15568c2ecf20Sopenharmony_ci}
15578c2ecf20Sopenharmony_ci
15588c2ecf20Sopenharmony_ci/**
15598c2ecf20Sopenharmony_ci * ntfs_mft_bitmap_extend_initialized_nolock - extend mftbmp initialized data
15608c2ecf20Sopenharmony_ci * @vol:	volume on which to extend the mft bitmap attribute
15618c2ecf20Sopenharmony_ci *
15628c2ecf20Sopenharmony_ci * Extend the initialized portion of the mft bitmap attribute on the ntfs
15638c2ecf20Sopenharmony_ci * volume @vol by 8 bytes.
15648c2ecf20Sopenharmony_ci *
15658c2ecf20Sopenharmony_ci * Note:  Only changes initialized_size and data_size, i.e. requires that
15668c2ecf20Sopenharmony_ci * allocated_size is big enough to fit the new initialized_size.
15678c2ecf20Sopenharmony_ci *
15688c2ecf20Sopenharmony_ci * Return 0 on success and -error on error.
15698c2ecf20Sopenharmony_ci *
15708c2ecf20Sopenharmony_ci * Locking: Caller must hold vol->mftbmp_lock for writing.
15718c2ecf20Sopenharmony_ci */
15728c2ecf20Sopenharmony_cistatic int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol)
15738c2ecf20Sopenharmony_ci{
15748c2ecf20Sopenharmony_ci	s64 old_data_size, old_initialized_size;
15758c2ecf20Sopenharmony_ci	unsigned long flags;
15768c2ecf20Sopenharmony_ci	struct inode *mftbmp_vi;
15778c2ecf20Sopenharmony_ci	ntfs_inode *mft_ni, *mftbmp_ni;
15788c2ecf20Sopenharmony_ci	ntfs_attr_search_ctx *ctx;
15798c2ecf20Sopenharmony_ci	MFT_RECORD *mrec;
15808c2ecf20Sopenharmony_ci	ATTR_RECORD *a;
15818c2ecf20Sopenharmony_ci	int ret;
15828c2ecf20Sopenharmony_ci
15838c2ecf20Sopenharmony_ci	ntfs_debug("Extending mft bitmap initiailized (and data) size.");
15848c2ecf20Sopenharmony_ci	mft_ni = NTFS_I(vol->mft_ino);
15858c2ecf20Sopenharmony_ci	mftbmp_vi = vol->mftbmp_ino;
15868c2ecf20Sopenharmony_ci	mftbmp_ni = NTFS_I(mftbmp_vi);
15878c2ecf20Sopenharmony_ci	/* Get the attribute record. */
15888c2ecf20Sopenharmony_ci	mrec = map_mft_record(mft_ni);
15898c2ecf20Sopenharmony_ci	if (IS_ERR(mrec)) {
15908c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to map mft record.");
15918c2ecf20Sopenharmony_ci		return PTR_ERR(mrec);
15928c2ecf20Sopenharmony_ci	}
15938c2ecf20Sopenharmony_ci	ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
15948c2ecf20Sopenharmony_ci	if (unlikely(!ctx)) {
15958c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to get search context.");
15968c2ecf20Sopenharmony_ci		ret = -ENOMEM;
15978c2ecf20Sopenharmony_ci		goto unm_err_out;
15988c2ecf20Sopenharmony_ci	}
15998c2ecf20Sopenharmony_ci	ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
16008c2ecf20Sopenharmony_ci			mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx);
16018c2ecf20Sopenharmony_ci	if (unlikely(ret)) {
16028c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to find first attribute extent of "
16038c2ecf20Sopenharmony_ci				"mft bitmap attribute.");
16048c2ecf20Sopenharmony_ci		if (ret == -ENOENT)
16058c2ecf20Sopenharmony_ci			ret = -EIO;
16068c2ecf20Sopenharmony_ci		goto put_err_out;
16078c2ecf20Sopenharmony_ci	}
16088c2ecf20Sopenharmony_ci	a = ctx->attr;
16098c2ecf20Sopenharmony_ci	write_lock_irqsave(&mftbmp_ni->size_lock, flags);
16108c2ecf20Sopenharmony_ci	old_data_size = i_size_read(mftbmp_vi);
16118c2ecf20Sopenharmony_ci	old_initialized_size = mftbmp_ni->initialized_size;
16128c2ecf20Sopenharmony_ci	/*
16138c2ecf20Sopenharmony_ci	 * We can simply update the initialized_size before filling the space
16148c2ecf20Sopenharmony_ci	 * with zeroes because the caller is holding the mft bitmap lock for
16158c2ecf20Sopenharmony_ci	 * writing which ensures that no one else is trying to access the data.
16168c2ecf20Sopenharmony_ci	 */
16178c2ecf20Sopenharmony_ci	mftbmp_ni->initialized_size += 8;
16188c2ecf20Sopenharmony_ci	a->data.non_resident.initialized_size =
16198c2ecf20Sopenharmony_ci			cpu_to_sle64(mftbmp_ni->initialized_size);
16208c2ecf20Sopenharmony_ci	if (mftbmp_ni->initialized_size > old_data_size) {
16218c2ecf20Sopenharmony_ci		i_size_write(mftbmp_vi, mftbmp_ni->initialized_size);
16228c2ecf20Sopenharmony_ci		a->data.non_resident.data_size =
16238c2ecf20Sopenharmony_ci				cpu_to_sle64(mftbmp_ni->initialized_size);
16248c2ecf20Sopenharmony_ci	}
16258c2ecf20Sopenharmony_ci	write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
16268c2ecf20Sopenharmony_ci	/* Ensure the changes make it to disk. */
16278c2ecf20Sopenharmony_ci	flush_dcache_mft_record_page(ctx->ntfs_ino);
16288c2ecf20Sopenharmony_ci	mark_mft_record_dirty(ctx->ntfs_ino);
16298c2ecf20Sopenharmony_ci	ntfs_attr_put_search_ctx(ctx);
16308c2ecf20Sopenharmony_ci	unmap_mft_record(mft_ni);
16318c2ecf20Sopenharmony_ci	/* Initialize the mft bitmap attribute value with zeroes. */
16328c2ecf20Sopenharmony_ci	ret = ntfs_attr_set(mftbmp_ni, old_initialized_size, 8, 0);
16338c2ecf20Sopenharmony_ci	if (likely(!ret)) {
16348c2ecf20Sopenharmony_ci		ntfs_debug("Done.  (Wrote eight initialized bytes to mft "
16358c2ecf20Sopenharmony_ci				"bitmap.");
16368c2ecf20Sopenharmony_ci		return 0;
16378c2ecf20Sopenharmony_ci	}
16388c2ecf20Sopenharmony_ci	ntfs_error(vol->sb, "Failed to write to mft bitmap.");
16398c2ecf20Sopenharmony_ci	/* Try to recover from the error. */
16408c2ecf20Sopenharmony_ci	mrec = map_mft_record(mft_ni);
16418c2ecf20Sopenharmony_ci	if (IS_ERR(mrec)) {
16428c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to map mft record.%s", es);
16438c2ecf20Sopenharmony_ci		NVolSetErrors(vol);
16448c2ecf20Sopenharmony_ci		return ret;
16458c2ecf20Sopenharmony_ci	}
16468c2ecf20Sopenharmony_ci	ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
16478c2ecf20Sopenharmony_ci	if (unlikely(!ctx)) {
16488c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to get search context.%s", es);
16498c2ecf20Sopenharmony_ci		NVolSetErrors(vol);
16508c2ecf20Sopenharmony_ci		goto unm_err_out;
16518c2ecf20Sopenharmony_ci	}
16528c2ecf20Sopenharmony_ci	if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name,
16538c2ecf20Sopenharmony_ci			mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx)) {
16548c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to find first attribute extent of "
16558c2ecf20Sopenharmony_ci				"mft bitmap attribute.%s", es);
16568c2ecf20Sopenharmony_ci		NVolSetErrors(vol);
16578c2ecf20Sopenharmony_ciput_err_out:
16588c2ecf20Sopenharmony_ci		ntfs_attr_put_search_ctx(ctx);
16598c2ecf20Sopenharmony_ciunm_err_out:
16608c2ecf20Sopenharmony_ci		unmap_mft_record(mft_ni);
16618c2ecf20Sopenharmony_ci		goto err_out;
16628c2ecf20Sopenharmony_ci	}
16638c2ecf20Sopenharmony_ci	a = ctx->attr;
16648c2ecf20Sopenharmony_ci	write_lock_irqsave(&mftbmp_ni->size_lock, flags);
16658c2ecf20Sopenharmony_ci	mftbmp_ni->initialized_size = old_initialized_size;
16668c2ecf20Sopenharmony_ci	a->data.non_resident.initialized_size =
16678c2ecf20Sopenharmony_ci			cpu_to_sle64(old_initialized_size);
16688c2ecf20Sopenharmony_ci	if (i_size_read(mftbmp_vi) != old_data_size) {
16698c2ecf20Sopenharmony_ci		i_size_write(mftbmp_vi, old_data_size);
16708c2ecf20Sopenharmony_ci		a->data.non_resident.data_size = cpu_to_sle64(old_data_size);
16718c2ecf20Sopenharmony_ci	}
16728c2ecf20Sopenharmony_ci	write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
16738c2ecf20Sopenharmony_ci	flush_dcache_mft_record_page(ctx->ntfs_ino);
16748c2ecf20Sopenharmony_ci	mark_mft_record_dirty(ctx->ntfs_ino);
16758c2ecf20Sopenharmony_ci	ntfs_attr_put_search_ctx(ctx);
16768c2ecf20Sopenharmony_ci	unmap_mft_record(mft_ni);
16778c2ecf20Sopenharmony_ci#ifdef DEBUG
16788c2ecf20Sopenharmony_ci	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
16798c2ecf20Sopenharmony_ci	ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, "
16808c2ecf20Sopenharmony_ci			"data_size 0x%llx, initialized_size 0x%llx.",
16818c2ecf20Sopenharmony_ci			(long long)mftbmp_ni->allocated_size,
16828c2ecf20Sopenharmony_ci			(long long)i_size_read(mftbmp_vi),
16838c2ecf20Sopenharmony_ci			(long long)mftbmp_ni->initialized_size);
16848c2ecf20Sopenharmony_ci	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
16858c2ecf20Sopenharmony_ci#endif /* DEBUG */
16868c2ecf20Sopenharmony_cierr_out:
16878c2ecf20Sopenharmony_ci	return ret;
16888c2ecf20Sopenharmony_ci}
16898c2ecf20Sopenharmony_ci
16908c2ecf20Sopenharmony_ci/**
16918c2ecf20Sopenharmony_ci * ntfs_mft_data_extend_allocation_nolock - extend mft data attribute
16928c2ecf20Sopenharmony_ci * @vol:	volume on which to extend the mft data attribute
16938c2ecf20Sopenharmony_ci *
16948c2ecf20Sopenharmony_ci * Extend the mft data attribute on the ntfs volume @vol by 16 mft records
16958c2ecf20Sopenharmony_ci * worth of clusters or if not enough space for this by one mft record worth
16968c2ecf20Sopenharmony_ci * of clusters.
16978c2ecf20Sopenharmony_ci *
16988c2ecf20Sopenharmony_ci * Note:  Only changes allocated_size, i.e. does not touch initialized_size or
16998c2ecf20Sopenharmony_ci * data_size.
17008c2ecf20Sopenharmony_ci *
17018c2ecf20Sopenharmony_ci * Return 0 on success and -errno on error.
17028c2ecf20Sopenharmony_ci *
17038c2ecf20Sopenharmony_ci * Locking: - Caller must hold vol->mftbmp_lock for writing.
17048c2ecf20Sopenharmony_ci *	    - This function takes NTFS_I(vol->mft_ino)->runlist.lock for
17058c2ecf20Sopenharmony_ci *	      writing and releases it before returning.
17068c2ecf20Sopenharmony_ci *	    - This function calls functions which take vol->lcnbmp_lock for
17078c2ecf20Sopenharmony_ci *	      writing and release it before returning.
17088c2ecf20Sopenharmony_ci */
17098c2ecf20Sopenharmony_cistatic int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
17108c2ecf20Sopenharmony_ci{
17118c2ecf20Sopenharmony_ci	LCN lcn;
17128c2ecf20Sopenharmony_ci	VCN old_last_vcn;
17138c2ecf20Sopenharmony_ci	s64 min_nr, nr, ll;
17148c2ecf20Sopenharmony_ci	unsigned long flags;
17158c2ecf20Sopenharmony_ci	ntfs_inode *mft_ni;
17168c2ecf20Sopenharmony_ci	runlist_element *rl, *rl2;
17178c2ecf20Sopenharmony_ci	ntfs_attr_search_ctx *ctx = NULL;
17188c2ecf20Sopenharmony_ci	MFT_RECORD *mrec;
17198c2ecf20Sopenharmony_ci	ATTR_RECORD *a = NULL;
17208c2ecf20Sopenharmony_ci	int ret, mp_size;
17218c2ecf20Sopenharmony_ci	u32 old_alen = 0;
17228c2ecf20Sopenharmony_ci	bool mp_rebuilt = false;
17238c2ecf20Sopenharmony_ci
17248c2ecf20Sopenharmony_ci	ntfs_debug("Extending mft data allocation.");
17258c2ecf20Sopenharmony_ci	mft_ni = NTFS_I(vol->mft_ino);
17268c2ecf20Sopenharmony_ci	/*
17278c2ecf20Sopenharmony_ci	 * Determine the preferred allocation location, i.e. the last lcn of
17288c2ecf20Sopenharmony_ci	 * the mft data attribute.  The allocated size of the mft data
17298c2ecf20Sopenharmony_ci	 * attribute cannot be zero so we are ok to do this.
17308c2ecf20Sopenharmony_ci	 */
17318c2ecf20Sopenharmony_ci	down_write(&mft_ni->runlist.lock);
17328c2ecf20Sopenharmony_ci	read_lock_irqsave(&mft_ni->size_lock, flags);
17338c2ecf20Sopenharmony_ci	ll = mft_ni->allocated_size;
17348c2ecf20Sopenharmony_ci	read_unlock_irqrestore(&mft_ni->size_lock, flags);
17358c2ecf20Sopenharmony_ci	rl = ntfs_attr_find_vcn_nolock(mft_ni,
17368c2ecf20Sopenharmony_ci			(ll - 1) >> vol->cluster_size_bits, NULL);
17378c2ecf20Sopenharmony_ci	if (IS_ERR(rl) || unlikely(!rl->length || rl->lcn < 0)) {
17388c2ecf20Sopenharmony_ci		up_write(&mft_ni->runlist.lock);
17398c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to determine last allocated "
17408c2ecf20Sopenharmony_ci				"cluster of mft data attribute.");
17418c2ecf20Sopenharmony_ci		if (!IS_ERR(rl))
17428c2ecf20Sopenharmony_ci			ret = -EIO;
17438c2ecf20Sopenharmony_ci		else
17448c2ecf20Sopenharmony_ci			ret = PTR_ERR(rl);
17458c2ecf20Sopenharmony_ci		return ret;
17468c2ecf20Sopenharmony_ci	}
17478c2ecf20Sopenharmony_ci	lcn = rl->lcn + rl->length;
17488c2ecf20Sopenharmony_ci	ntfs_debug("Last lcn of mft data attribute is 0x%llx.", (long long)lcn);
17498c2ecf20Sopenharmony_ci	/* Minimum allocation is one mft record worth of clusters. */
17508c2ecf20Sopenharmony_ci	min_nr = vol->mft_record_size >> vol->cluster_size_bits;
17518c2ecf20Sopenharmony_ci	if (!min_nr)
17528c2ecf20Sopenharmony_ci		min_nr = 1;
17538c2ecf20Sopenharmony_ci	/* Want to allocate 16 mft records worth of clusters. */
17548c2ecf20Sopenharmony_ci	nr = vol->mft_record_size << 4 >> vol->cluster_size_bits;
17558c2ecf20Sopenharmony_ci	if (!nr)
17568c2ecf20Sopenharmony_ci		nr = min_nr;
17578c2ecf20Sopenharmony_ci	/* Ensure we do not go above 2^32-1 mft records. */
17588c2ecf20Sopenharmony_ci	read_lock_irqsave(&mft_ni->size_lock, flags);
17598c2ecf20Sopenharmony_ci	ll = mft_ni->allocated_size;
17608c2ecf20Sopenharmony_ci	read_unlock_irqrestore(&mft_ni->size_lock, flags);
17618c2ecf20Sopenharmony_ci	if (unlikely((ll + (nr << vol->cluster_size_bits)) >>
17628c2ecf20Sopenharmony_ci			vol->mft_record_size_bits >= (1ll << 32))) {
17638c2ecf20Sopenharmony_ci		nr = min_nr;
17648c2ecf20Sopenharmony_ci		if (unlikely((ll + (nr << vol->cluster_size_bits)) >>
17658c2ecf20Sopenharmony_ci				vol->mft_record_size_bits >= (1ll << 32))) {
17668c2ecf20Sopenharmony_ci			ntfs_warning(vol->sb, "Cannot allocate mft record "
17678c2ecf20Sopenharmony_ci					"because the maximum number of inodes "
17688c2ecf20Sopenharmony_ci					"(2^32) has already been reached.");
17698c2ecf20Sopenharmony_ci			up_write(&mft_ni->runlist.lock);
17708c2ecf20Sopenharmony_ci			return -ENOSPC;
17718c2ecf20Sopenharmony_ci		}
17728c2ecf20Sopenharmony_ci	}
17738c2ecf20Sopenharmony_ci	ntfs_debug("Trying mft data allocation with %s cluster count %lli.",
17748c2ecf20Sopenharmony_ci			nr > min_nr ? "default" : "minimal", (long long)nr);
17758c2ecf20Sopenharmony_ci	old_last_vcn = rl[1].vcn;
17768c2ecf20Sopenharmony_ci	do {
17778c2ecf20Sopenharmony_ci		rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE,
17788c2ecf20Sopenharmony_ci				true);
17798c2ecf20Sopenharmony_ci		if (!IS_ERR(rl2))
17808c2ecf20Sopenharmony_ci			break;
17818c2ecf20Sopenharmony_ci		if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) {
17828c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to allocate the minimal "
17838c2ecf20Sopenharmony_ci					"number of clusters (%lli) for the "
17848c2ecf20Sopenharmony_ci					"mft data attribute.", (long long)nr);
17858c2ecf20Sopenharmony_ci			up_write(&mft_ni->runlist.lock);
17868c2ecf20Sopenharmony_ci			return PTR_ERR(rl2);
17878c2ecf20Sopenharmony_ci		}
17888c2ecf20Sopenharmony_ci		/*
17898c2ecf20Sopenharmony_ci		 * There is not enough space to do the allocation, but there
17908c2ecf20Sopenharmony_ci		 * might be enough space to do a minimal allocation so try that
17918c2ecf20Sopenharmony_ci		 * before failing.
17928c2ecf20Sopenharmony_ci		 */
17938c2ecf20Sopenharmony_ci		nr = min_nr;
17948c2ecf20Sopenharmony_ci		ntfs_debug("Retrying mft data allocation with minimal cluster "
17958c2ecf20Sopenharmony_ci				"count %lli.", (long long)nr);
17968c2ecf20Sopenharmony_ci	} while (1);
17978c2ecf20Sopenharmony_ci	rl = ntfs_runlists_merge(mft_ni->runlist.rl, rl2);
17988c2ecf20Sopenharmony_ci	if (IS_ERR(rl)) {
17998c2ecf20Sopenharmony_ci		up_write(&mft_ni->runlist.lock);
18008c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to merge runlists for mft data "
18018c2ecf20Sopenharmony_ci				"attribute.");
18028c2ecf20Sopenharmony_ci		if (ntfs_cluster_free_from_rl(vol, rl2)) {
18038c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to deallocate clusters "
18048c2ecf20Sopenharmony_ci					"from the mft data attribute.%s", es);
18058c2ecf20Sopenharmony_ci			NVolSetErrors(vol);
18068c2ecf20Sopenharmony_ci		}
18078c2ecf20Sopenharmony_ci		ntfs_free(rl2);
18088c2ecf20Sopenharmony_ci		return PTR_ERR(rl);
18098c2ecf20Sopenharmony_ci	}
18108c2ecf20Sopenharmony_ci	mft_ni->runlist.rl = rl;
18118c2ecf20Sopenharmony_ci	ntfs_debug("Allocated %lli clusters.", (long long)nr);
18128c2ecf20Sopenharmony_ci	/* Find the last run in the new runlist. */
18138c2ecf20Sopenharmony_ci	for (; rl[1].length; rl++)
18148c2ecf20Sopenharmony_ci		;
18158c2ecf20Sopenharmony_ci	/* Update the attribute record as well. */
18168c2ecf20Sopenharmony_ci	mrec = map_mft_record(mft_ni);
18178c2ecf20Sopenharmony_ci	if (IS_ERR(mrec)) {
18188c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to map mft record.");
18198c2ecf20Sopenharmony_ci		ret = PTR_ERR(mrec);
18208c2ecf20Sopenharmony_ci		goto undo_alloc;
18218c2ecf20Sopenharmony_ci	}
18228c2ecf20Sopenharmony_ci	ctx = ntfs_attr_get_search_ctx(mft_ni, mrec);
18238c2ecf20Sopenharmony_ci	if (unlikely(!ctx)) {
18248c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to get search context.");
18258c2ecf20Sopenharmony_ci		ret = -ENOMEM;
18268c2ecf20Sopenharmony_ci		goto undo_alloc;
18278c2ecf20Sopenharmony_ci	}
18288c2ecf20Sopenharmony_ci	ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len,
18298c2ecf20Sopenharmony_ci			CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx);
18308c2ecf20Sopenharmony_ci	if (unlikely(ret)) {
18318c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to find last attribute extent of "
18328c2ecf20Sopenharmony_ci				"mft data attribute.");
18338c2ecf20Sopenharmony_ci		if (ret == -ENOENT)
18348c2ecf20Sopenharmony_ci			ret = -EIO;
18358c2ecf20Sopenharmony_ci		goto undo_alloc;
18368c2ecf20Sopenharmony_ci	}
18378c2ecf20Sopenharmony_ci	a = ctx->attr;
18388c2ecf20Sopenharmony_ci	ll = sle64_to_cpu(a->data.non_resident.lowest_vcn);
18398c2ecf20Sopenharmony_ci	/* Search back for the previous last allocated cluster of mft bitmap. */
18408c2ecf20Sopenharmony_ci	for (rl2 = rl; rl2 > mft_ni->runlist.rl; rl2--) {
18418c2ecf20Sopenharmony_ci		if (ll >= rl2->vcn)
18428c2ecf20Sopenharmony_ci			break;
18438c2ecf20Sopenharmony_ci	}
18448c2ecf20Sopenharmony_ci	BUG_ON(ll < rl2->vcn);
18458c2ecf20Sopenharmony_ci	BUG_ON(ll >= rl2->vcn + rl2->length);
18468c2ecf20Sopenharmony_ci	/* Get the size for the new mapping pairs array for this extent. */
18478c2ecf20Sopenharmony_ci	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
18488c2ecf20Sopenharmony_ci	if (unlikely(mp_size <= 0)) {
18498c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Get size for mapping pairs failed for "
18508c2ecf20Sopenharmony_ci				"mft data attribute extent.");
18518c2ecf20Sopenharmony_ci		ret = mp_size;
18528c2ecf20Sopenharmony_ci		if (!ret)
18538c2ecf20Sopenharmony_ci			ret = -EIO;
18548c2ecf20Sopenharmony_ci		goto undo_alloc;
18558c2ecf20Sopenharmony_ci	}
18568c2ecf20Sopenharmony_ci	/* Expand the attribute record if necessary. */
18578c2ecf20Sopenharmony_ci	old_alen = le32_to_cpu(a->length);
18588c2ecf20Sopenharmony_ci	ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size +
18598c2ecf20Sopenharmony_ci			le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
18608c2ecf20Sopenharmony_ci	if (unlikely(ret)) {
18618c2ecf20Sopenharmony_ci		if (ret != -ENOSPC) {
18628c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to resize attribute "
18638c2ecf20Sopenharmony_ci					"record for mft data attribute.");
18648c2ecf20Sopenharmony_ci			goto undo_alloc;
18658c2ecf20Sopenharmony_ci		}
18668c2ecf20Sopenharmony_ci		// TODO: Deal with this by moving this extent to a new mft
18678c2ecf20Sopenharmony_ci		// record or by starting a new extent in a new mft record or by
18688c2ecf20Sopenharmony_ci		// moving other attributes out of this mft record.
18698c2ecf20Sopenharmony_ci		// Note: Use the special reserved mft records and ensure that
18708c2ecf20Sopenharmony_ci		// this extent is not required to find the mft record in
18718c2ecf20Sopenharmony_ci		// question.  If no free special records left we would need to
18728c2ecf20Sopenharmony_ci		// move an existing record away, insert ours in its place, and
18738c2ecf20Sopenharmony_ci		// then place the moved record into the newly allocated space
18748c2ecf20Sopenharmony_ci		// and we would then need to update all references to this mft
18758c2ecf20Sopenharmony_ci		// record appropriately.  This is rather complicated...
18768c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Not enough space in this mft record to "
18778c2ecf20Sopenharmony_ci				"accommodate extended mft data attribute "
18788c2ecf20Sopenharmony_ci				"extent.  Cannot handle this yet.");
18798c2ecf20Sopenharmony_ci		ret = -EOPNOTSUPP;
18808c2ecf20Sopenharmony_ci		goto undo_alloc;
18818c2ecf20Sopenharmony_ci	}
18828c2ecf20Sopenharmony_ci	mp_rebuilt = true;
18838c2ecf20Sopenharmony_ci	/* Generate the mapping pairs array directly into the attr record. */
18848c2ecf20Sopenharmony_ci	ret = ntfs_mapping_pairs_build(vol, (u8*)a +
18858c2ecf20Sopenharmony_ci			le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
18868c2ecf20Sopenharmony_ci			mp_size, rl2, ll, -1, NULL);
18878c2ecf20Sopenharmony_ci	if (unlikely(ret)) {
18888c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to build mapping pairs array of "
18898c2ecf20Sopenharmony_ci				"mft data attribute.");
18908c2ecf20Sopenharmony_ci		goto undo_alloc;
18918c2ecf20Sopenharmony_ci	}
18928c2ecf20Sopenharmony_ci	/* Update the highest_vcn. */
18938c2ecf20Sopenharmony_ci	a->data.non_resident.highest_vcn = cpu_to_sle64(rl[1].vcn - 1);
18948c2ecf20Sopenharmony_ci	/*
18958c2ecf20Sopenharmony_ci	 * We now have extended the mft data allocated_size by nr clusters.
18968c2ecf20Sopenharmony_ci	 * Reflect this in the ntfs_inode structure and the attribute record.
18978c2ecf20Sopenharmony_ci	 * @rl is the last (non-terminator) runlist element of mft data
18988c2ecf20Sopenharmony_ci	 * attribute.
18998c2ecf20Sopenharmony_ci	 */
19008c2ecf20Sopenharmony_ci	if (a->data.non_resident.lowest_vcn) {
19018c2ecf20Sopenharmony_ci		/*
19028c2ecf20Sopenharmony_ci		 * We are not in the first attribute extent, switch to it, but
19038c2ecf20Sopenharmony_ci		 * first ensure the changes will make it to disk later.
19048c2ecf20Sopenharmony_ci		 */
19058c2ecf20Sopenharmony_ci		flush_dcache_mft_record_page(ctx->ntfs_ino);
19068c2ecf20Sopenharmony_ci		mark_mft_record_dirty(ctx->ntfs_ino);
19078c2ecf20Sopenharmony_ci		ntfs_attr_reinit_search_ctx(ctx);
19088c2ecf20Sopenharmony_ci		ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name,
19098c2ecf20Sopenharmony_ci				mft_ni->name_len, CASE_SENSITIVE, 0, NULL, 0,
19108c2ecf20Sopenharmony_ci				ctx);
19118c2ecf20Sopenharmony_ci		if (unlikely(ret)) {
19128c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to find first attribute "
19138c2ecf20Sopenharmony_ci					"extent of mft data attribute.");
19148c2ecf20Sopenharmony_ci			goto restore_undo_alloc;
19158c2ecf20Sopenharmony_ci		}
19168c2ecf20Sopenharmony_ci		a = ctx->attr;
19178c2ecf20Sopenharmony_ci	}
19188c2ecf20Sopenharmony_ci	write_lock_irqsave(&mft_ni->size_lock, flags);
19198c2ecf20Sopenharmony_ci	mft_ni->allocated_size += nr << vol->cluster_size_bits;
19208c2ecf20Sopenharmony_ci	a->data.non_resident.allocated_size =
19218c2ecf20Sopenharmony_ci			cpu_to_sle64(mft_ni->allocated_size);
19228c2ecf20Sopenharmony_ci	write_unlock_irqrestore(&mft_ni->size_lock, flags);
19238c2ecf20Sopenharmony_ci	/* Ensure the changes make it to disk. */
19248c2ecf20Sopenharmony_ci	flush_dcache_mft_record_page(ctx->ntfs_ino);
19258c2ecf20Sopenharmony_ci	mark_mft_record_dirty(ctx->ntfs_ino);
19268c2ecf20Sopenharmony_ci	ntfs_attr_put_search_ctx(ctx);
19278c2ecf20Sopenharmony_ci	unmap_mft_record(mft_ni);
19288c2ecf20Sopenharmony_ci	up_write(&mft_ni->runlist.lock);
19298c2ecf20Sopenharmony_ci	ntfs_debug("Done.");
19308c2ecf20Sopenharmony_ci	return 0;
19318c2ecf20Sopenharmony_cirestore_undo_alloc:
19328c2ecf20Sopenharmony_ci	ntfs_attr_reinit_search_ctx(ctx);
19338c2ecf20Sopenharmony_ci	if (ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len,
19348c2ecf20Sopenharmony_ci			CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx)) {
19358c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to find last attribute extent of "
19368c2ecf20Sopenharmony_ci				"mft data attribute.%s", es);
19378c2ecf20Sopenharmony_ci		write_lock_irqsave(&mft_ni->size_lock, flags);
19388c2ecf20Sopenharmony_ci		mft_ni->allocated_size += nr << vol->cluster_size_bits;
19398c2ecf20Sopenharmony_ci		write_unlock_irqrestore(&mft_ni->size_lock, flags);
19408c2ecf20Sopenharmony_ci		ntfs_attr_put_search_ctx(ctx);
19418c2ecf20Sopenharmony_ci		unmap_mft_record(mft_ni);
19428c2ecf20Sopenharmony_ci		up_write(&mft_ni->runlist.lock);
19438c2ecf20Sopenharmony_ci		/*
19448c2ecf20Sopenharmony_ci		 * The only thing that is now wrong is ->allocated_size of the
19458c2ecf20Sopenharmony_ci		 * base attribute extent which chkdsk should be able to fix.
19468c2ecf20Sopenharmony_ci		 */
19478c2ecf20Sopenharmony_ci		NVolSetErrors(vol);
19488c2ecf20Sopenharmony_ci		return ret;
19498c2ecf20Sopenharmony_ci	}
19508c2ecf20Sopenharmony_ci	ctx->attr->data.non_resident.highest_vcn =
19518c2ecf20Sopenharmony_ci			cpu_to_sle64(old_last_vcn - 1);
19528c2ecf20Sopenharmony_ciundo_alloc:
19538c2ecf20Sopenharmony_ci	if (ntfs_cluster_free(mft_ni, old_last_vcn, -1, ctx) < 0) {
19548c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to free clusters from mft data "
19558c2ecf20Sopenharmony_ci				"attribute.%s", es);
19568c2ecf20Sopenharmony_ci		NVolSetErrors(vol);
19578c2ecf20Sopenharmony_ci	}
19588c2ecf20Sopenharmony_ci	a = ctx->attr;
19598c2ecf20Sopenharmony_ci	if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) {
19608c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to truncate mft data attribute "
19618c2ecf20Sopenharmony_ci				"runlist.%s", es);
19628c2ecf20Sopenharmony_ci		NVolSetErrors(vol);
19638c2ecf20Sopenharmony_ci	}
19648c2ecf20Sopenharmony_ci	if (mp_rebuilt && !IS_ERR(ctx->mrec)) {
19658c2ecf20Sopenharmony_ci		if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
19668c2ecf20Sopenharmony_ci				a->data.non_resident.mapping_pairs_offset),
19678c2ecf20Sopenharmony_ci				old_alen - le16_to_cpu(
19688c2ecf20Sopenharmony_ci				a->data.non_resident.mapping_pairs_offset),
19698c2ecf20Sopenharmony_ci				rl2, ll, -1, NULL)) {
19708c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to restore mapping pairs "
19718c2ecf20Sopenharmony_ci					"array.%s", es);
19728c2ecf20Sopenharmony_ci			NVolSetErrors(vol);
19738c2ecf20Sopenharmony_ci		}
19748c2ecf20Sopenharmony_ci		if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) {
19758c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to restore attribute "
19768c2ecf20Sopenharmony_ci					"record.%s", es);
19778c2ecf20Sopenharmony_ci			NVolSetErrors(vol);
19788c2ecf20Sopenharmony_ci		}
19798c2ecf20Sopenharmony_ci		flush_dcache_mft_record_page(ctx->ntfs_ino);
19808c2ecf20Sopenharmony_ci		mark_mft_record_dirty(ctx->ntfs_ino);
19818c2ecf20Sopenharmony_ci	} else if (IS_ERR(ctx->mrec)) {
19828c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to restore attribute search "
19838c2ecf20Sopenharmony_ci				"context.%s", es);
19848c2ecf20Sopenharmony_ci		NVolSetErrors(vol);
19858c2ecf20Sopenharmony_ci	}
19868c2ecf20Sopenharmony_ci	if (ctx)
19878c2ecf20Sopenharmony_ci		ntfs_attr_put_search_ctx(ctx);
19888c2ecf20Sopenharmony_ci	if (!IS_ERR(mrec))
19898c2ecf20Sopenharmony_ci		unmap_mft_record(mft_ni);
19908c2ecf20Sopenharmony_ci	up_write(&mft_ni->runlist.lock);
19918c2ecf20Sopenharmony_ci	return ret;
19928c2ecf20Sopenharmony_ci}
19938c2ecf20Sopenharmony_ci
19948c2ecf20Sopenharmony_ci/**
19958c2ecf20Sopenharmony_ci * ntfs_mft_record_layout - layout an mft record into a memory buffer
19968c2ecf20Sopenharmony_ci * @vol:	volume to which the mft record will belong
19978c2ecf20Sopenharmony_ci * @mft_no:	mft reference specifying the mft record number
19988c2ecf20Sopenharmony_ci * @m:		destination buffer of size >= @vol->mft_record_size bytes
19998c2ecf20Sopenharmony_ci *
20008c2ecf20Sopenharmony_ci * Layout an empty, unused mft record with the mft record number @mft_no into
20018c2ecf20Sopenharmony_ci * the buffer @m.  The volume @vol is needed because the mft record structure
20028c2ecf20Sopenharmony_ci * was modified in NTFS 3.1 so we need to know which volume version this mft
20038c2ecf20Sopenharmony_ci * record will be used on.
20048c2ecf20Sopenharmony_ci *
20058c2ecf20Sopenharmony_ci * Return 0 on success and -errno on error.
20068c2ecf20Sopenharmony_ci */
20078c2ecf20Sopenharmony_cistatic int ntfs_mft_record_layout(const ntfs_volume *vol, const s64 mft_no,
20088c2ecf20Sopenharmony_ci		MFT_RECORD *m)
20098c2ecf20Sopenharmony_ci{
20108c2ecf20Sopenharmony_ci	ATTR_RECORD *a;
20118c2ecf20Sopenharmony_ci
20128c2ecf20Sopenharmony_ci	ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no);
20138c2ecf20Sopenharmony_ci	if (mft_no >= (1ll << 32)) {
20148c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Mft record number 0x%llx exceeds "
20158c2ecf20Sopenharmony_ci				"maximum of 2^32.", (long long)mft_no);
20168c2ecf20Sopenharmony_ci		return -ERANGE;
20178c2ecf20Sopenharmony_ci	}
20188c2ecf20Sopenharmony_ci	/* Start by clearing the whole mft record to gives us a clean slate. */
20198c2ecf20Sopenharmony_ci	memset(m, 0, vol->mft_record_size);
20208c2ecf20Sopenharmony_ci	/* Aligned to 2-byte boundary. */
20218c2ecf20Sopenharmony_ci	if (vol->major_ver < 3 || (vol->major_ver == 3 && !vol->minor_ver))
20228c2ecf20Sopenharmony_ci		m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD_OLD) + 1) & ~1);
20238c2ecf20Sopenharmony_ci	else {
20248c2ecf20Sopenharmony_ci		m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD) + 1) & ~1);
20258c2ecf20Sopenharmony_ci		/*
20268c2ecf20Sopenharmony_ci		 * Set the NTFS 3.1+ specific fields while we know that the
20278c2ecf20Sopenharmony_ci		 * volume version is 3.1+.
20288c2ecf20Sopenharmony_ci		 */
20298c2ecf20Sopenharmony_ci		m->reserved = 0;
20308c2ecf20Sopenharmony_ci		m->mft_record_number = cpu_to_le32((u32)mft_no);
20318c2ecf20Sopenharmony_ci	}
20328c2ecf20Sopenharmony_ci	m->magic = magic_FILE;
20338c2ecf20Sopenharmony_ci	if (vol->mft_record_size >= NTFS_BLOCK_SIZE)
20348c2ecf20Sopenharmony_ci		m->usa_count = cpu_to_le16(vol->mft_record_size /
20358c2ecf20Sopenharmony_ci				NTFS_BLOCK_SIZE + 1);
20368c2ecf20Sopenharmony_ci	else {
20378c2ecf20Sopenharmony_ci		m->usa_count = cpu_to_le16(1);
20388c2ecf20Sopenharmony_ci		ntfs_warning(vol->sb, "Sector size is bigger than mft record "
20398c2ecf20Sopenharmony_ci				"size.  Setting usa_count to 1.  If chkdsk "
20408c2ecf20Sopenharmony_ci				"reports this as corruption, please email "
20418c2ecf20Sopenharmony_ci				"linux-ntfs-dev@lists.sourceforge.net stating "
20428c2ecf20Sopenharmony_ci				"that you saw this message and that the "
20438c2ecf20Sopenharmony_ci				"modified filesystem created was corrupt.  "
20448c2ecf20Sopenharmony_ci				"Thank you.");
20458c2ecf20Sopenharmony_ci	}
20468c2ecf20Sopenharmony_ci	/* Set the update sequence number to 1. */
20478c2ecf20Sopenharmony_ci	*(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = cpu_to_le16(1);
20488c2ecf20Sopenharmony_ci	m->lsn = 0;
20498c2ecf20Sopenharmony_ci	m->sequence_number = cpu_to_le16(1);
20508c2ecf20Sopenharmony_ci	m->link_count = 0;
20518c2ecf20Sopenharmony_ci	/*
20528c2ecf20Sopenharmony_ci	 * Place the attributes straight after the update sequence array,
20538c2ecf20Sopenharmony_ci	 * aligned to 8-byte boundary.
20548c2ecf20Sopenharmony_ci	 */
20558c2ecf20Sopenharmony_ci	m->attrs_offset = cpu_to_le16((le16_to_cpu(m->usa_ofs) +
20568c2ecf20Sopenharmony_ci			(le16_to_cpu(m->usa_count) << 1) + 7) & ~7);
20578c2ecf20Sopenharmony_ci	m->flags = 0;
20588c2ecf20Sopenharmony_ci	/*
20598c2ecf20Sopenharmony_ci	 * Using attrs_offset plus eight bytes (for the termination attribute).
20608c2ecf20Sopenharmony_ci	 * attrs_offset is already aligned to 8-byte boundary, so no need to
20618c2ecf20Sopenharmony_ci	 * align again.
20628c2ecf20Sopenharmony_ci	 */
20638c2ecf20Sopenharmony_ci	m->bytes_in_use = cpu_to_le32(le16_to_cpu(m->attrs_offset) + 8);
20648c2ecf20Sopenharmony_ci	m->bytes_allocated = cpu_to_le32(vol->mft_record_size);
20658c2ecf20Sopenharmony_ci	m->base_mft_record = 0;
20668c2ecf20Sopenharmony_ci	m->next_attr_instance = 0;
20678c2ecf20Sopenharmony_ci	/* Add the termination attribute. */
20688c2ecf20Sopenharmony_ci	a = (ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset));
20698c2ecf20Sopenharmony_ci	a->type = AT_END;
20708c2ecf20Sopenharmony_ci	a->length = 0;
20718c2ecf20Sopenharmony_ci	ntfs_debug("Done.");
20728c2ecf20Sopenharmony_ci	return 0;
20738c2ecf20Sopenharmony_ci}
20748c2ecf20Sopenharmony_ci
20758c2ecf20Sopenharmony_ci/**
20768c2ecf20Sopenharmony_ci * ntfs_mft_record_format - format an mft record on an ntfs volume
20778c2ecf20Sopenharmony_ci * @vol:	volume on which to format the mft record
20788c2ecf20Sopenharmony_ci * @mft_no:	mft record number to format
20798c2ecf20Sopenharmony_ci *
20808c2ecf20Sopenharmony_ci * Format the mft record @mft_no in $MFT/$DATA, i.e. lay out an empty, unused
20818c2ecf20Sopenharmony_ci * mft record into the appropriate place of the mft data attribute.  This is
20828c2ecf20Sopenharmony_ci * used when extending the mft data attribute.
20838c2ecf20Sopenharmony_ci *
20848c2ecf20Sopenharmony_ci * Return 0 on success and -errno on error.
20858c2ecf20Sopenharmony_ci */
20868c2ecf20Sopenharmony_cistatic int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no)
20878c2ecf20Sopenharmony_ci{
20888c2ecf20Sopenharmony_ci	loff_t i_size;
20898c2ecf20Sopenharmony_ci	struct inode *mft_vi = vol->mft_ino;
20908c2ecf20Sopenharmony_ci	struct page *page;
20918c2ecf20Sopenharmony_ci	MFT_RECORD *m;
20928c2ecf20Sopenharmony_ci	pgoff_t index, end_index;
20938c2ecf20Sopenharmony_ci	unsigned int ofs;
20948c2ecf20Sopenharmony_ci	int err;
20958c2ecf20Sopenharmony_ci
20968c2ecf20Sopenharmony_ci	ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no);
20978c2ecf20Sopenharmony_ci	/*
20988c2ecf20Sopenharmony_ci	 * The index into the page cache and the offset within the page cache
20998c2ecf20Sopenharmony_ci	 * page of the wanted mft record.
21008c2ecf20Sopenharmony_ci	 */
21018c2ecf20Sopenharmony_ci	index = mft_no << vol->mft_record_size_bits >> PAGE_SHIFT;
21028c2ecf20Sopenharmony_ci	ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_MASK;
21038c2ecf20Sopenharmony_ci	/* The maximum valid index into the page cache for $MFT's data. */
21048c2ecf20Sopenharmony_ci	i_size = i_size_read(mft_vi);
21058c2ecf20Sopenharmony_ci	end_index = i_size >> PAGE_SHIFT;
21068c2ecf20Sopenharmony_ci	if (unlikely(index >= end_index)) {
21078c2ecf20Sopenharmony_ci		if (unlikely(index > end_index || ofs + vol->mft_record_size >=
21088c2ecf20Sopenharmony_ci				(i_size & ~PAGE_MASK))) {
21098c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Tried to format non-existing mft "
21108c2ecf20Sopenharmony_ci					"record 0x%llx.", (long long)mft_no);
21118c2ecf20Sopenharmony_ci			return -ENOENT;
21128c2ecf20Sopenharmony_ci		}
21138c2ecf20Sopenharmony_ci	}
21148c2ecf20Sopenharmony_ci	/* Read, map, and pin the page containing the mft record. */
21158c2ecf20Sopenharmony_ci	page = ntfs_map_page(mft_vi->i_mapping, index);
21168c2ecf20Sopenharmony_ci	if (IS_ERR(page)) {
21178c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to map page containing mft record "
21188c2ecf20Sopenharmony_ci				"to format 0x%llx.", (long long)mft_no);
21198c2ecf20Sopenharmony_ci		return PTR_ERR(page);
21208c2ecf20Sopenharmony_ci	}
21218c2ecf20Sopenharmony_ci	lock_page(page);
21228c2ecf20Sopenharmony_ci	BUG_ON(!PageUptodate(page));
21238c2ecf20Sopenharmony_ci	ClearPageUptodate(page);
21248c2ecf20Sopenharmony_ci	m = (MFT_RECORD*)((u8*)page_address(page) + ofs);
21258c2ecf20Sopenharmony_ci	err = ntfs_mft_record_layout(vol, mft_no, m);
21268c2ecf20Sopenharmony_ci	if (unlikely(err)) {
21278c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to layout mft record 0x%llx.",
21288c2ecf20Sopenharmony_ci				(long long)mft_no);
21298c2ecf20Sopenharmony_ci		SetPageUptodate(page);
21308c2ecf20Sopenharmony_ci		unlock_page(page);
21318c2ecf20Sopenharmony_ci		ntfs_unmap_page(page);
21328c2ecf20Sopenharmony_ci		return err;
21338c2ecf20Sopenharmony_ci	}
21348c2ecf20Sopenharmony_ci	flush_dcache_page(page);
21358c2ecf20Sopenharmony_ci	SetPageUptodate(page);
21368c2ecf20Sopenharmony_ci	unlock_page(page);
21378c2ecf20Sopenharmony_ci	/*
21388c2ecf20Sopenharmony_ci	 * Make sure the mft record is written out to disk.  We could use
21398c2ecf20Sopenharmony_ci	 * ilookup5() to check if an inode is in icache and so on but this is
21408c2ecf20Sopenharmony_ci	 * unnecessary as ntfs_writepage() will write the dirty record anyway.
21418c2ecf20Sopenharmony_ci	 */
21428c2ecf20Sopenharmony_ci	mark_ntfs_record_dirty(page, ofs);
21438c2ecf20Sopenharmony_ci	ntfs_unmap_page(page);
21448c2ecf20Sopenharmony_ci	ntfs_debug("Done.");
21458c2ecf20Sopenharmony_ci	return 0;
21468c2ecf20Sopenharmony_ci}
21478c2ecf20Sopenharmony_ci
21488c2ecf20Sopenharmony_ci/**
21498c2ecf20Sopenharmony_ci * ntfs_mft_record_alloc - allocate an mft record on an ntfs volume
21508c2ecf20Sopenharmony_ci * @vol:	[IN]  volume on which to allocate the mft record
21518c2ecf20Sopenharmony_ci * @mode:	[IN]  mode if want a file or directory, i.e. base inode or 0
21528c2ecf20Sopenharmony_ci * @base_ni:	[IN]  open base inode if allocating an extent mft record or NULL
21538c2ecf20Sopenharmony_ci * @mrec:	[OUT] on successful return this is the mapped mft record
21548c2ecf20Sopenharmony_ci *
21558c2ecf20Sopenharmony_ci * Allocate an mft record in $MFT/$DATA of an open ntfs volume @vol.
21568c2ecf20Sopenharmony_ci *
21578c2ecf20Sopenharmony_ci * If @base_ni is NULL make the mft record a base mft record, i.e. a file or
21588c2ecf20Sopenharmony_ci * directory inode, and allocate it at the default allocator position.  In
21598c2ecf20Sopenharmony_ci * this case @mode is the file mode as given to us by the caller.  We in
21608c2ecf20Sopenharmony_ci * particular use @mode to distinguish whether a file or a directory is being
21618c2ecf20Sopenharmony_ci * created (S_ISREG(mode) and S_ISDIR(mode), respectively).
21628c2ecf20Sopenharmony_ci *
21638c2ecf20Sopenharmony_ci * If @base_ni is not NULL make the allocated mft record an extent record,
21648c2ecf20Sopenharmony_ci * allocate it starting at the mft record after the base mft record and attach
21658c2ecf20Sopenharmony_ci * the allocated and opened ntfs inode to the base inode @base_ni.  In this
21668c2ecf20Sopenharmony_ci * case @mode must be 0 as it is meaningless for extent inodes.
21678c2ecf20Sopenharmony_ci *
21688c2ecf20Sopenharmony_ci * You need to check the return value with IS_ERR().  If false, the function
21698c2ecf20Sopenharmony_ci * was successful and the return value is the now opened ntfs inode of the
21708c2ecf20Sopenharmony_ci * allocated mft record.  *@mrec is then set to the allocated, mapped, pinned,
21718c2ecf20Sopenharmony_ci * and locked mft record.  If IS_ERR() is true, the function failed and the
21728c2ecf20Sopenharmony_ci * error code is obtained from PTR_ERR(return value).  *@mrec is undefined in
21738c2ecf20Sopenharmony_ci * this case.
21748c2ecf20Sopenharmony_ci *
21758c2ecf20Sopenharmony_ci * Allocation strategy:
21768c2ecf20Sopenharmony_ci *
21778c2ecf20Sopenharmony_ci * To find a free mft record, we scan the mft bitmap for a zero bit.  To
21788c2ecf20Sopenharmony_ci * optimize this we start scanning at the place specified by @base_ni or if
21798c2ecf20Sopenharmony_ci * @base_ni is NULL we start where we last stopped and we perform wrap around
21808c2ecf20Sopenharmony_ci * when we reach the end.  Note, we do not try to allocate mft records below
21818c2ecf20Sopenharmony_ci * number 24 because numbers 0 to 15 are the defined system files anyway and 16
21828c2ecf20Sopenharmony_ci * to 23 are special in that they are used for storing extension mft records
21838c2ecf20Sopenharmony_ci * for the $DATA attribute of $MFT.  This is required to avoid the possibility
21848c2ecf20Sopenharmony_ci * of creating a runlist with a circular dependency which once written to disk
21858c2ecf20Sopenharmony_ci * can never be read in again.  Windows will only use records 16 to 23 for
21868c2ecf20Sopenharmony_ci * normal files if the volume is completely out of space.  We never use them
21878c2ecf20Sopenharmony_ci * which means that when the volume is really out of space we cannot create any
21888c2ecf20Sopenharmony_ci * more files while Windows can still create up to 8 small files.  We can start
21898c2ecf20Sopenharmony_ci * doing this at some later time, it does not matter much for now.
21908c2ecf20Sopenharmony_ci *
21918c2ecf20Sopenharmony_ci * When scanning the mft bitmap, we only search up to the last allocated mft
21928c2ecf20Sopenharmony_ci * record.  If there are no free records left in the range 24 to number of
21938c2ecf20Sopenharmony_ci * allocated mft records, then we extend the $MFT/$DATA attribute in order to
21948c2ecf20Sopenharmony_ci * create free mft records.  We extend the allocated size of $MFT/$DATA by 16
21958c2ecf20Sopenharmony_ci * records at a time or one cluster, if cluster size is above 16kiB.  If there
21968c2ecf20Sopenharmony_ci * is not sufficient space to do this, we try to extend by a single mft record
21978c2ecf20Sopenharmony_ci * or one cluster, if cluster size is above the mft record size.
21988c2ecf20Sopenharmony_ci *
21998c2ecf20Sopenharmony_ci * No matter how many mft records we allocate, we initialize only the first
22008c2ecf20Sopenharmony_ci * allocated mft record, incrementing mft data size and initialized size
22018c2ecf20Sopenharmony_ci * accordingly, open an ntfs_inode for it and return it to the caller, unless
22028c2ecf20Sopenharmony_ci * there are less than 24 mft records, in which case we allocate and initialize
22038c2ecf20Sopenharmony_ci * mft records until we reach record 24 which we consider as the first free mft
22048c2ecf20Sopenharmony_ci * record for use by normal files.
22058c2ecf20Sopenharmony_ci *
22068c2ecf20Sopenharmony_ci * If during any stage we overflow the initialized data in the mft bitmap, we
22078c2ecf20Sopenharmony_ci * extend the initialized size (and data size) by 8 bytes, allocating another
22088c2ecf20Sopenharmony_ci * cluster if required.  The bitmap data size has to be at least equal to the
22098c2ecf20Sopenharmony_ci * number of mft records in the mft, but it can be bigger, in which case the
22108c2ecf20Sopenharmony_ci * superfluous bits are padded with zeroes.
22118c2ecf20Sopenharmony_ci *
22128c2ecf20Sopenharmony_ci * Thus, when we return successfully (IS_ERR() is false), we will have:
22138c2ecf20Sopenharmony_ci *	- initialized / extended the mft bitmap if necessary,
22148c2ecf20Sopenharmony_ci *	- initialized / extended the mft data if necessary,
22158c2ecf20Sopenharmony_ci *	- set the bit corresponding to the mft record being allocated in the
22168c2ecf20Sopenharmony_ci *	  mft bitmap,
22178c2ecf20Sopenharmony_ci *	- opened an ntfs_inode for the allocated mft record, and we will have
22188c2ecf20Sopenharmony_ci *	- returned the ntfs_inode as well as the allocated mapped, pinned, and
22198c2ecf20Sopenharmony_ci *	  locked mft record.
22208c2ecf20Sopenharmony_ci *
22218c2ecf20Sopenharmony_ci * On error, the volume will be left in a consistent state and no record will
22228c2ecf20Sopenharmony_ci * be allocated.  If rolling back a partial operation fails, we may leave some
22238c2ecf20Sopenharmony_ci * inconsistent metadata in which case we set NVolErrors() so the volume is
22248c2ecf20Sopenharmony_ci * left dirty when unmounted.
22258c2ecf20Sopenharmony_ci *
22268c2ecf20Sopenharmony_ci * Note, this function cannot make use of most of the normal functions, like
22278c2ecf20Sopenharmony_ci * for example for attribute resizing, etc, because when the run list overflows
22288c2ecf20Sopenharmony_ci * the base mft record and an attribute list is used, it is very important that
22298c2ecf20Sopenharmony_ci * the extension mft records used to store the $DATA attribute of $MFT can be
22308c2ecf20Sopenharmony_ci * reached without having to read the information contained inside them, as
22318c2ecf20Sopenharmony_ci * this would make it impossible to find them in the first place after the
22328c2ecf20Sopenharmony_ci * volume is unmounted.  $MFT/$BITMAP probably does not need to follow this
22338c2ecf20Sopenharmony_ci * rule because the bitmap is not essential for finding the mft records, but on
22348c2ecf20Sopenharmony_ci * the other hand, handling the bitmap in this special way would make life
22358c2ecf20Sopenharmony_ci * easier because otherwise there might be circular invocations of functions
22368c2ecf20Sopenharmony_ci * when reading the bitmap.
22378c2ecf20Sopenharmony_ci */
22388c2ecf20Sopenharmony_cintfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
22398c2ecf20Sopenharmony_ci		ntfs_inode *base_ni, MFT_RECORD **mrec)
22408c2ecf20Sopenharmony_ci{
22418c2ecf20Sopenharmony_ci	s64 ll, bit, old_data_initialized, old_data_size;
22428c2ecf20Sopenharmony_ci	unsigned long flags;
22438c2ecf20Sopenharmony_ci	struct inode *vi;
22448c2ecf20Sopenharmony_ci	struct page *page;
22458c2ecf20Sopenharmony_ci	ntfs_inode *mft_ni, *mftbmp_ni, *ni;
22468c2ecf20Sopenharmony_ci	ntfs_attr_search_ctx *ctx;
22478c2ecf20Sopenharmony_ci	MFT_RECORD *m;
22488c2ecf20Sopenharmony_ci	ATTR_RECORD *a;
22498c2ecf20Sopenharmony_ci	pgoff_t index;
22508c2ecf20Sopenharmony_ci	unsigned int ofs;
22518c2ecf20Sopenharmony_ci	int err;
22528c2ecf20Sopenharmony_ci	le16 seq_no, usn;
22538c2ecf20Sopenharmony_ci	bool record_formatted = false;
22548c2ecf20Sopenharmony_ci
22558c2ecf20Sopenharmony_ci	if (base_ni) {
22568c2ecf20Sopenharmony_ci		ntfs_debug("Entering (allocating an extent mft record for "
22578c2ecf20Sopenharmony_ci				"base mft record 0x%llx).",
22588c2ecf20Sopenharmony_ci				(long long)base_ni->mft_no);
22598c2ecf20Sopenharmony_ci		/* @mode and @base_ni are mutually exclusive. */
22608c2ecf20Sopenharmony_ci		BUG_ON(mode);
22618c2ecf20Sopenharmony_ci	} else
22628c2ecf20Sopenharmony_ci		ntfs_debug("Entering (allocating a base mft record).");
22638c2ecf20Sopenharmony_ci	if (mode) {
22648c2ecf20Sopenharmony_ci		/* @mode and @base_ni are mutually exclusive. */
22658c2ecf20Sopenharmony_ci		BUG_ON(base_ni);
22668c2ecf20Sopenharmony_ci		/* We only support creation of normal files and directories. */
22678c2ecf20Sopenharmony_ci		if (!S_ISREG(mode) && !S_ISDIR(mode))
22688c2ecf20Sopenharmony_ci			return ERR_PTR(-EOPNOTSUPP);
22698c2ecf20Sopenharmony_ci	}
22708c2ecf20Sopenharmony_ci	BUG_ON(!mrec);
22718c2ecf20Sopenharmony_ci	mft_ni = NTFS_I(vol->mft_ino);
22728c2ecf20Sopenharmony_ci	mftbmp_ni = NTFS_I(vol->mftbmp_ino);
22738c2ecf20Sopenharmony_ci	down_write(&vol->mftbmp_lock);
22748c2ecf20Sopenharmony_ci	bit = ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(vol, base_ni);
22758c2ecf20Sopenharmony_ci	if (bit >= 0) {
22768c2ecf20Sopenharmony_ci		ntfs_debug("Found and allocated free record (#1), bit 0x%llx.",
22778c2ecf20Sopenharmony_ci				(long long)bit);
22788c2ecf20Sopenharmony_ci		goto have_alloc_rec;
22798c2ecf20Sopenharmony_ci	}
22808c2ecf20Sopenharmony_ci	if (bit != -ENOSPC) {
22818c2ecf20Sopenharmony_ci		up_write(&vol->mftbmp_lock);
22828c2ecf20Sopenharmony_ci		return ERR_PTR(bit);
22838c2ecf20Sopenharmony_ci	}
22848c2ecf20Sopenharmony_ci	/*
22858c2ecf20Sopenharmony_ci	 * No free mft records left.  If the mft bitmap already covers more
22868c2ecf20Sopenharmony_ci	 * than the currently used mft records, the next records are all free,
22878c2ecf20Sopenharmony_ci	 * so we can simply allocate the first unused mft record.
22888c2ecf20Sopenharmony_ci	 * Note: We also have to make sure that the mft bitmap at least covers
22898c2ecf20Sopenharmony_ci	 * the first 24 mft records as they are special and whilst they may not
22908c2ecf20Sopenharmony_ci	 * be in use, we do not allocate from them.
22918c2ecf20Sopenharmony_ci	 */
22928c2ecf20Sopenharmony_ci	read_lock_irqsave(&mft_ni->size_lock, flags);
22938c2ecf20Sopenharmony_ci	ll = mft_ni->initialized_size >> vol->mft_record_size_bits;
22948c2ecf20Sopenharmony_ci	read_unlock_irqrestore(&mft_ni->size_lock, flags);
22958c2ecf20Sopenharmony_ci	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
22968c2ecf20Sopenharmony_ci	old_data_initialized = mftbmp_ni->initialized_size;
22978c2ecf20Sopenharmony_ci	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
22988c2ecf20Sopenharmony_ci	if (old_data_initialized << 3 > ll && old_data_initialized > 3) {
22998c2ecf20Sopenharmony_ci		bit = ll;
23008c2ecf20Sopenharmony_ci		if (bit < 24)
23018c2ecf20Sopenharmony_ci			bit = 24;
23028c2ecf20Sopenharmony_ci		if (unlikely(bit >= (1ll << 32)))
23038c2ecf20Sopenharmony_ci			goto max_err_out;
23048c2ecf20Sopenharmony_ci		ntfs_debug("Found free record (#2), bit 0x%llx.",
23058c2ecf20Sopenharmony_ci				(long long)bit);
23068c2ecf20Sopenharmony_ci		goto found_free_rec;
23078c2ecf20Sopenharmony_ci	}
23088c2ecf20Sopenharmony_ci	/*
23098c2ecf20Sopenharmony_ci	 * The mft bitmap needs to be expanded until it covers the first unused
23108c2ecf20Sopenharmony_ci	 * mft record that we can allocate.
23118c2ecf20Sopenharmony_ci	 * Note: The smallest mft record we allocate is mft record 24.
23128c2ecf20Sopenharmony_ci	 */
23138c2ecf20Sopenharmony_ci	bit = old_data_initialized << 3;
23148c2ecf20Sopenharmony_ci	if (unlikely(bit >= (1ll << 32)))
23158c2ecf20Sopenharmony_ci		goto max_err_out;
23168c2ecf20Sopenharmony_ci	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
23178c2ecf20Sopenharmony_ci	old_data_size = mftbmp_ni->allocated_size;
23188c2ecf20Sopenharmony_ci	ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, "
23198c2ecf20Sopenharmony_ci			"data_size 0x%llx, initialized_size 0x%llx.",
23208c2ecf20Sopenharmony_ci			(long long)old_data_size,
23218c2ecf20Sopenharmony_ci			(long long)i_size_read(vol->mftbmp_ino),
23228c2ecf20Sopenharmony_ci			(long long)old_data_initialized);
23238c2ecf20Sopenharmony_ci	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
23248c2ecf20Sopenharmony_ci	if (old_data_initialized + 8 > old_data_size) {
23258c2ecf20Sopenharmony_ci		/* Need to extend bitmap by one more cluster. */
23268c2ecf20Sopenharmony_ci		ntfs_debug("mftbmp: initialized_size + 8 > allocated_size.");
23278c2ecf20Sopenharmony_ci		err = ntfs_mft_bitmap_extend_allocation_nolock(vol);
23288c2ecf20Sopenharmony_ci		if (unlikely(err)) {
23298c2ecf20Sopenharmony_ci			up_write(&vol->mftbmp_lock);
23308c2ecf20Sopenharmony_ci			goto err_out;
23318c2ecf20Sopenharmony_ci		}
23328c2ecf20Sopenharmony_ci#ifdef DEBUG
23338c2ecf20Sopenharmony_ci		read_lock_irqsave(&mftbmp_ni->size_lock, flags);
23348c2ecf20Sopenharmony_ci		ntfs_debug("Status of mftbmp after allocation extension: "
23358c2ecf20Sopenharmony_ci				"allocated_size 0x%llx, data_size 0x%llx, "
23368c2ecf20Sopenharmony_ci				"initialized_size 0x%llx.",
23378c2ecf20Sopenharmony_ci				(long long)mftbmp_ni->allocated_size,
23388c2ecf20Sopenharmony_ci				(long long)i_size_read(vol->mftbmp_ino),
23398c2ecf20Sopenharmony_ci				(long long)mftbmp_ni->initialized_size);
23408c2ecf20Sopenharmony_ci		read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
23418c2ecf20Sopenharmony_ci#endif /* DEBUG */
23428c2ecf20Sopenharmony_ci	}
23438c2ecf20Sopenharmony_ci	/*
23448c2ecf20Sopenharmony_ci	 * We now have sufficient allocated space, extend the initialized_size
23458c2ecf20Sopenharmony_ci	 * as well as the data_size if necessary and fill the new space with
23468c2ecf20Sopenharmony_ci	 * zeroes.
23478c2ecf20Sopenharmony_ci	 */
23488c2ecf20Sopenharmony_ci	err = ntfs_mft_bitmap_extend_initialized_nolock(vol);
23498c2ecf20Sopenharmony_ci	if (unlikely(err)) {
23508c2ecf20Sopenharmony_ci		up_write(&vol->mftbmp_lock);
23518c2ecf20Sopenharmony_ci		goto err_out;
23528c2ecf20Sopenharmony_ci	}
23538c2ecf20Sopenharmony_ci#ifdef DEBUG
23548c2ecf20Sopenharmony_ci	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
23558c2ecf20Sopenharmony_ci	ntfs_debug("Status of mftbmp after initialized extension: "
23568c2ecf20Sopenharmony_ci			"allocated_size 0x%llx, data_size 0x%llx, "
23578c2ecf20Sopenharmony_ci			"initialized_size 0x%llx.",
23588c2ecf20Sopenharmony_ci			(long long)mftbmp_ni->allocated_size,
23598c2ecf20Sopenharmony_ci			(long long)i_size_read(vol->mftbmp_ino),
23608c2ecf20Sopenharmony_ci			(long long)mftbmp_ni->initialized_size);
23618c2ecf20Sopenharmony_ci	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
23628c2ecf20Sopenharmony_ci#endif /* DEBUG */
23638c2ecf20Sopenharmony_ci	ntfs_debug("Found free record (#3), bit 0x%llx.", (long long)bit);
23648c2ecf20Sopenharmony_cifound_free_rec:
23658c2ecf20Sopenharmony_ci	/* @bit is the found free mft record, allocate it in the mft bitmap. */
23668c2ecf20Sopenharmony_ci	ntfs_debug("At found_free_rec.");
23678c2ecf20Sopenharmony_ci	err = ntfs_bitmap_set_bit(vol->mftbmp_ino, bit);
23688c2ecf20Sopenharmony_ci	if (unlikely(err)) {
23698c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to allocate bit in mft bitmap.");
23708c2ecf20Sopenharmony_ci		up_write(&vol->mftbmp_lock);
23718c2ecf20Sopenharmony_ci		goto err_out;
23728c2ecf20Sopenharmony_ci	}
23738c2ecf20Sopenharmony_ci	ntfs_debug("Set bit 0x%llx in mft bitmap.", (long long)bit);
23748c2ecf20Sopenharmony_cihave_alloc_rec:
23758c2ecf20Sopenharmony_ci	/*
23768c2ecf20Sopenharmony_ci	 * The mft bitmap is now uptodate.  Deal with mft data attribute now.
23778c2ecf20Sopenharmony_ci	 * Note, we keep hold of the mft bitmap lock for writing until all
23788c2ecf20Sopenharmony_ci	 * modifications to the mft data attribute are complete, too, as they
23798c2ecf20Sopenharmony_ci	 * will impact decisions for mft bitmap and mft record allocation done
23808c2ecf20Sopenharmony_ci	 * by a parallel allocation and if the lock is not maintained a
23818c2ecf20Sopenharmony_ci	 * parallel allocation could allocate the same mft record as this one.
23828c2ecf20Sopenharmony_ci	 */
23838c2ecf20Sopenharmony_ci	ll = (bit + 1) << vol->mft_record_size_bits;
23848c2ecf20Sopenharmony_ci	read_lock_irqsave(&mft_ni->size_lock, flags);
23858c2ecf20Sopenharmony_ci	old_data_initialized = mft_ni->initialized_size;
23868c2ecf20Sopenharmony_ci	read_unlock_irqrestore(&mft_ni->size_lock, flags);
23878c2ecf20Sopenharmony_ci	if (ll <= old_data_initialized) {
23888c2ecf20Sopenharmony_ci		ntfs_debug("Allocated mft record already initialized.");
23898c2ecf20Sopenharmony_ci		goto mft_rec_already_initialized;
23908c2ecf20Sopenharmony_ci	}
23918c2ecf20Sopenharmony_ci	ntfs_debug("Initializing allocated mft record.");
23928c2ecf20Sopenharmony_ci	/*
23938c2ecf20Sopenharmony_ci	 * The mft record is outside the initialized data.  Extend the mft data
23948c2ecf20Sopenharmony_ci	 * attribute until it covers the allocated record.  The loop is only
23958c2ecf20Sopenharmony_ci	 * actually traversed more than once when a freshly formatted volume is
23968c2ecf20Sopenharmony_ci	 * first written to so it optimizes away nicely in the common case.
23978c2ecf20Sopenharmony_ci	 */
23988c2ecf20Sopenharmony_ci	read_lock_irqsave(&mft_ni->size_lock, flags);
23998c2ecf20Sopenharmony_ci	ntfs_debug("Status of mft data before extension: "
24008c2ecf20Sopenharmony_ci			"allocated_size 0x%llx, data_size 0x%llx, "
24018c2ecf20Sopenharmony_ci			"initialized_size 0x%llx.",
24028c2ecf20Sopenharmony_ci			(long long)mft_ni->allocated_size,
24038c2ecf20Sopenharmony_ci			(long long)i_size_read(vol->mft_ino),
24048c2ecf20Sopenharmony_ci			(long long)mft_ni->initialized_size);
24058c2ecf20Sopenharmony_ci	while (ll > mft_ni->allocated_size) {
24068c2ecf20Sopenharmony_ci		read_unlock_irqrestore(&mft_ni->size_lock, flags);
24078c2ecf20Sopenharmony_ci		err = ntfs_mft_data_extend_allocation_nolock(vol);
24088c2ecf20Sopenharmony_ci		if (unlikely(err)) {
24098c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to extend mft data "
24108c2ecf20Sopenharmony_ci					"allocation.");
24118c2ecf20Sopenharmony_ci			goto undo_mftbmp_alloc_nolock;
24128c2ecf20Sopenharmony_ci		}
24138c2ecf20Sopenharmony_ci		read_lock_irqsave(&mft_ni->size_lock, flags);
24148c2ecf20Sopenharmony_ci		ntfs_debug("Status of mft data after allocation extension: "
24158c2ecf20Sopenharmony_ci				"allocated_size 0x%llx, data_size 0x%llx, "
24168c2ecf20Sopenharmony_ci				"initialized_size 0x%llx.",
24178c2ecf20Sopenharmony_ci				(long long)mft_ni->allocated_size,
24188c2ecf20Sopenharmony_ci				(long long)i_size_read(vol->mft_ino),
24198c2ecf20Sopenharmony_ci				(long long)mft_ni->initialized_size);
24208c2ecf20Sopenharmony_ci	}
24218c2ecf20Sopenharmony_ci	read_unlock_irqrestore(&mft_ni->size_lock, flags);
24228c2ecf20Sopenharmony_ci	/*
24238c2ecf20Sopenharmony_ci	 * Extend mft data initialized size (and data size of course) to reach
24248c2ecf20Sopenharmony_ci	 * the allocated mft record, formatting the mft records allong the way.
24258c2ecf20Sopenharmony_ci	 * Note: We only modify the ntfs_inode structure as that is all that is
24268c2ecf20Sopenharmony_ci	 * needed by ntfs_mft_record_format().  We will update the attribute
24278c2ecf20Sopenharmony_ci	 * record itself in one fell swoop later on.
24288c2ecf20Sopenharmony_ci	 */
24298c2ecf20Sopenharmony_ci	write_lock_irqsave(&mft_ni->size_lock, flags);
24308c2ecf20Sopenharmony_ci	old_data_initialized = mft_ni->initialized_size;
24318c2ecf20Sopenharmony_ci	old_data_size = vol->mft_ino->i_size;
24328c2ecf20Sopenharmony_ci	while (ll > mft_ni->initialized_size) {
24338c2ecf20Sopenharmony_ci		s64 new_initialized_size, mft_no;
24348c2ecf20Sopenharmony_ci
24358c2ecf20Sopenharmony_ci		new_initialized_size = mft_ni->initialized_size +
24368c2ecf20Sopenharmony_ci				vol->mft_record_size;
24378c2ecf20Sopenharmony_ci		mft_no = mft_ni->initialized_size >> vol->mft_record_size_bits;
24388c2ecf20Sopenharmony_ci		if (new_initialized_size > i_size_read(vol->mft_ino))
24398c2ecf20Sopenharmony_ci			i_size_write(vol->mft_ino, new_initialized_size);
24408c2ecf20Sopenharmony_ci		write_unlock_irqrestore(&mft_ni->size_lock, flags);
24418c2ecf20Sopenharmony_ci		ntfs_debug("Initializing mft record 0x%llx.",
24428c2ecf20Sopenharmony_ci				(long long)mft_no);
24438c2ecf20Sopenharmony_ci		err = ntfs_mft_record_format(vol, mft_no);
24448c2ecf20Sopenharmony_ci		if (unlikely(err)) {
24458c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to format mft record.");
24468c2ecf20Sopenharmony_ci			goto undo_data_init;
24478c2ecf20Sopenharmony_ci		}
24488c2ecf20Sopenharmony_ci		write_lock_irqsave(&mft_ni->size_lock, flags);
24498c2ecf20Sopenharmony_ci		mft_ni->initialized_size = new_initialized_size;
24508c2ecf20Sopenharmony_ci	}
24518c2ecf20Sopenharmony_ci	write_unlock_irqrestore(&mft_ni->size_lock, flags);
24528c2ecf20Sopenharmony_ci	record_formatted = true;
24538c2ecf20Sopenharmony_ci	/* Update the mft data attribute record to reflect the new sizes. */
24548c2ecf20Sopenharmony_ci	m = map_mft_record(mft_ni);
24558c2ecf20Sopenharmony_ci	if (IS_ERR(m)) {
24568c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to map mft record.");
24578c2ecf20Sopenharmony_ci		err = PTR_ERR(m);
24588c2ecf20Sopenharmony_ci		goto undo_data_init;
24598c2ecf20Sopenharmony_ci	}
24608c2ecf20Sopenharmony_ci	ctx = ntfs_attr_get_search_ctx(mft_ni, m);
24618c2ecf20Sopenharmony_ci	if (unlikely(!ctx)) {
24628c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to get search context.");
24638c2ecf20Sopenharmony_ci		err = -ENOMEM;
24648c2ecf20Sopenharmony_ci		unmap_mft_record(mft_ni);
24658c2ecf20Sopenharmony_ci		goto undo_data_init;
24668c2ecf20Sopenharmony_ci	}
24678c2ecf20Sopenharmony_ci	err = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len,
24688c2ecf20Sopenharmony_ci			CASE_SENSITIVE, 0, NULL, 0, ctx);
24698c2ecf20Sopenharmony_ci	if (unlikely(err)) {
24708c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to find first attribute extent of "
24718c2ecf20Sopenharmony_ci				"mft data attribute.");
24728c2ecf20Sopenharmony_ci		ntfs_attr_put_search_ctx(ctx);
24738c2ecf20Sopenharmony_ci		unmap_mft_record(mft_ni);
24748c2ecf20Sopenharmony_ci		goto undo_data_init;
24758c2ecf20Sopenharmony_ci	}
24768c2ecf20Sopenharmony_ci	a = ctx->attr;
24778c2ecf20Sopenharmony_ci	read_lock_irqsave(&mft_ni->size_lock, flags);
24788c2ecf20Sopenharmony_ci	a->data.non_resident.initialized_size =
24798c2ecf20Sopenharmony_ci			cpu_to_sle64(mft_ni->initialized_size);
24808c2ecf20Sopenharmony_ci	a->data.non_resident.data_size =
24818c2ecf20Sopenharmony_ci			cpu_to_sle64(i_size_read(vol->mft_ino));
24828c2ecf20Sopenharmony_ci	read_unlock_irqrestore(&mft_ni->size_lock, flags);
24838c2ecf20Sopenharmony_ci	/* Ensure the changes make it to disk. */
24848c2ecf20Sopenharmony_ci	flush_dcache_mft_record_page(ctx->ntfs_ino);
24858c2ecf20Sopenharmony_ci	mark_mft_record_dirty(ctx->ntfs_ino);
24868c2ecf20Sopenharmony_ci	ntfs_attr_put_search_ctx(ctx);
24878c2ecf20Sopenharmony_ci	unmap_mft_record(mft_ni);
24888c2ecf20Sopenharmony_ci	read_lock_irqsave(&mft_ni->size_lock, flags);
24898c2ecf20Sopenharmony_ci	ntfs_debug("Status of mft data after mft record initialization: "
24908c2ecf20Sopenharmony_ci			"allocated_size 0x%llx, data_size 0x%llx, "
24918c2ecf20Sopenharmony_ci			"initialized_size 0x%llx.",
24928c2ecf20Sopenharmony_ci			(long long)mft_ni->allocated_size,
24938c2ecf20Sopenharmony_ci			(long long)i_size_read(vol->mft_ino),
24948c2ecf20Sopenharmony_ci			(long long)mft_ni->initialized_size);
24958c2ecf20Sopenharmony_ci	BUG_ON(i_size_read(vol->mft_ino) > mft_ni->allocated_size);
24968c2ecf20Sopenharmony_ci	BUG_ON(mft_ni->initialized_size > i_size_read(vol->mft_ino));
24978c2ecf20Sopenharmony_ci	read_unlock_irqrestore(&mft_ni->size_lock, flags);
24988c2ecf20Sopenharmony_cimft_rec_already_initialized:
24998c2ecf20Sopenharmony_ci	/*
25008c2ecf20Sopenharmony_ci	 * We can finally drop the mft bitmap lock as the mft data attribute
25018c2ecf20Sopenharmony_ci	 * has been fully updated.  The only disparity left is that the
25028c2ecf20Sopenharmony_ci	 * allocated mft record still needs to be marked as in use to match the
25038c2ecf20Sopenharmony_ci	 * set bit in the mft bitmap but this is actually not a problem since
25048c2ecf20Sopenharmony_ci	 * this mft record is not referenced from anywhere yet and the fact
25058c2ecf20Sopenharmony_ci	 * that it is allocated in the mft bitmap means that no-one will try to
25068c2ecf20Sopenharmony_ci	 * allocate it either.
25078c2ecf20Sopenharmony_ci	 */
25088c2ecf20Sopenharmony_ci	up_write(&vol->mftbmp_lock);
25098c2ecf20Sopenharmony_ci	/*
25108c2ecf20Sopenharmony_ci	 * We now have allocated and initialized the mft record.  Calculate the
25118c2ecf20Sopenharmony_ci	 * index of and the offset within the page cache page the record is in.
25128c2ecf20Sopenharmony_ci	 */
25138c2ecf20Sopenharmony_ci	index = bit << vol->mft_record_size_bits >> PAGE_SHIFT;
25148c2ecf20Sopenharmony_ci	ofs = (bit << vol->mft_record_size_bits) & ~PAGE_MASK;
25158c2ecf20Sopenharmony_ci	/* Read, map, and pin the page containing the mft record. */
25168c2ecf20Sopenharmony_ci	page = ntfs_map_page(vol->mft_ino->i_mapping, index);
25178c2ecf20Sopenharmony_ci	if (IS_ERR(page)) {
25188c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to map page containing allocated "
25198c2ecf20Sopenharmony_ci				"mft record 0x%llx.", (long long)bit);
25208c2ecf20Sopenharmony_ci		err = PTR_ERR(page);
25218c2ecf20Sopenharmony_ci		goto undo_mftbmp_alloc;
25228c2ecf20Sopenharmony_ci	}
25238c2ecf20Sopenharmony_ci	lock_page(page);
25248c2ecf20Sopenharmony_ci	BUG_ON(!PageUptodate(page));
25258c2ecf20Sopenharmony_ci	ClearPageUptodate(page);
25268c2ecf20Sopenharmony_ci	m = (MFT_RECORD*)((u8*)page_address(page) + ofs);
25278c2ecf20Sopenharmony_ci	/* If we just formatted the mft record no need to do it again. */
25288c2ecf20Sopenharmony_ci	if (!record_formatted) {
25298c2ecf20Sopenharmony_ci		/* Sanity check that the mft record is really not in use. */
25308c2ecf20Sopenharmony_ci		if (ntfs_is_file_record(m->magic) &&
25318c2ecf20Sopenharmony_ci				(m->flags & MFT_RECORD_IN_USE)) {
25328c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Mft record 0x%llx was marked "
25338c2ecf20Sopenharmony_ci					"free in mft bitmap but is marked "
25348c2ecf20Sopenharmony_ci					"used itself.  Corrupt filesystem.  "
25358c2ecf20Sopenharmony_ci					"Unmount and run chkdsk.",
25368c2ecf20Sopenharmony_ci					(long long)bit);
25378c2ecf20Sopenharmony_ci			err = -EIO;
25388c2ecf20Sopenharmony_ci			SetPageUptodate(page);
25398c2ecf20Sopenharmony_ci			unlock_page(page);
25408c2ecf20Sopenharmony_ci			ntfs_unmap_page(page);
25418c2ecf20Sopenharmony_ci			NVolSetErrors(vol);
25428c2ecf20Sopenharmony_ci			goto undo_mftbmp_alloc;
25438c2ecf20Sopenharmony_ci		}
25448c2ecf20Sopenharmony_ci		/*
25458c2ecf20Sopenharmony_ci		 * We need to (re-)format the mft record, preserving the
25468c2ecf20Sopenharmony_ci		 * sequence number if it is not zero as well as the update
25478c2ecf20Sopenharmony_ci		 * sequence number if it is not zero or -1 (0xffff).  This
25488c2ecf20Sopenharmony_ci		 * means we do not need to care whether or not something went
25498c2ecf20Sopenharmony_ci		 * wrong with the previous mft record.
25508c2ecf20Sopenharmony_ci		 */
25518c2ecf20Sopenharmony_ci		seq_no = m->sequence_number;
25528c2ecf20Sopenharmony_ci		usn = *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs));
25538c2ecf20Sopenharmony_ci		err = ntfs_mft_record_layout(vol, bit, m);
25548c2ecf20Sopenharmony_ci		if (unlikely(err)) {
25558c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to layout allocated mft "
25568c2ecf20Sopenharmony_ci					"record 0x%llx.", (long long)bit);
25578c2ecf20Sopenharmony_ci			SetPageUptodate(page);
25588c2ecf20Sopenharmony_ci			unlock_page(page);
25598c2ecf20Sopenharmony_ci			ntfs_unmap_page(page);
25608c2ecf20Sopenharmony_ci			goto undo_mftbmp_alloc;
25618c2ecf20Sopenharmony_ci		}
25628c2ecf20Sopenharmony_ci		if (seq_no)
25638c2ecf20Sopenharmony_ci			m->sequence_number = seq_no;
25648c2ecf20Sopenharmony_ci		if (usn && le16_to_cpu(usn) != 0xffff)
25658c2ecf20Sopenharmony_ci			*(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = usn;
25668c2ecf20Sopenharmony_ci	}
25678c2ecf20Sopenharmony_ci	/* Set the mft record itself in use. */
25688c2ecf20Sopenharmony_ci	m->flags |= MFT_RECORD_IN_USE;
25698c2ecf20Sopenharmony_ci	if (S_ISDIR(mode))
25708c2ecf20Sopenharmony_ci		m->flags |= MFT_RECORD_IS_DIRECTORY;
25718c2ecf20Sopenharmony_ci	flush_dcache_page(page);
25728c2ecf20Sopenharmony_ci	SetPageUptodate(page);
25738c2ecf20Sopenharmony_ci	if (base_ni) {
25748c2ecf20Sopenharmony_ci		MFT_RECORD *m_tmp;
25758c2ecf20Sopenharmony_ci
25768c2ecf20Sopenharmony_ci		/*
25778c2ecf20Sopenharmony_ci		 * Setup the base mft record in the extent mft record.  This
25788c2ecf20Sopenharmony_ci		 * completes initialization of the allocated extent mft record
25798c2ecf20Sopenharmony_ci		 * and we can simply use it with map_extent_mft_record().
25808c2ecf20Sopenharmony_ci		 */
25818c2ecf20Sopenharmony_ci		m->base_mft_record = MK_LE_MREF(base_ni->mft_no,
25828c2ecf20Sopenharmony_ci				base_ni->seq_no);
25838c2ecf20Sopenharmony_ci		/*
25848c2ecf20Sopenharmony_ci		 * Allocate an extent inode structure for the new mft record,
25858c2ecf20Sopenharmony_ci		 * attach it to the base inode @base_ni and map, pin, and lock
25868c2ecf20Sopenharmony_ci		 * its, i.e. the allocated, mft record.
25878c2ecf20Sopenharmony_ci		 */
25888c2ecf20Sopenharmony_ci		m_tmp = map_extent_mft_record(base_ni, bit, &ni);
25898c2ecf20Sopenharmony_ci		if (IS_ERR(m_tmp)) {
25908c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to map allocated extent "
25918c2ecf20Sopenharmony_ci					"mft record 0x%llx.", (long long)bit);
25928c2ecf20Sopenharmony_ci			err = PTR_ERR(m_tmp);
25938c2ecf20Sopenharmony_ci			/* Set the mft record itself not in use. */
25948c2ecf20Sopenharmony_ci			m->flags &= cpu_to_le16(
25958c2ecf20Sopenharmony_ci					~le16_to_cpu(MFT_RECORD_IN_USE));
25968c2ecf20Sopenharmony_ci			flush_dcache_page(page);
25978c2ecf20Sopenharmony_ci			/* Make sure the mft record is written out to disk. */
25988c2ecf20Sopenharmony_ci			mark_ntfs_record_dirty(page, ofs);
25998c2ecf20Sopenharmony_ci			unlock_page(page);
26008c2ecf20Sopenharmony_ci			ntfs_unmap_page(page);
26018c2ecf20Sopenharmony_ci			goto undo_mftbmp_alloc;
26028c2ecf20Sopenharmony_ci		}
26038c2ecf20Sopenharmony_ci		BUG_ON(m != m_tmp);
26048c2ecf20Sopenharmony_ci		/*
26058c2ecf20Sopenharmony_ci		 * Make sure the allocated mft record is written out to disk.
26068c2ecf20Sopenharmony_ci		 * No need to set the inode dirty because the caller is going
26078c2ecf20Sopenharmony_ci		 * to do that anyway after finishing with the new extent mft
26088c2ecf20Sopenharmony_ci		 * record (e.g. at a minimum a new attribute will be added to
26098c2ecf20Sopenharmony_ci		 * the mft record.
26108c2ecf20Sopenharmony_ci		 */
26118c2ecf20Sopenharmony_ci		mark_ntfs_record_dirty(page, ofs);
26128c2ecf20Sopenharmony_ci		unlock_page(page);
26138c2ecf20Sopenharmony_ci		/*
26148c2ecf20Sopenharmony_ci		 * Need to unmap the page since map_extent_mft_record() mapped
26158c2ecf20Sopenharmony_ci		 * it as well so we have it mapped twice at the moment.
26168c2ecf20Sopenharmony_ci		 */
26178c2ecf20Sopenharmony_ci		ntfs_unmap_page(page);
26188c2ecf20Sopenharmony_ci	} else {
26198c2ecf20Sopenharmony_ci		/*
26208c2ecf20Sopenharmony_ci		 * Allocate a new VFS inode and set it up.  NOTE: @vi->i_nlink
26218c2ecf20Sopenharmony_ci		 * is set to 1 but the mft record->link_count is 0.  The caller
26228c2ecf20Sopenharmony_ci		 * needs to bear this in mind.
26238c2ecf20Sopenharmony_ci		 */
26248c2ecf20Sopenharmony_ci		vi = new_inode(vol->sb);
26258c2ecf20Sopenharmony_ci		if (unlikely(!vi)) {
26268c2ecf20Sopenharmony_ci			err = -ENOMEM;
26278c2ecf20Sopenharmony_ci			/* Set the mft record itself not in use. */
26288c2ecf20Sopenharmony_ci			m->flags &= cpu_to_le16(
26298c2ecf20Sopenharmony_ci					~le16_to_cpu(MFT_RECORD_IN_USE));
26308c2ecf20Sopenharmony_ci			flush_dcache_page(page);
26318c2ecf20Sopenharmony_ci			/* Make sure the mft record is written out to disk. */
26328c2ecf20Sopenharmony_ci			mark_ntfs_record_dirty(page, ofs);
26338c2ecf20Sopenharmony_ci			unlock_page(page);
26348c2ecf20Sopenharmony_ci			ntfs_unmap_page(page);
26358c2ecf20Sopenharmony_ci			goto undo_mftbmp_alloc;
26368c2ecf20Sopenharmony_ci		}
26378c2ecf20Sopenharmony_ci		vi->i_ino = bit;
26388c2ecf20Sopenharmony_ci
26398c2ecf20Sopenharmony_ci		/* The owner and group come from the ntfs volume. */
26408c2ecf20Sopenharmony_ci		vi->i_uid = vol->uid;
26418c2ecf20Sopenharmony_ci		vi->i_gid = vol->gid;
26428c2ecf20Sopenharmony_ci
26438c2ecf20Sopenharmony_ci		/* Initialize the ntfs specific part of @vi. */
26448c2ecf20Sopenharmony_ci		ntfs_init_big_inode(vi);
26458c2ecf20Sopenharmony_ci		ni = NTFS_I(vi);
26468c2ecf20Sopenharmony_ci		/*
26478c2ecf20Sopenharmony_ci		 * Set the appropriate mode, attribute type, and name.  For
26488c2ecf20Sopenharmony_ci		 * directories, also setup the index values to the defaults.
26498c2ecf20Sopenharmony_ci		 */
26508c2ecf20Sopenharmony_ci		if (S_ISDIR(mode)) {
26518c2ecf20Sopenharmony_ci			vi->i_mode = S_IFDIR | S_IRWXUGO;
26528c2ecf20Sopenharmony_ci			vi->i_mode &= ~vol->dmask;
26538c2ecf20Sopenharmony_ci
26548c2ecf20Sopenharmony_ci			NInoSetMstProtected(ni);
26558c2ecf20Sopenharmony_ci			ni->type = AT_INDEX_ALLOCATION;
26568c2ecf20Sopenharmony_ci			ni->name = I30;
26578c2ecf20Sopenharmony_ci			ni->name_len = 4;
26588c2ecf20Sopenharmony_ci
26598c2ecf20Sopenharmony_ci			ni->itype.index.block_size = 4096;
26608c2ecf20Sopenharmony_ci			ni->itype.index.block_size_bits = ntfs_ffs(4096) - 1;
26618c2ecf20Sopenharmony_ci			ni->itype.index.collation_rule = COLLATION_FILE_NAME;
26628c2ecf20Sopenharmony_ci			if (vol->cluster_size <= ni->itype.index.block_size) {
26638c2ecf20Sopenharmony_ci				ni->itype.index.vcn_size = vol->cluster_size;
26648c2ecf20Sopenharmony_ci				ni->itype.index.vcn_size_bits =
26658c2ecf20Sopenharmony_ci						vol->cluster_size_bits;
26668c2ecf20Sopenharmony_ci			} else {
26678c2ecf20Sopenharmony_ci				ni->itype.index.vcn_size = vol->sector_size;
26688c2ecf20Sopenharmony_ci				ni->itype.index.vcn_size_bits =
26698c2ecf20Sopenharmony_ci						vol->sector_size_bits;
26708c2ecf20Sopenharmony_ci			}
26718c2ecf20Sopenharmony_ci		} else {
26728c2ecf20Sopenharmony_ci			vi->i_mode = S_IFREG | S_IRWXUGO;
26738c2ecf20Sopenharmony_ci			vi->i_mode &= ~vol->fmask;
26748c2ecf20Sopenharmony_ci
26758c2ecf20Sopenharmony_ci			ni->type = AT_DATA;
26768c2ecf20Sopenharmony_ci			ni->name = NULL;
26778c2ecf20Sopenharmony_ci			ni->name_len = 0;
26788c2ecf20Sopenharmony_ci		}
26798c2ecf20Sopenharmony_ci		if (IS_RDONLY(vi))
26808c2ecf20Sopenharmony_ci			vi->i_mode &= ~S_IWUGO;
26818c2ecf20Sopenharmony_ci
26828c2ecf20Sopenharmony_ci		/* Set the inode times to the current time. */
26838c2ecf20Sopenharmony_ci		vi->i_atime = vi->i_mtime = vi->i_ctime =
26848c2ecf20Sopenharmony_ci			current_time(vi);
26858c2ecf20Sopenharmony_ci		/*
26868c2ecf20Sopenharmony_ci		 * Set the file size to 0, the ntfs inode sizes are set to 0 by
26878c2ecf20Sopenharmony_ci		 * the call to ntfs_init_big_inode() above.
26888c2ecf20Sopenharmony_ci		 */
26898c2ecf20Sopenharmony_ci		vi->i_size = 0;
26908c2ecf20Sopenharmony_ci		vi->i_blocks = 0;
26918c2ecf20Sopenharmony_ci
26928c2ecf20Sopenharmony_ci		/* Set the sequence number. */
26938c2ecf20Sopenharmony_ci		vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
26948c2ecf20Sopenharmony_ci		/*
26958c2ecf20Sopenharmony_ci		 * Manually map, pin, and lock the mft record as we already
26968c2ecf20Sopenharmony_ci		 * have its page mapped and it is very easy to do.
26978c2ecf20Sopenharmony_ci		 */
26988c2ecf20Sopenharmony_ci		atomic_inc(&ni->count);
26998c2ecf20Sopenharmony_ci		mutex_lock(&ni->mrec_lock);
27008c2ecf20Sopenharmony_ci		ni->page = page;
27018c2ecf20Sopenharmony_ci		ni->page_ofs = ofs;
27028c2ecf20Sopenharmony_ci		/*
27038c2ecf20Sopenharmony_ci		 * Make sure the allocated mft record is written out to disk.
27048c2ecf20Sopenharmony_ci		 * NOTE: We do not set the ntfs inode dirty because this would
27058c2ecf20Sopenharmony_ci		 * fail in ntfs_write_inode() because the inode does not have a
27068c2ecf20Sopenharmony_ci		 * standard information attribute yet.  Also, there is no need
27078c2ecf20Sopenharmony_ci		 * to set the inode dirty because the caller is going to do
27088c2ecf20Sopenharmony_ci		 * that anyway after finishing with the new mft record (e.g. at
27098c2ecf20Sopenharmony_ci		 * a minimum some new attributes will be added to the mft
27108c2ecf20Sopenharmony_ci		 * record.
27118c2ecf20Sopenharmony_ci		 */
27128c2ecf20Sopenharmony_ci		mark_ntfs_record_dirty(page, ofs);
27138c2ecf20Sopenharmony_ci		unlock_page(page);
27148c2ecf20Sopenharmony_ci
27158c2ecf20Sopenharmony_ci		/* Add the inode to the inode hash for the superblock. */
27168c2ecf20Sopenharmony_ci		insert_inode_hash(vi);
27178c2ecf20Sopenharmony_ci
27188c2ecf20Sopenharmony_ci		/* Update the default mft allocation position. */
27198c2ecf20Sopenharmony_ci		vol->mft_data_pos = bit + 1;
27208c2ecf20Sopenharmony_ci	}
27218c2ecf20Sopenharmony_ci	/*
27228c2ecf20Sopenharmony_ci	 * Return the opened, allocated inode of the allocated mft record as
27238c2ecf20Sopenharmony_ci	 * well as the mapped, pinned, and locked mft record.
27248c2ecf20Sopenharmony_ci	 */
27258c2ecf20Sopenharmony_ci	ntfs_debug("Returning opened, allocated %sinode 0x%llx.",
27268c2ecf20Sopenharmony_ci			base_ni ? "extent " : "", (long long)bit);
27278c2ecf20Sopenharmony_ci	*mrec = m;
27288c2ecf20Sopenharmony_ci	return ni;
27298c2ecf20Sopenharmony_ciundo_data_init:
27308c2ecf20Sopenharmony_ci	write_lock_irqsave(&mft_ni->size_lock, flags);
27318c2ecf20Sopenharmony_ci	mft_ni->initialized_size = old_data_initialized;
27328c2ecf20Sopenharmony_ci	i_size_write(vol->mft_ino, old_data_size);
27338c2ecf20Sopenharmony_ci	write_unlock_irqrestore(&mft_ni->size_lock, flags);
27348c2ecf20Sopenharmony_ci	goto undo_mftbmp_alloc_nolock;
27358c2ecf20Sopenharmony_ciundo_mftbmp_alloc:
27368c2ecf20Sopenharmony_ci	down_write(&vol->mftbmp_lock);
27378c2ecf20Sopenharmony_ciundo_mftbmp_alloc_nolock:
27388c2ecf20Sopenharmony_ci	if (ntfs_bitmap_clear_bit(vol->mftbmp_ino, bit)) {
27398c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to clear bit in mft bitmap.%s", es);
27408c2ecf20Sopenharmony_ci		NVolSetErrors(vol);
27418c2ecf20Sopenharmony_ci	}
27428c2ecf20Sopenharmony_ci	up_write(&vol->mftbmp_lock);
27438c2ecf20Sopenharmony_cierr_out:
27448c2ecf20Sopenharmony_ci	return ERR_PTR(err);
27458c2ecf20Sopenharmony_cimax_err_out:
27468c2ecf20Sopenharmony_ci	ntfs_warning(vol->sb, "Cannot allocate mft record because the maximum "
27478c2ecf20Sopenharmony_ci			"number of inodes (2^32) has already been reached.");
27488c2ecf20Sopenharmony_ci	up_write(&vol->mftbmp_lock);
27498c2ecf20Sopenharmony_ci	return ERR_PTR(-ENOSPC);
27508c2ecf20Sopenharmony_ci}
27518c2ecf20Sopenharmony_ci
27528c2ecf20Sopenharmony_ci/**
27538c2ecf20Sopenharmony_ci * ntfs_extent_mft_record_free - free an extent mft record on an ntfs volume
27548c2ecf20Sopenharmony_ci * @ni:		ntfs inode of the mapped extent mft record to free
27558c2ecf20Sopenharmony_ci * @m:		mapped extent mft record of the ntfs inode @ni
27568c2ecf20Sopenharmony_ci *
27578c2ecf20Sopenharmony_ci * Free the mapped extent mft record @m of the extent ntfs inode @ni.
27588c2ecf20Sopenharmony_ci *
27598c2ecf20Sopenharmony_ci * Note that this function unmaps the mft record and closes and destroys @ni
27608c2ecf20Sopenharmony_ci * internally and hence you cannot use either @ni nor @m any more after this
27618c2ecf20Sopenharmony_ci * function returns success.
27628c2ecf20Sopenharmony_ci *
27638c2ecf20Sopenharmony_ci * On success return 0 and on error return -errno.  @ni and @m are still valid
27648c2ecf20Sopenharmony_ci * in this case and have not been freed.
27658c2ecf20Sopenharmony_ci *
27668c2ecf20Sopenharmony_ci * For some errors an error message is displayed and the success code 0 is
27678c2ecf20Sopenharmony_ci * returned and the volume is then left dirty on umount.  This makes sense in
27688c2ecf20Sopenharmony_ci * case we could not rollback the changes that were already done since the
27698c2ecf20Sopenharmony_ci * caller no longer wants to reference this mft record so it does not matter to
27708c2ecf20Sopenharmony_ci * the caller if something is wrong with it as long as it is properly detached
27718c2ecf20Sopenharmony_ci * from the base inode.
27728c2ecf20Sopenharmony_ci */
27738c2ecf20Sopenharmony_ciint ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m)
27748c2ecf20Sopenharmony_ci{
27758c2ecf20Sopenharmony_ci	unsigned long mft_no = ni->mft_no;
27768c2ecf20Sopenharmony_ci	ntfs_volume *vol = ni->vol;
27778c2ecf20Sopenharmony_ci	ntfs_inode *base_ni;
27788c2ecf20Sopenharmony_ci	ntfs_inode **extent_nis;
27798c2ecf20Sopenharmony_ci	int i, err;
27808c2ecf20Sopenharmony_ci	le16 old_seq_no;
27818c2ecf20Sopenharmony_ci	u16 seq_no;
27828c2ecf20Sopenharmony_ci
27838c2ecf20Sopenharmony_ci	BUG_ON(NInoAttr(ni));
27848c2ecf20Sopenharmony_ci	BUG_ON(ni->nr_extents != -1);
27858c2ecf20Sopenharmony_ci
27868c2ecf20Sopenharmony_ci	mutex_lock(&ni->extent_lock);
27878c2ecf20Sopenharmony_ci	base_ni = ni->ext.base_ntfs_ino;
27888c2ecf20Sopenharmony_ci	mutex_unlock(&ni->extent_lock);
27898c2ecf20Sopenharmony_ci
27908c2ecf20Sopenharmony_ci	BUG_ON(base_ni->nr_extents <= 0);
27918c2ecf20Sopenharmony_ci
27928c2ecf20Sopenharmony_ci	ntfs_debug("Entering for extent inode 0x%lx, base inode 0x%lx.\n",
27938c2ecf20Sopenharmony_ci			mft_no, base_ni->mft_no);
27948c2ecf20Sopenharmony_ci
27958c2ecf20Sopenharmony_ci	mutex_lock(&base_ni->extent_lock);
27968c2ecf20Sopenharmony_ci
27978c2ecf20Sopenharmony_ci	/* Make sure we are holding the only reference to the extent inode. */
27988c2ecf20Sopenharmony_ci	if (atomic_read(&ni->count) > 2) {
27998c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Tried to free busy extent inode 0x%lx, "
28008c2ecf20Sopenharmony_ci				"not freeing.", base_ni->mft_no);
28018c2ecf20Sopenharmony_ci		mutex_unlock(&base_ni->extent_lock);
28028c2ecf20Sopenharmony_ci		return -EBUSY;
28038c2ecf20Sopenharmony_ci	}
28048c2ecf20Sopenharmony_ci
28058c2ecf20Sopenharmony_ci	/* Dissociate the ntfs inode from the base inode. */
28068c2ecf20Sopenharmony_ci	extent_nis = base_ni->ext.extent_ntfs_inos;
28078c2ecf20Sopenharmony_ci	err = -ENOENT;
28088c2ecf20Sopenharmony_ci	for (i = 0; i < base_ni->nr_extents; i++) {
28098c2ecf20Sopenharmony_ci		if (ni != extent_nis[i])
28108c2ecf20Sopenharmony_ci			continue;
28118c2ecf20Sopenharmony_ci		extent_nis += i;
28128c2ecf20Sopenharmony_ci		base_ni->nr_extents--;
28138c2ecf20Sopenharmony_ci		memmove(extent_nis, extent_nis + 1, (base_ni->nr_extents - i) *
28148c2ecf20Sopenharmony_ci				sizeof(ntfs_inode*));
28158c2ecf20Sopenharmony_ci		err = 0;
28168c2ecf20Sopenharmony_ci		break;
28178c2ecf20Sopenharmony_ci	}
28188c2ecf20Sopenharmony_ci
28198c2ecf20Sopenharmony_ci	mutex_unlock(&base_ni->extent_lock);
28208c2ecf20Sopenharmony_ci
28218c2ecf20Sopenharmony_ci	if (unlikely(err)) {
28228c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Extent inode 0x%lx is not attached to "
28238c2ecf20Sopenharmony_ci				"its base inode 0x%lx.", mft_no,
28248c2ecf20Sopenharmony_ci				base_ni->mft_no);
28258c2ecf20Sopenharmony_ci		BUG();
28268c2ecf20Sopenharmony_ci	}
28278c2ecf20Sopenharmony_ci
28288c2ecf20Sopenharmony_ci	/*
28298c2ecf20Sopenharmony_ci	 * The extent inode is no longer attached to the base inode so no one
28308c2ecf20Sopenharmony_ci	 * can get a reference to it any more.
28318c2ecf20Sopenharmony_ci	 */
28328c2ecf20Sopenharmony_ci
28338c2ecf20Sopenharmony_ci	/* Mark the mft record as not in use. */
28348c2ecf20Sopenharmony_ci	m->flags &= ~MFT_RECORD_IN_USE;
28358c2ecf20Sopenharmony_ci
28368c2ecf20Sopenharmony_ci	/* Increment the sequence number, skipping zero, if it is not zero. */
28378c2ecf20Sopenharmony_ci	old_seq_no = m->sequence_number;
28388c2ecf20Sopenharmony_ci	seq_no = le16_to_cpu(old_seq_no);
28398c2ecf20Sopenharmony_ci	if (seq_no == 0xffff)
28408c2ecf20Sopenharmony_ci		seq_no = 1;
28418c2ecf20Sopenharmony_ci	else if (seq_no)
28428c2ecf20Sopenharmony_ci		seq_no++;
28438c2ecf20Sopenharmony_ci	m->sequence_number = cpu_to_le16(seq_no);
28448c2ecf20Sopenharmony_ci
28458c2ecf20Sopenharmony_ci	/*
28468c2ecf20Sopenharmony_ci	 * Set the ntfs inode dirty and write it out.  We do not need to worry
28478c2ecf20Sopenharmony_ci	 * about the base inode here since whatever caused the extent mft
28488c2ecf20Sopenharmony_ci	 * record to be freed is guaranteed to do it already.
28498c2ecf20Sopenharmony_ci	 */
28508c2ecf20Sopenharmony_ci	NInoSetDirty(ni);
28518c2ecf20Sopenharmony_ci	err = write_mft_record(ni, m, 0);
28528c2ecf20Sopenharmony_ci	if (unlikely(err)) {
28538c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to write mft record 0x%lx, not "
28548c2ecf20Sopenharmony_ci				"freeing.", mft_no);
28558c2ecf20Sopenharmony_ci		goto rollback;
28568c2ecf20Sopenharmony_ci	}
28578c2ecf20Sopenharmony_cirollback_error:
28588c2ecf20Sopenharmony_ci	/* Unmap and throw away the now freed extent inode. */
28598c2ecf20Sopenharmony_ci	unmap_extent_mft_record(ni);
28608c2ecf20Sopenharmony_ci	ntfs_clear_extent_inode(ni);
28618c2ecf20Sopenharmony_ci
28628c2ecf20Sopenharmony_ci	/* Clear the bit in the $MFT/$BITMAP corresponding to this record. */
28638c2ecf20Sopenharmony_ci	down_write(&vol->mftbmp_lock);
28648c2ecf20Sopenharmony_ci	err = ntfs_bitmap_clear_bit(vol->mftbmp_ino, mft_no);
28658c2ecf20Sopenharmony_ci	up_write(&vol->mftbmp_lock);
28668c2ecf20Sopenharmony_ci	if (unlikely(err)) {
28678c2ecf20Sopenharmony_ci		/*
28688c2ecf20Sopenharmony_ci		 * The extent inode is gone but we failed to deallocate it in
28698c2ecf20Sopenharmony_ci		 * the mft bitmap.  Just emit a warning and leave the volume
28708c2ecf20Sopenharmony_ci		 * dirty on umount.
28718c2ecf20Sopenharmony_ci		 */
28728c2ecf20Sopenharmony_ci		ntfs_error(vol->sb, "Failed to clear bit in mft bitmap.%s", es);
28738c2ecf20Sopenharmony_ci		NVolSetErrors(vol);
28748c2ecf20Sopenharmony_ci	}
28758c2ecf20Sopenharmony_ci	return 0;
28768c2ecf20Sopenharmony_cirollback:
28778c2ecf20Sopenharmony_ci	/* Rollback what we did... */
28788c2ecf20Sopenharmony_ci	mutex_lock(&base_ni->extent_lock);
28798c2ecf20Sopenharmony_ci	extent_nis = base_ni->ext.extent_ntfs_inos;
28808c2ecf20Sopenharmony_ci	if (!(base_ni->nr_extents & 3)) {
28818c2ecf20Sopenharmony_ci		int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode*);
28828c2ecf20Sopenharmony_ci
28838c2ecf20Sopenharmony_ci		extent_nis = kmalloc(new_size, GFP_NOFS);
28848c2ecf20Sopenharmony_ci		if (unlikely(!extent_nis)) {
28858c2ecf20Sopenharmony_ci			ntfs_error(vol->sb, "Failed to allocate internal "
28868c2ecf20Sopenharmony_ci					"buffer during rollback.%s", es);
28878c2ecf20Sopenharmony_ci			mutex_unlock(&base_ni->extent_lock);
28888c2ecf20Sopenharmony_ci			NVolSetErrors(vol);
28898c2ecf20Sopenharmony_ci			goto rollback_error;
28908c2ecf20Sopenharmony_ci		}
28918c2ecf20Sopenharmony_ci		if (base_ni->nr_extents) {
28928c2ecf20Sopenharmony_ci			BUG_ON(!base_ni->ext.extent_ntfs_inos);
28938c2ecf20Sopenharmony_ci			memcpy(extent_nis, base_ni->ext.extent_ntfs_inos,
28948c2ecf20Sopenharmony_ci					new_size - 4 * sizeof(ntfs_inode*));
28958c2ecf20Sopenharmony_ci			kfree(base_ni->ext.extent_ntfs_inos);
28968c2ecf20Sopenharmony_ci		}
28978c2ecf20Sopenharmony_ci		base_ni->ext.extent_ntfs_inos = extent_nis;
28988c2ecf20Sopenharmony_ci	}
28998c2ecf20Sopenharmony_ci	m->flags |= MFT_RECORD_IN_USE;
29008c2ecf20Sopenharmony_ci	m->sequence_number = old_seq_no;
29018c2ecf20Sopenharmony_ci	extent_nis[base_ni->nr_extents++] = ni;
29028c2ecf20Sopenharmony_ci	mutex_unlock(&base_ni->extent_lock);
29038c2ecf20Sopenharmony_ci	mark_mft_record_dirty(ni);
29048c2ecf20Sopenharmony_ci	return err;
29058c2ecf20Sopenharmony_ci}
29068c2ecf20Sopenharmony_ci#endif /* NTFS_RW */
2907