162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * file.c - NTFS kernel file operations.  Part of the Linux-NTFS project.
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/blkdev.h>
962306a36Sopenharmony_ci#include <linux/backing-dev.h>
1062306a36Sopenharmony_ci#include <linux/buffer_head.h>
1162306a36Sopenharmony_ci#include <linux/gfp.h>
1262306a36Sopenharmony_ci#include <linux/pagemap.h>
1362306a36Sopenharmony_ci#include <linux/pagevec.h>
1462306a36Sopenharmony_ci#include <linux/sched/signal.h>
1562306a36Sopenharmony_ci#include <linux/swap.h>
1662306a36Sopenharmony_ci#include <linux/uio.h>
1762306a36Sopenharmony_ci#include <linux/writeback.h>
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci#include <asm/page.h>
2062306a36Sopenharmony_ci#include <linux/uaccess.h>
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci#include "attrib.h"
2362306a36Sopenharmony_ci#include "bitmap.h"
2462306a36Sopenharmony_ci#include "inode.h"
2562306a36Sopenharmony_ci#include "debug.h"
2662306a36Sopenharmony_ci#include "lcnalloc.h"
2762306a36Sopenharmony_ci#include "malloc.h"
2862306a36Sopenharmony_ci#include "mft.h"
2962306a36Sopenharmony_ci#include "ntfs.h"
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci/**
3262306a36Sopenharmony_ci * ntfs_file_open - called when an inode is about to be opened
3362306a36Sopenharmony_ci * @vi:		inode to be opened
3462306a36Sopenharmony_ci * @filp:	file structure describing the inode
3562306a36Sopenharmony_ci *
3662306a36Sopenharmony_ci * Limit file size to the page cache limit on architectures where unsigned long
3762306a36Sopenharmony_ci * is 32-bits. This is the most we can do for now without overflowing the page
3862306a36Sopenharmony_ci * cache page index. Doing it this way means we don't run into problems because
3962306a36Sopenharmony_ci * of existing too large files. It would be better to allow the user to read
4062306a36Sopenharmony_ci * the beginning of the file but I doubt very much anyone is going to hit this
4162306a36Sopenharmony_ci * check on a 32-bit architecture, so there is no point in adding the extra
4262306a36Sopenharmony_ci * complexity required to support this.
4362306a36Sopenharmony_ci *
4462306a36Sopenharmony_ci * On 64-bit architectures, the check is hopefully optimized away by the
4562306a36Sopenharmony_ci * compiler.
4662306a36Sopenharmony_ci *
4762306a36Sopenharmony_ci * After the check passes, just call generic_file_open() to do its work.
4862306a36Sopenharmony_ci */
4962306a36Sopenharmony_cistatic int ntfs_file_open(struct inode *vi, struct file *filp)
5062306a36Sopenharmony_ci{
5162306a36Sopenharmony_ci	if (sizeof(unsigned long) < 8) {
5262306a36Sopenharmony_ci		if (i_size_read(vi) > MAX_LFS_FILESIZE)
5362306a36Sopenharmony_ci			return -EOVERFLOW;
5462306a36Sopenharmony_ci	}
5562306a36Sopenharmony_ci	return generic_file_open(vi, filp);
5662306a36Sopenharmony_ci}
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci#ifdef NTFS_RW
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci/**
6162306a36Sopenharmony_ci * ntfs_attr_extend_initialized - extend the initialized size of an attribute
6262306a36Sopenharmony_ci * @ni:			ntfs inode of the attribute to extend
6362306a36Sopenharmony_ci * @new_init_size:	requested new initialized size in bytes
6462306a36Sopenharmony_ci *
6562306a36Sopenharmony_ci * Extend the initialized size of an attribute described by the ntfs inode @ni
6662306a36Sopenharmony_ci * to @new_init_size bytes.  This involves zeroing any non-sparse space between
6762306a36Sopenharmony_ci * the old initialized size and @new_init_size both in the page cache and on
6862306a36Sopenharmony_ci * disk (if relevant complete pages are already uptodate in the page cache then
6962306a36Sopenharmony_ci * these are simply marked dirty).
7062306a36Sopenharmony_ci *
7162306a36Sopenharmony_ci * As a side-effect, the file size (vfs inode->i_size) may be incremented as,
7262306a36Sopenharmony_ci * in the resident attribute case, it is tied to the initialized size and, in
7362306a36Sopenharmony_ci * the non-resident attribute case, it may not fall below the initialized size.
7462306a36Sopenharmony_ci *
7562306a36Sopenharmony_ci * Note that if the attribute is resident, we do not need to touch the page
7662306a36Sopenharmony_ci * cache at all.  This is because if the page cache page is not uptodate we
7762306a36Sopenharmony_ci * bring it uptodate later, when doing the write to the mft record since we
7862306a36Sopenharmony_ci * then already have the page mapped.  And if the page is uptodate, the
7962306a36Sopenharmony_ci * non-initialized region will already have been zeroed when the page was
8062306a36Sopenharmony_ci * brought uptodate and the region may in fact already have been overwritten
8162306a36Sopenharmony_ci * with new data via mmap() based writes, so we cannot just zero it.  And since
8262306a36Sopenharmony_ci * POSIX specifies that the behaviour of resizing a file whilst it is mmap()ped
8362306a36Sopenharmony_ci * is unspecified, we choose not to do zeroing and thus we do not need to touch
8462306a36Sopenharmony_ci * the page at all.  For a more detailed explanation see ntfs_truncate() in
8562306a36Sopenharmony_ci * fs/ntfs/inode.c.
8662306a36Sopenharmony_ci *
8762306a36Sopenharmony_ci * Return 0 on success and -errno on error.  In the case that an error is
8862306a36Sopenharmony_ci * encountered it is possible that the initialized size will already have been
8962306a36Sopenharmony_ci * incremented some way towards @new_init_size but it is guaranteed that if
9062306a36Sopenharmony_ci * this is the case, the necessary zeroing will also have happened and that all
9162306a36Sopenharmony_ci * metadata is self-consistent.
9262306a36Sopenharmony_ci *
9362306a36Sopenharmony_ci * Locking: i_mutex on the vfs inode corrseponsind to the ntfs inode @ni must be
9462306a36Sopenharmony_ci *	    held by the caller.
9562306a36Sopenharmony_ci */
9662306a36Sopenharmony_cistatic int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size)
9762306a36Sopenharmony_ci{
9862306a36Sopenharmony_ci	s64 old_init_size;
9962306a36Sopenharmony_ci	loff_t old_i_size;
10062306a36Sopenharmony_ci	pgoff_t index, end_index;
10162306a36Sopenharmony_ci	unsigned long flags;
10262306a36Sopenharmony_ci	struct inode *vi = VFS_I(ni);
10362306a36Sopenharmony_ci	ntfs_inode *base_ni;
10462306a36Sopenharmony_ci	MFT_RECORD *m = NULL;
10562306a36Sopenharmony_ci	ATTR_RECORD *a;
10662306a36Sopenharmony_ci	ntfs_attr_search_ctx *ctx = NULL;
10762306a36Sopenharmony_ci	struct address_space *mapping;
10862306a36Sopenharmony_ci	struct page *page = NULL;
10962306a36Sopenharmony_ci	u8 *kattr;
11062306a36Sopenharmony_ci	int err;
11162306a36Sopenharmony_ci	u32 attr_len;
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	read_lock_irqsave(&ni->size_lock, flags);
11462306a36Sopenharmony_ci	old_init_size = ni->initialized_size;
11562306a36Sopenharmony_ci	old_i_size = i_size_read(vi);
11662306a36Sopenharmony_ci	BUG_ON(new_init_size > ni->allocated_size);
11762306a36Sopenharmony_ci	read_unlock_irqrestore(&ni->size_lock, flags);
11862306a36Sopenharmony_ci	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
11962306a36Sopenharmony_ci			"old_initialized_size 0x%llx, "
12062306a36Sopenharmony_ci			"new_initialized_size 0x%llx, i_size 0x%llx.",
12162306a36Sopenharmony_ci			vi->i_ino, (unsigned)le32_to_cpu(ni->type),
12262306a36Sopenharmony_ci			(unsigned long long)old_init_size,
12362306a36Sopenharmony_ci			(unsigned long long)new_init_size, old_i_size);
12462306a36Sopenharmony_ci	if (!NInoAttr(ni))
12562306a36Sopenharmony_ci		base_ni = ni;
12662306a36Sopenharmony_ci	else
12762306a36Sopenharmony_ci		base_ni = ni->ext.base_ntfs_ino;
12862306a36Sopenharmony_ci	/* Use goto to reduce indentation and we need the label below anyway. */
12962306a36Sopenharmony_ci	if (NInoNonResident(ni))
13062306a36Sopenharmony_ci		goto do_non_resident_extend;
13162306a36Sopenharmony_ci	BUG_ON(old_init_size != old_i_size);
13262306a36Sopenharmony_ci	m = map_mft_record(base_ni);
13362306a36Sopenharmony_ci	if (IS_ERR(m)) {
13462306a36Sopenharmony_ci		err = PTR_ERR(m);
13562306a36Sopenharmony_ci		m = NULL;
13662306a36Sopenharmony_ci		goto err_out;
13762306a36Sopenharmony_ci	}
13862306a36Sopenharmony_ci	ctx = ntfs_attr_get_search_ctx(base_ni, m);
13962306a36Sopenharmony_ci	if (unlikely(!ctx)) {
14062306a36Sopenharmony_ci		err = -ENOMEM;
14162306a36Sopenharmony_ci		goto err_out;
14262306a36Sopenharmony_ci	}
14362306a36Sopenharmony_ci	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
14462306a36Sopenharmony_ci			CASE_SENSITIVE, 0, NULL, 0, ctx);
14562306a36Sopenharmony_ci	if (unlikely(err)) {
14662306a36Sopenharmony_ci		if (err == -ENOENT)
14762306a36Sopenharmony_ci			err = -EIO;
14862306a36Sopenharmony_ci		goto err_out;
14962306a36Sopenharmony_ci	}
15062306a36Sopenharmony_ci	m = ctx->mrec;
15162306a36Sopenharmony_ci	a = ctx->attr;
15262306a36Sopenharmony_ci	BUG_ON(a->non_resident);
15362306a36Sopenharmony_ci	/* The total length of the attribute value. */
15462306a36Sopenharmony_ci	attr_len = le32_to_cpu(a->data.resident.value_length);
15562306a36Sopenharmony_ci	BUG_ON(old_i_size != (loff_t)attr_len);
15662306a36Sopenharmony_ci	/*
15762306a36Sopenharmony_ci	 * Do the zeroing in the mft record and update the attribute size in
15862306a36Sopenharmony_ci	 * the mft record.
15962306a36Sopenharmony_ci	 */
16062306a36Sopenharmony_ci	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
16162306a36Sopenharmony_ci	memset(kattr + attr_len, 0, new_init_size - attr_len);
16262306a36Sopenharmony_ci	a->data.resident.value_length = cpu_to_le32((u32)new_init_size);
16362306a36Sopenharmony_ci	/* Finally, update the sizes in the vfs and ntfs inodes. */
16462306a36Sopenharmony_ci	write_lock_irqsave(&ni->size_lock, flags);
16562306a36Sopenharmony_ci	i_size_write(vi, new_init_size);
16662306a36Sopenharmony_ci	ni->initialized_size = new_init_size;
16762306a36Sopenharmony_ci	write_unlock_irqrestore(&ni->size_lock, flags);
16862306a36Sopenharmony_ci	goto done;
16962306a36Sopenharmony_cido_non_resident_extend:
17062306a36Sopenharmony_ci	/*
17162306a36Sopenharmony_ci	 * If the new initialized size @new_init_size exceeds the current file
17262306a36Sopenharmony_ci	 * size (vfs inode->i_size), we need to extend the file size to the
17362306a36Sopenharmony_ci	 * new initialized size.
17462306a36Sopenharmony_ci	 */
17562306a36Sopenharmony_ci	if (new_init_size > old_i_size) {
17662306a36Sopenharmony_ci		m = map_mft_record(base_ni);
17762306a36Sopenharmony_ci		if (IS_ERR(m)) {
17862306a36Sopenharmony_ci			err = PTR_ERR(m);
17962306a36Sopenharmony_ci			m = NULL;
18062306a36Sopenharmony_ci			goto err_out;
18162306a36Sopenharmony_ci		}
18262306a36Sopenharmony_ci		ctx = ntfs_attr_get_search_ctx(base_ni, m);
18362306a36Sopenharmony_ci		if (unlikely(!ctx)) {
18462306a36Sopenharmony_ci			err = -ENOMEM;
18562306a36Sopenharmony_ci			goto err_out;
18662306a36Sopenharmony_ci		}
18762306a36Sopenharmony_ci		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
18862306a36Sopenharmony_ci				CASE_SENSITIVE, 0, NULL, 0, ctx);
18962306a36Sopenharmony_ci		if (unlikely(err)) {
19062306a36Sopenharmony_ci			if (err == -ENOENT)
19162306a36Sopenharmony_ci				err = -EIO;
19262306a36Sopenharmony_ci			goto err_out;
19362306a36Sopenharmony_ci		}
19462306a36Sopenharmony_ci		m = ctx->mrec;
19562306a36Sopenharmony_ci		a = ctx->attr;
19662306a36Sopenharmony_ci		BUG_ON(!a->non_resident);
19762306a36Sopenharmony_ci		BUG_ON(old_i_size != (loff_t)
19862306a36Sopenharmony_ci				sle64_to_cpu(a->data.non_resident.data_size));
19962306a36Sopenharmony_ci		a->data.non_resident.data_size = cpu_to_sle64(new_init_size);
20062306a36Sopenharmony_ci		flush_dcache_mft_record_page(ctx->ntfs_ino);
20162306a36Sopenharmony_ci		mark_mft_record_dirty(ctx->ntfs_ino);
20262306a36Sopenharmony_ci		/* Update the file size in the vfs inode. */
20362306a36Sopenharmony_ci		i_size_write(vi, new_init_size);
20462306a36Sopenharmony_ci		ntfs_attr_put_search_ctx(ctx);
20562306a36Sopenharmony_ci		ctx = NULL;
20662306a36Sopenharmony_ci		unmap_mft_record(base_ni);
20762306a36Sopenharmony_ci		m = NULL;
20862306a36Sopenharmony_ci	}
20962306a36Sopenharmony_ci	mapping = vi->i_mapping;
21062306a36Sopenharmony_ci	index = old_init_size >> PAGE_SHIFT;
21162306a36Sopenharmony_ci	end_index = (new_init_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
21262306a36Sopenharmony_ci	do {
21362306a36Sopenharmony_ci		/*
21462306a36Sopenharmony_ci		 * Read the page.  If the page is not present, this will zero
21562306a36Sopenharmony_ci		 * the uninitialized regions for us.
21662306a36Sopenharmony_ci		 */
21762306a36Sopenharmony_ci		page = read_mapping_page(mapping, index, NULL);
21862306a36Sopenharmony_ci		if (IS_ERR(page)) {
21962306a36Sopenharmony_ci			err = PTR_ERR(page);
22062306a36Sopenharmony_ci			goto init_err_out;
22162306a36Sopenharmony_ci		}
22262306a36Sopenharmony_ci		/*
22362306a36Sopenharmony_ci		 * Update the initialized size in the ntfs inode.  This is
22462306a36Sopenharmony_ci		 * enough to make ntfs_writepage() work.
22562306a36Sopenharmony_ci		 */
22662306a36Sopenharmony_ci		write_lock_irqsave(&ni->size_lock, flags);
22762306a36Sopenharmony_ci		ni->initialized_size = (s64)(index + 1) << PAGE_SHIFT;
22862306a36Sopenharmony_ci		if (ni->initialized_size > new_init_size)
22962306a36Sopenharmony_ci			ni->initialized_size = new_init_size;
23062306a36Sopenharmony_ci		write_unlock_irqrestore(&ni->size_lock, flags);
23162306a36Sopenharmony_ci		/* Set the page dirty so it gets written out. */
23262306a36Sopenharmony_ci		set_page_dirty(page);
23362306a36Sopenharmony_ci		put_page(page);
23462306a36Sopenharmony_ci		/*
23562306a36Sopenharmony_ci		 * Play nice with the vm and the rest of the system.  This is
23662306a36Sopenharmony_ci		 * very much needed as we can potentially be modifying the
23762306a36Sopenharmony_ci		 * initialised size from a very small value to a really huge
23862306a36Sopenharmony_ci		 * value, e.g.
23962306a36Sopenharmony_ci		 *	f = open(somefile, O_TRUNC);
24062306a36Sopenharmony_ci		 *	truncate(f, 10GiB);
24162306a36Sopenharmony_ci		 *	seek(f, 10GiB);
24262306a36Sopenharmony_ci		 *	write(f, 1);
24362306a36Sopenharmony_ci		 * And this would mean we would be marking dirty hundreds of
24462306a36Sopenharmony_ci		 * thousands of pages or as in the above example more than
24562306a36Sopenharmony_ci		 * two and a half million pages!
24662306a36Sopenharmony_ci		 *
24762306a36Sopenharmony_ci		 * TODO: For sparse pages could optimize this workload by using
24862306a36Sopenharmony_ci		 * the FsMisc / MiscFs page bit as a "PageIsSparse" bit.  This
24962306a36Sopenharmony_ci		 * would be set in read_folio for sparse pages and here we would
25062306a36Sopenharmony_ci		 * not need to mark dirty any pages which have this bit set.
25162306a36Sopenharmony_ci		 * The only caveat is that we have to clear the bit everywhere
25262306a36Sopenharmony_ci		 * where we allocate any clusters that lie in the page or that
25362306a36Sopenharmony_ci		 * contain the page.
25462306a36Sopenharmony_ci		 *
25562306a36Sopenharmony_ci		 * TODO: An even greater optimization would be for us to only
25662306a36Sopenharmony_ci		 * call read_folio() on pages which are not in sparse regions as
25762306a36Sopenharmony_ci		 * determined from the runlist.  This would greatly reduce the
25862306a36Sopenharmony_ci		 * number of pages we read and make dirty in the case of sparse
25962306a36Sopenharmony_ci		 * files.
26062306a36Sopenharmony_ci		 */
26162306a36Sopenharmony_ci		balance_dirty_pages_ratelimited(mapping);
26262306a36Sopenharmony_ci		cond_resched();
26362306a36Sopenharmony_ci	} while (++index < end_index);
26462306a36Sopenharmony_ci	read_lock_irqsave(&ni->size_lock, flags);
26562306a36Sopenharmony_ci	BUG_ON(ni->initialized_size != new_init_size);
26662306a36Sopenharmony_ci	read_unlock_irqrestore(&ni->size_lock, flags);
26762306a36Sopenharmony_ci	/* Now bring in sync the initialized_size in the mft record. */
26862306a36Sopenharmony_ci	m = map_mft_record(base_ni);
26962306a36Sopenharmony_ci	if (IS_ERR(m)) {
27062306a36Sopenharmony_ci		err = PTR_ERR(m);
27162306a36Sopenharmony_ci		m = NULL;
27262306a36Sopenharmony_ci		goto init_err_out;
27362306a36Sopenharmony_ci	}
27462306a36Sopenharmony_ci	ctx = ntfs_attr_get_search_ctx(base_ni, m);
27562306a36Sopenharmony_ci	if (unlikely(!ctx)) {
27662306a36Sopenharmony_ci		err = -ENOMEM;
27762306a36Sopenharmony_ci		goto init_err_out;
27862306a36Sopenharmony_ci	}
27962306a36Sopenharmony_ci	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
28062306a36Sopenharmony_ci			CASE_SENSITIVE, 0, NULL, 0, ctx);
28162306a36Sopenharmony_ci	if (unlikely(err)) {
28262306a36Sopenharmony_ci		if (err == -ENOENT)
28362306a36Sopenharmony_ci			err = -EIO;
28462306a36Sopenharmony_ci		goto init_err_out;
28562306a36Sopenharmony_ci	}
28662306a36Sopenharmony_ci	m = ctx->mrec;
28762306a36Sopenharmony_ci	a = ctx->attr;
28862306a36Sopenharmony_ci	BUG_ON(!a->non_resident);
28962306a36Sopenharmony_ci	a->data.non_resident.initialized_size = cpu_to_sle64(new_init_size);
29062306a36Sopenharmony_cidone:
29162306a36Sopenharmony_ci	flush_dcache_mft_record_page(ctx->ntfs_ino);
29262306a36Sopenharmony_ci	mark_mft_record_dirty(ctx->ntfs_ino);
29362306a36Sopenharmony_ci	if (ctx)
29462306a36Sopenharmony_ci		ntfs_attr_put_search_ctx(ctx);
29562306a36Sopenharmony_ci	if (m)
29662306a36Sopenharmony_ci		unmap_mft_record(base_ni);
29762306a36Sopenharmony_ci	ntfs_debug("Done, initialized_size 0x%llx, i_size 0x%llx.",
29862306a36Sopenharmony_ci			(unsigned long long)new_init_size, i_size_read(vi));
29962306a36Sopenharmony_ci	return 0;
30062306a36Sopenharmony_ciinit_err_out:
30162306a36Sopenharmony_ci	write_lock_irqsave(&ni->size_lock, flags);
30262306a36Sopenharmony_ci	ni->initialized_size = old_init_size;
30362306a36Sopenharmony_ci	write_unlock_irqrestore(&ni->size_lock, flags);
30462306a36Sopenharmony_cierr_out:
30562306a36Sopenharmony_ci	if (ctx)
30662306a36Sopenharmony_ci		ntfs_attr_put_search_ctx(ctx);
30762306a36Sopenharmony_ci	if (m)
30862306a36Sopenharmony_ci		unmap_mft_record(base_ni);
30962306a36Sopenharmony_ci	ntfs_debug("Failed.  Returning error code %i.", err);
31062306a36Sopenharmony_ci	return err;
31162306a36Sopenharmony_ci}
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_cistatic ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb,
31462306a36Sopenharmony_ci		struct iov_iter *from)
31562306a36Sopenharmony_ci{
31662306a36Sopenharmony_ci	loff_t pos;
31762306a36Sopenharmony_ci	s64 end, ll;
31862306a36Sopenharmony_ci	ssize_t err;
31962306a36Sopenharmony_ci	unsigned long flags;
32062306a36Sopenharmony_ci	struct file *file = iocb->ki_filp;
32162306a36Sopenharmony_ci	struct inode *vi = file_inode(file);
32262306a36Sopenharmony_ci	ntfs_inode *ni = NTFS_I(vi);
32362306a36Sopenharmony_ci	ntfs_volume *vol = ni->vol;
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
32662306a36Sopenharmony_ci			"0x%llx, count 0x%zx.", vi->i_ino,
32762306a36Sopenharmony_ci			(unsigned)le32_to_cpu(ni->type),
32862306a36Sopenharmony_ci			(unsigned long long)iocb->ki_pos,
32962306a36Sopenharmony_ci			iov_iter_count(from));
33062306a36Sopenharmony_ci	err = generic_write_checks(iocb, from);
33162306a36Sopenharmony_ci	if (unlikely(err <= 0))
33262306a36Sopenharmony_ci		goto out;
33362306a36Sopenharmony_ci	/*
33462306a36Sopenharmony_ci	 * All checks have passed.  Before we start doing any writing we want
33562306a36Sopenharmony_ci	 * to abort any totally illegal writes.
33662306a36Sopenharmony_ci	 */
33762306a36Sopenharmony_ci	BUG_ON(NInoMstProtected(ni));
33862306a36Sopenharmony_ci	BUG_ON(ni->type != AT_DATA);
33962306a36Sopenharmony_ci	/* If file is encrypted, deny access, just like NT4. */
34062306a36Sopenharmony_ci	if (NInoEncrypted(ni)) {
34162306a36Sopenharmony_ci		/* Only $DATA attributes can be encrypted. */
34262306a36Sopenharmony_ci		/*
34362306a36Sopenharmony_ci		 * Reminder for later: Encrypted files are _always_
34462306a36Sopenharmony_ci		 * non-resident so that the content can always be encrypted.
34562306a36Sopenharmony_ci		 */
34662306a36Sopenharmony_ci		ntfs_debug("Denying write access to encrypted file.");
34762306a36Sopenharmony_ci		err = -EACCES;
34862306a36Sopenharmony_ci		goto out;
34962306a36Sopenharmony_ci	}
35062306a36Sopenharmony_ci	if (NInoCompressed(ni)) {
35162306a36Sopenharmony_ci		/* Only unnamed $DATA attribute can be compressed. */
35262306a36Sopenharmony_ci		BUG_ON(ni->name_len);
35362306a36Sopenharmony_ci		/*
35462306a36Sopenharmony_ci		 * Reminder for later: If resident, the data is not actually
35562306a36Sopenharmony_ci		 * compressed.  Only on the switch to non-resident does
35662306a36Sopenharmony_ci		 * compression kick in.  This is in contrast to encrypted files
35762306a36Sopenharmony_ci		 * (see above).
35862306a36Sopenharmony_ci		 */
35962306a36Sopenharmony_ci		ntfs_error(vi->i_sb, "Writing to compressed files is not "
36062306a36Sopenharmony_ci				"implemented yet.  Sorry.");
36162306a36Sopenharmony_ci		err = -EOPNOTSUPP;
36262306a36Sopenharmony_ci		goto out;
36362306a36Sopenharmony_ci	}
36462306a36Sopenharmony_ci	err = file_remove_privs(file);
36562306a36Sopenharmony_ci	if (unlikely(err))
36662306a36Sopenharmony_ci		goto out;
36762306a36Sopenharmony_ci	/*
36862306a36Sopenharmony_ci	 * Our ->update_time method always succeeds thus file_update_time()
36962306a36Sopenharmony_ci	 * cannot fail either so there is no need to check the return code.
37062306a36Sopenharmony_ci	 */
37162306a36Sopenharmony_ci	file_update_time(file);
37262306a36Sopenharmony_ci	pos = iocb->ki_pos;
37362306a36Sopenharmony_ci	/* The first byte after the last cluster being written to. */
37462306a36Sopenharmony_ci	end = (pos + iov_iter_count(from) + vol->cluster_size_mask) &
37562306a36Sopenharmony_ci			~(u64)vol->cluster_size_mask;
37662306a36Sopenharmony_ci	/*
37762306a36Sopenharmony_ci	 * If the write goes beyond the allocated size, extend the allocation
37862306a36Sopenharmony_ci	 * to cover the whole of the write, rounded up to the nearest cluster.
37962306a36Sopenharmony_ci	 */
38062306a36Sopenharmony_ci	read_lock_irqsave(&ni->size_lock, flags);
38162306a36Sopenharmony_ci	ll = ni->allocated_size;
38262306a36Sopenharmony_ci	read_unlock_irqrestore(&ni->size_lock, flags);
38362306a36Sopenharmony_ci	if (end > ll) {
38462306a36Sopenharmony_ci		/*
38562306a36Sopenharmony_ci		 * Extend the allocation without changing the data size.
38662306a36Sopenharmony_ci		 *
38762306a36Sopenharmony_ci		 * Note we ensure the allocation is big enough to at least
38862306a36Sopenharmony_ci		 * write some data but we do not require the allocation to be
38962306a36Sopenharmony_ci		 * complete, i.e. it may be partial.
39062306a36Sopenharmony_ci		 */
39162306a36Sopenharmony_ci		ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
39262306a36Sopenharmony_ci		if (likely(ll >= 0)) {
39362306a36Sopenharmony_ci			BUG_ON(pos >= ll);
39462306a36Sopenharmony_ci			/* If the extension was partial truncate the write. */
39562306a36Sopenharmony_ci			if (end > ll) {
39662306a36Sopenharmony_ci				ntfs_debug("Truncating write to inode 0x%lx, "
39762306a36Sopenharmony_ci						"attribute type 0x%x, because "
39862306a36Sopenharmony_ci						"the allocation was only "
39962306a36Sopenharmony_ci						"partially extended.",
40062306a36Sopenharmony_ci						vi->i_ino, (unsigned)
40162306a36Sopenharmony_ci						le32_to_cpu(ni->type));
40262306a36Sopenharmony_ci				iov_iter_truncate(from, ll - pos);
40362306a36Sopenharmony_ci			}
40462306a36Sopenharmony_ci		} else {
40562306a36Sopenharmony_ci			err = ll;
40662306a36Sopenharmony_ci			read_lock_irqsave(&ni->size_lock, flags);
40762306a36Sopenharmony_ci			ll = ni->allocated_size;
40862306a36Sopenharmony_ci			read_unlock_irqrestore(&ni->size_lock, flags);
40962306a36Sopenharmony_ci			/* Perform a partial write if possible or fail. */
41062306a36Sopenharmony_ci			if (pos < ll) {
41162306a36Sopenharmony_ci				ntfs_debug("Truncating write to inode 0x%lx "
41262306a36Sopenharmony_ci						"attribute type 0x%x, because "
41362306a36Sopenharmony_ci						"extending the allocation "
41462306a36Sopenharmony_ci						"failed (error %d).",
41562306a36Sopenharmony_ci						vi->i_ino, (unsigned)
41662306a36Sopenharmony_ci						le32_to_cpu(ni->type),
41762306a36Sopenharmony_ci						(int)-err);
41862306a36Sopenharmony_ci				iov_iter_truncate(from, ll - pos);
41962306a36Sopenharmony_ci			} else {
42062306a36Sopenharmony_ci				if (err != -ENOSPC)
42162306a36Sopenharmony_ci					ntfs_error(vi->i_sb, "Cannot perform "
42262306a36Sopenharmony_ci							"write to inode "
42362306a36Sopenharmony_ci							"0x%lx, attribute "
42462306a36Sopenharmony_ci							"type 0x%x, because "
42562306a36Sopenharmony_ci							"extending the "
42662306a36Sopenharmony_ci							"allocation failed "
42762306a36Sopenharmony_ci							"(error %ld).",
42862306a36Sopenharmony_ci							vi->i_ino, (unsigned)
42962306a36Sopenharmony_ci							le32_to_cpu(ni->type),
43062306a36Sopenharmony_ci							(long)-err);
43162306a36Sopenharmony_ci				else
43262306a36Sopenharmony_ci					ntfs_debug("Cannot perform write to "
43362306a36Sopenharmony_ci							"inode 0x%lx, "
43462306a36Sopenharmony_ci							"attribute type 0x%x, "
43562306a36Sopenharmony_ci							"because there is not "
43662306a36Sopenharmony_ci							"space left.",
43762306a36Sopenharmony_ci							vi->i_ino, (unsigned)
43862306a36Sopenharmony_ci							le32_to_cpu(ni->type));
43962306a36Sopenharmony_ci				goto out;
44062306a36Sopenharmony_ci			}
44162306a36Sopenharmony_ci		}
44262306a36Sopenharmony_ci	}
44362306a36Sopenharmony_ci	/*
44462306a36Sopenharmony_ci	 * If the write starts beyond the initialized size, extend it up to the
44562306a36Sopenharmony_ci	 * beginning of the write and initialize all non-sparse space between
44662306a36Sopenharmony_ci	 * the old initialized size and the new one.  This automatically also
44762306a36Sopenharmony_ci	 * increments the vfs inode->i_size to keep it above or equal to the
44862306a36Sopenharmony_ci	 * initialized_size.
44962306a36Sopenharmony_ci	 */
45062306a36Sopenharmony_ci	read_lock_irqsave(&ni->size_lock, flags);
45162306a36Sopenharmony_ci	ll = ni->initialized_size;
45262306a36Sopenharmony_ci	read_unlock_irqrestore(&ni->size_lock, flags);
45362306a36Sopenharmony_ci	if (pos > ll) {
45462306a36Sopenharmony_ci		/*
45562306a36Sopenharmony_ci		 * Wait for ongoing direct i/o to complete before proceeding.
45662306a36Sopenharmony_ci		 * New direct i/o cannot start as we hold i_mutex.
45762306a36Sopenharmony_ci		 */
45862306a36Sopenharmony_ci		inode_dio_wait(vi);
45962306a36Sopenharmony_ci		err = ntfs_attr_extend_initialized(ni, pos);
46062306a36Sopenharmony_ci		if (unlikely(err < 0))
46162306a36Sopenharmony_ci			ntfs_error(vi->i_sb, "Cannot perform write to inode "
46262306a36Sopenharmony_ci					"0x%lx, attribute type 0x%x, because "
46362306a36Sopenharmony_ci					"extending the initialized size "
46462306a36Sopenharmony_ci					"failed (error %d).", vi->i_ino,
46562306a36Sopenharmony_ci					(unsigned)le32_to_cpu(ni->type),
46662306a36Sopenharmony_ci					(int)-err);
46762306a36Sopenharmony_ci	}
46862306a36Sopenharmony_ciout:
46962306a36Sopenharmony_ci	return err;
47062306a36Sopenharmony_ci}
47162306a36Sopenharmony_ci
47262306a36Sopenharmony_ci/**
47362306a36Sopenharmony_ci * __ntfs_grab_cache_pages - obtain a number of locked pages
47462306a36Sopenharmony_ci * @mapping:	address space mapping from which to obtain page cache pages
47562306a36Sopenharmony_ci * @index:	starting index in @mapping at which to begin obtaining pages
47662306a36Sopenharmony_ci * @nr_pages:	number of page cache pages to obtain
47762306a36Sopenharmony_ci * @pages:	array of pages in which to return the obtained page cache pages
47862306a36Sopenharmony_ci * @cached_page: allocated but as yet unused page
47962306a36Sopenharmony_ci *
48062306a36Sopenharmony_ci * Obtain @nr_pages locked page cache pages from the mapping @mapping and
48162306a36Sopenharmony_ci * starting at index @index.
48262306a36Sopenharmony_ci *
48362306a36Sopenharmony_ci * If a page is newly created, add it to lru list
48462306a36Sopenharmony_ci *
48562306a36Sopenharmony_ci * Note, the page locks are obtained in ascending page index order.
48662306a36Sopenharmony_ci */
48762306a36Sopenharmony_cistatic inline int __ntfs_grab_cache_pages(struct address_space *mapping,
48862306a36Sopenharmony_ci		pgoff_t index, const unsigned nr_pages, struct page **pages,
48962306a36Sopenharmony_ci		struct page **cached_page)
49062306a36Sopenharmony_ci{
49162306a36Sopenharmony_ci	int err, nr;
49262306a36Sopenharmony_ci
49362306a36Sopenharmony_ci	BUG_ON(!nr_pages);
49462306a36Sopenharmony_ci	err = nr = 0;
49562306a36Sopenharmony_ci	do {
49662306a36Sopenharmony_ci		pages[nr] = find_get_page_flags(mapping, index, FGP_LOCK |
49762306a36Sopenharmony_ci				FGP_ACCESSED);
49862306a36Sopenharmony_ci		if (!pages[nr]) {
49962306a36Sopenharmony_ci			if (!*cached_page) {
50062306a36Sopenharmony_ci				*cached_page = page_cache_alloc(mapping);
50162306a36Sopenharmony_ci				if (unlikely(!*cached_page)) {
50262306a36Sopenharmony_ci					err = -ENOMEM;
50362306a36Sopenharmony_ci					goto err_out;
50462306a36Sopenharmony_ci				}
50562306a36Sopenharmony_ci			}
50662306a36Sopenharmony_ci			err = add_to_page_cache_lru(*cached_page, mapping,
50762306a36Sopenharmony_ci				   index,
50862306a36Sopenharmony_ci				   mapping_gfp_constraint(mapping, GFP_KERNEL));
50962306a36Sopenharmony_ci			if (unlikely(err)) {
51062306a36Sopenharmony_ci				if (err == -EEXIST)
51162306a36Sopenharmony_ci					continue;
51262306a36Sopenharmony_ci				goto err_out;
51362306a36Sopenharmony_ci			}
51462306a36Sopenharmony_ci			pages[nr] = *cached_page;
51562306a36Sopenharmony_ci			*cached_page = NULL;
51662306a36Sopenharmony_ci		}
51762306a36Sopenharmony_ci		index++;
51862306a36Sopenharmony_ci		nr++;
51962306a36Sopenharmony_ci	} while (nr < nr_pages);
52062306a36Sopenharmony_ciout:
52162306a36Sopenharmony_ci	return err;
52262306a36Sopenharmony_cierr_out:
52362306a36Sopenharmony_ci	while (nr > 0) {
52462306a36Sopenharmony_ci		unlock_page(pages[--nr]);
52562306a36Sopenharmony_ci		put_page(pages[nr]);
52662306a36Sopenharmony_ci	}
52762306a36Sopenharmony_ci	goto out;
52862306a36Sopenharmony_ci}
52962306a36Sopenharmony_ci
53062306a36Sopenharmony_cistatic inline void ntfs_submit_bh_for_read(struct buffer_head *bh)
53162306a36Sopenharmony_ci{
53262306a36Sopenharmony_ci	lock_buffer(bh);
53362306a36Sopenharmony_ci	get_bh(bh);
53462306a36Sopenharmony_ci	bh->b_end_io = end_buffer_read_sync;
53562306a36Sopenharmony_ci	submit_bh(REQ_OP_READ, bh);
53662306a36Sopenharmony_ci}
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci/**
53962306a36Sopenharmony_ci * ntfs_prepare_pages_for_non_resident_write - prepare pages for receiving data
54062306a36Sopenharmony_ci * @pages:	array of destination pages
54162306a36Sopenharmony_ci * @nr_pages:	number of pages in @pages
54262306a36Sopenharmony_ci * @pos:	byte position in file at which the write begins
54362306a36Sopenharmony_ci * @bytes:	number of bytes to be written
54462306a36Sopenharmony_ci *
54562306a36Sopenharmony_ci * This is called for non-resident attributes from ntfs_file_buffered_write()
54662306a36Sopenharmony_ci * with i_mutex held on the inode (@pages[0]->mapping->host).  There are
54762306a36Sopenharmony_ci * @nr_pages pages in @pages which are locked but not kmap()ped.  The source
54862306a36Sopenharmony_ci * data has not yet been copied into the @pages.
54962306a36Sopenharmony_ci *
55062306a36Sopenharmony_ci * Need to fill any holes with actual clusters, allocate buffers if necessary,
55162306a36Sopenharmony_ci * ensure all the buffers are mapped, and bring uptodate any buffers that are
55262306a36Sopenharmony_ci * only partially being written to.
55362306a36Sopenharmony_ci *
55462306a36Sopenharmony_ci * If @nr_pages is greater than one, we are guaranteed that the cluster size is
55562306a36Sopenharmony_ci * greater than PAGE_SIZE, that all pages in @pages are entirely inside
55662306a36Sopenharmony_ci * the same cluster and that they are the entirety of that cluster, and that
55762306a36Sopenharmony_ci * the cluster is sparse, i.e. we need to allocate a cluster to fill the hole.
55862306a36Sopenharmony_ci *
55962306a36Sopenharmony_ci * i_size is not to be modified yet.
56062306a36Sopenharmony_ci *
56162306a36Sopenharmony_ci * Return 0 on success or -errno on error.
56262306a36Sopenharmony_ci */
56362306a36Sopenharmony_cistatic int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
56462306a36Sopenharmony_ci		unsigned nr_pages, s64 pos, size_t bytes)
56562306a36Sopenharmony_ci{
56662306a36Sopenharmony_ci	VCN vcn, highest_vcn = 0, cpos, cend, bh_cpos, bh_cend;
56762306a36Sopenharmony_ci	LCN lcn;
56862306a36Sopenharmony_ci	s64 bh_pos, vcn_len, end, initialized_size;
56962306a36Sopenharmony_ci	sector_t lcn_block;
57062306a36Sopenharmony_ci	struct page *page;
57162306a36Sopenharmony_ci	struct inode *vi;
57262306a36Sopenharmony_ci	ntfs_inode *ni, *base_ni = NULL;
57362306a36Sopenharmony_ci	ntfs_volume *vol;
57462306a36Sopenharmony_ci	runlist_element *rl, *rl2;
57562306a36Sopenharmony_ci	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
57662306a36Sopenharmony_ci	ntfs_attr_search_ctx *ctx = NULL;
57762306a36Sopenharmony_ci	MFT_RECORD *m = NULL;
57862306a36Sopenharmony_ci	ATTR_RECORD *a = NULL;
57962306a36Sopenharmony_ci	unsigned long flags;
58062306a36Sopenharmony_ci	u32 attr_rec_len = 0;
58162306a36Sopenharmony_ci	unsigned blocksize, u;
58262306a36Sopenharmony_ci	int err, mp_size;
58362306a36Sopenharmony_ci	bool rl_write_locked, was_hole, is_retry;
58462306a36Sopenharmony_ci	unsigned char blocksize_bits;
58562306a36Sopenharmony_ci	struct {
58662306a36Sopenharmony_ci		u8 runlist_merged:1;
58762306a36Sopenharmony_ci		u8 mft_attr_mapped:1;
58862306a36Sopenharmony_ci		u8 mp_rebuilt:1;
58962306a36Sopenharmony_ci		u8 attr_switched:1;
59062306a36Sopenharmony_ci	} status = { 0, 0, 0, 0 };
59162306a36Sopenharmony_ci
59262306a36Sopenharmony_ci	BUG_ON(!nr_pages);
59362306a36Sopenharmony_ci	BUG_ON(!pages);
59462306a36Sopenharmony_ci	BUG_ON(!*pages);
59562306a36Sopenharmony_ci	vi = pages[0]->mapping->host;
59662306a36Sopenharmony_ci	ni = NTFS_I(vi);
59762306a36Sopenharmony_ci	vol = ni->vol;
59862306a36Sopenharmony_ci	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page "
59962306a36Sopenharmony_ci			"index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.",
60062306a36Sopenharmony_ci			vi->i_ino, ni->type, pages[0]->index, nr_pages,
60162306a36Sopenharmony_ci			(long long)pos, bytes);
60262306a36Sopenharmony_ci	blocksize = vol->sb->s_blocksize;
60362306a36Sopenharmony_ci	blocksize_bits = vol->sb->s_blocksize_bits;
60462306a36Sopenharmony_ci	u = 0;
60562306a36Sopenharmony_ci	do {
60662306a36Sopenharmony_ci		page = pages[u];
60762306a36Sopenharmony_ci		BUG_ON(!page);
60862306a36Sopenharmony_ci		/*
60962306a36Sopenharmony_ci		 * create_empty_buffers() will create uptodate/dirty buffers if
61062306a36Sopenharmony_ci		 * the page is uptodate/dirty.
61162306a36Sopenharmony_ci		 */
61262306a36Sopenharmony_ci		if (!page_has_buffers(page)) {
61362306a36Sopenharmony_ci			create_empty_buffers(page, blocksize, 0);
61462306a36Sopenharmony_ci			if (unlikely(!page_has_buffers(page)))
61562306a36Sopenharmony_ci				return -ENOMEM;
61662306a36Sopenharmony_ci		}
61762306a36Sopenharmony_ci	} while (++u < nr_pages);
61862306a36Sopenharmony_ci	rl_write_locked = false;
61962306a36Sopenharmony_ci	rl = NULL;
62062306a36Sopenharmony_ci	err = 0;
62162306a36Sopenharmony_ci	vcn = lcn = -1;
62262306a36Sopenharmony_ci	vcn_len = 0;
62362306a36Sopenharmony_ci	lcn_block = -1;
62462306a36Sopenharmony_ci	was_hole = false;
62562306a36Sopenharmony_ci	cpos = pos >> vol->cluster_size_bits;
62662306a36Sopenharmony_ci	end = pos + bytes;
62762306a36Sopenharmony_ci	cend = (end + vol->cluster_size - 1) >> vol->cluster_size_bits;
62862306a36Sopenharmony_ci	/*
62962306a36Sopenharmony_ci	 * Loop over each page and for each page over each buffer.  Use goto to
63062306a36Sopenharmony_ci	 * reduce indentation.
63162306a36Sopenharmony_ci	 */
63262306a36Sopenharmony_ci	u = 0;
63362306a36Sopenharmony_cido_next_page:
63462306a36Sopenharmony_ci	page = pages[u];
63562306a36Sopenharmony_ci	bh_pos = (s64)page->index << PAGE_SHIFT;
63662306a36Sopenharmony_ci	bh = head = page_buffers(page);
63762306a36Sopenharmony_ci	do {
63862306a36Sopenharmony_ci		VCN cdelta;
63962306a36Sopenharmony_ci		s64 bh_end;
64062306a36Sopenharmony_ci		unsigned bh_cofs;
64162306a36Sopenharmony_ci
64262306a36Sopenharmony_ci		/* Clear buffer_new on all buffers to reinitialise state. */
64362306a36Sopenharmony_ci		if (buffer_new(bh))
64462306a36Sopenharmony_ci			clear_buffer_new(bh);
64562306a36Sopenharmony_ci		bh_end = bh_pos + blocksize;
64662306a36Sopenharmony_ci		bh_cpos = bh_pos >> vol->cluster_size_bits;
64762306a36Sopenharmony_ci		bh_cofs = bh_pos & vol->cluster_size_mask;
64862306a36Sopenharmony_ci		if (buffer_mapped(bh)) {
64962306a36Sopenharmony_ci			/*
65062306a36Sopenharmony_ci			 * The buffer is already mapped.  If it is uptodate,
65162306a36Sopenharmony_ci			 * ignore it.
65262306a36Sopenharmony_ci			 */
65362306a36Sopenharmony_ci			if (buffer_uptodate(bh))
65462306a36Sopenharmony_ci				continue;
65562306a36Sopenharmony_ci			/*
65662306a36Sopenharmony_ci			 * The buffer is not uptodate.  If the page is uptodate
65762306a36Sopenharmony_ci			 * set the buffer uptodate and otherwise ignore it.
65862306a36Sopenharmony_ci			 */
65962306a36Sopenharmony_ci			if (PageUptodate(page)) {
66062306a36Sopenharmony_ci				set_buffer_uptodate(bh);
66162306a36Sopenharmony_ci				continue;
66262306a36Sopenharmony_ci			}
66362306a36Sopenharmony_ci			/*
66462306a36Sopenharmony_ci			 * Neither the page nor the buffer are uptodate.  If
66562306a36Sopenharmony_ci			 * the buffer is only partially being written to, we
66662306a36Sopenharmony_ci			 * need to read it in before the write, i.e. now.
66762306a36Sopenharmony_ci			 */
66862306a36Sopenharmony_ci			if ((bh_pos < pos && bh_end > pos) ||
66962306a36Sopenharmony_ci					(bh_pos < end && bh_end > end)) {
67062306a36Sopenharmony_ci				/*
67162306a36Sopenharmony_ci				 * If the buffer is fully or partially within
67262306a36Sopenharmony_ci				 * the initialized size, do an actual read.
67362306a36Sopenharmony_ci				 * Otherwise, simply zero the buffer.
67462306a36Sopenharmony_ci				 */
67562306a36Sopenharmony_ci				read_lock_irqsave(&ni->size_lock, flags);
67662306a36Sopenharmony_ci				initialized_size = ni->initialized_size;
67762306a36Sopenharmony_ci				read_unlock_irqrestore(&ni->size_lock, flags);
67862306a36Sopenharmony_ci				if (bh_pos < initialized_size) {
67962306a36Sopenharmony_ci					ntfs_submit_bh_for_read(bh);
68062306a36Sopenharmony_ci					*wait_bh++ = bh;
68162306a36Sopenharmony_ci				} else {
68262306a36Sopenharmony_ci					zero_user(page, bh_offset(bh),
68362306a36Sopenharmony_ci							blocksize);
68462306a36Sopenharmony_ci					set_buffer_uptodate(bh);
68562306a36Sopenharmony_ci				}
68662306a36Sopenharmony_ci			}
68762306a36Sopenharmony_ci			continue;
68862306a36Sopenharmony_ci		}
68962306a36Sopenharmony_ci		/* Unmapped buffer.  Need to map it. */
69062306a36Sopenharmony_ci		bh->b_bdev = vol->sb->s_bdev;
69162306a36Sopenharmony_ci		/*
69262306a36Sopenharmony_ci		 * If the current buffer is in the same clusters as the map
69362306a36Sopenharmony_ci		 * cache, there is no need to check the runlist again.  The
69462306a36Sopenharmony_ci		 * map cache is made up of @vcn, which is the first cached file
69562306a36Sopenharmony_ci		 * cluster, @vcn_len which is the number of cached file
69662306a36Sopenharmony_ci		 * clusters, @lcn is the device cluster corresponding to @vcn,
69762306a36Sopenharmony_ci		 * and @lcn_block is the block number corresponding to @lcn.
69862306a36Sopenharmony_ci		 */
69962306a36Sopenharmony_ci		cdelta = bh_cpos - vcn;
70062306a36Sopenharmony_ci		if (likely(!cdelta || (cdelta > 0 && cdelta < vcn_len))) {
70162306a36Sopenharmony_cimap_buffer_cached:
70262306a36Sopenharmony_ci			BUG_ON(lcn < 0);
70362306a36Sopenharmony_ci			bh->b_blocknr = lcn_block +
70462306a36Sopenharmony_ci					(cdelta << (vol->cluster_size_bits -
70562306a36Sopenharmony_ci					blocksize_bits)) +
70662306a36Sopenharmony_ci					(bh_cofs >> blocksize_bits);
70762306a36Sopenharmony_ci			set_buffer_mapped(bh);
70862306a36Sopenharmony_ci			/*
70962306a36Sopenharmony_ci			 * If the page is uptodate so is the buffer.  If the
71062306a36Sopenharmony_ci			 * buffer is fully outside the write, we ignore it if
71162306a36Sopenharmony_ci			 * it was already allocated and we mark it dirty so it
71262306a36Sopenharmony_ci			 * gets written out if we allocated it.  On the other
71362306a36Sopenharmony_ci			 * hand, if we allocated the buffer but we are not
71462306a36Sopenharmony_ci			 * marking it dirty we set buffer_new so we can do
71562306a36Sopenharmony_ci			 * error recovery.
71662306a36Sopenharmony_ci			 */
71762306a36Sopenharmony_ci			if (PageUptodate(page)) {
71862306a36Sopenharmony_ci				if (!buffer_uptodate(bh))
71962306a36Sopenharmony_ci					set_buffer_uptodate(bh);
72062306a36Sopenharmony_ci				if (unlikely(was_hole)) {
72162306a36Sopenharmony_ci					/* We allocated the buffer. */
72262306a36Sopenharmony_ci					clean_bdev_bh_alias(bh);
72362306a36Sopenharmony_ci					if (bh_end <= pos || bh_pos >= end)
72462306a36Sopenharmony_ci						mark_buffer_dirty(bh);
72562306a36Sopenharmony_ci					else
72662306a36Sopenharmony_ci						set_buffer_new(bh);
72762306a36Sopenharmony_ci				}
72862306a36Sopenharmony_ci				continue;
72962306a36Sopenharmony_ci			}
73062306a36Sopenharmony_ci			/* Page is _not_ uptodate. */
73162306a36Sopenharmony_ci			if (likely(!was_hole)) {
73262306a36Sopenharmony_ci				/*
73362306a36Sopenharmony_ci				 * Buffer was already allocated.  If it is not
73462306a36Sopenharmony_ci				 * uptodate and is only partially being written
73562306a36Sopenharmony_ci				 * to, we need to read it in before the write,
73662306a36Sopenharmony_ci				 * i.e. now.
73762306a36Sopenharmony_ci				 */
73862306a36Sopenharmony_ci				if (!buffer_uptodate(bh) && bh_pos < end &&
73962306a36Sopenharmony_ci						bh_end > pos &&
74062306a36Sopenharmony_ci						(bh_pos < pos ||
74162306a36Sopenharmony_ci						bh_end > end)) {
74262306a36Sopenharmony_ci					/*
74362306a36Sopenharmony_ci					 * If the buffer is fully or partially
74462306a36Sopenharmony_ci					 * within the initialized size, do an
74562306a36Sopenharmony_ci					 * actual read.  Otherwise, simply zero
74662306a36Sopenharmony_ci					 * the buffer.
74762306a36Sopenharmony_ci					 */
74862306a36Sopenharmony_ci					read_lock_irqsave(&ni->size_lock,
74962306a36Sopenharmony_ci							flags);
75062306a36Sopenharmony_ci					initialized_size = ni->initialized_size;
75162306a36Sopenharmony_ci					read_unlock_irqrestore(&ni->size_lock,
75262306a36Sopenharmony_ci							flags);
75362306a36Sopenharmony_ci					if (bh_pos < initialized_size) {
75462306a36Sopenharmony_ci						ntfs_submit_bh_for_read(bh);
75562306a36Sopenharmony_ci						*wait_bh++ = bh;
75662306a36Sopenharmony_ci					} else {
75762306a36Sopenharmony_ci						zero_user(page, bh_offset(bh),
75862306a36Sopenharmony_ci								blocksize);
75962306a36Sopenharmony_ci						set_buffer_uptodate(bh);
76062306a36Sopenharmony_ci					}
76162306a36Sopenharmony_ci				}
76262306a36Sopenharmony_ci				continue;
76362306a36Sopenharmony_ci			}
76462306a36Sopenharmony_ci			/* We allocated the buffer. */
76562306a36Sopenharmony_ci			clean_bdev_bh_alias(bh);
76662306a36Sopenharmony_ci			/*
76762306a36Sopenharmony_ci			 * If the buffer is fully outside the write, zero it,
76862306a36Sopenharmony_ci			 * set it uptodate, and mark it dirty so it gets
76962306a36Sopenharmony_ci			 * written out.  If it is partially being written to,
77062306a36Sopenharmony_ci			 * zero region surrounding the write but leave it to
77162306a36Sopenharmony_ci			 * commit write to do anything else.  Finally, if the
77262306a36Sopenharmony_ci			 * buffer is fully being overwritten, do nothing.
77362306a36Sopenharmony_ci			 */
77462306a36Sopenharmony_ci			if (bh_end <= pos || bh_pos >= end) {
77562306a36Sopenharmony_ci				if (!buffer_uptodate(bh)) {
77662306a36Sopenharmony_ci					zero_user(page, bh_offset(bh),
77762306a36Sopenharmony_ci							blocksize);
77862306a36Sopenharmony_ci					set_buffer_uptodate(bh);
77962306a36Sopenharmony_ci				}
78062306a36Sopenharmony_ci				mark_buffer_dirty(bh);
78162306a36Sopenharmony_ci				continue;
78262306a36Sopenharmony_ci			}
78362306a36Sopenharmony_ci			set_buffer_new(bh);
78462306a36Sopenharmony_ci			if (!buffer_uptodate(bh) &&
78562306a36Sopenharmony_ci					(bh_pos < pos || bh_end > end)) {
78662306a36Sopenharmony_ci				u8 *kaddr;
78762306a36Sopenharmony_ci				unsigned pofs;
78862306a36Sopenharmony_ci
78962306a36Sopenharmony_ci				kaddr = kmap_atomic(page);
79062306a36Sopenharmony_ci				if (bh_pos < pos) {
79162306a36Sopenharmony_ci					pofs = bh_pos & ~PAGE_MASK;
79262306a36Sopenharmony_ci					memset(kaddr + pofs, 0, pos - bh_pos);
79362306a36Sopenharmony_ci				}
79462306a36Sopenharmony_ci				if (bh_end > end) {
79562306a36Sopenharmony_ci					pofs = end & ~PAGE_MASK;
79662306a36Sopenharmony_ci					memset(kaddr + pofs, 0, bh_end - end);
79762306a36Sopenharmony_ci				}
79862306a36Sopenharmony_ci				kunmap_atomic(kaddr);
79962306a36Sopenharmony_ci				flush_dcache_page(page);
80062306a36Sopenharmony_ci			}
80162306a36Sopenharmony_ci			continue;
80262306a36Sopenharmony_ci		}
80362306a36Sopenharmony_ci		/*
80462306a36Sopenharmony_ci		 * Slow path: this is the first buffer in the cluster.  If it
80562306a36Sopenharmony_ci		 * is outside allocated size and is not uptodate, zero it and
80662306a36Sopenharmony_ci		 * set it uptodate.
80762306a36Sopenharmony_ci		 */
80862306a36Sopenharmony_ci		read_lock_irqsave(&ni->size_lock, flags);
80962306a36Sopenharmony_ci		initialized_size = ni->allocated_size;
81062306a36Sopenharmony_ci		read_unlock_irqrestore(&ni->size_lock, flags);
81162306a36Sopenharmony_ci		if (bh_pos > initialized_size) {
81262306a36Sopenharmony_ci			if (PageUptodate(page)) {
81362306a36Sopenharmony_ci				if (!buffer_uptodate(bh))
81462306a36Sopenharmony_ci					set_buffer_uptodate(bh);
81562306a36Sopenharmony_ci			} else if (!buffer_uptodate(bh)) {
81662306a36Sopenharmony_ci				zero_user(page, bh_offset(bh), blocksize);
81762306a36Sopenharmony_ci				set_buffer_uptodate(bh);
81862306a36Sopenharmony_ci			}
81962306a36Sopenharmony_ci			continue;
82062306a36Sopenharmony_ci		}
82162306a36Sopenharmony_ci		is_retry = false;
82262306a36Sopenharmony_ci		if (!rl) {
82362306a36Sopenharmony_ci			down_read(&ni->runlist.lock);
82462306a36Sopenharmony_ciretry_remap:
82562306a36Sopenharmony_ci			rl = ni->runlist.rl;
82662306a36Sopenharmony_ci		}
82762306a36Sopenharmony_ci		if (likely(rl != NULL)) {
82862306a36Sopenharmony_ci			/* Seek to element containing target cluster. */
82962306a36Sopenharmony_ci			while (rl->length && rl[1].vcn <= bh_cpos)
83062306a36Sopenharmony_ci				rl++;
83162306a36Sopenharmony_ci			lcn = ntfs_rl_vcn_to_lcn(rl, bh_cpos);
83262306a36Sopenharmony_ci			if (likely(lcn >= 0)) {
83362306a36Sopenharmony_ci				/*
83462306a36Sopenharmony_ci				 * Successful remap, setup the map cache and
83562306a36Sopenharmony_ci				 * use that to deal with the buffer.
83662306a36Sopenharmony_ci				 */
83762306a36Sopenharmony_ci				was_hole = false;
83862306a36Sopenharmony_ci				vcn = bh_cpos;
83962306a36Sopenharmony_ci				vcn_len = rl[1].vcn - vcn;
84062306a36Sopenharmony_ci				lcn_block = lcn << (vol->cluster_size_bits -
84162306a36Sopenharmony_ci						blocksize_bits);
84262306a36Sopenharmony_ci				cdelta = 0;
84362306a36Sopenharmony_ci				/*
84462306a36Sopenharmony_ci				 * If the number of remaining clusters touched
84562306a36Sopenharmony_ci				 * by the write is smaller or equal to the
84662306a36Sopenharmony_ci				 * number of cached clusters, unlock the
84762306a36Sopenharmony_ci				 * runlist as the map cache will be used from
84862306a36Sopenharmony_ci				 * now on.
84962306a36Sopenharmony_ci				 */
85062306a36Sopenharmony_ci				if (likely(vcn + vcn_len >= cend)) {
85162306a36Sopenharmony_ci					if (rl_write_locked) {
85262306a36Sopenharmony_ci						up_write(&ni->runlist.lock);
85362306a36Sopenharmony_ci						rl_write_locked = false;
85462306a36Sopenharmony_ci					} else
85562306a36Sopenharmony_ci						up_read(&ni->runlist.lock);
85662306a36Sopenharmony_ci					rl = NULL;
85762306a36Sopenharmony_ci				}
85862306a36Sopenharmony_ci				goto map_buffer_cached;
85962306a36Sopenharmony_ci			}
86062306a36Sopenharmony_ci		} else
86162306a36Sopenharmony_ci			lcn = LCN_RL_NOT_MAPPED;
86262306a36Sopenharmony_ci		/*
86362306a36Sopenharmony_ci		 * If it is not a hole and not out of bounds, the runlist is
86462306a36Sopenharmony_ci		 * probably unmapped so try to map it now.
86562306a36Sopenharmony_ci		 */
86662306a36Sopenharmony_ci		if (unlikely(lcn != LCN_HOLE && lcn != LCN_ENOENT)) {
86762306a36Sopenharmony_ci			if (likely(!is_retry && lcn == LCN_RL_NOT_MAPPED)) {
86862306a36Sopenharmony_ci				/* Attempt to map runlist. */
86962306a36Sopenharmony_ci				if (!rl_write_locked) {
87062306a36Sopenharmony_ci					/*
87162306a36Sopenharmony_ci					 * We need the runlist locked for
87262306a36Sopenharmony_ci					 * writing, so if it is locked for
87362306a36Sopenharmony_ci					 * reading relock it now and retry in
87462306a36Sopenharmony_ci					 * case it changed whilst we dropped
87562306a36Sopenharmony_ci					 * the lock.
87662306a36Sopenharmony_ci					 */
87762306a36Sopenharmony_ci					up_read(&ni->runlist.lock);
87862306a36Sopenharmony_ci					down_write(&ni->runlist.lock);
87962306a36Sopenharmony_ci					rl_write_locked = true;
88062306a36Sopenharmony_ci					goto retry_remap;
88162306a36Sopenharmony_ci				}
88262306a36Sopenharmony_ci				err = ntfs_map_runlist_nolock(ni, bh_cpos,
88362306a36Sopenharmony_ci						NULL);
88462306a36Sopenharmony_ci				if (likely(!err)) {
88562306a36Sopenharmony_ci					is_retry = true;
88662306a36Sopenharmony_ci					goto retry_remap;
88762306a36Sopenharmony_ci				}
88862306a36Sopenharmony_ci				/*
88962306a36Sopenharmony_ci				 * If @vcn is out of bounds, pretend @lcn is
89062306a36Sopenharmony_ci				 * LCN_ENOENT.  As long as the buffer is out
89162306a36Sopenharmony_ci				 * of bounds this will work fine.
89262306a36Sopenharmony_ci				 */
89362306a36Sopenharmony_ci				if (err == -ENOENT) {
89462306a36Sopenharmony_ci					lcn = LCN_ENOENT;
89562306a36Sopenharmony_ci					err = 0;
89662306a36Sopenharmony_ci					goto rl_not_mapped_enoent;
89762306a36Sopenharmony_ci				}
89862306a36Sopenharmony_ci			} else
89962306a36Sopenharmony_ci				err = -EIO;
90062306a36Sopenharmony_ci			/* Failed to map the buffer, even after retrying. */
90162306a36Sopenharmony_ci			bh->b_blocknr = -1;
90262306a36Sopenharmony_ci			ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
90362306a36Sopenharmony_ci					"attribute type 0x%x, vcn 0x%llx, "
90462306a36Sopenharmony_ci					"vcn offset 0x%x, because its "
90562306a36Sopenharmony_ci					"location on disk could not be "
90662306a36Sopenharmony_ci					"determined%s (error code %i).",
90762306a36Sopenharmony_ci					ni->mft_no, ni->type,
90862306a36Sopenharmony_ci					(unsigned long long)bh_cpos,
90962306a36Sopenharmony_ci					(unsigned)bh_pos &
91062306a36Sopenharmony_ci					vol->cluster_size_mask,
91162306a36Sopenharmony_ci					is_retry ? " even after retrying" : "",
91262306a36Sopenharmony_ci					err);
91362306a36Sopenharmony_ci			break;
91462306a36Sopenharmony_ci		}
91562306a36Sopenharmony_cirl_not_mapped_enoent:
91662306a36Sopenharmony_ci		/*
91762306a36Sopenharmony_ci		 * The buffer is in a hole or out of bounds.  We need to fill
91862306a36Sopenharmony_ci		 * the hole, unless the buffer is in a cluster which is not
91962306a36Sopenharmony_ci		 * touched by the write, in which case we just leave the buffer
92062306a36Sopenharmony_ci		 * unmapped.  This can only happen when the cluster size is
92162306a36Sopenharmony_ci		 * less than the page cache size.
92262306a36Sopenharmony_ci		 */
92362306a36Sopenharmony_ci		if (unlikely(vol->cluster_size < PAGE_SIZE)) {
92462306a36Sopenharmony_ci			bh_cend = (bh_end + vol->cluster_size - 1) >>
92562306a36Sopenharmony_ci					vol->cluster_size_bits;
92662306a36Sopenharmony_ci			if ((bh_cend <= cpos || bh_cpos >= cend)) {
92762306a36Sopenharmony_ci				bh->b_blocknr = -1;
92862306a36Sopenharmony_ci				/*
92962306a36Sopenharmony_ci				 * If the buffer is uptodate we skip it.  If it
93062306a36Sopenharmony_ci				 * is not but the page is uptodate, we can set
93162306a36Sopenharmony_ci				 * the buffer uptodate.  If the page is not
93262306a36Sopenharmony_ci				 * uptodate, we can clear the buffer and set it
93362306a36Sopenharmony_ci				 * uptodate.  Whether this is worthwhile is
93462306a36Sopenharmony_ci				 * debatable and this could be removed.
93562306a36Sopenharmony_ci				 */
93662306a36Sopenharmony_ci				if (PageUptodate(page)) {
93762306a36Sopenharmony_ci					if (!buffer_uptodate(bh))
93862306a36Sopenharmony_ci						set_buffer_uptodate(bh);
93962306a36Sopenharmony_ci				} else if (!buffer_uptodate(bh)) {
94062306a36Sopenharmony_ci					zero_user(page, bh_offset(bh),
94162306a36Sopenharmony_ci						blocksize);
94262306a36Sopenharmony_ci					set_buffer_uptodate(bh);
94362306a36Sopenharmony_ci				}
94462306a36Sopenharmony_ci				continue;
94562306a36Sopenharmony_ci			}
94662306a36Sopenharmony_ci		}
94762306a36Sopenharmony_ci		/*
94862306a36Sopenharmony_ci		 * Out of bounds buffer is invalid if it was not really out of
94962306a36Sopenharmony_ci		 * bounds.
95062306a36Sopenharmony_ci		 */
95162306a36Sopenharmony_ci		BUG_ON(lcn != LCN_HOLE);
95262306a36Sopenharmony_ci		/*
95362306a36Sopenharmony_ci		 * We need the runlist locked for writing, so if it is locked
95462306a36Sopenharmony_ci		 * for reading relock it now and retry in case it changed
95562306a36Sopenharmony_ci		 * whilst we dropped the lock.
95662306a36Sopenharmony_ci		 */
95762306a36Sopenharmony_ci		BUG_ON(!rl);
95862306a36Sopenharmony_ci		if (!rl_write_locked) {
95962306a36Sopenharmony_ci			up_read(&ni->runlist.lock);
96062306a36Sopenharmony_ci			down_write(&ni->runlist.lock);
96162306a36Sopenharmony_ci			rl_write_locked = true;
96262306a36Sopenharmony_ci			goto retry_remap;
96362306a36Sopenharmony_ci		}
96462306a36Sopenharmony_ci		/* Find the previous last allocated cluster. */
96562306a36Sopenharmony_ci		BUG_ON(rl->lcn != LCN_HOLE);
96662306a36Sopenharmony_ci		lcn = -1;
96762306a36Sopenharmony_ci		rl2 = rl;
96862306a36Sopenharmony_ci		while (--rl2 >= ni->runlist.rl) {
96962306a36Sopenharmony_ci			if (rl2->lcn >= 0) {
97062306a36Sopenharmony_ci				lcn = rl2->lcn + rl2->length;
97162306a36Sopenharmony_ci				break;
97262306a36Sopenharmony_ci			}
97362306a36Sopenharmony_ci		}
97462306a36Sopenharmony_ci		rl2 = ntfs_cluster_alloc(vol, bh_cpos, 1, lcn, DATA_ZONE,
97562306a36Sopenharmony_ci				false);
97662306a36Sopenharmony_ci		if (IS_ERR(rl2)) {
97762306a36Sopenharmony_ci			err = PTR_ERR(rl2);
97862306a36Sopenharmony_ci			ntfs_debug("Failed to allocate cluster, error code %i.",
97962306a36Sopenharmony_ci					err);
98062306a36Sopenharmony_ci			break;
98162306a36Sopenharmony_ci		}
98262306a36Sopenharmony_ci		lcn = rl2->lcn;
98362306a36Sopenharmony_ci		rl = ntfs_runlists_merge(ni->runlist.rl, rl2);
98462306a36Sopenharmony_ci		if (IS_ERR(rl)) {
98562306a36Sopenharmony_ci			err = PTR_ERR(rl);
98662306a36Sopenharmony_ci			if (err != -ENOMEM)
98762306a36Sopenharmony_ci				err = -EIO;
98862306a36Sopenharmony_ci			if (ntfs_cluster_free_from_rl(vol, rl2)) {
98962306a36Sopenharmony_ci				ntfs_error(vol->sb, "Failed to release "
99062306a36Sopenharmony_ci						"allocated cluster in error "
99162306a36Sopenharmony_ci						"code path.  Run chkdsk to "
99262306a36Sopenharmony_ci						"recover the lost cluster.");
99362306a36Sopenharmony_ci				NVolSetErrors(vol);
99462306a36Sopenharmony_ci			}
99562306a36Sopenharmony_ci			ntfs_free(rl2);
99662306a36Sopenharmony_ci			break;
99762306a36Sopenharmony_ci		}
99862306a36Sopenharmony_ci		ni->runlist.rl = rl;
99962306a36Sopenharmony_ci		status.runlist_merged = 1;
100062306a36Sopenharmony_ci		ntfs_debug("Allocated cluster, lcn 0x%llx.",
100162306a36Sopenharmony_ci				(unsigned long long)lcn);
100262306a36Sopenharmony_ci		/* Map and lock the mft record and get the attribute record. */
100362306a36Sopenharmony_ci		if (!NInoAttr(ni))
100462306a36Sopenharmony_ci			base_ni = ni;
100562306a36Sopenharmony_ci		else
100662306a36Sopenharmony_ci			base_ni = ni->ext.base_ntfs_ino;
100762306a36Sopenharmony_ci		m = map_mft_record(base_ni);
100862306a36Sopenharmony_ci		if (IS_ERR(m)) {
100962306a36Sopenharmony_ci			err = PTR_ERR(m);
101062306a36Sopenharmony_ci			break;
101162306a36Sopenharmony_ci		}
101262306a36Sopenharmony_ci		ctx = ntfs_attr_get_search_ctx(base_ni, m);
101362306a36Sopenharmony_ci		if (unlikely(!ctx)) {
101462306a36Sopenharmony_ci			err = -ENOMEM;
101562306a36Sopenharmony_ci			unmap_mft_record(base_ni);
101662306a36Sopenharmony_ci			break;
101762306a36Sopenharmony_ci		}
101862306a36Sopenharmony_ci		status.mft_attr_mapped = 1;
101962306a36Sopenharmony_ci		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
102062306a36Sopenharmony_ci				CASE_SENSITIVE, bh_cpos, NULL, 0, ctx);
102162306a36Sopenharmony_ci		if (unlikely(err)) {
102262306a36Sopenharmony_ci			if (err == -ENOENT)
102362306a36Sopenharmony_ci				err = -EIO;
102462306a36Sopenharmony_ci			break;
102562306a36Sopenharmony_ci		}
102662306a36Sopenharmony_ci		m = ctx->mrec;
102762306a36Sopenharmony_ci		a = ctx->attr;
102862306a36Sopenharmony_ci		/*
102962306a36Sopenharmony_ci		 * Find the runlist element with which the attribute extent
103062306a36Sopenharmony_ci		 * starts.  Note, we cannot use the _attr_ version because we
103162306a36Sopenharmony_ci		 * have mapped the mft record.  That is ok because we know the
103262306a36Sopenharmony_ci		 * runlist fragment must be mapped already to have ever gotten
103362306a36Sopenharmony_ci		 * here, so we can just use the _rl_ version.
103462306a36Sopenharmony_ci		 */
103562306a36Sopenharmony_ci		vcn = sle64_to_cpu(a->data.non_resident.lowest_vcn);
103662306a36Sopenharmony_ci		rl2 = ntfs_rl_find_vcn_nolock(rl, vcn);
103762306a36Sopenharmony_ci		BUG_ON(!rl2);
103862306a36Sopenharmony_ci		BUG_ON(!rl2->length);
103962306a36Sopenharmony_ci		BUG_ON(rl2->lcn < LCN_HOLE);
104062306a36Sopenharmony_ci		highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
104162306a36Sopenharmony_ci		/*
104262306a36Sopenharmony_ci		 * If @highest_vcn is zero, calculate the real highest_vcn
104362306a36Sopenharmony_ci		 * (which can really be zero).
104462306a36Sopenharmony_ci		 */
104562306a36Sopenharmony_ci		if (!highest_vcn)
104662306a36Sopenharmony_ci			highest_vcn = (sle64_to_cpu(
104762306a36Sopenharmony_ci					a->data.non_resident.allocated_size) >>
104862306a36Sopenharmony_ci					vol->cluster_size_bits) - 1;
104962306a36Sopenharmony_ci		/*
105062306a36Sopenharmony_ci		 * Determine the size of the mapping pairs array for the new
105162306a36Sopenharmony_ci		 * extent, i.e. the old extent with the hole filled.
105262306a36Sopenharmony_ci		 */
105362306a36Sopenharmony_ci		mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, vcn,
105462306a36Sopenharmony_ci				highest_vcn);
105562306a36Sopenharmony_ci		if (unlikely(mp_size <= 0)) {
105662306a36Sopenharmony_ci			if (!(err = mp_size))
105762306a36Sopenharmony_ci				err = -EIO;
105862306a36Sopenharmony_ci			ntfs_debug("Failed to get size for mapping pairs "
105962306a36Sopenharmony_ci					"array, error code %i.", err);
106062306a36Sopenharmony_ci			break;
106162306a36Sopenharmony_ci		}
106262306a36Sopenharmony_ci		/*
106362306a36Sopenharmony_ci		 * Resize the attribute record to fit the new mapping pairs
106462306a36Sopenharmony_ci		 * array.
106562306a36Sopenharmony_ci		 */
106662306a36Sopenharmony_ci		attr_rec_len = le32_to_cpu(a->length);
106762306a36Sopenharmony_ci		err = ntfs_attr_record_resize(m, a, mp_size + le16_to_cpu(
106862306a36Sopenharmony_ci				a->data.non_resident.mapping_pairs_offset));
106962306a36Sopenharmony_ci		if (unlikely(err)) {
107062306a36Sopenharmony_ci			BUG_ON(err != -ENOSPC);
107162306a36Sopenharmony_ci			// TODO: Deal with this by using the current attribute
107262306a36Sopenharmony_ci			// and fill it with as much of the mapping pairs
107362306a36Sopenharmony_ci			// array as possible.  Then loop over each attribute
107462306a36Sopenharmony_ci			// extent rewriting the mapping pairs arrays as we go
107562306a36Sopenharmony_ci			// along and if when we reach the end we have not
107662306a36Sopenharmony_ci			// enough space, try to resize the last attribute
107762306a36Sopenharmony_ci			// extent and if even that fails, add a new attribute
107862306a36Sopenharmony_ci			// extent.
107962306a36Sopenharmony_ci			// We could also try to resize at each step in the hope
108062306a36Sopenharmony_ci			// that we will not need to rewrite every single extent.
108162306a36Sopenharmony_ci			// Note, we may need to decompress some extents to fill
108262306a36Sopenharmony_ci			// the runlist as we are walking the extents...
108362306a36Sopenharmony_ci			ntfs_error(vol->sb, "Not enough space in the mft "
108462306a36Sopenharmony_ci					"record for the extended attribute "
108562306a36Sopenharmony_ci					"record.  This case is not "
108662306a36Sopenharmony_ci					"implemented yet.");
108762306a36Sopenharmony_ci			err = -EOPNOTSUPP;
108862306a36Sopenharmony_ci			break ;
108962306a36Sopenharmony_ci		}
109062306a36Sopenharmony_ci		status.mp_rebuilt = 1;
109162306a36Sopenharmony_ci		/*
109262306a36Sopenharmony_ci		 * Generate the mapping pairs array directly into the attribute
109362306a36Sopenharmony_ci		 * record.
109462306a36Sopenharmony_ci		 */
109562306a36Sopenharmony_ci		err = ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
109662306a36Sopenharmony_ci				a->data.non_resident.mapping_pairs_offset),
109762306a36Sopenharmony_ci				mp_size, rl2, vcn, highest_vcn, NULL);
109862306a36Sopenharmony_ci		if (unlikely(err)) {
109962306a36Sopenharmony_ci			ntfs_error(vol->sb, "Cannot fill hole in inode 0x%lx, "
110062306a36Sopenharmony_ci					"attribute type 0x%x, because building "
110162306a36Sopenharmony_ci					"the mapping pairs failed with error "
110262306a36Sopenharmony_ci					"code %i.", vi->i_ino,
110362306a36Sopenharmony_ci					(unsigned)le32_to_cpu(ni->type), err);
110462306a36Sopenharmony_ci			err = -EIO;
110562306a36Sopenharmony_ci			break;
110662306a36Sopenharmony_ci		}
110762306a36Sopenharmony_ci		/* Update the highest_vcn but only if it was not set. */
110862306a36Sopenharmony_ci		if (unlikely(!a->data.non_resident.highest_vcn))
110962306a36Sopenharmony_ci			a->data.non_resident.highest_vcn =
111062306a36Sopenharmony_ci					cpu_to_sle64(highest_vcn);
111162306a36Sopenharmony_ci		/*
111262306a36Sopenharmony_ci		 * If the attribute is sparse/compressed, update the compressed
111362306a36Sopenharmony_ci		 * size in the ntfs_inode structure and the attribute record.
111462306a36Sopenharmony_ci		 */
111562306a36Sopenharmony_ci		if (likely(NInoSparse(ni) || NInoCompressed(ni))) {
111662306a36Sopenharmony_ci			/*
111762306a36Sopenharmony_ci			 * If we are not in the first attribute extent, switch
111862306a36Sopenharmony_ci			 * to it, but first ensure the changes will make it to
111962306a36Sopenharmony_ci			 * disk later.
112062306a36Sopenharmony_ci			 */
112162306a36Sopenharmony_ci			if (a->data.non_resident.lowest_vcn) {
112262306a36Sopenharmony_ci				flush_dcache_mft_record_page(ctx->ntfs_ino);
112362306a36Sopenharmony_ci				mark_mft_record_dirty(ctx->ntfs_ino);
112462306a36Sopenharmony_ci				ntfs_attr_reinit_search_ctx(ctx);
112562306a36Sopenharmony_ci				err = ntfs_attr_lookup(ni->type, ni->name,
112662306a36Sopenharmony_ci						ni->name_len, CASE_SENSITIVE,
112762306a36Sopenharmony_ci						0, NULL, 0, ctx);
112862306a36Sopenharmony_ci				if (unlikely(err)) {
112962306a36Sopenharmony_ci					status.attr_switched = 1;
113062306a36Sopenharmony_ci					break;
113162306a36Sopenharmony_ci				}
113262306a36Sopenharmony_ci				/* @m is not used any more so do not set it. */
113362306a36Sopenharmony_ci				a = ctx->attr;
113462306a36Sopenharmony_ci			}
113562306a36Sopenharmony_ci			write_lock_irqsave(&ni->size_lock, flags);
113662306a36Sopenharmony_ci			ni->itype.compressed.size += vol->cluster_size;
113762306a36Sopenharmony_ci			a->data.non_resident.compressed_size =
113862306a36Sopenharmony_ci					cpu_to_sle64(ni->itype.compressed.size);
113962306a36Sopenharmony_ci			write_unlock_irqrestore(&ni->size_lock, flags);
114062306a36Sopenharmony_ci		}
114162306a36Sopenharmony_ci		/* Ensure the changes make it to disk. */
114262306a36Sopenharmony_ci		flush_dcache_mft_record_page(ctx->ntfs_ino);
114362306a36Sopenharmony_ci		mark_mft_record_dirty(ctx->ntfs_ino);
114462306a36Sopenharmony_ci		ntfs_attr_put_search_ctx(ctx);
114562306a36Sopenharmony_ci		unmap_mft_record(base_ni);
114662306a36Sopenharmony_ci		/* Successfully filled the hole. */
114762306a36Sopenharmony_ci		status.runlist_merged = 0;
114862306a36Sopenharmony_ci		status.mft_attr_mapped = 0;
114962306a36Sopenharmony_ci		status.mp_rebuilt = 0;
115062306a36Sopenharmony_ci		/* Setup the map cache and use that to deal with the buffer. */
115162306a36Sopenharmony_ci		was_hole = true;
115262306a36Sopenharmony_ci		vcn = bh_cpos;
115362306a36Sopenharmony_ci		vcn_len = 1;
115462306a36Sopenharmony_ci		lcn_block = lcn << (vol->cluster_size_bits - blocksize_bits);
115562306a36Sopenharmony_ci		cdelta = 0;
115662306a36Sopenharmony_ci		/*
115762306a36Sopenharmony_ci		 * If the number of remaining clusters in the @pages is smaller
115862306a36Sopenharmony_ci		 * or equal to the number of cached clusters, unlock the
115962306a36Sopenharmony_ci		 * runlist as the map cache will be used from now on.
116062306a36Sopenharmony_ci		 */
116162306a36Sopenharmony_ci		if (likely(vcn + vcn_len >= cend)) {
116262306a36Sopenharmony_ci			up_write(&ni->runlist.lock);
116362306a36Sopenharmony_ci			rl_write_locked = false;
116462306a36Sopenharmony_ci			rl = NULL;
116562306a36Sopenharmony_ci		}
116662306a36Sopenharmony_ci		goto map_buffer_cached;
116762306a36Sopenharmony_ci	} while (bh_pos += blocksize, (bh = bh->b_this_page) != head);
116862306a36Sopenharmony_ci	/* If there are no errors, do the next page. */
116962306a36Sopenharmony_ci	if (likely(!err && ++u < nr_pages))
117062306a36Sopenharmony_ci		goto do_next_page;
117162306a36Sopenharmony_ci	/* If there are no errors, release the runlist lock if we took it. */
117262306a36Sopenharmony_ci	if (likely(!err)) {
117362306a36Sopenharmony_ci		if (unlikely(rl_write_locked)) {
117462306a36Sopenharmony_ci			up_write(&ni->runlist.lock);
117562306a36Sopenharmony_ci			rl_write_locked = false;
117662306a36Sopenharmony_ci		} else if (unlikely(rl))
117762306a36Sopenharmony_ci			up_read(&ni->runlist.lock);
117862306a36Sopenharmony_ci		rl = NULL;
117962306a36Sopenharmony_ci	}
118062306a36Sopenharmony_ci	/* If we issued read requests, let them complete. */
118162306a36Sopenharmony_ci	read_lock_irqsave(&ni->size_lock, flags);
118262306a36Sopenharmony_ci	initialized_size = ni->initialized_size;
118362306a36Sopenharmony_ci	read_unlock_irqrestore(&ni->size_lock, flags);
118462306a36Sopenharmony_ci	while (wait_bh > wait) {
118562306a36Sopenharmony_ci		bh = *--wait_bh;
118662306a36Sopenharmony_ci		wait_on_buffer(bh);
118762306a36Sopenharmony_ci		if (likely(buffer_uptodate(bh))) {
118862306a36Sopenharmony_ci			page = bh->b_page;
118962306a36Sopenharmony_ci			bh_pos = ((s64)page->index << PAGE_SHIFT) +
119062306a36Sopenharmony_ci					bh_offset(bh);
119162306a36Sopenharmony_ci			/*
119262306a36Sopenharmony_ci			 * If the buffer overflows the initialized size, need
119362306a36Sopenharmony_ci			 * to zero the overflowing region.
119462306a36Sopenharmony_ci			 */
119562306a36Sopenharmony_ci			if (unlikely(bh_pos + blocksize > initialized_size)) {
119662306a36Sopenharmony_ci				int ofs = 0;
119762306a36Sopenharmony_ci
119862306a36Sopenharmony_ci				if (likely(bh_pos < initialized_size))
119962306a36Sopenharmony_ci					ofs = initialized_size - bh_pos;
120062306a36Sopenharmony_ci				zero_user_segment(page, bh_offset(bh) + ofs,
120162306a36Sopenharmony_ci						blocksize);
120262306a36Sopenharmony_ci			}
120362306a36Sopenharmony_ci		} else /* if (unlikely(!buffer_uptodate(bh))) */
120462306a36Sopenharmony_ci			err = -EIO;
120562306a36Sopenharmony_ci	}
120662306a36Sopenharmony_ci	if (likely(!err)) {
120762306a36Sopenharmony_ci		/* Clear buffer_new on all buffers. */
120862306a36Sopenharmony_ci		u = 0;
120962306a36Sopenharmony_ci		do {
121062306a36Sopenharmony_ci			bh = head = page_buffers(pages[u]);
121162306a36Sopenharmony_ci			do {
121262306a36Sopenharmony_ci				if (buffer_new(bh))
121362306a36Sopenharmony_ci					clear_buffer_new(bh);
121462306a36Sopenharmony_ci			} while ((bh = bh->b_this_page) != head);
121562306a36Sopenharmony_ci		} while (++u < nr_pages);
121662306a36Sopenharmony_ci		ntfs_debug("Done.");
121762306a36Sopenharmony_ci		return err;
121862306a36Sopenharmony_ci	}
121962306a36Sopenharmony_ci	if (status.attr_switched) {
122062306a36Sopenharmony_ci		/* Get back to the attribute extent we modified. */
122162306a36Sopenharmony_ci		ntfs_attr_reinit_search_ctx(ctx);
122262306a36Sopenharmony_ci		if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
122362306a36Sopenharmony_ci				CASE_SENSITIVE, bh_cpos, NULL, 0, ctx)) {
122462306a36Sopenharmony_ci			ntfs_error(vol->sb, "Failed to find required "
122562306a36Sopenharmony_ci					"attribute extent of attribute in "
122662306a36Sopenharmony_ci					"error code path.  Run chkdsk to "
122762306a36Sopenharmony_ci					"recover.");
122862306a36Sopenharmony_ci			write_lock_irqsave(&ni->size_lock, flags);
122962306a36Sopenharmony_ci			ni->itype.compressed.size += vol->cluster_size;
123062306a36Sopenharmony_ci			write_unlock_irqrestore(&ni->size_lock, flags);
123162306a36Sopenharmony_ci			flush_dcache_mft_record_page(ctx->ntfs_ino);
123262306a36Sopenharmony_ci			mark_mft_record_dirty(ctx->ntfs_ino);
123362306a36Sopenharmony_ci			/*
123462306a36Sopenharmony_ci			 * The only thing that is now wrong is the compressed
123562306a36Sopenharmony_ci			 * size of the base attribute extent which chkdsk
123662306a36Sopenharmony_ci			 * should be able to fix.
123762306a36Sopenharmony_ci			 */
123862306a36Sopenharmony_ci			NVolSetErrors(vol);
123962306a36Sopenharmony_ci		} else {
124062306a36Sopenharmony_ci			m = ctx->mrec;
124162306a36Sopenharmony_ci			a = ctx->attr;
124262306a36Sopenharmony_ci			status.attr_switched = 0;
124362306a36Sopenharmony_ci		}
124462306a36Sopenharmony_ci	}
124562306a36Sopenharmony_ci	/*
124662306a36Sopenharmony_ci	 * If the runlist has been modified, need to restore it by punching a
124762306a36Sopenharmony_ci	 * hole into it and we then need to deallocate the on-disk cluster as
124862306a36Sopenharmony_ci	 * well.  Note, we only modify the runlist if we are able to generate a
124962306a36Sopenharmony_ci	 * new mapping pairs array, i.e. only when the mapped attribute extent
125062306a36Sopenharmony_ci	 * is not switched.
125162306a36Sopenharmony_ci	 */
125262306a36Sopenharmony_ci	if (status.runlist_merged && !status.attr_switched) {
125362306a36Sopenharmony_ci		BUG_ON(!rl_write_locked);
125462306a36Sopenharmony_ci		/* Make the file cluster we allocated sparse in the runlist. */
125562306a36Sopenharmony_ci		if (ntfs_rl_punch_nolock(vol, &ni->runlist, bh_cpos, 1)) {
125662306a36Sopenharmony_ci			ntfs_error(vol->sb, "Failed to punch hole into "
125762306a36Sopenharmony_ci					"attribute runlist in error code "
125862306a36Sopenharmony_ci					"path.  Run chkdsk to recover the "
125962306a36Sopenharmony_ci					"lost cluster.");
126062306a36Sopenharmony_ci			NVolSetErrors(vol);
126162306a36Sopenharmony_ci		} else /* if (success) */ {
126262306a36Sopenharmony_ci			status.runlist_merged = 0;
126362306a36Sopenharmony_ci			/*
126462306a36Sopenharmony_ci			 * Deallocate the on-disk cluster we allocated but only
126562306a36Sopenharmony_ci			 * if we succeeded in punching its vcn out of the
126662306a36Sopenharmony_ci			 * runlist.
126762306a36Sopenharmony_ci			 */
126862306a36Sopenharmony_ci			down_write(&vol->lcnbmp_lock);
126962306a36Sopenharmony_ci			if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) {
127062306a36Sopenharmony_ci				ntfs_error(vol->sb, "Failed to release "
127162306a36Sopenharmony_ci						"allocated cluster in error "
127262306a36Sopenharmony_ci						"code path.  Run chkdsk to "
127362306a36Sopenharmony_ci						"recover the lost cluster.");
127462306a36Sopenharmony_ci				NVolSetErrors(vol);
127562306a36Sopenharmony_ci			}
127662306a36Sopenharmony_ci			up_write(&vol->lcnbmp_lock);
127762306a36Sopenharmony_ci		}
127862306a36Sopenharmony_ci	}
127962306a36Sopenharmony_ci	/*
128062306a36Sopenharmony_ci	 * Resize the attribute record to its old size and rebuild the mapping
128162306a36Sopenharmony_ci	 * pairs array.  Note, we only can do this if the runlist has been
128262306a36Sopenharmony_ci	 * restored to its old state which also implies that the mapped
128362306a36Sopenharmony_ci	 * attribute extent is not switched.
128462306a36Sopenharmony_ci	 */
128562306a36Sopenharmony_ci	if (status.mp_rebuilt && !status.runlist_merged) {
128662306a36Sopenharmony_ci		if (ntfs_attr_record_resize(m, a, attr_rec_len)) {
128762306a36Sopenharmony_ci			ntfs_error(vol->sb, "Failed to restore attribute "
128862306a36Sopenharmony_ci					"record in error code path.  Run "
128962306a36Sopenharmony_ci					"chkdsk to recover.");
129062306a36Sopenharmony_ci			NVolSetErrors(vol);
129162306a36Sopenharmony_ci		} else /* if (success) */ {
129262306a36Sopenharmony_ci			if (ntfs_mapping_pairs_build(vol, (u8*)a +
129362306a36Sopenharmony_ci					le16_to_cpu(a->data.non_resident.
129462306a36Sopenharmony_ci					mapping_pairs_offset), attr_rec_len -
129562306a36Sopenharmony_ci					le16_to_cpu(a->data.non_resident.
129662306a36Sopenharmony_ci					mapping_pairs_offset), ni->runlist.rl,
129762306a36Sopenharmony_ci					vcn, highest_vcn, NULL)) {
129862306a36Sopenharmony_ci				ntfs_error(vol->sb, "Failed to restore "
129962306a36Sopenharmony_ci						"mapping pairs array in error "
130062306a36Sopenharmony_ci						"code path.  Run chkdsk to "
130162306a36Sopenharmony_ci						"recover.");
130262306a36Sopenharmony_ci				NVolSetErrors(vol);
130362306a36Sopenharmony_ci			}
130462306a36Sopenharmony_ci			flush_dcache_mft_record_page(ctx->ntfs_ino);
130562306a36Sopenharmony_ci			mark_mft_record_dirty(ctx->ntfs_ino);
130662306a36Sopenharmony_ci		}
130762306a36Sopenharmony_ci	}
130862306a36Sopenharmony_ci	/* Release the mft record and the attribute. */
130962306a36Sopenharmony_ci	if (status.mft_attr_mapped) {
131062306a36Sopenharmony_ci		ntfs_attr_put_search_ctx(ctx);
131162306a36Sopenharmony_ci		unmap_mft_record(base_ni);
131262306a36Sopenharmony_ci	}
131362306a36Sopenharmony_ci	/* Release the runlist lock. */
131462306a36Sopenharmony_ci	if (rl_write_locked)
131562306a36Sopenharmony_ci		up_write(&ni->runlist.lock);
131662306a36Sopenharmony_ci	else if (rl)
131762306a36Sopenharmony_ci		up_read(&ni->runlist.lock);
131862306a36Sopenharmony_ci	/*
131962306a36Sopenharmony_ci	 * Zero out any newly allocated blocks to avoid exposing stale data.
132062306a36Sopenharmony_ci	 * If BH_New is set, we know that the block was newly allocated above
132162306a36Sopenharmony_ci	 * and that it has not been fully zeroed and marked dirty yet.
132262306a36Sopenharmony_ci	 */
132362306a36Sopenharmony_ci	nr_pages = u;
132462306a36Sopenharmony_ci	u = 0;
132562306a36Sopenharmony_ci	end = bh_cpos << vol->cluster_size_bits;
132662306a36Sopenharmony_ci	do {
132762306a36Sopenharmony_ci		page = pages[u];
132862306a36Sopenharmony_ci		bh = head = page_buffers(page);
132962306a36Sopenharmony_ci		do {
133062306a36Sopenharmony_ci			if (u == nr_pages &&
133162306a36Sopenharmony_ci					((s64)page->index << PAGE_SHIFT) +
133262306a36Sopenharmony_ci					bh_offset(bh) >= end)
133362306a36Sopenharmony_ci				break;
133462306a36Sopenharmony_ci			if (!buffer_new(bh))
133562306a36Sopenharmony_ci				continue;
133662306a36Sopenharmony_ci			clear_buffer_new(bh);
133762306a36Sopenharmony_ci			if (!buffer_uptodate(bh)) {
133862306a36Sopenharmony_ci				if (PageUptodate(page))
133962306a36Sopenharmony_ci					set_buffer_uptodate(bh);
134062306a36Sopenharmony_ci				else {
134162306a36Sopenharmony_ci					zero_user(page, bh_offset(bh),
134262306a36Sopenharmony_ci							blocksize);
134362306a36Sopenharmony_ci					set_buffer_uptodate(bh);
134462306a36Sopenharmony_ci				}
134562306a36Sopenharmony_ci			}
134662306a36Sopenharmony_ci			mark_buffer_dirty(bh);
134762306a36Sopenharmony_ci		} while ((bh = bh->b_this_page) != head);
134862306a36Sopenharmony_ci	} while (++u <= nr_pages);
134962306a36Sopenharmony_ci	ntfs_error(vol->sb, "Failed.  Returning error code %i.", err);
135062306a36Sopenharmony_ci	return err;
135162306a36Sopenharmony_ci}
135262306a36Sopenharmony_ci
/**
 * ntfs_flush_dcache_pages - flush the data cache for an array of pages
 * @pages:	array of pages to flush
 * @nr_pages:	number of pages in @pages (must not be zero)
 *
 * Call flush_dcache_page() on each of the @nr_pages pages in @pages, walking
 * the array from its last element down to its first.
 */
static inline void ntfs_flush_dcache_pages(struct page **pages,
		unsigned nr_pages)
{
	unsigned idx;

	BUG_ON(!nr_pages);
	/*
	 * Warning: Keep the argument of flush_dcache_page() free of side
	 * effects.  It is a NULL macro on i386, so anything done inside its
	 * argument never happens there and a loop relying on it would never
	 * terminate.  The loop counter is therefore only modified in the for
	 * statement itself.
	 */
	for (idx = nr_pages; idx > 0; idx--)
		flush_dcache_page(pages[idx - 1]);
}
136762306a36Sopenharmony_ci
136862306a36Sopenharmony_ci/**
136962306a36Sopenharmony_ci * ntfs_commit_pages_after_non_resident_write - commit the received data
137062306a36Sopenharmony_ci * @pages:	array of destination pages
137162306a36Sopenharmony_ci * @nr_pages:	number of pages in @pages
137262306a36Sopenharmony_ci * @pos:	byte position in file at which the write begins
137362306a36Sopenharmony_ci * @bytes:	number of bytes to be written
137462306a36Sopenharmony_ci *
137562306a36Sopenharmony_ci * See description of ntfs_commit_pages_after_write(), below.
137662306a36Sopenharmony_ci */
137762306a36Sopenharmony_cistatic inline int ntfs_commit_pages_after_non_resident_write(
137862306a36Sopenharmony_ci		struct page **pages, const unsigned nr_pages,
137962306a36Sopenharmony_ci		s64 pos, size_t bytes)
138062306a36Sopenharmony_ci{
138162306a36Sopenharmony_ci	s64 end, initialized_size;
138262306a36Sopenharmony_ci	struct inode *vi;
138362306a36Sopenharmony_ci	ntfs_inode *ni, *base_ni;
138462306a36Sopenharmony_ci	struct buffer_head *bh, *head;
138562306a36Sopenharmony_ci	ntfs_attr_search_ctx *ctx;
138662306a36Sopenharmony_ci	MFT_RECORD *m;
138762306a36Sopenharmony_ci	ATTR_RECORD *a;
138862306a36Sopenharmony_ci	unsigned long flags;
138962306a36Sopenharmony_ci	unsigned blocksize, u;
139062306a36Sopenharmony_ci	int err;
139162306a36Sopenharmony_ci
139262306a36Sopenharmony_ci	vi = pages[0]->mapping->host;
139362306a36Sopenharmony_ci	ni = NTFS_I(vi);
139462306a36Sopenharmony_ci	blocksize = vi->i_sb->s_blocksize;
139562306a36Sopenharmony_ci	end = pos + bytes;
139662306a36Sopenharmony_ci	u = 0;
139762306a36Sopenharmony_ci	do {
139862306a36Sopenharmony_ci		s64 bh_pos;
139962306a36Sopenharmony_ci		struct page *page;
140062306a36Sopenharmony_ci		bool partial;
140162306a36Sopenharmony_ci
140262306a36Sopenharmony_ci		page = pages[u];
140362306a36Sopenharmony_ci		bh_pos = (s64)page->index << PAGE_SHIFT;
140462306a36Sopenharmony_ci		bh = head = page_buffers(page);
140562306a36Sopenharmony_ci		partial = false;
140662306a36Sopenharmony_ci		do {
140762306a36Sopenharmony_ci			s64 bh_end;
140862306a36Sopenharmony_ci
140962306a36Sopenharmony_ci			bh_end = bh_pos + blocksize;
141062306a36Sopenharmony_ci			if (bh_end <= pos || bh_pos >= end) {
141162306a36Sopenharmony_ci				if (!buffer_uptodate(bh))
141262306a36Sopenharmony_ci					partial = true;
141362306a36Sopenharmony_ci			} else {
141462306a36Sopenharmony_ci				set_buffer_uptodate(bh);
141562306a36Sopenharmony_ci				mark_buffer_dirty(bh);
141662306a36Sopenharmony_ci			}
141762306a36Sopenharmony_ci		} while (bh_pos += blocksize, (bh = bh->b_this_page) != head);
141862306a36Sopenharmony_ci		/*
141962306a36Sopenharmony_ci		 * If all buffers are now uptodate but the page is not, set the
142062306a36Sopenharmony_ci		 * page uptodate.
142162306a36Sopenharmony_ci		 */
142262306a36Sopenharmony_ci		if (!partial && !PageUptodate(page))
142362306a36Sopenharmony_ci			SetPageUptodate(page);
142462306a36Sopenharmony_ci	} while (++u < nr_pages);
142562306a36Sopenharmony_ci	/*
142662306a36Sopenharmony_ci	 * Finally, if we do not need to update initialized_size or i_size we
142762306a36Sopenharmony_ci	 * are finished.
142862306a36Sopenharmony_ci	 */
142962306a36Sopenharmony_ci	read_lock_irqsave(&ni->size_lock, flags);
143062306a36Sopenharmony_ci	initialized_size = ni->initialized_size;
143162306a36Sopenharmony_ci	read_unlock_irqrestore(&ni->size_lock, flags);
143262306a36Sopenharmony_ci	if (end <= initialized_size) {
143362306a36Sopenharmony_ci		ntfs_debug("Done.");
143462306a36Sopenharmony_ci		return 0;
143562306a36Sopenharmony_ci	}
143662306a36Sopenharmony_ci	/*
143762306a36Sopenharmony_ci	 * Update initialized_size/i_size as appropriate, both in the inode and
143862306a36Sopenharmony_ci	 * the mft record.
143962306a36Sopenharmony_ci	 */
144062306a36Sopenharmony_ci	if (!NInoAttr(ni))
144162306a36Sopenharmony_ci		base_ni = ni;
144262306a36Sopenharmony_ci	else
144362306a36Sopenharmony_ci		base_ni = ni->ext.base_ntfs_ino;
144462306a36Sopenharmony_ci	/* Map, pin, and lock the mft record. */
144562306a36Sopenharmony_ci	m = map_mft_record(base_ni);
144662306a36Sopenharmony_ci	if (IS_ERR(m)) {
144762306a36Sopenharmony_ci		err = PTR_ERR(m);
144862306a36Sopenharmony_ci		m = NULL;
144962306a36Sopenharmony_ci		ctx = NULL;
145062306a36Sopenharmony_ci		goto err_out;
145162306a36Sopenharmony_ci	}
145262306a36Sopenharmony_ci	BUG_ON(!NInoNonResident(ni));
145362306a36Sopenharmony_ci	ctx = ntfs_attr_get_search_ctx(base_ni, m);
145462306a36Sopenharmony_ci	if (unlikely(!ctx)) {
145562306a36Sopenharmony_ci		err = -ENOMEM;
145662306a36Sopenharmony_ci		goto err_out;
145762306a36Sopenharmony_ci	}
145862306a36Sopenharmony_ci	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
145962306a36Sopenharmony_ci			CASE_SENSITIVE, 0, NULL, 0, ctx);
146062306a36Sopenharmony_ci	if (unlikely(err)) {
146162306a36Sopenharmony_ci		if (err == -ENOENT)
146262306a36Sopenharmony_ci			err = -EIO;
146362306a36Sopenharmony_ci		goto err_out;
146462306a36Sopenharmony_ci	}
146562306a36Sopenharmony_ci	a = ctx->attr;
146662306a36Sopenharmony_ci	BUG_ON(!a->non_resident);
146762306a36Sopenharmony_ci	write_lock_irqsave(&ni->size_lock, flags);
146862306a36Sopenharmony_ci	BUG_ON(end > ni->allocated_size);
146962306a36Sopenharmony_ci	ni->initialized_size = end;
147062306a36Sopenharmony_ci	a->data.non_resident.initialized_size = cpu_to_sle64(end);
147162306a36Sopenharmony_ci	if (end > i_size_read(vi)) {
147262306a36Sopenharmony_ci		i_size_write(vi, end);
147362306a36Sopenharmony_ci		a->data.non_resident.data_size =
147462306a36Sopenharmony_ci				a->data.non_resident.initialized_size;
147562306a36Sopenharmony_ci	}
147662306a36Sopenharmony_ci	write_unlock_irqrestore(&ni->size_lock, flags);
147762306a36Sopenharmony_ci	/* Mark the mft record dirty, so it gets written back. */
147862306a36Sopenharmony_ci	flush_dcache_mft_record_page(ctx->ntfs_ino);
147962306a36Sopenharmony_ci	mark_mft_record_dirty(ctx->ntfs_ino);
148062306a36Sopenharmony_ci	ntfs_attr_put_search_ctx(ctx);
148162306a36Sopenharmony_ci	unmap_mft_record(base_ni);
148262306a36Sopenharmony_ci	ntfs_debug("Done.");
148362306a36Sopenharmony_ci	return 0;
148462306a36Sopenharmony_cierr_out:
148562306a36Sopenharmony_ci	if (ctx)
148662306a36Sopenharmony_ci		ntfs_attr_put_search_ctx(ctx);
148762306a36Sopenharmony_ci	if (m)
148862306a36Sopenharmony_ci		unmap_mft_record(base_ni);
148962306a36Sopenharmony_ci	ntfs_error(vi->i_sb, "Failed to update initialized_size/i_size (error "
149062306a36Sopenharmony_ci			"code %i).", err);
149162306a36Sopenharmony_ci	if (err != -ENOMEM)
149262306a36Sopenharmony_ci		NVolSetErrors(ni->vol);
149362306a36Sopenharmony_ci	return err;
149462306a36Sopenharmony_ci}
149562306a36Sopenharmony_ci
149662306a36Sopenharmony_ci/**
149762306a36Sopenharmony_ci * ntfs_commit_pages_after_write - commit the received data
149862306a36Sopenharmony_ci * @pages:	array of destination pages
149962306a36Sopenharmony_ci * @nr_pages:	number of pages in @pages
150062306a36Sopenharmony_ci * @pos:	byte position in file at which the write begins
150162306a36Sopenharmony_ci * @bytes:	number of bytes to be written
150262306a36Sopenharmony_ci *
150362306a36Sopenharmony_ci * This is called from ntfs_file_buffered_write() with i_mutex held on the inode
150462306a36Sopenharmony_ci * (@pages[0]->mapping->host).  There are @nr_pages pages in @pages which are
150562306a36Sopenharmony_ci * locked but not kmap()ped.  The source data has already been copied into the
150662306a36Sopenharmony_ci * @page.  ntfs_prepare_pages_for_non_resident_write() has been called before
150762306a36Sopenharmony_ci * the data was copied (for non-resident attributes only) and it returned
150862306a36Sopenharmony_ci * success.
150962306a36Sopenharmony_ci *
151062306a36Sopenharmony_ci * Need to set uptodate and mark dirty all buffers within the boundary of the
151162306a36Sopenharmony_ci * write.  If all buffers in a page are uptodate we set the page uptodate, too.
151262306a36Sopenharmony_ci *
151362306a36Sopenharmony_ci * Setting the buffers dirty ensures that they get written out later when
151462306a36Sopenharmony_ci * ntfs_writepage() is invoked by the VM.
151562306a36Sopenharmony_ci *
151662306a36Sopenharmony_ci * Finally, we need to update i_size and initialized_size as appropriate both
151762306a36Sopenharmony_ci * in the inode and the mft record.
151862306a36Sopenharmony_ci *
151962306a36Sopenharmony_ci * This is modelled after fs/buffer.c::generic_commit_write(), which marks
152062306a36Sopenharmony_ci * buffers uptodate and dirty, sets the page uptodate if all buffers in the
152162306a36Sopenharmony_ci * page are uptodate, and updates i_size if the end of io is beyond i_size.  In
152262306a36Sopenharmony_ci * that case, it also marks the inode dirty.
152362306a36Sopenharmony_ci *
152462306a36Sopenharmony_ci * If things have gone as outlined in
152562306a36Sopenharmony_ci * ntfs_prepare_pages_for_non_resident_write(), we do not need to do any page
152662306a36Sopenharmony_ci * content modifications here for non-resident attributes.  For resident
152762306a36Sopenharmony_ci * attributes we need to do the uptodate bringing here which we combine with
152862306a36Sopenharmony_ci * the copying into the mft record which means we save one atomic kmap.
152962306a36Sopenharmony_ci *
153062306a36Sopenharmony_ci * Return 0 on success or -errno on error.
153162306a36Sopenharmony_ci */
153262306a36Sopenharmony_cistatic int ntfs_commit_pages_after_write(struct page **pages,
153362306a36Sopenharmony_ci		const unsigned nr_pages, s64 pos, size_t bytes)
153462306a36Sopenharmony_ci{
153562306a36Sopenharmony_ci	s64 end, initialized_size;
153662306a36Sopenharmony_ci	loff_t i_size;
153762306a36Sopenharmony_ci	struct inode *vi;
153862306a36Sopenharmony_ci	ntfs_inode *ni, *base_ni;
153962306a36Sopenharmony_ci	struct page *page;
154062306a36Sopenharmony_ci	ntfs_attr_search_ctx *ctx;
154162306a36Sopenharmony_ci	MFT_RECORD *m;
154262306a36Sopenharmony_ci	ATTR_RECORD *a;
154362306a36Sopenharmony_ci	char *kattr, *kaddr;
154462306a36Sopenharmony_ci	unsigned long flags;
154562306a36Sopenharmony_ci	u32 attr_len;
154662306a36Sopenharmony_ci	int err;
154762306a36Sopenharmony_ci
154862306a36Sopenharmony_ci	BUG_ON(!nr_pages);
154962306a36Sopenharmony_ci	BUG_ON(!pages);
155062306a36Sopenharmony_ci	page = pages[0];
155162306a36Sopenharmony_ci	BUG_ON(!page);
155262306a36Sopenharmony_ci	vi = page->mapping->host;
155362306a36Sopenharmony_ci	ni = NTFS_I(vi);
155462306a36Sopenharmony_ci	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page "
155562306a36Sopenharmony_ci			"index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.",
155662306a36Sopenharmony_ci			vi->i_ino, ni->type, page->index, nr_pages,
155762306a36Sopenharmony_ci			(long long)pos, bytes);
155862306a36Sopenharmony_ci	if (NInoNonResident(ni))
155962306a36Sopenharmony_ci		return ntfs_commit_pages_after_non_resident_write(pages,
156062306a36Sopenharmony_ci				nr_pages, pos, bytes);
156162306a36Sopenharmony_ci	BUG_ON(nr_pages > 1);
156262306a36Sopenharmony_ci	/*
156362306a36Sopenharmony_ci	 * Attribute is resident, implying it is not compressed, encrypted, or
156462306a36Sopenharmony_ci	 * sparse.
156562306a36Sopenharmony_ci	 */
156662306a36Sopenharmony_ci	if (!NInoAttr(ni))
156762306a36Sopenharmony_ci		base_ni = ni;
156862306a36Sopenharmony_ci	else
156962306a36Sopenharmony_ci		base_ni = ni->ext.base_ntfs_ino;
157062306a36Sopenharmony_ci	BUG_ON(NInoNonResident(ni));
157162306a36Sopenharmony_ci	/* Map, pin, and lock the mft record. */
157262306a36Sopenharmony_ci	m = map_mft_record(base_ni);
157362306a36Sopenharmony_ci	if (IS_ERR(m)) {
157462306a36Sopenharmony_ci		err = PTR_ERR(m);
157562306a36Sopenharmony_ci		m = NULL;
157662306a36Sopenharmony_ci		ctx = NULL;
157762306a36Sopenharmony_ci		goto err_out;
157862306a36Sopenharmony_ci	}
157962306a36Sopenharmony_ci	ctx = ntfs_attr_get_search_ctx(base_ni, m);
158062306a36Sopenharmony_ci	if (unlikely(!ctx)) {
158162306a36Sopenharmony_ci		err = -ENOMEM;
158262306a36Sopenharmony_ci		goto err_out;
158362306a36Sopenharmony_ci	}
158462306a36Sopenharmony_ci	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
158562306a36Sopenharmony_ci			CASE_SENSITIVE, 0, NULL, 0, ctx);
158662306a36Sopenharmony_ci	if (unlikely(err)) {
158762306a36Sopenharmony_ci		if (err == -ENOENT)
158862306a36Sopenharmony_ci			err = -EIO;
158962306a36Sopenharmony_ci		goto err_out;
159062306a36Sopenharmony_ci	}
159162306a36Sopenharmony_ci	a = ctx->attr;
159262306a36Sopenharmony_ci	BUG_ON(a->non_resident);
159362306a36Sopenharmony_ci	/* The total length of the attribute value. */
159462306a36Sopenharmony_ci	attr_len = le32_to_cpu(a->data.resident.value_length);
159562306a36Sopenharmony_ci	i_size = i_size_read(vi);
159662306a36Sopenharmony_ci	BUG_ON(attr_len != i_size);
159762306a36Sopenharmony_ci	BUG_ON(pos > attr_len);
159862306a36Sopenharmony_ci	end = pos + bytes;
159962306a36Sopenharmony_ci	BUG_ON(end > le32_to_cpu(a->length) -
160062306a36Sopenharmony_ci			le16_to_cpu(a->data.resident.value_offset));
160162306a36Sopenharmony_ci	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
160262306a36Sopenharmony_ci	kaddr = kmap_atomic(page);
160362306a36Sopenharmony_ci	/* Copy the received data from the page to the mft record. */
160462306a36Sopenharmony_ci	memcpy(kattr + pos, kaddr + pos, bytes);
160562306a36Sopenharmony_ci	/* Update the attribute length if necessary. */
160662306a36Sopenharmony_ci	if (end > attr_len) {
160762306a36Sopenharmony_ci		attr_len = end;
160862306a36Sopenharmony_ci		a->data.resident.value_length = cpu_to_le32(attr_len);
160962306a36Sopenharmony_ci	}
161062306a36Sopenharmony_ci	/*
161162306a36Sopenharmony_ci	 * If the page is not uptodate, bring the out of bounds area(s)
161262306a36Sopenharmony_ci	 * uptodate by copying data from the mft record to the page.
161362306a36Sopenharmony_ci	 */
161462306a36Sopenharmony_ci	if (!PageUptodate(page)) {
161562306a36Sopenharmony_ci		if (pos > 0)
161662306a36Sopenharmony_ci			memcpy(kaddr, kattr, pos);
161762306a36Sopenharmony_ci		if (end < attr_len)
161862306a36Sopenharmony_ci			memcpy(kaddr + end, kattr + end, attr_len - end);
161962306a36Sopenharmony_ci		/* Zero the region outside the end of the attribute value. */
162062306a36Sopenharmony_ci		memset(kaddr + attr_len, 0, PAGE_SIZE - attr_len);
162162306a36Sopenharmony_ci		flush_dcache_page(page);
162262306a36Sopenharmony_ci		SetPageUptodate(page);
162362306a36Sopenharmony_ci	}
162462306a36Sopenharmony_ci	kunmap_atomic(kaddr);
162562306a36Sopenharmony_ci	/* Update initialized_size/i_size if necessary. */
162662306a36Sopenharmony_ci	read_lock_irqsave(&ni->size_lock, flags);
162762306a36Sopenharmony_ci	initialized_size = ni->initialized_size;
162862306a36Sopenharmony_ci	BUG_ON(end > ni->allocated_size);
162962306a36Sopenharmony_ci	read_unlock_irqrestore(&ni->size_lock, flags);
163062306a36Sopenharmony_ci	BUG_ON(initialized_size != i_size);
163162306a36Sopenharmony_ci	if (end > initialized_size) {
163262306a36Sopenharmony_ci		write_lock_irqsave(&ni->size_lock, flags);
163362306a36Sopenharmony_ci		ni->initialized_size = end;
163462306a36Sopenharmony_ci		i_size_write(vi, end);
163562306a36Sopenharmony_ci		write_unlock_irqrestore(&ni->size_lock, flags);
163662306a36Sopenharmony_ci	}
163762306a36Sopenharmony_ci	/* Mark the mft record dirty, so it gets written back. */
163862306a36Sopenharmony_ci	flush_dcache_mft_record_page(ctx->ntfs_ino);
163962306a36Sopenharmony_ci	mark_mft_record_dirty(ctx->ntfs_ino);
164062306a36Sopenharmony_ci	ntfs_attr_put_search_ctx(ctx);
164162306a36Sopenharmony_ci	unmap_mft_record(base_ni);
164262306a36Sopenharmony_ci	ntfs_debug("Done.");
164362306a36Sopenharmony_ci	return 0;
164462306a36Sopenharmony_cierr_out:
164562306a36Sopenharmony_ci	if (err == -ENOMEM) {
164662306a36Sopenharmony_ci		ntfs_warning(vi->i_sb, "Error allocating memory required to "
164762306a36Sopenharmony_ci				"commit the write.");
164862306a36Sopenharmony_ci		if (PageUptodate(page)) {
164962306a36Sopenharmony_ci			ntfs_warning(vi->i_sb, "Page is uptodate, setting "
165062306a36Sopenharmony_ci					"dirty so the write will be retried "
165162306a36Sopenharmony_ci					"later on by the VM.");
165262306a36Sopenharmony_ci			/*
165362306a36Sopenharmony_ci			 * Put the page on mapping->dirty_pages, but leave its
165462306a36Sopenharmony_ci			 * buffers' dirty state as-is.
165562306a36Sopenharmony_ci			 */
165662306a36Sopenharmony_ci			__set_page_dirty_nobuffers(page);
165762306a36Sopenharmony_ci			err = 0;
165862306a36Sopenharmony_ci		} else
165962306a36Sopenharmony_ci			ntfs_error(vi->i_sb, "Page is not uptodate.  Written "
166062306a36Sopenharmony_ci					"data has been lost.");
166162306a36Sopenharmony_ci	} else {
166262306a36Sopenharmony_ci		ntfs_error(vi->i_sb, "Resident attribute commit write failed "
166362306a36Sopenharmony_ci				"with error %i.", err);
166462306a36Sopenharmony_ci		NVolSetErrors(ni->vol);
166562306a36Sopenharmony_ci	}
166662306a36Sopenharmony_ci	if (ctx)
166762306a36Sopenharmony_ci		ntfs_attr_put_search_ctx(ctx);
166862306a36Sopenharmony_ci	if (m)
166962306a36Sopenharmony_ci		unmap_mft_record(base_ni);
167062306a36Sopenharmony_ci	return err;
167162306a36Sopenharmony_ci}
167262306a36Sopenharmony_ci
167362306a36Sopenharmony_ci/*
167462306a36Sopenharmony_ci * Copy as much as we can into the pages and return the number of bytes which
167562306a36Sopenharmony_ci * were successfully copied.  If a fault is encountered then clear the pages
167662306a36Sopenharmony_ci * out to (ofs + bytes) and return the number of bytes which were copied.
167762306a36Sopenharmony_ci */
167862306a36Sopenharmony_cistatic size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
167962306a36Sopenharmony_ci		unsigned ofs, struct iov_iter *i, size_t bytes)
168062306a36Sopenharmony_ci{
168162306a36Sopenharmony_ci	struct page **last_page = pages + nr_pages;
168262306a36Sopenharmony_ci	size_t total = 0;
168362306a36Sopenharmony_ci	unsigned len, copied;
168462306a36Sopenharmony_ci
168562306a36Sopenharmony_ci	do {
168662306a36Sopenharmony_ci		len = PAGE_SIZE - ofs;
168762306a36Sopenharmony_ci		if (len > bytes)
168862306a36Sopenharmony_ci			len = bytes;
168962306a36Sopenharmony_ci		copied = copy_page_from_iter_atomic(*pages, ofs, len, i);
169062306a36Sopenharmony_ci		total += copied;
169162306a36Sopenharmony_ci		bytes -= copied;
169262306a36Sopenharmony_ci		if (!bytes)
169362306a36Sopenharmony_ci			break;
169462306a36Sopenharmony_ci		if (copied < len)
169562306a36Sopenharmony_ci			goto err;
169662306a36Sopenharmony_ci		ofs = 0;
169762306a36Sopenharmony_ci	} while (++pages < last_page);
169862306a36Sopenharmony_ciout:
169962306a36Sopenharmony_ci	return total;
170062306a36Sopenharmony_cierr:
170162306a36Sopenharmony_ci	/* Zero the rest of the target like __copy_from_user(). */
170262306a36Sopenharmony_ci	len = PAGE_SIZE - copied;
170362306a36Sopenharmony_ci	do {
170462306a36Sopenharmony_ci		if (len > bytes)
170562306a36Sopenharmony_ci			len = bytes;
170662306a36Sopenharmony_ci		zero_user(*pages, copied, len);
170762306a36Sopenharmony_ci		bytes -= len;
170862306a36Sopenharmony_ci		copied = 0;
170962306a36Sopenharmony_ci		len = PAGE_SIZE;
171062306a36Sopenharmony_ci	} while (++pages < last_page);
171162306a36Sopenharmony_ci	goto out;
171262306a36Sopenharmony_ci}
171362306a36Sopenharmony_ci
171462306a36Sopenharmony_ci/**
171562306a36Sopenharmony_ci * ntfs_perform_write - perform buffered write to a file
171662306a36Sopenharmony_ci * @file:	file to write to
171762306a36Sopenharmony_ci * @i:		iov_iter with data to write
171862306a36Sopenharmony_ci * @pos:	byte offset in file at which to begin writing to
171962306a36Sopenharmony_ci */
172062306a36Sopenharmony_cistatic ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
172162306a36Sopenharmony_ci		loff_t pos)
172262306a36Sopenharmony_ci{
172362306a36Sopenharmony_ci	struct address_space *mapping = file->f_mapping;
172462306a36Sopenharmony_ci	struct inode *vi = mapping->host;
172562306a36Sopenharmony_ci	ntfs_inode *ni = NTFS_I(vi);
172662306a36Sopenharmony_ci	ntfs_volume *vol = ni->vol;
172762306a36Sopenharmony_ci	struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
172862306a36Sopenharmony_ci	struct page *cached_page = NULL;
172962306a36Sopenharmony_ci	VCN last_vcn;
173062306a36Sopenharmony_ci	LCN lcn;
173162306a36Sopenharmony_ci	size_t bytes;
173262306a36Sopenharmony_ci	ssize_t status, written = 0;
173362306a36Sopenharmony_ci	unsigned nr_pages;
173462306a36Sopenharmony_ci
173562306a36Sopenharmony_ci	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
173662306a36Sopenharmony_ci			"0x%llx, count 0x%lx.", vi->i_ino,
173762306a36Sopenharmony_ci			(unsigned)le32_to_cpu(ni->type),
173862306a36Sopenharmony_ci			(unsigned long long)pos,
173962306a36Sopenharmony_ci			(unsigned long)iov_iter_count(i));
174062306a36Sopenharmony_ci	/*
174162306a36Sopenharmony_ci	 * If a previous ntfs_truncate() failed, repeat it and abort if it
174262306a36Sopenharmony_ci	 * fails again.
174362306a36Sopenharmony_ci	 */
174462306a36Sopenharmony_ci	if (unlikely(NInoTruncateFailed(ni))) {
174562306a36Sopenharmony_ci		int err;
174662306a36Sopenharmony_ci
174762306a36Sopenharmony_ci		inode_dio_wait(vi);
174862306a36Sopenharmony_ci		err = ntfs_truncate(vi);
174962306a36Sopenharmony_ci		if (err || NInoTruncateFailed(ni)) {
175062306a36Sopenharmony_ci			if (!err)
175162306a36Sopenharmony_ci				err = -EIO;
175262306a36Sopenharmony_ci			ntfs_error(vol->sb, "Cannot perform write to inode "
175362306a36Sopenharmony_ci					"0x%lx, attribute type 0x%x, because "
175462306a36Sopenharmony_ci					"ntfs_truncate() failed (error code "
175562306a36Sopenharmony_ci					"%i).", vi->i_ino,
175662306a36Sopenharmony_ci					(unsigned)le32_to_cpu(ni->type), err);
175762306a36Sopenharmony_ci			return err;
175862306a36Sopenharmony_ci		}
175962306a36Sopenharmony_ci	}
176062306a36Sopenharmony_ci	/*
176162306a36Sopenharmony_ci	 * Determine the number of pages per cluster for non-resident
176262306a36Sopenharmony_ci	 * attributes.
176362306a36Sopenharmony_ci	 */
176462306a36Sopenharmony_ci	nr_pages = 1;
176562306a36Sopenharmony_ci	if (vol->cluster_size > PAGE_SIZE && NInoNonResident(ni))
176662306a36Sopenharmony_ci		nr_pages = vol->cluster_size >> PAGE_SHIFT;
176762306a36Sopenharmony_ci	last_vcn = -1;
176862306a36Sopenharmony_ci	do {
176962306a36Sopenharmony_ci		VCN vcn;
177062306a36Sopenharmony_ci		pgoff_t start_idx;
177162306a36Sopenharmony_ci		unsigned ofs, do_pages, u;
177262306a36Sopenharmony_ci		size_t copied;
177362306a36Sopenharmony_ci
177462306a36Sopenharmony_ci		start_idx = pos >> PAGE_SHIFT;
177562306a36Sopenharmony_ci		ofs = pos & ~PAGE_MASK;
177662306a36Sopenharmony_ci		bytes = PAGE_SIZE - ofs;
177762306a36Sopenharmony_ci		do_pages = 1;
177862306a36Sopenharmony_ci		if (nr_pages > 1) {
177962306a36Sopenharmony_ci			vcn = pos >> vol->cluster_size_bits;
178062306a36Sopenharmony_ci			if (vcn != last_vcn) {
178162306a36Sopenharmony_ci				last_vcn = vcn;
178262306a36Sopenharmony_ci				/*
178362306a36Sopenharmony_ci				 * Get the lcn of the vcn the write is in.  If
178462306a36Sopenharmony_ci				 * it is a hole, need to lock down all pages in
178562306a36Sopenharmony_ci				 * the cluster.
178662306a36Sopenharmony_ci				 */
178762306a36Sopenharmony_ci				down_read(&ni->runlist.lock);
178862306a36Sopenharmony_ci				lcn = ntfs_attr_vcn_to_lcn_nolock(ni, pos >>
178962306a36Sopenharmony_ci						vol->cluster_size_bits, false);
179062306a36Sopenharmony_ci				up_read(&ni->runlist.lock);
179162306a36Sopenharmony_ci				if (unlikely(lcn < LCN_HOLE)) {
179262306a36Sopenharmony_ci					if (lcn == LCN_ENOMEM)
179362306a36Sopenharmony_ci						status = -ENOMEM;
179462306a36Sopenharmony_ci					else {
179562306a36Sopenharmony_ci						status = -EIO;
179662306a36Sopenharmony_ci						ntfs_error(vol->sb, "Cannot "
179762306a36Sopenharmony_ci							"perform write to "
179862306a36Sopenharmony_ci							"inode 0x%lx, "
179962306a36Sopenharmony_ci							"attribute type 0x%x, "
180062306a36Sopenharmony_ci							"because the attribute "
180162306a36Sopenharmony_ci							"is corrupt.",
180262306a36Sopenharmony_ci							vi->i_ino, (unsigned)
180362306a36Sopenharmony_ci							le32_to_cpu(ni->type));
180462306a36Sopenharmony_ci					}
180562306a36Sopenharmony_ci					break;
180662306a36Sopenharmony_ci				}
180762306a36Sopenharmony_ci				if (lcn == LCN_HOLE) {
180862306a36Sopenharmony_ci					start_idx = (pos & ~(s64)
180962306a36Sopenharmony_ci							vol->cluster_size_mask)
181062306a36Sopenharmony_ci							>> PAGE_SHIFT;
181162306a36Sopenharmony_ci					bytes = vol->cluster_size - (pos &
181262306a36Sopenharmony_ci							vol->cluster_size_mask);
181362306a36Sopenharmony_ci					do_pages = nr_pages;
181462306a36Sopenharmony_ci				}
181562306a36Sopenharmony_ci			}
181662306a36Sopenharmony_ci		}
181762306a36Sopenharmony_ci		if (bytes > iov_iter_count(i))
181862306a36Sopenharmony_ci			bytes = iov_iter_count(i);
181962306a36Sopenharmony_ciagain:
182062306a36Sopenharmony_ci		/*
182162306a36Sopenharmony_ci		 * Bring in the user page(s) that we will copy from _first_.
182262306a36Sopenharmony_ci		 * Otherwise there is a nasty deadlock on copying from the same
182362306a36Sopenharmony_ci		 * page(s) as we are writing to, without it/them being marked
182462306a36Sopenharmony_ci		 * up-to-date.  Note, at present there is nothing to stop the
182562306a36Sopenharmony_ci		 * pages being swapped out between us bringing them into memory
182662306a36Sopenharmony_ci		 * and doing the actual copying.
182762306a36Sopenharmony_ci		 */
182862306a36Sopenharmony_ci		if (unlikely(fault_in_iov_iter_readable(i, bytes))) {
182962306a36Sopenharmony_ci			status = -EFAULT;
183062306a36Sopenharmony_ci			break;
183162306a36Sopenharmony_ci		}
183262306a36Sopenharmony_ci		/* Get and lock @do_pages starting at index @start_idx. */
183362306a36Sopenharmony_ci		status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
183462306a36Sopenharmony_ci				pages, &cached_page);
183562306a36Sopenharmony_ci		if (unlikely(status))
183662306a36Sopenharmony_ci			break;
183762306a36Sopenharmony_ci		/*
183862306a36Sopenharmony_ci		 * For non-resident attributes, we need to fill any holes with
183962306a36Sopenharmony_ci		 * actual clusters and ensure all bufferes are mapped.  We also
184062306a36Sopenharmony_ci		 * need to bring uptodate any buffers that are only partially
184162306a36Sopenharmony_ci		 * being written to.
184262306a36Sopenharmony_ci		 */
184362306a36Sopenharmony_ci		if (NInoNonResident(ni)) {
184462306a36Sopenharmony_ci			status = ntfs_prepare_pages_for_non_resident_write(
184562306a36Sopenharmony_ci					pages, do_pages, pos, bytes);
184662306a36Sopenharmony_ci			if (unlikely(status)) {
184762306a36Sopenharmony_ci				do {
184862306a36Sopenharmony_ci					unlock_page(pages[--do_pages]);
184962306a36Sopenharmony_ci					put_page(pages[do_pages]);
185062306a36Sopenharmony_ci				} while (do_pages);
185162306a36Sopenharmony_ci				break;
185262306a36Sopenharmony_ci			}
185362306a36Sopenharmony_ci		}
185462306a36Sopenharmony_ci		u = (pos >> PAGE_SHIFT) - pages[0]->index;
185562306a36Sopenharmony_ci		copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
185662306a36Sopenharmony_ci					i, bytes);
185762306a36Sopenharmony_ci		ntfs_flush_dcache_pages(pages + u, do_pages - u);
185862306a36Sopenharmony_ci		status = 0;
185962306a36Sopenharmony_ci		if (likely(copied == bytes)) {
186062306a36Sopenharmony_ci			status = ntfs_commit_pages_after_write(pages, do_pages,
186162306a36Sopenharmony_ci					pos, bytes);
186262306a36Sopenharmony_ci		}
186362306a36Sopenharmony_ci		do {
186462306a36Sopenharmony_ci			unlock_page(pages[--do_pages]);
186562306a36Sopenharmony_ci			put_page(pages[do_pages]);
186662306a36Sopenharmony_ci		} while (do_pages);
186762306a36Sopenharmony_ci		if (unlikely(status < 0)) {
186862306a36Sopenharmony_ci			iov_iter_revert(i, copied);
186962306a36Sopenharmony_ci			break;
187062306a36Sopenharmony_ci		}
187162306a36Sopenharmony_ci		cond_resched();
187262306a36Sopenharmony_ci		if (unlikely(copied < bytes)) {
187362306a36Sopenharmony_ci			iov_iter_revert(i, copied);
187462306a36Sopenharmony_ci			if (copied)
187562306a36Sopenharmony_ci				bytes = copied;
187662306a36Sopenharmony_ci			else if (bytes > PAGE_SIZE - ofs)
187762306a36Sopenharmony_ci				bytes = PAGE_SIZE - ofs;
187862306a36Sopenharmony_ci			goto again;
187962306a36Sopenharmony_ci		}
188062306a36Sopenharmony_ci		pos += copied;
188162306a36Sopenharmony_ci		written += copied;
188262306a36Sopenharmony_ci		balance_dirty_pages_ratelimited(mapping);
188362306a36Sopenharmony_ci		if (fatal_signal_pending(current)) {
188462306a36Sopenharmony_ci			status = -EINTR;
188562306a36Sopenharmony_ci			break;
188662306a36Sopenharmony_ci		}
188762306a36Sopenharmony_ci	} while (iov_iter_count(i));
188862306a36Sopenharmony_ci	if (cached_page)
188962306a36Sopenharmony_ci		put_page(cached_page);
189062306a36Sopenharmony_ci	ntfs_debug("Done.  Returning %s (written 0x%lx, status %li).",
189162306a36Sopenharmony_ci			written ? "written" : "status", (unsigned long)written,
189262306a36Sopenharmony_ci			(long)status);
189362306a36Sopenharmony_ci	return written ? written : status;
189462306a36Sopenharmony_ci}
189562306a36Sopenharmony_ci
189662306a36Sopenharmony_ci/**
189762306a36Sopenharmony_ci * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock()
189862306a36Sopenharmony_ci * @iocb:	IO state structure
189962306a36Sopenharmony_ci * @from:	iov_iter with data to write
190062306a36Sopenharmony_ci *
190162306a36Sopenharmony_ci * Basically the same as generic_file_write_iter() except that it ends up
190262306a36Sopenharmony_ci * up calling ntfs_perform_write() instead of generic_perform_write() and that
190362306a36Sopenharmony_ci * O_DIRECT is not implemented.
190462306a36Sopenharmony_ci */
190562306a36Sopenharmony_cistatic ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
190662306a36Sopenharmony_ci{
190762306a36Sopenharmony_ci	struct file *file = iocb->ki_filp;
190862306a36Sopenharmony_ci	struct inode *vi = file_inode(file);
190962306a36Sopenharmony_ci	ssize_t written = 0;
191062306a36Sopenharmony_ci	ssize_t err;
191162306a36Sopenharmony_ci
191262306a36Sopenharmony_ci	inode_lock(vi);
191362306a36Sopenharmony_ci	/* We can write back this queue in page reclaim. */
191462306a36Sopenharmony_ci	err = ntfs_prepare_file_for_write(iocb, from);
191562306a36Sopenharmony_ci	if (iov_iter_count(from) && !err)
191662306a36Sopenharmony_ci		written = ntfs_perform_write(file, from, iocb->ki_pos);
191762306a36Sopenharmony_ci	inode_unlock(vi);
191862306a36Sopenharmony_ci	iocb->ki_pos += written;
191962306a36Sopenharmony_ci	if (likely(written > 0))
192062306a36Sopenharmony_ci		written = generic_write_sync(iocb, written);
192162306a36Sopenharmony_ci	return written ? written : err;
192262306a36Sopenharmony_ci}
192362306a36Sopenharmony_ci
192462306a36Sopenharmony_ci/**
192562306a36Sopenharmony_ci * ntfs_file_fsync - sync a file to disk
192662306a36Sopenharmony_ci * @filp:	file to be synced
192762306a36Sopenharmony_ci * @datasync:	if non-zero only flush user data and not metadata
192862306a36Sopenharmony_ci *
192962306a36Sopenharmony_ci * Data integrity sync of a file to disk.  Used for fsync, fdatasync, and msync
193062306a36Sopenharmony_ci * system calls.  This function is inspired by fs/buffer.c::file_fsync().
193162306a36Sopenharmony_ci *
193262306a36Sopenharmony_ci * If @datasync is false, write the mft record and all associated extent mft
193362306a36Sopenharmony_ci * records as well as the $DATA attribute and then sync the block device.
193462306a36Sopenharmony_ci *
193562306a36Sopenharmony_ci * If @datasync is true and the attribute is non-resident, we skip the writing
193662306a36Sopenharmony_ci * of the mft record and all associated extent mft records (this might still
193762306a36Sopenharmony_ci * happen due to the write_inode_now() call).
193862306a36Sopenharmony_ci *
193962306a36Sopenharmony_ci * Also, if @datasync is true, we do not wait on the inode to be written out
194062306a36Sopenharmony_ci * but we always wait on the page cache pages to be written out.
194162306a36Sopenharmony_ci *
194262306a36Sopenharmony_ci * Locking: Caller must hold i_mutex on the inode.
194362306a36Sopenharmony_ci *
194462306a36Sopenharmony_ci * TODO: We should probably also write all attribute/index inodes associated
194562306a36Sopenharmony_ci * with this inode but since we have no simple way of getting to them we ignore
194662306a36Sopenharmony_ci * this problem for now.
194762306a36Sopenharmony_ci */
194862306a36Sopenharmony_cistatic int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
194962306a36Sopenharmony_ci			   int datasync)
195062306a36Sopenharmony_ci{
195162306a36Sopenharmony_ci	struct inode *vi = filp->f_mapping->host;
195262306a36Sopenharmony_ci	int err, ret = 0;
195362306a36Sopenharmony_ci
195462306a36Sopenharmony_ci	ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
195562306a36Sopenharmony_ci
195662306a36Sopenharmony_ci	err = file_write_and_wait_range(filp, start, end);
195762306a36Sopenharmony_ci	if (err)
195862306a36Sopenharmony_ci		return err;
195962306a36Sopenharmony_ci	inode_lock(vi);
196062306a36Sopenharmony_ci
196162306a36Sopenharmony_ci	BUG_ON(S_ISDIR(vi->i_mode));
196262306a36Sopenharmony_ci	if (!datasync || !NInoNonResident(NTFS_I(vi)))
196362306a36Sopenharmony_ci		ret = __ntfs_write_inode(vi, 1);
196462306a36Sopenharmony_ci	write_inode_now(vi, !datasync);
196562306a36Sopenharmony_ci	/*
196662306a36Sopenharmony_ci	 * NOTE: If we were to use mapping->private_list (see ext2 and
196762306a36Sopenharmony_ci	 * fs/buffer.c) for dirty blocks then we could optimize the below to be
196862306a36Sopenharmony_ci	 * sync_mapping_buffers(vi->i_mapping).
196962306a36Sopenharmony_ci	 */
197062306a36Sopenharmony_ci	err = sync_blockdev(vi->i_sb->s_bdev);
197162306a36Sopenharmony_ci	if (unlikely(err && !ret))
197262306a36Sopenharmony_ci		ret = err;
197362306a36Sopenharmony_ci	if (likely(!ret))
197462306a36Sopenharmony_ci		ntfs_debug("Done.");
197562306a36Sopenharmony_ci	else
197662306a36Sopenharmony_ci		ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx.  Error "
197762306a36Sopenharmony_ci				"%u.", datasync ? "data" : "", vi->i_ino, -ret);
197862306a36Sopenharmony_ci	inode_unlock(vi);
197962306a36Sopenharmony_ci	return ret;
198062306a36Sopenharmony_ci}
198162306a36Sopenharmony_ci
198262306a36Sopenharmony_ci#endif /* NTFS_RW */
198362306a36Sopenharmony_ci
/*
 * File operations table pairing the generic llseek, read, mmap and splice
 * helpers with ntfs_file_open().  The write and fsync methods are only
 * provided when the driver is built with NTFS_RW defined.
 */
const struct file_operations ntfs_file_ops = {
	.llseek		= generic_file_llseek,
	.read_iter	= generic_file_read_iter,
#ifdef NTFS_RW
	.write_iter	= ntfs_file_write_iter,
	.fsync		= ntfs_file_fsync,
#endif /* NTFS_RW */
	.mmap		= generic_file_mmap,
	.open		= ntfs_file_open,
	.splice_read	= filemap_splice_read,
};
199562306a36Sopenharmony_ci
/*
 * Inode operations table whose only method is setattr, and only when the
 * driver is built with NTFS_RW defined; otherwise the table is empty.
 */
const struct inode_operations ntfs_file_inode_ops = {
#ifdef NTFS_RW
	.setattr	= ntfs_setattr,
#endif /* NTFS_RW */
};
200162306a36Sopenharmony_ci
/* File operations table which does not define any methods. */
const struct file_operations ntfs_empty_file_ops = {};
200362306a36Sopenharmony_ci
/* Inode operations table which does not define any methods. */
const struct inode_operations ntfs_empty_inode_ops = {};
2005