xref: /kernel/linux/linux-5.10/fs/orangefs/inode.c (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * (C) 2001 Clemson University and The University of Chicago
48c2ecf20Sopenharmony_ci * Copyright 2018 Omnibond Systems, L.L.C.
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * See COPYING in top-level directory.
78c2ecf20Sopenharmony_ci */
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci/*
108c2ecf20Sopenharmony_ci *  Linux VFS inode operations.
118c2ecf20Sopenharmony_ci */
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci#include <linux/bvec.h>
148c2ecf20Sopenharmony_ci#include "protocol.h"
158c2ecf20Sopenharmony_ci#include "orangefs-kernel.h"
168c2ecf20Sopenharmony_ci#include "orangefs-bufmap.h"
178c2ecf20Sopenharmony_ci
188c2ecf20Sopenharmony_cistatic int orangefs_writepage_locked(struct page *page,
198c2ecf20Sopenharmony_ci    struct writeback_control *wbc)
208c2ecf20Sopenharmony_ci{
218c2ecf20Sopenharmony_ci	struct inode *inode = page->mapping->host;
228c2ecf20Sopenharmony_ci	struct orangefs_write_range *wr = NULL;
238c2ecf20Sopenharmony_ci	struct iov_iter iter;
248c2ecf20Sopenharmony_ci	struct bio_vec bv;
258c2ecf20Sopenharmony_ci	size_t len, wlen;
268c2ecf20Sopenharmony_ci	ssize_t ret;
278c2ecf20Sopenharmony_ci	loff_t off;
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_ci	set_page_writeback(page);
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ci	len = i_size_read(inode);
328c2ecf20Sopenharmony_ci	if (PagePrivate(page)) {
338c2ecf20Sopenharmony_ci		wr = (struct orangefs_write_range *)page_private(page);
348c2ecf20Sopenharmony_ci		WARN_ON(wr->pos >= len);
358c2ecf20Sopenharmony_ci		off = wr->pos;
368c2ecf20Sopenharmony_ci		if (off + wr->len > len)
378c2ecf20Sopenharmony_ci			wlen = len - off;
388c2ecf20Sopenharmony_ci		else
398c2ecf20Sopenharmony_ci			wlen = wr->len;
408c2ecf20Sopenharmony_ci	} else {
418c2ecf20Sopenharmony_ci		WARN_ON(1);
428c2ecf20Sopenharmony_ci		off = page_offset(page);
438c2ecf20Sopenharmony_ci		if (off + PAGE_SIZE > len)
448c2ecf20Sopenharmony_ci			wlen = len - off;
458c2ecf20Sopenharmony_ci		else
468c2ecf20Sopenharmony_ci			wlen = PAGE_SIZE;
478c2ecf20Sopenharmony_ci	}
488c2ecf20Sopenharmony_ci	/* Should've been handled in orangefs_invalidatepage. */
498c2ecf20Sopenharmony_ci	WARN_ON(off == len || off + wlen > len);
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_ci	bv.bv_page = page;
528c2ecf20Sopenharmony_ci	bv.bv_len = wlen;
538c2ecf20Sopenharmony_ci	bv.bv_offset = off % PAGE_SIZE;
548c2ecf20Sopenharmony_ci	WARN_ON(wlen == 0);
558c2ecf20Sopenharmony_ci	iov_iter_bvec(&iter, WRITE, &bv, 1, wlen);
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ci	ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, wlen,
588c2ecf20Sopenharmony_ci	    len, wr, NULL, NULL);
598c2ecf20Sopenharmony_ci	if (ret < 0) {
608c2ecf20Sopenharmony_ci		SetPageError(page);
618c2ecf20Sopenharmony_ci		mapping_set_error(page->mapping, ret);
628c2ecf20Sopenharmony_ci	} else {
638c2ecf20Sopenharmony_ci		ret = 0;
648c2ecf20Sopenharmony_ci	}
658c2ecf20Sopenharmony_ci	kfree(detach_page_private(page));
668c2ecf20Sopenharmony_ci	return ret;
678c2ecf20Sopenharmony_ci}
688c2ecf20Sopenharmony_ci
/*
 * ->writepage(): write out one locked page via
 * orangefs_writepage_locked(), then unlock it and end writeback.
 * Returns whatever orangefs_writepage_locked() returned.
 */
static int orangefs_writepage(struct page *page, struct writeback_control *wbc)
{
	int err = orangefs_writepage_locked(page, wbc);

	unlock_page(page);
	end_page_writeback(page);
	return err;
}
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_cistruct orangefs_writepages {
798c2ecf20Sopenharmony_ci	loff_t off;
808c2ecf20Sopenharmony_ci	size_t len;
818c2ecf20Sopenharmony_ci	kuid_t uid;
828c2ecf20Sopenharmony_ci	kgid_t gid;
838c2ecf20Sopenharmony_ci	int maxpages;
848c2ecf20Sopenharmony_ci	int npages;
858c2ecf20Sopenharmony_ci	struct page **pages;
868c2ecf20Sopenharmony_ci	struct bio_vec *bv;
878c2ecf20Sopenharmony_ci};
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_cistatic int orangefs_writepages_work(struct orangefs_writepages *ow,
908c2ecf20Sopenharmony_ci    struct writeback_control *wbc)
918c2ecf20Sopenharmony_ci{
928c2ecf20Sopenharmony_ci	struct inode *inode = ow->pages[0]->mapping->host;
938c2ecf20Sopenharmony_ci	struct orangefs_write_range *wrp, wr;
948c2ecf20Sopenharmony_ci	struct iov_iter iter;
958c2ecf20Sopenharmony_ci	ssize_t ret;
968c2ecf20Sopenharmony_ci	size_t len;
978c2ecf20Sopenharmony_ci	loff_t off;
988c2ecf20Sopenharmony_ci	int i;
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci	len = i_size_read(inode);
1018c2ecf20Sopenharmony_ci
1028c2ecf20Sopenharmony_ci	for (i = 0; i < ow->npages; i++) {
1038c2ecf20Sopenharmony_ci		set_page_writeback(ow->pages[i]);
1048c2ecf20Sopenharmony_ci		ow->bv[i].bv_page = ow->pages[i];
1058c2ecf20Sopenharmony_ci		ow->bv[i].bv_len = min(page_offset(ow->pages[i]) + PAGE_SIZE,
1068c2ecf20Sopenharmony_ci		    ow->off + ow->len) -
1078c2ecf20Sopenharmony_ci		    max(ow->off, page_offset(ow->pages[i]));
1088c2ecf20Sopenharmony_ci		if (i == 0)
1098c2ecf20Sopenharmony_ci			ow->bv[i].bv_offset = ow->off -
1108c2ecf20Sopenharmony_ci			    page_offset(ow->pages[i]);
1118c2ecf20Sopenharmony_ci		else
1128c2ecf20Sopenharmony_ci			ow->bv[i].bv_offset = 0;
1138c2ecf20Sopenharmony_ci	}
1148c2ecf20Sopenharmony_ci	iov_iter_bvec(&iter, WRITE, ow->bv, ow->npages, ow->len);
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_ci	WARN_ON(ow->off >= len);
1178c2ecf20Sopenharmony_ci	if (ow->off + ow->len > len)
1188c2ecf20Sopenharmony_ci		ow->len = len - ow->off;
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci	off = ow->off;
1218c2ecf20Sopenharmony_ci	wr.uid = ow->uid;
1228c2ecf20Sopenharmony_ci	wr.gid = ow->gid;
1238c2ecf20Sopenharmony_ci	ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, ow->len,
1248c2ecf20Sopenharmony_ci	    0, &wr, NULL, NULL);
1258c2ecf20Sopenharmony_ci	if (ret < 0) {
1268c2ecf20Sopenharmony_ci		for (i = 0; i < ow->npages; i++) {
1278c2ecf20Sopenharmony_ci			SetPageError(ow->pages[i]);
1288c2ecf20Sopenharmony_ci			mapping_set_error(ow->pages[i]->mapping, ret);
1298c2ecf20Sopenharmony_ci			if (PagePrivate(ow->pages[i])) {
1308c2ecf20Sopenharmony_ci				wrp = (struct orangefs_write_range *)
1318c2ecf20Sopenharmony_ci				    page_private(ow->pages[i]);
1328c2ecf20Sopenharmony_ci				ClearPagePrivate(ow->pages[i]);
1338c2ecf20Sopenharmony_ci				put_page(ow->pages[i]);
1348c2ecf20Sopenharmony_ci				kfree(wrp);
1358c2ecf20Sopenharmony_ci			}
1368c2ecf20Sopenharmony_ci			end_page_writeback(ow->pages[i]);
1378c2ecf20Sopenharmony_ci			unlock_page(ow->pages[i]);
1388c2ecf20Sopenharmony_ci		}
1398c2ecf20Sopenharmony_ci	} else {
1408c2ecf20Sopenharmony_ci		ret = 0;
1418c2ecf20Sopenharmony_ci		for (i = 0; i < ow->npages; i++) {
1428c2ecf20Sopenharmony_ci			if (PagePrivate(ow->pages[i])) {
1438c2ecf20Sopenharmony_ci				wrp = (struct orangefs_write_range *)
1448c2ecf20Sopenharmony_ci				    page_private(ow->pages[i]);
1458c2ecf20Sopenharmony_ci				ClearPagePrivate(ow->pages[i]);
1468c2ecf20Sopenharmony_ci				put_page(ow->pages[i]);
1478c2ecf20Sopenharmony_ci				kfree(wrp);
1488c2ecf20Sopenharmony_ci			}
1498c2ecf20Sopenharmony_ci			end_page_writeback(ow->pages[i]);
1508c2ecf20Sopenharmony_ci			unlock_page(ow->pages[i]);
1518c2ecf20Sopenharmony_ci		}
1528c2ecf20Sopenharmony_ci	}
1538c2ecf20Sopenharmony_ci	return ret;
1548c2ecf20Sopenharmony_ci}
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_cistatic int orangefs_writepages_callback(struct page *page,
1578c2ecf20Sopenharmony_ci    struct writeback_control *wbc, void *data)
1588c2ecf20Sopenharmony_ci{
1598c2ecf20Sopenharmony_ci	struct orangefs_writepages *ow = data;
1608c2ecf20Sopenharmony_ci	struct orangefs_write_range *wr;
1618c2ecf20Sopenharmony_ci	int ret;
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci	if (!PagePrivate(page)) {
1648c2ecf20Sopenharmony_ci		unlock_page(page);
1658c2ecf20Sopenharmony_ci		/* It's not private so there's nothing to write, right? */
1668c2ecf20Sopenharmony_ci		printk("writepages_callback not private!\n");
1678c2ecf20Sopenharmony_ci		BUG();
1688c2ecf20Sopenharmony_ci		return 0;
1698c2ecf20Sopenharmony_ci	}
1708c2ecf20Sopenharmony_ci	wr = (struct orangefs_write_range *)page_private(page);
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ci	ret = -1;
1738c2ecf20Sopenharmony_ci	if (ow->npages == 0) {
1748c2ecf20Sopenharmony_ci		ow->off = wr->pos;
1758c2ecf20Sopenharmony_ci		ow->len = wr->len;
1768c2ecf20Sopenharmony_ci		ow->uid = wr->uid;
1778c2ecf20Sopenharmony_ci		ow->gid = wr->gid;
1788c2ecf20Sopenharmony_ci		ow->pages[ow->npages++] = page;
1798c2ecf20Sopenharmony_ci		ret = 0;
1808c2ecf20Sopenharmony_ci		goto done;
1818c2ecf20Sopenharmony_ci	}
1828c2ecf20Sopenharmony_ci	if (!uid_eq(ow->uid, wr->uid) || !gid_eq(ow->gid, wr->gid)) {
1838c2ecf20Sopenharmony_ci		orangefs_writepages_work(ow, wbc);
1848c2ecf20Sopenharmony_ci		ow->npages = 0;
1858c2ecf20Sopenharmony_ci		ret = -1;
1868c2ecf20Sopenharmony_ci		goto done;
1878c2ecf20Sopenharmony_ci	}
1888c2ecf20Sopenharmony_ci	if (ow->off + ow->len == wr->pos) {
1898c2ecf20Sopenharmony_ci		ow->len += wr->len;
1908c2ecf20Sopenharmony_ci		ow->pages[ow->npages++] = page;
1918c2ecf20Sopenharmony_ci		ret = 0;
1928c2ecf20Sopenharmony_ci		goto done;
1938c2ecf20Sopenharmony_ci	}
1948c2ecf20Sopenharmony_cidone:
1958c2ecf20Sopenharmony_ci	if (ret == -1) {
1968c2ecf20Sopenharmony_ci		if (ow->npages) {
1978c2ecf20Sopenharmony_ci			orangefs_writepages_work(ow, wbc);
1988c2ecf20Sopenharmony_ci			ow->npages = 0;
1998c2ecf20Sopenharmony_ci		}
2008c2ecf20Sopenharmony_ci		ret = orangefs_writepage_locked(page, wbc);
2018c2ecf20Sopenharmony_ci		mapping_set_error(page->mapping, ret);
2028c2ecf20Sopenharmony_ci		unlock_page(page);
2038c2ecf20Sopenharmony_ci		end_page_writeback(page);
2048c2ecf20Sopenharmony_ci	} else {
2058c2ecf20Sopenharmony_ci		if (ow->npages == ow->maxpages) {
2068c2ecf20Sopenharmony_ci			orangefs_writepages_work(ow, wbc);
2078c2ecf20Sopenharmony_ci			ow->npages = 0;
2088c2ecf20Sopenharmony_ci		}
2098c2ecf20Sopenharmony_ci	}
2108c2ecf20Sopenharmony_ci	return ret;
2118c2ecf20Sopenharmony_ci}
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_cistatic int orangefs_writepages(struct address_space *mapping,
2148c2ecf20Sopenharmony_ci    struct writeback_control *wbc)
2158c2ecf20Sopenharmony_ci{
2168c2ecf20Sopenharmony_ci	struct orangefs_writepages *ow;
2178c2ecf20Sopenharmony_ci	struct blk_plug plug;
2188c2ecf20Sopenharmony_ci	int ret;
2198c2ecf20Sopenharmony_ci	ow = kzalloc(sizeof(struct orangefs_writepages), GFP_KERNEL);
2208c2ecf20Sopenharmony_ci	if (!ow)
2218c2ecf20Sopenharmony_ci		return -ENOMEM;
2228c2ecf20Sopenharmony_ci	ow->maxpages = orangefs_bufmap_size_query()/PAGE_SIZE;
2238c2ecf20Sopenharmony_ci	ow->pages = kcalloc(ow->maxpages, sizeof(struct page *), GFP_KERNEL);
2248c2ecf20Sopenharmony_ci	if (!ow->pages) {
2258c2ecf20Sopenharmony_ci		kfree(ow);
2268c2ecf20Sopenharmony_ci		return -ENOMEM;
2278c2ecf20Sopenharmony_ci	}
2288c2ecf20Sopenharmony_ci	ow->bv = kcalloc(ow->maxpages, sizeof(struct bio_vec), GFP_KERNEL);
2298c2ecf20Sopenharmony_ci	if (!ow->bv) {
2308c2ecf20Sopenharmony_ci		kfree(ow->pages);
2318c2ecf20Sopenharmony_ci		kfree(ow);
2328c2ecf20Sopenharmony_ci		return -ENOMEM;
2338c2ecf20Sopenharmony_ci	}
2348c2ecf20Sopenharmony_ci	blk_start_plug(&plug);
2358c2ecf20Sopenharmony_ci	ret = write_cache_pages(mapping, wbc, orangefs_writepages_callback, ow);
2368c2ecf20Sopenharmony_ci	if (ow->npages)
2378c2ecf20Sopenharmony_ci		ret = orangefs_writepages_work(ow, wbc);
2388c2ecf20Sopenharmony_ci	blk_finish_plug(&plug);
2398c2ecf20Sopenharmony_ci	kfree(ow->pages);
2408c2ecf20Sopenharmony_ci	kfree(ow->bv);
2418c2ecf20Sopenharmony_ci	kfree(ow);
2428c2ecf20Sopenharmony_ci	return ret;
2438c2ecf20Sopenharmony_ci}
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_cistatic int orangefs_launder_page(struct page *);
2468c2ecf20Sopenharmony_ci
2478c2ecf20Sopenharmony_cistatic int orangefs_readpage(struct file *file, struct page *page)
2488c2ecf20Sopenharmony_ci{
2498c2ecf20Sopenharmony_ci	struct inode *inode = page->mapping->host;
2508c2ecf20Sopenharmony_ci	struct iov_iter iter;
2518c2ecf20Sopenharmony_ci	struct bio_vec bv;
2528c2ecf20Sopenharmony_ci	ssize_t ret;
2538c2ecf20Sopenharmony_ci	loff_t off; /* offset into this page */
2548c2ecf20Sopenharmony_ci	pgoff_t index; /* which page */
2558c2ecf20Sopenharmony_ci	struct page *next_page;
2568c2ecf20Sopenharmony_ci	char *kaddr;
2578c2ecf20Sopenharmony_ci	loff_t read_size;
2588c2ecf20Sopenharmony_ci	int buffer_index = -1; /* orangefs shared memory slot */
2598c2ecf20Sopenharmony_ci	int slot_index;   /* index into slot */
2608c2ecf20Sopenharmony_ci	int remaining;
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ci	/*
2638c2ecf20Sopenharmony_ci	 * Get up to this many bytes from Orangefs at a time and try
2648c2ecf20Sopenharmony_ci	 * to fill them into the page cache at once. Tests with dd made
2658c2ecf20Sopenharmony_ci	 * this seem like a reasonable static number, if there was
2668c2ecf20Sopenharmony_ci	 * interest perhaps this number could be made setable through
2678c2ecf20Sopenharmony_ci	 * sysfs...
2688c2ecf20Sopenharmony_ci	 */
2698c2ecf20Sopenharmony_ci	read_size = 524288;
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ci	if (PageDirty(page))
2728c2ecf20Sopenharmony_ci		orangefs_launder_page(page);
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_ci	off = page_offset(page);
2758c2ecf20Sopenharmony_ci	index = off >> PAGE_SHIFT;
2768c2ecf20Sopenharmony_ci	bv.bv_page = page;
2778c2ecf20Sopenharmony_ci	bv.bv_len = PAGE_SIZE;
2788c2ecf20Sopenharmony_ci	bv.bv_offset = 0;
2798c2ecf20Sopenharmony_ci	iov_iter_bvec(&iter, READ, &bv, 1, PAGE_SIZE);
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_ci	ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, &off, &iter,
2828c2ecf20Sopenharmony_ci	    read_size, inode->i_size, NULL, &buffer_index, file);
2838c2ecf20Sopenharmony_ci	remaining = ret;
2848c2ecf20Sopenharmony_ci	/* this will only zero remaining unread portions of the page data */
2858c2ecf20Sopenharmony_ci	iov_iter_zero(~0U, &iter);
2868c2ecf20Sopenharmony_ci	/* takes care of potential aliasing */
2878c2ecf20Sopenharmony_ci	flush_dcache_page(page);
2888c2ecf20Sopenharmony_ci	if (ret < 0) {
2898c2ecf20Sopenharmony_ci		SetPageError(page);
2908c2ecf20Sopenharmony_ci		unlock_page(page);
2918c2ecf20Sopenharmony_ci		goto out;
2928c2ecf20Sopenharmony_ci	} else {
2938c2ecf20Sopenharmony_ci		SetPageUptodate(page);
2948c2ecf20Sopenharmony_ci		if (PageError(page))
2958c2ecf20Sopenharmony_ci			ClearPageError(page);
2968c2ecf20Sopenharmony_ci		ret = 0;
2978c2ecf20Sopenharmony_ci	}
2988c2ecf20Sopenharmony_ci	/* unlock the page after the ->readpage() routine completes */
2998c2ecf20Sopenharmony_ci	unlock_page(page);
3008c2ecf20Sopenharmony_ci
3018c2ecf20Sopenharmony_ci	if (remaining > PAGE_SIZE) {
3028c2ecf20Sopenharmony_ci		slot_index = 0;
3038c2ecf20Sopenharmony_ci		while ((remaining - PAGE_SIZE) >= PAGE_SIZE) {
3048c2ecf20Sopenharmony_ci			remaining -= PAGE_SIZE;
3058c2ecf20Sopenharmony_ci			/*
3068c2ecf20Sopenharmony_ci			 * It is an optimization to try and fill more than one
3078c2ecf20Sopenharmony_ci			 * page... by now we've already gotten the single
3088c2ecf20Sopenharmony_ci			 * page we were after, if stuff doesn't seem to
3098c2ecf20Sopenharmony_ci			 * be going our way at this point just return
3108c2ecf20Sopenharmony_ci			 * and hope for the best.
3118c2ecf20Sopenharmony_ci			 *
3128c2ecf20Sopenharmony_ci			 * If we look for pages and they're already there is
3138c2ecf20Sopenharmony_ci			 * one reason to give up, and if they're not there
3148c2ecf20Sopenharmony_ci			 * and we can't create them is another reason.
3158c2ecf20Sopenharmony_ci			 */
3168c2ecf20Sopenharmony_ci
3178c2ecf20Sopenharmony_ci			index++;
3188c2ecf20Sopenharmony_ci			slot_index++;
3198c2ecf20Sopenharmony_ci			next_page = find_get_page(inode->i_mapping, index);
3208c2ecf20Sopenharmony_ci			if (next_page) {
3218c2ecf20Sopenharmony_ci				gossip_debug(GOSSIP_FILE_DEBUG,
3228c2ecf20Sopenharmony_ci					"%s: found next page, quitting\n",
3238c2ecf20Sopenharmony_ci					__func__);
3248c2ecf20Sopenharmony_ci				put_page(next_page);
3258c2ecf20Sopenharmony_ci				goto out;
3268c2ecf20Sopenharmony_ci			}
3278c2ecf20Sopenharmony_ci			next_page = find_or_create_page(inode->i_mapping,
3288c2ecf20Sopenharmony_ci							index,
3298c2ecf20Sopenharmony_ci							GFP_KERNEL);
3308c2ecf20Sopenharmony_ci			/*
3318c2ecf20Sopenharmony_ci			 * I've never hit this, leave it as a printk for
3328c2ecf20Sopenharmony_ci			 * now so it will be obvious.
3338c2ecf20Sopenharmony_ci			 */
3348c2ecf20Sopenharmony_ci			if (!next_page) {
3358c2ecf20Sopenharmony_ci				printk("%s: can't create next page, quitting\n",
3368c2ecf20Sopenharmony_ci					__func__);
3378c2ecf20Sopenharmony_ci				goto out;
3388c2ecf20Sopenharmony_ci			}
3398c2ecf20Sopenharmony_ci			kaddr = kmap_atomic(next_page);
3408c2ecf20Sopenharmony_ci			orangefs_bufmap_page_fill(kaddr,
3418c2ecf20Sopenharmony_ci						buffer_index,
3428c2ecf20Sopenharmony_ci						slot_index);
3438c2ecf20Sopenharmony_ci			kunmap_atomic(kaddr);
3448c2ecf20Sopenharmony_ci			SetPageUptodate(next_page);
3458c2ecf20Sopenharmony_ci			unlock_page(next_page);
3468c2ecf20Sopenharmony_ci			put_page(next_page);
3478c2ecf20Sopenharmony_ci		}
3488c2ecf20Sopenharmony_ci	}
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ciout:
3518c2ecf20Sopenharmony_ci	if (buffer_index != -1)
3528c2ecf20Sopenharmony_ci		orangefs_bufmap_put(buffer_index);
3538c2ecf20Sopenharmony_ci	return ret;
3548c2ecf20Sopenharmony_ci}
3558c2ecf20Sopenharmony_ci
3568c2ecf20Sopenharmony_cistatic int orangefs_write_begin(struct file *file,
3578c2ecf20Sopenharmony_ci    struct address_space *mapping,
3588c2ecf20Sopenharmony_ci    loff_t pos, unsigned len, unsigned flags, struct page **pagep,
3598c2ecf20Sopenharmony_ci    void **fsdata)
3608c2ecf20Sopenharmony_ci{
3618c2ecf20Sopenharmony_ci	struct orangefs_write_range *wr;
3628c2ecf20Sopenharmony_ci	struct page *page;
3638c2ecf20Sopenharmony_ci	pgoff_t index;
3648c2ecf20Sopenharmony_ci	int ret;
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_ci	index = pos >> PAGE_SHIFT;
3678c2ecf20Sopenharmony_ci
3688c2ecf20Sopenharmony_ci	page = grab_cache_page_write_begin(mapping, index, flags);
3698c2ecf20Sopenharmony_ci	if (!page)
3708c2ecf20Sopenharmony_ci		return -ENOMEM;
3718c2ecf20Sopenharmony_ci
3728c2ecf20Sopenharmony_ci	*pagep = page;
3738c2ecf20Sopenharmony_ci
3748c2ecf20Sopenharmony_ci	if (PageDirty(page) && !PagePrivate(page)) {
3758c2ecf20Sopenharmony_ci		/*
3768c2ecf20Sopenharmony_ci		 * Should be impossible.  If it happens, launder the page
3778c2ecf20Sopenharmony_ci		 * since we don't know what's dirty.  This will WARN in
3788c2ecf20Sopenharmony_ci		 * orangefs_writepage_locked.
3798c2ecf20Sopenharmony_ci		 */
3808c2ecf20Sopenharmony_ci		ret = orangefs_launder_page(page);
3818c2ecf20Sopenharmony_ci		if (ret)
3828c2ecf20Sopenharmony_ci			return ret;
3838c2ecf20Sopenharmony_ci	}
3848c2ecf20Sopenharmony_ci	if (PagePrivate(page)) {
3858c2ecf20Sopenharmony_ci		struct orangefs_write_range *wr;
3868c2ecf20Sopenharmony_ci		wr = (struct orangefs_write_range *)page_private(page);
3878c2ecf20Sopenharmony_ci		if (wr->pos + wr->len == pos &&
3888c2ecf20Sopenharmony_ci		    uid_eq(wr->uid, current_fsuid()) &&
3898c2ecf20Sopenharmony_ci		    gid_eq(wr->gid, current_fsgid())) {
3908c2ecf20Sopenharmony_ci			wr->len += len;
3918c2ecf20Sopenharmony_ci			goto okay;
3928c2ecf20Sopenharmony_ci		} else {
3938c2ecf20Sopenharmony_ci			ret = orangefs_launder_page(page);
3948c2ecf20Sopenharmony_ci			if (ret)
3958c2ecf20Sopenharmony_ci				return ret;
3968c2ecf20Sopenharmony_ci		}
3978c2ecf20Sopenharmony_ci	}
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci	wr = kmalloc(sizeof *wr, GFP_KERNEL);
4008c2ecf20Sopenharmony_ci	if (!wr)
4018c2ecf20Sopenharmony_ci		return -ENOMEM;
4028c2ecf20Sopenharmony_ci
4038c2ecf20Sopenharmony_ci	wr->pos = pos;
4048c2ecf20Sopenharmony_ci	wr->len = len;
4058c2ecf20Sopenharmony_ci	wr->uid = current_fsuid();
4068c2ecf20Sopenharmony_ci	wr->gid = current_fsgid();
4078c2ecf20Sopenharmony_ci	attach_page_private(page, wr);
4088c2ecf20Sopenharmony_ciokay:
4098c2ecf20Sopenharmony_ci	return 0;
4108c2ecf20Sopenharmony_ci}
4118c2ecf20Sopenharmony_ci
4128c2ecf20Sopenharmony_cistatic int orangefs_write_end(struct file *file, struct address_space *mapping,
4138c2ecf20Sopenharmony_ci    loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata)
4148c2ecf20Sopenharmony_ci{
4158c2ecf20Sopenharmony_ci	struct inode *inode = page->mapping->host;
4168c2ecf20Sopenharmony_ci	loff_t last_pos = pos + copied;
4178c2ecf20Sopenharmony_ci
4188c2ecf20Sopenharmony_ci	/*
4198c2ecf20Sopenharmony_ci	 * No need to use i_size_read() here, the i_size
4208c2ecf20Sopenharmony_ci	 * cannot change under us because we hold the i_mutex.
4218c2ecf20Sopenharmony_ci	 */
4228c2ecf20Sopenharmony_ci	if (last_pos > inode->i_size)
4238c2ecf20Sopenharmony_ci		i_size_write(inode, last_pos);
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci	/* zero the stale part of the page if we did a short copy */
4268c2ecf20Sopenharmony_ci	if (!PageUptodate(page)) {
4278c2ecf20Sopenharmony_ci		unsigned from = pos & (PAGE_SIZE - 1);
4288c2ecf20Sopenharmony_ci		if (copied < len) {
4298c2ecf20Sopenharmony_ci			zero_user(page, from + copied, len - copied);
4308c2ecf20Sopenharmony_ci		}
4318c2ecf20Sopenharmony_ci		/* Set fully written pages uptodate. */
4328c2ecf20Sopenharmony_ci		if (pos == page_offset(page) &&
4338c2ecf20Sopenharmony_ci		    (len == PAGE_SIZE || pos + len == inode->i_size)) {
4348c2ecf20Sopenharmony_ci			zero_user_segment(page, from + copied, PAGE_SIZE);
4358c2ecf20Sopenharmony_ci			SetPageUptodate(page);
4368c2ecf20Sopenharmony_ci		}
4378c2ecf20Sopenharmony_ci	}
4388c2ecf20Sopenharmony_ci
4398c2ecf20Sopenharmony_ci	set_page_dirty(page);
4408c2ecf20Sopenharmony_ci	unlock_page(page);
4418c2ecf20Sopenharmony_ci	put_page(page);
4428c2ecf20Sopenharmony_ci
4438c2ecf20Sopenharmony_ci	mark_inode_dirty_sync(file_inode(file));
4448c2ecf20Sopenharmony_ci	return copied;
4458c2ecf20Sopenharmony_ci}
4468c2ecf20Sopenharmony_ci
4478c2ecf20Sopenharmony_cistatic void orangefs_invalidatepage(struct page *page,
4488c2ecf20Sopenharmony_ci				 unsigned int offset,
4498c2ecf20Sopenharmony_ci				 unsigned int length)
4508c2ecf20Sopenharmony_ci{
4518c2ecf20Sopenharmony_ci	struct orangefs_write_range *wr;
4528c2ecf20Sopenharmony_ci	wr = (struct orangefs_write_range *)page_private(page);
4538c2ecf20Sopenharmony_ci
4548c2ecf20Sopenharmony_ci	if (offset == 0 && length == PAGE_SIZE) {
4558c2ecf20Sopenharmony_ci		kfree(detach_page_private(page));
4568c2ecf20Sopenharmony_ci		return;
4578c2ecf20Sopenharmony_ci	/* write range entirely within invalidate range (or equal) */
4588c2ecf20Sopenharmony_ci	} else if (page_offset(page) + offset <= wr->pos &&
4598c2ecf20Sopenharmony_ci	    wr->pos + wr->len <= page_offset(page) + offset + length) {
4608c2ecf20Sopenharmony_ci		kfree(detach_page_private(page));
4618c2ecf20Sopenharmony_ci		/* XXX is this right? only caller in fs */
4628c2ecf20Sopenharmony_ci		cancel_dirty_page(page);
4638c2ecf20Sopenharmony_ci		return;
4648c2ecf20Sopenharmony_ci	/* invalidate range chops off end of write range */
4658c2ecf20Sopenharmony_ci	} else if (wr->pos < page_offset(page) + offset &&
4668c2ecf20Sopenharmony_ci	    wr->pos + wr->len <= page_offset(page) + offset + length &&
4678c2ecf20Sopenharmony_ci	     page_offset(page) + offset < wr->pos + wr->len) {
4688c2ecf20Sopenharmony_ci		size_t x;
4698c2ecf20Sopenharmony_ci		x = wr->pos + wr->len - (page_offset(page) + offset);
4708c2ecf20Sopenharmony_ci		WARN_ON(x > wr->len);
4718c2ecf20Sopenharmony_ci		wr->len -= x;
4728c2ecf20Sopenharmony_ci		wr->uid = current_fsuid();
4738c2ecf20Sopenharmony_ci		wr->gid = current_fsgid();
4748c2ecf20Sopenharmony_ci	/* invalidate range chops off beginning of write range */
4758c2ecf20Sopenharmony_ci	} else if (page_offset(page) + offset <= wr->pos &&
4768c2ecf20Sopenharmony_ci	    page_offset(page) + offset + length < wr->pos + wr->len &&
4778c2ecf20Sopenharmony_ci	    wr->pos < page_offset(page) + offset + length) {
4788c2ecf20Sopenharmony_ci		size_t x;
4798c2ecf20Sopenharmony_ci		x = page_offset(page) + offset + length - wr->pos;
4808c2ecf20Sopenharmony_ci		WARN_ON(x > wr->len);
4818c2ecf20Sopenharmony_ci		wr->pos += x;
4828c2ecf20Sopenharmony_ci		wr->len -= x;
4838c2ecf20Sopenharmony_ci		wr->uid = current_fsuid();
4848c2ecf20Sopenharmony_ci		wr->gid = current_fsgid();
4858c2ecf20Sopenharmony_ci	/* invalidate range entirely within write range (punch hole) */
4868c2ecf20Sopenharmony_ci	} else if (wr->pos < page_offset(page) + offset &&
4878c2ecf20Sopenharmony_ci	    page_offset(page) + offset + length < wr->pos + wr->len) {
4888c2ecf20Sopenharmony_ci		/* XXX what do we do here... should not WARN_ON */
4898c2ecf20Sopenharmony_ci		WARN_ON(1);
4908c2ecf20Sopenharmony_ci		/* punch hole */
4918c2ecf20Sopenharmony_ci		/*
4928c2ecf20Sopenharmony_ci		 * should we just ignore this and write it out anyway?
4938c2ecf20Sopenharmony_ci		 * it hardly makes sense
4948c2ecf20Sopenharmony_ci		 */
4958c2ecf20Sopenharmony_ci		return;
4968c2ecf20Sopenharmony_ci	/* non-overlapping ranges */
4978c2ecf20Sopenharmony_ci	} else {
4988c2ecf20Sopenharmony_ci		/* WARN if they do overlap */
4998c2ecf20Sopenharmony_ci		if (!((page_offset(page) + offset + length <= wr->pos) ^
5008c2ecf20Sopenharmony_ci		    (wr->pos + wr->len <= page_offset(page) + offset))) {
5018c2ecf20Sopenharmony_ci			WARN_ON(1);
5028c2ecf20Sopenharmony_ci			printk("invalidate range offset %llu length %u\n",
5038c2ecf20Sopenharmony_ci			    page_offset(page) + offset, length);
5048c2ecf20Sopenharmony_ci			printk("write range offset %llu length %zu\n",
5058c2ecf20Sopenharmony_ci			    wr->pos, wr->len);
5068c2ecf20Sopenharmony_ci		}
5078c2ecf20Sopenharmony_ci		return;
5088c2ecf20Sopenharmony_ci	}
5098c2ecf20Sopenharmony_ci
5108c2ecf20Sopenharmony_ci	/*
5118c2ecf20Sopenharmony_ci	 * Above there are returns where wr is freed or where we WARN.
5128c2ecf20Sopenharmony_ci	 * Thus the following runs if wr was modified above.
5138c2ecf20Sopenharmony_ci	 */
5148c2ecf20Sopenharmony_ci
5158c2ecf20Sopenharmony_ci	orangefs_launder_page(page);
5168c2ecf20Sopenharmony_ci}
5178c2ecf20Sopenharmony_ci
5188c2ecf20Sopenharmony_cistatic int orangefs_releasepage(struct page *page, gfp_t foo)
5198c2ecf20Sopenharmony_ci{
5208c2ecf20Sopenharmony_ci	return !PagePrivate(page);
5218c2ecf20Sopenharmony_ci}
5228c2ecf20Sopenharmony_ci
/*
 * ->freepage(): detach and free whatever private data is still
 * attached to the page.
 */
static void orangefs_freepage(struct page *page)
{
	void *priv = detach_page_private(page);

	kfree(priv);
}
5278c2ecf20Sopenharmony_ci
5288c2ecf20Sopenharmony_cistatic int orangefs_launder_page(struct page *page)
5298c2ecf20Sopenharmony_ci{
5308c2ecf20Sopenharmony_ci	int r = 0;
5318c2ecf20Sopenharmony_ci	struct writeback_control wbc = {
5328c2ecf20Sopenharmony_ci		.sync_mode = WB_SYNC_ALL,
5338c2ecf20Sopenharmony_ci		.nr_to_write = 0,
5348c2ecf20Sopenharmony_ci	};
5358c2ecf20Sopenharmony_ci	wait_on_page_writeback(page);
5368c2ecf20Sopenharmony_ci	if (clear_page_dirty_for_io(page)) {
5378c2ecf20Sopenharmony_ci		r = orangefs_writepage_locked(page, &wbc);
5388c2ecf20Sopenharmony_ci		end_page_writeback(page);
5398c2ecf20Sopenharmony_ci	}
5408c2ecf20Sopenharmony_ci	return r;
5418c2ecf20Sopenharmony_ci}
5428c2ecf20Sopenharmony_ci
5438c2ecf20Sopenharmony_cistatic ssize_t orangefs_direct_IO(struct kiocb *iocb,
5448c2ecf20Sopenharmony_ci				  struct iov_iter *iter)
5458c2ecf20Sopenharmony_ci{
5468c2ecf20Sopenharmony_ci	/*
5478c2ecf20Sopenharmony_ci	 * Comment from original do_readv_writev:
5488c2ecf20Sopenharmony_ci	 * Common entry point for read/write/readv/writev
5498c2ecf20Sopenharmony_ci	 * This function will dispatch it to either the direct I/O
5508c2ecf20Sopenharmony_ci	 * or buffered I/O path depending on the mount options and/or
5518c2ecf20Sopenharmony_ci	 * augmented/extended metadata attached to the file.
5528c2ecf20Sopenharmony_ci	 * Note: File extended attributes override any mount options.
5538c2ecf20Sopenharmony_ci	 */
5548c2ecf20Sopenharmony_ci	struct file *file = iocb->ki_filp;
5558c2ecf20Sopenharmony_ci	loff_t pos = iocb->ki_pos;
5568c2ecf20Sopenharmony_ci	enum ORANGEFS_io_type type = iov_iter_rw(iter) == WRITE ?
5578c2ecf20Sopenharmony_ci            ORANGEFS_IO_WRITE : ORANGEFS_IO_READ;
5588c2ecf20Sopenharmony_ci	loff_t *offset = &pos;
5598c2ecf20Sopenharmony_ci	struct inode *inode = file->f_mapping->host;
5608c2ecf20Sopenharmony_ci	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
5618c2ecf20Sopenharmony_ci	struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
5628c2ecf20Sopenharmony_ci	size_t count = iov_iter_count(iter);
5638c2ecf20Sopenharmony_ci	ssize_t total_count = 0;
5648c2ecf20Sopenharmony_ci	ssize_t ret = -EINVAL;
5658c2ecf20Sopenharmony_ci	int i = 0;
5668c2ecf20Sopenharmony_ci
5678c2ecf20Sopenharmony_ci	gossip_debug(GOSSIP_FILE_DEBUG,
5688c2ecf20Sopenharmony_ci		"%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
5698c2ecf20Sopenharmony_ci		__func__,
5708c2ecf20Sopenharmony_ci		handle,
5718c2ecf20Sopenharmony_ci		(int)count);
5728c2ecf20Sopenharmony_ci
5738c2ecf20Sopenharmony_ci	if (type == ORANGEFS_IO_WRITE) {
5748c2ecf20Sopenharmony_ci		gossip_debug(GOSSIP_FILE_DEBUG,
5758c2ecf20Sopenharmony_ci			     "%s(%pU): proceeding with offset : %llu, "
5768c2ecf20Sopenharmony_ci			     "size %d\n",
5778c2ecf20Sopenharmony_ci			     __func__,
5788c2ecf20Sopenharmony_ci			     handle,
5798c2ecf20Sopenharmony_ci			     llu(*offset),
5808c2ecf20Sopenharmony_ci			     (int)count);
5818c2ecf20Sopenharmony_ci	}
5828c2ecf20Sopenharmony_ci
5838c2ecf20Sopenharmony_ci	if (count == 0) {
5848c2ecf20Sopenharmony_ci		ret = 0;
5858c2ecf20Sopenharmony_ci		goto out;
5868c2ecf20Sopenharmony_ci	}
5878c2ecf20Sopenharmony_ci
5888c2ecf20Sopenharmony_ci	while (iov_iter_count(iter)) {
5898c2ecf20Sopenharmony_ci		size_t each_count = iov_iter_count(iter);
5908c2ecf20Sopenharmony_ci		size_t amt_complete;
5918c2ecf20Sopenharmony_ci		i++;
5928c2ecf20Sopenharmony_ci
5938c2ecf20Sopenharmony_ci		/* how much to transfer in this loop iteration */
5948c2ecf20Sopenharmony_ci		if (each_count > orangefs_bufmap_size_query())
5958c2ecf20Sopenharmony_ci			each_count = orangefs_bufmap_size_query();
5968c2ecf20Sopenharmony_ci
5978c2ecf20Sopenharmony_ci		gossip_debug(GOSSIP_FILE_DEBUG,
5988c2ecf20Sopenharmony_ci			     "%s(%pU): size of each_count(%d)\n",
5998c2ecf20Sopenharmony_ci			     __func__,
6008c2ecf20Sopenharmony_ci			     handle,
6018c2ecf20Sopenharmony_ci			     (int)each_count);
6028c2ecf20Sopenharmony_ci		gossip_debug(GOSSIP_FILE_DEBUG,
6038c2ecf20Sopenharmony_ci			     "%s(%pU): BEFORE wait_for_io: offset is %d\n",
6048c2ecf20Sopenharmony_ci			     __func__,
6058c2ecf20Sopenharmony_ci			     handle,
6068c2ecf20Sopenharmony_ci			     (int)*offset);
6078c2ecf20Sopenharmony_ci
6088c2ecf20Sopenharmony_ci		ret = wait_for_direct_io(type, inode, offset, iter,
6098c2ecf20Sopenharmony_ci				each_count, 0, NULL, NULL, file);
6108c2ecf20Sopenharmony_ci		gossip_debug(GOSSIP_FILE_DEBUG,
6118c2ecf20Sopenharmony_ci			     "%s(%pU): return from wait_for_io:%d\n",
6128c2ecf20Sopenharmony_ci			     __func__,
6138c2ecf20Sopenharmony_ci			     handle,
6148c2ecf20Sopenharmony_ci			     (int)ret);
6158c2ecf20Sopenharmony_ci
6168c2ecf20Sopenharmony_ci		if (ret < 0)
6178c2ecf20Sopenharmony_ci			goto out;
6188c2ecf20Sopenharmony_ci
6198c2ecf20Sopenharmony_ci		*offset += ret;
6208c2ecf20Sopenharmony_ci		total_count += ret;
6218c2ecf20Sopenharmony_ci		amt_complete = ret;
6228c2ecf20Sopenharmony_ci
6238c2ecf20Sopenharmony_ci		gossip_debug(GOSSIP_FILE_DEBUG,
6248c2ecf20Sopenharmony_ci			     "%s(%pU): AFTER wait_for_io: offset is %d\n",
6258c2ecf20Sopenharmony_ci			     __func__,
6268c2ecf20Sopenharmony_ci			     handle,
6278c2ecf20Sopenharmony_ci			     (int)*offset);
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_ci		/*
6308c2ecf20Sopenharmony_ci		 * if we got a short I/O operations,
6318c2ecf20Sopenharmony_ci		 * fall out and return what we got so far
6328c2ecf20Sopenharmony_ci		 */
6338c2ecf20Sopenharmony_ci		if (amt_complete < each_count)
6348c2ecf20Sopenharmony_ci			break;
6358c2ecf20Sopenharmony_ci	} /*end while */
6368c2ecf20Sopenharmony_ci
6378c2ecf20Sopenharmony_ciout:
6388c2ecf20Sopenharmony_ci	if (total_count > 0)
6398c2ecf20Sopenharmony_ci		ret = total_count;
6408c2ecf20Sopenharmony_ci	if (ret > 0) {
6418c2ecf20Sopenharmony_ci		if (type == ORANGEFS_IO_READ) {
6428c2ecf20Sopenharmony_ci			file_accessed(file);
6438c2ecf20Sopenharmony_ci		} else {
6448c2ecf20Sopenharmony_ci			file_update_time(file);
6458c2ecf20Sopenharmony_ci			if (*offset > i_size_read(inode))
6468c2ecf20Sopenharmony_ci				i_size_write(inode, *offset);
6478c2ecf20Sopenharmony_ci		}
6488c2ecf20Sopenharmony_ci	}
6498c2ecf20Sopenharmony_ci
6508c2ecf20Sopenharmony_ci	gossip_debug(GOSSIP_FILE_DEBUG,
6518c2ecf20Sopenharmony_ci		     "%s(%pU): Value(%d) returned.\n",
6528c2ecf20Sopenharmony_ci		     __func__,
6538c2ecf20Sopenharmony_ci		     handle,
6548c2ecf20Sopenharmony_ci		     (int)ret);
6558c2ecf20Sopenharmony_ci
6568c2ecf20Sopenharmony_ci	return ret;
6578c2ecf20Sopenharmony_ci}
6588c2ecf20Sopenharmony_ci
6598c2ecf20Sopenharmony_ci/** ORANGEFS2 implementation of address space operations */
6608c2ecf20Sopenharmony_cistatic const struct address_space_operations orangefs_address_operations = {
6618c2ecf20Sopenharmony_ci	.writepage = orangefs_writepage,
6628c2ecf20Sopenharmony_ci	.readpage = orangefs_readpage,
6638c2ecf20Sopenharmony_ci	.writepages = orangefs_writepages,
6648c2ecf20Sopenharmony_ci	.set_page_dirty = __set_page_dirty_nobuffers,
6658c2ecf20Sopenharmony_ci	.write_begin = orangefs_write_begin,
6668c2ecf20Sopenharmony_ci	.write_end = orangefs_write_end,
6678c2ecf20Sopenharmony_ci	.invalidatepage = orangefs_invalidatepage,
6688c2ecf20Sopenharmony_ci	.releasepage = orangefs_releasepage,
6698c2ecf20Sopenharmony_ci	.freepage = orangefs_freepage,
6708c2ecf20Sopenharmony_ci	.launder_page = orangefs_launder_page,
6718c2ecf20Sopenharmony_ci	.direct_IO = orangefs_direct_IO,
6728c2ecf20Sopenharmony_ci};
6738c2ecf20Sopenharmony_ci
6748c2ecf20Sopenharmony_civm_fault_t orangefs_page_mkwrite(struct vm_fault *vmf)
6758c2ecf20Sopenharmony_ci{
6768c2ecf20Sopenharmony_ci	struct page *page = vmf->page;
6778c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(vmf->vma->vm_file);
6788c2ecf20Sopenharmony_ci	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
6798c2ecf20Sopenharmony_ci	unsigned long *bitlock = &orangefs_inode->bitlock;
6808c2ecf20Sopenharmony_ci	vm_fault_t ret;
6818c2ecf20Sopenharmony_ci	struct orangefs_write_range *wr;
6828c2ecf20Sopenharmony_ci
6838c2ecf20Sopenharmony_ci	sb_start_pagefault(inode->i_sb);
6848c2ecf20Sopenharmony_ci
6858c2ecf20Sopenharmony_ci	if (wait_on_bit(bitlock, 1, TASK_KILLABLE)) {
6868c2ecf20Sopenharmony_ci		ret = VM_FAULT_RETRY;
6878c2ecf20Sopenharmony_ci		goto out;
6888c2ecf20Sopenharmony_ci	}
6898c2ecf20Sopenharmony_ci
6908c2ecf20Sopenharmony_ci	lock_page(page);
6918c2ecf20Sopenharmony_ci	if (PageDirty(page) && !PagePrivate(page)) {
6928c2ecf20Sopenharmony_ci		/*
6938c2ecf20Sopenharmony_ci		 * Should be impossible.  If it happens, launder the page
6948c2ecf20Sopenharmony_ci		 * since we don't know what's dirty.  This will WARN in
6958c2ecf20Sopenharmony_ci		 * orangefs_writepage_locked.
6968c2ecf20Sopenharmony_ci		 */
6978c2ecf20Sopenharmony_ci		if (orangefs_launder_page(page)) {
6988c2ecf20Sopenharmony_ci			ret = VM_FAULT_LOCKED|VM_FAULT_RETRY;
6998c2ecf20Sopenharmony_ci			goto out;
7008c2ecf20Sopenharmony_ci		}
7018c2ecf20Sopenharmony_ci	}
7028c2ecf20Sopenharmony_ci	if (PagePrivate(page)) {
7038c2ecf20Sopenharmony_ci		wr = (struct orangefs_write_range *)page_private(page);
7048c2ecf20Sopenharmony_ci		if (uid_eq(wr->uid, current_fsuid()) &&
7058c2ecf20Sopenharmony_ci		    gid_eq(wr->gid, current_fsgid())) {
7068c2ecf20Sopenharmony_ci			wr->pos = page_offset(page);
7078c2ecf20Sopenharmony_ci			wr->len = PAGE_SIZE;
7088c2ecf20Sopenharmony_ci			goto okay;
7098c2ecf20Sopenharmony_ci		} else {
7108c2ecf20Sopenharmony_ci			if (orangefs_launder_page(page)) {
7118c2ecf20Sopenharmony_ci				ret = VM_FAULT_LOCKED|VM_FAULT_RETRY;
7128c2ecf20Sopenharmony_ci				goto out;
7138c2ecf20Sopenharmony_ci			}
7148c2ecf20Sopenharmony_ci		}
7158c2ecf20Sopenharmony_ci	}
7168c2ecf20Sopenharmony_ci	wr = kmalloc(sizeof *wr, GFP_KERNEL);
7178c2ecf20Sopenharmony_ci	if (!wr) {
7188c2ecf20Sopenharmony_ci		ret = VM_FAULT_LOCKED|VM_FAULT_RETRY;
7198c2ecf20Sopenharmony_ci		goto out;
7208c2ecf20Sopenharmony_ci	}
7218c2ecf20Sopenharmony_ci	wr->pos = page_offset(page);
7228c2ecf20Sopenharmony_ci	wr->len = PAGE_SIZE;
7238c2ecf20Sopenharmony_ci	wr->uid = current_fsuid();
7248c2ecf20Sopenharmony_ci	wr->gid = current_fsgid();
7258c2ecf20Sopenharmony_ci	attach_page_private(page, wr);
7268c2ecf20Sopenharmony_ciokay:
7278c2ecf20Sopenharmony_ci
7288c2ecf20Sopenharmony_ci	file_update_time(vmf->vma->vm_file);
7298c2ecf20Sopenharmony_ci	if (page->mapping != inode->i_mapping) {
7308c2ecf20Sopenharmony_ci		unlock_page(page);
7318c2ecf20Sopenharmony_ci		ret = VM_FAULT_LOCKED|VM_FAULT_NOPAGE;
7328c2ecf20Sopenharmony_ci		goto out;
7338c2ecf20Sopenharmony_ci	}
7348c2ecf20Sopenharmony_ci
7358c2ecf20Sopenharmony_ci	/*
7368c2ecf20Sopenharmony_ci	 * We mark the page dirty already here so that when freeze is in
7378c2ecf20Sopenharmony_ci	 * progress, we are guaranteed that writeback during freezing will
7388c2ecf20Sopenharmony_ci	 * see the dirty page and writeprotect it again.
7398c2ecf20Sopenharmony_ci	 */
7408c2ecf20Sopenharmony_ci	set_page_dirty(page);
7418c2ecf20Sopenharmony_ci	wait_for_stable_page(page);
7428c2ecf20Sopenharmony_ci	ret = VM_FAULT_LOCKED;
7438c2ecf20Sopenharmony_ciout:
7448c2ecf20Sopenharmony_ci	sb_end_pagefault(inode->i_sb);
7458c2ecf20Sopenharmony_ci	return ret;
7468c2ecf20Sopenharmony_ci}
7478c2ecf20Sopenharmony_ci
7488c2ecf20Sopenharmony_cistatic int orangefs_setattr_size(struct inode *inode, struct iattr *iattr)
7498c2ecf20Sopenharmony_ci{
7508c2ecf20Sopenharmony_ci	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
7518c2ecf20Sopenharmony_ci	struct orangefs_kernel_op_s *new_op;
7528c2ecf20Sopenharmony_ci	loff_t orig_size;
7538c2ecf20Sopenharmony_ci	int ret = -EINVAL;
7548c2ecf20Sopenharmony_ci
7558c2ecf20Sopenharmony_ci	gossip_debug(GOSSIP_INODE_DEBUG,
7568c2ecf20Sopenharmony_ci		     "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n",
7578c2ecf20Sopenharmony_ci		     __func__,
7588c2ecf20Sopenharmony_ci		     get_khandle_from_ino(inode),
7598c2ecf20Sopenharmony_ci		     &orangefs_inode->refn.khandle,
7608c2ecf20Sopenharmony_ci		     orangefs_inode->refn.fs_id,
7618c2ecf20Sopenharmony_ci		     iattr->ia_size);
7628c2ecf20Sopenharmony_ci
7638c2ecf20Sopenharmony_ci	/* Ensure that we have a up to date size, so we know if it changed. */
7648c2ecf20Sopenharmony_ci	ret = orangefs_inode_getattr(inode, ORANGEFS_GETATTR_SIZE);
7658c2ecf20Sopenharmony_ci	if (ret == -ESTALE)
7668c2ecf20Sopenharmony_ci		ret = -EIO;
7678c2ecf20Sopenharmony_ci	if (ret) {
7688c2ecf20Sopenharmony_ci		gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n",
7698c2ecf20Sopenharmony_ci		    __func__, ret);
7708c2ecf20Sopenharmony_ci		return ret;
7718c2ecf20Sopenharmony_ci	}
7728c2ecf20Sopenharmony_ci	orig_size = i_size_read(inode);
7738c2ecf20Sopenharmony_ci
7748c2ecf20Sopenharmony_ci	/* This is truncate_setsize in a different order. */
7758c2ecf20Sopenharmony_ci	truncate_pagecache(inode, iattr->ia_size);
7768c2ecf20Sopenharmony_ci	i_size_write(inode, iattr->ia_size);
7778c2ecf20Sopenharmony_ci	if (iattr->ia_size > orig_size)
7788c2ecf20Sopenharmony_ci		pagecache_isize_extended(inode, orig_size, iattr->ia_size);
7798c2ecf20Sopenharmony_ci
7808c2ecf20Sopenharmony_ci	new_op = op_alloc(ORANGEFS_VFS_OP_TRUNCATE);
7818c2ecf20Sopenharmony_ci	if (!new_op)
7828c2ecf20Sopenharmony_ci		return -ENOMEM;
7838c2ecf20Sopenharmony_ci
7848c2ecf20Sopenharmony_ci	new_op->upcall.req.truncate.refn = orangefs_inode->refn;
7858c2ecf20Sopenharmony_ci	new_op->upcall.req.truncate.size = (__s64) iattr->ia_size;
7868c2ecf20Sopenharmony_ci
7878c2ecf20Sopenharmony_ci	ret = service_operation(new_op,
7888c2ecf20Sopenharmony_ci		__func__,
7898c2ecf20Sopenharmony_ci		get_interruptible_flag(inode));
7908c2ecf20Sopenharmony_ci
7918c2ecf20Sopenharmony_ci	/*
7928c2ecf20Sopenharmony_ci	 * the truncate has no downcall members to retrieve, but
7938c2ecf20Sopenharmony_ci	 * the status value tells us if it went through ok or not
7948c2ecf20Sopenharmony_ci	 */
7958c2ecf20Sopenharmony_ci	gossip_debug(GOSSIP_INODE_DEBUG, "%s: ret:%d:\n", __func__, ret);
7968c2ecf20Sopenharmony_ci
7978c2ecf20Sopenharmony_ci	op_release(new_op);
7988c2ecf20Sopenharmony_ci
7998c2ecf20Sopenharmony_ci	if (ret != 0)
8008c2ecf20Sopenharmony_ci		return ret;
8018c2ecf20Sopenharmony_ci
8028c2ecf20Sopenharmony_ci	if (orig_size != i_size_read(inode))
8038c2ecf20Sopenharmony_ci		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
8048c2ecf20Sopenharmony_ci
8058c2ecf20Sopenharmony_ci	return ret;
8068c2ecf20Sopenharmony_ci}
8078c2ecf20Sopenharmony_ci
8088c2ecf20Sopenharmony_ciint __orangefs_setattr(struct inode *inode, struct iattr *iattr)
8098c2ecf20Sopenharmony_ci{
8108c2ecf20Sopenharmony_ci	int ret;
8118c2ecf20Sopenharmony_ci
8128c2ecf20Sopenharmony_ci	if (iattr->ia_valid & ATTR_MODE) {
8138c2ecf20Sopenharmony_ci		if (iattr->ia_mode & (S_ISVTX)) {
8148c2ecf20Sopenharmony_ci			if (is_root_handle(inode)) {
8158c2ecf20Sopenharmony_ci				/*
8168c2ecf20Sopenharmony_ci				 * allow sticky bit to be set on root (since
8178c2ecf20Sopenharmony_ci				 * it shows up that way by default anyhow),
8188c2ecf20Sopenharmony_ci				 * but don't show it to the server
8198c2ecf20Sopenharmony_ci				 */
8208c2ecf20Sopenharmony_ci				iattr->ia_mode -= S_ISVTX;
8218c2ecf20Sopenharmony_ci			} else {
8228c2ecf20Sopenharmony_ci				gossip_debug(GOSSIP_UTILS_DEBUG,
8238c2ecf20Sopenharmony_ci					     "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
8248c2ecf20Sopenharmony_ci				ret = -EINVAL;
8258c2ecf20Sopenharmony_ci				goto out;
8268c2ecf20Sopenharmony_ci			}
8278c2ecf20Sopenharmony_ci		}
8288c2ecf20Sopenharmony_ci		if (iattr->ia_mode & (S_ISUID)) {
8298c2ecf20Sopenharmony_ci			gossip_debug(GOSSIP_UTILS_DEBUG,
8308c2ecf20Sopenharmony_ci				     "Attempting to set setuid bit (not supported); returning EINVAL.\n");
8318c2ecf20Sopenharmony_ci			ret = -EINVAL;
8328c2ecf20Sopenharmony_ci			goto out;
8338c2ecf20Sopenharmony_ci		}
8348c2ecf20Sopenharmony_ci	}
8358c2ecf20Sopenharmony_ci
8368c2ecf20Sopenharmony_ci	if (iattr->ia_valid & ATTR_SIZE) {
8378c2ecf20Sopenharmony_ci		ret = orangefs_setattr_size(inode, iattr);
8388c2ecf20Sopenharmony_ci		if (ret)
8398c2ecf20Sopenharmony_ci			goto out;
8408c2ecf20Sopenharmony_ci	}
8418c2ecf20Sopenharmony_ci
8428c2ecf20Sopenharmony_ciagain:
8438c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
8448c2ecf20Sopenharmony_ci	if (ORANGEFS_I(inode)->attr_valid) {
8458c2ecf20Sopenharmony_ci		if (uid_eq(ORANGEFS_I(inode)->attr_uid, current_fsuid()) &&
8468c2ecf20Sopenharmony_ci		    gid_eq(ORANGEFS_I(inode)->attr_gid, current_fsgid())) {
8478c2ecf20Sopenharmony_ci			ORANGEFS_I(inode)->attr_valid = iattr->ia_valid;
8488c2ecf20Sopenharmony_ci		} else {
8498c2ecf20Sopenharmony_ci			spin_unlock(&inode->i_lock);
8508c2ecf20Sopenharmony_ci			write_inode_now(inode, 1);
8518c2ecf20Sopenharmony_ci			goto again;
8528c2ecf20Sopenharmony_ci		}
8538c2ecf20Sopenharmony_ci	} else {
8548c2ecf20Sopenharmony_ci		ORANGEFS_I(inode)->attr_valid = iattr->ia_valid;
8558c2ecf20Sopenharmony_ci		ORANGEFS_I(inode)->attr_uid = current_fsuid();
8568c2ecf20Sopenharmony_ci		ORANGEFS_I(inode)->attr_gid = current_fsgid();
8578c2ecf20Sopenharmony_ci	}
8588c2ecf20Sopenharmony_ci	setattr_copy(inode, iattr);
8598c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
8608c2ecf20Sopenharmony_ci	mark_inode_dirty(inode);
8618c2ecf20Sopenharmony_ci
8628c2ecf20Sopenharmony_ci	if (iattr->ia_valid & ATTR_MODE)
8638c2ecf20Sopenharmony_ci		/* change mod on a file that has ACLs */
8648c2ecf20Sopenharmony_ci		ret = posix_acl_chmod(inode, inode->i_mode);
8658c2ecf20Sopenharmony_ci
8668c2ecf20Sopenharmony_ci	ret = 0;
8678c2ecf20Sopenharmony_ciout:
8688c2ecf20Sopenharmony_ci	return ret;
8698c2ecf20Sopenharmony_ci}
8708c2ecf20Sopenharmony_ci
8718c2ecf20Sopenharmony_ci/*
8728c2ecf20Sopenharmony_ci * Change attributes of an object referenced by dentry.
8738c2ecf20Sopenharmony_ci */
8748c2ecf20Sopenharmony_ciint orangefs_setattr(struct dentry *dentry, struct iattr *iattr)
8758c2ecf20Sopenharmony_ci{
8768c2ecf20Sopenharmony_ci	int ret;
8778c2ecf20Sopenharmony_ci	gossip_debug(GOSSIP_INODE_DEBUG, "__orangefs_setattr: called on %pd\n",
8788c2ecf20Sopenharmony_ci	    dentry);
8798c2ecf20Sopenharmony_ci	ret = setattr_prepare(dentry, iattr);
8808c2ecf20Sopenharmony_ci	if (ret)
8818c2ecf20Sopenharmony_ci	        goto out;
8828c2ecf20Sopenharmony_ci	ret = __orangefs_setattr(d_inode(dentry), iattr);
8838c2ecf20Sopenharmony_ci	sync_inode_metadata(d_inode(dentry), 1);
8848c2ecf20Sopenharmony_ciout:
8858c2ecf20Sopenharmony_ci	gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_setattr: returning %d\n",
8868c2ecf20Sopenharmony_ci	    ret);
8878c2ecf20Sopenharmony_ci	return ret;
8888c2ecf20Sopenharmony_ci}
8898c2ecf20Sopenharmony_ci
8908c2ecf20Sopenharmony_ci/*
8918c2ecf20Sopenharmony_ci * Obtain attributes of an object given a dentry
8928c2ecf20Sopenharmony_ci */
8938c2ecf20Sopenharmony_ciint orangefs_getattr(const struct path *path, struct kstat *stat,
8948c2ecf20Sopenharmony_ci		     u32 request_mask, unsigned int flags)
8958c2ecf20Sopenharmony_ci{
8968c2ecf20Sopenharmony_ci	int ret;
8978c2ecf20Sopenharmony_ci	struct inode *inode = path->dentry->d_inode;
8988c2ecf20Sopenharmony_ci
8998c2ecf20Sopenharmony_ci	gossip_debug(GOSSIP_INODE_DEBUG,
9008c2ecf20Sopenharmony_ci		     "orangefs_getattr: called on %pd mask %u\n",
9018c2ecf20Sopenharmony_ci		     path->dentry, request_mask);
9028c2ecf20Sopenharmony_ci
9038c2ecf20Sopenharmony_ci	ret = orangefs_inode_getattr(inode,
9048c2ecf20Sopenharmony_ci	    request_mask & STATX_SIZE ? ORANGEFS_GETATTR_SIZE : 0);
9058c2ecf20Sopenharmony_ci	if (ret == 0) {
9068c2ecf20Sopenharmony_ci		generic_fillattr(inode, stat);
9078c2ecf20Sopenharmony_ci
9088c2ecf20Sopenharmony_ci		/* override block size reported to stat */
9098c2ecf20Sopenharmony_ci		if (!(request_mask & STATX_SIZE))
9108c2ecf20Sopenharmony_ci			stat->result_mask &= ~STATX_SIZE;
9118c2ecf20Sopenharmony_ci
9128c2ecf20Sopenharmony_ci		stat->attributes_mask = STATX_ATTR_IMMUTABLE |
9138c2ecf20Sopenharmony_ci		    STATX_ATTR_APPEND;
9148c2ecf20Sopenharmony_ci		if (inode->i_flags & S_IMMUTABLE)
9158c2ecf20Sopenharmony_ci			stat->attributes |= STATX_ATTR_IMMUTABLE;
9168c2ecf20Sopenharmony_ci		if (inode->i_flags & S_APPEND)
9178c2ecf20Sopenharmony_ci			stat->attributes |= STATX_ATTR_APPEND;
9188c2ecf20Sopenharmony_ci	}
9198c2ecf20Sopenharmony_ci	return ret;
9208c2ecf20Sopenharmony_ci}
9218c2ecf20Sopenharmony_ci
9228c2ecf20Sopenharmony_ciint orangefs_permission(struct inode *inode, int mask)
9238c2ecf20Sopenharmony_ci{
9248c2ecf20Sopenharmony_ci	int ret;
9258c2ecf20Sopenharmony_ci
9268c2ecf20Sopenharmony_ci	if (mask & MAY_NOT_BLOCK)
9278c2ecf20Sopenharmony_ci		return -ECHILD;
9288c2ecf20Sopenharmony_ci
9298c2ecf20Sopenharmony_ci	gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__);
9308c2ecf20Sopenharmony_ci
9318c2ecf20Sopenharmony_ci	/* Make sure the permission (and other common attrs) are up to date. */
9328c2ecf20Sopenharmony_ci	ret = orangefs_inode_getattr(inode, 0);
9338c2ecf20Sopenharmony_ci	if (ret < 0)
9348c2ecf20Sopenharmony_ci		return ret;
9358c2ecf20Sopenharmony_ci
9368c2ecf20Sopenharmony_ci	return generic_permission(inode, mask);
9378c2ecf20Sopenharmony_ci}
9388c2ecf20Sopenharmony_ci
9398c2ecf20Sopenharmony_ciint orangefs_update_time(struct inode *inode, struct timespec64 *time, int flags)
9408c2ecf20Sopenharmony_ci{
9418c2ecf20Sopenharmony_ci	struct iattr iattr;
9428c2ecf20Sopenharmony_ci	gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_update_time: %pU\n",
9438c2ecf20Sopenharmony_ci	    get_khandle_from_ino(inode));
9448c2ecf20Sopenharmony_ci	generic_update_time(inode, time, flags);
9458c2ecf20Sopenharmony_ci	memset(&iattr, 0, sizeof iattr);
9468c2ecf20Sopenharmony_ci        if (flags & S_ATIME)
9478c2ecf20Sopenharmony_ci		iattr.ia_valid |= ATTR_ATIME;
9488c2ecf20Sopenharmony_ci	if (flags & S_CTIME)
9498c2ecf20Sopenharmony_ci		iattr.ia_valid |= ATTR_CTIME;
9508c2ecf20Sopenharmony_ci	if (flags & S_MTIME)
9518c2ecf20Sopenharmony_ci		iattr.ia_valid |= ATTR_MTIME;
9528c2ecf20Sopenharmony_ci	return __orangefs_setattr(inode, &iattr);
9538c2ecf20Sopenharmony_ci}
9548c2ecf20Sopenharmony_ci
9558c2ecf20Sopenharmony_ci/* ORANGEFS2 implementation of VFS inode operations for files */
9568c2ecf20Sopenharmony_cistatic const struct inode_operations orangefs_file_inode_operations = {
9578c2ecf20Sopenharmony_ci	.get_acl = orangefs_get_acl,
9588c2ecf20Sopenharmony_ci	.set_acl = orangefs_set_acl,
9598c2ecf20Sopenharmony_ci	.setattr = orangefs_setattr,
9608c2ecf20Sopenharmony_ci	.getattr = orangefs_getattr,
9618c2ecf20Sopenharmony_ci	.listxattr = orangefs_listxattr,
9628c2ecf20Sopenharmony_ci	.permission = orangefs_permission,
9638c2ecf20Sopenharmony_ci	.update_time = orangefs_update_time,
9648c2ecf20Sopenharmony_ci};
9658c2ecf20Sopenharmony_ci
9668c2ecf20Sopenharmony_cistatic int orangefs_init_iops(struct inode *inode)
9678c2ecf20Sopenharmony_ci{
9688c2ecf20Sopenharmony_ci	inode->i_mapping->a_ops = &orangefs_address_operations;
9698c2ecf20Sopenharmony_ci
9708c2ecf20Sopenharmony_ci	switch (inode->i_mode & S_IFMT) {
9718c2ecf20Sopenharmony_ci	case S_IFREG:
9728c2ecf20Sopenharmony_ci		inode->i_op = &orangefs_file_inode_operations;
9738c2ecf20Sopenharmony_ci		inode->i_fop = &orangefs_file_operations;
9748c2ecf20Sopenharmony_ci		break;
9758c2ecf20Sopenharmony_ci	case S_IFLNK:
9768c2ecf20Sopenharmony_ci		inode->i_op = &orangefs_symlink_inode_operations;
9778c2ecf20Sopenharmony_ci		break;
9788c2ecf20Sopenharmony_ci	case S_IFDIR:
9798c2ecf20Sopenharmony_ci		inode->i_op = &orangefs_dir_inode_operations;
9808c2ecf20Sopenharmony_ci		inode->i_fop = &orangefs_dir_operations;
9818c2ecf20Sopenharmony_ci		break;
9828c2ecf20Sopenharmony_ci	default:
9838c2ecf20Sopenharmony_ci		gossip_debug(GOSSIP_INODE_DEBUG,
9848c2ecf20Sopenharmony_ci			     "%s: unsupported mode\n",
9858c2ecf20Sopenharmony_ci			     __func__);
9868c2ecf20Sopenharmony_ci		return -EINVAL;
9878c2ecf20Sopenharmony_ci	}
9888c2ecf20Sopenharmony_ci
9898c2ecf20Sopenharmony_ci	return 0;
9908c2ecf20Sopenharmony_ci}
9918c2ecf20Sopenharmony_ci
9928c2ecf20Sopenharmony_ci/*
9938c2ecf20Sopenharmony_ci * Given an ORANGEFS object identifier (fsid, handle), convert it into
9948c2ecf20Sopenharmony_ci * a ino_t type that will be used as a hash-index from where the handle will
9958c2ecf20Sopenharmony_ci * be searched for in the VFS hash table of inodes.
9968c2ecf20Sopenharmony_ci */
9978c2ecf20Sopenharmony_cistatic inline ino_t orangefs_handle_hash(struct orangefs_object_kref *ref)
9988c2ecf20Sopenharmony_ci{
9998c2ecf20Sopenharmony_ci	if (!ref)
10008c2ecf20Sopenharmony_ci		return 0;
10018c2ecf20Sopenharmony_ci	return orangefs_khandle_to_ino(&(ref->khandle));
10028c2ecf20Sopenharmony_ci}
10038c2ecf20Sopenharmony_ci
10048c2ecf20Sopenharmony_ci/*
10058c2ecf20Sopenharmony_ci * Called to set up an inode from iget5_locked.
10068c2ecf20Sopenharmony_ci */
10078c2ecf20Sopenharmony_cistatic int orangefs_set_inode(struct inode *inode, void *data)
10088c2ecf20Sopenharmony_ci{
10098c2ecf20Sopenharmony_ci	struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
10108c2ecf20Sopenharmony_ci	ORANGEFS_I(inode)->refn.fs_id = ref->fs_id;
10118c2ecf20Sopenharmony_ci	ORANGEFS_I(inode)->refn.khandle = ref->khandle;
10128c2ecf20Sopenharmony_ci	ORANGEFS_I(inode)->attr_valid = 0;
10138c2ecf20Sopenharmony_ci	hash_init(ORANGEFS_I(inode)->xattr_cache);
10148c2ecf20Sopenharmony_ci	ORANGEFS_I(inode)->mapping_time = jiffies - 1;
10158c2ecf20Sopenharmony_ci	ORANGEFS_I(inode)->bitlock = 0;
10168c2ecf20Sopenharmony_ci	return 0;
10178c2ecf20Sopenharmony_ci}
10188c2ecf20Sopenharmony_ci
10198c2ecf20Sopenharmony_ci/*
10208c2ecf20Sopenharmony_ci * Called to determine if handles match.
10218c2ecf20Sopenharmony_ci */
10228c2ecf20Sopenharmony_cistatic int orangefs_test_inode(struct inode *inode, void *data)
10238c2ecf20Sopenharmony_ci{
10248c2ecf20Sopenharmony_ci	struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
10258c2ecf20Sopenharmony_ci	struct orangefs_inode_s *orangefs_inode = NULL;
10268c2ecf20Sopenharmony_ci
10278c2ecf20Sopenharmony_ci	orangefs_inode = ORANGEFS_I(inode);
10288c2ecf20Sopenharmony_ci	/* test handles and fs_ids... */
10298c2ecf20Sopenharmony_ci	return (!ORANGEFS_khandle_cmp(&(orangefs_inode->refn.khandle),
10308c2ecf20Sopenharmony_ci				&(ref->khandle)) &&
10318c2ecf20Sopenharmony_ci			orangefs_inode->refn.fs_id == ref->fs_id);
10328c2ecf20Sopenharmony_ci}
10338c2ecf20Sopenharmony_ci
10348c2ecf20Sopenharmony_ci/*
10358c2ecf20Sopenharmony_ci * Front-end to lookup the inode-cache maintained by the VFS using the ORANGEFS
10368c2ecf20Sopenharmony_ci * file handle.
10378c2ecf20Sopenharmony_ci *
10388c2ecf20Sopenharmony_ci * @sb: the file system super block instance.
10398c2ecf20Sopenharmony_ci * @ref: The ORANGEFS object for which we are trying to locate an inode.
10408c2ecf20Sopenharmony_ci */
10418c2ecf20Sopenharmony_cistruct inode *orangefs_iget(struct super_block *sb,
10428c2ecf20Sopenharmony_ci		struct orangefs_object_kref *ref)
10438c2ecf20Sopenharmony_ci{
10448c2ecf20Sopenharmony_ci	struct inode *inode = NULL;
10458c2ecf20Sopenharmony_ci	unsigned long hash;
10468c2ecf20Sopenharmony_ci	int error;
10478c2ecf20Sopenharmony_ci
10488c2ecf20Sopenharmony_ci	hash = orangefs_handle_hash(ref);
10498c2ecf20Sopenharmony_ci	inode = iget5_locked(sb,
10508c2ecf20Sopenharmony_ci			hash,
10518c2ecf20Sopenharmony_ci			orangefs_test_inode,
10528c2ecf20Sopenharmony_ci			orangefs_set_inode,
10538c2ecf20Sopenharmony_ci			ref);
10548c2ecf20Sopenharmony_ci
10558c2ecf20Sopenharmony_ci	if (!inode)
10568c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
10578c2ecf20Sopenharmony_ci
10588c2ecf20Sopenharmony_ci	if (!(inode->i_state & I_NEW))
10598c2ecf20Sopenharmony_ci		return inode;
10608c2ecf20Sopenharmony_ci
10618c2ecf20Sopenharmony_ci	error = orangefs_inode_getattr(inode, ORANGEFS_GETATTR_NEW);
10628c2ecf20Sopenharmony_ci	if (error) {
10638c2ecf20Sopenharmony_ci		iget_failed(inode);
10648c2ecf20Sopenharmony_ci		return ERR_PTR(error);
10658c2ecf20Sopenharmony_ci	}
10668c2ecf20Sopenharmony_ci
10678c2ecf20Sopenharmony_ci	inode->i_ino = hash;	/* needed for stat etc */
10688c2ecf20Sopenharmony_ci	orangefs_init_iops(inode);
10698c2ecf20Sopenharmony_ci	unlock_new_inode(inode);
10708c2ecf20Sopenharmony_ci
10718c2ecf20Sopenharmony_ci	gossip_debug(GOSSIP_INODE_DEBUG,
10728c2ecf20Sopenharmony_ci		     "iget handle %pU, fsid %d hash %ld i_ino %lu\n",
10738c2ecf20Sopenharmony_ci		     &ref->khandle,
10748c2ecf20Sopenharmony_ci		     ref->fs_id,
10758c2ecf20Sopenharmony_ci		     hash,
10768c2ecf20Sopenharmony_ci		     inode->i_ino);
10778c2ecf20Sopenharmony_ci
10788c2ecf20Sopenharmony_ci	return inode;
10798c2ecf20Sopenharmony_ci}
10808c2ecf20Sopenharmony_ci
10818c2ecf20Sopenharmony_ci/*
10828c2ecf20Sopenharmony_ci * Allocate an inode for a newly created file and insert it into the inode hash.
10838c2ecf20Sopenharmony_ci */
10848c2ecf20Sopenharmony_cistruct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir,
10858c2ecf20Sopenharmony_ci		int mode, dev_t dev, struct orangefs_object_kref *ref)
10868c2ecf20Sopenharmony_ci{
10878c2ecf20Sopenharmony_ci	unsigned long hash = orangefs_handle_hash(ref);
10888c2ecf20Sopenharmony_ci	struct inode *inode;
10898c2ecf20Sopenharmony_ci	int error;
10908c2ecf20Sopenharmony_ci
10918c2ecf20Sopenharmony_ci	gossip_debug(GOSSIP_INODE_DEBUG,
10928c2ecf20Sopenharmony_ci		     "%s:(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n",
10938c2ecf20Sopenharmony_ci		     __func__,
10948c2ecf20Sopenharmony_ci		     sb,
10958c2ecf20Sopenharmony_ci		     MAJOR(dev),
10968c2ecf20Sopenharmony_ci		     MINOR(dev),
10978c2ecf20Sopenharmony_ci		     mode);
10988c2ecf20Sopenharmony_ci
10998c2ecf20Sopenharmony_ci	inode = new_inode(sb);
11008c2ecf20Sopenharmony_ci	if (!inode)
11018c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
11028c2ecf20Sopenharmony_ci
11038c2ecf20Sopenharmony_ci	orangefs_set_inode(inode, ref);
11048c2ecf20Sopenharmony_ci	inode->i_ino = hash;	/* needed for stat etc */
11058c2ecf20Sopenharmony_ci
11068c2ecf20Sopenharmony_ci	error = orangefs_inode_getattr(inode, ORANGEFS_GETATTR_NEW);
11078c2ecf20Sopenharmony_ci	if (error)
11088c2ecf20Sopenharmony_ci		goto out_iput;
11098c2ecf20Sopenharmony_ci
11108c2ecf20Sopenharmony_ci	orangefs_init_iops(inode);
11118c2ecf20Sopenharmony_ci	inode->i_rdev = dev;
11128c2ecf20Sopenharmony_ci
11138c2ecf20Sopenharmony_ci	error = insert_inode_locked4(inode, hash, orangefs_test_inode, ref);
11148c2ecf20Sopenharmony_ci	if (error < 0)
11158c2ecf20Sopenharmony_ci		goto out_iput;
11168c2ecf20Sopenharmony_ci
11178c2ecf20Sopenharmony_ci	gossip_debug(GOSSIP_INODE_DEBUG,
11188c2ecf20Sopenharmony_ci		     "Initializing ACL's for inode %pU\n",
11198c2ecf20Sopenharmony_ci		     get_khandle_from_ino(inode));
11208c2ecf20Sopenharmony_ci	orangefs_init_acl(inode, dir);
11218c2ecf20Sopenharmony_ci	return inode;
11228c2ecf20Sopenharmony_ci
11238c2ecf20Sopenharmony_ciout_iput:
11248c2ecf20Sopenharmony_ci	iput(inode);
11258c2ecf20Sopenharmony_ci	return ERR_PTR(error);
11268c2ecf20Sopenharmony_ci}
1127