xref: /kernel/linux/linux-6.6/fs/smb/client/file.c (revision 62306a36)
1// SPDX-License-Identifier: LGPL-2.1
2/*
3 *
4 *   vfs operations that deal with files
5 *
6 *   Copyright (C) International Business Machines  Corp., 2002,2010
7 *   Author(s): Steve French (sfrench@us.ibm.com)
8 *              Jeremy Allison (jra@samba.org)
9 *
10 */
11#include <linux/fs.h>
12#include <linux/filelock.h>
13#include <linux/backing-dev.h>
14#include <linux/stat.h>
15#include <linux/fcntl.h>
16#include <linux/pagemap.h>
17#include <linux/pagevec.h>
18#include <linux/writeback.h>
19#include <linux/task_io_accounting_ops.h>
20#include <linux/delay.h>
21#include <linux/mount.h>
22#include <linux/slab.h>
23#include <linux/swap.h>
24#include <linux/mm.h>
25#include <asm/div64.h>
26#include "cifsfs.h"
27#include "cifspdu.h"
28#include "cifsglob.h"
29#include "cifsproto.h"
30#include "smb2proto.h"
31#include "cifs_unicode.h"
32#include "cifs_debug.h"
33#include "cifs_fs_sb.h"
34#include "fscache.h"
35#include "smbdirect.h"
36#include "fs_context.h"
37#include "cifs_ioctl.h"
38#include "cached_dir.h"
39
40/*
41 * Remove the dirty flags from a span of pages.
42 */
43static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44{
45	struct address_space *mapping = inode->i_mapping;
46	struct folio *folio;
47	pgoff_t end;
48
49	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50
51	rcu_read_lock();
52
53	end = (start + len - 1) / PAGE_SIZE;
54	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55		if (xas_retry(&xas, folio))
56			continue;
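		/*
		 * folio_lock() can sleep, so pause the xarray walk and drop the
		 * RCU read lock before taking it, then resume the walk after.
		 */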
57		xas_pause(&xas);
58		rcu_read_unlock();
59		folio_lock(folio);
60		folio_clear_dirty_for_io(folio);
61		folio_unlock(folio);
62		rcu_read_lock();
63	}
64
65	rcu_read_unlock();
66}
67
68/*
69 * Completion of write to server.
70 */
71void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72{
73	struct address_space *mapping = inode->i_mapping;
74	struct folio *folio;
75	pgoff_t end;
76
77	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78
79	if (!len)
80		return;
81
82	rcu_read_lock();
83
84	end = (start + len - 1) / PAGE_SIZE;
85	xas_for_each(&xas, folio, end) {
86		if (xas_retry(&xas, folio))
87			continue;
88		if (!folio_test_writeback(folio)) {
89			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90				  len, start, folio->index, end);
91			continue;
92		}
93
94		folio_detach_private(folio);
95		folio_end_writeback(folio);
96	}
97
98	rcu_read_unlock();
99}
100
101/*
102 * Failure of write to server.
103 */
104void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105{
106	struct address_space *mapping = inode->i_mapping;
107	struct folio *folio;
108	pgoff_t end;
109
110	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111
112	if (!len)
113		return;
114
115	rcu_read_lock();
116
117	end = (start + len - 1) / PAGE_SIZE;
118	xas_for_each(&xas, folio, end) {
119		if (xas_retry(&xas, folio))
120			continue;
121		if (!folio_test_writeback(folio)) {
122			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123				  len, start, folio->index, end);
124			continue;
125		}
126
127		folio_set_error(folio);
128		folio_end_writeback(folio);
129	}
130
131	rcu_read_unlock();
132}
133
134/*
135 * Redirty pages after a temporary failure.
136 */
137void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138{
139	struct address_space *mapping = inode->i_mapping;
140	struct folio *folio;
141	pgoff_t end;
142
143	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144
145	if (!len)
146		return;
147
148	rcu_read_lock();
149
150	end = (start + len - 1) / PAGE_SIZE;
151	xas_for_each(&xas, folio, end) {
152		if (!folio_test_writeback(folio)) {
153			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154				  len, start, folio->index, end);
155			continue;
156		}
157
158		filemap_dirty_folio(folio->mapping, folio);
159		folio_end_writeback(folio);
160	}
161
162	rcu_read_unlock();
163}
164
165/*
166 * Mark all open files on tree connections as invalid, since they
167 * were closed when the session to the server was lost.
168 */
169void
170cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171{
172	struct cifsFileInfo *open_file = NULL;
173	struct list_head *tmp;
174	struct list_head *tmp1;
175
176	/* only send once per connect */
177	spin_lock(&tcon->tc_lock);
178	if (tcon->need_reconnect)
179		tcon->status = TID_NEED_RECON;
180
181	if (tcon->status != TID_NEED_RECON) {
182		spin_unlock(&tcon->tc_lock);
183		return;
184	}
185	tcon->status = TID_IN_FILES_INVALIDATE;
186	spin_unlock(&tcon->tc_lock);
187
188	/* list all files open on tree connection and mark them invalid */
189	spin_lock(&tcon->open_file_lock);
190	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
191		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
192		open_file->invalidHandle = true;
193		open_file->oplock_break_cancelled = true;
194	}
195	spin_unlock(&tcon->open_file_lock);
196
197	invalidate_all_cached_dirs(tcon);
198	spin_lock(&tcon->tc_lock);
199	if (tcon->status == TID_IN_FILES_INVALIDATE)
200		tcon->status = TID_NEED_TCON;
201	spin_unlock(&tcon->tc_lock);
202
203	/*
204	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
205	 * to this tcon.
206	 */
207}
208
209static inline int cifs_convert_flags(unsigned int flags)
210{
211	if ((flags & O_ACCMODE) == O_RDONLY)
212		return GENERIC_READ;
213	else if ((flags & O_ACCMODE) == O_WRONLY)
214		return GENERIC_WRITE;
215	else if ((flags & O_ACCMODE) == O_RDWR) {
216		/* GENERIC_ALL is too much permission to request and
217		   can cause unnecessary access-denied errors on create */
218		/* return GENERIC_ALL; */
219		return (GENERIC_READ | GENERIC_WRITE);
220	}
221
222	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
223		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
224		FILE_READ_DATA);
225}
226
227#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
228static u32 cifs_posix_convert_flags(unsigned int flags)
229{
230	u32 posix_flags = 0;
231
232	if ((flags & O_ACCMODE) == O_RDONLY)
233		posix_flags = SMB_O_RDONLY;
234	else if ((flags & O_ACCMODE) == O_WRONLY)
235		posix_flags = SMB_O_WRONLY;
236	else if ((flags & O_ACCMODE) == O_RDWR)
237		posix_flags = SMB_O_RDWR;
238
239	if (flags & O_CREAT) {
240		posix_flags |= SMB_O_CREAT;
241		if (flags & O_EXCL)
242			posix_flags |= SMB_O_EXCL;
243	} else if (flags & O_EXCL)
244		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
245			 current->comm, current->tgid);
246
247	if (flags & O_TRUNC)
248		posix_flags |= SMB_O_TRUNC;
249	/* be safe and imply O_SYNC for O_DSYNC */
250	if (flags & O_DSYNC)
251		posix_flags |= SMB_O_SYNC;
252	if (flags & O_DIRECTORY)
253		posix_flags |= SMB_O_DIRECTORY;
254	if (flags & O_NOFOLLOW)
255		posix_flags |= SMB_O_NOFOLLOW;
256	if (flags & O_DIRECT)
257		posix_flags |= SMB_O_DIRECT;
258
259	return posix_flags;
260}
261#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
262
263static inline int cifs_get_disposition(unsigned int flags)
264{
265	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
266		return FILE_CREATE;
267	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
268		return FILE_OVERWRITE_IF;
269	else if ((flags & O_CREAT) == O_CREAT)
270		return FILE_OPEN_IF;
271	else if ((flags & O_TRUNC) == O_TRUNC)
272		return FILE_OVERWRITE;
273	else
274		return FILE_OPEN;
275}
276
277#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
278int cifs_posix_open(const char *full_path, struct inode **pinode,
279			struct super_block *sb, int mode, unsigned int f_flags,
280			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
281{
282	int rc;
283	FILE_UNIX_BASIC_INFO *presp_data;
284	__u32 posix_flags = 0;
285	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
286	struct cifs_fattr fattr;
287	struct tcon_link *tlink;
288	struct cifs_tcon *tcon;
289
290	cifs_dbg(FYI, "posix open %s\n", full_path);
291
292	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
293	if (presp_data == NULL)
294		return -ENOMEM;
295
296	tlink = cifs_sb_tlink(cifs_sb);
297	if (IS_ERR(tlink)) {
298		rc = PTR_ERR(tlink);
299		goto posix_open_ret;
300	}
301
302	tcon = tlink_tcon(tlink);
303	mode &= ~current_umask();
304
305	posix_flags = cifs_posix_convert_flags(f_flags);
306	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
307			     poplock, full_path, cifs_sb->local_nls,
308			     cifs_remap(cifs_sb));
309	cifs_put_tlink(tlink);
310
311	if (rc)
312		goto posix_open_ret;
313
314	if (presp_data->Type == cpu_to_le32(-1))
315		goto posix_open_ret; /* open ok, caller does qpathinfo */
316
317	if (!pinode)
318		goto posix_open_ret; /* caller does not need info */
319
320	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
321
322	/* get new inode and set it up */
323	if (*pinode == NULL) {
324		cifs_fill_uniqueid(sb, &fattr);
325		*pinode = cifs_iget(sb, &fattr);
326		if (!*pinode) {
327			rc = -ENOMEM;
328			goto posix_open_ret;
329		}
330	} else {
331		cifs_revalidate_mapping(*pinode);
332		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
333	}
334
335posix_open_ret:
336	kfree(presp_data);
337	return rc;
338}
339#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
340
341static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
342			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
343			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
344{
345	int rc;
346	int desired_access;
347	int disposition;
348	int create_options = CREATE_NOT_DIR;
349	struct TCP_Server_Info *server = tcon->ses->server;
350	struct cifs_open_parms oparms;
351
352	if (!server->ops->open)
353		return -ENOSYS;
354
355	desired_access = cifs_convert_flags(f_flags);
356
357/*********************************************************************
358 *  open flag mapping table:
359 *
360 *	POSIX Flag            CIFS Disposition
361 *	----------            ----------------
362 *	O_CREAT               FILE_OPEN_IF
363 *	O_CREAT | O_EXCL      FILE_CREATE
364 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
365 *	O_TRUNC               FILE_OVERWRITE
366 *	none of the above     FILE_OPEN
367 *
368 *	Note that there is no direct match between any POSIX flag
369 *	combination and the FILE_SUPERSEDE disposition (ie create whether
370 *	or not the file exists); O_CREAT | O_TRUNC is similar, but it
371 *	truncates the existing file rather than recreating it as
372 *	FILE_SUPERSEDE does (which uses the attributes / metadata passed
373 *	in on the open call).
374 *
375 *	O_SYNC is a reasonable match to the CIFS writethrough flag and the
376 *	read/write flags match reasonably.  O_LARGEFILE is irrelevant since
377 *	largefile support is always used by this client.  Flags O_APPEND,
378 *	O_DIRECT, O_DIRECTORY, O_FASYNC, O_NOFOLLOW and O_NONBLOCK need further investigation.
379 *********************************************************************/
380
381	disposition = cifs_get_disposition(f_flags);
382
383	/* BB pass O_SYNC flag through on file attributes .. BB */
384
385	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
386	if (f_flags & O_SYNC)
387		create_options |= CREATE_WRITE_THROUGH;
388
389	if (f_flags & O_DIRECT)
390		create_options |= CREATE_NO_BUFFER;
391
392	oparms = (struct cifs_open_parms) {
393		.tcon = tcon,
394		.cifs_sb = cifs_sb,
395		.desired_access = desired_access,
396		.create_options = cifs_create_options(cifs_sb, create_options),
397		.disposition = disposition,
398		.path = full_path,
399		.fid = fid,
400	};
401
402	rc = server->ops->open(xid, &oparms, oplock, buf);
403	if (rc)
404		return rc;
405
406	/* TODO: Add support for calling posix query info, passing in the fid */
407	if (tcon->unix_ext)
408		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
409					      xid);
410	else
411		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
412					 xid, fid);
413
414	if (rc) {
415		server->ops->close(xid, tcon, fid);
416		if (rc == -ESTALE)
417			rc = -EOPENSTALE;
418	}
419
420	return rc;
421}
422
423static bool
424cifs_has_mand_locks(struct cifsInodeInfo *cinode)
425{
426	struct cifs_fid_locks *cur;
427	bool has_locks = false;
428
429	down_read(&cinode->lock_sem);
430	list_for_each_entry(cur, &cinode->llist, llist) {
431		if (!list_empty(&cur->locks)) {
432			has_locks = true;
433			break;
434		}
435	}
436	up_read(&cinode->lock_sem);
437	return has_locks;
438}
439
440void
441cifs_down_write(struct rw_semaphore *sem)
442{
443	while (!down_write_trylock(sem))
444		msleep(10);
445}
446
447static void cifsFileInfo_put_work(struct work_struct *work);
448
449struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
450				       struct tcon_link *tlink, __u32 oplock,
451				       const char *symlink_target)
452{
453	struct dentry *dentry = file_dentry(file);
454	struct inode *inode = d_inode(dentry);
455	struct cifsInodeInfo *cinode = CIFS_I(inode);
456	struct cifsFileInfo *cfile;
457	struct cifs_fid_locks *fdlocks;
458	struct cifs_tcon *tcon = tlink_tcon(tlink);
459	struct TCP_Server_Info *server = tcon->ses->server;
460
461	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
462	if (cfile == NULL)
463		return cfile;
464
465	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
466	if (!fdlocks) {
467		kfree(cfile);
468		return NULL;
469	}
470
471	if (symlink_target) {
472		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
473		if (!cfile->symlink_target) {
474			kfree(fdlocks);
475			kfree(cfile);
476			return NULL;
477		}
478	}
479
480	INIT_LIST_HEAD(&fdlocks->locks);
481	fdlocks->cfile = cfile;
482	cfile->llist = fdlocks;
483
484	cfile->count = 1;
485	cfile->pid = current->tgid;
486	cfile->uid = current_fsuid();
487	cfile->dentry = dget(dentry);
488	cfile->f_flags = file->f_flags;
489	cfile->invalidHandle = false;
490	cfile->deferred_close_scheduled = false;
491	cfile->tlink = cifs_get_tlink(tlink);
492	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
493	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
494	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
495	mutex_init(&cfile->fh_mutex);
496	spin_lock_init(&cfile->file_info_lock);
497
498	cifs_sb_active(inode->i_sb);
499
500	/*
501	 * If the server returned a read oplock and we have mandatory brlocks,
502	 * set oplock level to None.
503	 */
504	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
505		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
506		oplock = 0;
507	}
508
509	cifs_down_write(&cinode->lock_sem);
510	list_add(&fdlocks->llist, &cinode->llist);
511	up_write(&cinode->lock_sem);
512
513	spin_lock(&tcon->open_file_lock);
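	/*
	 * Pick up any oplock level recorded on the pending open (e.g. by a
	 * lease break that raced with this open) in preference to our own.
	 */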
514	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
515		oplock = fid->pending_open->oplock;
516	list_del(&fid->pending_open->olist);
517
518	fid->purge_cache = false;
519	server->ops->set_fid(cfile, fid, oplock);
520
521	list_add(&cfile->tlist, &tcon->openFileList);
522	atomic_inc(&tcon->num_local_opens);
523
524	/* if this is a readable file instance, put it first in the list */
525	spin_lock(&cinode->open_file_lock);
526	if (file->f_mode & FMODE_READ)
527		list_add(&cfile->flist, &cinode->openFileList);
528	else
529		list_add_tail(&cfile->flist, &cinode->openFileList);
530	spin_unlock(&cinode->open_file_lock);
531	spin_unlock(&tcon->open_file_lock);
532
533	if (fid->purge_cache)
534		cifs_zap_mapping(inode);
535
536	file->private_data = cfile;
537	return cfile;
538}
539
540struct cifsFileInfo *
541cifsFileInfo_get(struct cifsFileInfo *cifs_file)
542{
543	spin_lock(&cifs_file->file_info_lock);
544	cifsFileInfo_get_locked(cifs_file);
545	spin_unlock(&cifs_file->file_info_lock);
546	return cifs_file;
547}
548
549static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
550{
551	struct inode *inode = d_inode(cifs_file->dentry);
552	struct cifsInodeInfo *cifsi = CIFS_I(inode);
553	struct cifsLockInfo *li, *tmp;
554	struct super_block *sb = inode->i_sb;
555
556	/*
557	 * Delete any outstanding lock records. We'll lose them when the file
558	 * is closed anyway.
559	 */
560	cifs_down_write(&cifsi->lock_sem);
561	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
562		list_del(&li->llist);
563		cifs_del_lock_waiters(li);
564		kfree(li);
565	}
566	list_del(&cifs_file->llist->llist);
567	kfree(cifs_file->llist);
568	up_write(&cifsi->lock_sem);
569
570	cifs_put_tlink(cifs_file->tlink);
571	dput(cifs_file->dentry);
572	cifs_sb_deactive(sb);
573	kfree(cifs_file->symlink_target);
574	kfree(cifs_file);
575}
576
577static void cifsFileInfo_put_work(struct work_struct *work)
578{
579	struct cifsFileInfo *cifs_file = container_of(work,
580			struct cifsFileInfo, put);
581
582	cifsFileInfo_put_final(cifs_file);
583}
584
585/**
586 * cifsFileInfo_put - release a reference of file priv data
587 *
588 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
589 *
590 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
591 */
592void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
593{
594	_cifsFileInfo_put(cifs_file, true, true);
595}
596
597/**
598 * _cifsFileInfo_put - release a reference of file priv data
599 *
600 * This may involve closing the filehandle @cifs_file out on the
601 * server. Must be called without holding tcon->open_file_lock,
602 * cinode->open_file_lock and cifs_file->file_info_lock.
603 *
604 * If @wait_for_oplock_handler is true and we are releasing the last
605 * reference, wait for any running oplock break handler of the file
606 * and cancel any pending one.
607 *
608 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
609 * @wait_oplock_handler: must be false if called from oplock_break_handler
610 * @offload:	defer the final release to a workqueue; false on close and oplock breaks
611 *
612 */
613void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
614		       bool wait_oplock_handler, bool offload)
615{
616	struct inode *inode = d_inode(cifs_file->dentry);
617	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
618	struct TCP_Server_Info *server = tcon->ses->server;
619	struct cifsInodeInfo *cifsi = CIFS_I(inode);
620	struct super_block *sb = inode->i_sb;
621	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
622	struct cifs_fid fid = {};
623	struct cifs_pending_open open;
624	bool oplock_break_cancelled;
625
626	spin_lock(&tcon->open_file_lock);
627	spin_lock(&cifsi->open_file_lock);
628	spin_lock(&cifs_file->file_info_lock);
629	if (--cifs_file->count > 0) {
630		spin_unlock(&cifs_file->file_info_lock);
631		spin_unlock(&cifsi->open_file_lock);
632		spin_unlock(&tcon->open_file_lock);
633		return;
634	}
635	spin_unlock(&cifs_file->file_info_lock);
636
637	if (server->ops->get_lease_key)
638		server->ops->get_lease_key(inode, &fid);
639
640	/* store open in pending opens to make sure we don't miss lease break */
641	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
642
643	/* remove it from the lists */
644	list_del(&cifs_file->flist);
645	list_del(&cifs_file->tlist);
646	atomic_dec(&tcon->num_local_opens);
647
648	if (list_empty(&cifsi->openFileList)) {
649		cifs_dbg(FYI, "closing last open instance for inode %p\n",
650			 d_inode(cifs_file->dentry));
651		/*
652		 * In strict cache mode we need to invalidate the mapping on the
653		 * last close because it may cause an error when we open this
654		 * file again and get at least a level II oplock.
655		 */
656		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
657			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
658		cifs_set_oplock_level(cifsi, 0);
659	}
660
661	spin_unlock(&cifsi->open_file_lock);
662	spin_unlock(&tcon->open_file_lock);
663
664	oplock_break_cancelled = wait_oplock_handler ?
665		cancel_work_sync(&cifs_file->oplock_break) : false;
666
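	/* the handle is still valid on the server, so close it there as well */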
667	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
668		struct TCP_Server_Info *server = tcon->ses->server;
669		unsigned int xid;
670
671		xid = get_xid();
672		if (server->ops->close_getattr)
673			server->ops->close_getattr(xid, tcon, cifs_file);
674		else if (server->ops->close)
675			server->ops->close(xid, tcon, &cifs_file->fid);
676		_free_xid(xid);
677	}
678
679	if (oplock_break_cancelled)
680		cifs_done_oplock_break(cifsi);
681
682	cifs_del_pending_open(&open);
683
684	if (offload)
685		queue_work(fileinfo_put_wq, &cifs_file->put);
686	else
687		cifsFileInfo_put_final(cifs_file);
688}
689
690int cifs_open(struct inode *inode, struct file *file)
691
692{
693	int rc = -EACCES;
694	unsigned int xid;
695	__u32 oplock;
696	struct cifs_sb_info *cifs_sb;
697	struct TCP_Server_Info *server;
698	struct cifs_tcon *tcon;
699	struct tcon_link *tlink;
700	struct cifsFileInfo *cfile = NULL;
701	void *page;
702	const char *full_path;
703	bool posix_open_ok = false;
704	struct cifs_fid fid = {};
705	struct cifs_pending_open open;
706	struct cifs_open_info_data data = {};
707
708	xid = get_xid();
709
710	cifs_sb = CIFS_SB(inode->i_sb);
711	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
712		free_xid(xid);
713		return -EIO;
714	}
715
716	tlink = cifs_sb_tlink(cifs_sb);
717	if (IS_ERR(tlink)) {
718		free_xid(xid);
719		return PTR_ERR(tlink);
720	}
721	tcon = tlink_tcon(tlink);
722	server = tcon->ses->server;
723
724	page = alloc_dentry_path();
725	full_path = build_path_from_dentry(file_dentry(file), page);
726	if (IS_ERR(full_path)) {
727		rc = PTR_ERR(full_path);
728		goto out;
729	}
730
731	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
732		 inode, file->f_flags, full_path);
733
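	/* with O_DIRECT on a strict-I/O mount, switch to the uncached file_operations */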
734	if (file->f_flags & O_DIRECT &&
735	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
736		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
737			file->f_op = &cifs_file_direct_nobrl_ops;
738		else
739			file->f_op = &cifs_file_direct_ops;
740	}
741
742	/* Get the cached handle as SMB2 close is deferred */
743	rc = cifs_get_readable_path(tcon, full_path, &cfile);
744	if (rc == 0) {
745		if (file->f_flags == cfile->f_flags) {
746			file->private_data = cfile;
747			spin_lock(&CIFS_I(inode)->deferred_lock);
748			cifs_del_deferred_close(cfile);
749			spin_unlock(&CIFS_I(inode)->deferred_lock);
750			goto use_cache;
751		} else {
752			_cifsFileInfo_put(cfile, true, false);
753		}
754	}
755
756	if (server->oplocks)
757		oplock = REQ_OPLOCK;
758	else
759		oplock = 0;
760
761#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
762	if (!tcon->broken_posix_open && tcon->unix_ext &&
763	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
764				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
765		/* can not refresh inode info since size could be stale */
766		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
767				cifs_sb->ctx->file_mode /* ignored */,
768				file->f_flags, &oplock, &fid.netfid, xid);
769		if (rc == 0) {
770			cifs_dbg(FYI, "posix open succeeded\n");
771			posix_open_ok = true;
772		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
773			if (tcon->ses->serverNOS)
774				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
775					 tcon->ses->ip_addr,
776					 tcon->ses->serverNOS);
777			tcon->broken_posix_open = true;
778		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
779			 (rc != -EOPNOTSUPP)) /* path not found or net err */
780			goto out;
781		/*
782		 * Else fallthrough to retry open the old way on network i/o
783		 * or DFS errors.
784		 */
785	}
786#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
787
788	if (server->ops->get_lease_key)
789		server->ops->get_lease_key(inode, &fid);
790
791	cifs_add_pending_open(&fid, tlink, &open);
792
793	if (!posix_open_ok) {
794		if (server->ops->get_lease_key)
795			server->ops->get_lease_key(inode, &fid);
796
797		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
798				  xid, &data);
799		if (rc) {
800			cifs_del_pending_open(&open);
801			goto out;
802		}
803	}
804
805	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
806	if (cfile == NULL) {
807		if (server->ops->close)
808			server->ops->close(xid, tcon, &fid);
809		cifs_del_pending_open(&open);
810		rc = -ENOMEM;
811		goto out;
812	}
813
814#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
815	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
816		 * Time to set the mode, which we could not set earlier due to
817		 * Time to set mode which we can not set earlier due to
818		 * problems creating new read-only files.
819		 */
820		struct cifs_unix_set_info_args args = {
821			.mode	= inode->i_mode,
822			.uid	= INVALID_UID, /* no change */
823			.gid	= INVALID_GID, /* no change */
824			.ctime	= NO_CHANGE_64,
825			.atime	= NO_CHANGE_64,
826			.mtime	= NO_CHANGE_64,
827			.device	= 0,
828		};
829		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
830				       cfile->pid);
831	}
832#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
833
834use_cache:
835	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
836			   file->f_mode & FMODE_WRITE);
837	if (file->f_flags & O_DIRECT &&
838	    (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
839	     file->f_flags & O_APPEND))
840		cifs_invalidate_cache(file_inode(file),
841				      FSCACHE_INVAL_DIO_WRITE);
842
843out:
844	free_dentry_path(page);
845	free_xid(xid);
846	cifs_put_tlink(tlink);
847	cifs_free_open_info(&data);
848	return rc;
849}
850
851#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
852static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
853#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
854
855/*
856 * Try to reacquire byte range locks that were released when session
857 * to server was lost.
858 */
859static int
860cifs_relock_file(struct cifsFileInfo *cfile)
861{
862	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
863	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
864	int rc = 0;
865#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
866	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
867#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
868
869	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
870	if (cinode->can_cache_brlcks) {
871		/* can cache locks - no need to relock */
872		up_read(&cinode->lock_sem);
873		return rc;
874	}
875
876#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
877	if (cap_unix(tcon->ses) &&
878	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
879	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
880		rc = cifs_push_posix_locks(cfile);
881	else
882#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
883		rc = tcon->ses->server->ops->push_mand_locks(cfile);
884
885	up_read(&cinode->lock_sem);
886	return rc;
887}
888
889static int
890cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
891{
892	int rc = -EACCES;
893	unsigned int xid;
894	__u32 oplock;
895	struct cifs_sb_info *cifs_sb;
896	struct cifs_tcon *tcon;
897	struct TCP_Server_Info *server;
898	struct cifsInodeInfo *cinode;
899	struct inode *inode;
900	void *page;
901	const char *full_path;
902	int desired_access;
903	int disposition = FILE_OPEN;
904	int create_options = CREATE_NOT_DIR;
905	struct cifs_open_parms oparms;
906
907	xid = get_xid();
908	mutex_lock(&cfile->fh_mutex);
909	if (!cfile->invalidHandle) {
910		mutex_unlock(&cfile->fh_mutex);
911		free_xid(xid);
912		return 0;
913	}
914
915	inode = d_inode(cfile->dentry);
916	cifs_sb = CIFS_SB(inode->i_sb);
917	tcon = tlink_tcon(cfile->tlink);
918	server = tcon->ses->server;
919
920	/*
921	 * Cannot grab the rename sem here because various ops, including those
922	 * that already have the rename sem, can end up causing writepage to get
923	 * called; and if the server was down, that means we end up here and we
924	 * can never tell if the caller already has the rename_sem.
925	 */
926	page = alloc_dentry_path();
927	full_path = build_path_from_dentry(cfile->dentry, page);
928	if (IS_ERR(full_path)) {
929		mutex_unlock(&cfile->fh_mutex);
930		free_dentry_path(page);
931		free_xid(xid);
932		return PTR_ERR(full_path);
933	}
934
935	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
936		 inode, cfile->f_flags, full_path);
937
938	if (tcon->ses->server->oplocks)
939		oplock = REQ_OPLOCK;
940	else
941		oplock = 0;
942
943#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
944	if (tcon->unix_ext && cap_unix(tcon->ses) &&
945	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
946				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
947		/*
948		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
949		 * original open. Must mask them off for a reopen.
950		 */
951		unsigned int oflags = cfile->f_flags &
952						~(O_CREAT | O_EXCL | O_TRUNC);
953
954		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
955				     cifs_sb->ctx->file_mode /* ignored */,
956				     oflags, &oplock, &cfile->fid.netfid, xid);
957		if (rc == 0) {
958			cifs_dbg(FYI, "posix reopen succeeded\n");
959			oparms.reconnect = true;
960			goto reopen_success;
961		}
962		/*
963		 * Fall through to retry the open the old way on errors; in the
964		 * reconnect path especially, it is important to retry hard.
965		 */
966	}
967#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
968
969	desired_access = cifs_convert_flags(cfile->f_flags);
970
971	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
972	if (cfile->f_flags & O_SYNC)
973		create_options |= CREATE_WRITE_THROUGH;
974
975	if (cfile->f_flags & O_DIRECT)
976		create_options |= CREATE_NO_BUFFER;
977
978	if (server->ops->get_lease_key)
979		server->ops->get_lease_key(inode, &cfile->fid);
980
981	oparms = (struct cifs_open_parms) {
982		.tcon = tcon,
983		.cifs_sb = cifs_sb,
984		.desired_access = desired_access,
985		.create_options = cifs_create_options(cifs_sb, create_options),
986		.disposition = disposition,
987		.path = full_path,
988		.fid = &cfile->fid,
989		.reconnect = true,
990	};
991
992	/*
993	 * Can not refresh inode by passing in file_info buf to be returned by
994	 * ops->open and then calling get_inode_info with returned buf since
995	 * file might have write behind data that needs to be flushed and server
996	 * version of file size can be stale. If we knew for sure that inode was
997	 * not dirty locally we could do this.
998	 */
999	rc = server->ops->open(xid, &oparms, &oplock, NULL);
1000	if (rc == -ENOENT && oparms.reconnect == false) {
1001		/* durable handle timeout is expired - open the file again */
1002		rc = server->ops->open(xid, &oparms, &oplock, NULL);
1003		/* indicate that we need to relock the file */
1004		oparms.reconnect = true;
1005	}
1006
1007	if (rc) {
1008		mutex_unlock(&cfile->fh_mutex);
1009		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1010		cifs_dbg(FYI, "oplock: %d\n", oplock);
1011		goto reopen_error_exit;
1012	}
1013
1014#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1015reopen_success:
1016#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1017	cfile->invalidHandle = false;
1018	mutex_unlock(&cfile->fh_mutex);
1019	cinode = CIFS_I(inode);
1020
1021	if (can_flush) {
1022		rc = filemap_write_and_wait(inode->i_mapping);
1023		if (!is_interrupt_error(rc))
1024			mapping_set_error(inode->i_mapping, rc);
1025
1026		if (tcon->posix_extensions)
1027			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
1028		else if (tcon->unix_ext)
1029			rc = cifs_get_inode_info_unix(&inode, full_path,
1030						      inode->i_sb, xid);
1031		else
1032			rc = cifs_get_inode_info(&inode, full_path, NULL,
1033						 inode->i_sb, xid, NULL);
1034	}
1035	/*
1036	 * Else we are writing out data to server already and could deadlock if
1037	 * we tried to flush data, and since we do not know if we have data that
1038	 * would invalidate the current end of file on the server we can not go
1039	 * to the server to get the new inode info.
1040	 */
1041
1042	/*
1043	 * If the server returned a read oplock and we have mandatory brlocks,
1044	 * set oplock level to None.
1045	 */
1046	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1047		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1048		oplock = 0;
1049	}
1050
1051	server->ops->set_fid(cfile, &cfile->fid, oplock);
1052	if (oparms.reconnect)
1053		cifs_relock_file(cfile);
1054
1055reopen_error_exit:
1056	free_dentry_path(page);
1057	free_xid(xid);
1058	return rc;
1059}
1060
1061void smb2_deferred_work_close(struct work_struct *work)
1062{
1063	struct cifsFileInfo *cfile = container_of(work,
1064			struct cifsFileInfo, deferred.work);
1065
1066	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1067	cifs_del_deferred_close(cfile);
1068	cfile->deferred_close_scheduled = false;
1069	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1070	_cifsFileInfo_put(cfile, true, false);
1071}
1072
1073int cifs_close(struct inode *inode, struct file *file)
1074{
1075	struct cifsFileInfo *cfile;
1076	struct cifsInodeInfo *cinode = CIFS_I(inode);
1077	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1078	struct cifs_deferred_close *dclose;
1079
1080	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1081
1082	if (file->private_data != NULL) {
1083		cfile = file->private_data;
1084		file->private_data = NULL;
1085		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
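		/*
		 * Defer the SMB close only while we hold a full (RHW) caching
		 * lease and the file has not been marked close-on-lock;
		 * otherwise drop the reference immediately.
		 */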
1086		if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
1087		    && cinode->lease_granted &&
1088		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
1089		    dclose) {
1090			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1091				inode_set_mtime_to_ts(inode,
1092						      inode_set_ctime_current(inode));
1093			}
1094			spin_lock(&cinode->deferred_lock);
1095			cifs_add_deferred_close(cfile, dclose);
1096			if (cfile->deferred_close_scheduled &&
1097			    delayed_work_pending(&cfile->deferred)) {
1098				/*
1099				 * If there is no pending work, mod_delayed_work queues new
1100				 * work, so increase the ref count to avoid use-after-free.
1101				 */
1102				if (!mod_delayed_work(deferredclose_wq,
1103						&cfile->deferred, cifs_sb->ctx->closetimeo))
1104					cifsFileInfo_get(cfile);
1105			} else {
1106				/* Deferred close for files */
1107				queue_delayed_work(deferredclose_wq,
1108						&cfile->deferred, cifs_sb->ctx->closetimeo);
1109				cfile->deferred_close_scheduled = true;
1110				spin_unlock(&cinode->deferred_lock);
1111				return 0;
1112			}
1113			spin_unlock(&cinode->deferred_lock);
1114			_cifsFileInfo_put(cfile, true, false);
1115		} else {
1116			_cifsFileInfo_put(cfile, true, false);
1117			kfree(dclose);
1118		}
1119	}
1120
1121	/* return code from the ->release op is always ignored */
1122	return 0;
1123}
1124
1125void
1126cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1127{
1128	struct cifsFileInfo *open_file, *tmp;
1129	struct list_head tmp_list;
1130
1131	if (!tcon->use_persistent || !tcon->need_reopen_files)
1132		return;
1133
1134	tcon->need_reopen_files = false;
1135
1136	cifs_dbg(FYI, "Reopen persistent handles\n");
1137	INIT_LIST_HEAD(&tmp_list);
1138
1139	/* list all files open on tree connection, reopen persistent handles */
1140	spin_lock(&tcon->open_file_lock);
1141	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1142		if (!open_file->invalidHandle)
1143			continue;
1144		cifsFileInfo_get(open_file);
1145		list_add_tail(&open_file->rlist, &tmp_list);
1146	}
1147	spin_unlock(&tcon->open_file_lock);
1148
1149	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1150		if (cifs_reopen_file(open_file, false /* do not flush */))
1151			tcon->need_reopen_files = true;
1152		list_del_init(&open_file->rlist);
1153		cifsFileInfo_put(open_file);
1154	}
1155}
1156
1157int cifs_closedir(struct inode *inode, struct file *file)
1158{
1159	int rc = 0;
1160	unsigned int xid;
1161	struct cifsFileInfo *cfile = file->private_data;
1162	struct cifs_tcon *tcon;
1163	struct TCP_Server_Info *server;
1164	char *buf;
1165
1166	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1167
1168	if (cfile == NULL)
1169		return rc;
1170
1171	xid = get_xid();
1172	tcon = tlink_tcon(cfile->tlink);
1173	server = tcon->ses->server;
1174
1175	cifs_dbg(FYI, "Freeing private data in close dir\n");
1176	spin_lock(&cfile->file_info_lock);
1177	if (server->ops->dir_needs_close(cfile)) {
1178		cfile->invalidHandle = true;
1179		spin_unlock(&cfile->file_info_lock);
1180		if (server->ops->close_dir)
1181			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1182		else
1183			rc = -ENOSYS;
1184		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1185		/* not much we can do if it fails anyway, ignore rc */
1186		rc = 0;
1187	} else
1188		spin_unlock(&cfile->file_info_lock);
1189
1190	buf = cfile->srch_inf.ntwrk_buf_start;
1191	if (buf) {
1192		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1193		cfile->srch_inf.ntwrk_buf_start = NULL;
1194		if (cfile->srch_inf.smallBuf)
1195			cifs_small_buf_release(buf);
1196		else
1197			cifs_buf_release(buf);
1198	}
1199
1200	cifs_put_tlink(cfile->tlink);
1201	kfree(file->private_data);
1202	file->private_data = NULL;
1203	/* BB can we lock the filestruct while this is going on? */
1204	free_xid(xid);
1205	return rc;
1206}
1207
1208static struct cifsLockInfo *
1209cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1210{
1211	struct cifsLockInfo *lock =
1212		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1213	if (!lock)
1214		return lock;
1215	lock->offset = offset;
1216	lock->length = length;
1217	lock->type = type;
1218	lock->pid = current->tgid;
1219	lock->flags = flags;
1220	INIT_LIST_HEAD(&lock->blist);
1221	init_waitqueue_head(&lock->block_q);
1222	return lock;
1223}
1224
1225void
1226cifs_del_lock_waiters(struct cifsLockInfo *lock)
1227{
1228	struct cifsLockInfo *li, *tmp;
1229	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1230		list_del_init(&li->blist);
1231		wake_up(&li->block_q);
1232	}
1233}
1234
1235#define CIFS_LOCK_OP	0
1236#define CIFS_READ_OP	1
1237#define CIFS_WRITE_OP	2
1238
1239/* @rw_check : 0 - no op, 1 - read, 2 - write */
1240static bool
1241cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1242			    __u64 length, __u8 type, __u16 flags,
1243			    struct cifsFileInfo *cfile,
1244			    struct cifsLockInfo **conf_lock, int rw_check)
1245{
1246	struct cifsLockInfo *li;
1247	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1248	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1249
1250	list_for_each_entry(li, &fdlocks->locks, llist) {
1251		if (offset + length <= li->offset ||
1252		    offset >= li->offset + li->length)
1253			continue;
1254		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1255		    server->ops->compare_fids(cfile, cur_cfile)) {
1256			/* shared lock prevents write op through the same fid */
1257			if (!(li->type & server->vals->shared_lock_type) ||
1258			    rw_check != CIFS_WRITE_OP)
1259				continue;
1260		}
1261		if ((type & server->vals->shared_lock_type) &&
1262		    ((server->ops->compare_fids(cfile, cur_cfile) &&
1263		     current->tgid == li->pid) || type == li->type))
1264			continue;
1265		if (rw_check == CIFS_LOCK_OP &&
1266		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1267		    server->ops->compare_fids(cfile, cur_cfile))
1268			continue;
1269		if (conf_lock)
1270			*conf_lock = li;
1271		return true;
1272	}
1273	return false;
1274}
1275
1276bool
1277cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1278			__u8 type, __u16 flags,
1279			struct cifsLockInfo **conf_lock, int rw_check)
1280{
1281	bool rc = false;
1282	struct cifs_fid_locks *cur;
1283	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1284
1285	list_for_each_entry(cur, &cinode->llist, llist) {
1286		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1287						 flags, cfile, conf_lock,
1288						 rw_check);
1289		if (rc)
1290			break;
1291	}
1292
1293	return rc;
1294}
1295
1296/*
1297 * Check if there is another lock that prevents us from setting the lock
1298 * (mandatory style). If such a lock exists, update the flock structure with
1299 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1300 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
1301 * send a request to the server or 1 otherwise.
1302 */
1303static int
1304cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1305	       __u8 type, struct file_lock *flock)
1306{
1307	int rc = 0;
1308	struct cifsLockInfo *conf_lock;
1309	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1310	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1311	bool exist;
1312
1313	down_read(&cinode->lock_sem);
1314
1315	exist = cifs_find_lock_conflict(cfile, offset, length, type,
1316					flock->fl_flags, &conf_lock,
1317					CIFS_LOCK_OP);
1318	if (exist) {
1319		flock->fl_start = conf_lock->offset;
1320		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1321		flock->fl_pid = conf_lock->pid;
1322		if (conf_lock->type & server->vals->shared_lock_type)
1323			flock->fl_type = F_RDLCK;
1324		else
1325			flock->fl_type = F_WRLCK;
1326	} else if (!cinode->can_cache_brlcks)
1327		rc = 1;
1328	else
1329		flock->fl_type = F_UNLCK;
1330
1331	up_read(&cinode->lock_sem);
1332	return rc;
1333}
1334
1335static void
1336cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1337{
1338	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1339	cifs_down_write(&cinode->lock_sem);
1340	list_add_tail(&lock->llist, &cfile->llist->locks);
1341	up_write(&cinode->lock_sem);
1342}
1343
1344/*
1345 * Set the byte-range lock (mandatory style). Returns:
1346 * 1) 0, if we set the lock and don't need to send a request to the server;
1347 * 2) 1, if no locks prevent us but we need to send a request to the server;
1348 * 3) -EACCES, if there is a lock that prevents us and wait is false.
1349 */
1350static int
1351cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1352		 bool wait)
1353{
1354	struct cifsLockInfo *conf_lock;
1355	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1356	bool exist;
1357	int rc = 0;
1358
1359try_again:
1360	exist = false;
1361	cifs_down_write(&cinode->lock_sem);
1362
1363	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1364					lock->type, lock->flags, &conf_lock,
1365					CIFS_LOCK_OP);
1366	if (!exist && cinode->can_cache_brlcks) {
1367		list_add_tail(&lock->llist, &cfile->llist->locks);
1368		up_write(&cinode->lock_sem);
1369		return rc;
1370	}
1371
1372	if (!exist)
1373		rc = 1;
1374	else if (!wait)
1375		rc = -EACCES;
1376	else {
1377		list_add_tail(&lock->blist, &conf_lock->blist);
1378		up_write(&cinode->lock_sem);
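		/*
		 * Wait until cifs_del_lock_waiters() takes us off the conflicting
		 * lock's blist (our list_head becomes empty again), then retry.
		 */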
1379		rc = wait_event_interruptible(lock->block_q,
1380					(lock->blist.prev == &lock->blist) &&
1381					(lock->blist.next == &lock->blist));
1382		if (!rc)
1383			goto try_again;
1384		cifs_down_write(&cinode->lock_sem);
1385		list_del_init(&lock->blist);
1386	}
1387
1388	up_write(&cinode->lock_sem);
1389	return rc;
1390}
1391
1392#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1393/*
1394 * Check if there is another lock that prevents us from setting the lock
1395 * (posix style). If such a lock exists, update the flock structure with
1396 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1397 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
1398 * send a request to the server or 1 otherwise.
1399 */
1400static int
1401cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1402{
1403	int rc = 0;
1404	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1405	unsigned char saved_type = flock->fl_type;
1406
1407	if ((flock->fl_flags & FL_POSIX) == 0)
1408		return 1;
1409
1410	down_read(&cinode->lock_sem);
1411	posix_test_lock(file, flock);
1412
1413	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1414		flock->fl_type = saved_type;
1415		rc = 1;
1416	}
1417
1418	up_read(&cinode->lock_sem);
1419	return rc;
1420}
1421
1422/*
1423 * Set the byte-range lock (posix style). Returns:
1424 * 1) <0, if an error occurs while setting the lock;
1425 * 2) 0, if we set the lock and don't need to request to the server;
1426 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1427 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1428 */
1429static int
1430cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1431{
1432	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1433	int rc = FILE_LOCK_DEFERRED + 1;
1434
1435	if ((flock->fl_flags & FL_POSIX) == 0)
1436		return rc;
1437
1438	cifs_down_write(&cinode->lock_sem);
1439	if (!cinode->can_cache_brlcks) {
1440		up_write(&cinode->lock_sem);
1441		return rc;
1442	}
1443
1444	rc = posix_lock_file(file, flock, NULL);
1445	up_write(&cinode->lock_sem);
1446	return rc;
1447}
1448
1449int
1450cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1451{
1452	unsigned int xid;
1453	int rc = 0, stored_rc;
1454	struct cifsLockInfo *li, *tmp;
1455	struct cifs_tcon *tcon;
1456	unsigned int num, max_num, max_buf;
1457	LOCKING_ANDX_RANGE *buf, *cur;
1458	static const int types[] = {
1459		LOCKING_ANDX_LARGE_FILES,
1460		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1461	};
1462	int i;
1463
1464	xid = get_xid();
1465	tcon = tlink_tcon(cfile->tlink);
1466
1467	/*
1468	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1469	 * and check it before using.
1470	 */
1471	max_buf = tcon->ses->server->maxBuf;
1472	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1473		free_xid(xid);
1474		return -EINVAL;
1475	}
1476
1477	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1478		     PAGE_SIZE);
1479	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1480			PAGE_SIZE);
1481	max_num = (max_buf - sizeof(struct smb_hdr)) /
1482						sizeof(LOCKING_ANDX_RANGE);
1483	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1484	if (!buf) {
1485		free_xid(xid);
1486		return -ENOMEM;
1487	}
1488
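	/* one pass per entry in types[]: exclusive ranges first, then shared */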
1489	for (i = 0; i < 2; i++) {
1490		cur = buf;
1491		num = 0;
1492		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1493			if (li->type != types[i])
1494				continue;
1495			cur->Pid = cpu_to_le16(li->pid);
1496			cur->LengthLow = cpu_to_le32((u32)li->length);
1497			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1498			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1499			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1500			if (++num == max_num) {
1501				stored_rc = cifs_lockv(xid, tcon,
1502						       cfile->fid.netfid,
1503						       (__u8)li->type, 0, num,
1504						       buf);
1505				if (stored_rc)
1506					rc = stored_rc;
1507				cur = buf;
1508				num = 0;
1509			} else
1510				cur++;
1511		}
1512
1513		if (num) {
1514			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1515					       (__u8)types[i], 0, num, buf);
1516			if (stored_rc)
1517				rc = stored_rc;
1518		}
1519	}
1520
1521	kfree(buf);
1522	free_xid(xid);
1523	return rc;
1524}
1525
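/*
 * Derive a 32-bit lock-owner id from the fl_owner pointer; it stands in for
 * a pid in the legacy POSIX byte-range lock calls.
 */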
1526static __u32
1527hash_lockowner(fl_owner_t owner)
1528{
1529	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1530}
1531#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1532
1533struct lock_to_push {
1534	struct list_head llist;
1535	__u64 offset;
1536	__u64 length;
1537	__u32 pid;
1538	__u16 netfid;
1539	__u8 type;
1540};
1541
1542#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1543static int
1544cifs_push_posix_locks(struct cifsFileInfo *cfile)
1545{
1546	struct inode *inode = d_inode(cfile->dentry);
1547	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1548	struct file_lock *flock;
1549	struct file_lock_context *flctx = locks_inode_context(inode);
1550	unsigned int count = 0, i;
1551	int rc = 0, xid, type;
1552	struct list_head locks_to_send, *el;
1553	struct lock_to_push *lck, *tmp;
1554	__u64 length;
1555
1556	xid = get_xid();
1557
1558	if (!flctx)
1559		goto out;
1560
1561	spin_lock(&flctx->flc_lock);
1562	list_for_each(el, &flctx->flc_posix) {
1563		count++;
1564	}
1565	spin_unlock(&flctx->flc_lock);
1566
1567	INIT_LIST_HEAD(&locks_to_send);
1568
1569	/*
1570	 * Allocating count locks is enough because no FL_POSIX locks can be
1571	 * added to the list while we are holding cinode->lock_sem that
1572	 * protects locking operations of this inode.
1573	 */
1574	for (i = 0; i < count; i++) {
1575		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1576		if (!lck) {
1577			rc = -ENOMEM;
1578			goto err_out;
1579		}
1580		list_add_tail(&lck->llist, &locks_to_send);
1581	}
1582
1583	el = locks_to_send.next;
1584	spin_lock(&flctx->flc_lock);
1585	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1586		if (el == &locks_to_send) {
1587			/*
1588			 * The list ended. We don't have enough allocated
1589			 * structures - something is really wrong.
1590			 */
1591			cifs_dbg(VFS, "Can't push all brlocks!\n");
1592			break;
1593		}
1594		length = cifs_flock_len(flock);
1595		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1596			type = CIFS_RDLCK;
1597		else
1598			type = CIFS_WRLCK;
1599		lck = list_entry(el, struct lock_to_push, llist);
1600		lck->pid = hash_lockowner(flock->fl_owner);
1601		lck->netfid = cfile->fid.netfid;
1602		lck->length = length;
1603		lck->type = type;
1604		lck->offset = flock->fl_start;
1605	}
1606	spin_unlock(&flctx->flc_lock);
1607
1608	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1609		int stored_rc;
1610
1611		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1612					     lck->offset, lck->length, NULL,
1613					     lck->type, 0);
1614		if (stored_rc)
1615			rc = stored_rc;
1616		list_del(&lck->llist);
1617		kfree(lck);
1618	}
1619
1620out:
1621	free_xid(xid);
1622	return rc;
1623err_out:
1624	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1625		list_del(&lck->llist);
1626		kfree(lck);
1627	}
1628	goto out;
1629}
1630#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1631
1632static int
1633cifs_push_locks(struct cifsFileInfo *cfile)
1634{
1635	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1636	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1637	int rc = 0;
1638#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1639	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1640#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1641
1642	/* we are going to update can_cache_brlcks here - need write access */
1643	cifs_down_write(&cinode->lock_sem);
1644	if (!cinode->can_cache_brlcks) {
1645		up_write(&cinode->lock_sem);
1646		return rc;
1647	}
1648
1649#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1650	if (cap_unix(tcon->ses) &&
1651	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1652	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1653		rc = cifs_push_posix_locks(cfile);
1654	else
1655#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1656		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1657
1658	cinode->can_cache_brlcks = false;
1659	up_write(&cinode->lock_sem);
1660	return rc;
1661}
1662
1663static void
1664cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1665		bool *wait_flag, struct TCP_Server_Info *server)
1666{
1667	if (flock->fl_flags & FL_POSIX)
1668		cifs_dbg(FYI, "Posix\n");
1669	if (flock->fl_flags & FL_FLOCK)
1670		cifs_dbg(FYI, "Flock\n");
1671	if (flock->fl_flags & FL_SLEEP) {
1672		cifs_dbg(FYI, "Blocking lock\n");
1673		*wait_flag = true;
1674	}
1675	if (flock->fl_flags & FL_ACCESS)
1676		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1677	if (flock->fl_flags & FL_LEASE)
1678		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1679	if (flock->fl_flags &
1680	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1681	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1682		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1683
1684	*type = server->vals->large_lock_type;
1685	if (flock->fl_type == F_WRLCK) {
1686		cifs_dbg(FYI, "F_WRLCK\n");
1687		*type |= server->vals->exclusive_lock_type;
1688		*lock = 1;
1689	} else if (flock->fl_type == F_UNLCK) {
1690		cifs_dbg(FYI, "F_UNLCK\n");
1691		*type |= server->vals->unlock_lock_type;
1692		*unlock = 1;
1693		/* Check if unlock includes more than one lock range */
1694	} else if (flock->fl_type == F_RDLCK) {
1695		cifs_dbg(FYI, "F_RDLCK\n");
1696		*type |= server->vals->shared_lock_type;
1697		*lock = 1;
1698	} else if (flock->fl_type == F_EXLCK) {
1699		cifs_dbg(FYI, "F_EXLCK\n");
1700		*type |= server->vals->exclusive_lock_type;
1701		*lock = 1;
1702	} else if (flock->fl_type == F_SHLCK) {
1703		cifs_dbg(FYI, "F_SHLCK\n");
1704		*type |= server->vals->shared_lock_type;
1705		*lock = 1;
1706	} else
1707		cifs_dbg(FYI, "Unknown type of lock\n");
1708}
1709
1710static int
1711cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1712	   bool wait_flag, bool posix_lck, unsigned int xid)
1713{
1714	int rc = 0;
1715	__u64 length = cifs_flock_len(flock);
1716	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1717	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1718	struct TCP_Server_Info *server = tcon->ses->server;
1719#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1720	__u16 netfid = cfile->fid.netfid;
1721
1722	if (posix_lck) {
1723		int posix_lock_type;
1724
1725		rc = cifs_posix_lock_test(file, flock);
1726		if (!rc)
1727			return rc;
1728
1729		if (type & server->vals->shared_lock_type)
1730			posix_lock_type = CIFS_RDLCK;
1731		else
1732			posix_lock_type = CIFS_WRLCK;
1733		rc = CIFSSMBPosixLock(xid, tcon, netfid,
1734				      hash_lockowner(flock->fl_owner),
1735				      flock->fl_start, length, flock,
1736				      posix_lock_type, wait_flag);
1737		return rc;
1738	}
1739#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1740
1741	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1742	if (!rc)
1743		return rc;
1744
1745	/* BB we could chain these into one lock request BB */
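	/*
	 * Probe for a conflict by trying to take the lock ourselves; if that
	 * succeeds, release it again and report the range as unlocked.
	 */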
1746	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1747				    1, 0, false);
1748	if (rc == 0) {
1749		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1750					    type, 0, 1, false);
1751		flock->fl_type = F_UNLCK;
1752		if (rc != 0)
1753			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1754				 rc);
1755		return 0;
1756	}
1757
1758	if (type & server->vals->shared_lock_type) {
1759		flock->fl_type = F_WRLCK;
1760		return 0;
1761	}
1762
1763	type &= ~server->vals->exclusive_lock_type;
1764
1765	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1766				    type | server->vals->shared_lock_type,
1767				    1, 0, false);
1768	if (rc == 0) {
1769		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1770			type | server->vals->shared_lock_type, 0, 1, false);
1771		flock->fl_type = F_RDLCK;
1772		if (rc != 0)
1773			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1774				 rc);
1775	} else
1776		flock->fl_type = F_WRLCK;
1777
1778	return 0;
1779}
1780
1781void
1782cifs_move_llist(struct list_head *source, struct list_head *dest)
1783{
1784	struct list_head *li, *tmp;
1785	list_for_each_safe(li, tmp, source)
1786		list_move(li, dest);
1787}
1788
1789void
1790cifs_free_llist(struct list_head *llist)
1791{
1792	struct cifsLockInfo *li, *tmp;
1793	list_for_each_entry_safe(li, tmp, llist, llist) {
1794		cifs_del_lock_waiters(li);
1795		list_del(&li->llist);
1796		kfree(li);
1797	}
1798}
1799
1800#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1801int
1802cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1803		  unsigned int xid)
1804{
1805	int rc = 0, stored_rc;
1806	static const int types[] = {
1807		LOCKING_ANDX_LARGE_FILES,
1808		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1809	};
1810	unsigned int i;
1811	unsigned int max_num, num, max_buf;
1812	LOCKING_ANDX_RANGE *buf, *cur;
1813	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1814	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1815	struct cifsLockInfo *li, *tmp;
1816	__u64 length = cifs_flock_len(flock);
1817	struct list_head tmp_llist;
1818
1819	INIT_LIST_HEAD(&tmp_llist);
1820
1821	/*
1822	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1823	 * and check it before using.
1824	 */
1825	max_buf = tcon->ses->server->maxBuf;
1826	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1827		return -EINVAL;
1828
1829	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1830		     PAGE_SIZE);
1831	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1832			PAGE_SIZE);
1833	max_num = (max_buf - sizeof(struct smb_hdr)) /
1834						sizeof(LOCKING_ANDX_RANGE);
1835	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1836	if (!buf)
1837		return -ENOMEM;
1838
1839	cifs_down_write(&cinode->lock_sem);
1840	for (i = 0; i < 2; i++) {
1841		cur = buf;
1842		num = 0;
1843		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1844			if (flock->fl_start > li->offset ||
1845			    (flock->fl_start + length) <
1846			    (li->offset + li->length))
1847				continue;
1848			if (current->tgid != li->pid)
1849				continue;
1850			if (types[i] != li->type)
1851				continue;
1852			if (cinode->can_cache_brlcks) {
1853				/*
1854				 * We can cache brlock requests - simply remove
1855				 * a lock from the file's list.
1856				 */
1857				list_del(&li->llist);
1858				cifs_del_lock_waiters(li);
1859				kfree(li);
1860				continue;
1861			}
1862			cur->Pid = cpu_to_le16(li->pid);
1863			cur->LengthLow = cpu_to_le32((u32)li->length);
1864			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1865			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1866			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1867			/*
1868			 * We need to save a lock here to let us add it again to
1869			 * the file's list if the unlock range request fails on
1870			 * the server.
1871			 */
1872			list_move(&li->llist, &tmp_llist);
1873			if (++num == max_num) {
1874				stored_rc = cifs_lockv(xid, tcon,
1875						       cfile->fid.netfid,
1876						       li->type, num, 0, buf);
1877				if (stored_rc) {
1878					/*
1879					 * We failed on the unlock range
1880					 * request - add all locks from the tmp
1881					 * list to the head of the file's list.
1882					 */
1883					cifs_move_llist(&tmp_llist,
1884							&cfile->llist->locks);
1885					rc = stored_rc;
1886				} else
1887					/*
1888					 * The unlock range request succeeded -
1889					 * free the tmp list.
1890					 */
1891					cifs_free_llist(&tmp_llist);
1892				cur = buf;
1893				num = 0;
1894			} else
1895				cur++;
1896		}
1897		if (num) {
1898			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1899					       types[i], num, 0, buf);
1900			if (stored_rc) {
1901				cifs_move_llist(&tmp_llist,
1902						&cfile->llist->locks);
1903				rc = stored_rc;
1904			} else
1905				cifs_free_llist(&tmp_llist);
1906		}
1907	}
1908
1909	up_write(&cinode->lock_sem);
1910	kfree(buf);
1911	return rc;
1912}
1913#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1914
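/*
 * Set or release a byte-range lock.  On mounts using the unix extensions the
 * request goes out via CIFSSMBPosixLock; otherwise the lock is either cached
 * locally or sent to the server as a mandatory byte-range lock (breaking a
 * cached read lease first if needed).  FL_POSIX/FL_FLOCK requests are then
 * recorded with the VFS through locks_lock_file_wait().
 */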
1915static int
1916cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1917	   bool wait_flag, bool posix_lck, int lock, int unlock,
1918	   unsigned int xid)
1919{
1920	int rc = 0;
1921	__u64 length = cifs_flock_len(flock);
1922	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1923	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1924	struct TCP_Server_Info *server = tcon->ses->server;
1925	struct inode *inode = d_inode(cfile->dentry);
1926
1927#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1928	if (posix_lck) {
1929		int posix_lock_type;
1930
1931		rc = cifs_posix_lock_set(file, flock);
1932		if (rc <= FILE_LOCK_DEFERRED)
1933			return rc;
1934
1935		if (type & server->vals->shared_lock_type)
1936			posix_lock_type = CIFS_RDLCK;
1937		else
1938			posix_lock_type = CIFS_WRLCK;
1939
1940		if (unlock == 1)
1941			posix_lock_type = CIFS_UNLCK;
1942
1943		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1944				      hash_lockowner(flock->fl_owner),
1945				      flock->fl_start, length,
1946				      NULL, posix_lock_type, wait_flag);
1947		goto out;
1948	}
1949#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1950	if (lock) {
1951		struct cifsLockInfo *lock;
1952
1953		lock = cifs_lock_init(flock->fl_start, length, type,
1954				      flock->fl_flags);
1955		if (!lock)
1956			return -ENOMEM;
1957
1958		rc = cifs_lock_add_if(cfile, lock, wait_flag);
1959		if (rc < 0) {
1960			kfree(lock);
1961			return rc;
1962		}
1963		if (!rc)
1964			goto out;
1965
1966		/*
1967		 * Windows 7 server can delay breaking lease from read to None
1968		 * if we set a byte-range lock on a file - break it explicitly
1969		 * before sending the lock to the server to be sure the next
1970		 * read won't conflict with non-overlapping locks due to
1971		 * page reads.
1972		 */
1973		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1974					CIFS_CACHE_READ(CIFS_I(inode))) {
1975			cifs_zap_mapping(inode);
1976			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1977				 inode);
1978			CIFS_I(inode)->oplock = 0;
1979		}
1980
1981		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1982					    type, 1, 0, wait_flag);
1983		if (rc) {
1984			kfree(lock);
1985			return rc;
1986		}
1987
1988		cifs_lock_add(cfile, lock);
1989	} else if (unlock)
1990		rc = server->ops->mand_unlock_range(cfile, flock, xid);
1991
1992out:
1993	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1994		/*
1995		 * If this is a request to remove all locks because we
1996		 * are closing the file, it doesn't matter if the
1997		 * unlocking failed as both cifs.ko and the SMB server
1998		 * remove the lock on file close
1999		 */
2000		if (rc) {
2001			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2002			if (!(flock->fl_flags & FL_CLOSE))
2003				return rc;
2004		}
2005		rc = locks_lock_file_wait(file, flock);
2006	}
2007	return rc;
2008}
2009
2010int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2011{
2012	int rc, xid;
2013	int lock = 0, unlock = 0;
2014	bool wait_flag = false;
2015	bool posix_lck = false;
2016	struct cifs_sb_info *cifs_sb;
2017	struct cifs_tcon *tcon;
2018	struct cifsFileInfo *cfile;
2019	__u32 type;
2020
2021	xid = get_xid();
2022
2023	if (!(fl->fl_flags & FL_FLOCK)) {
2024		rc = -ENOLCK;
2025		free_xid(xid);
2026		return rc;
2027	}
2028
2029	cfile = (struct cifsFileInfo *)file->private_data;
2030	tcon = tlink_tcon(cfile->tlink);
2031
2032	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2033			tcon->ses->server);
2034	cifs_sb = CIFS_FILE_SB(file);
2035
2036	if (cap_unix(tcon->ses) &&
2037	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2038	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2039		posix_lck = true;
2040
2041	if (!lock && !unlock) {
2042		/*
2043		 * if this is neither a lock nor an unlock request then there
2044		 * is nothing to do since we do not know what it is
2045		 */
2046		rc = -EOPNOTSUPP;
2047		free_xid(xid);
2048		return rc;
2049	}
2050
2051	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2052			xid);
2053	free_xid(xid);
2054	return rc;
2057}
2058
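/*
 * fcntl(2) byte-range lock entry point: F_GETLK requests are answered by
 * cifs_getlk(), lock and unlock requests are passed on to cifs_setlk().
 */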
2059int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2060{
2061	int rc, xid;
2062	int lock = 0, unlock = 0;
2063	bool wait_flag = false;
2064	bool posix_lck = false;
2065	struct cifs_sb_info *cifs_sb;
2066	struct cifs_tcon *tcon;
2067	struct cifsFileInfo *cfile;
2068	__u32 type;
2069
2070	rc = -EACCES;
2071	xid = get_xid();
2072
2073	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x flags=0x%x type=0x%x r=%lld:%lld\n", __func__, file, cmd,
2074		 flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2075		 (long long)flock->fl_end);
2076
2077	cfile = (struct cifsFileInfo *)file->private_data;
2078	tcon = tlink_tcon(cfile->tlink);
2079
2080	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2081			tcon->ses->server);
2082	cifs_sb = CIFS_FILE_SB(file);
2083	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2084
2085	if (cap_unix(tcon->ses) &&
2086	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2087	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2088		posix_lck = true;
2089	/*
2090	 * BB add code here to normalize offset and length to account for
2091	 * negative length which we cannot accept over the wire.
2092	 */
2093	if (IS_GETLK(cmd)) {
2094		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2095		free_xid(xid);
2096		return rc;
2097	}
2098
2099	if (!lock && !unlock) {
2100		/*
2101		 * if this is neither a lock nor an unlock request then there
2102		 * is nothing to do since we do not know what it is
2103		 */
2104		free_xid(xid);
2105		return -EOPNOTSUPP;
2106	}
2107
2108	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2109			xid);
2110	free_xid(xid);
2111	return rc;
2112}
2113
2114/*
2115 * update the file size (if needed) after a write. Should be called with
2116 * the inode->i_lock held
2117 */
2118void
2119cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2120		      unsigned int bytes_written)
2121{
2122	loff_t end_of_write = offset + bytes_written;
2123
2124	if (end_of_write > cifsi->server_eof)
2125		cifsi->server_eof = end_of_write;
2126}
2127
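/*
 * Synchronously write @write_size bytes from @write_data to the server at
 * @*offset, reopening the handle and retrying on -EAGAIN.  Updates the
 * cached EOF and i_size and returns the number of bytes written, or a
 * negative error if nothing could be written.
 */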
2128static ssize_t
2129cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2130	   size_t write_size, loff_t *offset)
2131{
2132	int rc = 0;
2133	unsigned int bytes_written = 0;
2134	unsigned int total_written;
2135	struct cifs_tcon *tcon;
2136	struct TCP_Server_Info *server;
2137	unsigned int xid;
2138	struct dentry *dentry = open_file->dentry;
2139	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2140	struct cifs_io_parms io_parms = {0};
2141
2142	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2143		 write_size, *offset, dentry);
2144
2145	tcon = tlink_tcon(open_file->tlink);
2146	server = tcon->ses->server;
2147
2148	if (!server->ops->sync_write)
2149		return -ENOSYS;
2150
2151	xid = get_xid();
2152
2153	for (total_written = 0; write_size > total_written;
2154	     total_written += bytes_written) {
2155		rc = -EAGAIN;
2156		while (rc == -EAGAIN) {
2157			struct kvec iov[2];
2158			unsigned int len;
2159
2160			if (open_file->invalidHandle) {
2161				/* we could deadlock if we called
2162				   filemap_fdatawait from here so tell
2163				   reopen_file not to flush data to
2164				   server now */
2165				rc = cifs_reopen_file(open_file, false);
2166				if (rc != 0)
2167					break;
2168			}
2169
2170			len = min(server->ops->wp_retry_size(d_inode(dentry)),
2171				  (unsigned int)write_size - total_written);
2172			/* iov[0] is reserved for smb header */
2173			iov[1].iov_base = (char *)write_data + total_written;
2174			iov[1].iov_len = len;
2175			io_parms.pid = pid;
2176			io_parms.tcon = tcon;
2177			io_parms.offset = *offset;
2178			io_parms.length = len;
2179			rc = server->ops->sync_write(xid, &open_file->fid,
2180					&io_parms, &bytes_written, iov, 1);
2181		}
2182		if (rc || (bytes_written == 0)) {
2183			if (total_written)
2184				break;
2185			else {
2186				free_xid(xid);
2187				return rc;
2188			}
2189		} else {
2190			spin_lock(&d_inode(dentry)->i_lock);
2191			cifs_update_eof(cifsi, *offset, bytes_written);
2192			spin_unlock(&d_inode(dentry)->i_lock);
2193			*offset += bytes_written;
2194		}
2195	}
2196
2197	cifs_stats_bytes_written(tcon, total_written);
2198
2199	if (total_written > 0) {
2200		spin_lock(&d_inode(dentry)->i_lock);
2201		if (*offset > d_inode(dentry)->i_size) {
2202			i_size_write(d_inode(dentry), *offset);
2203			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2204		}
2205		spin_unlock(&d_inode(dentry)->i_lock);
2206	}
2207	mark_inode_dirty_sync(d_inode(dentry));
2208	free_xid(xid);
2209	return total_written;
2210}
2211
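/*
 * Find an open handle on the inode that can be used for reading and take a
 * reference on it.  Returns NULL if no valid readable handle exists.
 */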
2212struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2213					bool fsuid_only)
2214{
2215	struct cifsFileInfo *open_file = NULL;
2216	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2217
2218	/* only filter by fsuid on multiuser mounts */
2219	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2220		fsuid_only = false;
2221
2222	spin_lock(&cifs_inode->open_file_lock);
2223	/* we could simply get the first_list_entry since write-only entries
2224	   are always at the end of the list but since the first entry might
2225	   have a close pending, we go through the whole list */
2226	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2227		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2228			continue;
2229		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2230			if (!open_file->invalidHandle) {
2231				/* found a good file */
2232				/* lock it so it will not be closed on us */
2233				cifsFileInfo_get(open_file);
2234				spin_unlock(&cifs_inode->open_file_lock);
2235				return open_file;
2236			} /* else might as well continue, and look for
2237			     another, or simply have the caller reopen it
2238			     again rather than trying to fix this handle */
2239		} else /* write only file */
2240			break; /* write only files are last so must be done */
2241	}
2242	spin_unlock(&cifs_inode->open_file_lock);
2243	return NULL;
2244}
2245
2246/* Return -EBADF if no handle is found and general rc otherwise */
2247int
2248cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2249		       struct cifsFileInfo **ret_file)
2250{
2251	struct cifsFileInfo *open_file, *inv_file = NULL;
2252	struct cifs_sb_info *cifs_sb;
2253	bool any_available = false;
2254	int rc = -EBADF;
2255	unsigned int refind = 0;
2256	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2257	bool with_delete = flags & FIND_WR_WITH_DELETE;
2258	*ret_file = NULL;
2259
2260	/*
2261	 * Having a null inode here (because mapping->host was set to zero by
2262	 * the VFS or MM) should not happen but we had reports of an oops (due
2263	 * to it being zero) during stress testcases so we need to check for it.
2264	 */
2265
2266	if (cifs_inode == NULL) {
2267		cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2268		dump_stack();
2269		return rc;
2270	}
2271
2272	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2273
2274	/* only filter by fsuid on multiuser mounts */
2275	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2276		fsuid_only = false;
2277
2278	spin_lock(&cifs_inode->open_file_lock);
2279refind_writable:
2280	if (refind > MAX_REOPEN_ATT) {
2281		spin_unlock(&cifs_inode->open_file_lock);
2282		return rc;
2283	}
2284	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2285		if (!any_available && open_file->pid != current->tgid)
2286			continue;
2287		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2288			continue;
2289		if (with_delete && !(open_file->fid.access & DELETE))
2290			continue;
2291		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2292			if (!open_file->invalidHandle) {
2293				/* found a good writable file */
2294				cifsFileInfo_get(open_file);
2295				spin_unlock(&cifs_inode->open_file_lock);
2296				*ret_file = open_file;
2297				return 0;
2298			} else {
2299				if (!inv_file)
2300					inv_file = open_file;
2301			}
2302		}
2303	}
2304	/* couldn't find a usable FH with the same pid, try any available */
2305	if (!any_available) {
2306		any_available = true;
2307		goto refind_writable;
2308	}
2309
2310	if (inv_file) {
2311		any_available = false;
2312		cifsFileInfo_get(inv_file);
2313	}
2314
2315	spin_unlock(&cifs_inode->open_file_lock);
2316
2317	if (inv_file) {
2318		rc = cifs_reopen_file(inv_file, false);
2319		if (!rc) {
2320			*ret_file = inv_file;
2321			return 0;
2322		}
2323
2324		spin_lock(&cifs_inode->open_file_lock);
2325		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2326		spin_unlock(&cifs_inode->open_file_lock);
2327		cifsFileInfo_put(inv_file);
2328		++refind;
2329		inv_file = NULL;
2330		spin_lock(&cifs_inode->open_file_lock);
2331		goto refind_writable;
2332	}
2333
2334	return rc;
2335}
2336
2337struct cifsFileInfo *
2338find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2339{
2340	struct cifsFileInfo *cfile;
2341	int rc;
2342
2343	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2344	if (rc)
2345		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2346
2347	return cfile;
2348}
2349
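/*
 * Look up a file already open on @tcon by its full path and return a
 * writable handle for it via cifs_get_writable_file().  Returns -ENOENT if
 * no open file matches @name.
 */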
2350int
2351cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2352		       int flags,
2353		       struct cifsFileInfo **ret_file)
2354{
2355	struct cifsFileInfo *cfile;
2356	void *page = alloc_dentry_path();
2357
2358	*ret_file = NULL;
2359
2360	spin_lock(&tcon->open_file_lock);
2361	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2362		struct cifsInodeInfo *cinode;
2363		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2364		if (IS_ERR(full_path)) {
2365			spin_unlock(&tcon->open_file_lock);
2366			free_dentry_path(page);
2367			return PTR_ERR(full_path);
2368		}
2369		if (strcmp(full_path, name))
2370			continue;
2371
2372		cinode = CIFS_I(d_inode(cfile->dentry));
2373		spin_unlock(&tcon->open_file_lock);
2374		free_dentry_path(page);
2375		return cifs_get_writable_file(cinode, flags, ret_file);
2376	}
2377
2378	spin_unlock(&tcon->open_file_lock);
2379	free_dentry_path(page);
2380	return -ENOENT;
2381}
2382
2383int
2384cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2385		       struct cifsFileInfo **ret_file)
2386{
2387	struct cifsFileInfo *cfile;
2388	void *page = alloc_dentry_path();
2389
2390	*ret_file = NULL;
2391
2392	spin_lock(&tcon->open_file_lock);
2393	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2394		struct cifsInodeInfo *cinode;
2395		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2396		if (IS_ERR(full_path)) {
2397			spin_unlock(&tcon->open_file_lock);
2398			free_dentry_path(page);
2399			return PTR_ERR(full_path);
2400		}
2401		if (strcmp(full_path, name))
2402			continue;
2403
2404		cinode = CIFS_I(d_inode(cfile->dentry));
2405		spin_unlock(&tcon->open_file_lock);
2406		free_dentry_path(page);
2407		*ret_file = find_readable_file(cinode, 0);
2408		return *ret_file ? 0 : -ENOENT;
2409	}
2410
2411	spin_unlock(&tcon->open_file_lock);
2412	free_dentry_path(page);
2413	return -ENOENT;
2414}
2415
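/*
 * Final kref release for a cifs_writedata: drop any SMB-direct memory
 * registration and the file reference, then free the structure.
 */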
2416void
2417cifs_writedata_release(struct kref *refcount)
2418{
2419	struct cifs_writedata *wdata = container_of(refcount,
2420					struct cifs_writedata, refcount);
2421#ifdef CONFIG_CIFS_SMB_DIRECT
2422	if (wdata->mr) {
2423		smbd_deregister_mr(wdata->mr);
2424		wdata->mr = NULL;
2425	}
2426#endif
2427
2428	if (wdata->cfile)
2429		cifsFileInfo_put(wdata->cfile);
2430
2431	kfree(wdata);
2432}
2433
2434/*
2435 * Write failed with a retryable error. Resend the write request. It's also
2436 * possible that the page was redirtied so re-clean the page.
2437 */
2438static void
2439cifs_writev_requeue(struct cifs_writedata *wdata)
2440{
2441	int rc = 0;
2442	struct inode *inode = d_inode(wdata->cfile->dentry);
2443	struct TCP_Server_Info *server;
2444	unsigned int rest_len = wdata->bytes;
2445	loff_t fpos = wdata->offset;
2446
2447	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2448	do {
2449		struct cifs_writedata *wdata2;
2450		unsigned int wsize, cur_len;
2451
2452		wsize = server->ops->wp_retry_size(inode);
2453		if (wsize < rest_len) {
2454			if (wsize < PAGE_SIZE) {
2455				rc = -EOPNOTSUPP;
2456				break;
2457			}
2458			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2459		} else {
2460			cur_len = rest_len;
2461		}
2462
2463		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2464		if (!wdata2) {
2465			rc = -ENOMEM;
2466			break;
2467		}
2468
2469		wdata2->sync_mode = wdata->sync_mode;
2470		wdata2->offset	= fpos;
2471		wdata2->bytes	= cur_len;
2472		wdata2->iter	= wdata->iter;
2473
2474		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2475		iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2476
2477		if (iov_iter_is_xarray(&wdata2->iter))
2478			/* Check for pages having been redirtied and clean
2479			 * them.  We can do this by walking the xarray.  If
2480			 * it's not an xarray, then it's a DIO and we shouldn't
2481			 * be mucking around with the page bits.
2482			 */
2483			cifs_undirty_folios(inode, fpos, cur_len);
2484
2485		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2486					    &wdata2->cfile);
2487		if (!wdata2->cfile) {
2488			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2489				 rc);
2490			if (!is_retryable_error(rc))
2491				rc = -EBADF;
2492		} else {
2493			wdata2->pid = wdata2->cfile->pid;
2494			rc = server->ops->async_writev(wdata2,
2495						       cifs_writedata_release);
2496		}
2497
2498		kref_put(&wdata2->refcount, cifs_writedata_release);
2499		if (rc) {
2500			if (is_retryable_error(rc))
2501				continue;
2502			fpos += cur_len;
2503			rest_len -= cur_len;
2504			break;
2505		}
2506
2507		fpos += cur_len;
2508		rest_len -= cur_len;
2509	} while (rest_len > 0);
2510
2511	/* Clean up remaining pages from the original wdata */
2512	if (iov_iter_is_xarray(&wdata->iter))
2513		cifs_pages_write_failed(inode, fpos, rest_len);
2514
2515	if (rc != 0 && !is_retryable_error(rc))
2516		mapping_set_error(inode->i_mapping, rc);
2517	kref_put(&wdata->refcount, cifs_writedata_release);
2518}
2519
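/*
 * Completion work for an async write: update the cached EOF on success,
 * requeue the write if it failed with -EAGAIN under WB_SYNC_ALL, and
 * otherwise mark the affected pages written back, redirtied or failed as
 * appropriate.
 */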
2520void
2521cifs_writev_complete(struct work_struct *work)
2522{
2523	struct cifs_writedata *wdata = container_of(work,
2524						struct cifs_writedata, work);
2525	struct inode *inode = d_inode(wdata->cfile->dentry);
2526
2527	if (wdata->result == 0) {
2528		spin_lock(&inode->i_lock);
2529		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2530		spin_unlock(&inode->i_lock);
2531		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2532					 wdata->bytes);
2533	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2534		return cifs_writev_requeue(wdata);
2535
2536	if (wdata->result == -EAGAIN)
2537		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2538	else if (wdata->result < 0)
2539		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2540	else
2541		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2542
2543	if (wdata->result != -EAGAIN)
2544		mapping_set_error(inode->i_mapping, wdata->result);
2545	kref_put(&wdata->refcount, cifs_writedata_release);
2546}
2547
2548struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2549{
2550	struct cifs_writedata *wdata;
2551
2552	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2553	if (wdata != NULL) {
2554		kref_init(&wdata->refcount);
2555		INIT_LIST_HEAD(&wdata->list);
2556		init_completion(&wdata->done);
2557		INIT_WORK(&wdata->work, complete);
2558	}
2559	return wdata;
2560}
2561
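/*
 * Synchronously write the byte range [@from, @to) of @page back to the
 * server using any available writable handle, trimming the range so that
 * the write does not extend the file.
 */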
2562static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2563{
2564	struct address_space *mapping = page->mapping;
2565	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2566	char *write_data;
2567	int rc = -EFAULT;
2568	int bytes_written = 0;
2569	struct inode *inode;
2570	struct cifsFileInfo *open_file;
2571
2572	if (!mapping || !mapping->host)
2573		return -EFAULT;
2574
2575	inode = page->mapping->host;
2576
2577	offset += (loff_t)from;
2578	write_data = kmap(page);
2579	write_data += from;
2580
2581	if ((to > PAGE_SIZE) || (from > to)) {
2582		kunmap(page);
2583		return -EIO;
2584	}
2585
2586	/* racing with truncate? */
2587	if (offset > mapping->host->i_size) {
2588		kunmap(page);
2589		return 0; /* don't care */
2590	}
2591
2592	/* check to make sure that we are not extending the file */
2593	if (mapping->host->i_size - offset < (loff_t)to)
2594		to = (unsigned)(mapping->host->i_size - offset);
2595
2596	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2597				    &open_file);
2598	if (!rc) {
2599		bytes_written = cifs_write(open_file, open_file->pid,
2600					   write_data, to - from, &offset);
2601		cifsFileInfo_put(open_file);
2602		/* Does mm or vfs already set times? */
2603		simple_inode_init_ts(inode);
2604		if ((bytes_written > 0) && (offset))
2605			rc = 0;
2606		else if (bytes_written < 0)
2607			rc = bytes_written;
2608		else
2609			rc = -EFAULT;
2610	} else {
2611		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2612		if (!is_retryable_error(rc))
2613			rc = -EIO;
2614	}
2615
2616	kunmap(page);
2617	return rc;
2618}
2619
2620/*
2621 * Extend the region to be written back to include subsequent contiguously
2622 * dirty pages if possible, but don't sleep while doing so.
2623 */
2624static void cifs_extend_writeback(struct address_space *mapping,
2625				  struct xa_state *xas,
2626				  long *_count,
2627				  loff_t start,
2628				  int max_pages,
2629				  loff_t max_len,
2630				  size_t *_len)
2631{
2632	struct folio_batch batch;
2633	struct folio *folio;
2634	unsigned int nr_pages;
2635	pgoff_t index = (start + *_len) / PAGE_SIZE;
2636	size_t len;
2637	bool stop = true;
2638	unsigned int i;
2639
2640	folio_batch_init(&batch);
2641
2642	do {
2643		/* Firstly, we gather up a batch of contiguous dirty pages
2644		 * under the RCU read lock - but we can't clear the dirty flags
2645		 * there if any of those pages are mapped.
2646		 */
2647		rcu_read_lock();
2648
2649		xas_for_each(xas, folio, ULONG_MAX) {
2650			stop = true;
2651			if (xas_retry(xas, folio))
2652				continue;
2653			if (xa_is_value(folio))
2654				break;
2655			if (folio->index != index) {
2656				xas_reset(xas);
2657				break;
2658			}
2659
2660			if (!folio_try_get_rcu(folio)) {
2661				xas_reset(xas);
2662				continue;
2663			}
2664			nr_pages = folio_nr_pages(folio);
2665			if (nr_pages > max_pages) {
2666				xas_reset(xas);
2667				break;
2668			}
2669
2670			/* Has the page moved or been split? */
2671			if (unlikely(folio != xas_reload(xas))) {
2672				folio_put(folio);
2673				xas_reset(xas);
2674				break;
2675			}
2676
2677			if (!folio_trylock(folio)) {
2678				folio_put(folio);
2679				xas_reset(xas);
2680				break;
2681			}
2682			if (!folio_test_dirty(folio) ||
2683			    folio_test_writeback(folio)) {
2684				folio_unlock(folio);
2685				folio_put(folio);
2686				xas_reset(xas);
2687				break;
2688			}
2689
2690			max_pages -= nr_pages;
2691			len = folio_size(folio);
2692			stop = false;
2693
2694			index += nr_pages;
2695			*_count -= nr_pages;
2696			*_len += len;
2697			if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
2698				stop = true;
2699
2700			if (!folio_batch_add(&batch, folio))
2701				break;
2702			if (stop)
2703				break;
2704		}
2705
2706		xas_pause(xas);
2707		rcu_read_unlock();
2708
2709		/* Now, if we obtained any pages, we can shift them to being
2710		 * writable and mark them for caching.
2711		 */
2712		if (!folio_batch_count(&batch))
2713			break;
2714
2715		for (i = 0; i < folio_batch_count(&batch); i++) {
2716			folio = batch.folios[i];
2717			/* The folio should be locked, dirty and not undergoing
2718			 * writeback from the loop above.
2719			 */
2720			if (!folio_clear_dirty_for_io(folio))
2721				WARN_ON(1);
2722			folio_start_writeback(folio);
2723			folio_unlock(folio);
2724		}
2725
2726		folio_batch_release(&batch);
2727		cond_resched();
2728	} while (!stop);
2729}
2730
2731/*
2732 * Write back the locked page and any subsequent non-locked dirty pages.
2733 */
2734static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2735						 struct writeback_control *wbc,
2736						 struct xa_state *xas,
2737						 struct folio *folio,
2738						 unsigned long long start,
2739						 unsigned long long end)
2740{
2741	struct inode *inode = mapping->host;
2742	struct TCP_Server_Info *server;
2743	struct cifs_writedata *wdata;
2744	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2745	struct cifs_credits credits_on_stack;
2746	struct cifs_credits *credits = &credits_on_stack;
2747	struct cifsFileInfo *cfile = NULL;
2748	unsigned long long i_size = i_size_read(inode), max_len;
2749	unsigned int xid, wsize;
2750	size_t len = folio_size(folio);
2751	long count = wbc->nr_to_write;
2752	int rc;
2753
2754	/* The folio should be locked, dirty and not undergoing writeback. */
2755	if (!folio_clear_dirty_for_io(folio))
2756		WARN_ON_ONCE(1);
2757	folio_start_writeback(folio);
2758
2759	count -= folio_nr_pages(folio);
2760
2761	xid = get_xid();
2762	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2763
2764	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2765	if (rc) {
2766		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2767		goto err_xid;
2768	}
2769
2770	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2771					   &wsize, credits);
2772	if (rc != 0)
2773		goto err_close;
2774
2775	wdata = cifs_writedata_alloc(cifs_writev_complete);
2776	if (!wdata) {
2777		rc = -ENOMEM;
2778		goto err_uncredit;
2779	}
2780
2781	wdata->sync_mode = wbc->sync_mode;
2782	wdata->offset = folio_pos(folio);
2783	wdata->pid = cfile->pid;
2784	wdata->credits = credits_on_stack;
2785	wdata->cfile = cfile;
2786	wdata->server = server;
2787	cfile = NULL;
2788
2789	/* Find all consecutive lockable dirty pages that have contiguous
2790	 * written regions, stopping when we find a page that is not
2791	 * immediately lockable, is not dirty or is missing, or we reach the
2792	 * end of the range.
2793	 */
2794	if (start < i_size) {
2795		/* Trim the write to the EOF; the extra data is ignored.  Also
2796		 * put an upper limit on the size of a single storedata op.
2797		 */
2798		max_len = wsize;
2799		max_len = min_t(unsigned long long, max_len, end - start + 1);
2800		max_len = min_t(unsigned long long, max_len, i_size - start);
2801
2802		if (len < max_len) {
2803			int max_pages = INT_MAX;
2804
2805#ifdef CONFIG_CIFS_SMB_DIRECT
2806			if (server->smbd_conn)
2807				max_pages = server->smbd_conn->max_frmr_depth;
2808#endif
2809			max_pages -= folio_nr_pages(folio);
2810
2811			if (max_pages > 0)
2812				cifs_extend_writeback(mapping, xas, &count, start,
2813						      max_pages, max_len, &len);
2814		}
2815	}
2816	len = min_t(unsigned long long, len, i_size - start);
2817
2818	/* We now have a contiguous set of dirty pages, each with writeback
2819	 * set; the first page is still locked at this point, but all the rest
2820	 * have been unlocked.
2821	 */
2822	folio_unlock(folio);
2823	wdata->bytes = len;
2824
2825	if (start < i_size) {
2826		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2827				start, len);
2828
2829		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2830		if (rc)
2831			goto err_wdata;
2832
2833		if (wdata->cfile->invalidHandle)
2834			rc = -EAGAIN;
2835		else
2836			rc = wdata->server->ops->async_writev(wdata,
2837							      cifs_writedata_release);
2838		if (rc >= 0) {
2839			kref_put(&wdata->refcount, cifs_writedata_release);
2840			goto err_close;
2841		}
2842	} else {
2843		/* The dirty region was entirely beyond the EOF. */
2844		cifs_pages_written_back(inode, start, len);
2845		rc = 0;
2846	}
2847
2848err_wdata:
2849	kref_put(&wdata->refcount, cifs_writedata_release);
2850err_uncredit:
2851	add_credits_and_wake_if(server, credits, 0);
2852err_close:
2853	if (cfile)
2854		cifsFileInfo_put(cfile);
2855err_xid:
2856	free_xid(xid);
2857	if (rc == 0) {
2858		wbc->nr_to_write = count;
2859		rc = len;
2860	} else if (is_retryable_error(rc)) {
2861		cifs_pages_write_redirty(inode, start, len);
2862	} else {
2863		cifs_pages_write_failed(inode, start, len);
2864		mapping_set_error(mapping, rc);
2865	}
2866	/* Indication to update ctime and mtime as close is deferred */
2867	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2868	return rc;
2869}
2870
2871/*
2872 * Find the first dirty folio in the range, lock it and write back from it
2873 */
2874static ssize_t cifs_writepages_begin(struct address_space *mapping,
2875				     struct writeback_control *wbc,
2876				     struct xa_state *xas,
2877				     unsigned long long *_start,
2878				     unsigned long long end)
2879{
2880	struct folio *folio;
2881	unsigned long long start = *_start;
2882	ssize_t ret;
2883	int skips = 0;
2884
2885search_again:
2886	/* Find the first dirty page. */
2887	rcu_read_lock();
2888
2889	for (;;) {
2890		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
2891		if (xas_retry(xas, folio) || xa_is_value(folio))
2892			continue;
2893		if (!folio)
2894			break;
2895
2896		if (!folio_try_get_rcu(folio)) {
2897			xas_reset(xas);
2898			continue;
2899		}
2900
2901		if (unlikely(folio != xas_reload(xas))) {
2902			folio_put(folio);
2903			xas_reset(xas);
2904			continue;
2905		}
2906
2907		xas_pause(xas);
2908		break;
2909	}
2910	rcu_read_unlock();
2911	if (!folio)
2912		return 0;
2913
2914	start = folio_pos(folio); /* May regress with THPs */
2915
2916	/* At this point we hold neither the i_pages lock nor the page lock:
2917	 * the page may be truncated or invalidated (changing page->mapping to
2918	 * NULL), or even swizzled back from swapper_space to tmpfs file
2919	 * mapping
2920	 */
2921lock_again:
2922	if (wbc->sync_mode != WB_SYNC_NONE) {
2923		ret = folio_lock_killable(folio);
2924		if (ret < 0)
2925			return ret;
2926	} else {
2927		if (!folio_trylock(folio))
2928			goto search_again;
2929	}
2930
2931	if (folio->mapping != mapping ||
2932	    !folio_test_dirty(folio)) {
2933		start += folio_size(folio);
2934		folio_unlock(folio);
2935		goto search_again;
2936	}
2937
2938	if (folio_test_writeback(folio) ||
2939	    folio_test_fscache(folio)) {
2940		folio_unlock(folio);
2941		if (wbc->sync_mode != WB_SYNC_NONE) {
2942			folio_wait_writeback(folio);
2943#ifdef CONFIG_CIFS_FSCACHE
2944			folio_wait_fscache(folio);
2945#endif
2946			goto lock_again;
2947		}
2948
2949		start += folio_size(folio);
2950		if (wbc->sync_mode == WB_SYNC_NONE) {
2951			if (skips >= 5 || need_resched()) {
2952				ret = 0;
2953				goto out;
2954			}
2955			skips++;
2956		}
2957		goto search_again;
2958	}
2959
2960	ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
2961out:
2962	if (ret > 0)
2963		*_start = start + ret;
2964	return ret;
2965}
2966
2967/*
2968 * Write a region of pages back to the server
2969 */
2970static int cifs_writepages_region(struct address_space *mapping,
2971				  struct writeback_control *wbc,
2972				  unsigned long long *_start,
2973				  unsigned long long end)
2974{
2975	ssize_t ret;
2976
2977	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
2978
2979	do {
2980		ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
2981		if (ret > 0 && wbc->nr_to_write > 0)
2982			cond_resched();
2983	} while (ret > 0 && wbc->nr_to_write > 0);
2984
2985	return ret > 0 ? 0 : ret;
2986}
2987
2988/*
2989 * Write some of the pending data back to the server
2990 */
2991static int cifs_writepages(struct address_space *mapping,
2992			   struct writeback_control *wbc)
2993{
2994	loff_t start, end;
2995	int ret;
2996
2997	/* We have to be careful as we can end up racing with setattr()
2998	 * truncating the pagecache since the caller doesn't take a lock here
2999	 * to prevent it.
3000	 */
3001
3002	if (wbc->range_cyclic && mapping->writeback_index) {
3003		start = mapping->writeback_index * PAGE_SIZE;
3004		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3005		if (ret < 0)
3006			goto out;
3007
3008		if (wbc->nr_to_write <= 0) {
3009			mapping->writeback_index = start / PAGE_SIZE;
3010			goto out;
3011		}
3012
3013		start = 0;
3014		end = mapping->writeback_index * PAGE_SIZE;
3015		mapping->writeback_index = 0;
3016		ret = cifs_writepages_region(mapping, wbc, &start, end);
3017		if (ret == 0)
3018			mapping->writeback_index = start / PAGE_SIZE;
3019	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
3020		start = 0;
3021		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3022		if (wbc->nr_to_write > 0 && ret == 0)
3023			mapping->writeback_index = start / PAGE_SIZE;
3024	} else {
3025		start = wbc->range_start;
3026		ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
3027	}
3028
3029out:
3030	return ret;
3031}
3032
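/*
 * Write a single locked page back to the server via cifs_partialpagewrite(),
 * retrying -EAGAIN for data-integrity (WB_SYNC_ALL) writeback and redirtying
 * the page on other retryable errors.
 */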
3033static int
3034cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3035{
3036	int rc;
3037	unsigned int xid;
3038
3039	xid = get_xid();
3040/* BB add check for wbc flags */
3041	get_page(page);
3042	if (!PageUptodate(page))
3043		cifs_dbg(FYI, "ppw - page not up to date\n");
3044
3045	/*
3046	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3047	 *
3048	 * A writepage() implementation always needs to do either this,
3049	 * or re-dirty the page with "redirty_page_for_writepage()" in
3050	 * the case of a failure.
3051	 *
3052	 * Just unlocking the page would leave the radix tree tag-bits
3053	 * out of sync with the true state of the page.
3054	 */
3055	set_page_writeback(page);
3056retry_write:
3057	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3058	if (is_retryable_error(rc)) {
3059		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3060			goto retry_write;
3061		redirty_page_for_writepage(wbc, page);
3062	} else if (rc != 0) {
3063		SetPageError(page);
3064		mapping_set_error(page->mapping, rc);
3065	} else {
3066		SetPageUptodate(page);
3067	}
3068	end_page_writeback(page);
3069	put_page(page);
3070	free_xid(xid);
3071	return rc;
3072}
3073
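/*
 * ->write_end() for the cifs address space: mark the folio up to date when a
 * full copy landed; if the folio is still not up to date, write the copied
 * bytes straight to the server instead of dirtying the page.  Update i_size
 * if the file grew and note that timestamps need updating on deferred close.
 */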
3074static int cifs_write_end(struct file *file, struct address_space *mapping,
3075			loff_t pos, unsigned len, unsigned copied,
3076			struct page *page, void *fsdata)
3077{
3078	int rc;
3079	struct inode *inode = mapping->host;
3080	struct cifsFileInfo *cfile = file->private_data;
3081	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3082	struct folio *folio = page_folio(page);
3083	__u32 pid;
3084
3085	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3086		pid = cfile->pid;
3087	else
3088		pid = current->tgid;
3089
3090	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3091		 page, pos, copied);
3092
3093	if (folio_test_checked(folio)) {
3094		if (copied == len)
3095			folio_mark_uptodate(folio);
3096		folio_clear_checked(folio);
3097	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3098		folio_mark_uptodate(folio);
3099
3100	if (!folio_test_uptodate(folio)) {
3101		char *page_data;
3102		unsigned offset = pos & (PAGE_SIZE - 1);
3103		unsigned int xid;
3104
3105		xid = get_xid();
3106		/* this is probably better than directly calling
3107		   partialpage_write since in this function the file handle is
3108		   known which we might as well leverage */
3109		/* BB check if anything else missing out of ppw
3110		   such as updating last write time */
3111		page_data = kmap(page);
3112		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3113		/* if (rc < 0) should we set writebehind rc? */
3114		kunmap(page);
3115
3116		free_xid(xid);
3117	} else {
3118		rc = copied;
3119		pos += copied;
3120		set_page_dirty(page);
3121	}
3122
3123	if (rc > 0) {
3124		spin_lock(&inode->i_lock);
3125		if (pos > inode->i_size) {
3126			i_size_write(inode, pos);
3127			inode->i_blocks = (512 - 1 + pos) >> 9;
3128		}
3129		spin_unlock(&inode->i_lock);
3130	}
3131
3132	unlock_page(page);
3133	put_page(page);
3134	/* Indication to update ctime and mtime as close is deferred */
3135	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3136
3137	return rc;
3138}
3139
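/*
 * Strict-cache fsync: flush and wait on dirty pagecache, invalidate the
 * local cache if we no longer hold a read lease, then ask the server to
 * flush the file unless CIFS_MOUNT_NOSSYNC is set on the mount.
 */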
3140int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3141		      int datasync)
3142{
3143	unsigned int xid;
3144	int rc = 0;
3145	struct cifs_tcon *tcon;
3146	struct TCP_Server_Info *server;
3147	struct cifsFileInfo *smbfile = file->private_data;
3148	struct inode *inode = file_inode(file);
3149	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3150
3151	rc = file_write_and_wait_range(file, start, end);
3152	if (rc) {
3153		trace_cifs_fsync_err(inode->i_ino, rc);
3154		return rc;
3155	}
3156
3157	xid = get_xid();
3158
3159	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3160		 file, datasync);
3161
3162	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3163		rc = cifs_zap_mapping(inode);
3164		if (rc) {
3165			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3166			rc = 0; /* don't care about it in fsync */
3167		}
3168	}
3169
3170	tcon = tlink_tcon(smbfile->tlink);
3171	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3172		server = tcon->ses->server;
3173		if (server->ops->flush == NULL) {
3174			rc = -ENOSYS;
3175			goto strict_fsync_exit;
3176		}
3177
3178		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3179			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3180			if (smbfile) {
3181				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3182				cifsFileInfo_put(smbfile);
3183			} else
3184				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3185		} else
3186			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3187	}
3188
3189strict_fsync_exit:
3190	free_xid(xid);
3191	return rc;
3192}
3193
3194int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3195{
3196	unsigned int xid;
3197	int rc = 0;
3198	struct cifs_tcon *tcon;
3199	struct TCP_Server_Info *server;
3200	struct cifsFileInfo *smbfile = file->private_data;
3201	struct inode *inode = file_inode(file);
3202	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3203
3204	rc = file_write_and_wait_range(file, start, end);
3205	if (rc) {
3206		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3207		return rc;
3208	}
3209
3210	xid = get_xid();
3211
3212	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3213		 file, datasync);
3214
3215	tcon = tlink_tcon(smbfile->tlink);
3216	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3217		server = tcon->ses->server;
3218		if (server->ops->flush == NULL) {
3219			rc = -ENOSYS;
3220			goto fsync_exit;
3221		}
3222
3223		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3224			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3225			if (smbfile) {
3226				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3227				cifsFileInfo_put(smbfile);
3228			} else
3229				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3230		} else
3231			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3232	}
3233
3234fsync_exit:
3235	free_xid(xid);
3236	return rc;
3237}
3238
3239/*
3240 * As file closes, flush all cached write data for this inode checking
3241 * for write behind errors.
3242 */
3243int cifs_flush(struct file *file, fl_owner_t id)
3244{
3245	struct inode *inode = file_inode(file);
3246	int rc = 0;
3247
3248	if (file->f_mode & FMODE_WRITE)
3249		rc = filemap_write_and_wait(inode->i_mapping);
3250
3251	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3252	if (rc) {
3253		/* get more nuanced writeback errors */
3254		rc = filemap_check_wb_err(file->f_mapping, 0);
3255		trace_cifs_flush_err(inode->i_ino, rc);
3256	}
3257	return rc;
3258}
3259
3260static void
3261cifs_uncached_writedata_release(struct kref *refcount)
3262{
3263	struct cifs_writedata *wdata = container_of(refcount,
3264					struct cifs_writedata, refcount);
3265
3266	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3267	cifs_writedata_release(refcount);
3268}
3269
3270static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3271
3272static void
3273cifs_uncached_writev_complete(struct work_struct *work)
3274{
3275	struct cifs_writedata *wdata = container_of(work,
3276					struct cifs_writedata, work);
3277	struct inode *inode = d_inode(wdata->cfile->dentry);
3278	struct cifsInodeInfo *cifsi = CIFS_I(inode);
3279
3280	spin_lock(&inode->i_lock);
3281	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3282	if (cifsi->server_eof > inode->i_size)
3283		i_size_write(inode, cifsi->server_eof);
3284	spin_unlock(&inode->i_lock);
3285
3286	complete(&wdata->done);
3287	collect_uncached_write_data(wdata->ctx);
3288	/* the below call can possibly free the last ref to aio ctx */
3289	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3290}
3291
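/*
 * Resend a failed uncached write in its entirety: reopen the handle if it
 * was invalidated, wait until enough credits are available to cover the
 * whole wdata, then reissue the async write until it is accepted or fails
 * with a non-retryable error.
 */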
3292static int
3293cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3294	struct cifs_aio_ctx *ctx)
3295{
3296	unsigned int wsize;
3297	struct cifs_credits credits;
3298	int rc;
3299	struct TCP_Server_Info *server = wdata->server;
3300
3301	do {
3302		if (wdata->cfile->invalidHandle) {
3303			rc = cifs_reopen_file(wdata->cfile, false);
3304			if (rc == -EAGAIN)
3305				continue;
3306			else if (rc)
3307				break;
3308		}
3309
3311		/*
3312		 * Wait for credits to resend this wdata.
3313		 * Note: we are attempting to resend the whole wdata rather than
3314		 * in segments
3315		 */
3316		do {
3317			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3318						&wsize, &credits);
3319			if (rc)
3320				goto fail;
3321
3322			if (wsize < wdata->bytes) {
3323				add_credits_and_wake_if(server, &credits, 0);
3324				msleep(1000);
3325			}
3326		} while (wsize < wdata->bytes);
3327		wdata->credits = credits;
3328
3329		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3330
3331		if (!rc) {
3332			if (wdata->cfile->invalidHandle)
3333				rc = -EAGAIN;
3334			else {
3335#ifdef CONFIG_CIFS_SMB_DIRECT
3336				if (wdata->mr) {
3337					wdata->mr->need_invalidate = true;
3338					smbd_deregister_mr(wdata->mr);
3339					wdata->mr = NULL;
3340				}
3341#endif
3342				rc = server->ops->async_writev(wdata,
3343					cifs_uncached_writedata_release);
3344			}
3345		}
3346
3347		/* If the write was successfully sent, we are done */
3348		if (!rc) {
3349			list_add_tail(&wdata->list, wdata_list);
3350			return 0;
3351		}
3352
3353		/* Roll back credits and retry if needed */
3354		add_credits_and_wake_if(server, &wdata->credits, 0);
3355	} while (rc == -EAGAIN);
3356
3357fail:
3358	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3359	return rc;
3360}
3361
3362/*
3363 * Select the span of a bvec iterator we're going to use.  Limit it by both maximum
3364 * size and maximum number of segments.
3365 */
3366static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3367				     size_t max_segs, unsigned int *_nsegs)
3368{
3369	const struct bio_vec *bvecs = iter->bvec;
3370	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3371	size_t len, span = 0, n = iter->count;
3372	size_t skip = iter->iov_offset;
3373
3374	if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3375		return 0;
3376
3377	while (n && ix < nbv && skip) {
3378		len = bvecs[ix].bv_len;
3379		if (skip < len)
3380			break;
3381		skip -= len;
3382		n -= len;
3383		ix++;
3384	}
3385
3386	while (n && ix < nbv) {
3387		len = min3(n, bvecs[ix].bv_len - skip, max_size);
3388		span += len;
3389		max_size -= len;
3390		nsegs++;
3391		ix++;
3392		if (max_size == 0 || nsegs >= max_segs)
3393			break;
3394		skip = 0;
3395		n -= len;
3396	}
3397
3398	*_nsegs = nsegs;
3399	return span;
3400}
3401
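/*
 * Carve the source iterator into chunks bounded by the negotiated wsize,
 * the available credits and (for SMB-direct) the FRMR depth, allocate a
 * cifs_writedata for each chunk and dispatch it with the async write op,
 * queueing each wdata on @wdata_list for the collection phase.
 */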
3402static int
3403cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3404		     struct cifsFileInfo *open_file,
3405		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3406		     struct cifs_aio_ctx *ctx)
3407{
3408	int rc = 0;
3409	size_t cur_len, max_len;
3410	struct cifs_writedata *wdata;
3411	pid_t pid;
3412	struct TCP_Server_Info *server;
3413	unsigned int xid, max_segs = INT_MAX;
3414
3415	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3416		pid = open_file->pid;
3417	else
3418		pid = current->tgid;
3419
3420	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3421	xid = get_xid();
3422
3423#ifdef CONFIG_CIFS_SMB_DIRECT
3424	if (server->smbd_conn)
3425		max_segs = server->smbd_conn->max_frmr_depth;
3426#endif
3427
3428	do {
3429		struct cifs_credits credits_on_stack;
3430		struct cifs_credits *credits = &credits_on_stack;
3431		unsigned int wsize, nsegs = 0;
3432
3433		if (signal_pending(current)) {
3434			rc = -EINTR;
3435			break;
3436		}
3437
3438		if (open_file->invalidHandle) {
3439			rc = cifs_reopen_file(open_file, false);
3440			if (rc == -EAGAIN)
3441				continue;
3442			else if (rc)
3443				break;
3444		}
3445
3446		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3447						   &wsize, credits);
3448		if (rc)
3449			break;
3450
3451		max_len = min_t(const size_t, len, wsize);
3452		if (!max_len) {
3453			rc = -EAGAIN;
3454			add_credits_and_wake_if(server, credits, 0);
3455			break;
3456		}
3457
3458		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3459		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3460			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
3461		if (cur_len == 0) {
3462			rc = -EIO;
3463			add_credits_and_wake_if(server, credits, 0);
3464			break;
3465		}
3466
3467		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3468		if (!wdata) {
3469			rc = -ENOMEM;
3470			add_credits_and_wake_if(server, credits, 0);
3471			break;
3472		}
3473
3474		wdata->sync_mode = WB_SYNC_ALL;
3475		wdata->offset	= (__u64)fpos;
3476		wdata->cfile	= cifsFileInfo_get(open_file);
3477		wdata->server	= server;
3478		wdata->pid	= pid;
3479		wdata->bytes	= cur_len;
3480		wdata->credits	= credits_on_stack;
3481		wdata->iter	= *from;
3482		wdata->ctx	= ctx;
3483		kref_get(&ctx->refcount);
3484
3485		iov_iter_truncate(&wdata->iter, cur_len);
3486
3487		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3488
3489		if (!rc) {
3490			if (wdata->cfile->invalidHandle)
3491				rc = -EAGAIN;
3492			else
3493				rc = server->ops->async_writev(wdata,
3494					cifs_uncached_writedata_release);
3495		}
3496
3497		if (rc) {
3498			add_credits_and_wake_if(server, &wdata->credits, 0);
3499			kref_put(&wdata->refcount,
3500				 cifs_uncached_writedata_release);
3501			if (rc == -EAGAIN)
3502				continue;
3503			break;
3504		}
3505
3506		list_add_tail(&wdata->list, wdata_list);
3507		iov_iter_advance(from, cur_len);
3508		fpos += cur_len;
3509		len -= cur_len;
3510	} while (len > 0);
3511
3512	free_xid(xid);
3513	return rc;
3514}
3515
3516static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3517{
3518	struct cifs_writedata *wdata, *tmp;
3519	struct cifs_tcon *tcon;
3520	struct cifs_sb_info *cifs_sb;
3521	struct dentry *dentry = ctx->cfile->dentry;
3522	ssize_t rc;
3523
3524	tcon = tlink_tcon(ctx->cfile->tlink);
3525	cifs_sb = CIFS_SB(dentry->d_sb);
3526
3527	mutex_lock(&ctx->aio_mutex);
3528
3529	if (list_empty(&ctx->list)) {
3530		mutex_unlock(&ctx->aio_mutex);
3531		return;
3532	}
3533
3534	rc = ctx->rc;
3535	/*
3536	 * Wait for and collect replies for any successful sends in order of
3537	 * increasing offset. Once an error is hit, then return without waiting
3538	 * for any more replies.
3539	 */
3540restart_loop:
3541	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3542		if (!rc) {
3543			if (!try_wait_for_completion(&wdata->done)) {
3544				mutex_unlock(&ctx->aio_mutex);
3545				return;
3546			}
3547
3548			if (wdata->result)
3549				rc = wdata->result;
3550			else
3551				ctx->total_len += wdata->bytes;
3552
3553			/* resend call if it's a retryable error */
3554			if (rc == -EAGAIN) {
3555				struct list_head tmp_list;
3556				struct iov_iter tmp_from = ctx->iter;
3557
3558				INIT_LIST_HEAD(&tmp_list);
3559				list_del_init(&wdata->list);
3560
3561				if (ctx->direct_io)
3562					rc = cifs_resend_wdata(
3563						wdata, &tmp_list, ctx);
3564				else {
3565					iov_iter_advance(&tmp_from,
3566						 wdata->offset - ctx->pos);
3567
3568					rc = cifs_write_from_iter(wdata->offset,
3569						wdata->bytes, &tmp_from,
3570						ctx->cfile, cifs_sb, &tmp_list,
3571						ctx);
3572
3573					kref_put(&wdata->refcount,
3574						cifs_uncached_writedata_release);
3575				}
3576
3577				list_splice(&tmp_list, &ctx->list);
3578				goto restart_loop;
3579			}
3580		}
3581		list_del_init(&wdata->list);
3582		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3583	}
3584
3585	cifs_stats_bytes_written(tcon, ctx->total_len);
3586	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3587
3588	ctx->rc = (rc == 0) ? ctx->total_len : rc;
3589
3590	mutex_unlock(&ctx->aio_mutex);
3591
3592	if (ctx->iocb && ctx->iocb->ki_complete)
3593		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3594	else
3595		complete(&ctx->done);
3596}
3597
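/*
 * Common implementation for cifs_user_writev() and cifs_direct_writev().
 * Pins or copies the source iterator so it can be used from worker threads,
 * issues the uncached writes, and either waits for completion (synchronous
 * kiocb) or returns -EIOCBQUEUED and lets the completion path finish the
 * aio request.
 */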
3598static ssize_t __cifs_writev(
3599	struct kiocb *iocb, struct iov_iter *from, bool direct)
3600{
3601	struct file *file = iocb->ki_filp;
3602	ssize_t total_written = 0;
3603	struct cifsFileInfo *cfile;
3604	struct cifs_tcon *tcon;
3605	struct cifs_sb_info *cifs_sb;
3606	struct cifs_aio_ctx *ctx;
3607	int rc;
3608
3609	rc = generic_write_checks(iocb, from);
3610	if (rc <= 0)
3611		return rc;
3612
3613	cifs_sb = CIFS_FILE_SB(file);
3614	cfile = file->private_data;
3615	tcon = tlink_tcon(cfile->tlink);
3616
3617	if (!tcon->ses->server->ops->async_writev)
3618		return -ENOSYS;
3619
3620	ctx = cifs_aio_ctx_alloc();
3621	if (!ctx)
3622		return -ENOMEM;
3623
3624	ctx->cfile = cifsFileInfo_get(cfile);
3625
3626	if (!is_sync_kiocb(iocb))
3627		ctx->iocb = iocb;
3628
3629	ctx->pos = iocb->ki_pos;
3630	ctx->direct_io = direct;
3631	ctx->nr_pinned_pages = 0;
3632
3633	if (user_backed_iter(from)) {
3634		/*
3635		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3636		 * they contain references to the calling process's virtual
3637		 * memory layout which won't be available in an async worker
3638		 * thread.  This also takes a pin on every folio involved.
3639		 */
3640		rc = netfs_extract_user_iter(from, iov_iter_count(from),
3641					     &ctx->iter, 0);
3642		if (rc < 0) {
3643			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3644			return rc;
3645		}
3646
3647		ctx->nr_pinned_pages = rc;
3648		ctx->bv = (void *)ctx->iter.bvec;
3649		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3650	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3651		   !is_sync_kiocb(iocb)) {
3652		/*
3653		 * If the op is asynchronous, we need to copy the list attached
3654		 * to a BVEC/KVEC-type iterator, but we assume that the storage
3655		 * will be pinned by the caller; in any case, we may or may not
3656		 * be able to pin the pages, so we don't try.
3657		 */
3658		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3659		if (!ctx->bv) {
3660			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3661			return -ENOMEM;
3662		}
3663	} else {
3664		/*
3665		 * Otherwise, we just pass the iterator down as-is and rely on
3666		 * the caller to make sure the pages referred to by the
3667		 * iterator don't evaporate.
3668		 */
3669		ctx->iter = *from;
3670	}
3671
3672	ctx->len = iov_iter_count(&ctx->iter);
3673
3674	/* grab a lock here because the write response handlers can access ctx */
3675	mutex_lock(&ctx->aio_mutex);
3676
3677	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3678				  cfile, cifs_sb, &ctx->list, ctx);
3679
3680	/*
3681	 * If at least one write was successfully sent, then discard any rc
3682	 * value from the later writes. If the other writes succeed, then
3683	 * we'll end up returning whatever was written. If they fail, then
3684	 * we'll get a new rc value from that.
3685	 */
3686	if (!list_empty(&ctx->list))
3687		rc = 0;
3688
3689	mutex_unlock(&ctx->aio_mutex);
3690
3691	if (rc) {
3692		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3693		return rc;
3694	}
3695
3696	if (!is_sync_kiocb(iocb)) {
3697		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3698		return -EIOCBQUEUED;
3699	}
3700
3701	rc = wait_for_completion_killable(&ctx->done);
3702	if (rc) {
3703		mutex_lock(&ctx->aio_mutex);
3704		ctx->rc = rc = -EINTR;
3705		total_written = ctx->total_len;
3706		mutex_unlock(&ctx->aio_mutex);
3707	} else {
3708		rc = ctx->rc;
3709		total_written = ctx->total_len;
3710	}
3711
3712	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3713
3714	if (unlikely(!total_written))
3715		return rc;
3716
3717	iocb->ki_pos += total_written;
3718	return total_written;
3719}
3720
3721ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3722{
3723	struct file *file = iocb->ki_filp;
3724
3725	cifs_revalidate_mapping(file->f_inode);
3726	return __cifs_writev(iocb, from, true);
3727}
3728
3729ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3730{
3731	return __cifs_writev(iocb, from, false);
3732}
3733
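/*
 * Cached write used when we hold a write lease but mandatory byte-range
 * locks may be present: hold lock_sem to keep the lock list stable, refuse
 * the write if it conflicts with an exclusive brlock, and otherwise go
 * through the generic pagecache write path, syncing if required.
 */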
3734static ssize_t
3735cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3736{
3737	struct file *file = iocb->ki_filp;
3738	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3739	struct inode *inode = file->f_mapping->host;
3740	struct cifsInodeInfo *cinode = CIFS_I(inode);
3741	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3742	ssize_t rc;
3743
3744	inode_lock(inode);
3745	/*
3746	 * We need to hold the sem to be sure nobody modifies the lock list
3747	 * with a brlock that prevents writing.
3748	 */
3749	down_read(&cinode->lock_sem);
3750
3751	rc = generic_write_checks(iocb, from);
3752	if (rc <= 0)
3753		goto out;
3754
3755	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3756				     server->vals->exclusive_lock_type, 0,
3757				     NULL, CIFS_WRITE_OP))
3758		rc = __generic_file_write_iter(iocb, from);
3759	else
3760		rc = -EACCES;
3761out:
3762	up_read(&cinode->lock_sem);
3763	inode_unlock(inode);
3764
3765	if (rc > 0)
3766		rc = generic_write_sync(iocb, rc);
3767	return rc;
3768}
3769
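/*
 * Strict-cache write: when we hold a write lease the data can go through
 * the pagecache (subject to brlock conflict checks unless POSIX locks are
 * in use); otherwise write uncached and zap any read-cached pages so that
 * subsequent reads refetch from the server.
 */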
3770ssize_t
3771cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3772{
3773	struct inode *inode = file_inode(iocb->ki_filp);
3774	struct cifsInodeInfo *cinode = CIFS_I(inode);
3775	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3776	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3777						iocb->ki_filp->private_data;
3778	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3779	ssize_t written;
3780
3781	written = cifs_get_writer(cinode);
3782	if (written)
3783		return written;
3784
3785	if (CIFS_CACHE_WRITE(cinode)) {
3786		if (cap_unix(tcon->ses) &&
3787		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3788		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3789			written = generic_file_write_iter(iocb, from);
3790			goto out;
3791		}
3792		written = cifs_writev(iocb, from);
3793		goto out;
3794	}
3795	/*
3796	 * For non-oplocked files in strict cache mode we need to write the data
3797	 * to the server exactly from pos to pos+len-1 rather than flush all
3798	 * affected pages because it may cause an error with mandatory locks on
3799	 * these pages but not on the region from pos to pos+len-1.
3800	 */
3801	written = cifs_user_writev(iocb, from);
3802	if (CIFS_CACHE_READ(cinode)) {
3803		/*
3804		 * We have read level caching and we have just sent a write
3805		 * request to the server thus making data in the cache stale.
3806		 * Zap the cache and set oplock/lease level to NONE to avoid
3807		 * reading stale data from the cache. All subsequent read
3808		 * operations will read new data from the server.
3809		 */
3810		cifs_zap_mapping(inode);
3811		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3812			 inode);
3813		cinode->oplock = 0;
3814	}
3815out:
3816	cifs_put_writer(cinode);
3817	return written;
3818}
3819
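/*
 * Allocate and initialise a read descriptor; @complete is the work function
 * that handles completion of the read.
 */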
3820static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3821{
3822	struct cifs_readdata *rdata;
3823
3824	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3825	if (rdata) {
3826		kref_init(&rdata->refcount);
3827		INIT_LIST_HEAD(&rdata->list);
3828		init_completion(&rdata->done);
3829		INIT_WORK(&rdata->work, complete);
3830	}
3831
3832	return rdata;
3833}
3834
3835void
3836cifs_readdata_release(struct kref *refcount)
3837{
3838	struct cifs_readdata *rdata = container_of(refcount,
3839					struct cifs_readdata, refcount);
3840
3841	if (rdata->ctx)
3842		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3843#ifdef CONFIG_CIFS_SMB_DIRECT
3844	if (rdata->mr) {
3845		smbd_deregister_mr(rdata->mr);
3846		rdata->mr = NULL;
3847	}
3848#endif
3849	if (rdata->cfile)
3850		cifsFileInfo_put(rdata->cfile);
3851
3852	kfree(rdata);
3853}
3854
3855static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3856
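/*
 * Completion work for an uncached/direct read: wake any waiter, collect the
 * finished requests into the aio context and drop our reference to the rdata.
 */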
3857static void
3858cifs_uncached_readv_complete(struct work_struct *work)
3859{
3860	struct cifs_readdata *rdata = container_of(work,
3861						struct cifs_readdata, work);
3862
3863	complete(&rdata->done);
3864	collect_uncached_read_data(rdata->ctx);
3865	/* the call below can possibly free the last ref to the aio ctx */
3866	kref_put(&rdata->refcount, cifs_readdata_release);
3867}
3868
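/*
 * Resend a read request that failed with -EAGAIN on the direct I/O path:
 * reopen an invalidated handle, wait for enough credits to cover the whole
 * rdata and reissue the async read.
 */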
3869static int cifs_resend_rdata(struct cifs_readdata *rdata,
3870			struct list_head *rdata_list,
3871			struct cifs_aio_ctx *ctx)
3872{
3873	unsigned int rsize;
3874	struct cifs_credits credits;
3875	int rc;
3876	struct TCP_Server_Info *server;
3877
3878	/* XXX: should we pick a new channel here? */
3879	server = rdata->server;
3880
3881	do {
3882		if (rdata->cfile->invalidHandle) {
3883			rc = cifs_reopen_file(rdata->cfile, true);
3884			if (rc == -EAGAIN)
3885				continue;
3886			else if (rc)
3887				break;
3888		}
3889
3890		/*
3891		 * Wait for credits to resend this rdata.
3892		 * Note: we are attempting to resend the whole rdata not in
3893		 * segments
3894		 */
3895		do {
3896			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3897						&rsize, &credits);
3898
3899			if (rc)
3900				goto fail;
3901
3902			if (rsize < rdata->bytes) {
3903				add_credits_and_wake_if(server, &credits, 0);
3904				msleep(1000);
3905			}
3906		} while (rsize < rdata->bytes);
3907		rdata->credits = credits;
3908
3909		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3910		if (!rc) {
3911			if (rdata->cfile->invalidHandle)
3912				rc = -EAGAIN;
3913			else {
3914#ifdef CONFIG_CIFS_SMB_DIRECT
3915				if (rdata->mr) {
3916					rdata->mr->need_invalidate = true;
3917					smbd_deregister_mr(rdata->mr);
3918					rdata->mr = NULL;
3919				}
3920#endif
3921				rc = server->ops->async_readv(rdata);
3922			}
3923		}
3924
3925		/* If the read was successfully sent, we are done */
3926		if (!rc) {
3927			/* Add to aio pending list */
3928			list_add_tail(&rdata->list, rdata_list);
3929			return 0;
3930		}
3931
3932		/* Roll back credits and retry if needed */
3933		add_credits_and_wake_if(server, &rdata->credits, 0);
3934	} while (rc == -EAGAIN);
3935
3936fail:
3937	kref_put(&rdata->refcount, cifs_readdata_release);
3938	return rc;
3939}
3940
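/*
 * Split an uncached read into rsize-bounded chunks, wrap each chunk in an
 * rdata and issue it with ->async_readv(), queuing the rdatas on @rdata_list
 * for later collection.
 */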
3941static int
3942cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
3943		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3944		     struct cifs_aio_ctx *ctx)
3945{
3946	struct cifs_readdata *rdata;
3947	unsigned int rsize, nsegs, max_segs = INT_MAX;
3948	struct cifs_credits credits_on_stack;
3949	struct cifs_credits *credits = &credits_on_stack;
3950	size_t cur_len, max_len;
3951	int rc;
3952	pid_t pid;
3953	struct TCP_Server_Info *server;
3954
3955	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3956
3957#ifdef CONFIG_CIFS_SMB_DIRECT
3958	if (server->smbd_conn)
3959		max_segs = server->smbd_conn->max_frmr_depth;
3960#endif
3961
3962	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3963		pid = open_file->pid;
3964	else
3965		pid = current->tgid;
3966
3967	do {
3968		if (open_file->invalidHandle) {
3969			rc = cifs_reopen_file(open_file, true);
3970			if (rc == -EAGAIN)
3971				continue;
3972			else if (rc)
3973				break;
3974		}
3975
3976		if (cifs_sb->ctx->rsize == 0)
3977			cifs_sb->ctx->rsize =
3978				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
3979							     cifs_sb->ctx);
3980
3981		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3982						   &rsize, credits);
3983		if (rc)
3984			break;
3985
3986		max_len = min_t(size_t, len, rsize);
3987
3988		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
3989						 max_segs, &nsegs);
3990		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3991			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
3992		if (cur_len == 0) {
3993			rc = -EIO;
3994			add_credits_and_wake_if(server, credits, 0);
3995			break;
3996		}
3997
3998		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
3999		if (!rdata) {
4000			add_credits_and_wake_if(server, credits, 0);
4001			rc = -ENOMEM;
4002			break;
4003		}
4004
4005		rdata->server	= server;
4006		rdata->cfile	= cifsFileInfo_get(open_file);
4007		rdata->offset	= fpos;
4008		rdata->bytes	= cur_len;
4009		rdata->pid	= pid;
4010		rdata->credits	= credits_on_stack;
4011		rdata->ctx	= ctx;
4012		kref_get(&ctx->refcount);
4013
4014		rdata->iter	= ctx->iter;
4015		iov_iter_truncate(&rdata->iter, cur_len);
4016
4017		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4018
4019		if (!rc) {
4020			if (rdata->cfile->invalidHandle)
4021				rc = -EAGAIN;
4022			else
4023				rc = server->ops->async_readv(rdata);
4024		}
4025
4026		if (rc) {
4027			add_credits_and_wake_if(server, &rdata->credits, 0);
4028			kref_put(&rdata->refcount, cifs_readdata_release);
4029			if (rc == -EAGAIN)
4030				continue;
4031			break;
4032		}
4033
4034		list_add_tail(&rdata->list, rdata_list);
4035		iov_iter_advance(&ctx->iter, cur_len);
4036		fpos += cur_len;
4037		len -= cur_len;
4038	} while (len > 0);
4039
4040	return rc;
4041}
4042
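/*
 * Gather the results of the outstanding uncached read requests, resending
 * any that came back with -EAGAIN, then record the total length read (or the
 * error) in the aio context and complete it.
 */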
4043static void
4044collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4045{
4046	struct cifs_readdata *rdata, *tmp;
4047	struct cifs_sb_info *cifs_sb;
4048	int rc;
4049
4050	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4051
4052	mutex_lock(&ctx->aio_mutex);
4053
4054	if (list_empty(&ctx->list)) {
4055		mutex_unlock(&ctx->aio_mutex);
4056		return;
4057	}
4058
4059	rc = ctx->rc;
4060	/* the loop below should proceed in the order of increasing offsets */
4061again:
4062	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4063		if (!rc) {
4064			if (!try_wait_for_completion(&rdata->done)) {
4065				mutex_unlock(&ctx->aio_mutex);
4066				return;
4067			}
4068
4069			if (rdata->result == -EAGAIN) {
4070				/* resend call if it's a retryable error */
4071				struct list_head tmp_list;
4072				unsigned int got_bytes = rdata->got_bytes;
4073
4074				list_del_init(&rdata->list);
4075				INIT_LIST_HEAD(&tmp_list);
4076
4077				if (ctx->direct_io) {
4078					/*
4079					 * Re-use rdata as this is a
4080					 * direct I/O
4081					 */
4082					rc = cifs_resend_rdata(
4083						rdata,
4084						&tmp_list, ctx);
4085				} else {
4086					rc = cifs_send_async_read(
4087						rdata->offset + got_bytes,
4088						rdata->bytes - got_bytes,
4089						rdata->cfile, cifs_sb,
4090						&tmp_list, ctx);
4091
4092					kref_put(&rdata->refcount,
4093						cifs_readdata_release);
4094				}
4095
4096				list_splice(&tmp_list, &ctx->list);
4097
4098				goto again;
4099			} else if (rdata->result)
4100				rc = rdata->result;
4101
4102			/* if there was a short read -- discard anything left */
4103			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4104				rc = -ENODATA;
4105
4106			ctx->total_len += rdata->got_bytes;
4107		}
4108		list_del_init(&rdata->list);
4109		kref_put(&rdata->refcount, cifs_readdata_release);
4110	}
4111
4112	/* mask nodata case */
4113	if (rc == -ENODATA)
4114		rc = 0;
4115
4116	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4117
4118	mutex_unlock(&ctx->aio_mutex);
4119
4120	if (ctx->iocb && ctx->iocb->ki_complete)
4121		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4122	else
4123		complete(&ctx->done);
4124}
4125
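/*
 * Common implementation of uncached and direct reads: marshal the caller's
 * iterator into the aio context, issue the async reads and, for a
 * synchronous kiocb, wait for completion and advance the file position.
 */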
4126static ssize_t __cifs_readv(
4127	struct kiocb *iocb, struct iov_iter *to, bool direct)
4128{
4129	size_t len;
4130	struct file *file = iocb->ki_filp;
4131	struct cifs_sb_info *cifs_sb;
4132	struct cifsFileInfo *cfile;
4133	struct cifs_tcon *tcon;
4134	ssize_t rc, total_read = 0;
4135	loff_t offset = iocb->ki_pos;
4136	struct cifs_aio_ctx *ctx;
4137
4138	len = iov_iter_count(to);
4139	if (!len)
4140		return 0;
4141
4142	cifs_sb = CIFS_FILE_SB(file);
4143	cfile = file->private_data;
4144	tcon = tlink_tcon(cfile->tlink);
4145
4146	if (!tcon->ses->server->ops->async_readv)
4147		return -ENOSYS;
4148
4149	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4150		cifs_dbg(FYI, "attempting read on write only file instance\n");
4151
4152	ctx = cifs_aio_ctx_alloc();
4153	if (!ctx)
4154		return -ENOMEM;
4155
4156	ctx->pos	= offset;
4157	ctx->direct_io	= direct;
4158	ctx->len	= len;
4159	ctx->cfile	= cifsFileInfo_get(cfile);
4160	ctx->nr_pinned_pages = 0;
4161
4162	if (!is_sync_kiocb(iocb))
4163		ctx->iocb = iocb;
4164
4165	if (user_backed_iter(to)) {
4166		/*
4167		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4168		 * they contain references to the calling process's virtual
4169		 * memory layout which won't be available in an async worker
4170		 * thread.  This also takes a pin on every folio involved.
4171		 */
4172		rc = netfs_extract_user_iter(to, iov_iter_count(to),
4173					     &ctx->iter, 0);
4174		if (rc < 0) {
4175			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4176			return rc;
4177		}
4178
4179		ctx->nr_pinned_pages = rc;
4180		ctx->bv = (void *)ctx->iter.bvec;
4181		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4182		ctx->should_dirty = true;
4183	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4184		   !is_sync_kiocb(iocb)) {
4185		/*
4186		 * If the op is asynchronous, we need to copy the list attached
4187		 * to a BVEC/KVEC-type iterator, but we assume that the storage
4188		 * will be retained by the caller; in any case, we may or may
4189		 * not be able to pin the pages, so we don't try.
4190		 */
4191		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4192		if (!ctx->bv) {
4193			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4194			return -ENOMEM;
4195		}
4196	} else {
4197		/*
4198		 * Otherwise, we just pass the iterator down as-is and rely on
4199		 * the caller to make sure the pages referred to by the
4200		 * iterator don't evaporate.
4201		 */
4202		ctx->iter = *to;
4203	}
4204
4205	if (direct) {
4206		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4207						  offset, offset + len - 1);
4208		if (rc) {
4209			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4210			return -EAGAIN;
4211		}
4212	}
4213
4214	/* grab a lock here because read response handlers can access ctx */
4215	mutex_lock(&ctx->aio_mutex);
4216
4217	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4218
4219	/* if at least one read request was successfully sent, then reset rc */
4220	if (!list_empty(&ctx->list))
4221		rc = 0;
4222
4223	mutex_unlock(&ctx->aio_mutex);
4224
4225	if (rc) {
4226		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4227		return rc;
4228	}
4229
4230	if (!is_sync_kiocb(iocb)) {
4231		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4232		return -EIOCBQUEUED;
4233	}
4234
4235	rc = wait_for_completion_killable(&ctx->done);
4236	if (rc) {
4237		mutex_lock(&ctx->aio_mutex);
4238		ctx->rc = rc = -EINTR;
4239		total_read = ctx->total_len;
4240		mutex_unlock(&ctx->aio_mutex);
4241	} else {
4242		rc = ctx->rc;
4243		total_read = ctx->total_len;
4244	}
4245
4246	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4247
4248	if (total_read) {
4249		iocb->ki_pos += total_read;
4250		return total_read;
4251	}
4252	return rc;
4253}
4254
4255ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4256{
4257	return __cifs_readv(iocb, to, true);
4258}
4259
4260ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4261{
4262	return __cifs_readv(iocb, to, false);
4263}
4264
4265ssize_t
4266cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4267{
4268	struct inode *inode = file_inode(iocb->ki_filp);
4269	struct cifsInodeInfo *cinode = CIFS_I(inode);
4270	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4271	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4272						iocb->ki_filp->private_data;
4273	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4274	int rc = -EACCES;
4275
4276	/*
4277	 * In strict cache mode we need to read from the server all the time
4278	 * if we don't have a level II oplock because the server can delay mtime
4279	 * changes - so we can't make a decision about invalidating the inode.
4280	 * We can also fail page reads if there are mandatory locks
4281	 * on pages affected by this read but not on the region from pos to
4282	 * pos+len-1.
4283	 */
4284	if (!CIFS_CACHE_READ(cinode))
4285		return cifs_user_readv(iocb, to);
4286
4287	if (cap_unix(tcon->ses) &&
4288	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4289	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4290		return generic_file_read_iter(iocb, to);
4291
4292	/*
4293	 * We need to hold the sem to be sure nobody modifies the lock list
4294	 * with a brlock that prevents reading.
4295	 */
4296	down_read(&cinode->lock_sem);
4297	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4298				     tcon->ses->server->vals->shared_lock_type,
4299				     0, NULL, CIFS_READ_OP))
4300		rc = generic_file_read_iter(iocb, to);
4301	up_read(&cinode->lock_sem);
4302	return rc;
4303}
4304
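/*
 * Synchronous read helper: pull up to @read_size bytes from the server in
 * rsize-bounded chunks into @read_data, advancing *@offset as data arrives.
 */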
4305static ssize_t
4306cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4307{
4308	int rc = -EACCES;
4309	unsigned int bytes_read = 0;
4310	unsigned int total_read;
4311	unsigned int current_read_size;
4312	unsigned int rsize;
4313	struct cifs_sb_info *cifs_sb;
4314	struct cifs_tcon *tcon;
4315	struct TCP_Server_Info *server;
4316	unsigned int xid;
4317	char *cur_offset;
4318	struct cifsFileInfo *open_file;
4319	struct cifs_io_parms io_parms = {0};
4320	int buf_type = CIFS_NO_BUFFER;
4321	__u32 pid;
4322
4323	xid = get_xid();
4324	cifs_sb = CIFS_FILE_SB(file);
4325
4326	/* FIXME: set up handlers for larger reads and/or convert to async */
4327	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4328
4329	if (file->private_data == NULL) {
4330		rc = -EBADF;
4331		free_xid(xid);
4332		return rc;
4333	}
4334	open_file = file->private_data;
4335	tcon = tlink_tcon(open_file->tlink);
4336	server = cifs_pick_channel(tcon->ses);
4337
4338	if (!server->ops->sync_read) {
4339		free_xid(xid);
4340		return -ENOSYS;
4341	}
4342
4343	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4344		pid = open_file->pid;
4345	else
4346		pid = current->tgid;
4347
4348	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4349		cifs_dbg(FYI, "attempting read on write only file instance\n");
4350
4351	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4352	     total_read += bytes_read, cur_offset += bytes_read) {
4353		do {
4354			current_read_size = min_t(uint, read_size - total_read,
4355						  rsize);
4356			/*
4357			 * For Windows ME and 9x we do not want to request more
4358			 * than the server negotiated since it will otherwise
4359			 * refuse the read.
4360			 */
4361			if (!(tcon->ses->capabilities &
4362				tcon->ses->server->vals->cap_large_files)) {
4363				current_read_size = min_t(uint,
4364					current_read_size, CIFSMaxBufSize);
4365			}
4366			if (open_file->invalidHandle) {
4367				rc = cifs_reopen_file(open_file, true);
4368				if (rc != 0)
4369					break;
4370			}
4371			io_parms.pid = pid;
4372			io_parms.tcon = tcon;
4373			io_parms.offset = *offset;
4374			io_parms.length = current_read_size;
4375			io_parms.server = server;
4376			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4377						    &bytes_read, &cur_offset,
4378						    &buf_type);
4379		} while (rc == -EAGAIN);
4380
4381		if (rc || (bytes_read == 0)) {
4382			if (total_read) {
4383				break;
4384			} else {
4385				free_xid(xid);
4386				return rc;
4387			}
4388		} else {
4389			cifs_stats_bytes_read(tcon, total_read);
4390			*offset += bytes_read;
4391		}
4392	}
4393	free_xid(xid);
4394	return total_read;
4395}
4396
4397/*
4398 * If the page is mmap'ed into a process' page tables, then we need to make
4399 * sure that it doesn't change while being written back.
4400 */
4401static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4402{
4403	struct folio *folio = page_folio(vmf->page);
4404
4405	/* Wait for the folio to be written to the cache before we allow it to
4406	 * be modified.  We then assume the entire folio will need writing back.
4407	 */
4408#ifdef CONFIG_CIFS_FSCACHE
4409	if (folio_test_fscache(folio) &&
4410	    folio_wait_fscache_killable(folio) < 0)
4411		return VM_FAULT_RETRY;
4412#endif
4413
4414	folio_wait_writeback(folio);
4415
4416	if (folio_lock_killable(folio) < 0)
4417		return VM_FAULT_RETRY;
4418	return VM_FAULT_LOCKED;
4419}
4420
4421static const struct vm_operations_struct cifs_file_vm_ops = {
4422	.fault = filemap_fault,
4423	.map_pages = filemap_map_pages,
4424	.page_mkwrite = cifs_page_mkwrite,
4425};
4426
4427int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4428{
4429	int xid, rc = 0;
4430	struct inode *inode = file_inode(file);
4431
4432	xid = get_xid();
4433
4434	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4435		rc = cifs_zap_mapping(inode);
4436	if (!rc)
4437		rc = generic_file_mmap(file, vma);
4438	if (!rc)
4439		vma->vm_ops = &cifs_file_vm_ops;
4440
4441	free_xid(xid);
4442	return rc;
4443}
4444
4445int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4446{
4447	int rc, xid;
4448
4449	xid = get_xid();
4450
4451	rc = cifs_revalidate_file(file);
4452	if (rc)
4453		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4454			 rc);
4455	if (!rc)
4456		rc = generic_file_mmap(file, vma);
4457	if (!rc)
4458		vma->vm_ops = &cifs_file_vm_ops;
4459
4460	free_xid(xid);
4461	return rc;
4462}
4463
4464/*
4465 * Unlock a bunch of folios in the pagecache.
4466 */
4467static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4468{
4469	struct folio *folio;
4470	XA_STATE(xas, &mapping->i_pages, first);
4471
4472	rcu_read_lock();
4473	xas_for_each(&xas, folio, last) {
4474		folio_unlock(folio);
4475	}
4476	rcu_read_unlock();
4477}
4478
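/*
 * Completion work for readahead I/O: copy good data to fscache if caching,
 * zero any shortfall, mark the folios uptodate on success and unlock them.
 */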
4479static void cifs_readahead_complete(struct work_struct *work)
4480{
4481	struct cifs_readdata *rdata = container_of(work,
4482						   struct cifs_readdata, work);
4483	struct folio *folio;
4484	pgoff_t last;
4485	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4486
4487	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4488
4489	if (good)
4490		cifs_readahead_to_fscache(rdata->mapping->host,
4491					  rdata->offset, rdata->bytes);
4492
4493	if (iov_iter_count(&rdata->iter) > 0)
4494		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4495
4496	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4497
4498	rcu_read_lock();
4499	xas_for_each(&xas, folio, last) {
4500		if (good) {
4501			flush_dcache_folio(folio);
4502			folio_mark_uptodate(folio);
4503		}
4504		folio_unlock(folio);
4505	}
4506	rcu_read_unlock();
4507
4508	kref_put(&rdata->refcount, cifs_readdata_release);
4509}
4510
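/*
 * Issue readahead, satisfying what we can from fscache and chopping the rest
 * into rsize-sized async read requests to the server.
 */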
4511static void cifs_readahead(struct readahead_control *ractl)
4512{
4513	struct cifsFileInfo *open_file = ractl->file->private_data;
4514	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4515	struct TCP_Server_Info *server;
4516	unsigned int xid, nr_pages, cache_nr_pages = 0;
4517	unsigned int ra_pages;
4518	pgoff_t next_cached = ULONG_MAX, ra_index;
4519	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4520		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4521	bool check_cache = caching;
4522	pid_t pid;
4523	int rc = 0;
4524
4525	/* Note that readahead_count() lags behind our dequeuing of pages from
4526	 * the ractl, so we have to keep track for ourselves.
4527	 */
4528	ra_pages = readahead_count(ractl);
4529	ra_index = readahead_index(ractl);
4530
4531	xid = get_xid();
4532
4533	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4534		pid = open_file->pid;
4535	else
4536		pid = current->tgid;
4537
4538	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4539
4540	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4541		 __func__, ractl->file, ractl->mapping, ra_pages);
4542
4543	/*
4544	 * Chop the readahead request up into rsize-sized read requests.
4545	 */
4546	while ((nr_pages = ra_pages)) {
4547		unsigned int i, rsize;
4548		struct cifs_readdata *rdata;
4549		struct cifs_credits credits_on_stack;
4550		struct cifs_credits *credits = &credits_on_stack;
4551		struct folio *folio;
4552		pgoff_t fsize;
4553
4554		/*
4555		 * Find out if we have anything cached in the range of
4556		 * interest, and if so, where the next chunk of cached data is.
4557		 */
4558		if (caching) {
4559			if (check_cache) {
4560				rc = cifs_fscache_query_occupancy(
4561					ractl->mapping->host, ra_index, nr_pages,
4562					&next_cached, &cache_nr_pages);
4563				if (rc < 0)
4564					caching = false;
4565				check_cache = false;
4566			}
4567
4568			if (ra_index == next_cached) {
4569				/*
4570				 * TODO: Send a whole batch of pages to be read
4571				 * by the cache.
4572				 */
4573				folio = readahead_folio(ractl);
4574				fsize = folio_nr_pages(folio);
4575				ra_pages -= fsize;
4576				ra_index += fsize;
4577				if (cifs_readpage_from_fscache(ractl->mapping->host,
4578							       &folio->page) < 0) {
4579					/*
4580					 * TODO: Deal with cache read failure
4581					 * here, but for the moment, delegate
4582					 * that to readpage.
4583					 */
4584					caching = false;
4585				}
4586				folio_unlock(folio);
4587				next_cached += fsize;
4588				cache_nr_pages -= fsize;
4589				if (cache_nr_pages == 0)
4590					check_cache = true;
4591				continue;
4592			}
4593		}
4594
4595		if (open_file->invalidHandle) {
4596			rc = cifs_reopen_file(open_file, true);
4597			if (rc) {
4598				if (rc == -EAGAIN)
4599					continue;
4600				break;
4601			}
4602		}
4603
4604		if (cifs_sb->ctx->rsize == 0)
4605			cifs_sb->ctx->rsize =
4606				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4607							     cifs_sb->ctx);
4608
4609		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4610						   &rsize, credits);
4611		if (rc)
4612			break;
4613		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4614		if (next_cached != ULONG_MAX)
4615			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4616
4617		/*
4618		 * Give up immediately if rsize is too small to read an entire
4619		 * page. The VFS will fall back to readpage. We should never
4620		 * reach this point however since we set ra_pages to 0 when the
4621		 * rsize is smaller than a cache page.
4622		 */
4623		if (unlikely(!nr_pages)) {
4624			add_credits_and_wake_if(server, credits, 0);
4625			break;
4626		}
4627
4628		rdata = cifs_readdata_alloc(cifs_readahead_complete);
4629		if (!rdata) {
4630			/* best to give up if we're out of mem */
4631			add_credits_and_wake_if(server, credits, 0);
4632			break;
4633		}
4634
4635		rdata->offset	= ra_index * PAGE_SIZE;
4636		rdata->bytes	= nr_pages * PAGE_SIZE;
4637		rdata->cfile	= cifsFileInfo_get(open_file);
4638		rdata->server	= server;
4639		rdata->mapping	= ractl->mapping;
4640		rdata->pid	= pid;
4641		rdata->credits	= credits_on_stack;
4642
4643		for (i = 0; i < nr_pages; i++) {
4644			if (!readahead_folio(ractl))
4645				WARN_ON(1);
4646		}
4647		ra_pages -= nr_pages;
4648		ra_index += nr_pages;
4649
4650		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4651				rdata->offset, rdata->bytes);
4652
4653		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4654		if (!rc) {
4655			if (rdata->cfile->invalidHandle)
4656				rc = -EAGAIN;
4657			else
4658				rc = server->ops->async_readv(rdata);
4659		}
4660
4661		if (rc) {
4662			add_credits_and_wake_if(server, &rdata->credits, 0);
4663			cifs_unlock_folios(rdata->mapping,
4664					   rdata->offset / PAGE_SIZE,
4665					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4666			/* Fall back to readpage in error/reconnect cases */
4667			kref_put(&rdata->refcount, cifs_readdata_release);
4668			break;
4669		}
4670
4671		kref_put(&rdata->refcount, cifs_readdata_release);
4672	}
4673
4674	free_xid(xid);
4675}
4676
4677/*
4678 * cifs_readpage_worker must be called with the page pinned
4679 */
4680static int cifs_readpage_worker(struct file *file, struct page *page,
4681	loff_t *poffset)
4682{
4683	struct inode *inode = file_inode(file);
4684	struct timespec64 atime, mtime;
4685	char *read_data;
4686	int rc;
4687
4688	/* Is the page cached? */
4689	rc = cifs_readpage_from_fscache(inode, page);
4690	if (rc == 0)
4691		goto read_complete;
4692
4693	read_data = kmap(page);
4694	/* for reads over a certain size we could initiate async read ahead */
4695
4696	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4697
4698	if (rc < 0)
4699		goto io_error;
4700	else
4701		cifs_dbg(FYI, "Bytes read %d\n", rc);
4702
4703	/* we do not want atime to be less than mtime, it broke some apps */
4704	atime = inode_set_atime_to_ts(inode, current_time(inode));
4705	mtime = inode_get_mtime(inode);
4706	if (timespec64_compare(&atime, &mtime) < 0)
4707		inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4708
4709	if (PAGE_SIZE > rc)
4710		memset(read_data + rc, 0, PAGE_SIZE - rc);
4711
4712	flush_dcache_page(page);
4713	SetPageUptodate(page);
4714	rc = 0;
4715
4716io_error:
4717	kunmap(page);
4718
4719read_complete:
4720	unlock_page(page);
4721	return rc;
4722}
4723
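/*
 * ->read_folio() handler: fill a single folio, from fscache if possible,
 * otherwise with a synchronous read from the server.
 */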
4724static int cifs_read_folio(struct file *file, struct folio *folio)
4725{
4726	struct page *page = &folio->page;
4727	loff_t offset = page_file_offset(page);
4728	int rc = -EACCES;
4729	unsigned int xid;
4730
4731	xid = get_xid();
4732
4733	if (file->private_data == NULL) {
4734		rc = -EBADF;
4735		free_xid(xid);
4736		return rc;
4737	}
4738
4739	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4740		 page, (int)offset, (int)offset);
4741
4742	rc = cifs_readpage_worker(file, page, &offset);
4743
4744	free_xid(xid);
4745	return rc;
4746}
4747
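/* Return 1 if any current opener of the inode has write access. */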
4748static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4749{
4750	struct cifsFileInfo *open_file;
4751
4752	spin_lock(&cifs_inode->open_file_lock);
4753	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4754		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4755			spin_unlock(&cifs_inode->open_file_lock);
4756			return 1;
4757		}
4758	}
4759	spin_unlock(&cifs_inode->open_file_lock);
4760	return 0;
4761}
4762
4763/* We do not want to update the file size from the server for inodes
4764 * open for write - to avoid races with writepage extending
4765 * the file - in the future we could consider allowing
4766 * refreshing the inode only on increases in the file size,
4767 * but this is tricky to do without racing with writebehind
4768 * page caching in the current Linux kernel design */
4769bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
4770			    bool from_readdir)
4771{
4772	if (!cifsInode)
4773		return true;
4774
4775	if (is_inode_writable(cifsInode) ||
4776		((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
4777		/* This inode is open for write at least once */
4778		struct cifs_sb_info *cifs_sb;
4779
4780		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4781		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4782			/* since there is no page cache to corrupt on directio
4783			 * we can change the size safely */
4784			return true;
4785		}
4786
4787		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4788			return true;
4789
4790		return false;
4791	} else
4792		return true;
4793}
4794
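/*
 * ->write_begin() handler: pin the page the copy from userspace will land in
 * and read in its existing contents when the write is partial and we cannot
 * otherwise guarantee the rest of the page is up to date.
 */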
4795static int cifs_write_begin(struct file *file, struct address_space *mapping,
4796			loff_t pos, unsigned len,
4797			struct page **pagep, void **fsdata)
4798{
4799	int oncethru = 0;
4800	pgoff_t index = pos >> PAGE_SHIFT;
4801	loff_t offset = pos & (PAGE_SIZE - 1);
4802	loff_t page_start = pos & PAGE_MASK;
4803	loff_t i_size;
4804	struct page *page;
4805	int rc = 0;
4806
4807	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4808
4809start:
4810	page = grab_cache_page_write_begin(mapping, index);
4811	if (!page) {
4812		rc = -ENOMEM;
4813		goto out;
4814	}
4815
4816	if (PageUptodate(page))
4817		goto out;
4818
4819	/*
4820	 * If we write a full page it will be up to date, no need to read from
4821	 * the server. If the write is short, we'll end up doing a sync write
4822	 * instead.
4823	 */
4824	if (len == PAGE_SIZE)
4825		goto out;
4826
4827	/*
4828	 * optimize away the read when we have an oplock, and we're not
4829	 * expecting to use any of the data we'd be reading in. That
4830	 * is, when the page lies beyond the EOF, or straddles the EOF
4831	 * and the write will cover all of the existing data.
4832	 */
4833	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4834		i_size = i_size_read(mapping->host);
4835		if (page_start >= i_size ||
4836		    (offset == 0 && (pos + len) >= i_size)) {
4837			zero_user_segments(page, 0, offset,
4838					   offset + len,
4839					   PAGE_SIZE);
4840			/*
4841			 * PageChecked means that the parts of the page
4842			 * to which we're not writing are considered up
4843			 * to date. Once the data is copied to the
4844			 * page, it can be set uptodate.
4845			 */
4846			SetPageChecked(page);
4847			goto out;
4848		}
4849	}
4850
4851	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4852		/*
4853		 * might as well read a page, it is fast enough. If we get
4854		 * an error, we don't need to return it. cifs_write_end will
4855		 * do a sync write instead since PG_uptodate isn't set.
4856		 */
4857		cifs_readpage_worker(file, page, &page_start);
4858		put_page(page);
4859		oncethru = 1;
4860		goto start;
4861	} else {
4862		/* we could try using another file handle if there is one -
4863		 * but how would we lock it to prevent a close of that handle
4864		 * racing with this read? In any case this will be
4865		 * written out by write_end so it is fine */
4866	}
4867out:
4868	*pagep = page;
4869	return rc;
4870}
4871
4872static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4873{
4874	if (folio_test_private(folio))
4875		return 0;
4876	if (folio_test_fscache(folio)) {
4877		if (current_is_kswapd() || !(gfp & __GFP_FS))
4878			return false;
4879		folio_wait_fscache(folio);
4880	}
4881	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4882	return true;
4883}
4884
4885static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4886				 size_t length)
4887{
4888	folio_wait_fscache(folio);
4889}
4890
4891static int cifs_launder_folio(struct folio *folio)
4892{
4893	int rc = 0;
4894	loff_t range_start = folio_pos(folio);
4895	loff_t range_end = range_start + folio_size(folio);
4896	struct writeback_control wbc = {
4897		.sync_mode = WB_SYNC_ALL,
4898		.nr_to_write = 0,
4899		.range_start = range_start,
4900		.range_end = range_end,
4901	};
4902
4903	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4904
4905	if (folio_clear_dirty_for_io(folio))
4906		rc = cifs_writepage_locked(&folio->page, &wbc);
4907
4908	folio_wait_fscache(folio);
4909	return rc;
4910}
4911
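/*
 * Work run when the server breaks an oplock or lease: downgrade the cached
 * state, flush and/or invalidate the pagecache as needed, push byte-range
 * locks to the server and acknowledge the break if required.
 */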
4912void cifs_oplock_break(struct work_struct *work)
4913{
4914	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4915						  oplock_break);
4916	struct inode *inode = d_inode(cfile->dentry);
4917	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4918	struct cifsInodeInfo *cinode = CIFS_I(inode);
4919	struct cifs_tcon *tcon;
4920	struct TCP_Server_Info *server;
4921	struct tcon_link *tlink;
4922	int rc = 0;
4923	bool purge_cache = false, oplock_break_cancelled;
4924	__u64 persistent_fid, volatile_fid;
4925	__u16 net_fid;
4926
4927	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4928			TASK_UNINTERRUPTIBLE);
4929
4930	tlink = cifs_sb_tlink(cifs_sb);
4931	if (IS_ERR(tlink))
4932		goto out;
4933	tcon = tlink_tcon(tlink);
4934	server = tcon->ses->server;
4935
4936	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4937				      cfile->oplock_epoch, &purge_cache);
4938
4939	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4940						cifs_has_mand_locks(cinode)) {
4941		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4942			 inode);
4943		cinode->oplock = 0;
4944	}
4945
4946	if (inode && S_ISREG(inode->i_mode)) {
4947		if (CIFS_CACHE_READ(cinode))
4948			break_lease(inode, O_RDONLY);
4949		else
4950			break_lease(inode, O_WRONLY);
4951		rc = filemap_fdatawrite(inode->i_mapping);
4952		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4953			rc = filemap_fdatawait(inode->i_mapping);
4954			mapping_set_error(inode->i_mapping, rc);
4955			cifs_zap_mapping(inode);
4956		}
4957		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4958		if (CIFS_CACHE_WRITE(cinode))
4959			goto oplock_break_ack;
4960	}
4961
4962	rc = cifs_push_locks(cfile);
4963	if (rc)
4964		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4965
4966oplock_break_ack:
4967	/*
4968	 * When an oplock break is received and there are no active
4969	 * file handles, only cached ones, schedule the deferred close
4970	 * immediately so that a new open will not use the cached handle.
4971	 */
4972
4973	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
4974		cifs_close_deferred_file(cinode);
4975
4976	persistent_fid = cfile->fid.persistent_fid;
4977	volatile_fid = cfile->fid.volatile_fid;
4978	net_fid = cfile->fid.netfid;
4979	oplock_break_cancelled = cfile->oplock_break_cancelled;
4980
4981	_cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
4982	/*
4983	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
4984	 * an acknowledgment to be sent when the file has already been closed.
4985	 */
4986	spin_lock(&cinode->open_file_lock);
4987	/* check list empty since can race with kill_sb calling tree disconnect */
4988	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
4989		spin_unlock(&cinode->open_file_lock);
4990		rc = server->ops->oplock_response(tcon, persistent_fid,
4991						  volatile_fid, net_fid, cinode);
4992		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4993	} else
4994		spin_unlock(&cinode->open_file_lock);
4995
4996	cifs_put_tlink(tlink);
4997out:
4998	cifs_done_oplock_break(cinode);
4999}
5000
5001/*
5002 * The presence of cifs_direct_io() in the address space ops vector
5003 * allows open() O_DIRECT flags which would have failed otherwise.
5004 *
5005 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
5006 * so this method should never be called.
5007 *
5008 * Direct IO is not yet supported in the cached mode.
5009 */
5010static ssize_t
5011cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5012{
5013	/*
5014	 * FIXME
5015	 * Eventually need to support direct IO for non forcedirectio mounts
5016	 */
5017	return -EINVAL;
5018}
5019
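/*
 * Prepare a file on a cifs mount for use as swap: reject files with holes,
 * mark the open file as a swapfile and register a single swap extent.
 */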
5020static int cifs_swap_activate(struct swap_info_struct *sis,
5021			      struct file *swap_file, sector_t *span)
5022{
5023	struct cifsFileInfo *cfile = swap_file->private_data;
5024	struct inode *inode = swap_file->f_mapping->host;
5025	unsigned long blocks;
5026	long long isize;
5027
5028	cifs_dbg(FYI, "swap activate\n");
5029
5030	if (!swap_file->f_mapping->a_ops->swap_rw)
5031		/* Cannot support swap */
5032		return -EINVAL;
5033
5034	spin_lock(&inode->i_lock);
5035	blocks = inode->i_blocks;
5036	isize = inode->i_size;
5037	spin_unlock(&inode->i_lock);
5038	if (blocks*512 < isize) {
5039		pr_warn("swap activate: swapfile has holes\n");
5040		return -EINVAL;
5041	}
5042	*span = sis->pages;
5043
5044	pr_warn_once("Swap support over SMB3 is experimental\n");
5045
5046	/*
5047	 * TODO: consider adding ACL (or documenting how) to prevent other
5048	 * users (on this or other systems) from reading it
5049	 */
5050
5051
5052	/* TODO: add sk_set_memalloc(inet) or similar */
5053
5054	if (cfile)
5055		cfile->swapfile = true;
5056	/*
5057	 * TODO: Since file already open, we can't open with DENY_ALL here
5058	 * but we could add call to grab a byte range lock to prevent others
5059	 * from reading or writing the file
5060	 */
5061
5062	sis->flags |= SWP_FS_OPS;
5063	return add_swap_extent(sis, 0, sis->max, 0);
5064}
5065
5066static void cifs_swap_deactivate(struct file *file)
5067{
5068	struct cifsFileInfo *cfile = file->private_data;
5069
5070	cifs_dbg(FYI, "swap deactivate\n");
5071
5072	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5073
5074	if (cfile)
5075		cfile->swapfile = false;
5076
5077	/* do we need to unpin (or unlock) the file */
5078}
5079
5080/*
5081 * Mark a page as having been made dirty and thus needing writeback.  We also
5082 * need to pin the cache object to write back to.
5083 */
5084#ifdef CONFIG_CIFS_FSCACHE
5085static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5086{
5087	return fscache_dirty_folio(mapping, folio,
5088					cifs_inode_cookie(mapping->host));
5089}
5090#else
5091#define cifs_dirty_folio filemap_dirty_folio
5092#endif
5093
5094const struct address_space_operations cifs_addr_ops = {
5095	.read_folio = cifs_read_folio,
5096	.readahead = cifs_readahead,
5097	.writepages = cifs_writepages,
5098	.write_begin = cifs_write_begin,
5099	.write_end = cifs_write_end,
5100	.dirty_folio = cifs_dirty_folio,
5101	.release_folio = cifs_release_folio,
5102	.direct_IO = cifs_direct_io,
5103	.invalidate_folio = cifs_invalidate_folio,
5104	.launder_folio = cifs_launder_folio,
5105	.migrate_folio = filemap_migrate_folio,
5106	/*
5107	 * TODO: investigate whether adding an is_dirty_writeback helper
5108	 * would be useful here
5109	 */
5110	.swap_activate = cifs_swap_activate,
5111	.swap_deactivate = cifs_swap_deactivate,
5112};
5113
5114/*
5115 * cifs_readahead requires the server to support a buffer large enough to
5116 * contain the header plus one complete page of data.  Otherwise, we need
5117 * to leave cifs_readahead out of the address space operations.
5118 */
5119const struct address_space_operations cifs_addr_ops_smallbuf = {
5120	.read_folio = cifs_read_folio,
5121	.writepages = cifs_writepages,
5122	.write_begin = cifs_write_begin,
5123	.write_end = cifs_write_end,
5124	.dirty_folio = cifs_dirty_folio,
5125	.release_folio = cifs_release_folio,
5126	.invalidate_folio = cifs_invalidate_folio,
5127	.launder_folio = cifs_launder_folio,
5128	.migrate_folio = filemap_migrate_folio,
5129};
5130