/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"

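/*
 * Map the open(2) access mode in @flags to an NT desired-access mask.
 * Read-only, write-only and read-write map to GENERIC_READ/GENERIC_WRITE;
 * anything else falls back to an explicit set of FILE_* access bits.
 */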
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request; it can
		   cause unnecessary access-denied errors on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}

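/*
 * Derive the CIFS create disposition from the O_CREAT/O_EXCL/O_TRUNC bits
 * of the open flags; see the mapping table in cifs_nt_open() below.
 */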
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

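/*
 * Open a file via the SMB POSIX create call (unix extensions). On success
 * the caller gets the oplock level in @poplock and the handle in @pnetfid;
 * if @pinode is non-NULL, the inode is instantiated or revalidated from
 * the FILE_UNIX_BASIC_INFO the server returned.
 */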
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_revalidate_mapping(*pinode);
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}

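/*
 * Open a file over the standard (non-POSIX) path: convert the VFS open
 * flags into an NT desired-access / disposition pair, issue the open, then
 * refresh the inode metadata from the server.
 */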
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is no direct POSIX match for the FILE_SUPERSEDE
 *	disposition (ie create whether or not the file exists).
 *	O_CREAT | O_TRUNC is similar, but it truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on the open call).
 *
 *	O_SYNC is a reasonable match to the CIFS writethrough flag
 *	and the read write flags match reasonably.  O_LARGEFILE
 *	is irrelevant because largefile support is always used
 *	by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}

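/*
 * Return true if any open file on this inode currently holds byte-range
 * locks (used to decide whether a read oplock can be kept).
 */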
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

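/*
 * Take lock_sem for writing by polling with down_write_trylock() and a
 * short sleep rather than blocking in down_write().
 */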
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

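/*
 * Allocate and initialize the per-open cifsFileInfo, attach it to the
 * inode's and tcon's open-file lists, and apply the (possibly amended)
 * oplock level returned by the server.
 */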
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if this is a readable file instance, put it first in the list */
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file private data
 * @cifs_file: the file private data to release
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one. If calling this function from the
 * oplock break handler, you need to pass false.
 *
 * If @offload is true, the final release is deferred to the
 * fileinfo_put_wq workqueue instead of running synchronously.
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need to invalidate the mapping on
		 * the last close because it may cause an error when we open
		 * this file again and get at least a level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close_getattr)
			server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	if (offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}

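/*
 * VFS ->open for cifs: try a POSIX open when the server supports the unix
 * extensions, otherwise fall back to the NT open path, then hook up the
 * resulting handle as the file's private data.
 */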
int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* cannot refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fall through to retry the open the old way on network
		 * i/o or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set the mode, which we could not set earlier due
		 * to problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte-range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

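/*
 * Reopen a handle that was invalidated (for example after a reconnect).
 * @can_flush tells us whether it is safe to flush dirty pages and re-fetch
 * inode info once the reopen succeeds.
 */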
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * We cannot grab the rename sem here, because various ops (including
	 * some that already hold it) can end up causing writepage to get
	 * called, and if the server was down that means we end up here. We
	 * can never tell whether the caller already holds the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * Fall through to retry the open the old way on errors;
		 * especially in the reconnect path it is important to retry
		 * hard.
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * We cannot refresh the inode by passing in a file_info buf to be
	 * returned by ops->open and then calling get_inode_info with the
	 * returned buf, since the file might have write-behind data that
	 * needs to be flushed and the server's version of the file size can
	 * be stale. If we knew for sure that the inode was not dirty locally
	 * we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout has expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions)
			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
		else if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Otherwise we are already writing data out to the server and could
	 * deadlock if we tried to flush it; and since we do not know whether
	 * we have data that would invalidate the current end of file on the
	 * server, we cannot go to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		_cifsFileInfo_put(file->private_data, true, false);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

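/*
 * Walk the tcon's open-file list and try to reopen every handle that was
 * invalidated; handles that still fail are left for a later retry by
 * setting need_reopen_files again.
 */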
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file;
	struct list_head *tmp;
	struct list_head *tmp1;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check: CIFS_LOCK_OP - no op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if no locks prevent us but we need to send a request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to send a request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to send a request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = FILE_LOCK_DEFERRED + 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	return rc;
}

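/*
 * Send all cached mandatory byte-range locks for this open file to the
 * server, batching as many LOCKING_ANDX_RANGE entries per request as the
 * negotiated buffer size allows.
 */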
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store the
	 * value and check it before use.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem, which
	 * protects the locking operations on this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

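/*
 * Decode a VFS file_lock into the server lock type plus the lock/unlock/
 * wait flags used by cifs_getlk() and cifs_setlk() below.
 */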
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

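/*
 * Handle F_GETLK: first check the locks we cache locally, then fall back
 * to probing the server with a lock/unlock pair to see whether a
 * conflicting lock is held remotely.
 */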
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error %d unlocking previously locked range during lock test\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error %d unlocking previously locked range during lock test\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store the
	 * value and check it before use.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cifs_down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * the lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save the lock here so we can add it back
			 * to the file's list if the unlock range request fails
			 * on the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}

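/*
 * Handle F_SETLK/F_SETLKW: take the posix path when the server supports
 * unix extension fcntl locks, otherwise record the lock locally and send a
 * mandatory byte-range lock (or unlock) to the server.
 */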
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (rc <= FILE_LOCK_DEFERRED)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * A Windows 7 server can delay breaking a lease from read to
		 * None if we set a byte-range lock on a file - break it
		 * explicitly before sending the lock to the server to be sure
		 * the next read won't conflict with non-overlapping locks due
		 * to page reading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}

int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	xid = get_xid();

	if (!(fl->fl_flags & FL_FLOCK)) {
		rc = -ENOLCK;
		free_xid(xid);
		return rc;
	}

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;

	if (!lock && !unlock) {
		/*
		 * If there is no lock or unlock request then there is nothing
		 * to do, since we do not know what it is.
		 */
		rc = -EOPNOTSUPP;
		free_xid(xid);
		return rc;
	}

	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length, which we cannot accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * If there is no lock or unlock request then there is nothing
		 * to do, since we do not know what it is.
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

/*
 * Update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held.
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		      unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}

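/*
 * Synchronous write helper: loops until @write_size bytes are sent,
 * reopening an invalidated handle and retrying on -EAGAIN, then updates
 * the cached EOF and the inode size.
 */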
1844static ssize_t
1845cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1846	   size_t write_size, loff_t *offset)
1847{
1848	int rc = 0;
1849	unsigned int bytes_written = 0;
1850	unsigned int total_written;
1851	struct cifs_tcon *tcon;
1852	struct TCP_Server_Info *server;
1853	unsigned int xid;
1854	struct dentry *dentry = open_file->dentry;
1855	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1856	struct cifs_io_parms io_parms = {0};
1857
1858	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1859		 write_size, *offset, dentry);
1860
1861	tcon = tlink_tcon(open_file->tlink);
1862	server = tcon->ses->server;
1863
1864	if (!server->ops->sync_write)
1865		return -ENOSYS;
1866
1867	xid = get_xid();
1868
1869	for (total_written = 0; write_size > total_written;
1870	     total_written += bytes_written) {
1871		rc = -EAGAIN;
1872		while (rc == -EAGAIN) {
1873			struct kvec iov[2];
1874			unsigned int len;
1875
1876			if (open_file->invalidHandle) {
1877				/* we could deadlock if we called
1878				   filemap_fdatawait from here, so tell
1879				   reopen_file not to flush data to the
1880				   server now */
1881				rc = cifs_reopen_file(open_file, false);
1882				if (rc != 0)
1883					break;
1884			}
1885
1886			len = min(server->ops->wp_retry_size(d_inode(dentry)),
1887				  (unsigned int)write_size - total_written);
1888			/* iov[0] is reserved for smb header */
1889			iov[1].iov_base = (char *)write_data + total_written;
1890			iov[1].iov_len = len;
1891			io_parms.pid = pid;
1892			io_parms.tcon = tcon;
1893			io_parms.offset = *offset;
1894			io_parms.length = len;
1895			rc = server->ops->sync_write(xid, &open_file->fid,
1896					&io_parms, &bytes_written, iov, 1);
1897		}
1898		if (rc || (bytes_written == 0)) {
1899			if (total_written)
1900				break;
1901			else {
1902				free_xid(xid);
1903				return rc;
1904			}
1905		} else {
1906			spin_lock(&d_inode(dentry)->i_lock);
1907			cifs_update_eof(cifsi, *offset, bytes_written);
1908			spin_unlock(&d_inode(dentry)->i_lock);
1909			*offset += bytes_written;
1910		}
1911	}
1912
1913	cifs_stats_bytes_written(tcon, total_written);
1914
1915	if (total_written > 0) {
1916		spin_lock(&d_inode(dentry)->i_lock);
1917		if (*offset > d_inode(dentry)->i_size)
1918			i_size_write(d_inode(dentry), *offset);
1919		spin_unlock(&d_inode(dentry)->i_lock);
1920	}
1921	mark_inode_dirty_sync(d_inode(dentry));
1922	free_xid(xid);
1923	return total_written;
1924}
1925
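/*
 * Find an open handle on this inode that allows reading, skipping handles
 * owned by other fsuids on multiuser mounts. A matching handle is returned
 * with an extra reference that the caller must drop. A minimal sketch of
 * the expected usage:
 *
 *	struct cifsFileInfo *cfile = find_readable_file(cinode, false);
 *	if (cfile) {
 *		... issue the read through cfile ...
 *		cifsFileInfo_put(cfile);
 *	}
 */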
1926struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1927					bool fsuid_only)
1928{
1929	struct cifsFileInfo *open_file = NULL;
1930	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1931
1932	/* only filter by fsuid on multiuser mounts */
1933	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1934		fsuid_only = false;
1935
1936	spin_lock(&cifs_inode->open_file_lock);
1937	/* we could simply take the first list entry since write-only entries
1938	   are always at the end of the list, but the first entry might have
1939	   a close pending, so we go through the whole list */
1940	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1941		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1942			continue;
1943		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1944			if (!open_file->invalidHandle) {
1945				/* found a good file */
1946				/* lock it so it will not be closed on us */
1947				cifsFileInfo_get(open_file);
1948				spin_unlock(&cifs_inode->open_file_lock);
1949				return open_file;
1950			} /* else might as well continue, and look for
1951			     another, or simply have the caller reopen it
1952			     again rather than trying to fix this handle */
1953		} else /* write only file */
1954			break; /* write only files are last so must be done */
1955	}
1956	spin_unlock(&cifs_inode->open_file_lock);
1957	return NULL;
1958}
1959
1960/* Return -EBADF if no handle is found and general rc otherwise */
1961int
1962cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
1963		       struct cifsFileInfo **ret_file)
1964{
1965	struct cifsFileInfo *open_file, *inv_file = NULL;
1966	struct cifs_sb_info *cifs_sb;
1967	bool any_available = false;
1968	int rc = -EBADF;
1969	unsigned int refind = 0;
1970	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
1971	bool with_delete = flags & FIND_WR_WITH_DELETE;
1972	*ret_file = NULL;
1973
1974	/*
1975	 * Having a null inode here (because mapping->host was set to zero by
1976	 * the VFS or MM) should not happen, but we had reports of an oops (due
1977	 * to it being zero) during stress testcases, so we need to check for it
1978	 */
1979
1980	if (cifs_inode == NULL) {
1981		cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
1982		dump_stack();
1983		return rc;
1984	}
1985
1986	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1987
1988	/* only filter by fsuid on multiuser mounts */
1989	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1990		fsuid_only = false;
1991
1992	spin_lock(&cifs_inode->open_file_lock);
1993refind_writable:
1994	if (refind > MAX_REOPEN_ATT) {
1995		spin_unlock(&cifs_inode->open_file_lock);
1996		return rc;
1997	}
1998	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1999		if (!any_available && open_file->pid != current->tgid)
2000			continue;
2001		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2002			continue;
2003		if (with_delete && !(open_file->fid.access & DELETE))
2004			continue;
2005		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2006			if (!open_file->invalidHandle) {
2007				/* found a good writable file */
2008				cifsFileInfo_get(open_file);
2009				spin_unlock(&cifs_inode->open_file_lock);
2010				*ret_file = open_file;
2011				return 0;
2012			} else {
2013				if (!inv_file)
2014					inv_file = open_file;
2015			}
2016		}
2017	}
2018	/* couldn't find a usable FH with the same pid, try any available */
2019	if (!any_available) {
2020		any_available = true;
2021		goto refind_writable;
2022	}
2023
2024	if (inv_file) {
2025		any_available = false;
2026		cifsFileInfo_get(inv_file);
2027	}
2028
2029	spin_unlock(&cifs_inode->open_file_lock);
2030
2031	if (inv_file) {
2032		rc = cifs_reopen_file(inv_file, false);
2033		if (!rc) {
2034			*ret_file = inv_file;
2035			return 0;
2036		}
2037
2038		spin_lock(&cifs_inode->open_file_lock);
2039		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2040		spin_unlock(&cifs_inode->open_file_lock);
2041		cifsFileInfo_put(inv_file);
2042		++refind;
2043		inv_file = NULL;
2044		spin_lock(&cifs_inode->open_file_lock);
2045		goto refind_writable;
2046	}
2047
2048	return rc;
2049}
2050
2051struct cifsFileInfo *
2052find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2053{
2054	struct cifsFileInfo *cfile;
2055	int rc;
2056
2057	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2058	if (rc)
2059		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2060
2061	return cfile;
2062}
2063
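/*
 * Walk the tcon's open file list looking for an entry whose dentry path
 * matches @name and, if one is found, hand back a referenced writable
 * handle for its inode via cifs_get_writable_file(). Returns -ENOENT when
 * no open file matches the path.
 */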
2064int
2065cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2066		       int flags,
2067		       struct cifsFileInfo **ret_file)
2068{
2070	struct cifsFileInfo *cfile;
2071	struct cifsInodeInfo *cinode;
2072	char *full_path;
2073
2074	*ret_file = NULL;
2075
2076	spin_lock(&tcon->open_file_lock);
2077	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2080		full_path = build_path_from_dentry(cfile->dentry);
2081		if (full_path == NULL) {
2082			spin_unlock(&tcon->open_file_lock);
2083			return -ENOMEM;
2084		}
2085		if (strcmp(full_path, name)) {
2086			kfree(full_path);
2087			continue;
2088		}
2089
2090		kfree(full_path);
2091		cinode = CIFS_I(d_inode(cfile->dentry));
2092		spin_unlock(&tcon->open_file_lock);
2093		return cifs_get_writable_file(cinode, flags, ret_file);
2094	}
2095
2096	spin_unlock(&tcon->open_file_lock);
2097	return -ENOENT;
2098}
2099
2100int
2101cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2102		       struct cifsFileInfo **ret_file)
2103{
2105	struct cifsFileInfo *cfile;
2106	struct cifsInodeInfo *cinode;
2107	char *full_path;
2108
2109	*ret_file = NULL;
2110
2111	spin_lock(&tcon->open_file_lock);
2112	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2115		full_path = build_path_from_dentry(cfile->dentry);
2116		if (full_path == NULL) {
2117			spin_unlock(&tcon->open_file_lock);
2118			return -ENOMEM;
2119		}
2120		if (strcmp(full_path, name)) {
2121			kfree(full_path);
2122			continue;
2123		}
2124
2125		kfree(full_path);
2126		cinode = CIFS_I(d_inode(cfile->dentry));
2127		spin_unlock(&tcon->open_file_lock);
2128		*ret_file = find_readable_file(cinode, false);
2129		return *ret_file ? 0 : -ENOENT;
2130	}
2131
2132	spin_unlock(&tcon->open_file_lock);
2133	return -ENOENT;
2134}
2135
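/*
 * Synchronously write the byte range [@from, @to) of a page back to the
 * server through any writable handle for the inode. The range is clamped
 * to i_size (or skipped entirely when racing with truncate) because this
 * path must never extend the file.
 */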
2136static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2137{
2138	struct address_space *mapping = page->mapping;
2139	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2140	char *write_data;
2141	int rc = -EFAULT;
2142	int bytes_written = 0;
2143	struct inode *inode;
2144	struct cifsFileInfo *open_file;
2145
2146	if (!mapping || !mapping->host)
2147		return -EFAULT;
2148
2149	inode = page->mapping->host;
2150
2151	offset += (loff_t)from;
2152	write_data = kmap(page);
2153	write_data += from;
2154
2155	if ((to > PAGE_SIZE) || (from > to)) {
2156		kunmap(page);
2157		return -EIO;
2158	}
2159
2160	/* racing with truncate? */
2161	if (offset > mapping->host->i_size) {
2162		kunmap(page);
2163		return 0; /* don't care */
2164	}
2165
2166	/* check to make sure that we are not extending the file */
2167	if (mapping->host->i_size - offset < (loff_t)to)
2168		to = (unsigned)(mapping->host->i_size - offset);
2169
2170	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2171				    &open_file);
2172	if (!rc) {
2173		bytes_written = cifs_write(open_file, open_file->pid,
2174					   write_data, to - from, &offset);
2175		cifsFileInfo_put(open_file);
2176		/* Does mm or vfs already set times? */
2177		inode->i_atime = inode->i_mtime = current_time(inode);
2178		if ((bytes_written > 0) && (offset))
2179			rc = 0;
2180		else if (bytes_written < 0)
2181			rc = bytes_written;
2182		else
2183			rc = -EFAULT;
2184	} else {
2185		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2186		if (!is_retryable_error(rc))
2187			rc = -EIO;
2188	}
2189
2190	kunmap(page);
2191	return rc;
2192}
2193
2194static struct cifs_writedata *
2195wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2196			  pgoff_t end, pgoff_t *index,
2197			  unsigned int *found_pages)
2198{
2199	struct cifs_writedata *wdata;
2200
2201	wdata = cifs_writedata_alloc((unsigned int)tofind,
2202				     cifs_writev_complete);
2203	if (!wdata)
2204		return NULL;
2205
2206	*found_pages = find_get_pages_range_tag(mapping, index, end,
2207				PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2208	return wdata;
2209}
2210
2211static unsigned int
2212wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2213		    struct address_space *mapping,
2214		    struct writeback_control *wbc,
2215		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2216{
2217	unsigned int nr_pages = 0, i;
2218	struct page *page;
2219
2220	for (i = 0; i < found_pages; i++) {
2221		page = wdata->pages[i];
2222		/*
2223		 * At this point we hold neither the i_pages lock nor the
2224		 * page lock: the page may be truncated or invalidated
2225		 * (changing page->mapping to NULL), or even swizzled
2226		 * back from swapper_space to tmpfs file mapping
2227		 */
2228
2229		if (nr_pages == 0)
2230			lock_page(page);
2231		else if (!trylock_page(page))
2232			break;
2233
2234		if (unlikely(page->mapping != mapping)) {
2235			unlock_page(page);
2236			break;
2237		}
2238
2239		if (!wbc->range_cyclic && page->index > end) {
2240			*done = true;
2241			unlock_page(page);
2242			break;
2243		}
2244
2245		if (*next && (page->index != *next)) {
2246			/* Not the next consecutive page */
2247			unlock_page(page);
2248			break;
2249		}
2250
2251		if (wbc->sync_mode != WB_SYNC_NONE)
2252			wait_on_page_writeback(page);
2253
2254		if (PageWriteback(page) ||
2255				!clear_page_dirty_for_io(page)) {
2256			unlock_page(page);
2257			break;
2258		}
2259
2260		/*
2261		 * This actually clears the dirty bit in the radix tree.
2262		 * See cifs_writepage() for more commentary.
2263		 */
2264		set_page_writeback(page);
2265		if (page_offset(page) >= i_size_read(mapping->host)) {
2266			*done = true;
2267			unlock_page(page);
2268			end_page_writeback(page);
2269			break;
2270		}
2271
2272		wdata->pages[i] = page;
2273		*next = page->index + 1;
2274		++nr_pages;
2275	}
2276
2277	/* reset index to refind any pages skipped */
2278	if (nr_pages == 0)
2279		*index = wdata->pages[0]->index + 1;
2280
2281	/* put any pages we aren't going to use */
2282	for (i = nr_pages; i < found_pages; i++) {
2283		put_page(wdata->pages[i]);
2284		wdata->pages[i] = NULL;
2285	}
2286
2287	return nr_pages;
2288}
2289
2290static int
2291wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2292		 struct address_space *mapping, struct writeback_control *wbc)
2293{
2294	int rc;
2295
2296	wdata->sync_mode = wbc->sync_mode;
2297	wdata->nr_pages = nr_pages;
2298	wdata->offset = page_offset(wdata->pages[0]);
2299	wdata->pagesz = PAGE_SIZE;
2300	wdata->tailsz = min(i_size_read(mapping->host) -
2301			page_offset(wdata->pages[nr_pages - 1]),
2302			(loff_t)PAGE_SIZE);
2303	wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2304	wdata->pid = wdata->cfile->pid;
2305
2306	rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2307	if (rc)
2308		return rc;
2309
2310	if (wdata->cfile->invalidHandle)
2311		rc = -EAGAIN;
2312	else
2313		rc = wdata->server->ops->async_writev(wdata,
2314						      cifs_writedata_release);
2315
2316	return rc;
2317}
2318
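/*
 * Write back dirty pages in batches: each pass collects up to
 * wsize/PAGE_SIZE contiguous dirty pages, marks them for writeback and
 * sends them in one asynchronous write. Retryable send failures redirty
 * the pages, and for WB_SYNC_ALL an -EAGAIN batch is retried from the
 * saved index.
 */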
2319static int cifs_writepages(struct address_space *mapping,
2320			   struct writeback_control *wbc)
2321{
2322	struct inode *inode = mapping->host;
2323	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2324	struct TCP_Server_Info *server;
2325	bool done = false, scanned = false, range_whole = false;
2326	pgoff_t end, index;
2327	struct cifs_writedata *wdata;
2328	struct cifsFileInfo *cfile = NULL;
2329	int rc = 0;
2330	int saved_rc = 0;
2331	unsigned int xid;
2332
2333	/*
2334	 * If wsize is smaller than the page cache size, default to writing
2335	 * one page at a time via cifs_writepage
2336	 */
2337	if (cifs_sb->wsize < PAGE_SIZE)
2338		return generic_writepages(mapping, wbc);
2339
2340	xid = get_xid();
2341	if (wbc->range_cyclic) {
2342		index = mapping->writeback_index; /* Start from prev offset */
2343		end = -1;
2344	} else {
2345		index = wbc->range_start >> PAGE_SHIFT;
2346		end = wbc->range_end >> PAGE_SHIFT;
2347		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2348			range_whole = true;
2349		scanned = true;
2350	}
2351	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2352
2353retry:
2354	while (!done && index <= end) {
2355		unsigned int i, nr_pages, found_pages, wsize;
2356		pgoff_t next = 0, tofind, saved_index = index;
2357		struct cifs_credits credits_on_stack;
2358		struct cifs_credits *credits = &credits_on_stack;
2359		int get_file_rc = 0;
2360
2361		if (cfile)
2362			cifsFileInfo_put(cfile);
2363
2364		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2365
2366		/* in case of an error store it to return later */
2367		if (rc)
2368			get_file_rc = rc;
2369
2370		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2371						   &wsize, credits);
2372		if (rc != 0) {
2373			done = true;
2374			break;
2375		}
2376
2377		tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2378
2379		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2380						  &found_pages);
2381		if (!wdata) {
2382			rc = -ENOMEM;
2383			done = true;
2384			add_credits_and_wake_if(server, credits, 0);
2385			break;
2386		}
2387
2388		if (found_pages == 0) {
2389			kref_put(&wdata->refcount, cifs_writedata_release);
2390			add_credits_and_wake_if(server, credits, 0);
2391			break;
2392		}
2393
2394		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2395					       end, &index, &next, &done);
2396
2397		/* nothing to write? */
2398		if (nr_pages == 0) {
2399			kref_put(&wdata->refcount, cifs_writedata_release);
2400			add_credits_and_wake_if(server, credits, 0);
2401			continue;
2402		}
2403
2404		wdata->credits = credits_on_stack;
2405		wdata->cfile = cfile;
2406		wdata->server = server;
2407		cfile = NULL;
2408
2409		if (!wdata->cfile) {
2410			cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2411				 get_file_rc);
2412			if (is_retryable_error(get_file_rc))
2413				rc = get_file_rc;
2414			else
2415				rc = -EBADF;
2416		} else
2417			rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2418
2419		for (i = 0; i < nr_pages; ++i)
2420			unlock_page(wdata->pages[i]);
2421
2422		/* send failure -- clean up the mess */
2423		if (rc != 0) {
2424			add_credits_and_wake_if(server, &wdata->credits, 0);
2425			for (i = 0; i < nr_pages; ++i) {
2426				if (is_retryable_error(rc))
2427					redirty_page_for_writepage(wbc,
2428							   wdata->pages[i]);
2429				else
2430					SetPageError(wdata->pages[i]);
2431				end_page_writeback(wdata->pages[i]);
2432				put_page(wdata->pages[i]);
2433			}
2434			if (!is_retryable_error(rc))
2435				mapping_set_error(mapping, rc);
2436		}
2437		kref_put(&wdata->refcount, cifs_writedata_release);
2438
2439		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2440			index = saved_index;
2441			continue;
2442		}
2443
2444		/* Return immediately if we received a signal during writing */
2445		if (is_interrupt_error(rc)) {
2446			done = true;
2447			break;
2448		}
2449
2450		if (rc != 0 && saved_rc == 0)
2451			saved_rc = rc;
2452
2453		wbc->nr_to_write -= nr_pages;
2454		if (wbc->nr_to_write <= 0)
2455			done = true;
2456
2457		index = next;
2458	}
2459
2460	if (!scanned && !done) {
2461		/*
2462		 * We hit the last page and there is more work to be done: wrap
2463		 * back to the start of the file
2464		 */
2465		scanned = true;
2466		index = 0;
2467		goto retry;
2468	}
2469
2470	if (saved_rc != 0)
2471		rc = saved_rc;
2472
2473	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2474		mapping->writeback_index = index;
2475
2476	if (cfile)
2477		cifsFileInfo_put(cfile);
2478	free_xid(xid);
2479	return rc;
2480}
2481
2482static int
2483cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2484{
2485	int rc;
2486	unsigned int xid;
2487
2488	xid = get_xid();
2489/* BB add check for wbc flags */
2490	get_page(page);
2491	if (!PageUptodate(page))
2492		cifs_dbg(FYI, "ppw - page not up to date\n");
2493
2494	/*
2495	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2496	 *
2497	 * A writepage() implementation always needs to do either this,
2498	 * or re-dirty the page with "redirty_page_for_writepage()" in
2499	 * the case of a failure.
2500	 *
2501	 * Just unlocking the page will cause the radix tree tag-bits
2502	 * to fail to update with the state of the page correctly.
2503	 */
2504	set_page_writeback(page);
2505retry_write:
2506	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2507	if (is_retryable_error(rc)) {
2508		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2509			goto retry_write;
2510		redirty_page_for_writepage(wbc, page);
2511	} else if (rc != 0) {
2512		SetPageError(page);
2513		mapping_set_error(page->mapping, rc);
2514	} else {
2515		SetPageUptodate(page);
2516	}
2517	end_page_writeback(page);
2518	put_page(page);
2519	free_xid(xid);
2520	return rc;
2521}
2522
2523static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2524{
2525	int rc = cifs_writepage_locked(page, wbc);
2526	unlock_page(page);
2527	return rc;
2528}
2529
2530static int cifs_write_end(struct file *file, struct address_space *mapping,
2531			loff_t pos, unsigned len, unsigned copied,
2532			struct page *page, void *fsdata)
2533{
2534	int rc;
2535	struct inode *inode = mapping->host;
2536	struct cifsFileInfo *cfile = file->private_data;
2537	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2538	__u32 pid;
2539
2540	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2541		pid = cfile->pid;
2542	else
2543		pid = current->tgid;
2544
2545	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2546		 page, pos, copied);
2547
2548	if (PageChecked(page)) {
2549		if (copied == len)
2550			SetPageUptodate(page);
2551		ClearPageChecked(page);
2552	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
2553		SetPageUptodate(page);
2554
2555	if (!PageUptodate(page)) {
2556		char *page_data;
2557		unsigned offset = pos & (PAGE_SIZE - 1);
2558		unsigned int xid;
2559
2560		xid = get_xid();
2561		/* this is probably better than directly calling
2562		   cifs_partialpagewrite since in this function the file
2563		   handle is known, which we might as well leverage */
2564		/* BB check if anything else is missing out of ppw,
2565		   such as updating the last write time */
2566		page_data = kmap(page);
2567		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2568		/* if (rc < 0) should we set writebehind rc? */
2569		kunmap(page);
2570
2571		free_xid(xid);
2572	} else {
2573		rc = copied;
2574		pos += copied;
2575		set_page_dirty(page);
2576	}
2577
2578	if (rc > 0) {
2579		spin_lock(&inode->i_lock);
2580		if (pos > inode->i_size)
2581			i_size_write(inode, pos);
2582		spin_unlock(&inode->i_lock);
2583	}
2584
2585	unlock_page(page);
2586	put_page(page);
2587
2588	return rc;
2589}
2590
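/*
 * Strict cache mode fsync: flush and wait on the requested range, zap the
 * page cache when we no longer hold a read lease/oplock, then ask the
 * server to flush its copy through a writable handle (borrowing another
 * writable handle when this one was opened read-only).
 */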
2591int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2592		      int datasync)
2593{
2594	unsigned int xid;
2595	int rc = 0;
2596	struct cifs_tcon *tcon;
2597	struct TCP_Server_Info *server;
2598	struct cifsFileInfo *smbfile = file->private_data;
2599	struct inode *inode = file_inode(file);
2600	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2601
2602	rc = file_write_and_wait_range(file, start, end);
2603	if (rc) {
2604		trace_cifs_fsync_err(inode->i_ino, rc);
2605		return rc;
2606	}
2607
2608	xid = get_xid();
2609
2610	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2611		 file, datasync);
2612
2613	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2614		rc = cifs_zap_mapping(inode);
2615		if (rc) {
2616			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2617			rc = 0; /* don't care about it in fsync */
2618		}
2619	}
2620
2621	tcon = tlink_tcon(smbfile->tlink);
2622	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2623		server = tcon->ses->server;
2624		if (server->ops->flush == NULL) {
2625			rc = -ENOSYS;
2626			goto strict_fsync_exit;
2627		}
2628
2629		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2630			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2631			if (smbfile) {
2632				rc = server->ops->flush(xid, tcon, &smbfile->fid);
2633				cifsFileInfo_put(smbfile);
2634			} else
2635				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2636		} else
2637			rc = server->ops->flush(xid, tcon, &smbfile->fid);
2638	}
2639
2640strict_fsync_exit:
2641	free_xid(xid);
2642	return rc;
2643}
2644
2645int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2646{
2647	unsigned int xid;
2648	int rc = 0;
2649	struct cifs_tcon *tcon;
2650	struct TCP_Server_Info *server;
2651	struct cifsFileInfo *smbfile = file->private_data;
2652	struct inode *inode = file_inode(file);
2653	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2654
2655	rc = file_write_and_wait_range(file, start, end);
2656	if (rc) {
2657		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2658		return rc;
2659	}
2660
2661	xid = get_xid();
2662
2663	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2664		 file, datasync);
2665
2666	tcon = tlink_tcon(smbfile->tlink);
2667	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2668		server = tcon->ses->server;
2669		if (server->ops->flush == NULL) {
2670			rc = -ENOSYS;
2671			goto fsync_exit;
2672		}
2673
2674		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2675			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2676			if (smbfile) {
2677				rc = server->ops->flush(xid, tcon, &smbfile->fid);
2678				cifsFileInfo_put(smbfile);
2679			} else
2680				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2681		} else
2682			rc = server->ops->flush(xid, tcon, &smbfile->fid);
2683	}
2684
2685fsync_exit:
2686	free_xid(xid);
2687	return rc;
2688}
2689
2690/*
2691 * As file closes, flush all cached write data for this inode checking
2692 * for write behind errors.
2693 */
2694int cifs_flush(struct file *file, fl_owner_t id)
2695{
2696	struct inode *inode = file_inode(file);
2697	int rc = 0;
2698
2699	if (file->f_mode & FMODE_WRITE)
2700		rc = filemap_write_and_wait(inode->i_mapping);
2701
2702	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2703	if (rc)
2704		trace_cifs_flush_err(inode->i_ino, rc);
2705	return rc;
2706}
2707
2708static int
2709cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2710{
2711	int rc = 0;
2712	unsigned long i;
2713
2714	for (i = 0; i < num_pages; i++) {
2715		pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2716		if (!pages[i]) {
2717			/*
2718			 * save number of pages we have already allocated and
2719			 * return with ENOMEM error
2720			 */
2721			num_pages = i;
2722			rc = -ENOMEM;
2723			break;
2724		}
2725	}
2726
2727	if (rc) {
2728		for (i = 0; i < num_pages; i++)
2729			put_page(pages[i]);
2730	}
2731	return rc;
2732}
2733
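/*
 * Worked example, assuming 4K pages: for wsize = 65536 and len = 200000,
 * clen = min(200000, 65536) = 65536 and num_pages = DIV_ROUND_UP(65536,
 * 4096) = 16, so one uncached chunk spans at most sixteen pages.
 */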
2734static inline
2735size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2736{
2737	size_t num_pages;
2738	size_t clen;
2739
2740	clen = min_t(const size_t, len, wsize);
2741	num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2742
2743	if (cur_len)
2744		*cur_len = clen;
2745
2746	return num_pages;
2747}
2748
2749static void
2750cifs_uncached_writedata_release(struct kref *refcount)
2751{
2752	int i;
2753	struct cifs_writedata *wdata = container_of(refcount,
2754					struct cifs_writedata, refcount);
2755
2756	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2757	for (i = 0; i < wdata->nr_pages; i++)
2758		put_page(wdata->pages[i]);
2759	cifs_writedata_release(refcount);
2760}
2761
2762static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2763
2764static void
2765cifs_uncached_writev_complete(struct work_struct *work)
2766{
2767	struct cifs_writedata *wdata = container_of(work,
2768					struct cifs_writedata, work);
2769	struct inode *inode = d_inode(wdata->cfile->dentry);
2770	struct cifsInodeInfo *cifsi = CIFS_I(inode);
2771
2772	spin_lock(&inode->i_lock);
2773	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2774	if (cifsi->server_eof > inode->i_size)
2775		i_size_write(inode, cifsi->server_eof);
2776	spin_unlock(&inode->i_lock);
2777
2778	complete(&wdata->done);
2779	collect_uncached_write_data(wdata->ctx);
2780	/* the below call can possibly free the last ref to aio ctx */
2781	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2782}
2783
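/*
 * Copy up to *@len bytes from @from into the preallocated wdata pages.
 * A short copy means the iterator ran into an unmapped user address, so
 * copying stops there; *@len and *@num_pages are trimmed to what was
 * actually copied, and -EFAULT is returned only if nothing could be
 * copied at all.
 */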
2784static int
2785wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2786		      size_t *len, unsigned long *num_pages)
2787{
2788	size_t save_len, copied, bytes, cur_len = *len;
2789	unsigned long i, nr_pages = *num_pages;
2790
2791	save_len = cur_len;
2792	for (i = 0; i < nr_pages; i++) {
2793		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2794		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2795		cur_len -= copied;
2796		/*
2797		 * If we didn't copy as much as we expected, then that
2798		 * may mean we trod into an unmapped area. Stop copying
2799		 * at that point. On the next pass through the big
2800		 * loop, we'll likely end up getting a zero-length
2801		 * write and bailing out of it.
2802		 */
2803		if (copied < bytes)
2804			break;
2805	}
2806	cur_len = save_len - cur_len;
2807	*len = cur_len;
2808
2809	/*
2810	 * If we have no data to send, then that probably means that
2811	 * the copy above failed altogether. That's most likely because
2812	 * the address in the iovec was bogus. Return -EFAULT and let
2813	 * the caller free anything we allocated and bail out.
2814	 */
2815	if (!cur_len)
2816		return -EFAULT;
2817
2818	/*
2819	 * i + 1 now represents the number of pages we actually used in
2820	 * the copy phase above.
2821	 */
2822	*num_pages = i + 1;
2823	return 0;
2824}
2825
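/*
 * Resend a failed uncached write as a single request: reopen the handle
 * if it was invalidated, poll wait_mtu_credits() (sleeping a second
 * between attempts) until the credits cover the whole wdata, then
 * re-issue the async write, rolling the credits back on failure.
 */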
2826static int
2827cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2828	struct cifs_aio_ctx *ctx)
2829{
2830	unsigned int wsize;
2831	struct cifs_credits credits;
2832	int rc;
2833	struct TCP_Server_Info *server = wdata->server;
2834
2835	do {
2836		if (wdata->cfile->invalidHandle) {
2837			rc = cifs_reopen_file(wdata->cfile, false);
2838			if (rc == -EAGAIN)
2839				continue;
2840			else if (rc)
2841				break;
2842		}
2843
2845		/*
2846		 * Wait for credits to resend this wdata.
2847		 * Note: we are attempting to resend the whole wdata, not in
2848		 * segments.
2849		 */
2850		do {
2851			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2852						&wsize, &credits);
2853			if (rc)
2854				goto fail;
2855
2856			if (wsize < wdata->bytes) {
2857				add_credits_and_wake_if(server, &credits, 0);
2858				msleep(1000);
2859			}
2860		} while (wsize < wdata->bytes);
2861		wdata->credits = credits;
2862
2863		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2864
2865		if (!rc) {
2866			if (wdata->cfile->invalidHandle)
2867				rc = -EAGAIN;
2868			else {
2869#ifdef CONFIG_CIFS_SMB_DIRECT
2870				if (wdata->mr) {
2871					wdata->mr->need_invalidate = true;
2872					smbd_deregister_mr(wdata->mr);
2873					wdata->mr = NULL;
2874				}
2875#endif
2876				rc = server->ops->async_writev(wdata,
2877					cifs_uncached_writedata_release);
2878			}
2879		}
2880
2881		/* If the write was successfully sent, we are done */
2882		if (!rc) {
2883			list_add_tail(&wdata->list, wdata_list);
2884			return 0;
2885		}
2886
2887		/* Roll back credits and retry if needed */
2888		add_credits_and_wake_if(server, &wdata->credits, 0);
2889	} while (rc == -EAGAIN);
2890
2891fail:
2892	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2893	return rc;
2894}
2895
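/*
 * Split @len bytes of @from into wsize-limited wdata requests and send
 * them asynchronously. Direct I/O pins the user pages in place with
 * iov_iter_get_pages_alloc(); the buffered path copies the data into
 * freshly allocated pages instead. Every request issued is queued on
 * @wdata_list for collect_uncached_write_data() to reap.
 */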
2896static int
2897cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2898		     struct cifsFileInfo *open_file,
2899		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2900		     struct cifs_aio_ctx *ctx)
2901{
2902	int rc = 0;
2903	size_t cur_len;
2904	unsigned long nr_pages, num_pages, i;
2905	struct cifs_writedata *wdata;
2906	struct iov_iter saved_from = *from;
2907	loff_t saved_offset = offset;
2908	pid_t pid;
2909	struct TCP_Server_Info *server;
2910	struct page **pagevec;
2911	size_t start;
2912	unsigned int xid;
2913
2914	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2915		pid = open_file->pid;
2916	else
2917		pid = current->tgid;
2918
2919	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2920	xid = get_xid();
2921
2922	do {
2923		unsigned int wsize;
2924		struct cifs_credits credits_on_stack;
2925		struct cifs_credits *credits = &credits_on_stack;
2926
2927		if (open_file->invalidHandle) {
2928			rc = cifs_reopen_file(open_file, false);
2929			if (rc == -EAGAIN)
2930				continue;
2931			else if (rc)
2932				break;
2933		}
2934
2935		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2936						   &wsize, credits);
2937		if (rc)
2938			break;
2939
2940		cur_len = min_t(const size_t, len, wsize);
2941
2942		if (ctx->direct_io) {
2943			ssize_t result;
2944
2945			result = iov_iter_get_pages_alloc(
2946				from, &pagevec, cur_len, &start);
2947			if (result < 0) {
2948				cifs_dbg(VFS,
2949					 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2950					 result, iov_iter_type(from),
2951					 from->iov_offset, from->count);
2952				dump_stack();
2953
2954				rc = result;
2955				add_credits_and_wake_if(server, credits, 0);
2956				break;
2957			}
2958			cur_len = (size_t)result;
2959			iov_iter_advance(from, cur_len);
2960
2961			nr_pages =
2962				(cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2963
2964			wdata = cifs_writedata_direct_alloc(pagevec,
2965					     cifs_uncached_writev_complete);
2966			if (!wdata) {
2967				rc = -ENOMEM;
2968				add_credits_and_wake_if(server, credits, 0);
2969				break;
2970			}
2971
2973			wdata->page_offset = start;
2974			wdata->tailsz =
2975				nr_pages > 1 ?
2976					cur_len - (PAGE_SIZE - start) -
2977					(nr_pages - 2) * PAGE_SIZE :
2978					cur_len;
2979		} else {
2980			nr_pages = get_numpages(wsize, len, &cur_len);
2981			wdata = cifs_writedata_alloc(nr_pages,
2982					     cifs_uncached_writev_complete);
2983			if (!wdata) {
2984				rc = -ENOMEM;
2985				add_credits_and_wake_if(server, credits, 0);
2986				break;
2987			}
2988
2989			rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2990			if (rc) {
2991				kvfree(wdata->pages);
2992				kfree(wdata);
2993				add_credits_and_wake_if(server, credits, 0);
2994				break;
2995			}
2996
2997			num_pages = nr_pages;
2998			rc = wdata_fill_from_iovec(
2999				wdata, from, &cur_len, &num_pages);
3000			if (rc) {
3001				for (i = 0; i < nr_pages; i++)
3002					put_page(wdata->pages[i]);
3003				kvfree(wdata->pages);
3004				kfree(wdata);
3005				add_credits_and_wake_if(server, credits, 0);
3006				break;
3007			}
3008
3009			/*
3010			 * Bring nr_pages down to the number of pages we
3011			 * actually used, and free any pages that we didn't use.
3012			 */
3013			for ( ; nr_pages > num_pages; nr_pages--)
3014				put_page(wdata->pages[nr_pages - 1]);
3015
3016			wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3017		}
3018
3019		wdata->sync_mode = WB_SYNC_ALL;
3020		wdata->nr_pages = nr_pages;
3021		wdata->offset = (__u64)offset;
3022		wdata->cfile = cifsFileInfo_get(open_file);
3023		wdata->server = server;
3024		wdata->pid = pid;
3025		wdata->bytes = cur_len;
3026		wdata->pagesz = PAGE_SIZE;
3027		wdata->credits = credits_on_stack;
3028		wdata->ctx = ctx;
3029		kref_get(&ctx->refcount);
3030
3031		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3032
3033		if (!rc) {
3034			if (wdata->cfile->invalidHandle)
3035				rc = -EAGAIN;
3036			else
3037				rc = server->ops->async_writev(wdata,
3038					cifs_uncached_writedata_release);
3039		}
3040
3041		if (rc) {
3042			add_credits_and_wake_if(server, &wdata->credits, 0);
3043			kref_put(&wdata->refcount,
3044				 cifs_uncached_writedata_release);
3045			if (rc == -EAGAIN) {
3046				*from = saved_from;
3047				iov_iter_advance(from, offset - saved_offset);
3048				continue;
3049			}
3050			break;
3051		}
3052
3053		list_add_tail(&wdata->list, wdata_list);
3054		offset += cur_len;
3055		len -= cur_len;
3056	} while (len > 0);
3057
3058	free_xid(xid);
3059	return rc;
3060}
3061
3062static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3063{
3064	struct cifs_writedata *wdata, *tmp;
3065	struct cifs_tcon *tcon;
3066	struct cifs_sb_info *cifs_sb;
3067	struct dentry *dentry = ctx->cfile->dentry;
3068	ssize_t rc;
3069
3070	tcon = tlink_tcon(ctx->cfile->tlink);
3071	cifs_sb = CIFS_SB(dentry->d_sb);
3072
3073	mutex_lock(&ctx->aio_mutex);
3074
3075	if (list_empty(&ctx->list)) {
3076		mutex_unlock(&ctx->aio_mutex);
3077		return;
3078	}
3079
3080	rc = ctx->rc;
3081	/*
3082	 * Wait for and collect replies for any successful sends in order of
3083	 * increasing offset. Once an error is hit, then return without waiting
3084	 * for any more replies.
3085	 */
3086restart_loop:
3087	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3088		if (!rc) {
3089			if (!try_wait_for_completion(&wdata->done)) {
3090				mutex_unlock(&ctx->aio_mutex);
3091				return;
3092			}
3093
3094			if (wdata->result)
3095				rc = wdata->result;
3096			else
3097				ctx->total_len += wdata->bytes;
3098
3099			/* resend call if it's a retryable error */
3100			if (rc == -EAGAIN) {
3101				struct list_head tmp_list;
3102				struct iov_iter tmp_from = ctx->iter;
3103
3104				INIT_LIST_HEAD(&tmp_list);
3105				list_del_init(&wdata->list);
3106
3107				if (ctx->direct_io)
3108					rc = cifs_resend_wdata(
3109						wdata, &tmp_list, ctx);
3110				else {
3111					iov_iter_advance(&tmp_from,
3112						 wdata->offset - ctx->pos);
3113
3114					rc = cifs_write_from_iter(wdata->offset,
3115						wdata->bytes, &tmp_from,
3116						ctx->cfile, cifs_sb, &tmp_list,
3117						ctx);
3118
3119					kref_put(&wdata->refcount,
3120						cifs_uncached_writedata_release);
3121				}
3122
3123				list_splice(&tmp_list, &ctx->list);
3124				goto restart_loop;
3125			}
3126		}
3127		list_del_init(&wdata->list);
3128		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3129	}
3130
3131	cifs_stats_bytes_written(tcon, ctx->total_len);
3132	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(d_inode(dentry))->flags);
3133
3134	ctx->rc = (rc == 0) ? ctx->total_len : rc;
3135
3136	mutex_unlock(&ctx->aio_mutex);
3137
3138	if (ctx->iocb && ctx->iocb->ki_complete)
3139		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3140	else
3141		complete(&ctx->done);
3142}
3143
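/*
 * Common entry point for uncached and direct writes. The cifs_aio_ctx
 * carries the iterator and the result across async completions: for a
 * synchronous kiocb we wait on ctx->done here, otherwise -EIOCBQUEUED is
 * returned and ki_complete() fires from collect_uncached_write_data()
 * once the last wdata finishes.
 */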
3144static ssize_t __cifs_writev(
3145	struct kiocb *iocb, struct iov_iter *from, bool direct)
3146{
3147	struct file *file = iocb->ki_filp;
3148	ssize_t total_written = 0;
3149	struct cifsFileInfo *cfile;
3150	struct cifs_tcon *tcon;
3151	struct cifs_sb_info *cifs_sb;
3152	struct cifs_aio_ctx *ctx;
3153	struct iov_iter saved_from = *from;
3154	size_t len = iov_iter_count(from);
3155	int rc;
3156
3157	/*
3158	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3159	 * In this case, fall back to the non-direct write function.
3160	 * This could be improved by getting pages directly in ITER_KVEC.
3161	 */
3162	if (direct && iov_iter_is_kvec(from)) {
3163		cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3164		direct = false;
3165	}
3166
3167	rc = generic_write_checks(iocb, from);
3168	if (rc <= 0)
3169		return rc;
3170
3171	cifs_sb = CIFS_FILE_SB(file);
3172	cfile = file->private_data;
3173	tcon = tlink_tcon(cfile->tlink);
3174
3175	if (!tcon->ses->server->ops->async_writev)
3176		return -ENOSYS;
3177
3178	ctx = cifs_aio_ctx_alloc();
3179	if (!ctx)
3180		return -ENOMEM;
3181
3182	ctx->cfile = cifsFileInfo_get(cfile);
3183
3184	if (!is_sync_kiocb(iocb))
3185		ctx->iocb = iocb;
3186
3187	ctx->pos = iocb->ki_pos;
3188
3189	if (direct) {
3190		ctx->direct_io = true;
3191		ctx->iter = *from;
3192		ctx->len = len;
3193	} else {
3194		rc = setup_aio_ctx_iter(ctx, from, WRITE);
3195		if (rc) {
3196			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3197			return rc;
3198		}
3199	}
3200
3201	/* grab a lock here because the write response handlers can access ctx */
3202	mutex_lock(&ctx->aio_mutex);
3203
3204	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3205				  cfile, cifs_sb, &ctx->list, ctx);
3206
3207	/*
3208	 * If at least one write was successfully sent, then discard any rc
3209	 * value from the later writes. If those writes succeed, then
3210	 * we'll end up returning whatever was written. If they fail, then
3211	 * we'll get a new rc value from that.
3212	 */
3213	if (!list_empty(&ctx->list))
3214		rc = 0;
3215
3216	mutex_unlock(&ctx->aio_mutex);
3217
3218	if (rc) {
3219		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3220		return rc;
3221	}
3222
3223	if (!is_sync_kiocb(iocb)) {
3224		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3225		return -EIOCBQUEUED;
3226	}
3227
3228	rc = wait_for_completion_killable(&ctx->done);
3229	if (rc) {
3230		mutex_lock(&ctx->aio_mutex);
3231		ctx->rc = rc = -EINTR;
3232		total_written = ctx->total_len;
3233		mutex_unlock(&ctx->aio_mutex);
3234	} else {
3235		rc = ctx->rc;
3236		total_written = ctx->total_len;
3237	}
3238
3239	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3240
3241	if (unlikely(!total_written))
3242		return rc;
3243
3244	iocb->ki_pos += total_written;
3245	return total_written;
3246}
3247
3248ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3249{
3250	struct file *file = iocb->ki_filp;
3251
3252	cifs_revalidate_mapping(file->f_inode);
3253	return __cifs_writev(iocb, from, true);
3254}
3255
3256ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3257{
3258	return __cifs_writev(iocb, from, false);
3259}
3260
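/*
 * Cached write path used when we hold a write oplock/lease: take lock_sem
 * shared so no conflicting brlock can appear underneath us, check the
 * range for lock conflicts, and go through the generic page cache write.
 */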
3261static ssize_t
3262cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3263{
3264	struct file *file = iocb->ki_filp;
3265	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3266	struct inode *inode = file->f_mapping->host;
3267	struct cifsInodeInfo *cinode = CIFS_I(inode);
3268	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3269	ssize_t rc;
3270
3271	inode_lock(inode);
3272	/*
3273	 * We need to hold the sem to be sure nobody modifies lock list
3274	 * with a brlock that prevents writing.
3275	 */
3276	down_read(&cinode->lock_sem);
3277
3278	rc = generic_write_checks(iocb, from);
3279	if (rc <= 0)
3280		goto out;
3281
3282	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3283				     server->vals->exclusive_lock_type, 0,
3284				     NULL, CIFS_WRITE_OP))
3285		rc = __generic_file_write_iter(iocb, from);
3286	else
3287		rc = -EACCES;
3288out:
3289	up_read(&cinode->lock_sem);
3290	inode_unlock(inode);
3291
3292	if (rc > 0)
3293		rc = generic_write_sync(iocb, rc);
3294	return rc;
3295}
3296
3297ssize_t
3298cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3299{
3300	struct inode *inode = file_inode(iocb->ki_filp);
3301	struct cifsInodeInfo *cinode = CIFS_I(inode);
3302	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3303	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3304						iocb->ki_filp->private_data;
3305	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3306	ssize_t written;
3307
3308	written = cifs_get_writer(cinode);
3309	if (written)
3310		return written;
3311
3312	if (CIFS_CACHE_WRITE(cinode)) {
3313		if (cap_unix(tcon->ses) &&
3314		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3315		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3316			written = generic_file_write_iter(iocb, from);
3317			goto out;
3318		}
3319		written = cifs_writev(iocb, from);
3320		goto out;
3321	}
3322	/*
3323	 * For non-oplocked files in strict cache mode we need to write the data
3324	 * to the server exactly from pos to pos+len-1 rather than flush all
3325	 * affected pages because it may cause an error with mandatory locks on
3326	 * these pages but not on the region from pos to pos+len-1.
3327	 */
3328	written = cifs_user_writev(iocb, from);
3329	if (CIFS_CACHE_READ(cinode)) {
3330		/*
3331		 * We have read level caching and we have just sent a write
3332		 * request to the server thus making data in the cache stale.
3333		 * Zap the cache and set oplock/lease level to NONE to avoid
3334		 * reading stale data from the cache. All subsequent read
3335		 * operations will read new data from the server.
3336		 */
3337		cifs_zap_mapping(inode);
3338		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3339			 inode);
3340		cinode->oplock = 0;
3341	}
3342out:
3343	cifs_put_writer(cinode);
3344	return written;
3345}
3346
3347static struct cifs_readdata *
3348cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3349{
3350	struct cifs_readdata *rdata;
3351
3352	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3353	if (rdata != NULL) {
3354		rdata->pages = pages;
3355		kref_init(&rdata->refcount);
3356		INIT_LIST_HEAD(&rdata->list);
3357		init_completion(&rdata->done);
3358		INIT_WORK(&rdata->work, complete);
3359	}
3360
3361	return rdata;
3362}
3363
3364static struct cifs_readdata *
3365cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3366{
3367	struct page **pages =
3368		kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3369	struct cifs_readdata *ret = NULL;
3370
3371	if (pages) {
3372		ret = cifs_readdata_direct_alloc(pages, complete);
3373		if (!ret)
3374			kfree(pages);
3375	}
3376
3377	return ret;
3378}
3379
3380void
3381cifs_readdata_release(struct kref *refcount)
3382{
3383	struct cifs_readdata *rdata = container_of(refcount,
3384					struct cifs_readdata, refcount);
3385#ifdef CONFIG_CIFS_SMB_DIRECT
3386	if (rdata->mr) {
3387		smbd_deregister_mr(rdata->mr);
3388		rdata->mr = NULL;
3389	}
3390#endif
3391	if (rdata->cfile)
3392		cifsFileInfo_put(rdata->cfile);
3393
3394	kvfree(rdata->pages);
3395	kfree(rdata);
3396}
3397
3398static int
3399cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3400{
3401	int rc = 0;
3402	struct page *page;
3403	unsigned int i;
3404
3405	for (i = 0; i < nr_pages; i++) {
3406		page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3407		if (!page) {
3408			rc = -ENOMEM;
3409			break;
3410		}
3411		rdata->pages[i] = page;
3412	}
3413
3414	if (rc) {
3415		unsigned int nr_page_failed = i;
3416
3417		for (i = 0; i < nr_page_failed; i++) {
3418			put_page(rdata->pages[i]);
3419			rdata->pages[i] = NULL;
3420		}
3421	}
3422	return rc;
3423}
3424
3425static void
3426cifs_uncached_readdata_release(struct kref *refcount)
3427{
3428	struct cifs_readdata *rdata = container_of(refcount,
3429					struct cifs_readdata, refcount);
3430	unsigned int i;
3431
3432	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3433	for (i = 0; i < rdata->nr_pages; i++) {
3434		put_page(rdata->pages[i]);
3435	}
3436	cifs_readdata_release(refcount);
3437}
3438
3439/**
3440 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3441 * @rdata:	the readdata response with list of pages holding data
3442 * @iter:	destination for our data
3443 *
3444 * This function copies data from a list of pages in a readdata response into
3445 * an array of iovecs. It will first calculate where the data should go
3446 * based on the info in the readdata and then copy the data into that spot.
3447 */
3448static int
3449cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3450{
3451	size_t remaining = rdata->got_bytes;
3452	unsigned int i;
3453
3454	for (i = 0; i < rdata->nr_pages; i++) {
3455		struct page *page = rdata->pages[i];
3456		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3457		size_t written;
3458
3459		if (unlikely(iov_iter_is_pipe(iter))) {
3460			void *addr = kmap_atomic(page);
3461
3462			written = copy_to_iter(addr, copy, iter);
3463			kunmap_atomic(addr);
3464		} else
3465			written = copy_page_to_iter(page, 0, copy, iter);
3466		remaining -= written;
3467		if (written < copy && iov_iter_count(iter) > 0)
3468			break;
3469	}
3470	return remaining ? -EFAULT : 0;
3471}
3472
3473static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3474
3475static void
3476cifs_uncached_readv_complete(struct work_struct *work)
3477{
3478	struct cifs_readdata *rdata = container_of(work,
3479						struct cifs_readdata, work);
3480
3481	complete(&rdata->done);
3482	collect_uncached_read_data(rdata->ctx);
3483	/* the below call can possibly free the last ref to aio ctx */
3484	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3485}
3486
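/*
 * Fill the rdata pages with @len bytes of response data, copied from
 * @iter or read straight from the socket when @iter is NULL (with SMB
 * Direct the data lands via RDMA, so the rdata->mr branch only accounts
 * the length). Pages beyond the received length are released early.
 */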
3487static int
3488uncached_fill_pages(struct TCP_Server_Info *server,
3489		    struct cifs_readdata *rdata, struct iov_iter *iter,
3490		    unsigned int len)
3491{
3492	int result = 0;
3493	unsigned int i;
3494	unsigned int nr_pages = rdata->nr_pages;
3495	unsigned int page_offset = rdata->page_offset;
3496
3497	rdata->got_bytes = 0;
3498	rdata->tailsz = PAGE_SIZE;
3499	for (i = 0; i < nr_pages; i++) {
3500		struct page *page = rdata->pages[i];
3501		size_t n;
3502		unsigned int segment_size = rdata->pagesz;
3503
3504		if (i == 0)
3505			segment_size -= page_offset;
3506		else
3507			page_offset = 0;
3508
3510		if (len <= 0) {
3511			/* no need to hold page hostage */
3512			rdata->pages[i] = NULL;
3513			rdata->nr_pages--;
3514			put_page(page);
3515			continue;
3516		}
3517
3518		n = len;
3519		if (len >= segment_size)
3520			/* enough data to fill the page */
3521			n = segment_size;
3522		else
3523			rdata->tailsz = len;
3524		len -= n;
3525
3526		if (iter)
3527			result = copy_page_from_iter(
3528					page, page_offset, n, iter);
3529#ifdef CONFIG_CIFS_SMB_DIRECT
3530		else if (rdata->mr)
3531			result = n;
3532#endif
3533		else
3534			result = cifs_read_page_from_socket(
3535					server, page, page_offset, n);
3536		if (result < 0)
3537			break;
3538
3539		rdata->got_bytes += result;
3540	}
3541
3542	return result != -ECONNABORTED && rdata->got_bytes > 0 ?
3543						rdata->got_bytes : result;
3544}
3545
3546static int
3547cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3548			      struct cifs_readdata *rdata, unsigned int len)
3549{
3550	return uncached_fill_pages(server, rdata, NULL, len);
3551}
3552
3553static int
3554cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3555			      struct cifs_readdata *rdata,
3556			      struct iov_iter *iter)
3557{
3558	return uncached_fill_pages(server, rdata, iter, iter->count);
3559}
3560
3561static int cifs_resend_rdata(struct cifs_readdata *rdata,
3562			struct list_head *rdata_list,
3563			struct cifs_aio_ctx *ctx)
3564{
3565	unsigned int rsize;
3566	struct cifs_credits credits;
3567	int rc;
3568	struct TCP_Server_Info *server;
3569
3570	/* XXX: should we pick a new channel here? */
3571	server = rdata->server;
3572
3573	do {
3574		if (rdata->cfile->invalidHandle) {
3575			rc = cifs_reopen_file(rdata->cfile, true);
3576			if (rc == -EAGAIN)
3577				continue;
3578			else if (rc)
3579				break;
3580		}
3581
3582		/*
3583		 * Wait for credits to resend this rdata.
3584		 * Note: we are attempting to resend the whole rdata, not in
3585		 * segments.
3586		 */
3587		do {
3588			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3589						&rsize, &credits);
3590
3591			if (rc)
3592				goto fail;
3593
3594			if (rsize < rdata->bytes) {
3595				add_credits_and_wake_if(server, &credits, 0);
3596				msleep(1000);
3597			}
3598		} while (rsize < rdata->bytes);
3599		rdata->credits = credits;
3600
3601		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3602		if (!rc) {
3603			if (rdata->cfile->invalidHandle)
3604				rc = -EAGAIN;
3605			else {
3606#ifdef CONFIG_CIFS_SMB_DIRECT
3607				if (rdata->mr) {
3608					rdata->mr->need_invalidate = true;
3609					smbd_deregister_mr(rdata->mr);
3610					rdata->mr = NULL;
3611				}
3612#endif
3613				rc = server->ops->async_readv(rdata);
3614			}
3615		}
3616
3617		/* If the read was successfully sent, we are done */
3618		if (!rc) {
3619			/* Add to aio pending list */
3620			list_add_tail(&rdata->list, rdata_list);
3621			return 0;
3622		}
3623
3624		/* Roll back credits and retry if needed */
3625		add_credits_and_wake_if(server, &rdata->credits, 0);
3626	} while (rc == -EAGAIN);
3627
3628fail:
3629	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3630	return rc;
3631}
3632
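/*
 * Issue @len bytes of uncached read as a chain of rsize-limited rdata
 * requests, mirroring cifs_write_from_iter() on the read side: direct I/O
 * pins the destination pages while the copy path allocates them, and each
 * rdata sent is queued on @rdata_list for the collect routine.
 */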
3633static int
3634cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3635		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3636		     struct cifs_aio_ctx *ctx)
3637{
3638	struct cifs_readdata *rdata;
3639	unsigned int npages, rsize;
3640	struct cifs_credits credits_on_stack;
3641	struct cifs_credits *credits = &credits_on_stack;
3642	size_t cur_len;
3643	int rc;
3644	pid_t pid;
3645	struct TCP_Server_Info *server;
3646	struct page **pagevec;
3647	size_t start;
3648	struct iov_iter direct_iov = ctx->iter;
3649
3650	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3651
3652	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3653		pid = open_file->pid;
3654	else
3655		pid = current->tgid;
3656
3657	if (ctx->direct_io)
3658		iov_iter_advance(&direct_iov, offset - ctx->pos);
3659
3660	do {
3661		if (open_file->invalidHandle) {
3662			rc = cifs_reopen_file(open_file, true);
3663			if (rc == -EAGAIN)
3664				continue;
3665			else if (rc)
3666				break;
3667		}
3668
3669		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3670						   &rsize, credits);
3671		if (rc)
3672			break;
3673
3674		cur_len = min_t(const size_t, len, rsize);
3675
3676		if (ctx->direct_io) {
3677			ssize_t result;
3678
3679			result = iov_iter_get_pages_alloc(
3680					&direct_iov, &pagevec,
3681					cur_len, &start);
3682			if (result < 0) {
3683				cifs_dbg(VFS,
3684					 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3685					 result, iov_iter_type(&direct_iov),
3686					 direct_iov.iov_offset,
3687					 direct_iov.count);
3688				dump_stack();
3689
3690				rc = result;
3691				add_credits_and_wake_if(server, credits, 0);
3692				break;
3693			}
3694			cur_len = (size_t)result;
3695			iov_iter_advance(&direct_iov, cur_len);
3696
3697			rdata = cifs_readdata_direct_alloc(
3698					pagevec, cifs_uncached_readv_complete);
3699			if (!rdata) {
3700				add_credits_and_wake_if(server, credits, 0);
3701				rc = -ENOMEM;
3702				break;
3703			}
3704
3705			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3706			rdata->page_offset = start;
3707			rdata->tailsz = npages > 1 ?
3708				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3709				cur_len;
3710
3711		} else {
3713			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3714			/* allocate a readdata struct */
3715			rdata = cifs_readdata_alloc(npages,
3716					    cifs_uncached_readv_complete);
3717			if (!rdata) {
3718				add_credits_and_wake_if(server, credits, 0);
3719				rc = -ENOMEM;
3720				break;
3721			}
3722
3723			rc = cifs_read_allocate_pages(rdata, npages);
3724			if (rc) {
3725				kvfree(rdata->pages);
3726				kfree(rdata);
3727				add_credits_and_wake_if(server, credits, 0);
3728				break;
3729			}
3730
3731			rdata->tailsz = PAGE_SIZE;
3732		}
3733
3734		rdata->server = server;
3735		rdata->cfile = cifsFileInfo_get(open_file);
3736		rdata->nr_pages = npages;
3737		rdata->offset = offset;
3738		rdata->bytes = cur_len;
3739		rdata->pid = pid;
3740		rdata->pagesz = PAGE_SIZE;
3741		rdata->read_into_pages = cifs_uncached_read_into_pages;
3742		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3743		rdata->credits = credits_on_stack;
3744		rdata->ctx = ctx;
3745		kref_get(&ctx->refcount);
3746
3747		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3748
3749		if (!rc) {
3750			if (rdata->cfile->invalidHandle)
3751				rc = -EAGAIN;
3752			else
3753				rc = server->ops->async_readv(rdata);
3754		}
3755
3756		if (rc) {
3757			add_credits_and_wake_if(server, &rdata->credits, 0);
3758			kref_put(&rdata->refcount,
3759				cifs_uncached_readdata_release);
3760			if (rc == -EAGAIN) {
3761				iov_iter_revert(&direct_iov, cur_len);
3762				continue;
3763			}
3764			break;
3765		}
3766
3767		list_add_tail(&rdata->list, rdata_list);
3768		offset += cur_len;
3769		len -= cur_len;
3770	} while (len > 0);
3771
3772	return rc;
3773}
3774
3775static void
3776collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3777{
3778	struct cifs_readdata *rdata, *tmp;
3779	struct iov_iter *to = &ctx->iter;
3780	struct cifs_sb_info *cifs_sb;
3781	int rc;
3782
3783	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3784
3785	mutex_lock(&ctx->aio_mutex);
3786
3787	if (list_empty(&ctx->list)) {
3788		mutex_unlock(&ctx->aio_mutex);
3789		return;
3790	}
3791
3792	rc = ctx->rc;
3793	/* the loop below should proceed in the order of increasing offsets */
3794again:
3795	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3796		if (!rc) {
3797			if (!try_wait_for_completion(&rdata->done)) {
3798				mutex_unlock(&ctx->aio_mutex);
3799				return;
3800			}
3801
3802			if (rdata->result == -EAGAIN) {
3803				/* resend call if it's a retryable error */
3804				struct list_head tmp_list;
3805				unsigned int got_bytes = rdata->got_bytes;
3806
3807				list_del_init(&rdata->list);
3808				INIT_LIST_HEAD(&tmp_list);
3809
3810				/*
3811				 * Got a part of data and then reconnect has
3812				 * happened -- fill the buffer and continue
3813				 * reading.
3814				 */
3815				if (got_bytes && got_bytes < rdata->bytes) {
3816					rc = 0;
3817					if (!ctx->direct_io)
3818						rc = cifs_readdata_to_iov(rdata, to);
3819					if (rc) {
3820						kref_put(&rdata->refcount,
3821							cifs_uncached_readdata_release);
3822						continue;
3823					}
3824				}
3825
3826				if (ctx->direct_io) {
3827					/*
3828					 * Re-use rdata as this is a
3829					 * direct I/O
3830					 */
3831					rc = cifs_resend_rdata(
3832						rdata,
3833						&tmp_list, ctx);
3834				} else {
3835					rc = cifs_send_async_read(
3836						rdata->offset + got_bytes,
3837						rdata->bytes - got_bytes,
3838						rdata->cfile, cifs_sb,
3839						&tmp_list, ctx);
3840
3841					kref_put(&rdata->refcount,
3842						cifs_uncached_readdata_release);
3843				}
3844
3845				list_splice(&tmp_list, &ctx->list);
3846
3847				goto again;
3848			} else if (rdata->result)
3849				rc = rdata->result;
3850			else if (!ctx->direct_io)
3851				rc = cifs_readdata_to_iov(rdata, to);
3852
3853			/* if there was a short read -- discard anything left */
3854			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3855				rc = -ENODATA;
3856
3857			ctx->total_len += rdata->got_bytes;
3858		}
3859		list_del_init(&rdata->list);
3860		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3861	}
3862
3863	if (!ctx->direct_io)
3864		ctx->total_len = ctx->len - iov_iter_count(to);
3865
3866	/* mask nodata case */
3867	if (rc == -ENODATA)
3868		rc = 0;
3869
3870	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3871
3872	mutex_unlock(&ctx->aio_mutex);
3873
3874	if (ctx->iocb && ctx->iocb->ki_complete)
3875		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3876	else
3877		complete(&ctx->done);
3878}
3879
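/*
 * Illustrative sketch (not part of the original file): the -EAGAIN resend
 * pattern used by collect_uncached_read_data() above.  A failed rdata is
 * unlinked, new requests for the unread remainder are built on a private
 * list, and that list is spliced back before restarting the walk, so the
 * retried requests are themselves re-checked for completion.
 */
#if 0	/* example only -- never compiled */
	LIST_HEAD(tmp_list);

	list_del_init(&rdata->list);
	rc = cifs_send_async_read(rdata->offset + got_bytes,
				  rdata->bytes - got_bytes,
				  rdata->cfile, cifs_sb, &tmp_list, ctx);
	list_splice(&tmp_list, &ctx->list);
	goto again;	/* rescan, including the freshly queued rdatas */
#endif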
3880static ssize_t __cifs_readv(
3881	struct kiocb *iocb, struct iov_iter *to, bool direct)
3882{
3883	size_t len;
3884	struct file *file = iocb->ki_filp;
3885	struct cifs_sb_info *cifs_sb;
3886	struct cifsFileInfo *cfile;
3887	struct cifs_tcon *tcon;
3888	ssize_t rc, total_read = 0;
3889	loff_t offset = iocb->ki_pos;
3890	struct cifs_aio_ctx *ctx;
3891
3892	/*
3893	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so fall
3894	 * back to the data-copy read path. This could be improved by
3895	 * getting the pages directly from an ITER_KVEC iterator.
3896	 */
3897	if (direct && iov_iter_is_kvec(to)) {
3898		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3899		direct = false;
3900	}
3901
3902	len = iov_iter_count(to);
3903	if (!len)
3904		return 0;
3905
3906	cifs_sb = CIFS_FILE_SB(file);
3907	cfile = file->private_data;
3908	tcon = tlink_tcon(cfile->tlink);
3909
3910	if (!tcon->ses->server->ops->async_readv)
3911		return -ENOSYS;
3912
3913	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3914		cifs_dbg(FYI, "attempting read on write only file instance\n");
3915
3916	ctx = cifs_aio_ctx_alloc();
3917	if (!ctx)
3918		return -ENOMEM;
3919
3920	ctx->cfile = cifsFileInfo_get(cfile);
3921
3922	if (!is_sync_kiocb(iocb))
3923		ctx->iocb = iocb;
3924
3925	if (iter_is_iovec(to))
3926		ctx->should_dirty = true;
3927
3928	if (direct) {
3929		ctx->pos = offset;
3930		ctx->direct_io = true;
3931		ctx->iter = *to;
3932		ctx->len = len;
3933	} else {
3934		rc = setup_aio_ctx_iter(ctx, to, READ);
3935		if (rc) {
3936			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3937			return rc;
3938		}
3939		len = ctx->len;
3940	}
3941
3942	if (direct) {
3943		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
3944						  offset, offset + len - 1);
3945		if (rc) {
3946			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3947			return -EAGAIN;
3948		}
3949	}
3950
3951	/* grab a lock here because read response handlers can access ctx */
3952	mutex_lock(&ctx->aio_mutex);
3953
3954	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3955
3956	/* if at least one read request was sent successfully, reset rc */
3957	if (!list_empty(&ctx->list))
3958		rc = 0;
3959
3960	mutex_unlock(&ctx->aio_mutex);
3961
3962	if (rc) {
3963		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3964		return rc;
3965	}
3966
3967	if (!is_sync_kiocb(iocb)) {
3968		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3969		return -EIOCBQUEUED;
3970	}
3971
3972	rc = wait_for_completion_killable(&ctx->done);
3973	if (rc) {
3974		mutex_lock(&ctx->aio_mutex);
3975		ctx->rc = rc = -EINTR;
3976		total_read = ctx->total_len;
3977		mutex_unlock(&ctx->aio_mutex);
3978	} else {
3979		rc = ctx->rc;
3980		total_read = ctx->total_len;
3981	}
3982
3983	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3984
3985	if (total_read) {
3986		iocb->ki_pos += total_read;
3987		return total_read;
3988	}
3989	return rc;
3990}
3991
3992ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3993{
3994	return __cifs_readv(iocb, to, true);
3995}
3996
3997ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3998{
3999	return __cifs_readv(iocb, to, false);
4000}
4001
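/*
 * For orientation (a sketch, not part of this file): these entry points are
 * wired up via the file_operations tables in cifsfs.c, roughly along the
 * lines of the fragment below (member layout abbreviated).
 */
#if 0	/* example only -- never compiled */
const struct file_operations cifs_file_direct_ops = {
	.read_iter	= cifs_direct_readv,
	.write_iter	= cifs_direct_writev,
	/* ... */
};

const struct file_operations cifs_file_strict_ops = {
	.read_iter	= cifs_strict_readv,
	.write_iter	= cifs_strict_writev,
	/* ... */
};
#endif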
4002ssize_t
4003cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4004{
4005	struct inode *inode = file_inode(iocb->ki_filp);
4006	struct cifsInodeInfo *cinode = CIFS_I(inode);
4007	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4008	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4009						iocb->ki_filp->private_data;
4010	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4011	int rc = -EACCES;
4012
4013	/*
4014	 * In strict cache mode we need to read from the server every time if
4015	 * we don't have a level II oplock, because the server can delay the
4016	 * mtime change, so we can't decide whether to invalidate the inode.
4017	 * Reading from the page cache can also fail if there are mandatory
4018	 * locks on pages affected by this read but not on the region from
4019	 * pos to pos+len-1.
4020	 */
4021	if (!CIFS_CACHE_READ(cinode))
4022		return cifs_user_readv(iocb, to);
4023
4024	if (cap_unix(tcon->ses) &&
4025	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4026	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4027		return generic_file_read_iter(iocb, to);
4028
4029	/*
4030	 * We need to hold the sem to be sure nobody modifies lock list
4031	 * with a brlock that prevents reading.
4032	 */
4033	down_read(&cinode->lock_sem);
4034	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4035				     tcon->ses->server->vals->shared_lock_type,
4036				     0, NULL, CIFS_READ_OP))
4037		rc = generic_file_read_iter(iocb, to);
4038	up_read(&cinode->lock_sem);
4039	return rc;
4040}
4041
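/*
 * For orientation (a sketch, not part of the original file): the three-way
 * decision cifs_strict_readv() makes above.  posix_brlock_safe() is a
 * hypothetical stand-in for the cap_unix()/CIFS_UNIX_FCNTL_CAP/
 * CIFS_MOUNT_NOPOSIXBRL test.
 */
#if 0	/* example only -- never compiled */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to); /* no oplock: ask server */
	if (posix_brlock_safe(tcon, cifs_sb))     /* hypothetical */
		return generic_file_read_iter(iocb, to); /* cache is safe */
	/* else: hold lock_sem and read from cache only if no brlock conflict */
#endif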
4042static ssize_t
4043cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4044{
4045	int rc = -EACCES;
4046	unsigned int bytes_read = 0;
4047	unsigned int total_read;
4048	unsigned int current_read_size;
4049	unsigned int rsize;
4050	struct cifs_sb_info *cifs_sb;
4051	struct cifs_tcon *tcon;
4052	struct TCP_Server_Info *server;
4053	unsigned int xid;
4054	char *cur_offset;
4055	struct cifsFileInfo *open_file;
4056	struct cifs_io_parms io_parms = {0};
4057	int buf_type = CIFS_NO_BUFFER;
4058	__u32 pid;
4059
4060	xid = get_xid();
4061	cifs_sb = CIFS_FILE_SB(file);
4062
4063	/* FIXME: set up handlers for larger reads and/or convert to async */
4064	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
4065
4066	if (file->private_data == NULL) {
4067		rc = -EBADF;
4068		free_xid(xid);
4069		return rc;
4070	}
4071	open_file = file->private_data;
4072	tcon = tlink_tcon(open_file->tlink);
4073	server = cifs_pick_channel(tcon->ses);
4074
4075	if (!server->ops->sync_read) {
4076		free_xid(xid);
4077		return -ENOSYS;
4078	}
4079
4080	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4081		pid = open_file->pid;
4082	else
4083		pid = current->tgid;
4084
4085	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4086		cifs_dbg(FYI, "attempting read on write only file instance\n");
4087
4088	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4089	     total_read += bytes_read, cur_offset += bytes_read) {
4090		do {
4091			current_read_size = min_t(uint, read_size - total_read,
4092						  rsize);
4093			/*
4094			 * For Windows ME and 9x we do not want to request
4095			 * more than the server negotiated, since it will
4096			 * refuse the read otherwise.
4097			 */
4098			if (!(tcon->ses->capabilities &
4099				tcon->ses->server->vals->cap_large_files)) {
4100				current_read_size = min_t(uint,
4101					current_read_size, CIFSMaxBufSize);
4102			}
4103			if (open_file->invalidHandle) {
4104				rc = cifs_reopen_file(open_file, true);
4105				if (rc != 0)
4106					break;
4107			}
4108			io_parms.pid = pid;
4109			io_parms.tcon = tcon;
4110			io_parms.offset = *offset;
4111			io_parms.length = current_read_size;
4112			io_parms.server = server;
4113			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4114						    &bytes_read, &cur_offset,
4115						    &buf_type);
4116		} while (rc == -EAGAIN);
4117
4118		if (rc || (bytes_read == 0)) {
4119			if (total_read) {
4120				break;
4121			} else {
4122				free_xid(xid);
4123				return rc;
4124			}
4125		} else {
4126			cifs_stats_bytes_read(tcon, total_read);
4127			*offset += bytes_read;
4128		}
4129	}
4130	free_xid(xid);
4131	return total_read;
4132}
4133
4134/*
4135 * If the page is mmap'ed into a process' page tables, then we need to make
4136 * sure that it doesn't change while being written back.
4137 */
4138static vm_fault_t
4139cifs_page_mkwrite(struct vm_fault *vmf)
4140{
4141	struct page *page = vmf->page;
4142
4143	lock_page(page);
4144	return VM_FAULT_LOCKED;
4145}
4146
4147static const struct vm_operations_struct cifs_file_vm_ops = {
4148	.fault = filemap_fault,
4149	.map_pages = filemap_map_pages,
4150	.page_mkwrite = cifs_page_mkwrite,
4151};
4152
4153int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4154{
4155	int xid, rc = 0;
4156	struct inode *inode = file_inode(file);
4157
4158	xid = get_xid();
4159
4160	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4161		rc = cifs_zap_mapping(inode);
4162	if (!rc)
4163		rc = generic_file_mmap(file, vma);
4164	if (!rc)
4165		vma->vm_ops = &cifs_file_vm_ops;
4166
4167	free_xid(xid);
4168	return rc;
4169}
4170
4171int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4172{
4173	int rc, xid;
4174
4175	xid = get_xid();
4176
4177	rc = cifs_revalidate_file(file);
4178	if (rc)
4179		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4180			 rc);
4181	if (!rc)
4182		rc = generic_file_mmap(file, vma);
4183	if (!rc)
4184		vma->vm_ops = &cifs_file_vm_ops;
4185
4186	free_xid(xid);
4187	return rc;
4188}
4189
4190static void
4191cifs_readv_complete(struct work_struct *work)
4192{
4193	unsigned int i, got_bytes;
4194	struct cifs_readdata *rdata = container_of(work,
4195						struct cifs_readdata, work);
4196
4197	got_bytes = rdata->got_bytes;
4198	for (i = 0; i < rdata->nr_pages; i++) {
4199		struct page *page = rdata->pages[i];
4200
4201		lru_cache_add(page);
4202
4203		if (rdata->result == 0 ||
4204		    (rdata->result == -EAGAIN && got_bytes)) {
4205			flush_dcache_page(page);
4206			SetPageUptodate(page);
4207		}
4208
4209		unlock_page(page);
4210
4211		if (rdata->result == 0 ||
4212		    (rdata->result == -EAGAIN && got_bytes))
4213			cifs_readpage_to_fscache(rdata->mapping->host, page);
4214
4215		got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4216
4217		put_page(page);
4218		rdata->pages[i] = NULL;
4219	}
4220	kref_put(&rdata->refcount, cifs_readdata_release);
4221}
4222
4223static int
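/*
 * Worked example (not part of the original file) for the got_bytes walk in
 * cifs_readv_complete() above, with 4K pages: if a 3-page request received
 * 9000 bytes, the loop sees got_bytes = 9000, 4904, 808 at the top of each
 * pass, because each step consumes at most one page's worth:
 */
#if 0	/* example only -- never compiled */
	unsigned int got_bytes = 9000;	/* assumed result for 3 pages */
	int i;

	for (i = 0; i < 3; i++) {
		/* consumes 4096, 4096, then 808 bytes */
		got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
	}
	/* got_bytes == 0 when the walk finishes */
#endif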
4224readpages_fill_pages(struct TCP_Server_Info *server,
4225		     struct cifs_readdata *rdata, struct iov_iter *iter,
4226		     unsigned int len)
4227{
4228	int result = 0;
4229	unsigned int i;
4230	u64 eof;
4231	pgoff_t eof_index;
4232	unsigned int nr_pages = rdata->nr_pages;
4233	unsigned int page_offset = rdata->page_offset;
4234
4235	/* determine the eof that the server (probably) has */
4236	eof = CIFS_I(rdata->mapping->host)->server_eof;
4237	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4238	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4239
4240	rdata->got_bytes = 0;
4241	rdata->tailsz = PAGE_SIZE;
4242	for (i = 0; i < nr_pages; i++) {
4243		struct page *page = rdata->pages[i];
4244		unsigned int to_read = rdata->pagesz;
4245		size_t n;
4246
4247		if (i == 0)
4248			to_read -= page_offset;
4249		else
4250			page_offset = 0;
4251
4252		n = to_read;
4253
4254		if (len >= to_read) {
4255			len -= to_read;
4256		} else if (len > 0) {
4257			/* enough for partial page, fill and zero the rest */
4258			zero_user(page, len + page_offset, to_read - len);
4259			n = rdata->tailsz = len;
4260			len = 0;
4261		} else if (page->index > eof_index) {
4262			/*
4263			 * The VFS will not try to do readahead past the
4264			 * i_size, but it's possible that we have outstanding
4265			 * writes with gaps in the middle and the i_size hasn't
4266			 * caught up yet. Populate those with zeroed out pages
4267			 * to prevent the VFS from repeatedly attempting to
4268			 * fill them until the writes are flushed.
4269			 */
4270			zero_user(page, 0, PAGE_SIZE);
4271			lru_cache_add(page);
4272			flush_dcache_page(page);
4273			SetPageUptodate(page);
4274			unlock_page(page);
4275			put_page(page);
4276			rdata->pages[i] = NULL;
4277			rdata->nr_pages--;
4278			continue;
4279		} else {
4280			/* no need to hold page hostage */
4281			lru_cache_add(page);
4282			unlock_page(page);
4283			put_page(page);
4284			rdata->pages[i] = NULL;
4285			rdata->nr_pages--;
4286			continue;
4287		}
4288
4289		if (iter)
4290			result = copy_page_from_iter(
4291					page, page_offset, n, iter);
4292#ifdef CONFIG_CIFS_SMB_DIRECT
4293		else if (rdata->mr)
4294			result = n;
4295#endif
4296		else
4297			result = cifs_read_page_from_socket(
4298					server, page, page_offset, n);
4299		if (result < 0)
4300			break;
4301
4302		rdata->got_bytes += result;
4303	}
4304
4305	return result != -ECONNABORTED && rdata->got_bytes > 0 ?
4306						rdata->got_bytes : result;
4307}
4308
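/*
 * Worked example (not part of the original file) for the partial-page case
 * in readpages_fill_pages() above, with 4K pages and page_offset = 0: a
 * server reply of len = 5000 bytes across two pages fills page 0 fully
 * (len drops to 904), then hits the "len > 0 but < to_read" branch on
 * page 1, where the last 3192 bytes of the page are zeroed and tailsz
 * becomes 904, so only 904 bytes are pulled off the wire for it.
 */
#if 0	/* example only -- never compiled */
	/* page 1, with len == 904 remaining: */
	zero_user(page, 904, PAGE_SIZE - 904);	/* zero bytes 904..4095 */
	n = rdata->tailsz = 904;		/* read only the tail */
	len = 0;
#endif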
4309static int
4310cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4311			       struct cifs_readdata *rdata, unsigned int len)
4312{
4313	return readpages_fill_pages(server, rdata, NULL, len);
4314}
4315
4316static int
4317cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4318			       struct cifs_readdata *rdata,
4319			       struct iov_iter *iter)
4320{
4321	return readpages_fill_pages(server, rdata, iter, iter->count);
4322}
4323
4324static int
4325readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4326		    unsigned int rsize, struct list_head *tmplist,
4327		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4328{
4329	struct page *page, *tpage;
4330	unsigned int expected_index;
4331	int rc;
4332	gfp_t gfp = readahead_gfp_mask(mapping);
4333
4334	INIT_LIST_HEAD(tmplist);
4335
4336	page = lru_to_page(page_list);
4337
4338	/*
4339	 * Lock the page and put it in the cache. Since no one else
4340	 * should have access to this page, we're safe to simply set
4341	 * PG_locked without checking it first.
4342	 */
4343	__SetPageLocked(page);
4344	rc = add_to_page_cache_locked(page, mapping,
4345				      page->index, gfp);
4346
4347	/* give up if we can't stick it in the cache */
4348	if (rc) {
4349		__ClearPageLocked(page);
4350		return rc;
4351	}
4352
4353	/* move first page to the tmplist */
4354	*offset = (loff_t)page->index << PAGE_SHIFT;
4355	*bytes = PAGE_SIZE;
4356	*nr_pages = 1;
4357	list_move_tail(&page->lru, tmplist);
4358
4359	/* now try and add more pages onto the request */
4360	expected_index = page->index + 1;
4361	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4362		/* discontinuity? */
4363		if (page->index != expected_index)
4364			break;
4365
4366		/* would this page push the read over the rsize? */
4367		if (*bytes + PAGE_SIZE > rsize)
4368			break;
4369
4370		__SetPageLocked(page);
4371		rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4372		if (rc) {
4373			__ClearPageLocked(page);
4374			break;
4375		}
4376		list_move_tail(&page->lru, tmplist);
4377		(*bytes) += PAGE_SIZE;
4378		expected_index++;
4379		(*nr_pages)++;
4380	}
4381	return rc;
4382}
4383
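/*
 * Worked example (not part of the original file) for readpages_get_pages()
 * above: given pages with indexes 7, 8, 9 and 11 on page_list and
 * rsize = 16K, the first call batches {7, 8, 9} (nr_pages = 3,
 * bytes = 12288, offset = 7 << PAGE_SHIFT) and stops at 11 because of the
 * index discontinuity; 11 is picked up by the next loop iteration in
 * cifs_readpages().  With rsize = 8K the same list would stop after
 * {7, 8}, since adding page 9 would push bytes past rsize.
 */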
4384static int cifs_readpages(struct file *file, struct address_space *mapping,
4385	struct list_head *page_list, unsigned num_pages)
4386{
4387	int rc;
4388	int err = 0;
4389	struct list_head tmplist;
4390	struct cifsFileInfo *open_file = file->private_data;
4391	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4392	struct TCP_Server_Info *server;
4393	pid_t pid;
4394	unsigned int xid;
4395
4396	xid = get_xid();
4397	/*
4398	 * Read as many pages as possible from fscache. Returns -ENOBUFS
4399	 * immediately if the cookie is negative.
4400	 *
4401	 * After this point, every page in the list might have PG_fscache set,
4402	 * so we will need to clear that on every page we don't use.
4403	 */
4404	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4405					 &num_pages);
4406	if (rc == 0) {
4407		free_xid(xid);
4408		return rc;
4409	}
4410
4411	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4412		pid = open_file->pid;
4413	else
4414		pid = current->tgid;
4415
4416	rc = 0;
4417	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4418
4419	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4420		 __func__, file, mapping, num_pages);
4421
4422	/*
4423	 * Start with the page at end of list and move it to private
4424	 * list. Do the same with any following pages until we hit
4425	 * the rsize limit, hit an index discontinuity, or run out of
4426	 * pages. Issue the async read and then start the loop again
4427	 * until the list is empty.
4428	 *
4429	 * Note that list order is important. The page_list is in
4430	 * the order of declining indexes. When we put the pages in
4431	 * the rdata->pages, then we want them in increasing order.
4432	 */
4433	while (!list_empty(page_list) && !err) {
4434		unsigned int i, nr_pages, bytes, rsize;
4435		loff_t offset;
4436		struct page *page, *tpage;
4437		struct cifs_readdata *rdata;
4438		struct cifs_credits credits_on_stack;
4439		struct cifs_credits *credits = &credits_on_stack;
4440
4441		if (open_file->invalidHandle) {
4442			rc = cifs_reopen_file(open_file, true);
4443			if (rc == -EAGAIN)
4444				continue;
4445			else if (rc)
4446				break;
4447		}
4448
4449		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4450						   &rsize, credits);
4451		if (rc)
4452			break;
4453
4454		/*
4455		 * Give up immediately if rsize is too small to read an entire
4456		 * page. The VFS will fall back to readpage. However, we should
4457		 * never reach this point, since we set ra_pages to 0 when
4458		 * rsize is smaller than a cache page.
4459		 */
4460		if (unlikely(rsize < PAGE_SIZE)) {
4461			add_credits_and_wake_if(server, credits, 0);
4462			free_xid(xid);
4463			return 0;
4464		}
4465
4466		nr_pages = 0;
4467		err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4468					 &nr_pages, &offset, &bytes);
4469		if (!nr_pages) {
4470			add_credits_and_wake_if(server, credits, 0);
4471			break;
4472		}
4473
4474		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4475		if (!rdata) {
4476			/* best to give up if we're out of mem */
4477			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4478				list_del(&page->lru);
4479				lru_cache_add(page);
4480				unlock_page(page);
4481				put_page(page);
4482			}
4483			rc = -ENOMEM;
4484			add_credits_and_wake_if(server, credits, 0);
4485			break;
4486		}
4487
4488		rdata->cfile = cifsFileInfo_get(open_file);
4489		rdata->server = server;
4490		rdata->mapping = mapping;
4491		rdata->offset = offset;
4492		rdata->bytes = bytes;
4493		rdata->pid = pid;
4494		rdata->pagesz = PAGE_SIZE;
4495		rdata->tailsz = PAGE_SIZE;
4496		rdata->read_into_pages = cifs_readpages_read_into_pages;
4497		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4498		rdata->credits = credits_on_stack;
4499
4500		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4501			list_del(&page->lru);
4502			rdata->pages[rdata->nr_pages++] = page;
4503		}
4504
4505		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4506
4507		if (!rc) {
4508			if (rdata->cfile->invalidHandle)
4509				rc = -EAGAIN;
4510			else
4511				rc = server->ops->async_readv(rdata);
4512		}
4513
4514		if (rc) {
4515			add_credits_and_wake_if(server, &rdata->credits, 0);
4516			for (i = 0; i < rdata->nr_pages; i++) {
4517				page = rdata->pages[i];
4518				lru_cache_add(page);
4519				unlock_page(page);
4520				put_page(page);
4521			}
4522			/* Fall back to readpage in error/reconnect cases */
4523			kref_put(&rdata->refcount, cifs_readdata_release);
4524			break;
4525		}
4526
4527		kref_put(&rdata->refcount, cifs_readdata_release);
4528	}
4529
4530	/* Any pages that have been shown to fscache but didn't get added to
4531	 * the pagecache must be uncached before they get returned to the
4532	 * allocator.
4533	 */
4534	cifs_fscache_readpages_cancel(mapping->host, page_list);
4535	free_xid(xid);
4536	return rc;
4537}
4538
4539/*
4540 * cifs_readpage_worker must be called with the page pinned
4541 */
4542static int cifs_readpage_worker(struct file *file, struct page *page,
4543	loff_t *poffset)
4544{
4545	char *read_data;
4546	int rc;
4547
4548	/* Is the page cached? */
4549	rc = cifs_readpage_from_fscache(file_inode(file), page);
4550	if (rc == 0)
4551		goto read_complete;
4552
4553	read_data = kmap(page);
4554	/* for reads over a certain size we could initiate async read-ahead */
4555
4556	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4557
4558	if (rc < 0)
4559		goto io_error;
4560	else
4561		cifs_dbg(FYI, "Bytes read %d\n", rc);
4562
4563	/* we do not want atime to be less than mtime, it broke some apps;
4564	   timespec64_compare() returns <0/0/>0, so test for "older" explicitly */
4565	file_inode(file)->i_atime = current_time(file_inode(file));
4566	if (timespec64_compare(&(file_inode(file)->i_atime),
4567			       &(file_inode(file)->i_mtime)) < 0)
4568		file_inode(file)->i_atime = file_inode(file)->i_mtime;
4569
4570	if (PAGE_SIZE > rc)
4571		memset(read_data + rc, 0, PAGE_SIZE - rc);
4572
4573	flush_dcache_page(page);
4574	SetPageUptodate(page);
4575
4576	/* send this page to the cache */
4577	cifs_readpage_to_fscache(file_inode(file), page);
4578
4579	rc = 0;
4580
4581io_error:
4582	kunmap(page);
4583
4584read_complete:
4585	unlock_page(page);
4586	return rc;
4587}
4588
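/*
 * Note on the atime clamp above (a sketch, not part of the original file):
 * timespec64_compare(a, b) returns <0, 0 or >0 for a < b, a == b, a > b,
 * so the "< 0" test fires only when the freshly stamped atime is actually
 * older than mtime, e.g.:
 */
#if 0	/* example only -- never compiled */
	struct timespec64 a = { .tv_sec = 100, .tv_nsec = 0 };	/* atime */
	struct timespec64 m = { .tv_sec = 200, .tv_nsec = 0 };	/* mtime */

	if (timespec64_compare(&a, &m) < 0)	/* true: 100 < 200 */
		a = m;				/* clamp atime up to mtime */
#endif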
4589static int cifs_readpage(struct file *file, struct page *page)
4590{
4591	loff_t offset = page_file_offset(page);
4592	int rc = -EACCES;
4593	unsigned int xid;
4594
4595	xid = get_xid();
4596
4597	if (file->private_data == NULL) {
4598		rc = -EBADF;
4599		free_xid(xid);
4600		return rc;
4601	}
4602
4603	cifs_dbg(FYI, "readpage %p at offset %lld 0x%llx\n",
4604		 page, (long long)offset, (long long)offset);
4605
4606	rc = cifs_readpage_worker(file, page, &offset);
4607
4608	free_xid(xid);
4609	return rc;
4610}
4611
4612static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4613{
4614	struct cifsFileInfo *open_file;
4615
4616	spin_lock(&cifs_inode->open_file_lock);
4617	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4618		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4619			spin_unlock(&cifs_inode->open_file_lock);
4620			return 1;
4621		}
4622	}
4623	spin_unlock(&cifs_inode->open_file_lock);
4624	return 0;
4625}
4626
4627/* We do not want to update the file size from the server for inodes
4628   open for write - to avoid races with writepage extending the file.
4629   In the future we could consider refreshing the inode only on
4630   increases in the file size, but this is tricky to do without racing
4631   with writebehind page caching in the current Linux kernel
4632   design. */
4633bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4634{
4635	if (!cifsInode)
4636		return true;
4637
4638	if (is_inode_writable(cifsInode)) {
4639		/* This inode is open for write at least once */
4640		struct cifs_sb_info *cifs_sb;
4641
4642		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4643		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4644			/* since there is no page cache to corrupt on
4645			   direct I/O, we can change the size safely */
4646			return true;
4647		}
4648
4649		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4650			return true;
4651
4652		return false;
4653	} else
4654		return true;
4655}
4656
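/*
 * Worked examples (not part of the original file) for
 * is_size_safe_to_change() above: on a cache=none (CIFS_MOUNT_DIRECT_IO)
 * mount it always returns true, since there is no page cache to corrupt.
 * On a cached mount with the inode open for write, a server EOF of 8192
 * against a local i_size of 4096 returns true (pure growth), while a
 * server EOF of 4096 against a local i_size of 8192 returns false -- the
 * local size may be ahead of the server because of writebehind.
 */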
4657static int cifs_write_begin(struct file *file, struct address_space *mapping,
4658			loff_t pos, unsigned len, unsigned flags,
4659			struct page **pagep, void **fsdata)
4660{
4661	int oncethru = 0;
4662	pgoff_t index = pos >> PAGE_SHIFT;
4663	loff_t offset = pos & (PAGE_SIZE - 1);
4664	loff_t page_start = pos & PAGE_MASK;
4665	loff_t i_size;
4666	struct page *page;
4667	int rc = 0;
4668
4669	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4670
4671start:
4672	page = grab_cache_page_write_begin(mapping, index, flags);
4673	if (!page) {
4674		rc = -ENOMEM;
4675		goto out;
4676	}
4677
4678	if (PageUptodate(page))
4679		goto out;
4680
4681	/*
4682	 * If we write a full page it will be up to date, no need to read from
4683	 * the server. If the write is short, we'll end up doing a sync write
4684	 * instead.
4685	 */
4686	if (len == PAGE_SIZE)
4687		goto out;
4688
4689	/*
4690	 * optimize away the read when we have an oplock, and we're not
4691	 * expecting to use any of the data we'd be reading in. That
4692	 * is, when the page lies beyond the EOF, or straddles the EOF
4693	 * and the write will cover all of the existing data.
4694	 */
4695	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4696		i_size = i_size_read(mapping->host);
4697		if (page_start >= i_size ||
4698		    (offset == 0 && (pos + len) >= i_size)) {
4699			zero_user_segments(page, 0, offset,
4700					   offset + len,
4701					   PAGE_SIZE);
4702			/*
4703			 * PageChecked means that the parts of the page
4704			 * to which we're not writing are considered up
4705			 * to date. Once the data is copied to the
4706			 * page, it can be set uptodate.
4707			 */
4708			SetPageChecked(page);
4709			goto out;
4710		}
4711	}
4712
4713	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4714		/*
4715		 * might as well read a page, it is fast enough. If we get
4716		 * an error, we don't need to return it. cifs_write_end will
4717		 * do a sync write instead since PG_uptodate isn't set.
4718		 */
4719		cifs_readpage_worker(file, page, &page_start);
4720		put_page(page);
4721		oncethru = 1;
4722		goto start;
4723	} else {
4724		/* we could try using another file handle if there is one -
4725		   but how would we lock it to prevent a close of that handle
4726		   racing with this read? In any case this will be written
4727		   out by write_end, so it is fine */
4728	}
4729out:
4730	*pagep = page;
4731	return rc;
4732}
4733
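/*
 * For orientation (a sketch, not part of the original file): the paths
 * through cifs_write_begin() above, in the order they are tried.  The
 * conditions in comments are informal summaries, not real helpers.
 */
#if 0	/* example only -- never compiled */
	if (PageUptodate(page))			/* 1: cache already valid   */
		goto out;
	if (len == PAGE_SIZE)			/* 2: full-page overwrite   */
		goto out;
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))
	    /* && page past or straddling EOF */)
		goto out;			/* 3: zero edges, skip read */
	if (/* handle readable && first pass */)
		goto start;			/* 4: read-modify-write     */
	/* 5: otherwise fall through; write_end will do a sync write */
#endif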
4734static int cifs_release_page(struct page *page, gfp_t gfp)
4735{
4736	if (PagePrivate(page))
4737		return 0;
4738
4739	return cifs_fscache_release_page(page, gfp);
4740}
4741
4742static void cifs_invalidate_page(struct page *page, unsigned int offset,
4743				 unsigned int length)
4744{
4745	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4746
4747	if (offset == 0 && length == PAGE_SIZE)
4748		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4749}
4750
4751static int cifs_launder_page(struct page *page)
4752{
4753	int rc = 0;
4754	loff_t range_start = page_offset(page);
4755	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4756	struct writeback_control wbc = {
4757		.sync_mode = WB_SYNC_ALL,
4758		.nr_to_write = 0,
4759		.range_start = range_start,
4760		.range_end = range_end,
4761	};
4762
4763	cifs_dbg(FYI, "Launder page: %p\n", page);
4764
4765	if (clear_page_dirty_for_io(page))
4766		rc = cifs_writepage_locked(page, &wbc);
4767
4768	cifs_fscache_invalidate_page(page, page->mapping->host);
4769	return rc;
4770}
4771
4772void cifs_oplock_break(struct work_struct *work)
4773{
4774	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4775						  oplock_break);
4776	struct inode *inode = d_inode(cfile->dentry);
4777	struct cifsInodeInfo *cinode = CIFS_I(inode);
4778	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4779	struct TCP_Server_Info *server = tcon->ses->server;
4780	int rc = 0;
4781	bool purge_cache = false;
4782
4783	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4784			TASK_UNINTERRUPTIBLE);
4785
4786	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4787				      cfile->oplock_epoch, &purge_cache);
4788
4789	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4790						cifs_has_mand_locks(cinode)) {
4791		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4792			 inode);
4793		cinode->oplock = 0;
4794	}
4795
4796	if (inode && S_ISREG(inode->i_mode)) {
4797		if (CIFS_CACHE_READ(cinode))
4798			break_lease(inode, O_RDONLY);
4799		else
4800			break_lease(inode, O_WRONLY);
4801		rc = filemap_fdatawrite(inode->i_mapping);
4802		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4803			rc = filemap_fdatawait(inode->i_mapping);
4804			mapping_set_error(inode->i_mapping, rc);
4805			cifs_zap_mapping(inode);
4806		}
4807		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4808		if (CIFS_CACHE_WRITE(cinode))
4809			goto oplock_break_ack;
4810	}
4811
4812	rc = cifs_push_locks(cfile);
4813	if (rc)
4814		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4815
4816oplock_break_ack:
4817	/*
4818	 * Releasing a stale oplock after a recent reconnect of the SMB session,
4819	 * using a now-incorrect file handle, is not a data integrity issue.
4820	 * Still, do not bother sending an oplock release if the session to the
4821	 * server is disconnected, since the server already released the oplock.
4822	 */
4823	if (!cfile->oplock_break_cancelled) {
4824		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4825							     cinode);
4826		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4827	}
4828	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4829	cifs_done_oplock_break(cinode);
4830}
4831
4832/*
4833 * The presence of cifs_direct_io() in the address space ops vector
4834 * allows open() with O_DIRECT, which would have failed otherwise.
4835 *
4836 * In non-cached mode (mount with cache=none), direct read and write
4837 * requests are shunted off elsewhere, so this method should never be called.
4838 *
4839 * Direct I/O is not yet supported in cached mode.
4840 */
4841static ssize_t
4842cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4843{
4844	/*
4845	 * FIXME
4846	 * Eventually need to support direct I/O for non-forcedirectio mounts.
4847	 */
4848	return -EINVAL;
4849}
4850
4851static int cifs_swap_activate(struct swap_info_struct *sis,
4852			      struct file *swap_file, sector_t *span)
4853{
4854	struct cifsFileInfo *cfile = swap_file->private_data;
4855	struct inode *inode = swap_file->f_mapping->host;
4856	unsigned long blocks;
4857	long long isize;
4858
4859	cifs_dbg(FYI, "swap activate\n");
4860
4861	spin_lock(&inode->i_lock);
4862	blocks = inode->i_blocks;
4863	isize = inode->i_size;
4864	spin_unlock(&inode->i_lock);
4865	if (blocks * 512 < isize) {
4866		pr_warn("swap activate: swapfile has holes\n");
4867		return -EINVAL;
4868	}
4869	*span = sis->pages;
4870
4871	pr_warn_once("Swap support over SMB3 is experimental\n");
4872
4873	/*
4874	 * TODO: consider adding ACL (or documenting how) to prevent other
4875	 * users (on this or other systems) from reading it
4876	 */
4877
4878
4879	/* TODO: add sk_set_memalloc(inet) or similar */
4880
4881	if (cfile)
4882		cfile->swapfile = true;
4883	/*
4884	 * TODO: Since file already open, we can't open with DENY_ALL here
4885	 * but we could add call to grab a byte range lock to prevent others
4886	 * from reading or writing the file
4887	 */
4888
4889	return 0;
4890}
4891
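/*
 * Worked example (not part of the original file) for the holes check in
 * cifs_swap_activate() above: i_blocks counts 512-byte units, so a fully
 * allocated 1 MiB swapfile has i_blocks >= 2048.  If blocks * 512 < isize,
 * part of the file must be unallocated (a hole), and activation is
 * refused rather than risk having to allocate server blocks while
 * swapping under memory pressure.
 */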
4892static void cifs_swap_deactivate(struct file *file)
4893{
4894	struct cifsFileInfo *cfile = file->private_data;
4895
4896	cifs_dbg(FYI, "swap deactivate\n");
4897
4898	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4899
4900	if (cfile)
4901		cfile->swapfile = false;
4902
4903	/* do we need to unpin (or unlock) the file? */
4904}
4905
4906const struct address_space_operations cifs_addr_ops = {
4907	.readpage = cifs_readpage,
4908	.readpages = cifs_readpages,
4909	.writepage = cifs_writepage,
4910	.writepages = cifs_writepages,
4911	.write_begin = cifs_write_begin,
4912	.write_end = cifs_write_end,
4913	.set_page_dirty = __set_page_dirty_nobuffers,
4914	.releasepage = cifs_release_page,
4915	.direct_IO = cifs_direct_io,
4916	.invalidatepage = cifs_invalidate_page,
4917	.launder_page = cifs_launder_page,
4918	/*
4919	 * TODO: investigate and, if useful, add a cifs_migratePage helper
4920	 * (under CONFIG_MIGRATION) in the future, and also investigate
4921	 * and add an is_dirty_writeback helper if needed
4922	 */
4923	.swap_activate = cifs_swap_activate,
4924	.swap_deactivate = cifs_swap_deactivate,
4925};
4926
4927/*
4928 * cifs_readpages requires the server to support a buffer large enough to
4929 * contain the header plus one complete page of data.  Otherwise, we need
4930 * to leave cifs_readpages out of the address space operations.
4931 */
4932const struct address_space_operations cifs_addr_ops_smallbuf = {
4933	.readpage = cifs_readpage,
4934	.writepage = cifs_writepage,
4935	.writepages = cifs_writepages,
4936	.write_begin = cifs_write_begin,
4937	.write_end = cifs_write_end,
4938	.set_page_dirty = __set_page_dirty_nobuffers,
4939	.releasepage = cifs_release_page,
4940	.invalidatepage = cifs_invalidate_page,
4941	.launder_page = cifs_launder_page,
4942};
4943