xref: /kernel/linux/linux-5.10/fs/fuse/dev.c (revision 8c2ecf20)
1/*
2  FUSE: Filesystem in Userspace
3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4
5  This program can be distributed under the terms of the GNU GPL.
6  See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/init.h>
12#include <linux/module.h>
13#include <linux/poll.h>
14#include <linux/sched/signal.h>
15#include <linux/uio.h>
16#include <linux/miscdevice.h>
17#include <linux/pagemap.h>
18#include <linux/file.h>
19#include <linux/slab.h>
20#include <linux/pipe_fs_i.h>
21#include <linux/swap.h>
22#include <linux/splice.h>
23#include <linux/sched.h>
24
25MODULE_ALIAS_MISCDEV(FUSE_MINOR);
26MODULE_ALIAS("devname:fuse");
27
28/* Ordinary requests have even IDs, while interrupt IDs are odd */
29#define FUSE_INT_REQ_BIT (1ULL << 0)
30#define FUSE_REQ_ID_STEP (1ULL << 1)
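/*
 * fuse_get_unique() below hands out even, monotonically increasing IDs for
 * ordinary requests.  The interrupt for a request with unique ID N is sent
 * with unique N | FUSE_INT_REQ_BIT, and its reply is matched back to the
 * original request via oh.unique & ~FUSE_INT_REQ_BIT (see request_find()).
 */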
31
32static struct kmem_cache *fuse_req_cachep;
33
34static struct fuse_dev *fuse_get_dev(struct file *file)
35{
36	/*
37	 * Lockless access is OK, because file->private_data is set
38	 * once during mount and is valid until the file is released.
39	 */
40	return READ_ONCE(file->private_data);
41}
42
43static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
44{
45	INIT_LIST_HEAD(&req->list);
46	INIT_LIST_HEAD(&req->intr_entry);
47	init_waitqueue_head(&req->waitq);
48	refcount_set(&req->count, 1);
49	__set_bit(FR_PENDING, &req->flags);
50	req->fm = fm;
51}
52
53static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
54{
55	struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
56	if (req)
57		fuse_request_init(fm, req);
58
59	return req;
60}
61
62static void fuse_request_free(struct fuse_req *req)
63{
64	kmem_cache_free(fuse_req_cachep, req);
65}
66
67static void __fuse_get_request(struct fuse_req *req)
68{
69	refcount_inc(&req->count);
70}
71
72/* Must be called with > 1 refcount */
73static void __fuse_put_request(struct fuse_req *req)
74{
75	refcount_dec(&req->count);
76}
77
78void fuse_set_initialized(struct fuse_conn *fc)
79{
80	/* Make sure stores before this are seen on another CPU */
81	smp_wmb();
82	fc->initialized = 1;
83}
84
85static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
86{
87	return !fc->initialized || (for_background && fc->blocked);
88}
89
90static void fuse_drop_waiting(struct fuse_conn *fc)
91{
92	/*
93	 * lockless check of fc->connected is okay, because atomic_dec_and_test()
94	 * provides a memory barrier matched with the one in fuse_wait_aborted()
95	 * to ensure no wake-up is missed.
96	 */
97	if (atomic_dec_and_test(&fc->num_waiting) &&
98	    !READ_ONCE(fc->connected)) {
99		/* wake up aborters */
100		wake_up_all(&fc->blocked_waitq);
101	}
102}
103
104static void fuse_put_request(struct fuse_req *req);
105
106static struct fuse_req *fuse_get_req(struct fuse_mount *fm, bool for_background)
107{
108	struct fuse_conn *fc = fm->fc;
109	struct fuse_req *req;
110	int err;
111	atomic_inc(&fc->num_waiting);
112
113	if (fuse_block_alloc(fc, for_background)) {
114		err = -EINTR;
115		if (wait_event_killable_exclusive(fc->blocked_waitq,
116				!fuse_block_alloc(fc, for_background)))
117			goto out;
118	}
119	/* Matches smp_wmb() in fuse_set_initialized() */
120	smp_rmb();
121
122	err = -ENOTCONN;
123	if (!fc->connected)
124		goto out;
125
126	err = -ECONNREFUSED;
127	if (fc->conn_error)
128		goto out;
129
130	req = fuse_request_alloc(fm, GFP_KERNEL);
131	err = -ENOMEM;
132	if (!req) {
133		if (for_background)
134			wake_up(&fc->blocked_waitq);
135		goto out;
136	}
137
138	req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
139	req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
140	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
141
142	__set_bit(FR_WAITING, &req->flags);
143	if (for_background)
144		__set_bit(FR_BACKGROUND, &req->flags);
145
146	if (unlikely(req->in.h.uid == ((uid_t)-1) ||
147		     req->in.h.gid == ((gid_t)-1))) {
148		fuse_put_request(req);
149		return ERR_PTR(-EOVERFLOW);
150	}
151	return req;
152
153 out:
154	fuse_drop_waiting(fc);
155	return ERR_PTR(err);
156}
157
158static void fuse_put_request(struct fuse_req *req)
159{
160	struct fuse_conn *fc = req->fm->fc;
161
162	if (refcount_dec_and_test(&req->count)) {
163		if (test_bit(FR_BACKGROUND, &req->flags)) {
164			/*
165			 * We get here in the unlikely case that a background
166			 * request was allocated but not sent
167			 */
168			spin_lock(&fc->bg_lock);
169			if (!fc->blocked)
170				wake_up(&fc->blocked_waitq);
171			spin_unlock(&fc->bg_lock);
172		}
173
174		if (test_bit(FR_WAITING, &req->flags)) {
175			__clear_bit(FR_WAITING, &req->flags);
176			fuse_drop_waiting(fc);
177		}
178
179		fuse_request_free(req);
180	}
181}
182
183unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
184{
185	unsigned nbytes = 0;
186	unsigned i;
187
188	for (i = 0; i < numargs; i++)
189		nbytes += args[i].size;
190
191	return nbytes;
192}
193EXPORT_SYMBOL_GPL(fuse_len_args);
194
195u64 fuse_get_unique(struct fuse_iqueue *fiq)
196{
197	fiq->reqctr += FUSE_REQ_ID_STEP;
198	return fiq->reqctr;
199}
200EXPORT_SYMBOL_GPL(fuse_get_unique);
201
202static unsigned int fuse_req_hash(u64 unique)
203{
204	return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
205}
206
207/**
208 * A new request is available, wake fiq->waitq
209 */
210static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq)
211__releases(fiq->lock)
212{
213	wake_up(&fiq->waitq);
214	kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
215	spin_unlock(&fiq->lock);
216}
217
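/*
 * Default input queue callbacks for /dev/fuse: wake up a reader sleeping in
 * fuse_dev_do_read() and send SIGIO to async waiters.  Other transports
 * (virtio-fs, for example) install their own fuse_iqueue_ops instead.
 */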
218const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
219	.wake_forget_and_unlock		= fuse_dev_wake_and_unlock,
220	.wake_interrupt_and_unlock	= fuse_dev_wake_and_unlock,
221	.wake_pending_and_unlock	= fuse_dev_wake_and_unlock,
222};
223EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops);
224
225static void queue_request_and_unlock(struct fuse_iqueue *fiq,
226				     struct fuse_req *req)
227__releases(fiq->lock)
228{
229	req->in.h.len = sizeof(struct fuse_in_header) +
230		fuse_len_args(req->args->in_numargs,
231			      (struct fuse_arg *) req->args->in_args);
232	list_add_tail(&req->list, &fiq->pending);
233	fiq->ops->wake_pending_and_unlock(fiq);
234}
235
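/*
 * Queue a FORGET for the given nodeid and nlookup count.  Ownership of the
 * forget link passes to the input queue; if the connection is already gone
 * the link is simply freed.  FORGET requests never receive a reply.
 */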
236void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
237		       u64 nodeid, u64 nlookup)
238{
239	struct fuse_iqueue *fiq = &fc->iq;
240
241	forget->forget_one.nodeid = nodeid;
242	forget->forget_one.nlookup = nlookup;
243
244	spin_lock(&fiq->lock);
245	if (fiq->connected) {
246		fiq->forget_list_tail->next = forget;
247		fiq->forget_list_tail = forget;
248		fiq->ops->wake_forget_and_unlock(fiq);
249	} else {
250		kfree(forget);
251		spin_unlock(&fiq->lock);
252	}
253}
254
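/*
 * Move queued background requests to the input queue, as long as fewer than
 * fc->max_background requests are active.  Called with fc->bg_lock held.
 */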
255static void flush_bg_queue(struct fuse_conn *fc)
256{
257	struct fuse_iqueue *fiq = &fc->iq;
258
259	while (fc->active_background < fc->max_background &&
260	       !list_empty(&fc->bg_queue)) {
261		struct fuse_req *req;
262
263		req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
264		list_del(&req->list);
265		fc->active_background++;
266		spin_lock(&fiq->lock);
267		req->in.h.unique = fuse_get_unique(fiq);
268		queue_request_and_unlock(fiq, req);
269	}
270}
271
272/*
273 * This function is called when a request is finished.  Either a reply
274 * has arrived or it was aborted (and not yet sent) or some error
275 * occurred during communication with userspace, or the device file
276 * was closed.  The requester thread is woken up (if still waiting),
277 * the 'end' callback is called if given, and the reference to the
278 * request is dropped.
279 */
280void fuse_request_end(struct fuse_req *req)
281{
282	struct fuse_mount *fm = req->fm;
283	struct fuse_conn *fc = fm->fc;
284	struct fuse_iqueue *fiq = &fc->iq;
285
286	if (test_and_set_bit(FR_FINISHED, &req->flags))
287		goto put_request;
288
289	/*
290	 * test_and_set_bit() implies smp_mb() between bit
291	 * changing and below FR_INTERRUPTED check. Pairs with
292	 * smp_mb() from queue_interrupt().
293	 */
294	if (test_bit(FR_INTERRUPTED, &req->flags)) {
295		spin_lock(&fiq->lock);
296		list_del_init(&req->intr_entry);
297		spin_unlock(&fiq->lock);
298	}
299	WARN_ON(test_bit(FR_PENDING, &req->flags));
300	WARN_ON(test_bit(FR_SENT, &req->flags));
301	if (test_bit(FR_BACKGROUND, &req->flags)) {
302		spin_lock(&fc->bg_lock);
303		clear_bit(FR_BACKGROUND, &req->flags);
304		if (fc->num_background == fc->max_background) {
305			fc->blocked = 0;
306			wake_up(&fc->blocked_waitq);
307		} else if (!fc->blocked) {
308			/*
309			 * Wake up next waiter, if any.  It's okay to use
310			 * waitqueue_active(), as we've already synced up
311			 * fc->blocked with waiters with the wake_up() call
312			 * above.
313			 */
314			if (waitqueue_active(&fc->blocked_waitq))
315				wake_up(&fc->blocked_waitq);
316		}
317
318		if (fc->num_background == fc->congestion_threshold && fm->sb) {
319			clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
320			clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
321		}
322		fc->num_background--;
323		fc->active_background--;
324		flush_bg_queue(fc);
325		spin_unlock(&fc->bg_lock);
326	} else {
327		/* Wake up waiter sleeping in request_wait_answer() */
328		wake_up(&req->waitq);
329	}
330
331	if (test_bit(FR_ASYNC, &req->flags))
332		req->args->end(fm, req->args, req->out.h.error);
333put_request:
334	fuse_put_request(req);
335}
336EXPORT_SYMBOL_GPL(fuse_request_end);
337
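/*
 * Queue an INTERRUPT for a request that has already been read by userspace.
 * Returns -EINVAL if the request was never marked FR_INTERRUPTED; if the
 * request has meanwhile finished, the interrupt is silently dropped.
 */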
338static int queue_interrupt(struct fuse_req *req)
339{
340	struct fuse_iqueue *fiq = &req->fm->fc->iq;
341
342	spin_lock(&fiq->lock);
343	/* Check that the request has actually been marked FR_INTERRUPTED */
344	if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) {
345		spin_unlock(&fiq->lock);
346		return -EINVAL;
347	}
348
349	if (list_empty(&req->intr_entry)) {
350		list_add_tail(&req->intr_entry, &fiq->interrupts);
351		/*
352		 * Pairs with smp_mb() implied by test_and_set_bit()
353		 * from fuse_request_end().
354		 */
355		smp_mb();
356		if (test_bit(FR_FINISHED, &req->flags)) {
357			list_del_init(&req->intr_entry);
358			spin_unlock(&fiq->lock);
359			return 0;
360		}
361		fiq->ops->wake_interrupt_and_unlock(fiq);
362	} else {
363		spin_unlock(&fiq->lock);
364	}
365	return 0;
366}
367
368static void request_wait_answer(struct fuse_req *req)
369{
370	struct fuse_conn *fc = req->fm->fc;
371	struct fuse_iqueue *fiq = &fc->iq;
372	int err;
373
374	if (!fc->no_interrupt) {
375		/* Any signal may interrupt this */
376		err = wait_event_interruptible(req->waitq,
377					test_bit(FR_FINISHED, &req->flags));
378		if (!err)
379			return;
380
381		set_bit(FR_INTERRUPTED, &req->flags);
382		/* matches barrier in fuse_dev_do_read() */
383		smp_mb__after_atomic();
384		if (test_bit(FR_SENT, &req->flags))
385			queue_interrupt(req);
386	}
387
388	if (!test_bit(FR_FORCE, &req->flags)) {
389		/* Only fatal signals may interrupt this */
390		err = wait_event_killable(req->waitq,
391					test_bit(FR_FINISHED, &req->flags));
392		if (!err)
393			return;
394
395		spin_lock(&fiq->lock);
396		/* Request is not yet in userspace, bail out */
397		if (test_bit(FR_PENDING, &req->flags)) {
398			list_del(&req->list);
399			spin_unlock(&fiq->lock);
400			__fuse_put_request(req);
401			req->out.h.error = -EINTR;
402			return;
403		}
404		spin_unlock(&fiq->lock);
405	}
406
407	/*
408	 * Either request is already in userspace, or it was forced.
409	 * Wait it out.
410	 */
411	wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
412}
413
414static void __fuse_request_send(struct fuse_req *req)
415{
416	struct fuse_iqueue *fiq = &req->fm->fc->iq;
417
418	BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
419	spin_lock(&fiq->lock);
420	if (!fiq->connected) {
421		spin_unlock(&fiq->lock);
422		req->out.h.error = -ENOTCONN;
423	} else {
424		req->in.h.unique = fuse_get_unique(fiq);
425		/* acquire extra reference, since request is still needed
426		   after fuse_request_end() */
427		__fuse_get_request(req);
428		queue_request_and_unlock(fiq, req);
429
430		request_wait_answer(req);
431		/* Pairs with smp_wmb() in fuse_request_end() */
432		smp_rmb();
433	}
434}
435
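/*
 * Older userspace (protocol minor versions below 4/9/12) uses smaller
 * variants of some structures; trim the in/out argument sizes to the compat
 * layouts those filesystems understand.
 */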
436static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
437{
438	if (fc->minor < 4 && args->opcode == FUSE_STATFS)
439		args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE;
440
441	if (fc->minor < 9) {
442		switch (args->opcode) {
443		case FUSE_LOOKUP:
444		case FUSE_CREATE:
445		case FUSE_MKNOD:
446		case FUSE_MKDIR:
447		case FUSE_SYMLINK:
448		case FUSE_LINK:
449			args->out_args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
450			break;
451		case FUSE_GETATTR:
452		case FUSE_SETATTR:
453			args->out_args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
454			break;
455		}
456	}
457	if (fc->minor < 12) {
458		switch (args->opcode) {
459		case FUSE_CREATE:
460			args->in_args[0].size = sizeof(struct fuse_open_in);
461			break;
462		case FUSE_MKNOD:
463			args->in_args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
464			break;
465		}
466	}
467}
468
469static void fuse_force_creds(struct fuse_req *req)
470{
471	struct fuse_conn *fc = req->fm->fc;
472
473	req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
474	req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
475	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
476}
477
478static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
479{
480	req->in.h.opcode = args->opcode;
481	req->in.h.nodeid = args->nodeid;
482	req->args = args;
483	if (args->end)
484		__set_bit(FR_ASYNC, &req->flags);
485}
486
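/*
 * Send a request and wait for the reply.  Returns the (negative) error from
 * the reply header, or, on success with a variable-size last output argument
 * (args->out_argvar), the number of bytes userspace actually returned in
 * that argument.
 */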
487ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args)
488{
489	struct fuse_conn *fc = fm->fc;
490	struct fuse_req *req;
491	ssize_t ret;
492
493	if (args->force) {
494		atomic_inc(&fc->num_waiting);
495		req = fuse_request_alloc(fm, GFP_KERNEL | __GFP_NOFAIL);
496
497		if (!args->nocreds)
498			fuse_force_creds(req);
499
500		__set_bit(FR_WAITING, &req->flags);
501		__set_bit(FR_FORCE, &req->flags);
502	} else {
503		WARN_ON(args->nocreds);
504		req = fuse_get_req(fm, false);
505		if (IS_ERR(req))
506			return PTR_ERR(req);
507	}
508
509	/* Needs to be done after fuse_get_req() so that fc->minor is valid */
510	fuse_adjust_compat(fc, args);
511	fuse_args_to_req(req, args);
512
513	if (!args->noreply)
514		__set_bit(FR_ISREPLY, &req->flags);
515	__fuse_request_send(req);
516	ret = req->out.h.error;
517	if (!ret && args->out_argvar) {
518		BUG_ON(args->out_numargs == 0);
519		ret = args->out_args[args->out_numargs - 1].size;
520	}
521	fuse_put_request(req);
522
523	return ret;
524}
525
526static bool fuse_request_queue_background(struct fuse_req *req)
527{
528	struct fuse_mount *fm = req->fm;
529	struct fuse_conn *fc = fm->fc;
530	bool queued = false;
531
532	WARN_ON(!test_bit(FR_BACKGROUND, &req->flags));
533	if (!test_bit(FR_WAITING, &req->flags)) {
534		__set_bit(FR_WAITING, &req->flags);
535		atomic_inc(&fc->num_waiting);
536	}
537	__set_bit(FR_ISREPLY, &req->flags);
538	spin_lock(&fc->bg_lock);
539	if (likely(fc->connected)) {
540		fc->num_background++;
541		if (fc->num_background == fc->max_background)
542			fc->blocked = 1;
543		if (fc->num_background == fc->congestion_threshold && fm->sb) {
544			set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
545			set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
546		}
547		list_add_tail(&req->list, &fc->bg_queue);
548		flush_bg_queue(fc);
549		queued = true;
550	}
551	spin_unlock(&fc->bg_lock);
552
553	return queued;
554}
555
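/*
 * Queue a request for background processing.  Returns 0 once the request is
 * queued, -ENOTCONN if the connection is gone, or another negative error if
 * the request could not be allocated.  Completion is reported through
 * args->end(), when set.
 */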
556int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
557			    gfp_t gfp_flags)
558{
559	struct fuse_req *req;
560
561	if (args->force) {
562		WARN_ON(!args->nocreds);
563		req = fuse_request_alloc(fm, gfp_flags);
564		if (!req)
565			return -ENOMEM;
566		__set_bit(FR_BACKGROUND, &req->flags);
567	} else {
568		WARN_ON(args->nocreds);
569		req = fuse_get_req(fm, true);
570		if (IS_ERR(req))
571			return PTR_ERR(req);
572	}
573
574	fuse_args_to_req(req, args);
575
576	if (!fuse_request_queue_background(req)) {
577		fuse_put_request(req);
578		return -ENOTCONN;
579	}
580
581	return 0;
582}
583EXPORT_SYMBOL_GPL(fuse_simple_background);
584
585static int fuse_simple_notify_reply(struct fuse_mount *fm,
586				    struct fuse_args *args, u64 unique)
587{
588	struct fuse_req *req;
589	struct fuse_iqueue *fiq = &fm->fc->iq;
590	int err = 0;
591
592	req = fuse_get_req(fm, false);
593	if (IS_ERR(req))
594		return PTR_ERR(req);
595
596	__clear_bit(FR_ISREPLY, &req->flags);
597	req->in.h.unique = unique;
598
599	fuse_args_to_req(req, args);
600
601	spin_lock(&fiq->lock);
602	if (fiq->connected) {
603		queue_request_and_unlock(fiq, req);
604	} else {
605		err = -ENODEV;
606		spin_unlock(&fiq->lock);
607		fuse_put_request(req);
608	}
609
610	return err;
611}
612
613/*
614 * Lock the request.  Up to the next unlock_request() there mustn't be
615 * anything that could cause a page-fault.  If the request was already
616 * aborted, bail out.
617 */
618static int lock_request(struct fuse_req *req)
619{
620	int err = 0;
621	if (req) {
622		spin_lock(&req->waitq.lock);
623		if (test_bit(FR_ABORTED, &req->flags))
624			err = -ENOENT;
625		else
626			set_bit(FR_LOCKED, &req->flags);
627		spin_unlock(&req->waitq.lock);
628	}
629	return err;
630}
631
632/*
633 * Unlock request.  If it was aborted while locked, caller is responsible
634 * for unlocking and ending the request.
635 */
636static int unlock_request(struct fuse_req *req)
637{
638	int err = 0;
639	if (req) {
640		spin_lock(&req->waitq.lock);
641		if (test_bit(FR_ABORTED, &req->flags))
642			err = -ENOENT;
643		else
644			clear_bit(FR_LOCKED, &req->flags);
645		spin_unlock(&req->waitq.lock);
646	}
647	return err;
648}
649
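/*
 * State for copying data between a request and the userspace buffer.  The
 * user side is either an iov_iter (plain read/write on the device) or an
 * array of pipe buffers (the splice paths).
 */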
650struct fuse_copy_state {
651	int write;
652	struct fuse_req *req;
653	struct iov_iter *iter;
654	struct pipe_buffer *pipebufs;
655	struct pipe_buffer *currbuf;
656	struct pipe_inode_info *pipe;
657	unsigned long nr_segs;
658	struct page *pg;
659	unsigned len;
660	unsigned offset;
661	unsigned move_pages:1;
662};
663
664static void fuse_copy_init(struct fuse_copy_state *cs, int write,
665			   struct iov_iter *iter)
666{
667	memset(cs, 0, sizeof(*cs));
668	cs->write = write;
669	cs->iter = iter;
670}
671
672/* Unmap and put previous page of userspace buffer */
673static void fuse_copy_finish(struct fuse_copy_state *cs)
674{
675	if (cs->currbuf) {
676		struct pipe_buffer *buf = cs->currbuf;
677
678		if (cs->write)
679			buf->len = PAGE_SIZE - cs->len;
680		cs->currbuf = NULL;
681	} else if (cs->pg) {
682		if (cs->write) {
683			flush_dcache_page(cs->pg);
684			set_page_dirty_lock(cs->pg);
685		}
686		put_page(cs->pg);
687	}
688	cs->pg = NULL;
689}
690
691/*
692 * Get another page of the userspace buffer, map it into the kernel
693 * address space, and lock the request
694 */
695static int fuse_copy_fill(struct fuse_copy_state *cs)
696{
697	struct page *page;
698	int err;
699
700	err = unlock_request(cs->req);
701	if (err)
702		return err;
703
704	fuse_copy_finish(cs);
705	if (cs->pipebufs) {
706		struct pipe_buffer *buf = cs->pipebufs;
707
708		if (!cs->write) {
709			err = pipe_buf_confirm(cs->pipe, buf);
710			if (err)
711				return err;
712
713			BUG_ON(!cs->nr_segs);
714			cs->currbuf = buf;
715			cs->pg = buf->page;
716			cs->offset = buf->offset;
717			cs->len = buf->len;
718			cs->pipebufs++;
719			cs->nr_segs--;
720		} else {
721			if (cs->nr_segs >= cs->pipe->max_usage)
722				return -EIO;
723
724			page = alloc_page(GFP_HIGHUSER);
725			if (!page)
726				return -ENOMEM;
727
728			buf->page = page;
729			buf->offset = 0;
730			buf->len = 0;
731
732			cs->currbuf = buf;
733			cs->pg = page;
734			cs->offset = 0;
735			cs->len = PAGE_SIZE;
736			cs->pipebufs++;
737			cs->nr_segs++;
738		}
739	} else {
740		size_t off;
741		err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off);
742		if (err < 0)
743			return err;
744		BUG_ON(!err);
745		cs->len = err;
746		cs->offset = off;
747		cs->pg = page;
748		iov_iter_advance(cs->iter, err);
749	}
750
751	return lock_request(cs->req);
752}
753
754/* Do as much copy to/from userspace buffer as we can */
755static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
756{
757	unsigned ncpy = min(*size, cs->len);
758	if (val) {
759		void *pgaddr = kmap_atomic(cs->pg);
760		void *buf = pgaddr + cs->offset;
761
762		if (cs->write)
763			memcpy(buf, *val, ncpy);
764		else
765			memcpy(*val, buf, ncpy);
766
767		kunmap_atomic(pgaddr);
768		*val += ncpy;
769	}
770	*size -= ncpy;
771	cs->len -= ncpy;
772	cs->offset += ncpy;
773	return ncpy;
774}
775
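/*
 * Sanity check a page stolen from the pipe before it is installed in the
 * page cache: it must not be mapped, must not belong to a mapping, and must
 * not carry unexpected page flags.
 */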
776static int fuse_check_page(struct page *page)
777{
778	if (page_mapcount(page) ||
779	    page->mapping != NULL ||
780	    (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
781	     ~(1 << PG_locked |
782	       1 << PG_referenced |
783	       1 << PG_uptodate |
784	       1 << PG_lru |
785	       1 << PG_active |
786	       1 << PG_workingset |
787	       1 << PG_reclaim |
788	       1 << PG_waiters))) {
789		dump_page(page, "fuse: trying to steal weird page");
790		return 1;
791	}
792	return 0;
793}
794
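/*
 * Splice "move" path: try to steal the page backing the pipe buffer and
 * install it in the page cache in place of the request's page, avoiding a
 * copy.  Returns 0 on success, a negative error, or 1 to make
 * fuse_copy_page() fall back to an ordinary copy.
 */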
795static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
796{
797	int err;
798	struct page *oldpage = *pagep;
799	struct page *newpage;
800	struct pipe_buffer *buf = cs->pipebufs;
801
802	get_page(oldpage);
803	err = unlock_request(cs->req);
804	if (err)
805		goto out_put_old;
806
807	fuse_copy_finish(cs);
808
809	err = pipe_buf_confirm(cs->pipe, buf);
810	if (err)
811		goto out_put_old;
812
813	BUG_ON(!cs->nr_segs);
814	cs->currbuf = buf;
815	cs->len = buf->len;
816	cs->pipebufs++;
817	cs->nr_segs--;
818
819	if (cs->len != PAGE_SIZE)
820		goto out_fallback;
821
822	if (!pipe_buf_try_steal(cs->pipe, buf))
823		goto out_fallback;
824
825	newpage = buf->page;
826
827	if (!PageUptodate(newpage))
828		SetPageUptodate(newpage);
829
830	ClearPageMappedToDisk(newpage);
831
832	if (fuse_check_page(newpage) != 0)
833		goto out_fallback_unlock;
834
835	/*
836	 * The old page should be a new and locked page-cache page; it must not
837	 * be mapped or have any special flags set on it
838	 */
839	if (WARN_ON(page_mapped(oldpage)))
840		goto out_fallback_unlock;
841	if (WARN_ON(page_has_private(oldpage)))
842		goto out_fallback_unlock;
843	if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
844		goto out_fallback_unlock;
845	if (WARN_ON(PageMlocked(oldpage)))
846		goto out_fallback_unlock;
847
848	err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
849	if (err) {
850		unlock_page(newpage);
851		goto out_put_old;
852	}
853
854	get_page(newpage);
855
856	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
857		lru_cache_add(newpage);
858
859	/*
860	 * Release while we have extra ref on stolen page.  Otherwise
861	 * anon_pipe_buf_release() might think the page can be reused.
862	 */
863	pipe_buf_release(cs->pipe, buf);
864
865	err = 0;
866	spin_lock(&cs->req->waitq.lock);
867	if (test_bit(FR_ABORTED, &cs->req->flags))
868		err = -ENOENT;
869	else
870		*pagep = newpage;
871	spin_unlock(&cs->req->waitq.lock);
872
873	if (err) {
874		unlock_page(newpage);
875		put_page(newpage);
876		goto out_put_old;
877	}
878
879	unlock_page(oldpage);
880	/* Drop ref for ap->pages[] array */
881	put_page(oldpage);
882	cs->len = 0;
883
884	err = 0;
885out_put_old:
886	/* Drop ref obtained in this function */
887	put_page(oldpage);
888	return err;
889
890out_fallback_unlock:
891	unlock_page(newpage);
892out_fallback:
893	cs->pg = buf->page;
894	cs->offset = buf->offset;
895
896	err = lock_request(cs->req);
897	if (!err)
898		err = 1;
899
900	goto out_put_old;
901}
902
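/*
 * Splice read path: instead of copying the request's page into a freshly
 * allocated pipe page, take a reference on it and hand it to the pipe
 * directly.
 */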
903static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
904			 unsigned offset, unsigned count)
905{
906	struct pipe_buffer *buf;
907	int err;
908
909	if (cs->nr_segs >= cs->pipe->max_usage)
910		return -EIO;
911
912	get_page(page);
913	err = unlock_request(cs->req);
914	if (err) {
915		put_page(page);
916		return err;
917	}
918
919	fuse_copy_finish(cs);
920
921	buf = cs->pipebufs;
922	buf->page = page;
923	buf->offset = offset;
924	buf->len = count;
925
926	cs->pipebufs++;
927	cs->nr_segs++;
928	cs->len = 0;
929
930	return 0;
931}
932
933/*
934 * Copy a page in the request to/from the userspace buffer.  Must be
935 * done atomically
936 */
937static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
938			  unsigned offset, unsigned count, int zeroing)
939{
940	int err;
941	struct page *page = *pagep;
942
943	if (page && zeroing && count < PAGE_SIZE)
944		clear_highpage(page);
945
946	while (count) {
947		if (cs->write && cs->pipebufs && page) {
948			/*
949			 * Can't control lifetime of pipe buffers, so always
950			 * copy user pages.
951			 */
952			if (cs->req->args->user_pages) {
953				err = fuse_copy_fill(cs);
954				if (err)
955					return err;
956			} else {
957				return fuse_ref_page(cs, page, offset, count);
958			}
959		} else if (!cs->len) {
960			if (cs->move_pages && page &&
961			    offset == 0 && count == PAGE_SIZE) {
962				err = fuse_try_move_page(cs, pagep);
963				if (err <= 0)
964					return err;
965			} else {
966				err = fuse_copy_fill(cs);
967				if (err)
968					return err;
969			}
970		}
971		if (page) {
972			void *mapaddr = kmap_atomic(page);
973			void *buf = mapaddr + offset;
974			offset += fuse_copy_do(cs, &buf, &count);
975			kunmap_atomic(mapaddr);
976		} else
977			offset += fuse_copy_do(cs, NULL, &count);
978	}
979	if (page && !cs->write)
980		flush_dcache_page(page);
981	return 0;
982}
983
984/* Copy pages in the request to/from userspace buffer */
985static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
986			   int zeroing)
987{
988	unsigned i;
989	struct fuse_req *req = cs->req;
990	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
991
992
993	for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) {
994		int err;
995		unsigned int offset = ap->descs[i].offset;
996		unsigned int count = min(nbytes, ap->descs[i].length);
997
998		err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing);
999		if (err)
1000			return err;
1001
1002		nbytes -= count;
1003	}
1004	return 0;
1005}
1006
1007/* Copy a single argument in the request to/from userspace buffer */
1008static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
1009{
1010	while (size) {
1011		if (!cs->len) {
1012			int err = fuse_copy_fill(cs);
1013			if (err)
1014				return err;
1015		}
1016		fuse_copy_do(cs, &val, &size);
1017	}
1018	return 0;
1019}
1020
1021/* Copy request arguments to/from userspace buffer */
1022static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
1023			  unsigned argpages, struct fuse_arg *args,
1024			  int zeroing)
1025{
1026	int err = 0;
1027	unsigned i;
1028
1029	for (i = 0; !err && i < numargs; i++)  {
1030		struct fuse_arg *arg = &args[i];
1031		if (i == numargs - 1 && argpages)
1032			err = fuse_copy_pages(cs, arg->size, zeroing);
1033		else
1034			err = fuse_copy_one(cs, arg->value, arg->size);
1035	}
1036	return err;
1037}
1038
1039static int forget_pending(struct fuse_iqueue *fiq)
1040{
1041	return fiq->forget_list_head.next != NULL;
1042}
1043
1044static int request_pending(struct fuse_iqueue *fiq)
1045{
1046	return !list_empty(&fiq->pending) || !list_empty(&fiq->interrupts) ||
1047		forget_pending(fiq);
1048}
1049
1050/*
1051 * Transfer an interrupt request to userspace
1052 *
1053 * Unlike other requests this is assembled on demand, without a need
1054 * to allocate a separate fuse_req structure.
1055 *
1056 * Called with fiq->lock held, releases it
1057 */
1058static int fuse_read_interrupt(struct fuse_iqueue *fiq,
1059			       struct fuse_copy_state *cs,
1060			       size_t nbytes, struct fuse_req *req)
1061__releases(fiq->lock)
1062{
1063	struct fuse_in_header ih;
1064	struct fuse_interrupt_in arg;
1065	unsigned reqsize = sizeof(ih) + sizeof(arg);
1066	int err;
1067
1068	list_del_init(&req->intr_entry);
1069	memset(&ih, 0, sizeof(ih));
1070	memset(&arg, 0, sizeof(arg));
1071	ih.len = reqsize;
1072	ih.opcode = FUSE_INTERRUPT;
1073	ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT);
1074	arg.unique = req->in.h.unique;
1075
1076	spin_unlock(&fiq->lock);
1077	if (nbytes < reqsize)
1078		return -EINVAL;
1079
1080	err = fuse_copy_one(cs, &ih, sizeof(ih));
1081	if (!err)
1082		err = fuse_copy_one(cs, &arg, sizeof(arg));
1083	fuse_copy_finish(cs);
1084
1085	return err ? err : reqsize;
1086}
1087
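/*
 * Detach up to 'max' entries from the head of the singly linked forget list.
 * The dequeued chain is returned, and *countp (if non-NULL) is set to the
 * number of entries dequeued.  Called with fiq->lock held.
 */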
1088struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
1089					     unsigned int max,
1090					     unsigned int *countp)
1091{
1092	struct fuse_forget_link *head = fiq->forget_list_head.next;
1093	struct fuse_forget_link **newhead = &head;
1094	unsigned count;
1095
1096	for (count = 0; *newhead != NULL && count < max; count++)
1097		newhead = &(*newhead)->next;
1098
1099	fiq->forget_list_head.next = *newhead;
1100	*newhead = NULL;
1101	if (fiq->forget_list_head.next == NULL)
1102		fiq->forget_list_tail = &fiq->forget_list_head;
1103
1104	if (countp != NULL)
1105		*countp = count;
1106
1107	return head;
1108}
1109EXPORT_SYMBOL(fuse_dequeue_forget);
1110
1111static int fuse_read_single_forget(struct fuse_iqueue *fiq,
1112				   struct fuse_copy_state *cs,
1113				   size_t nbytes)
1114__releases(fiq->lock)
1115{
1116	int err;
1117	struct fuse_forget_link *forget = fuse_dequeue_forget(fiq, 1, NULL);
1118	struct fuse_forget_in arg = {
1119		.nlookup = forget->forget_one.nlookup,
1120	};
1121	struct fuse_in_header ih = {
1122		.opcode = FUSE_FORGET,
1123		.nodeid = forget->forget_one.nodeid,
1124		.unique = fuse_get_unique(fiq),
1125		.len = sizeof(ih) + sizeof(arg),
1126	};
1127
1128	spin_unlock(&fiq->lock);
1129	kfree(forget);
1130	if (nbytes < ih.len)
1131		return -EINVAL;
1132
1133	err = fuse_copy_one(cs, &ih, sizeof(ih));
1134	if (!err)
1135		err = fuse_copy_one(cs, &arg, sizeof(arg));
1136	fuse_copy_finish(cs);
1137
1138	if (err)
1139		return err;
1140
1141	return ih.len;
1142}
1143
1144static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
1145				   struct fuse_copy_state *cs, size_t nbytes)
1146__releases(fiq->lock)
1147{
1148	int err;
1149	unsigned max_forgets;
1150	unsigned count;
1151	struct fuse_forget_link *head;
1152	struct fuse_batch_forget_in arg = { .count = 0 };
1153	struct fuse_in_header ih = {
1154		.opcode = FUSE_BATCH_FORGET,
1155		.unique = fuse_get_unique(fiq),
1156		.len = sizeof(ih) + sizeof(arg),
1157	};
1158
1159	if (nbytes < ih.len) {
1160		spin_unlock(&fiq->lock);
1161		return -EINVAL;
1162	}
1163
1164	max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1165	head = fuse_dequeue_forget(fiq, max_forgets, &count);
1166	spin_unlock(&fiq->lock);
1167
1168	arg.count = count;
1169	ih.len += count * sizeof(struct fuse_forget_one);
1170	err = fuse_copy_one(cs, &ih, sizeof(ih));
1171	if (!err)
1172		err = fuse_copy_one(cs, &arg, sizeof(arg));
1173
1174	while (head) {
1175		struct fuse_forget_link *forget = head;
1176
1177		if (!err) {
1178			err = fuse_copy_one(cs, &forget->forget_one,
1179					    sizeof(forget->forget_one));
1180		}
1181		head = forget->next;
1182		kfree(forget);
1183	}
1184
1185	fuse_copy_finish(cs);
1186
1187	if (err)
1188		return err;
1189
1190	return ih.len;
1191}
1192
1193static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
1194			    struct fuse_copy_state *cs,
1195			    size_t nbytes)
1196__releases(fiq->lock)
1197{
1198	if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
1199		return fuse_read_single_forget(fiq, cs, nbytes);
1200	else
1201		return fuse_read_batch_forget(fiq, cs, nbytes);
1202}
1203
1204/*
1205 * Read a single request into the userspace filesystem's buffer.  This
1206 * function waits until a request is available, then removes it from
1207 * the pending list and copies request data to userspace buffer.  If
1208 * no reply is needed (FORGET) or request has been aborted or there
1209 * was an error during the copying then it's finished by calling
1210 * fuse_request_end().  Otherwise add it to the processing list, and set
1211 * the 'sent' flag.
1212 */
1213static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
1214				struct fuse_copy_state *cs, size_t nbytes)
1215{
1216	ssize_t err;
1217	struct fuse_conn *fc = fud->fc;
1218	struct fuse_iqueue *fiq = &fc->iq;
1219	struct fuse_pqueue *fpq = &fud->pq;
1220	struct fuse_req *req;
1221	struct fuse_args *args;
1222	unsigned reqsize;
1223	unsigned int hash;
1224
1225	/*
1226	 * Require sane minimum read buffer - that has capacity for fixed part
1227	 * of any request header + negotiated max_write room for data.
1228	 *
1229	 * Historically libfuse reserves 4K for fixed header room, but e.g.
1230	 * GlusterFS reserves only 80 bytes
1231	 *
1232	 *	= `sizeof(fuse_in_header) + sizeof(fuse_write_in)`
1233	 *
1234	 * which is the absolute minimum any sane filesystem should be using
1235	 * for header room.
1236	 */
1237	if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER,
1238			   sizeof(struct fuse_in_header) +
1239			   sizeof(struct fuse_write_in) +
1240			   fc->max_write))
1241		return -EINVAL;
1242
1243 restart:
1244	for (;;) {
1245		spin_lock(&fiq->lock);
1246		if (!fiq->connected || request_pending(fiq))
1247			break;
1248		spin_unlock(&fiq->lock);
1249
1250		if (file->f_flags & O_NONBLOCK)
1251			return -EAGAIN;
1252		err = wait_event_interruptible_exclusive(fiq->waitq,
1253				!fiq->connected || request_pending(fiq));
1254		if (err)
1255			return err;
1256	}
1257
1258	if (!fiq->connected) {
1259		err = fc->aborted ? -ECONNABORTED : -ENODEV;
1260		goto err_unlock;
1261	}
1262
1263	if (!list_empty(&fiq->interrupts)) {
1264		req = list_entry(fiq->interrupts.next, struct fuse_req,
1265				 intr_entry);
1266		return fuse_read_interrupt(fiq, cs, nbytes, req);
1267	}
1268
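	/*
	 * Forgets are preferred over ordinary requests, but only in batches:
	 * roughly 16 forgets are sent, then a few pending requests are let
	 * through, so that a large forget backlog cannot starve the queue.
	 */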
1269	if (forget_pending(fiq)) {
1270		if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0)
1271			return fuse_read_forget(fc, fiq, cs, nbytes);
1272
1273		if (fiq->forget_batch <= -8)
1274			fiq->forget_batch = 16;
1275	}
1276
1277	req = list_entry(fiq->pending.next, struct fuse_req, list);
1278	clear_bit(FR_PENDING, &req->flags);
1279	list_del_init(&req->list);
1280	spin_unlock(&fiq->lock);
1281
1282	args = req->args;
1283	reqsize = req->in.h.len;
1284
1285	/* If request is too large, reply with an error and restart the read */
1286	if (nbytes < reqsize) {
1287		req->out.h.error = -EIO;
1288		/* SETXATTR is special, since its data may be too large */
1289		if (args->opcode == FUSE_SETXATTR)
1290			req->out.h.error = -E2BIG;
1291		fuse_request_end(req);
1292		goto restart;
1293	}
1294	spin_lock(&fpq->lock);
1295	/*
1296	 * Must not put the request on the fpq->io queue after the queue has
1297	 * been shut down by fuse_abort_conn()
1298	 */
1299	if (!fpq->connected) {
1300		req->out.h.error = err = -ECONNABORTED;
1301		goto out_end;
1302
1303	}
1304	list_add(&req->list, &fpq->io);
1305	spin_unlock(&fpq->lock);
1306	cs->req = req;
1307	err = fuse_copy_one(cs, &req->in.h, sizeof(req->in.h));
1308	if (!err)
1309		err = fuse_copy_args(cs, args->in_numargs, args->in_pages,
1310				     (struct fuse_arg *) args->in_args, 0);
1311	fuse_copy_finish(cs);
1312	spin_lock(&fpq->lock);
1313	clear_bit(FR_LOCKED, &req->flags);
1314	if (!fpq->connected) {
1315		err = fc->aborted ? -ECONNABORTED : -ENODEV;
1316		goto out_end;
1317	}
1318	if (err) {
1319		req->out.h.error = -EIO;
1320		goto out_end;
1321	}
1322	if (!test_bit(FR_ISREPLY, &req->flags)) {
1323		err = reqsize;
1324		goto out_end;
1325	}
1326	hash = fuse_req_hash(req->in.h.unique);
1327	list_move_tail(&req->list, &fpq->processing[hash]);
1328	__fuse_get_request(req);
1329	set_bit(FR_SENT, &req->flags);
1330	spin_unlock(&fpq->lock);
1331	/* matches barrier in request_wait_answer() */
1332	smp_mb__after_atomic();
1333	if (test_bit(FR_INTERRUPTED, &req->flags))
1334		queue_interrupt(req);
1335	fuse_put_request(req);
1336
1337	return reqsize;
1338
1339out_end:
1340	if (!test_bit(FR_PRIVATE, &req->flags))
1341		list_del_init(&req->list);
1342	spin_unlock(&fpq->lock);
1343	fuse_request_end(req);
1344	return err;
1345
1346 err_unlock:
1347	spin_unlock(&fiq->lock);
1348	return err;
1349}
1350
1351static int fuse_dev_open(struct inode *inode, struct file *file)
1352{
1353	/*
1354	 * The fuse device file's private_data is set to a struct fuse_dev
1355	 * once the connection is mounted, and is used to keep track of
1356	 * whether the file has been mounted already.
1357	 */
1358	file->private_data = NULL;
1359	return 0;
1360}
1361
1362static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
1363{
1364	struct fuse_copy_state cs;
1365	struct file *file = iocb->ki_filp;
1366	struct fuse_dev *fud = fuse_get_dev(file);
1367
1368	if (!fud)
1369		return -EPERM;
1370
1371	if (!iter_is_iovec(to))
1372		return -EINVAL;
1373
1374	fuse_copy_init(&cs, 1, to);
1375
1376	return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
1377}
1378
1379static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1380				    struct pipe_inode_info *pipe,
1381				    size_t len, unsigned int flags)
1382{
1383	int total, ret;
1384	int page_nr = 0;
1385	struct pipe_buffer *bufs;
1386	struct fuse_copy_state cs;
1387	struct fuse_dev *fud = fuse_get_dev(in);
1388
1389	if (!fud)
1390		return -EPERM;
1391
1392	bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer),
1393			      GFP_KERNEL);
1394	if (!bufs)
1395		return -ENOMEM;
1396
1397	fuse_copy_init(&cs, 1, NULL);
1398	cs.pipebufs = bufs;
1399	cs.pipe = pipe;
1400	ret = fuse_dev_do_read(fud, in, &cs, len);
1401	if (ret < 0)
1402		goto out;
1403
1404	if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->max_usage) {
1405		ret = -EIO;
1406		goto out;
1407	}
1408
1409	for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
1410		/*
1411		 * Need to be careful about this.  Having buf->ops in module
1412		 * code can Oops if the buffer persists after module unload.
1413		 */
1414		bufs[page_nr].ops = &nosteal_pipe_buf_ops;
1415		bufs[page_nr].flags = 0;
1416		ret = add_to_pipe(pipe, &bufs[page_nr++]);
1417		if (unlikely(ret < 0))
1418			break;
1419	}
1420	if (total)
1421		ret = total;
1422out:
1423	for (; page_nr < cs.nr_segs; page_nr++)
1424		put_page(bufs[page_nr].page);
1425
1426	kvfree(bufs);
1427	return ret;
1428}
1429
1430static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1431			    struct fuse_copy_state *cs)
1432{
1433	struct fuse_notify_poll_wakeup_out outarg;
1434	int err = -EINVAL;
1435
1436	if (size != sizeof(outarg))
1437		goto err;
1438
1439	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1440	if (err)
1441		goto err;
1442
1443	fuse_copy_finish(cs);
1444	return fuse_notify_poll_wakeup(fc, &outarg);
1445
1446err:
1447	fuse_copy_finish(cs);
1448	return err;
1449}
1450
1451static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1452				   struct fuse_copy_state *cs)
1453{
1454	struct fuse_notify_inval_inode_out outarg;
1455	int err = -EINVAL;
1456
1457	if (size != sizeof(outarg))
1458		goto err;
1459
1460	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1461	if (err)
1462		goto err;
1463	fuse_copy_finish(cs);
1464
1465	down_read(&fc->killsb);
1466	err = fuse_reverse_inval_inode(fc, outarg.ino,
1467				       outarg.off, outarg.len);
1468	up_read(&fc->killsb);
1469	return err;
1470
1471err:
1472	fuse_copy_finish(cs);
1473	return err;
1474}
1475
1476static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1477				   struct fuse_copy_state *cs)
1478{
1479	struct fuse_notify_inval_entry_out outarg;
1480	int err = -ENOMEM;
1481	char *buf;
1482	struct qstr name;
1483
1484	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1485	if (!buf)
1486		goto err;
1487
1488	err = -EINVAL;
1489	if (size < sizeof(outarg))
1490		goto err;
1491
1492	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1493	if (err)
1494		goto err;
1495
1496	err = -ENAMETOOLONG;
1497	if (outarg.namelen > FUSE_NAME_MAX)
1498		goto err;
1499
1500	err = -EINVAL;
1501	if (size != sizeof(outarg) + outarg.namelen + 1)
1502		goto err;
1503
1504	name.name = buf;
1505	name.len = outarg.namelen;
1506	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1507	if (err)
1508		goto err;
1509	fuse_copy_finish(cs);
1510	buf[outarg.namelen] = 0;
1511
1512	down_read(&fc->killsb);
1513	err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name);
1514	up_read(&fc->killsb);
1515	kfree(buf);
1516	return err;
1517
1518err:
1519	kfree(buf);
1520	fuse_copy_finish(cs);
1521	return err;
1522}
1523
1524static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
1525			      struct fuse_copy_state *cs)
1526{
1527	struct fuse_notify_delete_out outarg;
1528	int err = -ENOMEM;
1529	char *buf;
1530	struct qstr name;
1531
1532	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1533	if (!buf)
1534		goto err;
1535
1536	err = -EINVAL;
1537	if (size < sizeof(outarg))
1538		goto err;
1539
1540	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1541	if (err)
1542		goto err;
1543
1544	err = -ENAMETOOLONG;
1545	if (outarg.namelen > FUSE_NAME_MAX)
1546		goto err;
1547
1548	err = -EINVAL;
1549	if (size != sizeof(outarg) + outarg.namelen + 1)
1550		goto err;
1551
1552	name.name = buf;
1553	name.len = outarg.namelen;
1554	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1555	if (err)
1556		goto err;
1557	fuse_copy_finish(cs);
1558	buf[outarg.namelen] = 0;
1559
1560	down_read(&fc->killsb);
1561	err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name);
1562	up_read(&fc->killsb);
1563	kfree(buf);
1564	return err;
1565
1566err:
1567	kfree(buf);
1568	fuse_copy_finish(cs);
1569	return err;
1570}
1571
1572static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1573			     struct fuse_copy_state *cs)
1574{
1575	struct fuse_notify_store_out outarg;
1576	struct inode *inode;
1577	struct address_space *mapping;
1578	u64 nodeid;
1579	int err;
1580	pgoff_t index;
1581	unsigned int offset;
1582	unsigned int num;
1583	loff_t file_size;
1584	loff_t end;
1585
1586	err = -EINVAL;
1587	if (size < sizeof(outarg))
1588		goto out_finish;
1589
1590	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1591	if (err)
1592		goto out_finish;
1593
1594	err = -EINVAL;
1595	if (size - sizeof(outarg) != outarg.size)
1596		goto out_finish;
1597
1598	nodeid = outarg.nodeid;
1599
1600	down_read(&fc->killsb);
1601
1602	err = -ENOENT;
1603	inode = fuse_ilookup(fc, nodeid,  NULL);
1604	if (!inode)
1605		goto out_up_killsb;
1606
1607	mapping = inode->i_mapping;
1608	index = outarg.offset >> PAGE_SHIFT;
1609	offset = outarg.offset & ~PAGE_MASK;
1610	file_size = i_size_read(inode);
1611	end = outarg.offset + outarg.size;
1612	if (end > file_size) {
1613		file_size = end;
1614		fuse_write_update_size(inode, file_size);
1615	}
1616
1617	num = outarg.size;
1618	while (num) {
1619		struct page *page;
1620		unsigned int this_num;
1621
1622		err = -ENOMEM;
1623		page = find_or_create_page(mapping, index,
1624					   mapping_gfp_mask(mapping));
1625		if (!page)
1626			goto out_iput;
1627
1628		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1629		err = fuse_copy_page(cs, &page, offset, this_num, 0);
1630		if (!PageUptodate(page) && !err && offset == 0 &&
1631		    (this_num == PAGE_SIZE || file_size == end)) {
1632			zero_user_segment(page, this_num, PAGE_SIZE);
1633			SetPageUptodate(page);
1634		}
1635		unlock_page(page);
1636		put_page(page);
1637
1638		if (err)
1639			goto out_iput;
1640
1641		num -= this_num;
1642		offset = 0;
1643		index++;
1644	}
1645
1646	err = 0;
1647
1648out_iput:
1649	iput(inode);
1650out_up_killsb:
1651	up_read(&fc->killsb);
1652out_finish:
1653	fuse_copy_finish(cs);
1654	return err;
1655}
1656
1657struct fuse_retrieve_args {
1658	struct fuse_args_pages ap;
1659	struct fuse_notify_retrieve_in inarg;
1660};
1661
1662static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args,
1663			      int error)
1664{
1665	struct fuse_retrieve_args *ra =
1666		container_of(args, typeof(*ra), ap.args);
1667
1668	release_pages(ra->ap.pages, ra->ap.num_pages);
1669	kfree(ra);
1670}
1671
1672static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
1673			 struct fuse_notify_retrieve_out *outarg)
1674{
1675	int err;
1676	struct address_space *mapping = inode->i_mapping;
1677	pgoff_t index;
1678	loff_t file_size;
1679	unsigned int num;
1680	unsigned int offset;
1681	size_t total_len = 0;
1682	unsigned int num_pages;
1683	struct fuse_conn *fc = fm->fc;
1684	struct fuse_retrieve_args *ra;
1685	size_t args_size = sizeof(*ra);
1686	struct fuse_args_pages *ap;
1687	struct fuse_args *args;
1688
1689	offset = outarg->offset & ~PAGE_MASK;
1690	file_size = i_size_read(inode);
1691
1692	num = min(outarg->size, fc->max_write);
1693	if (outarg->offset > file_size)
1694		num = 0;
1695	else if (outarg->offset + num > file_size)
1696		num = file_size - outarg->offset;
1697
1698	num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1699	num_pages = min(num_pages, fc->max_pages);
1700
1701	args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0]));
1702
1703	ra = kzalloc(args_size, GFP_KERNEL);
1704	if (!ra)
1705		return -ENOMEM;
1706
1707	ap = &ra->ap;
1708	ap->pages = (void *) (ra + 1);
1709	ap->descs = (void *) (ap->pages + num_pages);
1710
1711	args = &ap->args;
1712	args->nodeid = outarg->nodeid;
1713	args->opcode = FUSE_NOTIFY_REPLY;
1714	args->in_numargs = 2;
1715	args->in_pages = true;
1716	args->end = fuse_retrieve_end;
1717
1718	index = outarg->offset >> PAGE_SHIFT;
1719
1720	while (num && ap->num_pages < num_pages) {
1721		struct page *page;
1722		unsigned int this_num;
1723
1724		page = find_get_page(mapping, index);
1725		if (!page)
1726			break;
1727
1728		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1729		ap->pages[ap->num_pages] = page;
1730		ap->descs[ap->num_pages].offset = offset;
1731		ap->descs[ap->num_pages].length = this_num;
1732		ap->num_pages++;
1733
1734		offset = 0;
1735		num -= this_num;
1736		total_len += this_num;
1737		index++;
1738	}
1739	ra->inarg.offset = outarg->offset;
1740	ra->inarg.size = total_len;
1741	args->in_args[0].size = sizeof(ra->inarg);
1742	args->in_args[0].value = &ra->inarg;
1743	args->in_args[1].size = total_len;
1744
1745	err = fuse_simple_notify_reply(fm, args, outarg->notify_unique);
1746	if (err)
1747		fuse_retrieve_end(fm, args, err);
1748
1749	return err;
1750}
1751
1752static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1753				struct fuse_copy_state *cs)
1754{
1755	struct fuse_notify_retrieve_out outarg;
1756	struct fuse_mount *fm;
1757	struct inode *inode;
1758	u64 nodeid;
1759	int err;
1760
1761	err = -EINVAL;
1762	if (size != sizeof(outarg))
1763		goto copy_finish;
1764
1765	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1766	if (err)
1767		goto copy_finish;
1768
1769	fuse_copy_finish(cs);
1770
1771	down_read(&fc->killsb);
1772	err = -ENOENT;
1773	nodeid = outarg.nodeid;
1774
1775	inode = fuse_ilookup(fc, nodeid, &fm);
1776	if (inode) {
1777		err = fuse_retrieve(fm, inode, &outarg);
1778		iput(inode);
1779	}
1780	up_read(&fc->killsb);
1781
1782	return err;
1783
1784copy_finish:
1785	fuse_copy_finish(cs);
1786	return err;
1787}
1788
1789static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1790		       unsigned int size, struct fuse_copy_state *cs)
1791{
1792	/* Don't try to move pages (yet) */
1793	cs->move_pages = 0;
1794
1795	switch (code) {
1796	case FUSE_NOTIFY_POLL:
1797		return fuse_notify_poll(fc, size, cs);
1798
1799	case FUSE_NOTIFY_INVAL_INODE:
1800		return fuse_notify_inval_inode(fc, size, cs);
1801
1802	case FUSE_NOTIFY_INVAL_ENTRY:
1803		return fuse_notify_inval_entry(fc, size, cs);
1804
1805	case FUSE_NOTIFY_STORE:
1806		return fuse_notify_store(fc, size, cs);
1807
1808	case FUSE_NOTIFY_RETRIEVE:
1809		return fuse_notify_retrieve(fc, size, cs);
1810
1811	case FUSE_NOTIFY_DELETE:
1812		return fuse_notify_delete(fc, size, cs);
1813
1814	default:
1815		fuse_copy_finish(cs);
1816		return -EINVAL;
1817	}
1818}
1819
1820/* Look up request on processing list by unique ID */
1821static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
1822{
1823	unsigned int hash = fuse_req_hash(unique);
1824	struct fuse_req *req;
1825
1826	list_for_each_entry(req, &fpq->processing[hash], list) {
1827		if (req->in.h.unique == unique)
1828			return req;
1829	}
1830	return NULL;
1831}
1832
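/*
 * Copy the reply arguments from the userspace buffer into the request.  The
 * reply may not be larger than the expected argument sizes, and may only be
 * shorter when the last output argument is variable length (out_argvar), in
 * which case that argument is shrunk to the size actually returned.
 */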
1833static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
1834			 unsigned nbytes)
1835{
1836	unsigned reqsize = sizeof(struct fuse_out_header);
1837
1838	reqsize += fuse_len_args(args->out_numargs, args->out_args);
1839
1840	if (reqsize < nbytes || (reqsize > nbytes && !args->out_argvar))
1841		return -EINVAL;
1842	else if (reqsize > nbytes) {
1843		struct fuse_arg *lastarg = &args->out_args[args->out_numargs-1];
1844		unsigned diffsize = reqsize - nbytes;
1845
1846		if (diffsize > lastarg->size)
1847			return -EINVAL;
1848		lastarg->size -= diffsize;
1849	}
1850	return fuse_copy_args(cs, args->out_numargs, args->out_pages,
1851			      args->out_args, args->page_zeroing);
1852}
1853
1854/*
1855 * Write a single reply to a request.  First the header is copied from
1856 * the write buffer.  The request is then searched on the processing
1857 * list by the unique ID found in the header.  If found, then remove
1858 * it from the list and copy the rest of the buffer to the request.
1859 * The request is finished by calling fuse_request_end().
1860 */
1861static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
1862				 struct fuse_copy_state *cs, size_t nbytes)
1863{
1864	int err;
1865	struct fuse_conn *fc = fud->fc;
1866	struct fuse_pqueue *fpq = &fud->pq;
1867	struct fuse_req *req;
1868	struct fuse_out_header oh;
1869
1870	err = -EINVAL;
1871	if (nbytes < sizeof(struct fuse_out_header))
1872		goto out;
1873
1874	err = fuse_copy_one(cs, &oh, sizeof(oh));
1875	if (err)
1876		goto copy_finish;
1877
1878	err = -EINVAL;
1879	if (oh.len != nbytes)
1880		goto copy_finish;
1881
1882	/*
1883	 * A zero oh.unique indicates an unsolicited notification message,
1884	 * and oh.error contains the notification code.
1885	 */
1886	if (!oh.unique) {
1887		err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1888		goto out;
1889	}
1890
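	/*
	 * A reply error must be zero or a negative errno strictly above -512
	 * (the start of the kernel-internal error range); anything else is
	 * rejected.
	 */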
1891	err = -EINVAL;
1892	if (oh.error <= -512 || oh.error > 0)
1893		goto copy_finish;
1894
1895	spin_lock(&fpq->lock);
1896	req = NULL;
1897	if (fpq->connected)
1898		req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
1899
1900	err = -ENOENT;
1901	if (!req) {
1902		spin_unlock(&fpq->lock);
1903		goto copy_finish;
1904	}
1905
1906	/* Is it an interrupt reply ID? */
1907	if (oh.unique & FUSE_INT_REQ_BIT) {
1908		__fuse_get_request(req);
1909		spin_unlock(&fpq->lock);
1910
1911		err = 0;
1912		if (nbytes != sizeof(struct fuse_out_header))
1913			err = -EINVAL;
1914		else if (oh.error == -ENOSYS)
1915			fc->no_interrupt = 1;
1916		else if (oh.error == -EAGAIN)
1917			err = queue_interrupt(req);
1918
1919		fuse_put_request(req);
1920
1921		goto copy_finish;
1922	}
1923
1924	clear_bit(FR_SENT, &req->flags);
1925	list_move(&req->list, &fpq->io);
1926	req->out.h = oh;
1927	set_bit(FR_LOCKED, &req->flags);
1928	spin_unlock(&fpq->lock);
1929	cs->req = req;
1930	if (!req->args->page_replace)
1931		cs->move_pages = 0;
1932
1933	if (oh.error)
1934		err = nbytes != sizeof(oh) ? -EINVAL : 0;
1935	else
1936		err = copy_out_args(cs, req->args, nbytes);
1937	fuse_copy_finish(cs);
1938
1939	spin_lock(&fpq->lock);
1940	clear_bit(FR_LOCKED, &req->flags);
1941	if (!fpq->connected)
1942		err = -ENOENT;
1943	else if (err)
1944		req->out.h.error = -EIO;
1945	if (!test_bit(FR_PRIVATE, &req->flags))
1946		list_del_init(&req->list);
1947	spin_unlock(&fpq->lock);
1948
1949	fuse_request_end(req);
1950out:
1951	return err ? err : nbytes;
1952
1953copy_finish:
1954	fuse_copy_finish(cs);
1955	goto out;
1956}
1957
1958static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
1959{
1960	struct fuse_copy_state cs;
1961	struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp);
1962
1963	if (!fud)
1964		return -EPERM;
1965
1966	if (!iter_is_iovec(from))
1967		return -EINVAL;
1968
1969	fuse_copy_init(&cs, 0, from);
1970
1971	return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
1972}
1973
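/*
 * Write replies through a pipe.  The pipe buffers covering 'len' bytes are
 * collected into a private array and fed to fuse_dev_do_write() via the copy
 * state; with SPLICE_F_MOVE the pages may be moved into the page cache
 * instead of copied (see fuse_try_move_page()).
 */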
1974static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1975				     struct file *out, loff_t *ppos,
1976				     size_t len, unsigned int flags)
1977{
1978	unsigned int head, tail, mask, count;
1979	unsigned nbuf;
1980	unsigned idx;
1981	struct pipe_buffer *bufs;
1982	struct fuse_copy_state cs;
1983	struct fuse_dev *fud;
1984	size_t rem;
1985	ssize_t ret;
1986
1987	fud = fuse_get_dev(out);
1988	if (!fud)
1989		return -EPERM;
1990
1991	pipe_lock(pipe);
1992
1993	head = pipe->head;
1994	tail = pipe->tail;
1995	mask = pipe->ring_size - 1;
1996	count = head - tail;
1997
1998	bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL);
1999	if (!bufs) {
2000		pipe_unlock(pipe);
2001		return -ENOMEM;
2002	}
2003
2004	nbuf = 0;
2005	rem = 0;
2006	for (idx = tail; idx != head && rem < len; idx++)
2007		rem += pipe->bufs[idx & mask].len;
2008
2009	ret = -EINVAL;
2010	if (rem < len)
2011		goto out_free;
2012
2013	rem = len;
2014	while (rem) {
2015		struct pipe_buffer *ibuf;
2016		struct pipe_buffer *obuf;
2017
2018		if (WARN_ON(nbuf >= count || tail == head))
2019			goto out_free;
2020
2021		ibuf = &pipe->bufs[tail & mask];
2022		obuf = &bufs[nbuf];
2023
2024		if (rem >= ibuf->len) {
2025			*obuf = *ibuf;
2026			ibuf->ops = NULL;
2027			tail++;
2028			pipe->tail = tail;
2029		} else {
2030			if (!pipe_buf_get(pipe, ibuf))
2031				goto out_free;
2032
2033			*obuf = *ibuf;
2034			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
2035			obuf->len = rem;
2036			ibuf->offset += obuf->len;
2037			ibuf->len -= obuf->len;
2038		}
2039		nbuf++;
2040		rem -= obuf->len;
2041	}
2042	pipe_unlock(pipe);
2043
2044	fuse_copy_init(&cs, 0, NULL);
2045	cs.pipebufs = bufs;
2046	cs.nr_segs = nbuf;
2047	cs.pipe = pipe;
2048
2049	if (flags & SPLICE_F_MOVE)
2050		cs.move_pages = 1;
2051
2052	ret = fuse_dev_do_write(fud, &cs, len);
2053
2054	pipe_lock(pipe);
2055out_free:
2056	for (idx = 0; idx < nbuf; idx++) {
2057		struct pipe_buffer *buf = &bufs[idx];
2058
2059		if (buf->ops)
2060			pipe_buf_release(pipe, buf);
2061	}
2062	pipe_unlock(pipe);
2063
2064	kvfree(bufs);
2065	return ret;
2066}
2067
2068static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
2069{
2070	__poll_t mask = EPOLLOUT | EPOLLWRNORM;
2071	struct fuse_iqueue *fiq;
2072	struct fuse_dev *fud = fuse_get_dev(file);
2073
2074	if (!fud)
2075		return EPOLLERR;
2076
2077	fiq = &fud->fc->iq;
2078	poll_wait(file, &fiq->waitq, wait);
2079
2080	spin_lock(&fiq->lock);
2081	if (!fiq->connected)
2082		mask = EPOLLERR;
2083	else if (request_pending(fiq))
2084		mask |= EPOLLIN | EPOLLRDNORM;
2085	spin_unlock(&fiq->lock);
2086
2087	return mask;
2088}
2089
2090/* Abort all requests on the given list (pending or processing) */
2091static void end_requests(struct list_head *head)
2092{
2093	while (!list_empty(head)) {
2094		struct fuse_req *req;
2095		req = list_entry(head->next, struct fuse_req, list);
2096		req->out.h.error = -ECONNABORTED;
2097		clear_bit(FR_SENT, &req->flags);
2098		list_del_init(&req->list);
2099		fuse_request_end(req);
2100	}
2101}
2102
2103static void end_polls(struct fuse_conn *fc)
2104{
2105	struct rb_node *p;
2106
2107	p = rb_first(&fc->polled_files);
2108
2109	while (p) {
2110		struct fuse_file *ff;
2111		ff = rb_entry(p, struct fuse_file, polled_node);
2112		wake_up_interruptible_all(&ff->poll_wait);
2113
2114		p = rb_next(p);
2115	}
2116}
2117
2118/*
2119 * Abort all requests.
2120 *
2121 * Emergency exit in case of a malicious or accidental deadlock, or just a hung
2122 * filesystem.
2123 *
2124 * The same effect is usually achievable through killing the filesystem daemon
2125 * and all users of the filesystem.  The exception is the combination of an
2126 * asynchronous request and the tricky deadlock (see
2127 * Documentation/filesystems/fuse.rst).
2128 *
2129 * Aborting requests under I/O goes as follows: 1: Separate out unlocked
2130 * requests, they should be finished off immediately.  Locked requests will be
2131 * finished after unlock; see unlock_request(). 2: Finish off the unlocked
2132 * requests.  It is possible that some request will finish before we can.  This
2133 * is OK, the request will in that case be removed from the list before we touch
2134 * it.
2135 */
2136void fuse_abort_conn(struct fuse_conn *fc)
2137{
2138	struct fuse_iqueue *fiq = &fc->iq;
2139
2140	spin_lock(&fc->lock);
2141	if (fc->connected) {
2142		struct fuse_dev *fud;
2143		struct fuse_req *req, *next;
2144		LIST_HEAD(to_end);
2145		unsigned int i;
2146
2147		/* Background queuing checks fc->connected under bg_lock */
2148		spin_lock(&fc->bg_lock);
2149		fc->connected = 0;
2150		spin_unlock(&fc->bg_lock);
2151
2152		fuse_set_initialized(fc);
2153		list_for_each_entry(fud, &fc->devices, entry) {
2154			struct fuse_pqueue *fpq = &fud->pq;
2155
2156			spin_lock(&fpq->lock);
2157			fpq->connected = 0;
2158			list_for_each_entry_safe(req, next, &fpq->io, list) {
2159				req->out.h.error = -ECONNABORTED;
2160				spin_lock(&req->waitq.lock);
2161				set_bit(FR_ABORTED, &req->flags);
2162				if (!test_bit(FR_LOCKED, &req->flags)) {
2163					set_bit(FR_PRIVATE, &req->flags);
2164					__fuse_get_request(req);
2165					list_move(&req->list, &to_end);
2166				}
2167				spin_unlock(&req->waitq.lock);
2168			}
2169			for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
2170				list_splice_tail_init(&fpq->processing[i],
2171						      &to_end);
2172			spin_unlock(&fpq->lock);
2173		}
2174		spin_lock(&fc->bg_lock);
2175		fc->blocked = 0;
2176		fc->max_background = UINT_MAX;
2177		flush_bg_queue(fc);
2178		spin_unlock(&fc->bg_lock);
2179
2180		spin_lock(&fiq->lock);
2181		fiq->connected = 0;
2182		list_for_each_entry(req, &fiq->pending, list)
2183			clear_bit(FR_PENDING, &req->flags);
2184		list_splice_tail_init(&fiq->pending, &to_end);
2185		while (forget_pending(fiq))
2186			kfree(fuse_dequeue_forget(fiq, 1, NULL));
2187		wake_up_all(&fiq->waitq);
2188		spin_unlock(&fiq->lock);
2189		kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
2190		end_polls(fc);
2191		wake_up_all(&fc->blocked_waitq);
2192		spin_unlock(&fc->lock);
2193
2194		end_requests(&to_end);
2195	} else {
2196		spin_unlock(&fc->lock);
2197	}
2198}
2199EXPORT_SYMBOL_GPL(fuse_abort_conn);
2200
2201void fuse_wait_aborted(struct fuse_conn *fc)
2202{
2203	/* matches implicit memory barrier in fuse_drop_waiting() */
2204	smp_mb();
2205	wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
2206}
2207
2208int fuse_dev_release(struct inode *inode, struct file *file)
2209{
2210	struct fuse_dev *fud = fuse_get_dev(file);
2211
2212	if (fud) {
2213		struct fuse_conn *fc = fud->fc;
2214		struct fuse_pqueue *fpq = &fud->pq;
2215		LIST_HEAD(to_end);
2216		unsigned int i;
2217
2218		spin_lock(&fpq->lock);
2219		WARN_ON(!list_empty(&fpq->io));
2220		for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
2221			list_splice_init(&fpq->processing[i], &to_end);
2222		spin_unlock(&fpq->lock);
2223
2224		end_requests(&to_end);
2225
2226		/* Are we the last open device? */
2227		if (atomic_dec_and_test(&fc->dev_count)) {
2228			WARN_ON(fc->iq.fasync != NULL);
2229			fuse_abort_conn(fc);
2230		}
2231		fuse_dev_free(fud);
2232	}
2233	return 0;
2234}
2235EXPORT_SYMBOL_GPL(fuse_dev_release);
2236
2237static int fuse_dev_fasync(int fd, struct file *file, int on)
2238{
2239	struct fuse_dev *fud = fuse_get_dev(file);
2240
2241	if (!fud)
2242		return -EPERM;
2243
2244	/* No locking - fasync_helper does its own locking */
2245	return fasync_helper(fd, file, on, &fud->fc->iq.fasync);
2246}
2247
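/*
 * Attach an additional struct fuse_dev to an existing connection, giving the
 * new file descriptor its own processing queue.  Used by the
 * FUSE_DEV_IOC_CLONE ioctl so that a daemon can service one connection
 * through several device fds.
 */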
2248static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
2249{
2250	struct fuse_dev *fud;
2251
2252	if (new->private_data)
2253		return -EINVAL;
2254
2255	fud = fuse_dev_alloc_install(fc);
2256	if (!fud)
2257		return -ENOMEM;
2258
2259	new->private_data = fud;
2260	atomic_inc(&fc->dev_count);
2261
2262	return 0;
2263}
2264
2265static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
2266			   unsigned long arg)
2267{
2268	int err = -ENOTTY;
2269
2270	if (cmd == FUSE_DEV_IOC_CLONE) {
2271		int oldfd;
2272
2273		err = -EFAULT;
2274		if (!get_user(oldfd, (__u32 __user *) arg)) {
2275			struct file *old = fget(oldfd);
2276
2277			err = -EINVAL;
2278			if (old) {
2279				struct fuse_dev *fud = NULL;
2280
2281				/*
2282				 * Check against file->f_op because CUSE
2283				 * uses the same ioctl handler.
2284				 */
2285				if (old->f_op == file->f_op &&
2286				    old->f_cred->user_ns == file->f_cred->user_ns)
2287					fud = fuse_get_dev(old);
2288
2289				if (fud) {
2290					mutex_lock(&fuse_mutex);
2291					err = fuse_device_clone(fud->fc, file);
2292					mutex_unlock(&fuse_mutex);
2293				}
2294				fput(old);
2295			}
2296		}
2297	}
2298	return err;
2299}
2300
2301const struct file_operations fuse_dev_operations = {
2302	.owner		= THIS_MODULE,
2303	.open		= fuse_dev_open,
2304	.llseek		= no_llseek,
2305	.read_iter	= fuse_dev_read,
2306	.splice_read	= fuse_dev_splice_read,
2307	.write_iter	= fuse_dev_write,
2308	.splice_write	= fuse_dev_splice_write,
2309	.poll		= fuse_dev_poll,
2310	.release	= fuse_dev_release,
2311	.fasync		= fuse_dev_fasync,
2312	.unlocked_ioctl = fuse_dev_ioctl,
2313	.compat_ioctl   = compat_ptr_ioctl,
2314};
2315EXPORT_SYMBOL_GPL(fuse_dev_operations);
2316
2317static struct miscdevice fuse_miscdevice = {
2318	.minor = FUSE_MINOR,
2319	.name  = "fuse",
2320	.fops = &fuse_dev_operations,
2321};
2322
2323int __init fuse_dev_init(void)
2324{
2325	int err = -ENOMEM;
2326	fuse_req_cachep = kmem_cache_create("fuse_request",
2327					    sizeof(struct fuse_req),
2328					    0, 0, NULL);
2329	if (!fuse_req_cachep)
2330		goto out;
2331
2332	err = misc_register(&fuse_miscdevice);
2333	if (err)
2334		goto out_cache_clean;
2335
2336	return 0;
2337
2338 out_cache_clean:
2339	kmem_cache_destroy(fuse_req_cachep);
2340 out:
2341	return err;
2342}
2343
2344void fuse_dev_cleanup(void)
2345{
2346	misc_deregister(&fuse_miscdevice);
2347	kmem_cache_destroy(fuse_req_cachep);
2348}
2349