162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci *  linux/fs/pipe.c
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/mm.h>
962306a36Sopenharmony_ci#include <linux/file.h>
1062306a36Sopenharmony_ci#include <linux/poll.h>
1162306a36Sopenharmony_ci#include <linux/slab.h>
1262306a36Sopenharmony_ci#include <linux/module.h>
1362306a36Sopenharmony_ci#include <linux/init.h>
1462306a36Sopenharmony_ci#include <linux/fs.h>
1562306a36Sopenharmony_ci#include <linux/log2.h>
1662306a36Sopenharmony_ci#include <linux/mount.h>
1762306a36Sopenharmony_ci#include <linux/pseudo_fs.h>
1862306a36Sopenharmony_ci#include <linux/magic.h>
1962306a36Sopenharmony_ci#include <linux/pipe_fs_i.h>
2062306a36Sopenharmony_ci#include <linux/uio.h>
2162306a36Sopenharmony_ci#include <linux/highmem.h>
2262306a36Sopenharmony_ci#include <linux/pagemap.h>
2362306a36Sopenharmony_ci#include <linux/audit.h>
2462306a36Sopenharmony_ci#include <linux/syscalls.h>
2562306a36Sopenharmony_ci#include <linux/fcntl.h>
2662306a36Sopenharmony_ci#include <linux/memcontrol.h>
2762306a36Sopenharmony_ci#include <linux/watch_queue.h>
2862306a36Sopenharmony_ci#include <linux/sysctl.h>
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_ci#include <linux/uaccess.h>
3162306a36Sopenharmony_ci#include <asm/ioctls.h>
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci#include "internal.h"
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci/*
3662306a36Sopenharmony_ci * New pipe buffers will be restricted to this size while the user is exceeding
3762306a36Sopenharmony_ci * their pipe buffer quota. The general pipe use case needs at least two
3862306a36Sopenharmony_ci * buffers: one for data yet to be read, and one for new data. If this is less
3962306a36Sopenharmony_ci * than two, then a write to a non-empty pipe may block even if the pipe is not
4062306a36Sopenharmony_ci * full. This can occur with GNU make jobserver or similar uses of pipes as
4162306a36Sopenharmony_ci * semaphores: multiple processes may be waiting to write tokens back to the
4262306a36Sopenharmony_ci * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
4362306a36Sopenharmony_ci *
4462306a36Sopenharmony_ci * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
4562306a36Sopenharmony_ci * own risk, namely: pipe writes to non-full pipes may block until the pipe is
4662306a36Sopenharmony_ci * emptied.
4762306a36Sopenharmony_ci */
/* Unit is pipe buffers (ring slots), not bytes. */
4862306a36Sopenharmony_ci#define PIPE_MIN_DEF_BUFFERS 2
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci/*
5162306a36Sopenharmony_ci * The max size that a non-root user is allowed to grow the pipe. Can
5262306a36Sopenharmony_ci * be set by root in /proc/sys/fs/pipe-max-size
5362306a36Sopenharmony_ci */
/* 1048576 == 2^20; presumably a byte count (1 MiB) - TODO confirm the unit. */
5462306a36Sopenharmony_cistatic unsigned int pipe_max_size = 1048576;
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci/* Maximum allocatable pages per user. Hard limit is unset by default, soft
5762306a36Sopenharmony_ci * matches default values.
5862306a36Sopenharmony_ci */
/*
 * The hard limit has no initializer, so as a file-scope static it starts at
 * zero; that matches the "unset by default" note above.
 */
5962306a36Sopenharmony_cistatic unsigned long pipe_user_pages_hard;
/* Both factors are constants defined outside this file. */
6062306a36Sopenharmony_cistatic unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci/*
6362306a36Sopenharmony_ci * We use head and tail indices that aren't masked off, except at the point of
6462306a36Sopenharmony_ci * dereference, but rather they're allowed to wrap naturally.  This means there
6562306a36Sopenharmony_ci * isn't a dead spot in the buffer, but the ring has to be a power of two and
6662306a36Sopenharmony_ci * <= 2^31.
6762306a36Sopenharmony_ci * -- David Howells 2019-09-23.
6862306a36Sopenharmony_ci *
6962306a36Sopenharmony_ci * Reads with count = 0 should always return 0.
7062306a36Sopenharmony_ci * -- Julian Bradfield 1999-06-07.
7162306a36Sopenharmony_ci *
7262306a36Sopenharmony_ci * FIFOs and Pipes now generate SIGIO for both readers and writers.
7362306a36Sopenharmony_ci * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
7462306a36Sopenharmony_ci *
7562306a36Sopenharmony_ci * pipe_read & write cleanup
7662306a36Sopenharmony_ci * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
7762306a36Sopenharmony_ci */
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_cistatic void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
8062306a36Sopenharmony_ci{
8162306a36Sopenharmony_ci	if (pipe->files)
8262306a36Sopenharmony_ci		mutex_lock_nested(&pipe->mutex, subclass);
8362306a36Sopenharmony_ci}
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_civoid pipe_lock(struct pipe_inode_info *pipe)
8662306a36Sopenharmony_ci{
8762306a36Sopenharmony_ci	/*
8862306a36Sopenharmony_ci	 * pipe_lock() nests non-pipe inode locks (for writing to a file)
8962306a36Sopenharmony_ci	 */
9062306a36Sopenharmony_ci	pipe_lock_nested(pipe, I_MUTEX_PARENT);
9162306a36Sopenharmony_ci}
9262306a36Sopenharmony_ciEXPORT_SYMBOL(pipe_lock);
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_civoid pipe_unlock(struct pipe_inode_info *pipe)
9562306a36Sopenharmony_ci{
9662306a36Sopenharmony_ci	if (pipe->files)
9762306a36Sopenharmony_ci		mutex_unlock(&pipe->mutex);
9862306a36Sopenharmony_ci}
9962306a36Sopenharmony_ciEXPORT_SYMBOL(pipe_unlock);
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_cistatic inline void __pipe_lock(struct pipe_inode_info *pipe)
10262306a36Sopenharmony_ci{
10362306a36Sopenharmony_ci	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
10462306a36Sopenharmony_ci}
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_cistatic inline void __pipe_unlock(struct pipe_inode_info *pipe)
10762306a36Sopenharmony_ci{
10862306a36Sopenharmony_ci	mutex_unlock(&pipe->mutex);
10962306a36Sopenharmony_ci}
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_civoid pipe_double_lock(struct pipe_inode_info *pipe1,
11262306a36Sopenharmony_ci		      struct pipe_inode_info *pipe2)
11362306a36Sopenharmony_ci{
11462306a36Sopenharmony_ci	BUG_ON(pipe1 == pipe2);
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci	if (pipe1 < pipe2) {
11762306a36Sopenharmony_ci		pipe_lock_nested(pipe1, I_MUTEX_PARENT);
11862306a36Sopenharmony_ci		pipe_lock_nested(pipe2, I_MUTEX_CHILD);
11962306a36Sopenharmony_ci	} else {
12062306a36Sopenharmony_ci		pipe_lock_nested(pipe2, I_MUTEX_PARENT);
12162306a36Sopenharmony_ci		pipe_lock_nested(pipe1, I_MUTEX_CHILD);
12262306a36Sopenharmony_ci	}
12362306a36Sopenharmony_ci}
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_cistatic void anon_pipe_buf_release(struct pipe_inode_info *pipe,
12662306a36Sopenharmony_ci				  struct pipe_buffer *buf)
12762306a36Sopenharmony_ci{
12862306a36Sopenharmony_ci	struct page *page = buf->page;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	/*
13162306a36Sopenharmony_ci	 * If nobody else uses this page, and we don't already have a
13262306a36Sopenharmony_ci	 * temporary page, let's keep track of it as a one-deep
13362306a36Sopenharmony_ci	 * allocation cache. (Otherwise just release our reference to it)
13462306a36Sopenharmony_ci	 */
13562306a36Sopenharmony_ci	if (page_count(page) == 1 && !pipe->tmp_page)
13662306a36Sopenharmony_ci		pipe->tmp_page = page;
13762306a36Sopenharmony_ci	else
13862306a36Sopenharmony_ci		put_page(page);
13962306a36Sopenharmony_ci}
14062306a36Sopenharmony_ci
14162306a36Sopenharmony_cistatic bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
14262306a36Sopenharmony_ci		struct pipe_buffer *buf)
14362306a36Sopenharmony_ci{
14462306a36Sopenharmony_ci	struct page *page = buf->page;
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci	if (page_count(page) != 1)
14762306a36Sopenharmony_ci		return false;
14862306a36Sopenharmony_ci	memcg_kmem_uncharge_page(page, 0);
14962306a36Sopenharmony_ci	__SetPageLocked(page);
15062306a36Sopenharmony_ci	return true;
15162306a36Sopenharmony_ci}
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci/**
15462306a36Sopenharmony_ci * generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer
15562306a36Sopenharmony_ci * @pipe:	the pipe that the buffer belongs to
15662306a36Sopenharmony_ci * @buf:	the buffer to attempt to steal
15762306a36Sopenharmony_ci *
15862306a36Sopenharmony_ci * Description:
15962306a36Sopenharmony_ci *	This function attempts to steal the &struct page attached to
16062306a36Sopenharmony_ci *	@buf. If successful, this function returns 0 and returns with
16162306a36Sopenharmony_ci *	the page locked. The caller may then reuse the page for whatever
16262306a36Sopenharmony_ci *	he wishes; the typical use is insertion into a different file
16362306a36Sopenharmony_ci *	page cache.
16462306a36Sopenharmony_ci */
16562306a36Sopenharmony_cibool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe,
16662306a36Sopenharmony_ci		struct pipe_buffer *buf)
16762306a36Sopenharmony_ci{
16862306a36Sopenharmony_ci	struct page *page = buf->page;
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	/*
17162306a36Sopenharmony_ci	 * A reference of one is golden, that means that the owner of this
17262306a36Sopenharmony_ci	 * page is the only one holding a reference to it. lock the page
17362306a36Sopenharmony_ci	 * and return OK.
17462306a36Sopenharmony_ci	 */
17562306a36Sopenharmony_ci	if (page_count(page) == 1) {
17662306a36Sopenharmony_ci		lock_page(page);
17762306a36Sopenharmony_ci		return true;
17862306a36Sopenharmony_ci	}
17962306a36Sopenharmony_ci	return false;
18062306a36Sopenharmony_ci}
18162306a36Sopenharmony_ciEXPORT_SYMBOL(generic_pipe_buf_try_steal);
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci/**
18462306a36Sopenharmony_ci * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
18562306a36Sopenharmony_ci * @pipe:	the pipe that the buffer belongs to
18662306a36Sopenharmony_ci * @buf:	the buffer to get a reference to
18762306a36Sopenharmony_ci *
18862306a36Sopenharmony_ci * Description:
18962306a36Sopenharmony_ci *	This function grabs an extra reference to @buf. It's used in
19062306a36Sopenharmony_ci *	the tee() system call, when we duplicate the buffers in one
19162306a36Sopenharmony_ci *	pipe into another.
19262306a36Sopenharmony_ci */
19362306a36Sopenharmony_cibool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
19462306a36Sopenharmony_ci{
19562306a36Sopenharmony_ci	return try_get_page(buf->page);
19662306a36Sopenharmony_ci}
19762306a36Sopenharmony_ciEXPORT_SYMBOL(generic_pipe_buf_get);
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci/**
20062306a36Sopenharmony_ci * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
20162306a36Sopenharmony_ci * @pipe:	the pipe that the buffer belongs to
20262306a36Sopenharmony_ci * @buf:	the buffer to put a reference to
20362306a36Sopenharmony_ci *
20462306a36Sopenharmony_ci * Description:
20562306a36Sopenharmony_ci *	This function releases a reference to @buf.
20662306a36Sopenharmony_ci */
20762306a36Sopenharmony_civoid generic_pipe_buf_release(struct pipe_inode_info *pipe,
20862306a36Sopenharmony_ci			      struct pipe_buffer *buf)
20962306a36Sopenharmony_ci{
21062306a36Sopenharmony_ci	put_page(buf->page);
21162306a36Sopenharmony_ci}
21262306a36Sopenharmony_ciEXPORT_SYMBOL(generic_pipe_buf_release);
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_cistatic const struct pipe_buf_operations anon_pipe_buf_ops = {
21562306a36Sopenharmony_ci	.release	= anon_pipe_buf_release,
21662306a36Sopenharmony_ci	.try_steal	= anon_pipe_buf_try_steal,
21762306a36Sopenharmony_ci	.get		= generic_pipe_buf_get,
21862306a36Sopenharmony_ci};
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
22162306a36Sopenharmony_cistatic inline bool pipe_readable(const struct pipe_inode_info *pipe)
22262306a36Sopenharmony_ci{
22362306a36Sopenharmony_ci	unsigned int head = READ_ONCE(pipe->head);
22462306a36Sopenharmony_ci	unsigned int tail = READ_ONCE(pipe->tail);
22562306a36Sopenharmony_ci	unsigned int writers = READ_ONCE(pipe->writers);
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci	return !pipe_empty(head, tail) || !writers;
22862306a36Sopenharmony_ci}
22962306a36Sopenharmony_ci
/*
 * pipe_read - copy buffered pipe data into the iterator @to.
 * @iocb: request descriptor; supplies the file (ki_filp) and IOCB_NOWAIT
 * @to:   destination iterator; its byte count on entry is the request length
 *
 * The ring is examined with pipe->mutex held; the mutex is dropped around
 * the wakeups and the sleep that happen when there is nothing to read.
 * Buffers are consumed from pipe->tail.  A buffer whose length reaches zero
 * is released and the tail index is advanced under rd_wait.lock.
 *
 * Return: the number of bytes copied; 0 for a zero-length request or when
 * the pipe is empty and has no writers; otherwise a negative errno, which
 * is only reported when nothing has been copied yet:
 *   -EAGAIN       nothing to read and O_NONBLOCK or IOCB_NOWAIT is set
 *   -ERESTARTSYS  the sleep waiting for data was interrupted
 *   -EFAULT       a copy into @to came up short
 *   -ENOBUFS      a PIPE_BUF_FLAG_WHOLE buffer, or the watch-queue loss
 *                 record, does not fit in what is left of the request
 *   or whatever pipe_buf_confirm() returned.
 */
23062306a36Sopenharmony_cistatic ssize_t
23162306a36Sopenharmony_cipipe_read(struct kiocb *iocb, struct iov_iter *to)
23262306a36Sopenharmony_ci{
23362306a36Sopenharmony_ci	size_t total_len = iov_iter_count(to);
23462306a36Sopenharmony_ci	struct file *filp = iocb->ki_filp;
23562306a36Sopenharmony_ci	struct pipe_inode_info *pipe = filp->private_data;
23662306a36Sopenharmony_ci	bool was_full, wake_next_reader = false;
23762306a36Sopenharmony_ci	ssize_t ret;
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	/* Null read succeeds. */
24062306a36Sopenharmony_ci	if (unlikely(total_len == 0))
24162306a36Sopenharmony_ci		return 0;
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci	ret = 0;
24462306a36Sopenharmony_ci	__pipe_lock(pipe);
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci	/*
24762306a36Sopenharmony_ci	 * We only wake up writers if the pipe was full when we started
24862306a36Sopenharmony_ci	 * reading in order to avoid unnecessary wakeups.
24962306a36Sopenharmony_ci	 *
25062306a36Sopenharmony_ci	 * But when we do wake up writers, we do so using a sync wakeup
25162306a36Sopenharmony_ci	 * (WF_SYNC), because we want them to get going and generate more
25262306a36Sopenharmony_ci	 * data for us.
25362306a36Sopenharmony_ci	 */
25462306a36Sopenharmony_ci	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
25562306a36Sopenharmony_ci	for (;;) {
25662306a36Sopenharmony_ci		/* Read ->head with a barrier vs post_one_notification() */
25762306a36Sopenharmony_ci		unsigned int head = smp_load_acquire(&pipe->head);
25862306a36Sopenharmony_ci		unsigned int tail = pipe->tail;
25962306a36Sopenharmony_ci		unsigned int mask = pipe->ring_size - 1;
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci#ifdef CONFIG_WATCH_QUEUE
		/*
		 * A pending loss flag is delivered to the reader as a
		 * WATCH_META_LOSS_NOTIFICATION record ahead of further data.
		 */
26262306a36Sopenharmony_ci		if (pipe->note_loss) {
26362306a36Sopenharmony_ci			struct watch_notification n;
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci			if (total_len < 8) {
26662306a36Sopenharmony_ci				if (ret == 0)
26762306a36Sopenharmony_ci					ret = -ENOBUFS;
26862306a36Sopenharmony_ci				break;
26962306a36Sopenharmony_ci			}
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci			n.type = WATCH_TYPE_META;
27262306a36Sopenharmony_ci			n.subtype = WATCH_META_LOSS_NOTIFICATION;
27362306a36Sopenharmony_ci			n.info = watch_sizeof(n);
27462306a36Sopenharmony_ci			if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) {
27562306a36Sopenharmony_ci				if (ret == 0)
27662306a36Sopenharmony_ci					ret = -EFAULT;
27762306a36Sopenharmony_ci				break;
27862306a36Sopenharmony_ci			}
27962306a36Sopenharmony_ci			ret += sizeof(n);
28062306a36Sopenharmony_ci			total_len -= sizeof(n);
28162306a36Sopenharmony_ci			pipe->note_loss = false;
28262306a36Sopenharmony_ci		}
28362306a36Sopenharmony_ci#endif
28462306a36Sopenharmony_ci
28562306a36Sopenharmony_ci		if (!pipe_empty(head, tail)) {
28662306a36Sopenharmony_ci			struct pipe_buffer *buf = &pipe->bufs[tail & mask];
28762306a36Sopenharmony_ci			size_t chars = buf->len;
28862306a36Sopenharmony_ci			size_t written;
28962306a36Sopenharmony_ci			int error;
29062306a36Sopenharmony_ci
			/*
			 * Clamp to the space left in the request, unless the
			 * buffer is flagged PIPE_BUF_FLAG_WHOLE, in which
			 * case a partial copy is refused.
			 */
29162306a36Sopenharmony_ci			if (chars > total_len) {
29262306a36Sopenharmony_ci				if (buf->flags & PIPE_BUF_FLAG_WHOLE) {
29362306a36Sopenharmony_ci					if (ret == 0)
29462306a36Sopenharmony_ci						ret = -ENOBUFS;
29562306a36Sopenharmony_ci					break;
29662306a36Sopenharmony_ci				}
29762306a36Sopenharmony_ci				chars = total_len;
29862306a36Sopenharmony_ci			}
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci			error = pipe_buf_confirm(pipe, buf);
30162306a36Sopenharmony_ci			if (error) {
30262306a36Sopenharmony_ci				if (!ret)
30362306a36Sopenharmony_ci					ret = error;
30462306a36Sopenharmony_ci				break;
30562306a36Sopenharmony_ci			}
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_ci			written = copy_page_to_iter(buf->page, buf->offset, chars, to);
30862306a36Sopenharmony_ci			if (unlikely(written < chars)) {
30962306a36Sopenharmony_ci				if (!ret)
31062306a36Sopenharmony_ci					ret = -EFAULT;
31162306a36Sopenharmony_ci				break;
31262306a36Sopenharmony_ci			}
31362306a36Sopenharmony_ci			ret += chars;
31462306a36Sopenharmony_ci			buf->offset += chars;
31562306a36Sopenharmony_ci			buf->len -= chars;
31662306a36Sopenharmony_ci
31762306a36Sopenharmony_ci			/* Was it a packet buffer? Clean up and exit */
31862306a36Sopenharmony_ci			if (buf->flags & PIPE_BUF_FLAG_PACKET) {
				/*
				 * Setting total_len to chars makes the
				 * subtraction below reach zero, so the loop
				 * ends after this one buffer; zeroing
				 * buf->len discards any unread remainder.
				 */
31962306a36Sopenharmony_ci				total_len = chars;
32062306a36Sopenharmony_ci				buf->len = 0;
32162306a36Sopenharmony_ci			}
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_ci			if (!buf->len) {
32462306a36Sopenharmony_ci				pipe_buf_release(pipe, buf);
32562306a36Sopenharmony_ci				spin_lock_irq(&pipe->rd_wait.lock);
32662306a36Sopenharmony_ci#ifdef CONFIG_WATCH_QUEUE
32762306a36Sopenharmony_ci				if (buf->flags & PIPE_BUF_FLAG_LOSS)
32862306a36Sopenharmony_ci					pipe->note_loss = true;
32962306a36Sopenharmony_ci#endif
33062306a36Sopenharmony_ci				tail++;
33162306a36Sopenharmony_ci				pipe->tail = tail;
33262306a36Sopenharmony_ci				spin_unlock_irq(&pipe->rd_wait.lock);
33362306a36Sopenharmony_ci			}
33462306a36Sopenharmony_ci			total_len -= chars;
33562306a36Sopenharmony_ci			if (!total_len)
33662306a36Sopenharmony_ci				break;	/* common path: read succeeded */
33762306a36Sopenharmony_ci			if (!pipe_empty(head, tail))	/* More to do? */
33862306a36Sopenharmony_ci				continue;
33962306a36Sopenharmony_ci		}
34062306a36Sopenharmony_ci
		/*
		 * The ring is drained.  Return what we have (possibly 0) if
		 * there are no writers or if something was already copied;
		 * only a reader that has copied nothing goes on to wait.
		 */
34162306a36Sopenharmony_ci		if (!pipe->writers)
34262306a36Sopenharmony_ci			break;
34362306a36Sopenharmony_ci		if (ret)
34462306a36Sopenharmony_ci			break;
34562306a36Sopenharmony_ci		if ((filp->f_flags & O_NONBLOCK) ||
34662306a36Sopenharmony_ci		    (iocb->ki_flags & IOCB_NOWAIT)) {
34762306a36Sopenharmony_ci			ret = -EAGAIN;
34862306a36Sopenharmony_ci			break;
34962306a36Sopenharmony_ci		}
35062306a36Sopenharmony_ci		__pipe_unlock(pipe);
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci		/*
35362306a36Sopenharmony_ci		 * We only get here if we didn't actually read anything.
35462306a36Sopenharmony_ci		 *
35562306a36Sopenharmony_ci		 * However, we could have seen (and removed) a zero-sized
35662306a36Sopenharmony_ci		 * pipe buffer, and might have made space in the buffers
35762306a36Sopenharmony_ci		 * that way.
35862306a36Sopenharmony_ci		 *
35962306a36Sopenharmony_ci		 * You can't make zero-sized pipe buffers by doing an empty
36062306a36Sopenharmony_ci		 * write (not even in packet mode), but they can happen if
36162306a36Sopenharmony_ci		 * the writer gets an EFAULT when trying to fill a buffer
36262306a36Sopenharmony_ci		 * that already got allocated and inserted in the buffer
36362306a36Sopenharmony_ci		 * array.
36462306a36Sopenharmony_ci		 *
36562306a36Sopenharmony_ci		 * So we still need to wake up any pending writers in the
36662306a36Sopenharmony_ci		 * _very_ unlikely case that the pipe was full, but we got
36762306a36Sopenharmony_ci		 * no data.
36862306a36Sopenharmony_ci		 */
36962306a36Sopenharmony_ci		if (unlikely(was_full))
37062306a36Sopenharmony_ci			wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
37162306a36Sopenharmony_ci		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci		/*
37462306a36Sopenharmony_ci		 * But because we didn't read anything, at this point we can
37562306a36Sopenharmony_ci		 * just return directly with -ERESTARTSYS if we're interrupted,
37662306a36Sopenharmony_ci		 * since we've done any required wakeups and there's no need
37762306a36Sopenharmony_ci		 * to mark anything accessed. And we've dropped the lock.
37862306a36Sopenharmony_ci		 */
37962306a36Sopenharmony_ci		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
38062306a36Sopenharmony_ci			return -ERESTARTSYS;
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_ci		__pipe_lock(pipe);
		/* State may have changed while unlocked: resample fullness. */
38362306a36Sopenharmony_ci		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
38462306a36Sopenharmony_ci		wake_next_reader = true;
38562306a36Sopenharmony_ci	}
	/* Nothing is left for another reader, so skip the reader wakeup. */
38662306a36Sopenharmony_ci	if (pipe_empty(pipe->head, pipe->tail))
38762306a36Sopenharmony_ci		wake_next_reader = false;
38862306a36Sopenharmony_ci	__pipe_unlock(pipe);
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci	if (was_full)
39162306a36Sopenharmony_ci		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
39262306a36Sopenharmony_ci	if (wake_next_reader)
39362306a36Sopenharmony_ci		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
39462306a36Sopenharmony_ci	kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
39562306a36Sopenharmony_ci	if (ret > 0)
39662306a36Sopenharmony_ci		file_accessed(filp);
39762306a36Sopenharmony_ci	return ret;
39862306a36Sopenharmony_ci}
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_cistatic inline int is_packetized(struct file *file)
40162306a36Sopenharmony_ci{
40262306a36Sopenharmony_ci	return (file->f_flags & O_DIRECT) != 0;
40362306a36Sopenharmony_ci}
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_ci/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
40662306a36Sopenharmony_cistatic inline bool pipe_writable(const struct pipe_inode_info *pipe)
40762306a36Sopenharmony_ci{
40862306a36Sopenharmony_ci	unsigned int head = READ_ONCE(pipe->head);
40962306a36Sopenharmony_ci	unsigned int tail = READ_ONCE(pipe->tail);
41062306a36Sopenharmony_ci	unsigned int max_usage = READ_ONCE(pipe->max_usage);
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_ci	return !pipe_full(head, tail, max_usage) ||
41362306a36Sopenharmony_ci		!READ_ONCE(pipe->readers);
41462306a36Sopenharmony_ci}
41562306a36Sopenharmony_ci
/*
 * pipe_write - copy data from the iterator @from into the pipe.
 * @iocb: request descriptor; supplies the file (ki_filp) and IOCB_NOWAIT
 * @from: source iterator; its byte count on entry is the request length
 *
 * Works in two phases under pipe->mutex.  First, if the pipe is not empty,
 * the sub-page remainder of the request is appended to the newest buffer,
 * provided that buffer is flagged PIPE_BUF_FLAG_CAN_MERGE and has room.
 * Then a loop claims ring slots and fills them one page at a time, dropping
 * the mutex and sleeping on wr_wait whenever the pipe is full.
 *
 * Return: the number of bytes accepted; 0 for a zero-length request;
 * otherwise a negative errno:
 *   -EPIPE        the pipe has no readers (SIGPIPE is also sent to current)
 *   -EXDEV        pipe_has_watch_queue() is true for this pipe
 *   -EFAULT       a copy from @from came up short
 *   -ENOMEM       no page could be allocated for a new buffer
 *   -EAGAIN       the pipe is full and O_NONBLOCK or IOCB_NOWAIT is set
 *   -ERESTARTSYS  a signal is pending while the pipe is full
 * Inside the slot-filling loop an error is reported only when no bytes have
 * been accepted so far.  An error from file_update_time() replaces a
 * positive byte count.
 */
41662306a36Sopenharmony_cistatic ssize_t
41762306a36Sopenharmony_cipipe_write(struct kiocb *iocb, struct iov_iter *from)
41862306a36Sopenharmony_ci{
41962306a36Sopenharmony_ci	struct file *filp = iocb->ki_filp;
42062306a36Sopenharmony_ci	struct pipe_inode_info *pipe = filp->private_data;
42162306a36Sopenharmony_ci	unsigned int head;
42262306a36Sopenharmony_ci	ssize_t ret = 0;
42362306a36Sopenharmony_ci	size_t total_len = iov_iter_count(from);
42462306a36Sopenharmony_ci	ssize_t chars;
42562306a36Sopenharmony_ci	bool was_empty = false;
42662306a36Sopenharmony_ci	bool wake_next_writer = false;
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_ci	/* Null write succeeds. */
42962306a36Sopenharmony_ci	if (unlikely(total_len == 0))
43062306a36Sopenharmony_ci		return 0;
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	__pipe_lock(pipe);
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_ci	if (!pipe->readers) {
43562306a36Sopenharmony_ci		send_sig(SIGPIPE, current, 0);
43662306a36Sopenharmony_ci		ret = -EPIPE;
43762306a36Sopenharmony_ci		goto out;
43862306a36Sopenharmony_ci	}
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci	if (pipe_has_watch_queue(pipe)) {
44162306a36Sopenharmony_ci		ret = -EXDEV;
44262306a36Sopenharmony_ci		goto out;
44362306a36Sopenharmony_ci	}
44462306a36Sopenharmony_ci
44562306a36Sopenharmony_ci	/*
44662306a36Sopenharmony_ci	 * If it wasn't empty we try to merge new data into
44762306a36Sopenharmony_ci	 * the last buffer.
44862306a36Sopenharmony_ci	 *
44962306a36Sopenharmony_ci	 * That naturally merges small writes, but it also
45062306a36Sopenharmony_ci	 * page-aligns the rest of the writes for large writes
45162306a36Sopenharmony_ci	 * spanning multiple pages.
45262306a36Sopenharmony_ci	 */
45362306a36Sopenharmony_ci	head = pipe->head;
45462306a36Sopenharmony_ci	was_empty = pipe_empty(head, pipe->tail);
	/* Sub-page remainder of the request; only this part may be merged. */
45562306a36Sopenharmony_ci	chars = total_len & (PAGE_SIZE-1);
45662306a36Sopenharmony_ci	if (chars && !was_empty) {
45762306a36Sopenharmony_ci		unsigned int mask = pipe->ring_size - 1;
		/* head - 1 is the most recently added buffer. */
45862306a36Sopenharmony_ci		struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
45962306a36Sopenharmony_ci		int offset = buf->offset + buf->len;
46062306a36Sopenharmony_ci
46162306a36Sopenharmony_ci		if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
46262306a36Sopenharmony_ci		    offset + chars <= PAGE_SIZE) {
46362306a36Sopenharmony_ci			ret = pipe_buf_confirm(pipe, buf);
46462306a36Sopenharmony_ci			if (ret)
46562306a36Sopenharmony_ci				goto out;
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci			ret = copy_page_from_iter(buf->page, offset, chars, from);
46862306a36Sopenharmony_ci			if (unlikely(ret < chars)) {
46962306a36Sopenharmony_ci				ret = -EFAULT;
47062306a36Sopenharmony_ci				goto out;
47162306a36Sopenharmony_ci			}
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_ci			buf->len += ret;
47462306a36Sopenharmony_ci			if (!iov_iter_count(from))
47562306a36Sopenharmony_ci				goto out;
47662306a36Sopenharmony_ci		}
47762306a36Sopenharmony_ci	}
47862306a36Sopenharmony_ci
47962306a36Sopenharmony_ci	for (;;) {
		/* Readers may have gone away while we slept; recheck each pass. */
48062306a36Sopenharmony_ci		if (!pipe->readers) {
48162306a36Sopenharmony_ci			send_sig(SIGPIPE, current, 0);
48262306a36Sopenharmony_ci			if (!ret)
48362306a36Sopenharmony_ci				ret = -EPIPE;
48462306a36Sopenharmony_ci			break;
48562306a36Sopenharmony_ci		}
48662306a36Sopenharmony_ci
48762306a36Sopenharmony_ci		head = pipe->head;
48862306a36Sopenharmony_ci		if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
48962306a36Sopenharmony_ci			unsigned int mask = pipe->ring_size - 1;
49062306a36Sopenharmony_ci			struct pipe_buffer *buf;
			/* Prefer the page cached by anon_pipe_buf_release(). */
49162306a36Sopenharmony_ci			struct page *page = pipe->tmp_page;
49262306a36Sopenharmony_ci			int copied;
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_ci			if (!page) {
49562306a36Sopenharmony_ci				page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
49662306a36Sopenharmony_ci				if (unlikely(!page)) {
49762306a36Sopenharmony_ci					ret = ret ? : -ENOMEM;
49862306a36Sopenharmony_ci					break;
49962306a36Sopenharmony_ci				}
50062306a36Sopenharmony_ci				pipe->tmp_page = page;
50162306a36Sopenharmony_ci			}
50262306a36Sopenharmony_ci
50362306a36Sopenharmony_ci			/* Allocate a slot in the ring in advance and attach an
50462306a36Sopenharmony_ci			 * empty buffer.  If we fault or otherwise fail to use
50562306a36Sopenharmony_ci			 * it, either the reader will consume it or it'll still
50662306a36Sopenharmony_ci			 * be there for the next write.
50762306a36Sopenharmony_ci			 */
50862306a36Sopenharmony_ci			spin_lock_irq(&pipe->rd_wait.lock);
50962306a36Sopenharmony_ci
			/*
			 * Re-read head and re-test fullness under the spinlock
			 * before claiming the slot.
			 * NOTE(review): presumably guards against a producer
			 * that advances head holding only rd_wait.lock (cf.
			 * the post_one_notification() remark in pipe_read())
			 * - confirm.
			 */
51062306a36Sopenharmony_ci			head = pipe->head;
51162306a36Sopenharmony_ci			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
51262306a36Sopenharmony_ci				spin_unlock_irq(&pipe->rd_wait.lock);
51362306a36Sopenharmony_ci				continue;
51462306a36Sopenharmony_ci			}
51562306a36Sopenharmony_ci
51662306a36Sopenharmony_ci			pipe->head = head + 1;
51762306a36Sopenharmony_ci			spin_unlock_irq(&pipe->rd_wait.lock);
51862306a36Sopenharmony_ci
51962306a36Sopenharmony_ci			/* Insert it into the buffer array */
52062306a36Sopenharmony_ci			buf = &pipe->bufs[head & mask];
52162306a36Sopenharmony_ci			buf->page = page;
52262306a36Sopenharmony_ci			buf->ops = &anon_pipe_buf_ops;
52362306a36Sopenharmony_ci			buf->offset = 0;
52462306a36Sopenharmony_ci			buf->len = 0;
52562306a36Sopenharmony_ci			if (is_packetized(filp))
52662306a36Sopenharmony_ci				buf->flags = PIPE_BUF_FLAG_PACKET;
52762306a36Sopenharmony_ci			else
52862306a36Sopenharmony_ci				buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
			/* The page now belongs to the ring slot, not the cache. */
52962306a36Sopenharmony_ci			pipe->tmp_page = NULL;
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci			copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
			/*
			 * A short copy is an error only if the iterator still
			 * has data left; a short final page is normal.  On
			 * error the slot stays in the ring with len == 0.
			 */
53262306a36Sopenharmony_ci			if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
53362306a36Sopenharmony_ci				if (!ret)
53462306a36Sopenharmony_ci					ret = -EFAULT;
53562306a36Sopenharmony_ci				break;
53662306a36Sopenharmony_ci			}
53762306a36Sopenharmony_ci			ret += copied;
53862306a36Sopenharmony_ci			buf->len = copied;
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci			if (!iov_iter_count(from))
54162306a36Sopenharmony_ci				break;
54262306a36Sopenharmony_ci		}
54362306a36Sopenharmony_ci
54462306a36Sopenharmony_ci		if (!pipe_full(head, pipe->tail, pipe->max_usage))
54562306a36Sopenharmony_ci			continue;
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ci		/* Wait for buffer space to become available. */
54862306a36Sopenharmony_ci		if ((filp->f_flags & O_NONBLOCK) ||
54962306a36Sopenharmony_ci		    (iocb->ki_flags & IOCB_NOWAIT)) {
55062306a36Sopenharmony_ci			if (!ret)
55162306a36Sopenharmony_ci				ret = -EAGAIN;
55262306a36Sopenharmony_ci			break;
55362306a36Sopenharmony_ci		}
55462306a36Sopenharmony_ci		if (signal_pending(current)) {
55562306a36Sopenharmony_ci			if (!ret)
55662306a36Sopenharmony_ci				ret = -ERESTARTSYS;
55762306a36Sopenharmony_ci			break;
55862306a36Sopenharmony_ci		}
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_ci		/*
56162306a36Sopenharmony_ci		 * We're going to release the pipe lock and wait for more
56262306a36Sopenharmony_ci		 * space. We wake up any readers if necessary, and then
56362306a36Sopenharmony_ci		 * after waiting we need to re-check whether the pipe
56462306a36Sopenharmony_ci		 * became empty while we dropped the lock.
56562306a36Sopenharmony_ci		 */
56662306a36Sopenharmony_ci		__pipe_unlock(pipe);
56762306a36Sopenharmony_ci		if (was_empty)
56862306a36Sopenharmony_ci			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
56962306a36Sopenharmony_ci		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		/*
		 * The wait's return value is not examined here; the next pass
		 * through the loop re-tests the pipe state and
		 * signal_pending().
		 */
57062306a36Sopenharmony_ci		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
57162306a36Sopenharmony_ci		__pipe_lock(pipe);
57262306a36Sopenharmony_ci		was_empty = pipe_empty(pipe->head, pipe->tail);
57362306a36Sopenharmony_ci		wake_next_writer = true;
57462306a36Sopenharmony_ci	}
57562306a36Sopenharmony_ciout:
	/* No room is left for another writer, so skip the writer wakeup. */
57662306a36Sopenharmony_ci	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
57762306a36Sopenharmony_ci		wake_next_writer = false;
57862306a36Sopenharmony_ci	__pipe_unlock(pipe);
57962306a36Sopenharmony_ci
58062306a36Sopenharmony_ci	/*
58162306a36Sopenharmony_ci	 * If we do do a wakeup event, we do a 'sync' wakeup, because we
58262306a36Sopenharmony_ci	 * want the reader to start processing things asap, rather than
58362306a36Sopenharmony_ci	 * leave the data pending.
58462306a36Sopenharmony_ci	 *
58562306a36Sopenharmony_ci	 * This is particularly important for small writes, because of
58662306a36Sopenharmony_ci	 * how (for example) the GNU make jobserver uses small writes to
58762306a36Sopenharmony_ci	 * wake up pending jobs
58862306a36Sopenharmony_ci	 *
58962306a36Sopenharmony_ci	 * Epoll nonsensically wants a wakeup whether the pipe
59062306a36Sopenharmony_ci	 * was already empty or not.
59162306a36Sopenharmony_ci	 */
59262306a36Sopenharmony_ci	if (was_empty || pipe->poll_usage)
59362306a36Sopenharmony_ci		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
59462306a36Sopenharmony_ci	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
59562306a36Sopenharmony_ci	if (wake_next_writer)
59662306a36Sopenharmony_ci		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
	/*
	 * The timestamp update is skipped when sb_start_write_trylock()
	 * fails; when it runs, its error replaces the byte count.
	 */
59762306a36Sopenharmony_ci	if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
59862306a36Sopenharmony_ci		int err = file_update_time(filp);
59962306a36Sopenharmony_ci		if (err)
60062306a36Sopenharmony_ci			ret = err;
60162306a36Sopenharmony_ci		sb_end_write(file_inode(filp)->i_sb);
60262306a36Sopenharmony_ci	}
60362306a36Sopenharmony_ci	return ret;
60462306a36Sopenharmony_ci}
60562306a36Sopenharmony_ci
60662306a36Sopenharmony_cistatic long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
60762306a36Sopenharmony_ci{
60862306a36Sopenharmony_ci	struct pipe_inode_info *pipe = filp->private_data;
60962306a36Sopenharmony_ci	unsigned int count, head, tail, mask;
61062306a36Sopenharmony_ci
61162306a36Sopenharmony_ci	switch (cmd) {
61262306a36Sopenharmony_ci	case FIONREAD:
61362306a36Sopenharmony_ci		__pipe_lock(pipe);
61462306a36Sopenharmony_ci		count = 0;
61562306a36Sopenharmony_ci		head = pipe->head;
61662306a36Sopenharmony_ci		tail = pipe->tail;
61762306a36Sopenharmony_ci		mask = pipe->ring_size - 1;
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci		while (tail != head) {
62062306a36Sopenharmony_ci			count += pipe->bufs[tail & mask].len;
62162306a36Sopenharmony_ci			tail++;
62262306a36Sopenharmony_ci		}
62362306a36Sopenharmony_ci		__pipe_unlock(pipe);
62462306a36Sopenharmony_ci
62562306a36Sopenharmony_ci		return put_user(count, (int __user *)arg);
62662306a36Sopenharmony_ci
62762306a36Sopenharmony_ci#ifdef CONFIG_WATCH_QUEUE
62862306a36Sopenharmony_ci	case IOC_WATCH_QUEUE_SET_SIZE: {
62962306a36Sopenharmony_ci		int ret;
63062306a36Sopenharmony_ci		__pipe_lock(pipe);
63162306a36Sopenharmony_ci		ret = watch_queue_set_size(pipe, arg);
63262306a36Sopenharmony_ci		__pipe_unlock(pipe);
63362306a36Sopenharmony_ci		return ret;
63462306a36Sopenharmony_ci	}
63562306a36Sopenharmony_ci
63662306a36Sopenharmony_ci	case IOC_WATCH_QUEUE_SET_FILTER:
63762306a36Sopenharmony_ci		return watch_queue_set_filter(
63862306a36Sopenharmony_ci			pipe, (struct watch_notification_filter __user *)arg);
63962306a36Sopenharmony_ci#endif
64062306a36Sopenharmony_ci
64162306a36Sopenharmony_ci	default:
64262306a36Sopenharmony_ci		return -ENOIOCTLCMD;
64362306a36Sopenharmony_ci	}
64462306a36Sopenharmony_ci}
64562306a36Sopenharmony_ci
64662306a36Sopenharmony_ci/* No kernel lock held - fine */
64762306a36Sopenharmony_cistatic __poll_t
64862306a36Sopenharmony_cipipe_poll(struct file *filp, poll_table *wait)
64962306a36Sopenharmony_ci{
65062306a36Sopenharmony_ci	__poll_t mask;
65162306a36Sopenharmony_ci	struct pipe_inode_info *pipe = filp->private_data;
65262306a36Sopenharmony_ci	unsigned int head, tail;
65362306a36Sopenharmony_ci
65462306a36Sopenharmony_ci	/* Epoll has some historical nasty semantics, this enables them */
65562306a36Sopenharmony_ci	WRITE_ONCE(pipe->poll_usage, true);
65662306a36Sopenharmony_ci
65762306a36Sopenharmony_ci	/*
65862306a36Sopenharmony_ci	 * Reading pipe state only -- no need for acquiring the semaphore.
65962306a36Sopenharmony_ci	 *
66062306a36Sopenharmony_ci	 * But because this is racy, the code has to add the
66162306a36Sopenharmony_ci	 * entry to the poll table _first_ ..
66262306a36Sopenharmony_ci	 */
66362306a36Sopenharmony_ci	if (filp->f_mode & FMODE_READ)
66462306a36Sopenharmony_ci		poll_wait(filp, &pipe->rd_wait, wait);
66562306a36Sopenharmony_ci	if (filp->f_mode & FMODE_WRITE)
66662306a36Sopenharmony_ci		poll_wait(filp, &pipe->wr_wait, wait);
66762306a36Sopenharmony_ci
66862306a36Sopenharmony_ci	/*
66962306a36Sopenharmony_ci	 * .. and only then can you do the racy tests. That way,
67062306a36Sopenharmony_ci	 * if something changes and you got it wrong, the poll
67162306a36Sopenharmony_ci	 * table entry will wake you up and fix it.
67262306a36Sopenharmony_ci	 */
67362306a36Sopenharmony_ci	head = READ_ONCE(pipe->head);
67462306a36Sopenharmony_ci	tail = READ_ONCE(pipe->tail);
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_ci	mask = 0;
67762306a36Sopenharmony_ci	if (filp->f_mode & FMODE_READ) {
67862306a36Sopenharmony_ci		if (!pipe_empty(head, tail))
67962306a36Sopenharmony_ci			mask |= EPOLLIN | EPOLLRDNORM;
68062306a36Sopenharmony_ci		if (!pipe->writers && filp->f_version != pipe->w_counter)
68162306a36Sopenharmony_ci			mask |= EPOLLHUP;
68262306a36Sopenharmony_ci	}
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_ci	if (filp->f_mode & FMODE_WRITE) {
68562306a36Sopenharmony_ci		if (!pipe_full(head, tail, pipe->max_usage))
68662306a36Sopenharmony_ci			mask |= EPOLLOUT | EPOLLWRNORM;
68762306a36Sopenharmony_ci		/*
68862306a36Sopenharmony_ci		 * Most Unices do not set EPOLLERR for FIFOs but on Linux they
68962306a36Sopenharmony_ci		 * behave exactly like pipes for poll().
69062306a36Sopenharmony_ci		 */
69162306a36Sopenharmony_ci		if (!pipe->readers)
69262306a36Sopenharmony_ci			mask |= EPOLLERR;
69362306a36Sopenharmony_ci	}
69462306a36Sopenharmony_ci
69562306a36Sopenharmony_ci	return mask;
69662306a36Sopenharmony_ci}
69762306a36Sopenharmony_ci
69862306a36Sopenharmony_cistatic void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
69962306a36Sopenharmony_ci{
70062306a36Sopenharmony_ci	int kill = 0;
70162306a36Sopenharmony_ci
70262306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
70362306a36Sopenharmony_ci	if (!--pipe->files) {
70462306a36Sopenharmony_ci		inode->i_pipe = NULL;
70562306a36Sopenharmony_ci		kill = 1;
70662306a36Sopenharmony_ci	}
70762306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
70862306a36Sopenharmony_ci
70962306a36Sopenharmony_ci	if (kill)
71062306a36Sopenharmony_ci		free_pipe_info(pipe);
71162306a36Sopenharmony_ci}
71262306a36Sopenharmony_ci
71362306a36Sopenharmony_cistatic int
71462306a36Sopenharmony_cipipe_release(struct inode *inode, struct file *file)
71562306a36Sopenharmony_ci{
71662306a36Sopenharmony_ci	struct pipe_inode_info *pipe = file->private_data;
71762306a36Sopenharmony_ci
71862306a36Sopenharmony_ci	__pipe_lock(pipe);
71962306a36Sopenharmony_ci	if (file->f_mode & FMODE_READ)
72062306a36Sopenharmony_ci		pipe->readers--;
72162306a36Sopenharmony_ci	if (file->f_mode & FMODE_WRITE)
72262306a36Sopenharmony_ci		pipe->writers--;
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci	/* Was that the last reader or writer, but not the other side? */
72562306a36Sopenharmony_ci	if (!pipe->readers != !pipe->writers) {
72662306a36Sopenharmony_ci		wake_up_interruptible_all(&pipe->rd_wait);
72762306a36Sopenharmony_ci		wake_up_interruptible_all(&pipe->wr_wait);
72862306a36Sopenharmony_ci		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
72962306a36Sopenharmony_ci		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
73062306a36Sopenharmony_ci	}
73162306a36Sopenharmony_ci	__pipe_unlock(pipe);
73262306a36Sopenharmony_ci
73362306a36Sopenharmony_ci	put_pipe_info(inode, pipe);
73462306a36Sopenharmony_ci	return 0;
73562306a36Sopenharmony_ci}
73662306a36Sopenharmony_ci
73762306a36Sopenharmony_cistatic int
73862306a36Sopenharmony_cipipe_fasync(int fd, struct file *filp, int on)
73962306a36Sopenharmony_ci{
74062306a36Sopenharmony_ci	struct pipe_inode_info *pipe = filp->private_data;
74162306a36Sopenharmony_ci	int retval = 0;
74262306a36Sopenharmony_ci
74362306a36Sopenharmony_ci	__pipe_lock(pipe);
74462306a36Sopenharmony_ci	if (filp->f_mode & FMODE_READ)
74562306a36Sopenharmony_ci		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
74662306a36Sopenharmony_ci	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
74762306a36Sopenharmony_ci		retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
74862306a36Sopenharmony_ci		if (retval < 0 && (filp->f_mode & FMODE_READ))
74962306a36Sopenharmony_ci			/* this can happen only if on == T */
75062306a36Sopenharmony_ci			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
75162306a36Sopenharmony_ci	}
75262306a36Sopenharmony_ci	__pipe_unlock(pipe);
75362306a36Sopenharmony_ci	return retval;
75462306a36Sopenharmony_ci}
75562306a36Sopenharmony_ci
75662306a36Sopenharmony_ciunsigned long account_pipe_buffers(struct user_struct *user,
75762306a36Sopenharmony_ci				   unsigned long old, unsigned long new)
75862306a36Sopenharmony_ci{
75962306a36Sopenharmony_ci	return atomic_long_add_return(new - old, &user->pipe_bufs);
76062306a36Sopenharmony_ci}
76162306a36Sopenharmony_ci
76262306a36Sopenharmony_cibool too_many_pipe_buffers_soft(unsigned long user_bufs)
76362306a36Sopenharmony_ci{
76462306a36Sopenharmony_ci	unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);
76562306a36Sopenharmony_ci
76662306a36Sopenharmony_ci	return soft_limit && user_bufs > soft_limit;
76762306a36Sopenharmony_ci}
76862306a36Sopenharmony_ci
76962306a36Sopenharmony_cibool too_many_pipe_buffers_hard(unsigned long user_bufs)
77062306a36Sopenharmony_ci{
77162306a36Sopenharmony_ci	unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci	return hard_limit && user_bufs > hard_limit;
77462306a36Sopenharmony_ci}
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_cibool pipe_is_unprivileged_user(void)
77762306a36Sopenharmony_ci{
77862306a36Sopenharmony_ci	return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
77962306a36Sopenharmony_ci}
78062306a36Sopenharmony_ci
78162306a36Sopenharmony_cistruct pipe_inode_info *alloc_pipe_info(void)
78262306a36Sopenharmony_ci{
78362306a36Sopenharmony_ci	struct pipe_inode_info *pipe;
78462306a36Sopenharmony_ci	unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
78562306a36Sopenharmony_ci	struct user_struct *user = get_current_user();
78662306a36Sopenharmony_ci	unsigned long user_bufs;
78762306a36Sopenharmony_ci	unsigned int max_size = READ_ONCE(pipe_max_size);
78862306a36Sopenharmony_ci
78962306a36Sopenharmony_ci	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
79062306a36Sopenharmony_ci	if (pipe == NULL)
79162306a36Sopenharmony_ci		goto out_free_uid;
79262306a36Sopenharmony_ci
79362306a36Sopenharmony_ci	if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
79462306a36Sopenharmony_ci		pipe_bufs = max_size >> PAGE_SHIFT;
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci	user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
79762306a36Sopenharmony_ci
79862306a36Sopenharmony_ci	if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
79962306a36Sopenharmony_ci		user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS);
80062306a36Sopenharmony_ci		pipe_bufs = PIPE_MIN_DEF_BUFFERS;
80162306a36Sopenharmony_ci	}
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci	if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
80462306a36Sopenharmony_ci		goto out_revert_acct;
80562306a36Sopenharmony_ci
80662306a36Sopenharmony_ci	pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
80762306a36Sopenharmony_ci			     GFP_KERNEL_ACCOUNT);
80862306a36Sopenharmony_ci
80962306a36Sopenharmony_ci	if (pipe->bufs) {
81062306a36Sopenharmony_ci		init_waitqueue_head(&pipe->rd_wait);
81162306a36Sopenharmony_ci		init_waitqueue_head(&pipe->wr_wait);
81262306a36Sopenharmony_ci		pipe->r_counter = pipe->w_counter = 1;
81362306a36Sopenharmony_ci		pipe->max_usage = pipe_bufs;
81462306a36Sopenharmony_ci		pipe->ring_size = pipe_bufs;
81562306a36Sopenharmony_ci		pipe->nr_accounted = pipe_bufs;
81662306a36Sopenharmony_ci		pipe->user = user;
81762306a36Sopenharmony_ci		mutex_init(&pipe->mutex);
81862306a36Sopenharmony_ci		return pipe;
81962306a36Sopenharmony_ci	}
82062306a36Sopenharmony_ci
82162306a36Sopenharmony_ciout_revert_acct:
82262306a36Sopenharmony_ci	(void) account_pipe_buffers(user, pipe_bufs, 0);
82362306a36Sopenharmony_ci	kfree(pipe);
82462306a36Sopenharmony_ciout_free_uid:
82562306a36Sopenharmony_ci	free_uid(user);
82662306a36Sopenharmony_ci	return NULL;
82762306a36Sopenharmony_ci}
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_civoid free_pipe_info(struct pipe_inode_info *pipe)
83062306a36Sopenharmony_ci{
83162306a36Sopenharmony_ci	unsigned int i;
83262306a36Sopenharmony_ci
83362306a36Sopenharmony_ci#ifdef CONFIG_WATCH_QUEUE
83462306a36Sopenharmony_ci	if (pipe->watch_queue)
83562306a36Sopenharmony_ci		watch_queue_clear(pipe->watch_queue);
83662306a36Sopenharmony_ci#endif
83762306a36Sopenharmony_ci
83862306a36Sopenharmony_ci	(void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
83962306a36Sopenharmony_ci	free_uid(pipe->user);
84062306a36Sopenharmony_ci	for (i = 0; i < pipe->ring_size; i++) {
84162306a36Sopenharmony_ci		struct pipe_buffer *buf = pipe->bufs + i;
84262306a36Sopenharmony_ci		if (buf->ops)
84362306a36Sopenharmony_ci			pipe_buf_release(pipe, buf);
84462306a36Sopenharmony_ci	}
84562306a36Sopenharmony_ci#ifdef CONFIG_WATCH_QUEUE
84662306a36Sopenharmony_ci	if (pipe->watch_queue)
84762306a36Sopenharmony_ci		put_watch_queue(pipe->watch_queue);
84862306a36Sopenharmony_ci#endif
84962306a36Sopenharmony_ci	if (pipe->tmp_page)
85062306a36Sopenharmony_ci		__free_page(pipe->tmp_page);
85162306a36Sopenharmony_ci	kfree(pipe->bufs);
85262306a36Sopenharmony_ci	kfree(pipe);
85362306a36Sopenharmony_ci}
85462306a36Sopenharmony_ci
85562306a36Sopenharmony_cistatic struct vfsmount *pipe_mnt __read_mostly;
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci/*
85862306a36Sopenharmony_ci * pipefs_dname() is called from d_path().
85962306a36Sopenharmony_ci */
86062306a36Sopenharmony_cistatic char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
86162306a36Sopenharmony_ci{
86262306a36Sopenharmony_ci	return dynamic_dname(buffer, buflen, "pipe:[%lu]",
86362306a36Sopenharmony_ci				d_inode(dentry)->i_ino);
86462306a36Sopenharmony_ci}
86562306a36Sopenharmony_ci
86662306a36Sopenharmony_cistatic const struct dentry_operations pipefs_dentry_operations = {
86762306a36Sopenharmony_ci	.d_dname	= pipefs_dname,
86862306a36Sopenharmony_ci};
86962306a36Sopenharmony_ci
87062306a36Sopenharmony_cistatic struct inode * get_pipe_inode(void)
87162306a36Sopenharmony_ci{
87262306a36Sopenharmony_ci	struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
87362306a36Sopenharmony_ci	struct pipe_inode_info *pipe;
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_ci	if (!inode)
87662306a36Sopenharmony_ci		goto fail_inode;
87762306a36Sopenharmony_ci
87862306a36Sopenharmony_ci	inode->i_ino = get_next_ino();
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_ci	pipe = alloc_pipe_info();
88162306a36Sopenharmony_ci	if (!pipe)
88262306a36Sopenharmony_ci		goto fail_iput;
88362306a36Sopenharmony_ci
88462306a36Sopenharmony_ci	inode->i_pipe = pipe;
88562306a36Sopenharmony_ci	pipe->files = 2;
88662306a36Sopenharmony_ci	pipe->readers = pipe->writers = 1;
88762306a36Sopenharmony_ci	inode->i_fop = &pipefifo_fops;
88862306a36Sopenharmony_ci
88962306a36Sopenharmony_ci	/*
89062306a36Sopenharmony_ci	 * Mark the inode dirty from the very beginning,
89162306a36Sopenharmony_ci	 * that way it will never be moved to the dirty
89262306a36Sopenharmony_ci	 * list because "mark_inode_dirty()" will think
89362306a36Sopenharmony_ci	 * that it already _is_ on the dirty list.
89462306a36Sopenharmony_ci	 */
89562306a36Sopenharmony_ci	inode->i_state = I_DIRTY;
89662306a36Sopenharmony_ci	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
89762306a36Sopenharmony_ci	inode->i_uid = current_fsuid();
89862306a36Sopenharmony_ci	inode->i_gid = current_fsgid();
89962306a36Sopenharmony_ci	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
90062306a36Sopenharmony_ci
90162306a36Sopenharmony_ci	return inode;
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_cifail_iput:
90462306a36Sopenharmony_ci	iput(inode);
90562306a36Sopenharmony_ci
90662306a36Sopenharmony_cifail_inode:
90762306a36Sopenharmony_ci	return NULL;
90862306a36Sopenharmony_ci}
90962306a36Sopenharmony_ci
91062306a36Sopenharmony_ciint create_pipe_files(struct file **res, int flags)
91162306a36Sopenharmony_ci{
91262306a36Sopenharmony_ci	struct inode *inode = get_pipe_inode();
91362306a36Sopenharmony_ci	struct file *f;
91462306a36Sopenharmony_ci	int error;
91562306a36Sopenharmony_ci
91662306a36Sopenharmony_ci	if (!inode)
91762306a36Sopenharmony_ci		return -ENFILE;
91862306a36Sopenharmony_ci
91962306a36Sopenharmony_ci	if (flags & O_NOTIFICATION_PIPE) {
92062306a36Sopenharmony_ci		error = watch_queue_init(inode->i_pipe);
92162306a36Sopenharmony_ci		if (error) {
92262306a36Sopenharmony_ci			free_pipe_info(inode->i_pipe);
92362306a36Sopenharmony_ci			iput(inode);
92462306a36Sopenharmony_ci			return error;
92562306a36Sopenharmony_ci		}
92662306a36Sopenharmony_ci	}
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ci	f = alloc_file_pseudo(inode, pipe_mnt, "",
92962306a36Sopenharmony_ci				O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
93062306a36Sopenharmony_ci				&pipefifo_fops);
93162306a36Sopenharmony_ci	if (IS_ERR(f)) {
93262306a36Sopenharmony_ci		free_pipe_info(inode->i_pipe);
93362306a36Sopenharmony_ci		iput(inode);
93462306a36Sopenharmony_ci		return PTR_ERR(f);
93562306a36Sopenharmony_ci	}
93662306a36Sopenharmony_ci
93762306a36Sopenharmony_ci	f->private_data = inode->i_pipe;
93862306a36Sopenharmony_ci
93962306a36Sopenharmony_ci	res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
94062306a36Sopenharmony_ci				  &pipefifo_fops);
94162306a36Sopenharmony_ci	if (IS_ERR(res[0])) {
94262306a36Sopenharmony_ci		put_pipe_info(inode, inode->i_pipe);
94362306a36Sopenharmony_ci		fput(f);
94462306a36Sopenharmony_ci		return PTR_ERR(res[0]);
94562306a36Sopenharmony_ci	}
94662306a36Sopenharmony_ci	res[0]->private_data = inode->i_pipe;
94762306a36Sopenharmony_ci	res[1] = f;
94862306a36Sopenharmony_ci	stream_open(inode, res[0]);
94962306a36Sopenharmony_ci	stream_open(inode, res[1]);
95062306a36Sopenharmony_ci	return 0;
95162306a36Sopenharmony_ci}
95262306a36Sopenharmony_ci
95362306a36Sopenharmony_cistatic int __do_pipe_flags(int *fd, struct file **files, int flags)
95462306a36Sopenharmony_ci{
95562306a36Sopenharmony_ci	int error;
95662306a36Sopenharmony_ci	int fdw, fdr;
95762306a36Sopenharmony_ci
95862306a36Sopenharmony_ci	if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
95962306a36Sopenharmony_ci		return -EINVAL;
96062306a36Sopenharmony_ci
96162306a36Sopenharmony_ci	error = create_pipe_files(files, flags);
96262306a36Sopenharmony_ci	if (error)
96362306a36Sopenharmony_ci		return error;
96462306a36Sopenharmony_ci
96562306a36Sopenharmony_ci	error = get_unused_fd_flags(flags);
96662306a36Sopenharmony_ci	if (error < 0)
96762306a36Sopenharmony_ci		goto err_read_pipe;
96862306a36Sopenharmony_ci	fdr = error;
96962306a36Sopenharmony_ci
97062306a36Sopenharmony_ci	error = get_unused_fd_flags(flags);
97162306a36Sopenharmony_ci	if (error < 0)
97262306a36Sopenharmony_ci		goto err_fdr;
97362306a36Sopenharmony_ci	fdw = error;
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_ci	audit_fd_pair(fdr, fdw);
97662306a36Sopenharmony_ci	fd[0] = fdr;
97762306a36Sopenharmony_ci	fd[1] = fdw;
97862306a36Sopenharmony_ci	/* pipe groks IOCB_NOWAIT */
97962306a36Sopenharmony_ci	files[0]->f_mode |= FMODE_NOWAIT;
98062306a36Sopenharmony_ci	files[1]->f_mode |= FMODE_NOWAIT;
98162306a36Sopenharmony_ci	return 0;
98262306a36Sopenharmony_ci
98362306a36Sopenharmony_ci err_fdr:
98462306a36Sopenharmony_ci	put_unused_fd(fdr);
98562306a36Sopenharmony_ci err_read_pipe:
98662306a36Sopenharmony_ci	fput(files[0]);
98762306a36Sopenharmony_ci	fput(files[1]);
98862306a36Sopenharmony_ci	return error;
98962306a36Sopenharmony_ci}
99062306a36Sopenharmony_ci
/*
 * Create a pipe and install both ends into the descriptor table.
 *
 * @fd receives the read (fd[0]) and write (fd[1]) descriptors.
 * Returns 0 on success or the error from __do_pipe_flags().
 */
int do_pipe_flags(int *fd, int flags)
{
	struct file *files[2];
	int error;

	error = __do_pipe_flags(fd, files, flags);
	if (error)
		return error;

	fd_install(fd[0], files[0]);
	fd_install(fd[1], files[1]);
	return 0;
}
100162306a36Sopenharmony_ci
100262306a36Sopenharmony_ci/*
100362306a36Sopenharmony_ci * sys_pipe() is the normal C calling standard for creating
100462306a36Sopenharmony_ci * a pipe. It's not the way Unix traditionally does this, though.
100562306a36Sopenharmony_ci */
100662306a36Sopenharmony_cistatic int do_pipe2(int __user *fildes, int flags)
100762306a36Sopenharmony_ci{
100862306a36Sopenharmony_ci	struct file *files[2];
100962306a36Sopenharmony_ci	int fd[2];
101062306a36Sopenharmony_ci	int error;
101162306a36Sopenharmony_ci
101262306a36Sopenharmony_ci	error = __do_pipe_flags(fd, files, flags);
101362306a36Sopenharmony_ci	if (!error) {
101462306a36Sopenharmony_ci		if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
101562306a36Sopenharmony_ci			fput(files[0]);
101662306a36Sopenharmony_ci			fput(files[1]);
101762306a36Sopenharmony_ci			put_unused_fd(fd[0]);
101862306a36Sopenharmony_ci			put_unused_fd(fd[1]);
101962306a36Sopenharmony_ci			error = -EFAULT;
102062306a36Sopenharmony_ci		} else {
102162306a36Sopenharmony_ci			fd_install(fd[0], files[0]);
102262306a36Sopenharmony_ci			fd_install(fd[1], files[1]);
102362306a36Sopenharmony_ci		}
102462306a36Sopenharmony_ci	}
102562306a36Sopenharmony_ci	return error;
102662306a36Sopenharmony_ci}
102762306a36Sopenharmony_ci
102862306a36Sopenharmony_ciSYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
102962306a36Sopenharmony_ci{
103062306a36Sopenharmony_ci	return do_pipe2(fildes, flags);
103162306a36Sopenharmony_ci}
103262306a36Sopenharmony_ci
103362306a36Sopenharmony_ciSYSCALL_DEFINE1(pipe, int __user *, fildes)
103462306a36Sopenharmony_ci{
103562306a36Sopenharmony_ci	return do_pipe2(fildes, 0);
103662306a36Sopenharmony_ci}
103762306a36Sopenharmony_ci
103862306a36Sopenharmony_ci/*
103962306a36Sopenharmony_ci * This is the stupid "wait for pipe to be readable or writable"
104062306a36Sopenharmony_ci * model.
104162306a36Sopenharmony_ci *
104262306a36Sopenharmony_ci * See pipe_read/write() for the proper kind of exclusive wait,
104362306a36Sopenharmony_ci * but that requires that we wake up any other readers/writers
104462306a36Sopenharmony_ci * if we then do not end up reading everything (ie the whole
104562306a36Sopenharmony_ci * "wake_next_reader/writer" logic in pipe_read/write()).
104662306a36Sopenharmony_ci */
104762306a36Sopenharmony_civoid pipe_wait_readable(struct pipe_inode_info *pipe)
104862306a36Sopenharmony_ci{
104962306a36Sopenharmony_ci	pipe_unlock(pipe);
105062306a36Sopenharmony_ci	wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe));
105162306a36Sopenharmony_ci	pipe_lock(pipe);
105262306a36Sopenharmony_ci}
105362306a36Sopenharmony_ci
105462306a36Sopenharmony_civoid pipe_wait_writable(struct pipe_inode_info *pipe)
105562306a36Sopenharmony_ci{
105662306a36Sopenharmony_ci	pipe_unlock(pipe);
105762306a36Sopenharmony_ci	wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe));
105862306a36Sopenharmony_ci	pipe_lock(pipe);
105962306a36Sopenharmony_ci}
106062306a36Sopenharmony_ci
106162306a36Sopenharmony_ci/*
106262306a36Sopenharmony_ci * This depends on both the wait (here) and the wakeup (wake_up_partner)
106362306a36Sopenharmony_ci * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot
106462306a36Sopenharmony_ci * race with the count check and waitqueue prep.
106562306a36Sopenharmony_ci *
106662306a36Sopenharmony_ci * Normally in order to avoid races, you'd do the prepare_to_wait() first,
106762306a36Sopenharmony_ci * then check the condition you're waiting for, and only then sleep. But
106862306a36Sopenharmony_ci * because of the pipe lock, we can check the condition before being on
106962306a36Sopenharmony_ci * the wait queue.
107062306a36Sopenharmony_ci *
107162306a36Sopenharmony_ci * We use the 'rd_wait' waitqueue for pipe partner waiting.
107262306a36Sopenharmony_ci */
107362306a36Sopenharmony_cistatic int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
107462306a36Sopenharmony_ci{
107562306a36Sopenharmony_ci	DEFINE_WAIT(rdwait);
107662306a36Sopenharmony_ci	int cur = *cnt;
107762306a36Sopenharmony_ci
107862306a36Sopenharmony_ci	while (cur == *cnt) {
107962306a36Sopenharmony_ci		prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
108062306a36Sopenharmony_ci		pipe_unlock(pipe);
108162306a36Sopenharmony_ci		schedule();
108262306a36Sopenharmony_ci		finish_wait(&pipe->rd_wait, &rdwait);
108362306a36Sopenharmony_ci		pipe_lock(pipe);
108462306a36Sopenharmony_ci		if (signal_pending(current))
108562306a36Sopenharmony_ci			break;
108662306a36Sopenharmony_ci	}
108762306a36Sopenharmony_ci	return cur == *cnt ? -ERESTARTSYS : 0;
108862306a36Sopenharmony_ci}
108962306a36Sopenharmony_ci
109062306a36Sopenharmony_cistatic void wake_up_partner(struct pipe_inode_info *pipe)
109162306a36Sopenharmony_ci{
109262306a36Sopenharmony_ci	wake_up_interruptible_all(&pipe->rd_wait);
109362306a36Sopenharmony_ci}
109462306a36Sopenharmony_ci
109562306a36Sopenharmony_cistatic int fifo_open(struct inode *inode, struct file *filp)
109662306a36Sopenharmony_ci{
109762306a36Sopenharmony_ci	struct pipe_inode_info *pipe;
109862306a36Sopenharmony_ci	bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
109962306a36Sopenharmony_ci	int ret;
110062306a36Sopenharmony_ci
110162306a36Sopenharmony_ci	filp->f_version = 0;
110262306a36Sopenharmony_ci
110362306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
110462306a36Sopenharmony_ci	if (inode->i_pipe) {
110562306a36Sopenharmony_ci		pipe = inode->i_pipe;
110662306a36Sopenharmony_ci		pipe->files++;
110762306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
110862306a36Sopenharmony_ci	} else {
110962306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
111062306a36Sopenharmony_ci		pipe = alloc_pipe_info();
111162306a36Sopenharmony_ci		if (!pipe)
111262306a36Sopenharmony_ci			return -ENOMEM;
111362306a36Sopenharmony_ci		pipe->files = 1;
111462306a36Sopenharmony_ci		spin_lock(&inode->i_lock);
111562306a36Sopenharmony_ci		if (unlikely(inode->i_pipe)) {
111662306a36Sopenharmony_ci			inode->i_pipe->files++;
111762306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
111862306a36Sopenharmony_ci			free_pipe_info(pipe);
111962306a36Sopenharmony_ci			pipe = inode->i_pipe;
112062306a36Sopenharmony_ci		} else {
112162306a36Sopenharmony_ci			inode->i_pipe = pipe;
112262306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
112362306a36Sopenharmony_ci		}
112462306a36Sopenharmony_ci	}
112562306a36Sopenharmony_ci	filp->private_data = pipe;
112662306a36Sopenharmony_ci	/* OK, we have a pipe and it's pinned down */
112762306a36Sopenharmony_ci
112862306a36Sopenharmony_ci	__pipe_lock(pipe);
112962306a36Sopenharmony_ci
113062306a36Sopenharmony_ci	/* We can only do regular read/write on fifos */
113162306a36Sopenharmony_ci	stream_open(inode, filp);
113262306a36Sopenharmony_ci
113362306a36Sopenharmony_ci	switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) {
113462306a36Sopenharmony_ci	case FMODE_READ:
113562306a36Sopenharmony_ci	/*
113662306a36Sopenharmony_ci	 *  O_RDONLY
113762306a36Sopenharmony_ci	 *  POSIX.1 says that O_NONBLOCK means return with the FIFO
113862306a36Sopenharmony_ci	 *  opened, even when there is no process writing the FIFO.
113962306a36Sopenharmony_ci	 */
114062306a36Sopenharmony_ci		pipe->r_counter++;
114162306a36Sopenharmony_ci		if (pipe->readers++ == 0)
114262306a36Sopenharmony_ci			wake_up_partner(pipe);
114362306a36Sopenharmony_ci
114462306a36Sopenharmony_ci		if (!is_pipe && !pipe->writers) {
114562306a36Sopenharmony_ci			if ((filp->f_flags & O_NONBLOCK)) {
114662306a36Sopenharmony_ci				/* suppress EPOLLHUP until we have
114762306a36Sopenharmony_ci				 * seen a writer */
114862306a36Sopenharmony_ci				filp->f_version = pipe->w_counter;
114962306a36Sopenharmony_ci			} else {
115062306a36Sopenharmony_ci				if (wait_for_partner(pipe, &pipe->w_counter))
115162306a36Sopenharmony_ci					goto err_rd;
115262306a36Sopenharmony_ci			}
115362306a36Sopenharmony_ci		}
115462306a36Sopenharmony_ci		break;
115562306a36Sopenharmony_ci
115662306a36Sopenharmony_ci	case FMODE_WRITE:
115762306a36Sopenharmony_ci	/*
115862306a36Sopenharmony_ci	 *  O_WRONLY
115962306a36Sopenharmony_ci	 *  POSIX.1 says that O_NONBLOCK means return -1 with
116062306a36Sopenharmony_ci	 *  errno=ENXIO when there is no process reading the FIFO.
116162306a36Sopenharmony_ci	 */
116262306a36Sopenharmony_ci		ret = -ENXIO;
116362306a36Sopenharmony_ci		if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
116462306a36Sopenharmony_ci			goto err;
116562306a36Sopenharmony_ci
116662306a36Sopenharmony_ci		pipe->w_counter++;
116762306a36Sopenharmony_ci		if (!pipe->writers++)
116862306a36Sopenharmony_ci			wake_up_partner(pipe);
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_ci		if (!is_pipe && !pipe->readers) {
117162306a36Sopenharmony_ci			if (wait_for_partner(pipe, &pipe->r_counter))
117262306a36Sopenharmony_ci				goto err_wr;
117362306a36Sopenharmony_ci		}
117462306a36Sopenharmony_ci		break;
117562306a36Sopenharmony_ci
117662306a36Sopenharmony_ci	case FMODE_READ | FMODE_WRITE:
117762306a36Sopenharmony_ci	/*
117862306a36Sopenharmony_ci	 *  O_RDWR
117962306a36Sopenharmony_ci	 *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
118062306a36Sopenharmony_ci	 *  This implementation will NEVER block on a O_RDWR open, since
118162306a36Sopenharmony_ci	 *  the process can at least talk to itself.
118262306a36Sopenharmony_ci	 */
118362306a36Sopenharmony_ci
118462306a36Sopenharmony_ci		pipe->readers++;
118562306a36Sopenharmony_ci		pipe->writers++;
118662306a36Sopenharmony_ci		pipe->r_counter++;
118762306a36Sopenharmony_ci		pipe->w_counter++;
118862306a36Sopenharmony_ci		if (pipe->readers == 1 || pipe->writers == 1)
118962306a36Sopenharmony_ci			wake_up_partner(pipe);
119062306a36Sopenharmony_ci		break;
119162306a36Sopenharmony_ci
119262306a36Sopenharmony_ci	default:
119362306a36Sopenharmony_ci		ret = -EINVAL;
119462306a36Sopenharmony_ci		goto err;
119562306a36Sopenharmony_ci	}
119662306a36Sopenharmony_ci
119762306a36Sopenharmony_ci	/* Ok! */
119862306a36Sopenharmony_ci	__pipe_unlock(pipe);
119962306a36Sopenharmony_ci	return 0;
120062306a36Sopenharmony_ci
120162306a36Sopenharmony_cierr_rd:
120262306a36Sopenharmony_ci	if (!--pipe->readers)
120362306a36Sopenharmony_ci		wake_up_interruptible(&pipe->wr_wait);
120462306a36Sopenharmony_ci	ret = -ERESTARTSYS;
120562306a36Sopenharmony_ci	goto err;
120662306a36Sopenharmony_ci
120762306a36Sopenharmony_cierr_wr:
120862306a36Sopenharmony_ci	if (!--pipe->writers)
120962306a36Sopenharmony_ci		wake_up_interruptible_all(&pipe->rd_wait);
121062306a36Sopenharmony_ci	ret = -ERESTARTSYS;
121162306a36Sopenharmony_ci	goto err;
121262306a36Sopenharmony_ci
121362306a36Sopenharmony_cierr:
121462306a36Sopenharmony_ci	__pipe_unlock(pipe);
121562306a36Sopenharmony_ci
121662306a36Sopenharmony_ci	put_pipe_info(inode, pipe);
121762306a36Sopenharmony_ci	return ret;
121862306a36Sopenharmony_ci}
121962306a36Sopenharmony_ci
122062306a36Sopenharmony_ciconst struct file_operations pipefifo_fops = {
122162306a36Sopenharmony_ci	.open		= fifo_open,
122262306a36Sopenharmony_ci	.llseek		= no_llseek,
122362306a36Sopenharmony_ci	.read_iter	= pipe_read,
122462306a36Sopenharmony_ci	.write_iter	= pipe_write,
122562306a36Sopenharmony_ci	.poll		= pipe_poll,
122662306a36Sopenharmony_ci	.unlocked_ioctl	= pipe_ioctl,
122762306a36Sopenharmony_ci	.release	= pipe_release,
122862306a36Sopenharmony_ci	.fasync		= pipe_fasync,
122962306a36Sopenharmony_ci	.splice_write	= iter_file_splice_write,
123062306a36Sopenharmony_ci};
123162306a36Sopenharmony_ci
123262306a36Sopenharmony_ci/*
123362306a36Sopenharmony_ci * Currently we rely on the pipe array holding a power-of-2 number
123462306a36Sopenharmony_ci * of pages. Returns 0 on error.
123562306a36Sopenharmony_ci */
123662306a36Sopenharmony_ciunsigned int round_pipe_size(unsigned int size)
123762306a36Sopenharmony_ci{
123862306a36Sopenharmony_ci	if (size > (1U << 31))
123962306a36Sopenharmony_ci		return 0;
124062306a36Sopenharmony_ci
124162306a36Sopenharmony_ci	/* Minimum pipe size, as required by POSIX */
124262306a36Sopenharmony_ci	if (size < PAGE_SIZE)
124362306a36Sopenharmony_ci		return PAGE_SIZE;
124462306a36Sopenharmony_ci
124562306a36Sopenharmony_ci	return roundup_pow_of_two(size);
124662306a36Sopenharmony_ci}
124762306a36Sopenharmony_ci
124862306a36Sopenharmony_ci/*
124962306a36Sopenharmony_ci * Resize the pipe ring to a number of slots.
125062306a36Sopenharmony_ci *
125162306a36Sopenharmony_ci * Note the pipe can be reduced in capacity, but only if the current
125262306a36Sopenharmony_ci * occupancy doesn't exceed nr_slots; if it does, EBUSY will be
125362306a36Sopenharmony_ci * returned instead.
125462306a36Sopenharmony_ci */
125562306a36Sopenharmony_ciint pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
125662306a36Sopenharmony_ci{
125762306a36Sopenharmony_ci	struct pipe_buffer *bufs;
125862306a36Sopenharmony_ci	unsigned int head, tail, mask, n;
125962306a36Sopenharmony_ci
126062306a36Sopenharmony_ci	bufs = kcalloc(nr_slots, sizeof(*bufs),
126162306a36Sopenharmony_ci		       GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
126262306a36Sopenharmony_ci	if (unlikely(!bufs))
126362306a36Sopenharmony_ci		return -ENOMEM;
126462306a36Sopenharmony_ci
126562306a36Sopenharmony_ci	spin_lock_irq(&pipe->rd_wait.lock);
126662306a36Sopenharmony_ci	mask = pipe->ring_size - 1;
126762306a36Sopenharmony_ci	head = pipe->head;
126862306a36Sopenharmony_ci	tail = pipe->tail;
126962306a36Sopenharmony_ci
127062306a36Sopenharmony_ci	n = pipe_occupancy(head, tail);
127162306a36Sopenharmony_ci	if (nr_slots < n) {
127262306a36Sopenharmony_ci		spin_unlock_irq(&pipe->rd_wait.lock);
127362306a36Sopenharmony_ci		kfree(bufs);
127462306a36Sopenharmony_ci		return -EBUSY;
127562306a36Sopenharmony_ci	}
127662306a36Sopenharmony_ci
127762306a36Sopenharmony_ci	/*
127862306a36Sopenharmony_ci	 * The pipe array wraps around, so just start the new one at zero
127962306a36Sopenharmony_ci	 * and adjust the indices.
128062306a36Sopenharmony_ci	 */
128162306a36Sopenharmony_ci	if (n > 0) {
128262306a36Sopenharmony_ci		unsigned int h = head & mask;
128362306a36Sopenharmony_ci		unsigned int t = tail & mask;
128462306a36Sopenharmony_ci		if (h > t) {
128562306a36Sopenharmony_ci			memcpy(bufs, pipe->bufs + t,
128662306a36Sopenharmony_ci			       n * sizeof(struct pipe_buffer));
128762306a36Sopenharmony_ci		} else {
128862306a36Sopenharmony_ci			unsigned int tsize = pipe->ring_size - t;
128962306a36Sopenharmony_ci			if (h > 0)
129062306a36Sopenharmony_ci				memcpy(bufs + tsize, pipe->bufs,
129162306a36Sopenharmony_ci				       h * sizeof(struct pipe_buffer));
129262306a36Sopenharmony_ci			memcpy(bufs, pipe->bufs + t,
129362306a36Sopenharmony_ci			       tsize * sizeof(struct pipe_buffer));
129462306a36Sopenharmony_ci		}
129562306a36Sopenharmony_ci	}
129662306a36Sopenharmony_ci
129762306a36Sopenharmony_ci	head = n;
129862306a36Sopenharmony_ci	tail = 0;
129962306a36Sopenharmony_ci
130062306a36Sopenharmony_ci	kfree(pipe->bufs);
130162306a36Sopenharmony_ci	pipe->bufs = bufs;
130262306a36Sopenharmony_ci	pipe->ring_size = nr_slots;
130362306a36Sopenharmony_ci	if (pipe->max_usage > nr_slots)
130462306a36Sopenharmony_ci		pipe->max_usage = nr_slots;
130562306a36Sopenharmony_ci	pipe->tail = tail;
130662306a36Sopenharmony_ci	pipe->head = head;
130762306a36Sopenharmony_ci
130862306a36Sopenharmony_ci	if (!pipe_has_watch_queue(pipe)) {
130962306a36Sopenharmony_ci		pipe->max_usage = nr_slots;
131062306a36Sopenharmony_ci		pipe->nr_accounted = nr_slots;
131162306a36Sopenharmony_ci	}
131262306a36Sopenharmony_ci
131362306a36Sopenharmony_ci	spin_unlock_irq(&pipe->rd_wait.lock);
131462306a36Sopenharmony_ci
131562306a36Sopenharmony_ci	/* This might have made more room for writers */
131662306a36Sopenharmony_ci	wake_up_interruptible(&pipe->wr_wait);
131762306a36Sopenharmony_ci	return 0;
131862306a36Sopenharmony_ci}
131962306a36Sopenharmony_ci
132062306a36Sopenharmony_ci/*
132162306a36Sopenharmony_ci * Allocate a new array of pipe buffers and copy the info over. Returns the
132262306a36Sopenharmony_ci * pipe size if successful, or return -ERROR on error.
132362306a36Sopenharmony_ci */
132462306a36Sopenharmony_cistatic long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg)
132562306a36Sopenharmony_ci{
132662306a36Sopenharmony_ci	unsigned long user_bufs;
132762306a36Sopenharmony_ci	unsigned int nr_slots, size;
132862306a36Sopenharmony_ci	long ret = 0;
132962306a36Sopenharmony_ci
133062306a36Sopenharmony_ci	if (pipe_has_watch_queue(pipe))
133162306a36Sopenharmony_ci		return -EBUSY;
133262306a36Sopenharmony_ci
133362306a36Sopenharmony_ci	size = round_pipe_size(arg);
133462306a36Sopenharmony_ci	nr_slots = size >> PAGE_SHIFT;
133562306a36Sopenharmony_ci
133662306a36Sopenharmony_ci	if (!nr_slots)
133762306a36Sopenharmony_ci		return -EINVAL;
133862306a36Sopenharmony_ci
133962306a36Sopenharmony_ci	/*
134062306a36Sopenharmony_ci	 * If trying to increase the pipe capacity, check that an
134162306a36Sopenharmony_ci	 * unprivileged user is not trying to exceed various limits
134262306a36Sopenharmony_ci	 * (soft limit check here, hard limit check just below).
134362306a36Sopenharmony_ci	 * Decreasing the pipe capacity is always permitted, even
134462306a36Sopenharmony_ci	 * if the user is currently over a limit.
134562306a36Sopenharmony_ci	 */
134662306a36Sopenharmony_ci	if (nr_slots > pipe->max_usage &&
134762306a36Sopenharmony_ci			size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
134862306a36Sopenharmony_ci		return -EPERM;
134962306a36Sopenharmony_ci
135062306a36Sopenharmony_ci	user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots);
135162306a36Sopenharmony_ci
135262306a36Sopenharmony_ci	if (nr_slots > pipe->max_usage &&
135362306a36Sopenharmony_ci			(too_many_pipe_buffers_hard(user_bufs) ||
135462306a36Sopenharmony_ci			 too_many_pipe_buffers_soft(user_bufs)) &&
135562306a36Sopenharmony_ci			pipe_is_unprivileged_user()) {
135662306a36Sopenharmony_ci		ret = -EPERM;
135762306a36Sopenharmony_ci		goto out_revert_acct;
135862306a36Sopenharmony_ci	}
135962306a36Sopenharmony_ci
136062306a36Sopenharmony_ci	ret = pipe_resize_ring(pipe, nr_slots);
136162306a36Sopenharmony_ci	if (ret < 0)
136262306a36Sopenharmony_ci		goto out_revert_acct;
136362306a36Sopenharmony_ci
136462306a36Sopenharmony_ci	return pipe->max_usage * PAGE_SIZE;
136562306a36Sopenharmony_ci
136662306a36Sopenharmony_ciout_revert_acct:
136762306a36Sopenharmony_ci	(void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted);
136862306a36Sopenharmony_ci	return ret;
136962306a36Sopenharmony_ci}
137062306a36Sopenharmony_ci
137162306a36Sopenharmony_ci/*
137262306a36Sopenharmony_ci * Note that i_pipe and i_cdev share the same location, so checking ->i_pipe is
137362306a36Sopenharmony_ci * not enough to verify that this is a pipe.
137462306a36Sopenharmony_ci */
137562306a36Sopenharmony_cistruct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
137662306a36Sopenharmony_ci{
137762306a36Sopenharmony_ci	struct pipe_inode_info *pipe = file->private_data;
137862306a36Sopenharmony_ci
137962306a36Sopenharmony_ci	if (file->f_op != &pipefifo_fops || !pipe)
138062306a36Sopenharmony_ci		return NULL;
138162306a36Sopenharmony_ci	if (for_splice && pipe_has_watch_queue(pipe))
138262306a36Sopenharmony_ci		return NULL;
138362306a36Sopenharmony_ci	return pipe;
138462306a36Sopenharmony_ci}
138562306a36Sopenharmony_ci
138662306a36Sopenharmony_cilong pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
138762306a36Sopenharmony_ci{
138862306a36Sopenharmony_ci	struct pipe_inode_info *pipe;
138962306a36Sopenharmony_ci	long ret;
139062306a36Sopenharmony_ci
139162306a36Sopenharmony_ci	pipe = get_pipe_info(file, false);
139262306a36Sopenharmony_ci	if (!pipe)
139362306a36Sopenharmony_ci		return -EBADF;
139462306a36Sopenharmony_ci
139562306a36Sopenharmony_ci	__pipe_lock(pipe);
139662306a36Sopenharmony_ci
139762306a36Sopenharmony_ci	switch (cmd) {
139862306a36Sopenharmony_ci	case F_SETPIPE_SZ:
139962306a36Sopenharmony_ci		ret = pipe_set_size(pipe, arg);
140062306a36Sopenharmony_ci		break;
140162306a36Sopenharmony_ci	case F_GETPIPE_SZ:
140262306a36Sopenharmony_ci		ret = pipe->max_usage * PAGE_SIZE;
140362306a36Sopenharmony_ci		break;
140462306a36Sopenharmony_ci	default:
140562306a36Sopenharmony_ci		ret = -EINVAL;
140662306a36Sopenharmony_ci		break;
140762306a36Sopenharmony_ci	}
140862306a36Sopenharmony_ci
140962306a36Sopenharmony_ci	__pipe_unlock(pipe);
141062306a36Sopenharmony_ci	return ret;
141162306a36Sopenharmony_ci}
141262306a36Sopenharmony_ci
141362306a36Sopenharmony_cistatic const struct super_operations pipefs_ops = {
141462306a36Sopenharmony_ci	.destroy_inode = free_inode_nonrcu,
141562306a36Sopenharmony_ci	.statfs = simple_statfs,
141662306a36Sopenharmony_ci};
141762306a36Sopenharmony_ci
141862306a36Sopenharmony_ci/*
141962306a36Sopenharmony_ci * pipefs should _never_ be mounted by userland - too much of security hassle,
142062306a36Sopenharmony_ci * no real gain from having the whole whorehouse mounted. So we don't need
142162306a36Sopenharmony_ci * any operations on the root directory. However, we need a non-trivial
142262306a36Sopenharmony_ci * d_name - pipe: will go nicely and kill the special-casing in procfs.
142362306a36Sopenharmony_ci */
142462306a36Sopenharmony_ci
142562306a36Sopenharmony_cistatic int pipefs_init_fs_context(struct fs_context *fc)
142662306a36Sopenharmony_ci{
142762306a36Sopenharmony_ci	struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC);
142862306a36Sopenharmony_ci	if (!ctx)
142962306a36Sopenharmony_ci		return -ENOMEM;
143062306a36Sopenharmony_ci	ctx->ops = &pipefs_ops;
143162306a36Sopenharmony_ci	ctx->dops = &pipefs_dentry_operations;
143262306a36Sopenharmony_ci	return 0;
143362306a36Sopenharmony_ci}
143462306a36Sopenharmony_ci
143562306a36Sopenharmony_cistatic struct file_system_type pipe_fs_type = {
143662306a36Sopenharmony_ci	.name		= "pipefs",
143762306a36Sopenharmony_ci	.init_fs_context = pipefs_init_fs_context,
143862306a36Sopenharmony_ci	.kill_sb	= kill_anon_super,
143962306a36Sopenharmony_ci};
144062306a36Sopenharmony_ci
144162306a36Sopenharmony_ci#ifdef CONFIG_SYSCTL
/*
 * Conversion callback used by proc_dopipe_max_size() for "pipe-max-size".
 *
 * @lvalp: the parsed value on write, the value to report on read
 * @valp:  the unsigned int sysctl variable
 * @write: non-zero when userspace is writing the sysctl
 * @data:  unused
 *
 * On write the value is passed through round_pipe_size() before it is
 * stored, so the stored limit is always a size round_pipe_size() can
 * produce.  On read the stored value is reported unchanged.
 *
 * Returns 0 on success, or -EINVAL if the written value does not fit in
 * an unsigned int or is rejected by round_pipe_size().
 */
static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
					unsigned int *valp,
					int write, void *data)
{
	if (write) {
		unsigned int val;

		/*
		 * round_pipe_size() takes an unsigned int.  Reject anything
		 * wider here, otherwise a 64-bit value would be silently
		 * truncated and could be accepted as an unrelated, much
		 * smaller size instead of failing.
		 */
		if (*lvalp > UINT_MAX)
			return -EINVAL;

		val = round_pipe_size(*lvalp);
		if (val == 0)
			return -EINVAL;

		*valp = val;
	} else {
		*lvalp = (unsigned long) *valp;
	}

	return 0;
}
146162306a36Sopenharmony_ci
146262306a36Sopenharmony_cistatic int proc_dopipe_max_size(struct ctl_table *table, int write,
146362306a36Sopenharmony_ci				void *buffer, size_t *lenp, loff_t *ppos)
146462306a36Sopenharmony_ci{
146562306a36Sopenharmony_ci	return do_proc_douintvec(table, write, buffer, lenp, ppos,
146662306a36Sopenharmony_ci				 do_proc_dopipe_max_size_conv, NULL);
146762306a36Sopenharmony_ci}
146862306a36Sopenharmony_ci
146962306a36Sopenharmony_cistatic struct ctl_table fs_pipe_sysctls[] = {
147062306a36Sopenharmony_ci	{
147162306a36Sopenharmony_ci		.procname	= "pipe-max-size",
147262306a36Sopenharmony_ci		.data		= &pipe_max_size,
147362306a36Sopenharmony_ci		.maxlen		= sizeof(pipe_max_size),
147462306a36Sopenharmony_ci		.mode		= 0644,
147562306a36Sopenharmony_ci		.proc_handler	= proc_dopipe_max_size,
147662306a36Sopenharmony_ci	},
147762306a36Sopenharmony_ci	{
147862306a36Sopenharmony_ci		.procname	= "pipe-user-pages-hard",
147962306a36Sopenharmony_ci		.data		= &pipe_user_pages_hard,
148062306a36Sopenharmony_ci		.maxlen		= sizeof(pipe_user_pages_hard),
148162306a36Sopenharmony_ci		.mode		= 0644,
148262306a36Sopenharmony_ci		.proc_handler	= proc_doulongvec_minmax,
148362306a36Sopenharmony_ci	},
148462306a36Sopenharmony_ci	{
148562306a36Sopenharmony_ci		.procname	= "pipe-user-pages-soft",
148662306a36Sopenharmony_ci		.data		= &pipe_user_pages_soft,
148762306a36Sopenharmony_ci		.maxlen		= sizeof(pipe_user_pages_soft),
148862306a36Sopenharmony_ci		.mode		= 0644,
148962306a36Sopenharmony_ci		.proc_handler	= proc_doulongvec_minmax,
149062306a36Sopenharmony_ci	},
149162306a36Sopenharmony_ci	{ }
149262306a36Sopenharmony_ci};
149362306a36Sopenharmony_ci#endif
149462306a36Sopenharmony_ci
149562306a36Sopenharmony_cistatic int __init init_pipe_fs(void)
149662306a36Sopenharmony_ci{
149762306a36Sopenharmony_ci	int err = register_filesystem(&pipe_fs_type);
149862306a36Sopenharmony_ci
149962306a36Sopenharmony_ci	if (!err) {
150062306a36Sopenharmony_ci		pipe_mnt = kern_mount(&pipe_fs_type);
150162306a36Sopenharmony_ci		if (IS_ERR(pipe_mnt)) {
150262306a36Sopenharmony_ci			err = PTR_ERR(pipe_mnt);
150362306a36Sopenharmony_ci			unregister_filesystem(&pipe_fs_type);
150462306a36Sopenharmony_ci		}
150562306a36Sopenharmony_ci	}
150662306a36Sopenharmony_ci#ifdef CONFIG_SYSCTL
150762306a36Sopenharmony_ci	register_sysctl_init("fs", fs_pipe_sysctls);
150862306a36Sopenharmony_ci#endif
150962306a36Sopenharmony_ci	return err;
151062306a36Sopenharmony_ci}
151162306a36Sopenharmony_ci
151262306a36Sopenharmony_cifs_initcall(init_pipe_fs);
1513