xref: /kernel/linux/linux-5.10/fs/pipe.c (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci *  linux/fs/pipe.c
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include <linux/mm.h>
98c2ecf20Sopenharmony_ci#include <linux/file.h>
108c2ecf20Sopenharmony_ci#include <linux/poll.h>
118c2ecf20Sopenharmony_ci#include <linux/slab.h>
128c2ecf20Sopenharmony_ci#include <linux/module.h>
138c2ecf20Sopenharmony_ci#include <linux/init.h>
148c2ecf20Sopenharmony_ci#include <linux/fs.h>
158c2ecf20Sopenharmony_ci#include <linux/log2.h>
168c2ecf20Sopenharmony_ci#include <linux/mount.h>
178c2ecf20Sopenharmony_ci#include <linux/pseudo_fs.h>
188c2ecf20Sopenharmony_ci#include <linux/magic.h>
198c2ecf20Sopenharmony_ci#include <linux/pipe_fs_i.h>
208c2ecf20Sopenharmony_ci#include <linux/uio.h>
218c2ecf20Sopenharmony_ci#include <linux/highmem.h>
228c2ecf20Sopenharmony_ci#include <linux/pagemap.h>
238c2ecf20Sopenharmony_ci#include <linux/audit.h>
248c2ecf20Sopenharmony_ci#include <linux/syscalls.h>
258c2ecf20Sopenharmony_ci#include <linux/fcntl.h>
268c2ecf20Sopenharmony_ci#include <linux/memcontrol.h>
278c2ecf20Sopenharmony_ci#include <linux/watch_queue.h>
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_ci#include <linux/uaccess.h>
308c2ecf20Sopenharmony_ci#include <asm/ioctls.h>
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci#include "internal.h"
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_ci/*
358c2ecf20Sopenharmony_ci * New pipe buffers will be restricted to this size while the user is exceeding
368c2ecf20Sopenharmony_ci * their pipe buffer quota. The general pipe use case needs at least two
378c2ecf20Sopenharmony_ci * buffers: one for data yet to be read, and one for new data. If this is less
388c2ecf20Sopenharmony_ci * than two, then a write to a non-empty pipe may block even if the pipe is not
398c2ecf20Sopenharmony_ci * full. This can occur with GNU make jobserver or similar uses of pipes as
408c2ecf20Sopenharmony_ci * semaphores: multiple processes may be waiting to write tokens back to the
418c2ecf20Sopenharmony_ci * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
428c2ecf20Sopenharmony_ci *
438c2ecf20Sopenharmony_ci * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
448c2ecf20Sopenharmony_ci * own risk, namely: pipe writes to non-full pipes may block until the pipe is
458c2ecf20Sopenharmony_ci * emptied.
468c2ecf20Sopenharmony_ci */
478c2ecf20Sopenharmony_ci#define PIPE_MIN_DEF_BUFFERS 2
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_ci/*
508c2ecf20Sopenharmony_ci * The max size that a non-root user is allowed to grow the pipe. Can
518c2ecf20Sopenharmony_ci * be set by root in /proc/sys/fs/pipe-max-size
528c2ecf20Sopenharmony_ci */
538c2ecf20Sopenharmony_ciunsigned int pipe_max_size = 1048576;
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_ci/* Maximum allocatable pages per user. Hard limit is unset by default, soft
568c2ecf20Sopenharmony_ci * matches default values.
578c2ecf20Sopenharmony_ci */
588c2ecf20Sopenharmony_ciunsigned long pipe_user_pages_hard;
598c2ecf20Sopenharmony_ciunsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ci/*
628c2ecf20Sopenharmony_ci * We use head and tail indices that aren't masked off, except at the point of
638c2ecf20Sopenharmony_ci * dereference, but rather they're allowed to wrap naturally.  This means there
648c2ecf20Sopenharmony_ci * isn't a dead spot in the buffer, but the ring has to be a power of two and
658c2ecf20Sopenharmony_ci * <= 2^31.
668c2ecf20Sopenharmony_ci * -- David Howells 2019-09-23.
678c2ecf20Sopenharmony_ci *
688c2ecf20Sopenharmony_ci * Reads with count = 0 should always return 0.
698c2ecf20Sopenharmony_ci * -- Julian Bradfield 1999-06-07.
708c2ecf20Sopenharmony_ci *
718c2ecf20Sopenharmony_ci * FIFOs and Pipes now generate SIGIO for both readers and writers.
728c2ecf20Sopenharmony_ci * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
738c2ecf20Sopenharmony_ci *
748c2ecf20Sopenharmony_ci * pipe_read & write cleanup
758c2ecf20Sopenharmony_ci * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
768c2ecf20Sopenharmony_ci */
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_cistatic void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
798c2ecf20Sopenharmony_ci{
808c2ecf20Sopenharmony_ci	if (pipe->files)
818c2ecf20Sopenharmony_ci		mutex_lock_nested(&pipe->mutex, subclass);
828c2ecf20Sopenharmony_ci}
838c2ecf20Sopenharmony_ci
848c2ecf20Sopenharmony_civoid pipe_lock(struct pipe_inode_info *pipe)
858c2ecf20Sopenharmony_ci{
868c2ecf20Sopenharmony_ci	/*
878c2ecf20Sopenharmony_ci	 * pipe_lock() nests non-pipe inode locks (for writing to a file)
888c2ecf20Sopenharmony_ci	 */
898c2ecf20Sopenharmony_ci	pipe_lock_nested(pipe, I_MUTEX_PARENT);
908c2ecf20Sopenharmony_ci}
918c2ecf20Sopenharmony_ciEXPORT_SYMBOL(pipe_lock);
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_civoid pipe_unlock(struct pipe_inode_info *pipe)
948c2ecf20Sopenharmony_ci{
958c2ecf20Sopenharmony_ci	if (pipe->files)
968c2ecf20Sopenharmony_ci		mutex_unlock(&pipe->mutex);
978c2ecf20Sopenharmony_ci}
988c2ecf20Sopenharmony_ciEXPORT_SYMBOL(pipe_unlock);
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_cistatic inline void __pipe_lock(struct pipe_inode_info *pipe)
1018c2ecf20Sopenharmony_ci{
1028c2ecf20Sopenharmony_ci	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
1038c2ecf20Sopenharmony_ci}
1048c2ecf20Sopenharmony_ci
1058c2ecf20Sopenharmony_cistatic inline void __pipe_unlock(struct pipe_inode_info *pipe)
1068c2ecf20Sopenharmony_ci{
1078c2ecf20Sopenharmony_ci	mutex_unlock(&pipe->mutex);
1088c2ecf20Sopenharmony_ci}
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_civoid pipe_double_lock(struct pipe_inode_info *pipe1,
1118c2ecf20Sopenharmony_ci		      struct pipe_inode_info *pipe2)
1128c2ecf20Sopenharmony_ci{
1138c2ecf20Sopenharmony_ci	BUG_ON(pipe1 == pipe2);
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci	if (pipe1 < pipe2) {
1168c2ecf20Sopenharmony_ci		pipe_lock_nested(pipe1, I_MUTEX_PARENT);
1178c2ecf20Sopenharmony_ci		pipe_lock_nested(pipe2, I_MUTEX_CHILD);
1188c2ecf20Sopenharmony_ci	} else {
1198c2ecf20Sopenharmony_ci		pipe_lock_nested(pipe2, I_MUTEX_PARENT);
1208c2ecf20Sopenharmony_ci		pipe_lock_nested(pipe1, I_MUTEX_CHILD);
1218c2ecf20Sopenharmony_ci	}
1228c2ecf20Sopenharmony_ci}
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_cistatic void anon_pipe_buf_release(struct pipe_inode_info *pipe,
1258c2ecf20Sopenharmony_ci				  struct pipe_buffer *buf)
1268c2ecf20Sopenharmony_ci{
1278c2ecf20Sopenharmony_ci	struct page *page = buf->page;
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci	/*
1308c2ecf20Sopenharmony_ci	 * If nobody else uses this page, and we don't already have a
1318c2ecf20Sopenharmony_ci	 * temporary page, let's keep track of it as a one-deep
1328c2ecf20Sopenharmony_ci	 * allocation cache. (Otherwise just release our reference to it)
1338c2ecf20Sopenharmony_ci	 */
1348c2ecf20Sopenharmony_ci	if (page_count(page) == 1 && !pipe->tmp_page)
1358c2ecf20Sopenharmony_ci		pipe->tmp_page = page;
1368c2ecf20Sopenharmony_ci	else
1378c2ecf20Sopenharmony_ci		put_page(page);
1388c2ecf20Sopenharmony_ci}
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_cistatic bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
1418c2ecf20Sopenharmony_ci		struct pipe_buffer *buf)
1428c2ecf20Sopenharmony_ci{
1438c2ecf20Sopenharmony_ci	struct page *page = buf->page;
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_ci	if (page_count(page) != 1)
1468c2ecf20Sopenharmony_ci		return false;
1478c2ecf20Sopenharmony_ci	memcg_kmem_uncharge_page(page, 0);
1488c2ecf20Sopenharmony_ci	__SetPageLocked(page);
1498c2ecf20Sopenharmony_ci	return true;
1508c2ecf20Sopenharmony_ci}
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ci/**
1538c2ecf20Sopenharmony_ci * generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer
1548c2ecf20Sopenharmony_ci * @pipe:	the pipe that the buffer belongs to
1558c2ecf20Sopenharmony_ci * @buf:	the buffer to attempt to steal
1568c2ecf20Sopenharmony_ci *
1578c2ecf20Sopenharmony_ci * Description:
1588c2ecf20Sopenharmony_ci *	This function attempts to steal the &struct page attached to
1598c2ecf20Sopenharmony_ci *	@buf. If successful, this function returns 0 and returns with
1608c2ecf20Sopenharmony_ci *	the page locked. The caller may then reuse the page for whatever
1618c2ecf20Sopenharmony_ci *	he wishes; the typical use is insertion into a different file
1628c2ecf20Sopenharmony_ci *	page cache.
1638c2ecf20Sopenharmony_ci */
1648c2ecf20Sopenharmony_cibool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe,
1658c2ecf20Sopenharmony_ci		struct pipe_buffer *buf)
1668c2ecf20Sopenharmony_ci{
1678c2ecf20Sopenharmony_ci	struct page *page = buf->page;
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_ci	/*
1708c2ecf20Sopenharmony_ci	 * A reference of one is golden, that means that the owner of this
1718c2ecf20Sopenharmony_ci	 * page is the only one holding a reference to it. lock the page
1728c2ecf20Sopenharmony_ci	 * and return OK.
1738c2ecf20Sopenharmony_ci	 */
1748c2ecf20Sopenharmony_ci	if (page_count(page) == 1) {
1758c2ecf20Sopenharmony_ci		lock_page(page);
1768c2ecf20Sopenharmony_ci		return true;
1778c2ecf20Sopenharmony_ci	}
1788c2ecf20Sopenharmony_ci	return false;
1798c2ecf20Sopenharmony_ci}
1808c2ecf20Sopenharmony_ciEXPORT_SYMBOL(generic_pipe_buf_try_steal);
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ci/**
1838c2ecf20Sopenharmony_ci * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
1848c2ecf20Sopenharmony_ci * @pipe:	the pipe that the buffer belongs to
1858c2ecf20Sopenharmony_ci * @buf:	the buffer to get a reference to
1868c2ecf20Sopenharmony_ci *
1878c2ecf20Sopenharmony_ci * Description:
1888c2ecf20Sopenharmony_ci *	This function grabs an extra reference to @buf. It's used in
1898c2ecf20Sopenharmony_ci *	in the tee() system call, when we duplicate the buffers in one
1908c2ecf20Sopenharmony_ci *	pipe into another.
1918c2ecf20Sopenharmony_ci */
1928c2ecf20Sopenharmony_cibool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
1938c2ecf20Sopenharmony_ci{
1948c2ecf20Sopenharmony_ci	return try_get_page(buf->page);
1958c2ecf20Sopenharmony_ci}
1968c2ecf20Sopenharmony_ciEXPORT_SYMBOL(generic_pipe_buf_get);
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci/**
1998c2ecf20Sopenharmony_ci * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
2008c2ecf20Sopenharmony_ci * @pipe:	the pipe that the buffer belongs to
2018c2ecf20Sopenharmony_ci * @buf:	the buffer to put a reference to
2028c2ecf20Sopenharmony_ci *
2038c2ecf20Sopenharmony_ci * Description:
2048c2ecf20Sopenharmony_ci *	This function releases a reference to @buf.
2058c2ecf20Sopenharmony_ci */
2068c2ecf20Sopenharmony_civoid generic_pipe_buf_release(struct pipe_inode_info *pipe,
2078c2ecf20Sopenharmony_ci			      struct pipe_buffer *buf)
2088c2ecf20Sopenharmony_ci{
2098c2ecf20Sopenharmony_ci	put_page(buf->page);
2108c2ecf20Sopenharmony_ci}
2118c2ecf20Sopenharmony_ciEXPORT_SYMBOL(generic_pipe_buf_release);
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_cistatic const struct pipe_buf_operations anon_pipe_buf_ops = {
2148c2ecf20Sopenharmony_ci	.release	= anon_pipe_buf_release,
2158c2ecf20Sopenharmony_ci	.try_steal	= anon_pipe_buf_try_steal,
2168c2ecf20Sopenharmony_ci	.get		= generic_pipe_buf_get,
2178c2ecf20Sopenharmony_ci};
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_ci/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
2208c2ecf20Sopenharmony_cistatic inline bool pipe_readable(const struct pipe_inode_info *pipe)
2218c2ecf20Sopenharmony_ci{
2228c2ecf20Sopenharmony_ci	unsigned int head = READ_ONCE(pipe->head);
2238c2ecf20Sopenharmony_ci	unsigned int tail = READ_ONCE(pipe->tail);
2248c2ecf20Sopenharmony_ci	unsigned int writers = READ_ONCE(pipe->writers);
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	return !pipe_empty(head, tail) || !writers;
2278c2ecf20Sopenharmony_ci}
2288c2ecf20Sopenharmony_ci
2298c2ecf20Sopenharmony_cistatic ssize_t
2308c2ecf20Sopenharmony_cipipe_read(struct kiocb *iocb, struct iov_iter *to)
2318c2ecf20Sopenharmony_ci{
2328c2ecf20Sopenharmony_ci	size_t total_len = iov_iter_count(to);
2338c2ecf20Sopenharmony_ci	struct file *filp = iocb->ki_filp;
2348c2ecf20Sopenharmony_ci	struct pipe_inode_info *pipe = filp->private_data;
2358c2ecf20Sopenharmony_ci	bool was_full, wake_next_reader = false;
2368c2ecf20Sopenharmony_ci	ssize_t ret;
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_ci	/* Null read succeeds. */
2398c2ecf20Sopenharmony_ci	if (unlikely(total_len == 0))
2408c2ecf20Sopenharmony_ci		return 0;
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	ret = 0;
2438c2ecf20Sopenharmony_ci	__pipe_lock(pipe);
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_ci	/*
2468c2ecf20Sopenharmony_ci	 * We only wake up writers if the pipe was full when we started
2478c2ecf20Sopenharmony_ci	 * reading in order to avoid unnecessary wakeups.
2488c2ecf20Sopenharmony_ci	 *
2498c2ecf20Sopenharmony_ci	 * But when we do wake up writers, we do so using a sync wakeup
2508c2ecf20Sopenharmony_ci	 * (WF_SYNC), because we want them to get going and generate more
2518c2ecf20Sopenharmony_ci	 * data for us.
2528c2ecf20Sopenharmony_ci	 */
2538c2ecf20Sopenharmony_ci	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
2548c2ecf20Sopenharmony_ci	for (;;) {
2558c2ecf20Sopenharmony_ci		/* Read ->head with a barrier vs post_one_notification() */
2568c2ecf20Sopenharmony_ci		unsigned int head = smp_load_acquire(&pipe->head);
2578c2ecf20Sopenharmony_ci		unsigned int tail = pipe->tail;
2588c2ecf20Sopenharmony_ci		unsigned int mask = pipe->ring_size - 1;
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_ci#ifdef CONFIG_WATCH_QUEUE
2618c2ecf20Sopenharmony_ci		if (pipe->note_loss) {
2628c2ecf20Sopenharmony_ci			struct watch_notification n;
2638c2ecf20Sopenharmony_ci
2648c2ecf20Sopenharmony_ci			if (total_len < 8) {
2658c2ecf20Sopenharmony_ci				if (ret == 0)
2668c2ecf20Sopenharmony_ci					ret = -ENOBUFS;
2678c2ecf20Sopenharmony_ci				break;
2688c2ecf20Sopenharmony_ci			}
2698c2ecf20Sopenharmony_ci
2708c2ecf20Sopenharmony_ci			n.type = WATCH_TYPE_META;
2718c2ecf20Sopenharmony_ci			n.subtype = WATCH_META_LOSS_NOTIFICATION;
2728c2ecf20Sopenharmony_ci			n.info = watch_sizeof(n);
2738c2ecf20Sopenharmony_ci			if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) {
2748c2ecf20Sopenharmony_ci				if (ret == 0)
2758c2ecf20Sopenharmony_ci					ret = -EFAULT;
2768c2ecf20Sopenharmony_ci				break;
2778c2ecf20Sopenharmony_ci			}
2788c2ecf20Sopenharmony_ci			ret += sizeof(n);
2798c2ecf20Sopenharmony_ci			total_len -= sizeof(n);
2808c2ecf20Sopenharmony_ci			pipe->note_loss = false;
2818c2ecf20Sopenharmony_ci		}
2828c2ecf20Sopenharmony_ci#endif
2838c2ecf20Sopenharmony_ci
2848c2ecf20Sopenharmony_ci		if (!pipe_empty(head, tail)) {
2858c2ecf20Sopenharmony_ci			struct pipe_buffer *buf = &pipe->bufs[tail & mask];
2868c2ecf20Sopenharmony_ci			size_t chars = buf->len;
2878c2ecf20Sopenharmony_ci			size_t written;
2888c2ecf20Sopenharmony_ci			int error;
2898c2ecf20Sopenharmony_ci
2908c2ecf20Sopenharmony_ci			if (chars > total_len) {
2918c2ecf20Sopenharmony_ci				if (buf->flags & PIPE_BUF_FLAG_WHOLE) {
2928c2ecf20Sopenharmony_ci					if (ret == 0)
2938c2ecf20Sopenharmony_ci						ret = -ENOBUFS;
2948c2ecf20Sopenharmony_ci					break;
2958c2ecf20Sopenharmony_ci				}
2968c2ecf20Sopenharmony_ci				chars = total_len;
2978c2ecf20Sopenharmony_ci			}
2988c2ecf20Sopenharmony_ci
2998c2ecf20Sopenharmony_ci			error = pipe_buf_confirm(pipe, buf);
3008c2ecf20Sopenharmony_ci			if (error) {
3018c2ecf20Sopenharmony_ci				if (!ret)
3028c2ecf20Sopenharmony_ci					ret = error;
3038c2ecf20Sopenharmony_ci				break;
3048c2ecf20Sopenharmony_ci			}
3058c2ecf20Sopenharmony_ci
3068c2ecf20Sopenharmony_ci			written = copy_page_to_iter(buf->page, buf->offset, chars, to);
3078c2ecf20Sopenharmony_ci			if (unlikely(written < chars)) {
3088c2ecf20Sopenharmony_ci				if (!ret)
3098c2ecf20Sopenharmony_ci					ret = -EFAULT;
3108c2ecf20Sopenharmony_ci				break;
3118c2ecf20Sopenharmony_ci			}
3128c2ecf20Sopenharmony_ci			ret += chars;
3138c2ecf20Sopenharmony_ci			buf->offset += chars;
3148c2ecf20Sopenharmony_ci			buf->len -= chars;
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_ci			/* Was it a packet buffer? Clean up and exit */
3178c2ecf20Sopenharmony_ci			if (buf->flags & PIPE_BUF_FLAG_PACKET) {
3188c2ecf20Sopenharmony_ci				total_len = chars;
3198c2ecf20Sopenharmony_ci				buf->len = 0;
3208c2ecf20Sopenharmony_ci			}
3218c2ecf20Sopenharmony_ci
3228c2ecf20Sopenharmony_ci			if (!buf->len) {
3238c2ecf20Sopenharmony_ci				pipe_buf_release(pipe, buf);
3248c2ecf20Sopenharmony_ci				spin_lock_irq(&pipe->rd_wait.lock);
3258c2ecf20Sopenharmony_ci#ifdef CONFIG_WATCH_QUEUE
3268c2ecf20Sopenharmony_ci				if (buf->flags & PIPE_BUF_FLAG_LOSS)
3278c2ecf20Sopenharmony_ci					pipe->note_loss = true;
3288c2ecf20Sopenharmony_ci#endif
3298c2ecf20Sopenharmony_ci				tail++;
3308c2ecf20Sopenharmony_ci				pipe->tail = tail;
3318c2ecf20Sopenharmony_ci				spin_unlock_irq(&pipe->rd_wait.lock);
3328c2ecf20Sopenharmony_ci			}
3338c2ecf20Sopenharmony_ci			total_len -= chars;
3348c2ecf20Sopenharmony_ci			if (!total_len)
3358c2ecf20Sopenharmony_ci				break;	/* common path: read succeeded */
3368c2ecf20Sopenharmony_ci			if (!pipe_empty(head, tail))	/* More to do? */
3378c2ecf20Sopenharmony_ci				continue;
3388c2ecf20Sopenharmony_ci		}
3398c2ecf20Sopenharmony_ci
3408c2ecf20Sopenharmony_ci		if (!pipe->writers)
3418c2ecf20Sopenharmony_ci			break;
3428c2ecf20Sopenharmony_ci		if (ret)
3438c2ecf20Sopenharmony_ci			break;
3448c2ecf20Sopenharmony_ci		if (filp->f_flags & O_NONBLOCK) {
3458c2ecf20Sopenharmony_ci			ret = -EAGAIN;
3468c2ecf20Sopenharmony_ci			break;
3478c2ecf20Sopenharmony_ci		}
3488c2ecf20Sopenharmony_ci		__pipe_unlock(pipe);
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ci		/*
3518c2ecf20Sopenharmony_ci		 * We only get here if we didn't actually read anything.
3528c2ecf20Sopenharmony_ci		 *
3538c2ecf20Sopenharmony_ci		 * However, we could have seen (and removed) a zero-sized
3548c2ecf20Sopenharmony_ci		 * pipe buffer, and might have made space in the buffers
3558c2ecf20Sopenharmony_ci		 * that way.
3568c2ecf20Sopenharmony_ci		 *
3578c2ecf20Sopenharmony_ci		 * You can't make zero-sized pipe buffers by doing an empty
3588c2ecf20Sopenharmony_ci		 * write (not even in packet mode), but they can happen if
3598c2ecf20Sopenharmony_ci		 * the writer gets an EFAULT when trying to fill a buffer
3608c2ecf20Sopenharmony_ci		 * that already got allocated and inserted in the buffer
3618c2ecf20Sopenharmony_ci		 * array.
3628c2ecf20Sopenharmony_ci		 *
3638c2ecf20Sopenharmony_ci		 * So we still need to wake up any pending writers in the
3648c2ecf20Sopenharmony_ci		 * _very_ unlikely case that the pipe was full, but we got
3658c2ecf20Sopenharmony_ci		 * no data.
3668c2ecf20Sopenharmony_ci		 */
3678c2ecf20Sopenharmony_ci		if (unlikely(was_full))
3688c2ecf20Sopenharmony_ci			wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
3698c2ecf20Sopenharmony_ci		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_ci		/*
3728c2ecf20Sopenharmony_ci		 * But because we didn't read anything, at this point we can
3738c2ecf20Sopenharmony_ci		 * just return directly with -ERESTARTSYS if we're interrupted,
3748c2ecf20Sopenharmony_ci		 * since we've done any required wakeups and there's no need
3758c2ecf20Sopenharmony_ci		 * to mark anything accessed. And we've dropped the lock.
3768c2ecf20Sopenharmony_ci		 */
3778c2ecf20Sopenharmony_ci		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
3788c2ecf20Sopenharmony_ci			return -ERESTARTSYS;
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci		__pipe_lock(pipe);
3818c2ecf20Sopenharmony_ci		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
3828c2ecf20Sopenharmony_ci		wake_next_reader = true;
3838c2ecf20Sopenharmony_ci	}
3848c2ecf20Sopenharmony_ci	if (pipe_empty(pipe->head, pipe->tail))
3858c2ecf20Sopenharmony_ci		wake_next_reader = false;
3868c2ecf20Sopenharmony_ci	__pipe_unlock(pipe);
3878c2ecf20Sopenharmony_ci
3888c2ecf20Sopenharmony_ci	if (was_full)
3898c2ecf20Sopenharmony_ci		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
3908c2ecf20Sopenharmony_ci	if (wake_next_reader)
3918c2ecf20Sopenharmony_ci		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
3928c2ecf20Sopenharmony_ci	kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
3938c2ecf20Sopenharmony_ci	if (ret > 0)
3948c2ecf20Sopenharmony_ci		file_accessed(filp);
3958c2ecf20Sopenharmony_ci	return ret;
3968c2ecf20Sopenharmony_ci}
3978c2ecf20Sopenharmony_ci
3988c2ecf20Sopenharmony_cistatic inline int is_packetized(struct file *file)
3998c2ecf20Sopenharmony_ci{
4008c2ecf20Sopenharmony_ci	return (file->f_flags & O_DIRECT) != 0;
4018c2ecf20Sopenharmony_ci}
4028c2ecf20Sopenharmony_ci
4038c2ecf20Sopenharmony_ci/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
4048c2ecf20Sopenharmony_cistatic inline bool pipe_writable(const struct pipe_inode_info *pipe)
4058c2ecf20Sopenharmony_ci{
4068c2ecf20Sopenharmony_ci	unsigned int head = READ_ONCE(pipe->head);
4078c2ecf20Sopenharmony_ci	unsigned int tail = READ_ONCE(pipe->tail);
4088c2ecf20Sopenharmony_ci	unsigned int max_usage = READ_ONCE(pipe->max_usage);
4098c2ecf20Sopenharmony_ci
4108c2ecf20Sopenharmony_ci	return !pipe_full(head, tail, max_usage) ||
4118c2ecf20Sopenharmony_ci		!READ_ONCE(pipe->readers);
4128c2ecf20Sopenharmony_ci}
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_cistatic ssize_t
4158c2ecf20Sopenharmony_cipipe_write(struct kiocb *iocb, struct iov_iter *from)
4168c2ecf20Sopenharmony_ci{
4178c2ecf20Sopenharmony_ci	struct file *filp = iocb->ki_filp;
4188c2ecf20Sopenharmony_ci	struct pipe_inode_info *pipe = filp->private_data;
4198c2ecf20Sopenharmony_ci	unsigned int head;
4208c2ecf20Sopenharmony_ci	ssize_t ret = 0;
4218c2ecf20Sopenharmony_ci	size_t total_len = iov_iter_count(from);
4228c2ecf20Sopenharmony_ci	ssize_t chars;
4238c2ecf20Sopenharmony_ci	bool was_empty = false;
4248c2ecf20Sopenharmony_ci	bool wake_next_writer = false;
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_ci	/* Null write succeeds. */
4278c2ecf20Sopenharmony_ci	if (unlikely(total_len == 0))
4288c2ecf20Sopenharmony_ci		return 0;
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_ci	__pipe_lock(pipe);
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_ci	if (!pipe->readers) {
4338c2ecf20Sopenharmony_ci		send_sig(SIGPIPE, current, 0);
4348c2ecf20Sopenharmony_ci		ret = -EPIPE;
4358c2ecf20Sopenharmony_ci		goto out;
4368c2ecf20Sopenharmony_ci	}
4378c2ecf20Sopenharmony_ci
4388c2ecf20Sopenharmony_ci	if (pipe_has_watch_queue(pipe)) {
4398c2ecf20Sopenharmony_ci		ret = -EXDEV;
4408c2ecf20Sopenharmony_ci		goto out;
4418c2ecf20Sopenharmony_ci	}
4428c2ecf20Sopenharmony_ci
4438c2ecf20Sopenharmony_ci	/*
4448c2ecf20Sopenharmony_ci	 * If it wasn't empty we try to merge new data into
4458c2ecf20Sopenharmony_ci	 * the last buffer.
4468c2ecf20Sopenharmony_ci	 *
4478c2ecf20Sopenharmony_ci	 * That naturally merges small writes, but it also
4488c2ecf20Sopenharmony_ci	 * page-aligns the rest of the writes for large writes
4498c2ecf20Sopenharmony_ci	 * spanning multiple pages.
4508c2ecf20Sopenharmony_ci	 */
4518c2ecf20Sopenharmony_ci	head = pipe->head;
4528c2ecf20Sopenharmony_ci	was_empty = pipe_empty(head, pipe->tail);
4538c2ecf20Sopenharmony_ci	chars = total_len & (PAGE_SIZE-1);
4548c2ecf20Sopenharmony_ci	if (chars && !was_empty) {
4558c2ecf20Sopenharmony_ci		unsigned int mask = pipe->ring_size - 1;
4568c2ecf20Sopenharmony_ci		struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
4578c2ecf20Sopenharmony_ci		int offset = buf->offset + buf->len;
4588c2ecf20Sopenharmony_ci
4598c2ecf20Sopenharmony_ci		if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
4608c2ecf20Sopenharmony_ci		    offset + chars <= PAGE_SIZE) {
4618c2ecf20Sopenharmony_ci			ret = pipe_buf_confirm(pipe, buf);
4628c2ecf20Sopenharmony_ci			if (ret)
4638c2ecf20Sopenharmony_ci				goto out;
4648c2ecf20Sopenharmony_ci
4658c2ecf20Sopenharmony_ci			ret = copy_page_from_iter(buf->page, offset, chars, from);
4668c2ecf20Sopenharmony_ci			if (unlikely(ret < chars)) {
4678c2ecf20Sopenharmony_ci				ret = -EFAULT;
4688c2ecf20Sopenharmony_ci				goto out;
4698c2ecf20Sopenharmony_ci			}
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_ci			buf->len += ret;
4728c2ecf20Sopenharmony_ci			if (!iov_iter_count(from))
4738c2ecf20Sopenharmony_ci				goto out;
4748c2ecf20Sopenharmony_ci		}
4758c2ecf20Sopenharmony_ci	}
4768c2ecf20Sopenharmony_ci
4778c2ecf20Sopenharmony_ci	for (;;) {
4788c2ecf20Sopenharmony_ci		if (!pipe->readers) {
4798c2ecf20Sopenharmony_ci			send_sig(SIGPIPE, current, 0);
4808c2ecf20Sopenharmony_ci			if (!ret)
4818c2ecf20Sopenharmony_ci				ret = -EPIPE;
4828c2ecf20Sopenharmony_ci			break;
4838c2ecf20Sopenharmony_ci		}
4848c2ecf20Sopenharmony_ci
4858c2ecf20Sopenharmony_ci		head = pipe->head;
4868c2ecf20Sopenharmony_ci		if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
4878c2ecf20Sopenharmony_ci			unsigned int mask = pipe->ring_size - 1;
4888c2ecf20Sopenharmony_ci			struct pipe_buffer *buf = &pipe->bufs[head & mask];
4898c2ecf20Sopenharmony_ci			struct page *page = pipe->tmp_page;
4908c2ecf20Sopenharmony_ci			int copied;
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_ci			if (!page) {
4938c2ecf20Sopenharmony_ci				page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
4948c2ecf20Sopenharmony_ci				if (unlikely(!page)) {
4958c2ecf20Sopenharmony_ci					ret = ret ? : -ENOMEM;
4968c2ecf20Sopenharmony_ci					break;
4978c2ecf20Sopenharmony_ci				}
4988c2ecf20Sopenharmony_ci				pipe->tmp_page = page;
4998c2ecf20Sopenharmony_ci			}
5008c2ecf20Sopenharmony_ci
5018c2ecf20Sopenharmony_ci			/* Allocate a slot in the ring in advance and attach an
5028c2ecf20Sopenharmony_ci			 * empty buffer.  If we fault or otherwise fail to use
5038c2ecf20Sopenharmony_ci			 * it, either the reader will consume it or it'll still
5048c2ecf20Sopenharmony_ci			 * be there for the next write.
5058c2ecf20Sopenharmony_ci			 */
5068c2ecf20Sopenharmony_ci			spin_lock_irq(&pipe->rd_wait.lock);
5078c2ecf20Sopenharmony_ci
5088c2ecf20Sopenharmony_ci			head = pipe->head;
5098c2ecf20Sopenharmony_ci			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
5108c2ecf20Sopenharmony_ci				spin_unlock_irq(&pipe->rd_wait.lock);
5118c2ecf20Sopenharmony_ci				continue;
5128c2ecf20Sopenharmony_ci			}
5138c2ecf20Sopenharmony_ci
5148c2ecf20Sopenharmony_ci			pipe->head = head + 1;
5158c2ecf20Sopenharmony_ci			spin_unlock_irq(&pipe->rd_wait.lock);
5168c2ecf20Sopenharmony_ci
5178c2ecf20Sopenharmony_ci			/* Insert it into the buffer array */
5188c2ecf20Sopenharmony_ci			buf = &pipe->bufs[head & mask];
5198c2ecf20Sopenharmony_ci			buf->page = page;
5208c2ecf20Sopenharmony_ci			buf->ops = &anon_pipe_buf_ops;
5218c2ecf20Sopenharmony_ci			buf->offset = 0;
5228c2ecf20Sopenharmony_ci			buf->len = 0;
5238c2ecf20Sopenharmony_ci			if (is_packetized(filp))
5248c2ecf20Sopenharmony_ci				buf->flags = PIPE_BUF_FLAG_PACKET;
5258c2ecf20Sopenharmony_ci			else
5268c2ecf20Sopenharmony_ci				buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
5278c2ecf20Sopenharmony_ci			pipe->tmp_page = NULL;
5288c2ecf20Sopenharmony_ci
5298c2ecf20Sopenharmony_ci			copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
5308c2ecf20Sopenharmony_ci			if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
5318c2ecf20Sopenharmony_ci				if (!ret)
5328c2ecf20Sopenharmony_ci					ret = -EFAULT;
5338c2ecf20Sopenharmony_ci				break;
5348c2ecf20Sopenharmony_ci			}
5358c2ecf20Sopenharmony_ci			ret += copied;
5368c2ecf20Sopenharmony_ci			buf->offset = 0;
5378c2ecf20Sopenharmony_ci			buf->len = copied;
5388c2ecf20Sopenharmony_ci
5398c2ecf20Sopenharmony_ci			if (!iov_iter_count(from))
5408c2ecf20Sopenharmony_ci				break;
5418c2ecf20Sopenharmony_ci		}
5428c2ecf20Sopenharmony_ci
5438c2ecf20Sopenharmony_ci		if (!pipe_full(head, pipe->tail, pipe->max_usage))
5448c2ecf20Sopenharmony_ci			continue;
5458c2ecf20Sopenharmony_ci
5468c2ecf20Sopenharmony_ci		/* Wait for buffer space to become available. */
5478c2ecf20Sopenharmony_ci		if (filp->f_flags & O_NONBLOCK) {
5488c2ecf20Sopenharmony_ci			if (!ret)
5498c2ecf20Sopenharmony_ci				ret = -EAGAIN;
5508c2ecf20Sopenharmony_ci			break;
5518c2ecf20Sopenharmony_ci		}
5528c2ecf20Sopenharmony_ci		if (signal_pending(current)) {
5538c2ecf20Sopenharmony_ci			if (!ret)
5548c2ecf20Sopenharmony_ci				ret = -ERESTARTSYS;
5558c2ecf20Sopenharmony_ci			break;
5568c2ecf20Sopenharmony_ci		}
5578c2ecf20Sopenharmony_ci
5588c2ecf20Sopenharmony_ci		/*
5598c2ecf20Sopenharmony_ci		 * We're going to release the pipe lock and wait for more
5608c2ecf20Sopenharmony_ci		 * space. We wake up any readers if necessary, and then
5618c2ecf20Sopenharmony_ci		 * after waiting we need to re-check whether the pipe
5628c2ecf20Sopenharmony_ci		 * become empty while we dropped the lock.
5638c2ecf20Sopenharmony_ci		 */
5648c2ecf20Sopenharmony_ci		__pipe_unlock(pipe);
5658c2ecf20Sopenharmony_ci		if (was_empty)
5668c2ecf20Sopenharmony_ci			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
5678c2ecf20Sopenharmony_ci		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
5688c2ecf20Sopenharmony_ci		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
5698c2ecf20Sopenharmony_ci		__pipe_lock(pipe);
5708c2ecf20Sopenharmony_ci		was_empty = pipe_empty(pipe->head, pipe->tail);
5718c2ecf20Sopenharmony_ci		wake_next_writer = true;
5728c2ecf20Sopenharmony_ci	}
5738c2ecf20Sopenharmony_ciout:
5748c2ecf20Sopenharmony_ci	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
5758c2ecf20Sopenharmony_ci		wake_next_writer = false;
5768c2ecf20Sopenharmony_ci	__pipe_unlock(pipe);
5778c2ecf20Sopenharmony_ci
5788c2ecf20Sopenharmony_ci	/*
5798c2ecf20Sopenharmony_ci	 * If we do do a wakeup event, we do a 'sync' wakeup, because we
5808c2ecf20Sopenharmony_ci	 * want the reader to start processing things asap, rather than
5818c2ecf20Sopenharmony_ci	 * leave the data pending.
5828c2ecf20Sopenharmony_ci	 *
5838c2ecf20Sopenharmony_ci	 * This is particularly important for small writes, because of
5848c2ecf20Sopenharmony_ci	 * how (for example) the GNU make jobserver uses small writes to
5858c2ecf20Sopenharmony_ci	 * wake up pending jobs
5868c2ecf20Sopenharmony_ci	 *
5878c2ecf20Sopenharmony_ci	 * Epoll nonsensically wants a wakeup whether the pipe
5888c2ecf20Sopenharmony_ci	 * was already empty or not.
5898c2ecf20Sopenharmony_ci	 */
5908c2ecf20Sopenharmony_ci	if (was_empty || pipe->poll_usage)
5918c2ecf20Sopenharmony_ci		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
5928c2ecf20Sopenharmony_ci	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
5938c2ecf20Sopenharmony_ci	if (wake_next_writer)
5948c2ecf20Sopenharmony_ci		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
5958c2ecf20Sopenharmony_ci	if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
5968c2ecf20Sopenharmony_ci		int err = file_update_time(filp);
5978c2ecf20Sopenharmony_ci		if (err)
5988c2ecf20Sopenharmony_ci			ret = err;
5998c2ecf20Sopenharmony_ci		sb_end_write(file_inode(filp)->i_sb);
6008c2ecf20Sopenharmony_ci	}
6018c2ecf20Sopenharmony_ci	return ret;
6028c2ecf20Sopenharmony_ci}
6038c2ecf20Sopenharmony_ci
6048c2ecf20Sopenharmony_cistatic long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
6058c2ecf20Sopenharmony_ci{
6068c2ecf20Sopenharmony_ci	struct pipe_inode_info *pipe = filp->private_data;
6078c2ecf20Sopenharmony_ci	int count, head, tail, mask;
6088c2ecf20Sopenharmony_ci
6098c2ecf20Sopenharmony_ci	switch (cmd) {
6108c2ecf20Sopenharmony_ci	case FIONREAD:
6118c2ecf20Sopenharmony_ci		__pipe_lock(pipe);
6128c2ecf20Sopenharmony_ci		count = 0;
6138c2ecf20Sopenharmony_ci		head = pipe->head;
6148c2ecf20Sopenharmony_ci		tail = pipe->tail;
6158c2ecf20Sopenharmony_ci		mask = pipe->ring_size - 1;
6168c2ecf20Sopenharmony_ci
6178c2ecf20Sopenharmony_ci		while (tail != head) {
6188c2ecf20Sopenharmony_ci			count += pipe->bufs[tail & mask].len;
6198c2ecf20Sopenharmony_ci			tail++;
6208c2ecf20Sopenharmony_ci		}
6218c2ecf20Sopenharmony_ci		__pipe_unlock(pipe);
6228c2ecf20Sopenharmony_ci
6238c2ecf20Sopenharmony_ci		return put_user(count, (int __user *)arg);
6248c2ecf20Sopenharmony_ci
6258c2ecf20Sopenharmony_ci#ifdef CONFIG_WATCH_QUEUE
6268c2ecf20Sopenharmony_ci	case IOC_WATCH_QUEUE_SET_SIZE: {
6278c2ecf20Sopenharmony_ci		int ret;
6288c2ecf20Sopenharmony_ci		__pipe_lock(pipe);
6298c2ecf20Sopenharmony_ci		ret = watch_queue_set_size(pipe, arg);
6308c2ecf20Sopenharmony_ci		__pipe_unlock(pipe);
6318c2ecf20Sopenharmony_ci		return ret;
6328c2ecf20Sopenharmony_ci	}
6338c2ecf20Sopenharmony_ci
6348c2ecf20Sopenharmony_ci	case IOC_WATCH_QUEUE_SET_FILTER:
6358c2ecf20Sopenharmony_ci		return watch_queue_set_filter(
6368c2ecf20Sopenharmony_ci			pipe, (struct watch_notification_filter __user *)arg);
6378c2ecf20Sopenharmony_ci#endif
6388c2ecf20Sopenharmony_ci
6398c2ecf20Sopenharmony_ci	default:
6408c2ecf20Sopenharmony_ci		return -ENOIOCTLCMD;
6418c2ecf20Sopenharmony_ci	}
6428c2ecf20Sopenharmony_ci}
6438c2ecf20Sopenharmony_ci
6448c2ecf20Sopenharmony_ci/* No kernel lock held - fine */
6458c2ecf20Sopenharmony_cistatic __poll_t
6468c2ecf20Sopenharmony_cipipe_poll(struct file *filp, poll_table *wait)
6478c2ecf20Sopenharmony_ci{
6488c2ecf20Sopenharmony_ci	__poll_t mask;
6498c2ecf20Sopenharmony_ci	struct pipe_inode_info *pipe = filp->private_data;
6508c2ecf20Sopenharmony_ci	unsigned int head, tail;
6518c2ecf20Sopenharmony_ci
6528c2ecf20Sopenharmony_ci	/* Epoll has some historical nasty semantics, this enables them */
6538c2ecf20Sopenharmony_ci	WRITE_ONCE(pipe->poll_usage, true);
6548c2ecf20Sopenharmony_ci
6558c2ecf20Sopenharmony_ci	/*
6568c2ecf20Sopenharmony_ci	 * Reading pipe state only -- no need for acquiring the semaphore.
6578c2ecf20Sopenharmony_ci	 *
6588c2ecf20Sopenharmony_ci	 * But because this is racy, the code has to add the
6598c2ecf20Sopenharmony_ci	 * entry to the poll table _first_ ..
6608c2ecf20Sopenharmony_ci	 */
6618c2ecf20Sopenharmony_ci	if (filp->f_mode & FMODE_READ)
6628c2ecf20Sopenharmony_ci		poll_wait(filp, &pipe->rd_wait, wait);
6638c2ecf20Sopenharmony_ci	if (filp->f_mode & FMODE_WRITE)
6648c2ecf20Sopenharmony_ci		poll_wait(filp, &pipe->wr_wait, wait);
6658c2ecf20Sopenharmony_ci
6668c2ecf20Sopenharmony_ci	/*
6678c2ecf20Sopenharmony_ci	 * .. and only then can you do the racy tests. That way,
6688c2ecf20Sopenharmony_ci	 * if something changes and you got it wrong, the poll
6698c2ecf20Sopenharmony_ci	 * table entry will wake you up and fix it.
6708c2ecf20Sopenharmony_ci	 */
6718c2ecf20Sopenharmony_ci	head = READ_ONCE(pipe->head);
6728c2ecf20Sopenharmony_ci	tail = READ_ONCE(pipe->tail);
6738c2ecf20Sopenharmony_ci
6748c2ecf20Sopenharmony_ci	mask = 0;
6758c2ecf20Sopenharmony_ci	if (filp->f_mode & FMODE_READ) {
6768c2ecf20Sopenharmony_ci		if (!pipe_empty(head, tail))
6778c2ecf20Sopenharmony_ci			mask |= EPOLLIN | EPOLLRDNORM;
6788c2ecf20Sopenharmony_ci		if (!pipe->writers && filp->f_version != pipe->w_counter)
6798c2ecf20Sopenharmony_ci			mask |= EPOLLHUP;
6808c2ecf20Sopenharmony_ci	}
6818c2ecf20Sopenharmony_ci
6828c2ecf20Sopenharmony_ci	if (filp->f_mode & FMODE_WRITE) {
6838c2ecf20Sopenharmony_ci		if (!pipe_full(head, tail, pipe->max_usage))
6848c2ecf20Sopenharmony_ci			mask |= EPOLLOUT | EPOLLWRNORM;
6858c2ecf20Sopenharmony_ci		/*
6868c2ecf20Sopenharmony_ci		 * Most Unices do not set EPOLLERR for FIFOs but on Linux they
6878c2ecf20Sopenharmony_ci		 * behave exactly like pipes for poll().
6888c2ecf20Sopenharmony_ci		 */
6898c2ecf20Sopenharmony_ci		if (!pipe->readers)
6908c2ecf20Sopenharmony_ci			mask |= EPOLLERR;
6918c2ecf20Sopenharmony_ci	}
6928c2ecf20Sopenharmony_ci
6938c2ecf20Sopenharmony_ci	return mask;
6948c2ecf20Sopenharmony_ci}
6958c2ecf20Sopenharmony_ci
6968c2ecf20Sopenharmony_cistatic void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
6978c2ecf20Sopenharmony_ci{
6988c2ecf20Sopenharmony_ci	int kill = 0;
6998c2ecf20Sopenharmony_ci
7008c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
7018c2ecf20Sopenharmony_ci	if (!--pipe->files) {
7028c2ecf20Sopenharmony_ci		inode->i_pipe = NULL;
7038c2ecf20Sopenharmony_ci		kill = 1;
7048c2ecf20Sopenharmony_ci	}
7058c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
7068c2ecf20Sopenharmony_ci
7078c2ecf20Sopenharmony_ci	if (kill)
7088c2ecf20Sopenharmony_ci		free_pipe_info(pipe);
7098c2ecf20Sopenharmony_ci}
7108c2ecf20Sopenharmony_ci
7118c2ecf20Sopenharmony_cistatic int
7128c2ecf20Sopenharmony_cipipe_release(struct inode *inode, struct file *file)
7138c2ecf20Sopenharmony_ci{
7148c2ecf20Sopenharmony_ci	struct pipe_inode_info *pipe = file->private_data;
7158c2ecf20Sopenharmony_ci
7168c2ecf20Sopenharmony_ci	__pipe_lock(pipe);
7178c2ecf20Sopenharmony_ci	if (file->f_mode & FMODE_READ)
7188c2ecf20Sopenharmony_ci		pipe->readers--;
7198c2ecf20Sopenharmony_ci	if (file->f_mode & FMODE_WRITE)
7208c2ecf20Sopenharmony_ci		pipe->writers--;
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_ci	/* Was that the last reader or writer, but not the other side? */
7238c2ecf20Sopenharmony_ci	if (!pipe->readers != !pipe->writers) {
7248c2ecf20Sopenharmony_ci		wake_up_interruptible_all(&pipe->rd_wait);
7258c2ecf20Sopenharmony_ci		wake_up_interruptible_all(&pipe->wr_wait);
7268c2ecf20Sopenharmony_ci		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
7278c2ecf20Sopenharmony_ci		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
7288c2ecf20Sopenharmony_ci	}
7298c2ecf20Sopenharmony_ci	__pipe_unlock(pipe);
7308c2ecf20Sopenharmony_ci
7318c2ecf20Sopenharmony_ci	put_pipe_info(inode, pipe);
7328c2ecf20Sopenharmony_ci	return 0;
7338c2ecf20Sopenharmony_ci}
7348c2ecf20Sopenharmony_ci
7358c2ecf20Sopenharmony_cistatic int
7368c2ecf20Sopenharmony_cipipe_fasync(int fd, struct file *filp, int on)
7378c2ecf20Sopenharmony_ci{
7388c2ecf20Sopenharmony_ci	struct pipe_inode_info *pipe = filp->private_data;
7398c2ecf20Sopenharmony_ci	int retval = 0;
7408c2ecf20Sopenharmony_ci
7418c2ecf20Sopenharmony_ci	__pipe_lock(pipe);
7428c2ecf20Sopenharmony_ci	if (filp->f_mode & FMODE_READ)
7438c2ecf20Sopenharmony_ci		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
7448c2ecf20Sopenharmony_ci	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
7458c2ecf20Sopenharmony_ci		retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
7468c2ecf20Sopenharmony_ci		if (retval < 0 && (filp->f_mode & FMODE_READ))
7478c2ecf20Sopenharmony_ci			/* this can happen only if on == T */
7488c2ecf20Sopenharmony_ci			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
7498c2ecf20Sopenharmony_ci	}
7508c2ecf20Sopenharmony_ci	__pipe_unlock(pipe);
7518c2ecf20Sopenharmony_ci	return retval;
7528c2ecf20Sopenharmony_ci}
7538c2ecf20Sopenharmony_ci
7548c2ecf20Sopenharmony_ciunsigned long account_pipe_buffers(struct user_struct *user,
7558c2ecf20Sopenharmony_ci				   unsigned long old, unsigned long new)
7568c2ecf20Sopenharmony_ci{
7578c2ecf20Sopenharmony_ci	return atomic_long_add_return(new - old, &user->pipe_bufs);
7588c2ecf20Sopenharmony_ci}
7598c2ecf20Sopenharmony_ci
7608c2ecf20Sopenharmony_cibool too_many_pipe_buffers_soft(unsigned long user_bufs)
7618c2ecf20Sopenharmony_ci{
7628c2ecf20Sopenharmony_ci	unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);
7638c2ecf20Sopenharmony_ci
7648c2ecf20Sopenharmony_ci	return soft_limit && user_bufs > soft_limit;
7658c2ecf20Sopenharmony_ci}
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_cibool too_many_pipe_buffers_hard(unsigned long user_bufs)
7688c2ecf20Sopenharmony_ci{
7698c2ecf20Sopenharmony_ci	unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);
7708c2ecf20Sopenharmony_ci
7718c2ecf20Sopenharmony_ci	return hard_limit && user_bufs > hard_limit;
7728c2ecf20Sopenharmony_ci}
7738c2ecf20Sopenharmony_ci
7748c2ecf20Sopenharmony_cibool pipe_is_unprivileged_user(void)
7758c2ecf20Sopenharmony_ci{
7768c2ecf20Sopenharmony_ci	return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
7778c2ecf20Sopenharmony_ci}
7788c2ecf20Sopenharmony_ci
7798c2ecf20Sopenharmony_cistruct pipe_inode_info *alloc_pipe_info(void)
7808c2ecf20Sopenharmony_ci{
7818c2ecf20Sopenharmony_ci	struct pipe_inode_info *pipe;
7828c2ecf20Sopenharmony_ci	unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
7838c2ecf20Sopenharmony_ci	struct user_struct *user = get_current_user();
7848c2ecf20Sopenharmony_ci	unsigned long user_bufs;
7858c2ecf20Sopenharmony_ci	unsigned int max_size = READ_ONCE(pipe_max_size);
7868c2ecf20Sopenharmony_ci
7878c2ecf20Sopenharmony_ci	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
7888c2ecf20Sopenharmony_ci	if (pipe == NULL)
7898c2ecf20Sopenharmony_ci		goto out_free_uid;
7908c2ecf20Sopenharmony_ci
7918c2ecf20Sopenharmony_ci	if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
7928c2ecf20Sopenharmony_ci		pipe_bufs = max_size >> PAGE_SHIFT;
7938c2ecf20Sopenharmony_ci
7948c2ecf20Sopenharmony_ci	user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
7958c2ecf20Sopenharmony_ci
7968c2ecf20Sopenharmony_ci	if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
7978c2ecf20Sopenharmony_ci		user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS);
7988c2ecf20Sopenharmony_ci		pipe_bufs = PIPE_MIN_DEF_BUFFERS;
7998c2ecf20Sopenharmony_ci	}
8008c2ecf20Sopenharmony_ci
8018c2ecf20Sopenharmony_ci	if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
8028c2ecf20Sopenharmony_ci		goto out_revert_acct;
8038c2ecf20Sopenharmony_ci
8048c2ecf20Sopenharmony_ci	pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
8058c2ecf20Sopenharmony_ci			     GFP_KERNEL_ACCOUNT);
8068c2ecf20Sopenharmony_ci
8078c2ecf20Sopenharmony_ci	if (pipe->bufs) {
8088c2ecf20Sopenharmony_ci		init_waitqueue_head(&pipe->rd_wait);
8098c2ecf20Sopenharmony_ci		init_waitqueue_head(&pipe->wr_wait);
8108c2ecf20Sopenharmony_ci		pipe->r_counter = pipe->w_counter = 1;
8118c2ecf20Sopenharmony_ci		pipe->max_usage = pipe_bufs;
8128c2ecf20Sopenharmony_ci		pipe->ring_size = pipe_bufs;
8138c2ecf20Sopenharmony_ci		pipe->nr_accounted = pipe_bufs;
8148c2ecf20Sopenharmony_ci		pipe->user = user;
8158c2ecf20Sopenharmony_ci		mutex_init(&pipe->mutex);
8168c2ecf20Sopenharmony_ci		return pipe;
8178c2ecf20Sopenharmony_ci	}
8188c2ecf20Sopenharmony_ci
8198c2ecf20Sopenharmony_ciout_revert_acct:
8208c2ecf20Sopenharmony_ci	(void) account_pipe_buffers(user, pipe_bufs, 0);
8218c2ecf20Sopenharmony_ci	kfree(pipe);
8228c2ecf20Sopenharmony_ciout_free_uid:
8238c2ecf20Sopenharmony_ci	free_uid(user);
8248c2ecf20Sopenharmony_ci	return NULL;
8258c2ecf20Sopenharmony_ci}
8268c2ecf20Sopenharmony_ci
8278c2ecf20Sopenharmony_civoid free_pipe_info(struct pipe_inode_info *pipe)
8288c2ecf20Sopenharmony_ci{
8298c2ecf20Sopenharmony_ci	int i;
8308c2ecf20Sopenharmony_ci
8318c2ecf20Sopenharmony_ci#ifdef CONFIG_WATCH_QUEUE
8328c2ecf20Sopenharmony_ci	if (pipe->watch_queue)
8338c2ecf20Sopenharmony_ci		watch_queue_clear(pipe->watch_queue);
8348c2ecf20Sopenharmony_ci#endif
8358c2ecf20Sopenharmony_ci
8368c2ecf20Sopenharmony_ci	(void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
8378c2ecf20Sopenharmony_ci	free_uid(pipe->user);
8388c2ecf20Sopenharmony_ci	for (i = 0; i < pipe->ring_size; i++) {
8398c2ecf20Sopenharmony_ci		struct pipe_buffer *buf = pipe->bufs + i;
8408c2ecf20Sopenharmony_ci		if (buf->ops)
8418c2ecf20Sopenharmony_ci			pipe_buf_release(pipe, buf);
8428c2ecf20Sopenharmony_ci	}
8438c2ecf20Sopenharmony_ci#ifdef CONFIG_WATCH_QUEUE
8448c2ecf20Sopenharmony_ci	if (pipe->watch_queue)
8458c2ecf20Sopenharmony_ci		put_watch_queue(pipe->watch_queue);
8468c2ecf20Sopenharmony_ci#endif
8478c2ecf20Sopenharmony_ci	if (pipe->tmp_page)
8488c2ecf20Sopenharmony_ci		__free_page(pipe->tmp_page);
8498c2ecf20Sopenharmony_ci	kfree(pipe->bufs);
8508c2ecf20Sopenharmony_ci	kfree(pipe);
8518c2ecf20Sopenharmony_ci}
8528c2ecf20Sopenharmony_ci
/*
 * Mount used for anonymous pipes: get_pipe_inode() allocates inodes on
 * its superblock and create_pipe_files() creates the pseudo files on it.
 * NOTE(review): assigned outside this part of the file - presumably at
 * init time; confirm.
 */
static struct vfsmount *pipe_mnt __read_mostly;
8548c2ecf20Sopenharmony_ci
8558c2ecf20Sopenharmony_ci/*
8568c2ecf20Sopenharmony_ci * pipefs_dname() is called from d_path().
8578c2ecf20Sopenharmony_ci */
8588c2ecf20Sopenharmony_cistatic char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
8598c2ecf20Sopenharmony_ci{
8608c2ecf20Sopenharmony_ci	return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
8618c2ecf20Sopenharmony_ci				d_inode(dentry)->i_ino);
8628c2ecf20Sopenharmony_ci}
8638c2ecf20Sopenharmony_ci
/*
 * Dentry operations for pipefs dentries: only ->d_dname is provided, so
 * names are generated on demand by pipefs_dname().
 */
static const struct dentry_operations pipefs_dentry_operations = {
	.d_dname	= pipefs_dname,
};
8678c2ecf20Sopenharmony_ci
8688c2ecf20Sopenharmony_cistatic struct inode * get_pipe_inode(void)
8698c2ecf20Sopenharmony_ci{
8708c2ecf20Sopenharmony_ci	struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
8718c2ecf20Sopenharmony_ci	struct pipe_inode_info *pipe;
8728c2ecf20Sopenharmony_ci
8738c2ecf20Sopenharmony_ci	if (!inode)
8748c2ecf20Sopenharmony_ci		goto fail_inode;
8758c2ecf20Sopenharmony_ci
8768c2ecf20Sopenharmony_ci	inode->i_ino = get_next_ino();
8778c2ecf20Sopenharmony_ci
8788c2ecf20Sopenharmony_ci	pipe = alloc_pipe_info();
8798c2ecf20Sopenharmony_ci	if (!pipe)
8808c2ecf20Sopenharmony_ci		goto fail_iput;
8818c2ecf20Sopenharmony_ci
8828c2ecf20Sopenharmony_ci	inode->i_pipe = pipe;
8838c2ecf20Sopenharmony_ci	pipe->files = 2;
8848c2ecf20Sopenharmony_ci	pipe->readers = pipe->writers = 1;
8858c2ecf20Sopenharmony_ci	inode->i_fop = &pipefifo_fops;
8868c2ecf20Sopenharmony_ci
8878c2ecf20Sopenharmony_ci	/*
8888c2ecf20Sopenharmony_ci	 * Mark the inode dirty from the very beginning,
8898c2ecf20Sopenharmony_ci	 * that way it will never be moved to the dirty
8908c2ecf20Sopenharmony_ci	 * list because "mark_inode_dirty()" will think
8918c2ecf20Sopenharmony_ci	 * that it already _is_ on the dirty list.
8928c2ecf20Sopenharmony_ci	 */
8938c2ecf20Sopenharmony_ci	inode->i_state = I_DIRTY;
8948c2ecf20Sopenharmony_ci	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
8958c2ecf20Sopenharmony_ci	inode->i_uid = current_fsuid();
8968c2ecf20Sopenharmony_ci	inode->i_gid = current_fsgid();
8978c2ecf20Sopenharmony_ci	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
8988c2ecf20Sopenharmony_ci
8998c2ecf20Sopenharmony_ci	return inode;
9008c2ecf20Sopenharmony_ci
9018c2ecf20Sopenharmony_cifail_iput:
9028c2ecf20Sopenharmony_ci	iput(inode);
9038c2ecf20Sopenharmony_ci
9048c2ecf20Sopenharmony_cifail_inode:
9058c2ecf20Sopenharmony_ci	return NULL;
9068c2ecf20Sopenharmony_ci}
9078c2ecf20Sopenharmony_ci
9088c2ecf20Sopenharmony_ciint create_pipe_files(struct file **res, int flags)
9098c2ecf20Sopenharmony_ci{
9108c2ecf20Sopenharmony_ci	struct inode *inode = get_pipe_inode();
9118c2ecf20Sopenharmony_ci	struct file *f;
9128c2ecf20Sopenharmony_ci	int error;
9138c2ecf20Sopenharmony_ci
9148c2ecf20Sopenharmony_ci	if (!inode)
9158c2ecf20Sopenharmony_ci		return -ENFILE;
9168c2ecf20Sopenharmony_ci
9178c2ecf20Sopenharmony_ci	if (flags & O_NOTIFICATION_PIPE) {
9188c2ecf20Sopenharmony_ci		error = watch_queue_init(inode->i_pipe);
9198c2ecf20Sopenharmony_ci		if (error) {
9208c2ecf20Sopenharmony_ci			free_pipe_info(inode->i_pipe);
9218c2ecf20Sopenharmony_ci			iput(inode);
9228c2ecf20Sopenharmony_ci			return error;
9238c2ecf20Sopenharmony_ci		}
9248c2ecf20Sopenharmony_ci	}
9258c2ecf20Sopenharmony_ci
9268c2ecf20Sopenharmony_ci	f = alloc_file_pseudo(inode, pipe_mnt, "",
9278c2ecf20Sopenharmony_ci				O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
9288c2ecf20Sopenharmony_ci				&pipefifo_fops);
9298c2ecf20Sopenharmony_ci	if (IS_ERR(f)) {
9308c2ecf20Sopenharmony_ci		free_pipe_info(inode->i_pipe);
9318c2ecf20Sopenharmony_ci		iput(inode);
9328c2ecf20Sopenharmony_ci		return PTR_ERR(f);
9338c2ecf20Sopenharmony_ci	}
9348c2ecf20Sopenharmony_ci
9358c2ecf20Sopenharmony_ci	f->private_data = inode->i_pipe;
9368c2ecf20Sopenharmony_ci
9378c2ecf20Sopenharmony_ci	res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
9388c2ecf20Sopenharmony_ci				  &pipefifo_fops);
9398c2ecf20Sopenharmony_ci	if (IS_ERR(res[0])) {
9408c2ecf20Sopenharmony_ci		put_pipe_info(inode, inode->i_pipe);
9418c2ecf20Sopenharmony_ci		fput(f);
9428c2ecf20Sopenharmony_ci		return PTR_ERR(res[0]);
9438c2ecf20Sopenharmony_ci	}
9448c2ecf20Sopenharmony_ci	res[0]->private_data = inode->i_pipe;
9458c2ecf20Sopenharmony_ci	res[1] = f;
9468c2ecf20Sopenharmony_ci	stream_open(inode, res[0]);
9478c2ecf20Sopenharmony_ci	stream_open(inode, res[1]);
9488c2ecf20Sopenharmony_ci	return 0;
9498c2ecf20Sopenharmony_ci}
9508c2ecf20Sopenharmony_ci
9518c2ecf20Sopenharmony_cistatic int __do_pipe_flags(int *fd, struct file **files, int flags)
9528c2ecf20Sopenharmony_ci{
9538c2ecf20Sopenharmony_ci	int error;
9548c2ecf20Sopenharmony_ci	int fdw, fdr;
9558c2ecf20Sopenharmony_ci
9568c2ecf20Sopenharmony_ci	if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
9578c2ecf20Sopenharmony_ci		return -EINVAL;
9588c2ecf20Sopenharmony_ci
9598c2ecf20Sopenharmony_ci	error = create_pipe_files(files, flags);
9608c2ecf20Sopenharmony_ci	if (error)
9618c2ecf20Sopenharmony_ci		return error;
9628c2ecf20Sopenharmony_ci
9638c2ecf20Sopenharmony_ci	error = get_unused_fd_flags(flags);
9648c2ecf20Sopenharmony_ci	if (error < 0)
9658c2ecf20Sopenharmony_ci		goto err_read_pipe;
9668c2ecf20Sopenharmony_ci	fdr = error;
9678c2ecf20Sopenharmony_ci
9688c2ecf20Sopenharmony_ci	error = get_unused_fd_flags(flags);
9698c2ecf20Sopenharmony_ci	if (error < 0)
9708c2ecf20Sopenharmony_ci		goto err_fdr;
9718c2ecf20Sopenharmony_ci	fdw = error;
9728c2ecf20Sopenharmony_ci
9738c2ecf20Sopenharmony_ci	audit_fd_pair(fdr, fdw);
9748c2ecf20Sopenharmony_ci	fd[0] = fdr;
9758c2ecf20Sopenharmony_ci	fd[1] = fdw;
9768c2ecf20Sopenharmony_ci	return 0;
9778c2ecf20Sopenharmony_ci
9788c2ecf20Sopenharmony_ci err_fdr:
9798c2ecf20Sopenharmony_ci	put_unused_fd(fdr);
9808c2ecf20Sopenharmony_ci err_read_pipe:
9818c2ecf20Sopenharmony_ci	fput(files[0]);
9828c2ecf20Sopenharmony_ci	fput(files[1]);
9838c2ecf20Sopenharmony_ci	return error;
9848c2ecf20Sopenharmony_ci}
9858c2ecf20Sopenharmony_ci
/*
 * Create a pipe and install both ends into the caller's descriptor
 * table.  On success @fd holds the read (fd[0]) and write (fd[1])
 * descriptors and 0 is returned; otherwise the error from
 * __do_pipe_flags() is returned and nothing is installed.
 */
int do_pipe_flags(int *fd, int flags)
{
	struct file *files[2];
	int error;

	error = __do_pipe_flags(fd, files, flags);
	if (error)
		return error;

	fd_install(fd[0], files[0]);
	fd_install(fd[1], files[1]);
	return 0;
}
9968c2ecf20Sopenharmony_ci
9978c2ecf20Sopenharmony_ci/*
9988c2ecf20Sopenharmony_ci * sys_pipe() is the normal C calling standard for creating
9998c2ecf20Sopenharmony_ci * a pipe. It's not the way Unix traditionally does this, though.
10008c2ecf20Sopenharmony_ci */
10018c2ecf20Sopenharmony_cistatic int do_pipe2(int __user *fildes, int flags)
10028c2ecf20Sopenharmony_ci{
10038c2ecf20Sopenharmony_ci	struct file *files[2];
10048c2ecf20Sopenharmony_ci	int fd[2];
10058c2ecf20Sopenharmony_ci	int error;
10068c2ecf20Sopenharmony_ci
10078c2ecf20Sopenharmony_ci	error = __do_pipe_flags(fd, files, flags);
10088c2ecf20Sopenharmony_ci	if (!error) {
10098c2ecf20Sopenharmony_ci		if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
10108c2ecf20Sopenharmony_ci			fput(files[0]);
10118c2ecf20Sopenharmony_ci			fput(files[1]);
10128c2ecf20Sopenharmony_ci			put_unused_fd(fd[0]);
10138c2ecf20Sopenharmony_ci			put_unused_fd(fd[1]);
10148c2ecf20Sopenharmony_ci			error = -EFAULT;
10158c2ecf20Sopenharmony_ci		} else {
10168c2ecf20Sopenharmony_ci			fd_install(fd[0], files[0]);
10178c2ecf20Sopenharmony_ci			fd_install(fd[1], files[1]);
10188c2ecf20Sopenharmony_ci		}
10198c2ecf20Sopenharmony_ci	}
10208c2ecf20Sopenharmony_ci	return error;
10218c2ecf20Sopenharmony_ci}
10228c2ecf20Sopenharmony_ci
/* pipe2 system call: hands @fildes and @flags straight to do_pipe2(). */
SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
{
	return do_pipe2(fildes, flags);
}
10278c2ecf20Sopenharmony_ci
/* pipe system call: same as pipe2 with no flags set. */
SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
	return do_pipe2(fildes, 0);
}
10328c2ecf20Sopenharmony_ci
/*
 * This is the stupid "wait for pipe to be readable or writable"
 * model.
 *
 * See pipe_read/write() for the proper kind of exclusive wait,
 * but that requires that we wake up any other readers/writers
 * if we then do not end up reading everything (ie the whole
 * "wake_next_reader/writer" logic in pipe_read/write()).
 *
 * pipe_wait_readable() drops the pipe lock, sleeps interruptibly on
 * rd_wait until pipe_readable() holds, and re-takes the lock before
 * returning.  The result of the wait is not returned, so the function
 * itself does not tell the caller whether the wait ended early.
 */
void pipe_wait_readable(struct pipe_inode_info *pipe)
{
	pipe_unlock(pipe);
	wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe));
	pipe_lock(pipe);
}
10488c2ecf20Sopenharmony_ci
/*
 * Drop the pipe lock, sleep interruptibly on wr_wait until
 * pipe_writable() holds, then re-take the lock.  The result of the wait
 * is not returned to the caller.
 */
void pipe_wait_writable(struct pipe_inode_info *pipe)
{
	pipe_unlock(pipe);
	wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe));
	pipe_lock(pipe);
}
10558c2ecf20Sopenharmony_ci
10568c2ecf20Sopenharmony_ci/*
10578c2ecf20Sopenharmony_ci * This depends on both the wait (here) and the wakeup (wake_up_partner)
10588c2ecf20Sopenharmony_ci * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot
10598c2ecf20Sopenharmony_ci * race with the count check and waitqueue prep.
10608c2ecf20Sopenharmony_ci *
10618c2ecf20Sopenharmony_ci * Normally in order to avoid races, you'd do the prepare_to_wait() first,
10628c2ecf20Sopenharmony_ci * then check the condition you're waiting for, and only then sleep. But
10638c2ecf20Sopenharmony_ci * because of the pipe lock, we can check the condition before being on
10648c2ecf20Sopenharmony_ci * the wait queue.
10658c2ecf20Sopenharmony_ci *
10668c2ecf20Sopenharmony_ci * We use the 'rd_wait' waitqueue for pipe partner waiting.
10678c2ecf20Sopenharmony_ci */
10688c2ecf20Sopenharmony_cistatic int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
10698c2ecf20Sopenharmony_ci{
10708c2ecf20Sopenharmony_ci	DEFINE_WAIT(rdwait);
10718c2ecf20Sopenharmony_ci	int cur = *cnt;
10728c2ecf20Sopenharmony_ci
10738c2ecf20Sopenharmony_ci	while (cur == *cnt) {
10748c2ecf20Sopenharmony_ci		prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
10758c2ecf20Sopenharmony_ci		pipe_unlock(pipe);
10768c2ecf20Sopenharmony_ci		schedule();
10778c2ecf20Sopenharmony_ci		finish_wait(&pipe->rd_wait, &rdwait);
10788c2ecf20Sopenharmony_ci		pipe_lock(pipe);
10798c2ecf20Sopenharmony_ci		if (signal_pending(current))
10808c2ecf20Sopenharmony_ci			break;
10818c2ecf20Sopenharmony_ci	}
10828c2ecf20Sopenharmony_ci	return cur == *cnt ? -ERESTARTSYS : 0;
10838c2ecf20Sopenharmony_ci}
10848c2ecf20Sopenharmony_ci
/*
 * Wake every interruptible sleeper on rd_wait - the queue
 * wait_for_partner() sleeps on while waiting for the other end.
 */
static void wake_up_partner(struct pipe_inode_info *pipe)
{
	wake_up_interruptible_all(&pipe->rd_wait);
}
10898c2ecf20Sopenharmony_ci
/*
 * ->open for pipes and FIFOs.
 *
 * Attaches @filp to the inode's pipe (allocating one if the inode has
 * none yet), bumps the reader/writer counts according to the open mode
 * and, for inodes that are not on pipefs, applies the blocking rules
 * spelled out in the per-mode comments below.
 *
 * Returns 0 on success, -ENOMEM if a pipe could not be allocated, -ENXIO
 * for a non-blocking write-only open without readers, -ERESTARTSYS if
 * waiting for the other end was interrupted, and -EINVAL if the file is
 * open for neither reading nor writing.  On the error paths taken after
 * the pipe was obtained, the pipe reference is dropped again.
 */
static int fifo_open(struct inode *inode, struct file *filp)
{
	struct pipe_inode_info *pipe;
	/* True when the inode lives on the pipefs superblock. */
	bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
	int ret;

	/* pipe_poll() compares f_version with w_counter for EPOLLHUP. */
	filp->f_version = 0;

	spin_lock(&inode->i_lock);
	if (inode->i_pipe) {
		/* Pipe already exists: just take another file reference. */
		pipe = inode->i_pipe;
		pipe->files++;
		spin_unlock(&inode->i_lock);
	} else {
		/* Allocate with i_lock dropped, then re-check under the lock. */
		spin_unlock(&inode->i_lock);
		pipe = alloc_pipe_info();
		if (!pipe)
			return -ENOMEM;
		pipe->files = 1;
		spin_lock(&inode->i_lock);
		if (unlikely(inode->i_pipe)) {
			/* Someone else attached a pipe meanwhile: use theirs. */
			inode->i_pipe->files++;
			spin_unlock(&inode->i_lock);
			free_pipe_info(pipe);
			pipe = inode->i_pipe;
		} else {
			inode->i_pipe = pipe;
			spin_unlock(&inode->i_lock);
		}
	}
	filp->private_data = pipe;
	/* OK, we have a pipe and it's pinned down */

	__pipe_lock(pipe);

	/* We can only do regular read/write on fifos */
	stream_open(inode, filp);

	switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) {
	case FMODE_READ:
	/*
	 *  O_RDONLY
	 *  POSIX.1 says that O_NONBLOCK means return with the FIFO
	 *  opened, even when there is no process writing the FIFO.
	 */
		pipe->r_counter++;
		/* First reader: wake anyone waiting for a partner. */
		if (pipe->readers++ == 0)
			wake_up_partner(pipe);

		if (!is_pipe && !pipe->writers) {
			if ((filp->f_flags & O_NONBLOCK)) {
				/* suppress EPOLLHUP until we have
				 * seen a writer */
				filp->f_version = pipe->w_counter;
			} else {
				/* Block until w_counter changes or a signal. */
				if (wait_for_partner(pipe, &pipe->w_counter))
					goto err_rd;
			}
		}
		break;

	case FMODE_WRITE:
	/*
	 *  O_WRONLY
	 *  POSIX.1 says that O_NONBLOCK means return -1 with
	 *  errno=ENXIO when there is no process reading the FIFO.
	 */
		ret = -ENXIO;
		/* Counts are untouched at this point, so plain "err" suffices. */
		if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
			goto err;

		pipe->w_counter++;
		/* First writer: wake anyone waiting for a partner. */
		if (!pipe->writers++)
			wake_up_partner(pipe);

		if (!is_pipe && !pipe->readers) {
			/* Block until r_counter changes or a signal. */
			if (wait_for_partner(pipe, &pipe->r_counter))
				goto err_wr;
		}
		break;

	case FMODE_READ | FMODE_WRITE:
	/*
	 *  O_RDWR
	 *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
	 *  This implementation will NEVER block on a O_RDWR open, since
	 *  the process can at least talk to itself.
	 */

		pipe->readers++;
		pipe->writers++;
		pipe->r_counter++;
		pipe->w_counter++;
		if (pipe->readers == 1 || pipe->writers == 1)
			wake_up_partner(pipe);
		break;

	default:
		ret = -EINVAL;
		goto err;
	}

	/* Ok! */
	__pipe_unlock(pipe);
	return 0;

err_rd:
	/* Undo the reader count taken above; wake writers if none remain. */
	if (!--pipe->readers)
		wake_up_interruptible(&pipe->wr_wait);
	ret = -ERESTARTSYS;
	goto err;

err_wr:
	/* Undo the writer count taken above; wake readers if none remain. */
	if (!--pipe->writers)
		wake_up_interruptible_all(&pipe->rd_wait);
	ret = -ERESTARTSYS;
	goto err;

err:
	__pipe_unlock(pipe);

	/* Drop the file reference on the pipe taken at the top. */
	put_pipe_info(inode, pipe);
	return ret;
}
12148c2ecf20Sopenharmony_ci
/*
 * File operations used for pipe files: installed as i_fop by
 * get_pipe_inode() and passed for both ends in create_pipe_files().
 * Seeking is rejected via no_llseek.
 */
const struct file_operations pipefifo_fops = {
	.open		= fifo_open,
	.llseek		= no_llseek,
	.read_iter	= pipe_read,
	.write_iter	= pipe_write,
	.poll		= pipe_poll,
	.unlocked_ioctl	= pipe_ioctl,
	.release	= pipe_release,
	.fasync		= pipe_fasync,
	.splice_write	= iter_file_splice_write,
};
12268c2ecf20Sopenharmony_ci
12278c2ecf20Sopenharmony_ci/*
12288c2ecf20Sopenharmony_ci * Currently we rely on the pipe array holding a power-of-2 number
12298c2ecf20Sopenharmony_ci * of pages. Returns 0 on error.
12308c2ecf20Sopenharmony_ci */
12318c2ecf20Sopenharmony_ciunsigned int round_pipe_size(unsigned long size)
12328c2ecf20Sopenharmony_ci{
12338c2ecf20Sopenharmony_ci	if (size > (1U << 31))
12348c2ecf20Sopenharmony_ci		return 0;
12358c2ecf20Sopenharmony_ci
12368c2ecf20Sopenharmony_ci	/* Minimum pipe size, as required by POSIX */
12378c2ecf20Sopenharmony_ci	if (size < PAGE_SIZE)
12388c2ecf20Sopenharmony_ci		return PAGE_SIZE;
12398c2ecf20Sopenharmony_ci
12408c2ecf20Sopenharmony_ci	return roundup_pow_of_two(size);
12418c2ecf20Sopenharmony_ci}
12428c2ecf20Sopenharmony_ci
12438c2ecf20Sopenharmony_ci/*
12448c2ecf20Sopenharmony_ci * Resize the pipe ring to a number of slots.
12458c2ecf20Sopenharmony_ci *
12468c2ecf20Sopenharmony_ci * Note the pipe can be reduced in capacity, but only if the current
12478c2ecf20Sopenharmony_ci * occupancy doesn't exceed nr_slots; if it does, EBUSY will be
12488c2ecf20Sopenharmony_ci * returned instead.
12498c2ecf20Sopenharmony_ci */
12508c2ecf20Sopenharmony_ciint pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
12518c2ecf20Sopenharmony_ci{
12528c2ecf20Sopenharmony_ci	struct pipe_buffer *bufs;
12538c2ecf20Sopenharmony_ci	unsigned int head, tail, mask, n;
12548c2ecf20Sopenharmony_ci
12558c2ecf20Sopenharmony_ci	bufs = kcalloc(nr_slots, sizeof(*bufs),
12568c2ecf20Sopenharmony_ci		       GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
12578c2ecf20Sopenharmony_ci	if (unlikely(!bufs))
12588c2ecf20Sopenharmony_ci		return -ENOMEM;
12598c2ecf20Sopenharmony_ci
12608c2ecf20Sopenharmony_ci	spin_lock_irq(&pipe->rd_wait.lock);
12618c2ecf20Sopenharmony_ci	mask = pipe->ring_size - 1;
12628c2ecf20Sopenharmony_ci	head = pipe->head;
12638c2ecf20Sopenharmony_ci	tail = pipe->tail;
12648c2ecf20Sopenharmony_ci
12658c2ecf20Sopenharmony_ci	n = pipe_occupancy(head, tail);
12668c2ecf20Sopenharmony_ci	if (nr_slots < n) {
12678c2ecf20Sopenharmony_ci		spin_unlock_irq(&pipe->rd_wait.lock);
12688c2ecf20Sopenharmony_ci		kfree(bufs);
12698c2ecf20Sopenharmony_ci		return -EBUSY;
12708c2ecf20Sopenharmony_ci	}
12718c2ecf20Sopenharmony_ci
12728c2ecf20Sopenharmony_ci	/*
12738c2ecf20Sopenharmony_ci	 * The pipe array wraps around, so just start the new one at zero
12748c2ecf20Sopenharmony_ci	 * and adjust the indices.
12758c2ecf20Sopenharmony_ci	 */
12768c2ecf20Sopenharmony_ci	if (n > 0) {
12778c2ecf20Sopenharmony_ci		unsigned int h = head & mask;
12788c2ecf20Sopenharmony_ci		unsigned int t = tail & mask;
12798c2ecf20Sopenharmony_ci		if (h > t) {
12808c2ecf20Sopenharmony_ci			memcpy(bufs, pipe->bufs + t,
12818c2ecf20Sopenharmony_ci			       n * sizeof(struct pipe_buffer));
12828c2ecf20Sopenharmony_ci		} else {
12838c2ecf20Sopenharmony_ci			unsigned int tsize = pipe->ring_size - t;
12848c2ecf20Sopenharmony_ci			if (h > 0)
12858c2ecf20Sopenharmony_ci				memcpy(bufs + tsize, pipe->bufs,
12868c2ecf20Sopenharmony_ci				       h * sizeof(struct pipe_buffer));
12878c2ecf20Sopenharmony_ci			memcpy(bufs, pipe->bufs + t,
12888c2ecf20Sopenharmony_ci			       tsize * sizeof(struct pipe_buffer));
12898c2ecf20Sopenharmony_ci		}
12908c2ecf20Sopenharmony_ci	}
12918c2ecf20Sopenharmony_ci
12928c2ecf20Sopenharmony_ci	head = n;
12938c2ecf20Sopenharmony_ci	tail = 0;
12948c2ecf20Sopenharmony_ci
12958c2ecf20Sopenharmony_ci	kfree(pipe->bufs);
12968c2ecf20Sopenharmony_ci	pipe->bufs = bufs;
12978c2ecf20Sopenharmony_ci	pipe->ring_size = nr_slots;
12988c2ecf20Sopenharmony_ci	if (pipe->max_usage > nr_slots)
12998c2ecf20Sopenharmony_ci		pipe->max_usage = nr_slots;
13008c2ecf20Sopenharmony_ci	pipe->tail = tail;
13018c2ecf20Sopenharmony_ci	pipe->head = head;
13028c2ecf20Sopenharmony_ci
13038c2ecf20Sopenharmony_ci	if (!pipe_has_watch_queue(pipe)) {
13048c2ecf20Sopenharmony_ci		pipe->max_usage = nr_slots;
13058c2ecf20Sopenharmony_ci		pipe->nr_accounted = nr_slots;
13068c2ecf20Sopenharmony_ci	}
13078c2ecf20Sopenharmony_ci
13088c2ecf20Sopenharmony_ci	spin_unlock_irq(&pipe->rd_wait.lock);
13098c2ecf20Sopenharmony_ci
13108c2ecf20Sopenharmony_ci	/* This might have made more room for writers */
13118c2ecf20Sopenharmony_ci	wake_up_interruptible(&pipe->wr_wait);
13128c2ecf20Sopenharmony_ci	return 0;
13138c2ecf20Sopenharmony_ci}
13148c2ecf20Sopenharmony_ci
13158c2ecf20Sopenharmony_ci/*
13168c2ecf20Sopenharmony_ci * Allocate a new array of pipe buffers and copy the info over. Returns the
13178c2ecf20Sopenharmony_ci * pipe size if successful, or return -ERROR on error.
13188c2ecf20Sopenharmony_ci */
13198c2ecf20Sopenharmony_cistatic long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
13208c2ecf20Sopenharmony_ci{
13218c2ecf20Sopenharmony_ci	unsigned long user_bufs;
13228c2ecf20Sopenharmony_ci	unsigned int nr_slots, size;
13238c2ecf20Sopenharmony_ci	long ret = 0;
13248c2ecf20Sopenharmony_ci
13258c2ecf20Sopenharmony_ci	if (pipe_has_watch_queue(pipe))
13268c2ecf20Sopenharmony_ci		return -EBUSY;
13278c2ecf20Sopenharmony_ci
13288c2ecf20Sopenharmony_ci	size = round_pipe_size(arg);
13298c2ecf20Sopenharmony_ci	nr_slots = size >> PAGE_SHIFT;
13308c2ecf20Sopenharmony_ci
13318c2ecf20Sopenharmony_ci	if (!nr_slots)
13328c2ecf20Sopenharmony_ci		return -EINVAL;
13338c2ecf20Sopenharmony_ci
13348c2ecf20Sopenharmony_ci	/*
13358c2ecf20Sopenharmony_ci	 * If trying to increase the pipe capacity, check that an
13368c2ecf20Sopenharmony_ci	 * unprivileged user is not trying to exceed various limits
13378c2ecf20Sopenharmony_ci	 * (soft limit check here, hard limit check just below).
13388c2ecf20Sopenharmony_ci	 * Decreasing the pipe capacity is always permitted, even
13398c2ecf20Sopenharmony_ci	 * if the user is currently over a limit.
13408c2ecf20Sopenharmony_ci	 */
13418c2ecf20Sopenharmony_ci	if (nr_slots > pipe->max_usage &&
13428c2ecf20Sopenharmony_ci			size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
13438c2ecf20Sopenharmony_ci		return -EPERM;
13448c2ecf20Sopenharmony_ci
13458c2ecf20Sopenharmony_ci	user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots);
13468c2ecf20Sopenharmony_ci
13478c2ecf20Sopenharmony_ci	if (nr_slots > pipe->max_usage &&
13488c2ecf20Sopenharmony_ci			(too_many_pipe_buffers_hard(user_bufs) ||
13498c2ecf20Sopenharmony_ci			 too_many_pipe_buffers_soft(user_bufs)) &&
13508c2ecf20Sopenharmony_ci			pipe_is_unprivileged_user()) {
13518c2ecf20Sopenharmony_ci		ret = -EPERM;
13528c2ecf20Sopenharmony_ci		goto out_revert_acct;
13538c2ecf20Sopenharmony_ci	}
13548c2ecf20Sopenharmony_ci
13558c2ecf20Sopenharmony_ci	ret = pipe_resize_ring(pipe, nr_slots);
13568c2ecf20Sopenharmony_ci	if (ret < 0)
13578c2ecf20Sopenharmony_ci		goto out_revert_acct;
13588c2ecf20Sopenharmony_ci
13598c2ecf20Sopenharmony_ci	return pipe->max_usage * PAGE_SIZE;
13608c2ecf20Sopenharmony_ci
13618c2ecf20Sopenharmony_ciout_revert_acct:
13628c2ecf20Sopenharmony_ci	(void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted);
13638c2ecf20Sopenharmony_ci	return ret;
13648c2ecf20Sopenharmony_ci}
13658c2ecf20Sopenharmony_ci
13668c2ecf20Sopenharmony_ci/*
13678c2ecf20Sopenharmony_ci * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
13688c2ecf20Sopenharmony_ci * location, so checking ->i_pipe is not enough to verify that this is a
13698c2ecf20Sopenharmony_ci * pipe.
13708c2ecf20Sopenharmony_ci */
13718c2ecf20Sopenharmony_cistruct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
13728c2ecf20Sopenharmony_ci{
13738c2ecf20Sopenharmony_ci	struct pipe_inode_info *pipe = file->private_data;
13748c2ecf20Sopenharmony_ci
13758c2ecf20Sopenharmony_ci	if (file->f_op != &pipefifo_fops || !pipe)
13768c2ecf20Sopenharmony_ci		return NULL;
13778c2ecf20Sopenharmony_ci	if (for_splice && pipe_has_watch_queue(pipe))
13788c2ecf20Sopenharmony_ci		return NULL;
13798c2ecf20Sopenharmony_ci	return pipe;
13808c2ecf20Sopenharmony_ci}
13818c2ecf20Sopenharmony_ci
13828c2ecf20Sopenharmony_cilong pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
13838c2ecf20Sopenharmony_ci{
13848c2ecf20Sopenharmony_ci	struct pipe_inode_info *pipe;
13858c2ecf20Sopenharmony_ci	long ret;
13868c2ecf20Sopenharmony_ci
13878c2ecf20Sopenharmony_ci	pipe = get_pipe_info(file, false);
13888c2ecf20Sopenharmony_ci	if (!pipe)
13898c2ecf20Sopenharmony_ci		return -EBADF;
13908c2ecf20Sopenharmony_ci
13918c2ecf20Sopenharmony_ci	__pipe_lock(pipe);
13928c2ecf20Sopenharmony_ci
13938c2ecf20Sopenharmony_ci	switch (cmd) {
13948c2ecf20Sopenharmony_ci	case F_SETPIPE_SZ:
13958c2ecf20Sopenharmony_ci		ret = pipe_set_size(pipe, arg);
13968c2ecf20Sopenharmony_ci		break;
13978c2ecf20Sopenharmony_ci	case F_GETPIPE_SZ:
13988c2ecf20Sopenharmony_ci		ret = pipe->max_usage * PAGE_SIZE;
13998c2ecf20Sopenharmony_ci		break;
14008c2ecf20Sopenharmony_ci	default:
14018c2ecf20Sopenharmony_ci		ret = -EINVAL;
14028c2ecf20Sopenharmony_ci		break;
14038c2ecf20Sopenharmony_ci	}
14048c2ecf20Sopenharmony_ci
14058c2ecf20Sopenharmony_ci	__pipe_unlock(pipe);
14068c2ecf20Sopenharmony_ci	return ret;
14078c2ecf20Sopenharmony_ci}
14088c2ecf20Sopenharmony_ci
14098c2ecf20Sopenharmony_cistatic const struct super_operations pipefs_ops = {
14108c2ecf20Sopenharmony_ci	.destroy_inode = free_inode_nonrcu,
14118c2ecf20Sopenharmony_ci	.statfs = simple_statfs,
14128c2ecf20Sopenharmony_ci};
14138c2ecf20Sopenharmony_ci
14148c2ecf20Sopenharmony_ci/*
14158c2ecf20Sopenharmony_ci * pipefs should _never_ be mounted by userland - too much of security hassle,
14168c2ecf20Sopenharmony_ci * no real gain from having the whole whorehouse mounted. So we don't need
14178c2ecf20Sopenharmony_ci * any operations on the root directory. However, we need a non-trivial
14188c2ecf20Sopenharmony_ci * d_name - pipe: will go nicely and kill the special-casing in procfs.
14198c2ecf20Sopenharmony_ci */
14208c2ecf20Sopenharmony_ci
14218c2ecf20Sopenharmony_cistatic int pipefs_init_fs_context(struct fs_context *fc)
14228c2ecf20Sopenharmony_ci{
14238c2ecf20Sopenharmony_ci	struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC);
14248c2ecf20Sopenharmony_ci	if (!ctx)
14258c2ecf20Sopenharmony_ci		return -ENOMEM;
14268c2ecf20Sopenharmony_ci	ctx->ops = &pipefs_ops;
14278c2ecf20Sopenharmony_ci	ctx->dops = &pipefs_dentry_operations;
14288c2ecf20Sopenharmony_ci	return 0;
14298c2ecf20Sopenharmony_ci}
14308c2ecf20Sopenharmony_ci
14318c2ecf20Sopenharmony_cistatic struct file_system_type pipe_fs_type = {
14328c2ecf20Sopenharmony_ci	.name		= "pipefs",
14338c2ecf20Sopenharmony_ci	.init_fs_context = pipefs_init_fs_context,
14348c2ecf20Sopenharmony_ci	.kill_sb	= kill_anon_super,
14358c2ecf20Sopenharmony_ci};
14368c2ecf20Sopenharmony_ci
14378c2ecf20Sopenharmony_cistatic int __init init_pipe_fs(void)
14388c2ecf20Sopenharmony_ci{
14398c2ecf20Sopenharmony_ci	int err = register_filesystem(&pipe_fs_type);
14408c2ecf20Sopenharmony_ci
14418c2ecf20Sopenharmony_ci	if (!err) {
14428c2ecf20Sopenharmony_ci		pipe_mnt = kern_mount(&pipe_fs_type);
14438c2ecf20Sopenharmony_ci		if (IS_ERR(pipe_mnt)) {
14448c2ecf20Sopenharmony_ci			err = PTR_ERR(pipe_mnt);
14458c2ecf20Sopenharmony_ci			unregister_filesystem(&pipe_fs_type);
14468c2ecf20Sopenharmony_ci		}
14478c2ecf20Sopenharmony_ci	}
14488c2ecf20Sopenharmony_ci	return err;
14498c2ecf20Sopenharmony_ci}
14508c2ecf20Sopenharmony_ci
14518c2ecf20Sopenharmony_cifs_initcall(init_pipe_fs);
1452