162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * linux/fs/pipe.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 1991, 1992, 1999 Linus Torvalds 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <linux/mm.h> 962306a36Sopenharmony_ci#include <linux/file.h> 1062306a36Sopenharmony_ci#include <linux/poll.h> 1162306a36Sopenharmony_ci#include <linux/slab.h> 1262306a36Sopenharmony_ci#include <linux/module.h> 1362306a36Sopenharmony_ci#include <linux/init.h> 1462306a36Sopenharmony_ci#include <linux/fs.h> 1562306a36Sopenharmony_ci#include <linux/log2.h> 1662306a36Sopenharmony_ci#include <linux/mount.h> 1762306a36Sopenharmony_ci#include <linux/pseudo_fs.h> 1862306a36Sopenharmony_ci#include <linux/magic.h> 1962306a36Sopenharmony_ci#include <linux/pipe_fs_i.h> 2062306a36Sopenharmony_ci#include <linux/uio.h> 2162306a36Sopenharmony_ci#include <linux/highmem.h> 2262306a36Sopenharmony_ci#include <linux/pagemap.h> 2362306a36Sopenharmony_ci#include <linux/audit.h> 2462306a36Sopenharmony_ci#include <linux/syscalls.h> 2562306a36Sopenharmony_ci#include <linux/fcntl.h> 2662306a36Sopenharmony_ci#include <linux/memcontrol.h> 2762306a36Sopenharmony_ci#include <linux/watch_queue.h> 2862306a36Sopenharmony_ci#include <linux/sysctl.h> 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci#include <linux/uaccess.h> 3162306a36Sopenharmony_ci#include <asm/ioctls.h> 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci#include "internal.h" 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci/* 3662306a36Sopenharmony_ci * New pipe buffers will be restricted to this size while the user is exceeding 3762306a36Sopenharmony_ci * their pipe buffer quota. The general pipe use case needs at least two 3862306a36Sopenharmony_ci * buffers: one for data yet to be read, and one for new data. If this is less 3962306a36Sopenharmony_ci * than two, then a write to a non-empty pipe may block even if the pipe is not 4062306a36Sopenharmony_ci * full. This can occur with GNU make jobserver or similar uses of pipes as 4162306a36Sopenharmony_ci * semaphores: multiple processes may be waiting to write tokens back to the 4262306a36Sopenharmony_ci * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/. 4362306a36Sopenharmony_ci * 4462306a36Sopenharmony_ci * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their 4562306a36Sopenharmony_ci * own risk, namely: pipe writes to non-full pipes may block until the pipe is 4662306a36Sopenharmony_ci * emptied. 4762306a36Sopenharmony_ci */ 4862306a36Sopenharmony_ci#define PIPE_MIN_DEF_BUFFERS 2 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci/* 5162306a36Sopenharmony_ci * The max size that a non-root user is allowed to grow the pipe. Can 5262306a36Sopenharmony_ci * be set by root in /proc/sys/fs/pipe-max-size 5362306a36Sopenharmony_ci */ 5462306a36Sopenharmony_cistatic unsigned int pipe_max_size = 1048576; 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci/* Maximum allocatable pages per user. Hard limit is unset by default, soft 5762306a36Sopenharmony_ci * matches default values. 5862306a36Sopenharmony_ci */ 5962306a36Sopenharmony_cistatic unsigned long pipe_user_pages_hard; 6062306a36Sopenharmony_cistatic unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci/* 6362306a36Sopenharmony_ci * We use head and tail indices that aren't masked off, except at the point of 6462306a36Sopenharmony_ci * dereference, but rather they're allowed to wrap naturally. This means there 6562306a36Sopenharmony_ci * isn't a dead spot in the buffer, but the ring has to be a power of two and 6662306a36Sopenharmony_ci * <= 2^31. 6762306a36Sopenharmony_ci * -- David Howells 2019-09-23. 6862306a36Sopenharmony_ci * 6962306a36Sopenharmony_ci * Reads with count = 0 should always return 0. 7062306a36Sopenharmony_ci * -- Julian Bradfield 1999-06-07. 7162306a36Sopenharmony_ci * 7262306a36Sopenharmony_ci * FIFOs and Pipes now generate SIGIO for both readers and writers. 7362306a36Sopenharmony_ci * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16 7462306a36Sopenharmony_ci * 7562306a36Sopenharmony_ci * pipe_read & write cleanup 7662306a36Sopenharmony_ci * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 7762306a36Sopenharmony_ci */ 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_cistatic void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) 8062306a36Sopenharmony_ci{ 8162306a36Sopenharmony_ci if (pipe->files) 8262306a36Sopenharmony_ci mutex_lock_nested(&pipe->mutex, subclass); 8362306a36Sopenharmony_ci} 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_civoid pipe_lock(struct pipe_inode_info *pipe) 8662306a36Sopenharmony_ci{ 8762306a36Sopenharmony_ci /* 8862306a36Sopenharmony_ci * pipe_lock() nests non-pipe inode locks (for writing to a file) 8962306a36Sopenharmony_ci */ 9062306a36Sopenharmony_ci pipe_lock_nested(pipe, I_MUTEX_PARENT); 9162306a36Sopenharmony_ci} 9262306a36Sopenharmony_ciEXPORT_SYMBOL(pipe_lock); 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_civoid pipe_unlock(struct pipe_inode_info *pipe) 9562306a36Sopenharmony_ci{ 9662306a36Sopenharmony_ci if (pipe->files) 9762306a36Sopenharmony_ci mutex_unlock(&pipe->mutex); 9862306a36Sopenharmony_ci} 9962306a36Sopenharmony_ciEXPORT_SYMBOL(pipe_unlock); 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_cistatic inline void __pipe_lock(struct pipe_inode_info *pipe) 10262306a36Sopenharmony_ci{ 10362306a36Sopenharmony_ci mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); 10462306a36Sopenharmony_ci} 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_cistatic inline void __pipe_unlock(struct pipe_inode_info *pipe) 10762306a36Sopenharmony_ci{ 10862306a36Sopenharmony_ci mutex_unlock(&pipe->mutex); 10962306a36Sopenharmony_ci} 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_civoid pipe_double_lock(struct pipe_inode_info *pipe1, 11262306a36Sopenharmony_ci struct pipe_inode_info *pipe2) 11362306a36Sopenharmony_ci{ 11462306a36Sopenharmony_ci BUG_ON(pipe1 == pipe2); 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci if (pipe1 < pipe2) { 11762306a36Sopenharmony_ci pipe_lock_nested(pipe1, I_MUTEX_PARENT); 11862306a36Sopenharmony_ci pipe_lock_nested(pipe2, I_MUTEX_CHILD); 11962306a36Sopenharmony_ci } else { 12062306a36Sopenharmony_ci pipe_lock_nested(pipe2, I_MUTEX_PARENT); 12162306a36Sopenharmony_ci pipe_lock_nested(pipe1, I_MUTEX_CHILD); 12262306a36Sopenharmony_ci } 12362306a36Sopenharmony_ci} 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_cistatic void anon_pipe_buf_release(struct pipe_inode_info *pipe, 12662306a36Sopenharmony_ci struct pipe_buffer *buf) 12762306a36Sopenharmony_ci{ 12862306a36Sopenharmony_ci struct page *page = buf->page; 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci /* 13162306a36Sopenharmony_ci * If nobody else uses this page, and we don't already have a 13262306a36Sopenharmony_ci * temporary page, let's keep track of it as a one-deep 13362306a36Sopenharmony_ci * allocation cache. (Otherwise just release our reference to it) 13462306a36Sopenharmony_ci */ 13562306a36Sopenharmony_ci if (page_count(page) == 1 && !pipe->tmp_page) 13662306a36Sopenharmony_ci pipe->tmp_page = page; 13762306a36Sopenharmony_ci else 13862306a36Sopenharmony_ci put_page(page); 13962306a36Sopenharmony_ci} 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_cistatic bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe, 14262306a36Sopenharmony_ci struct pipe_buffer *buf) 14362306a36Sopenharmony_ci{ 14462306a36Sopenharmony_ci struct page *page = buf->page; 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_ci if (page_count(page) != 1) 14762306a36Sopenharmony_ci return false; 14862306a36Sopenharmony_ci memcg_kmem_uncharge_page(page, 0); 14962306a36Sopenharmony_ci __SetPageLocked(page); 15062306a36Sopenharmony_ci return true; 15162306a36Sopenharmony_ci} 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci/** 15462306a36Sopenharmony_ci * generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer 15562306a36Sopenharmony_ci * @pipe: the pipe that the buffer belongs to 15662306a36Sopenharmony_ci * @buf: the buffer to attempt to steal 15762306a36Sopenharmony_ci * 15862306a36Sopenharmony_ci * Description: 15962306a36Sopenharmony_ci * This function attempts to steal the &struct page attached to 16062306a36Sopenharmony_ci * @buf. If successful, this function returns 0 and returns with 16162306a36Sopenharmony_ci * the page locked. The caller may then reuse the page for whatever 16262306a36Sopenharmony_ci * he wishes; the typical use is insertion into a different file 16362306a36Sopenharmony_ci * page cache. 16462306a36Sopenharmony_ci */ 16562306a36Sopenharmony_cibool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe, 16662306a36Sopenharmony_ci struct pipe_buffer *buf) 16762306a36Sopenharmony_ci{ 16862306a36Sopenharmony_ci struct page *page = buf->page; 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci /* 17162306a36Sopenharmony_ci * A reference of one is golden, that means that the owner of this 17262306a36Sopenharmony_ci * page is the only one holding a reference to it. lock the page 17362306a36Sopenharmony_ci * and return OK. 17462306a36Sopenharmony_ci */ 17562306a36Sopenharmony_ci if (page_count(page) == 1) { 17662306a36Sopenharmony_ci lock_page(page); 17762306a36Sopenharmony_ci return true; 17862306a36Sopenharmony_ci } 17962306a36Sopenharmony_ci return false; 18062306a36Sopenharmony_ci} 18162306a36Sopenharmony_ciEXPORT_SYMBOL(generic_pipe_buf_try_steal); 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci/** 18462306a36Sopenharmony_ci * generic_pipe_buf_get - get a reference to a &struct pipe_buffer 18562306a36Sopenharmony_ci * @pipe: the pipe that the buffer belongs to 18662306a36Sopenharmony_ci * @buf: the buffer to get a reference to 18762306a36Sopenharmony_ci * 18862306a36Sopenharmony_ci * Description: 18962306a36Sopenharmony_ci * This function grabs an extra reference to @buf. It's used in 19062306a36Sopenharmony_ci * the tee() system call, when we duplicate the buffers in one 19162306a36Sopenharmony_ci * pipe into another. 19262306a36Sopenharmony_ci */ 19362306a36Sopenharmony_cibool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) 19462306a36Sopenharmony_ci{ 19562306a36Sopenharmony_ci return try_get_page(buf->page); 19662306a36Sopenharmony_ci} 19762306a36Sopenharmony_ciEXPORT_SYMBOL(generic_pipe_buf_get); 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci/** 20062306a36Sopenharmony_ci * generic_pipe_buf_release - put a reference to a &struct pipe_buffer 20162306a36Sopenharmony_ci * @pipe: the pipe that the buffer belongs to 20262306a36Sopenharmony_ci * @buf: the buffer to put a reference to 20362306a36Sopenharmony_ci * 20462306a36Sopenharmony_ci * Description: 20562306a36Sopenharmony_ci * This function releases a reference to @buf. 20662306a36Sopenharmony_ci */ 20762306a36Sopenharmony_civoid generic_pipe_buf_release(struct pipe_inode_info *pipe, 20862306a36Sopenharmony_ci struct pipe_buffer *buf) 20962306a36Sopenharmony_ci{ 21062306a36Sopenharmony_ci put_page(buf->page); 21162306a36Sopenharmony_ci} 21262306a36Sopenharmony_ciEXPORT_SYMBOL(generic_pipe_buf_release); 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_cistatic const struct pipe_buf_operations anon_pipe_buf_ops = { 21562306a36Sopenharmony_ci .release = anon_pipe_buf_release, 21662306a36Sopenharmony_ci .try_steal = anon_pipe_buf_try_steal, 21762306a36Sopenharmony_ci .get = generic_pipe_buf_get, 21862306a36Sopenharmony_ci}; 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */ 22162306a36Sopenharmony_cistatic inline bool pipe_readable(const struct pipe_inode_info *pipe) 22262306a36Sopenharmony_ci{ 22362306a36Sopenharmony_ci unsigned int head = READ_ONCE(pipe->head); 22462306a36Sopenharmony_ci unsigned int tail = READ_ONCE(pipe->tail); 22562306a36Sopenharmony_ci unsigned int writers = READ_ONCE(pipe->writers); 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci return !pipe_empty(head, tail) || !writers; 22862306a36Sopenharmony_ci} 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_cistatic ssize_t 23162306a36Sopenharmony_cipipe_read(struct kiocb *iocb, struct iov_iter *to) 23262306a36Sopenharmony_ci{ 23362306a36Sopenharmony_ci size_t total_len = iov_iter_count(to); 23462306a36Sopenharmony_ci struct file *filp = iocb->ki_filp; 23562306a36Sopenharmony_ci struct pipe_inode_info *pipe = filp->private_data; 23662306a36Sopenharmony_ci bool was_full, wake_next_reader = false; 23762306a36Sopenharmony_ci ssize_t ret; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci /* Null read succeeds. */ 24062306a36Sopenharmony_ci if (unlikely(total_len == 0)) 24162306a36Sopenharmony_ci return 0; 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci ret = 0; 24462306a36Sopenharmony_ci __pipe_lock(pipe); 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci /* 24762306a36Sopenharmony_ci * We only wake up writers if the pipe was full when we started 24862306a36Sopenharmony_ci * reading in order to avoid unnecessary wakeups. 24962306a36Sopenharmony_ci * 25062306a36Sopenharmony_ci * But when we do wake up writers, we do so using a sync wakeup 25162306a36Sopenharmony_ci * (WF_SYNC), because we want them to get going and generate more 25262306a36Sopenharmony_ci * data for us. 25362306a36Sopenharmony_ci */ 25462306a36Sopenharmony_ci was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); 25562306a36Sopenharmony_ci for (;;) { 25662306a36Sopenharmony_ci /* Read ->head with a barrier vs post_one_notification() */ 25762306a36Sopenharmony_ci unsigned int head = smp_load_acquire(&pipe->head); 25862306a36Sopenharmony_ci unsigned int tail = pipe->tail; 25962306a36Sopenharmony_ci unsigned int mask = pipe->ring_size - 1; 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci#ifdef CONFIG_WATCH_QUEUE 26262306a36Sopenharmony_ci if (pipe->note_loss) { 26362306a36Sopenharmony_ci struct watch_notification n; 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci if (total_len < 8) { 26662306a36Sopenharmony_ci if (ret == 0) 26762306a36Sopenharmony_ci ret = -ENOBUFS; 26862306a36Sopenharmony_ci break; 26962306a36Sopenharmony_ci } 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci n.type = WATCH_TYPE_META; 27262306a36Sopenharmony_ci n.subtype = WATCH_META_LOSS_NOTIFICATION; 27362306a36Sopenharmony_ci n.info = watch_sizeof(n); 27462306a36Sopenharmony_ci if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) { 27562306a36Sopenharmony_ci if (ret == 0) 27662306a36Sopenharmony_ci ret = -EFAULT; 27762306a36Sopenharmony_ci break; 27862306a36Sopenharmony_ci } 27962306a36Sopenharmony_ci ret += sizeof(n); 28062306a36Sopenharmony_ci total_len -= sizeof(n); 28162306a36Sopenharmony_ci pipe->note_loss = false; 28262306a36Sopenharmony_ci } 28362306a36Sopenharmony_ci#endif 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci if (!pipe_empty(head, tail)) { 28662306a36Sopenharmony_ci struct pipe_buffer *buf = &pipe->bufs[tail & mask]; 28762306a36Sopenharmony_ci size_t chars = buf->len; 28862306a36Sopenharmony_ci size_t written; 28962306a36Sopenharmony_ci int error; 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci if (chars > total_len) { 29262306a36Sopenharmony_ci if (buf->flags & PIPE_BUF_FLAG_WHOLE) { 29362306a36Sopenharmony_ci if (ret == 0) 29462306a36Sopenharmony_ci ret = -ENOBUFS; 29562306a36Sopenharmony_ci break; 29662306a36Sopenharmony_ci } 29762306a36Sopenharmony_ci chars = total_len; 29862306a36Sopenharmony_ci } 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci error = pipe_buf_confirm(pipe, buf); 30162306a36Sopenharmony_ci if (error) { 30262306a36Sopenharmony_ci if (!ret) 30362306a36Sopenharmony_ci ret = error; 30462306a36Sopenharmony_ci break; 30562306a36Sopenharmony_ci } 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_ci written = copy_page_to_iter(buf->page, buf->offset, chars, to); 30862306a36Sopenharmony_ci if (unlikely(written < chars)) { 30962306a36Sopenharmony_ci if (!ret) 31062306a36Sopenharmony_ci ret = -EFAULT; 31162306a36Sopenharmony_ci break; 31262306a36Sopenharmony_ci } 31362306a36Sopenharmony_ci ret += chars; 31462306a36Sopenharmony_ci buf->offset += chars; 31562306a36Sopenharmony_ci buf->len -= chars; 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci /* Was it a packet buffer? Clean up and exit */ 31862306a36Sopenharmony_ci if (buf->flags & PIPE_BUF_FLAG_PACKET) { 31962306a36Sopenharmony_ci total_len = chars; 32062306a36Sopenharmony_ci buf->len = 0; 32162306a36Sopenharmony_ci } 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci if (!buf->len) { 32462306a36Sopenharmony_ci pipe_buf_release(pipe, buf); 32562306a36Sopenharmony_ci spin_lock_irq(&pipe->rd_wait.lock); 32662306a36Sopenharmony_ci#ifdef CONFIG_WATCH_QUEUE 32762306a36Sopenharmony_ci if (buf->flags & PIPE_BUF_FLAG_LOSS) 32862306a36Sopenharmony_ci pipe->note_loss = true; 32962306a36Sopenharmony_ci#endif 33062306a36Sopenharmony_ci tail++; 33162306a36Sopenharmony_ci pipe->tail = tail; 33262306a36Sopenharmony_ci spin_unlock_irq(&pipe->rd_wait.lock); 33362306a36Sopenharmony_ci } 33462306a36Sopenharmony_ci total_len -= chars; 33562306a36Sopenharmony_ci if (!total_len) 33662306a36Sopenharmony_ci break; /* common path: read succeeded */ 33762306a36Sopenharmony_ci if (!pipe_empty(head, tail)) /* More to do? */ 33862306a36Sopenharmony_ci continue; 33962306a36Sopenharmony_ci } 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci if (!pipe->writers) 34262306a36Sopenharmony_ci break; 34362306a36Sopenharmony_ci if (ret) 34462306a36Sopenharmony_ci break; 34562306a36Sopenharmony_ci if ((filp->f_flags & O_NONBLOCK) || 34662306a36Sopenharmony_ci (iocb->ki_flags & IOCB_NOWAIT)) { 34762306a36Sopenharmony_ci ret = -EAGAIN; 34862306a36Sopenharmony_ci break; 34962306a36Sopenharmony_ci } 35062306a36Sopenharmony_ci __pipe_unlock(pipe); 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ci /* 35362306a36Sopenharmony_ci * We only get here if we didn't actually read anything. 35462306a36Sopenharmony_ci * 35562306a36Sopenharmony_ci * However, we could have seen (and removed) a zero-sized 35662306a36Sopenharmony_ci * pipe buffer, and might have made space in the buffers 35762306a36Sopenharmony_ci * that way. 35862306a36Sopenharmony_ci * 35962306a36Sopenharmony_ci * You can't make zero-sized pipe buffers by doing an empty 36062306a36Sopenharmony_ci * write (not even in packet mode), but they can happen if 36162306a36Sopenharmony_ci * the writer gets an EFAULT when trying to fill a buffer 36262306a36Sopenharmony_ci * that already got allocated and inserted in the buffer 36362306a36Sopenharmony_ci * array. 36462306a36Sopenharmony_ci * 36562306a36Sopenharmony_ci * So we still need to wake up any pending writers in the 36662306a36Sopenharmony_ci * _very_ unlikely case that the pipe was full, but we got 36762306a36Sopenharmony_ci * no data. 36862306a36Sopenharmony_ci */ 36962306a36Sopenharmony_ci if (unlikely(was_full)) 37062306a36Sopenharmony_ci wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); 37162306a36Sopenharmony_ci kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci /* 37462306a36Sopenharmony_ci * But because we didn't read anything, at this point we can 37562306a36Sopenharmony_ci * just return directly with -ERESTARTSYS if we're interrupted, 37662306a36Sopenharmony_ci * since we've done any required wakeups and there's no need 37762306a36Sopenharmony_ci * to mark anything accessed. And we've dropped the lock. 37862306a36Sopenharmony_ci */ 37962306a36Sopenharmony_ci if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0) 38062306a36Sopenharmony_ci return -ERESTARTSYS; 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci __pipe_lock(pipe); 38362306a36Sopenharmony_ci was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); 38462306a36Sopenharmony_ci wake_next_reader = true; 38562306a36Sopenharmony_ci } 38662306a36Sopenharmony_ci if (pipe_empty(pipe->head, pipe->tail)) 38762306a36Sopenharmony_ci wake_next_reader = false; 38862306a36Sopenharmony_ci __pipe_unlock(pipe); 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ci if (was_full) 39162306a36Sopenharmony_ci wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); 39262306a36Sopenharmony_ci if (wake_next_reader) 39362306a36Sopenharmony_ci wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); 39462306a36Sopenharmony_ci kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 39562306a36Sopenharmony_ci if (ret > 0) 39662306a36Sopenharmony_ci file_accessed(filp); 39762306a36Sopenharmony_ci return ret; 39862306a36Sopenharmony_ci} 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_cistatic inline int is_packetized(struct file *file) 40162306a36Sopenharmony_ci{ 40262306a36Sopenharmony_ci return (file->f_flags & O_DIRECT) != 0; 40362306a36Sopenharmony_ci} 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */ 40662306a36Sopenharmony_cistatic inline bool pipe_writable(const struct pipe_inode_info *pipe) 40762306a36Sopenharmony_ci{ 40862306a36Sopenharmony_ci unsigned int head = READ_ONCE(pipe->head); 40962306a36Sopenharmony_ci unsigned int tail = READ_ONCE(pipe->tail); 41062306a36Sopenharmony_ci unsigned int max_usage = READ_ONCE(pipe->max_usage); 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_ci return !pipe_full(head, tail, max_usage) || 41362306a36Sopenharmony_ci !READ_ONCE(pipe->readers); 41462306a36Sopenharmony_ci} 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_cistatic ssize_t 41762306a36Sopenharmony_cipipe_write(struct kiocb *iocb, struct iov_iter *from) 41862306a36Sopenharmony_ci{ 41962306a36Sopenharmony_ci struct file *filp = iocb->ki_filp; 42062306a36Sopenharmony_ci struct pipe_inode_info *pipe = filp->private_data; 42162306a36Sopenharmony_ci unsigned int head; 42262306a36Sopenharmony_ci ssize_t ret = 0; 42362306a36Sopenharmony_ci size_t total_len = iov_iter_count(from); 42462306a36Sopenharmony_ci ssize_t chars; 42562306a36Sopenharmony_ci bool was_empty = false; 42662306a36Sopenharmony_ci bool wake_next_writer = false; 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_ci /* Null write succeeds. */ 42962306a36Sopenharmony_ci if (unlikely(total_len == 0)) 43062306a36Sopenharmony_ci return 0; 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci __pipe_lock(pipe); 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci if (!pipe->readers) { 43562306a36Sopenharmony_ci send_sig(SIGPIPE, current, 0); 43662306a36Sopenharmony_ci ret = -EPIPE; 43762306a36Sopenharmony_ci goto out; 43862306a36Sopenharmony_ci } 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci if (pipe_has_watch_queue(pipe)) { 44162306a36Sopenharmony_ci ret = -EXDEV; 44262306a36Sopenharmony_ci goto out; 44362306a36Sopenharmony_ci } 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci /* 44662306a36Sopenharmony_ci * If it wasn't empty we try to merge new data into 44762306a36Sopenharmony_ci * the last buffer. 44862306a36Sopenharmony_ci * 44962306a36Sopenharmony_ci * That naturally merges small writes, but it also 45062306a36Sopenharmony_ci * page-aligns the rest of the writes for large writes 45162306a36Sopenharmony_ci * spanning multiple pages. 45262306a36Sopenharmony_ci */ 45362306a36Sopenharmony_ci head = pipe->head; 45462306a36Sopenharmony_ci was_empty = pipe_empty(head, pipe->tail); 45562306a36Sopenharmony_ci chars = total_len & (PAGE_SIZE-1); 45662306a36Sopenharmony_ci if (chars && !was_empty) { 45762306a36Sopenharmony_ci unsigned int mask = pipe->ring_size - 1; 45862306a36Sopenharmony_ci struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask]; 45962306a36Sopenharmony_ci int offset = buf->offset + buf->len; 46062306a36Sopenharmony_ci 46162306a36Sopenharmony_ci if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) && 46262306a36Sopenharmony_ci offset + chars <= PAGE_SIZE) { 46362306a36Sopenharmony_ci ret = pipe_buf_confirm(pipe, buf); 46462306a36Sopenharmony_ci if (ret) 46562306a36Sopenharmony_ci goto out; 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci ret = copy_page_from_iter(buf->page, offset, chars, from); 46862306a36Sopenharmony_ci if (unlikely(ret < chars)) { 46962306a36Sopenharmony_ci ret = -EFAULT; 47062306a36Sopenharmony_ci goto out; 47162306a36Sopenharmony_ci } 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci buf->len += ret; 47462306a36Sopenharmony_ci if (!iov_iter_count(from)) 47562306a36Sopenharmony_ci goto out; 47662306a36Sopenharmony_ci } 47762306a36Sopenharmony_ci } 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci for (;;) { 48062306a36Sopenharmony_ci if (!pipe->readers) { 48162306a36Sopenharmony_ci send_sig(SIGPIPE, current, 0); 48262306a36Sopenharmony_ci if (!ret) 48362306a36Sopenharmony_ci ret = -EPIPE; 48462306a36Sopenharmony_ci break; 48562306a36Sopenharmony_ci } 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci head = pipe->head; 48862306a36Sopenharmony_ci if (!pipe_full(head, pipe->tail, pipe->max_usage)) { 48962306a36Sopenharmony_ci unsigned int mask = pipe->ring_size - 1; 49062306a36Sopenharmony_ci struct pipe_buffer *buf; 49162306a36Sopenharmony_ci struct page *page = pipe->tmp_page; 49262306a36Sopenharmony_ci int copied; 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_ci if (!page) { 49562306a36Sopenharmony_ci page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); 49662306a36Sopenharmony_ci if (unlikely(!page)) { 49762306a36Sopenharmony_ci ret = ret ? : -ENOMEM; 49862306a36Sopenharmony_ci break; 49962306a36Sopenharmony_ci } 50062306a36Sopenharmony_ci pipe->tmp_page = page; 50162306a36Sopenharmony_ci } 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_ci /* Allocate a slot in the ring in advance and attach an 50462306a36Sopenharmony_ci * empty buffer. If we fault or otherwise fail to use 50562306a36Sopenharmony_ci * it, either the reader will consume it or it'll still 50662306a36Sopenharmony_ci * be there for the next write. 50762306a36Sopenharmony_ci */ 50862306a36Sopenharmony_ci spin_lock_irq(&pipe->rd_wait.lock); 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_ci head = pipe->head; 51162306a36Sopenharmony_ci if (pipe_full(head, pipe->tail, pipe->max_usage)) { 51262306a36Sopenharmony_ci spin_unlock_irq(&pipe->rd_wait.lock); 51362306a36Sopenharmony_ci continue; 51462306a36Sopenharmony_ci } 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci pipe->head = head + 1; 51762306a36Sopenharmony_ci spin_unlock_irq(&pipe->rd_wait.lock); 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_ci /* Insert it into the buffer array */ 52062306a36Sopenharmony_ci buf = &pipe->bufs[head & mask]; 52162306a36Sopenharmony_ci buf->page = page; 52262306a36Sopenharmony_ci buf->ops = &anon_pipe_buf_ops; 52362306a36Sopenharmony_ci buf->offset = 0; 52462306a36Sopenharmony_ci buf->len = 0; 52562306a36Sopenharmony_ci if (is_packetized(filp)) 52662306a36Sopenharmony_ci buf->flags = PIPE_BUF_FLAG_PACKET; 52762306a36Sopenharmony_ci else 52862306a36Sopenharmony_ci buf->flags = PIPE_BUF_FLAG_CAN_MERGE; 52962306a36Sopenharmony_ci pipe->tmp_page = NULL; 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci copied = copy_page_from_iter(page, 0, PAGE_SIZE, from); 53262306a36Sopenharmony_ci if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) { 53362306a36Sopenharmony_ci if (!ret) 53462306a36Sopenharmony_ci ret = -EFAULT; 53562306a36Sopenharmony_ci break; 53662306a36Sopenharmony_ci } 53762306a36Sopenharmony_ci ret += copied; 53862306a36Sopenharmony_ci buf->len = copied; 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_ci if (!iov_iter_count(from)) 54162306a36Sopenharmony_ci break; 54262306a36Sopenharmony_ci } 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_ci if (!pipe_full(head, pipe->tail, pipe->max_usage)) 54562306a36Sopenharmony_ci continue; 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci /* Wait for buffer space to become available. */ 54862306a36Sopenharmony_ci if ((filp->f_flags & O_NONBLOCK) || 54962306a36Sopenharmony_ci (iocb->ki_flags & IOCB_NOWAIT)) { 55062306a36Sopenharmony_ci if (!ret) 55162306a36Sopenharmony_ci ret = -EAGAIN; 55262306a36Sopenharmony_ci break; 55362306a36Sopenharmony_ci } 55462306a36Sopenharmony_ci if (signal_pending(current)) { 55562306a36Sopenharmony_ci if (!ret) 55662306a36Sopenharmony_ci ret = -ERESTARTSYS; 55762306a36Sopenharmony_ci break; 55862306a36Sopenharmony_ci } 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci /* 56162306a36Sopenharmony_ci * We're going to release the pipe lock and wait for more 56262306a36Sopenharmony_ci * space. We wake up any readers if necessary, and then 56362306a36Sopenharmony_ci * after waiting we need to re-check whether the pipe 56462306a36Sopenharmony_ci * become empty while we dropped the lock. 56562306a36Sopenharmony_ci */ 56662306a36Sopenharmony_ci __pipe_unlock(pipe); 56762306a36Sopenharmony_ci if (was_empty) 56862306a36Sopenharmony_ci wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); 56962306a36Sopenharmony_ci kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 57062306a36Sopenharmony_ci wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); 57162306a36Sopenharmony_ci __pipe_lock(pipe); 57262306a36Sopenharmony_ci was_empty = pipe_empty(pipe->head, pipe->tail); 57362306a36Sopenharmony_ci wake_next_writer = true; 57462306a36Sopenharmony_ci } 57562306a36Sopenharmony_ciout: 57662306a36Sopenharmony_ci if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) 57762306a36Sopenharmony_ci wake_next_writer = false; 57862306a36Sopenharmony_ci __pipe_unlock(pipe); 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci /* 58162306a36Sopenharmony_ci * If we do do a wakeup event, we do a 'sync' wakeup, because we 58262306a36Sopenharmony_ci * want the reader to start processing things asap, rather than 58362306a36Sopenharmony_ci * leave the data pending. 58462306a36Sopenharmony_ci * 58562306a36Sopenharmony_ci * This is particularly important for small writes, because of 58662306a36Sopenharmony_ci * how (for example) the GNU make jobserver uses small writes to 58762306a36Sopenharmony_ci * wake up pending jobs 58862306a36Sopenharmony_ci * 58962306a36Sopenharmony_ci * Epoll nonsensically wants a wakeup whether the pipe 59062306a36Sopenharmony_ci * was already empty or not. 59162306a36Sopenharmony_ci */ 59262306a36Sopenharmony_ci if (was_empty || pipe->poll_usage) 59362306a36Sopenharmony_ci wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); 59462306a36Sopenharmony_ci kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 59562306a36Sopenharmony_ci if (wake_next_writer) 59662306a36Sopenharmony_ci wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); 59762306a36Sopenharmony_ci if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) { 59862306a36Sopenharmony_ci int err = file_update_time(filp); 59962306a36Sopenharmony_ci if (err) 60062306a36Sopenharmony_ci ret = err; 60162306a36Sopenharmony_ci sb_end_write(file_inode(filp)->i_sb); 60262306a36Sopenharmony_ci } 60362306a36Sopenharmony_ci return ret; 60462306a36Sopenharmony_ci} 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_cistatic long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 60762306a36Sopenharmony_ci{ 60862306a36Sopenharmony_ci struct pipe_inode_info *pipe = filp->private_data; 60962306a36Sopenharmony_ci unsigned int count, head, tail, mask; 61062306a36Sopenharmony_ci 61162306a36Sopenharmony_ci switch (cmd) { 61262306a36Sopenharmony_ci case FIONREAD: 61362306a36Sopenharmony_ci __pipe_lock(pipe); 61462306a36Sopenharmony_ci count = 0; 61562306a36Sopenharmony_ci head = pipe->head; 61662306a36Sopenharmony_ci tail = pipe->tail; 61762306a36Sopenharmony_ci mask = pipe->ring_size - 1; 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci while (tail != head) { 62062306a36Sopenharmony_ci count += pipe->bufs[tail & mask].len; 62162306a36Sopenharmony_ci tail++; 62262306a36Sopenharmony_ci } 62362306a36Sopenharmony_ci __pipe_unlock(pipe); 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_ci return put_user(count, (int __user *)arg); 62662306a36Sopenharmony_ci 62762306a36Sopenharmony_ci#ifdef CONFIG_WATCH_QUEUE 62862306a36Sopenharmony_ci case IOC_WATCH_QUEUE_SET_SIZE: { 62962306a36Sopenharmony_ci int ret; 63062306a36Sopenharmony_ci __pipe_lock(pipe); 63162306a36Sopenharmony_ci ret = watch_queue_set_size(pipe, arg); 63262306a36Sopenharmony_ci __pipe_unlock(pipe); 63362306a36Sopenharmony_ci return ret; 63462306a36Sopenharmony_ci } 63562306a36Sopenharmony_ci 63662306a36Sopenharmony_ci case IOC_WATCH_QUEUE_SET_FILTER: 63762306a36Sopenharmony_ci return watch_queue_set_filter( 63862306a36Sopenharmony_ci pipe, (struct watch_notification_filter __user *)arg); 63962306a36Sopenharmony_ci#endif 64062306a36Sopenharmony_ci 64162306a36Sopenharmony_ci default: 64262306a36Sopenharmony_ci return -ENOIOCTLCMD; 64362306a36Sopenharmony_ci } 64462306a36Sopenharmony_ci} 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_ci/* No kernel lock held - fine */ 64762306a36Sopenharmony_cistatic __poll_t 64862306a36Sopenharmony_cipipe_poll(struct file *filp, poll_table *wait) 64962306a36Sopenharmony_ci{ 65062306a36Sopenharmony_ci __poll_t mask; 65162306a36Sopenharmony_ci struct pipe_inode_info *pipe = filp->private_data; 65262306a36Sopenharmony_ci unsigned int head, tail; 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ci /* Epoll has some historical nasty semantics, this enables them */ 65562306a36Sopenharmony_ci WRITE_ONCE(pipe->poll_usage, true); 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_ci /* 65862306a36Sopenharmony_ci * Reading pipe state only -- no need for acquiring the semaphore. 65962306a36Sopenharmony_ci * 66062306a36Sopenharmony_ci * But because this is racy, the code has to add the 66162306a36Sopenharmony_ci * entry to the poll table _first_ .. 66262306a36Sopenharmony_ci */ 66362306a36Sopenharmony_ci if (filp->f_mode & FMODE_READ) 66462306a36Sopenharmony_ci poll_wait(filp, &pipe->rd_wait, wait); 66562306a36Sopenharmony_ci if (filp->f_mode & FMODE_WRITE) 66662306a36Sopenharmony_ci poll_wait(filp, &pipe->wr_wait, wait); 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_ci /* 66962306a36Sopenharmony_ci * .. and only then can you do the racy tests. That way, 67062306a36Sopenharmony_ci * if something changes and you got it wrong, the poll 67162306a36Sopenharmony_ci * table entry will wake you up and fix it. 67262306a36Sopenharmony_ci */ 67362306a36Sopenharmony_ci head = READ_ONCE(pipe->head); 67462306a36Sopenharmony_ci tail = READ_ONCE(pipe->tail); 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_ci mask = 0; 67762306a36Sopenharmony_ci if (filp->f_mode & FMODE_READ) { 67862306a36Sopenharmony_ci if (!pipe_empty(head, tail)) 67962306a36Sopenharmony_ci mask |= EPOLLIN | EPOLLRDNORM; 68062306a36Sopenharmony_ci if (!pipe->writers && filp->f_version != pipe->w_counter) 68162306a36Sopenharmony_ci mask |= EPOLLHUP; 68262306a36Sopenharmony_ci } 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_ci if (filp->f_mode & FMODE_WRITE) { 68562306a36Sopenharmony_ci if (!pipe_full(head, tail, pipe->max_usage)) 68662306a36Sopenharmony_ci mask |= EPOLLOUT | EPOLLWRNORM; 68762306a36Sopenharmony_ci /* 68862306a36Sopenharmony_ci * Most Unices do not set EPOLLERR for FIFOs but on Linux they 68962306a36Sopenharmony_ci * behave exactly like pipes for poll(). 69062306a36Sopenharmony_ci */ 69162306a36Sopenharmony_ci if (!pipe->readers) 69262306a36Sopenharmony_ci mask |= EPOLLERR; 69362306a36Sopenharmony_ci } 69462306a36Sopenharmony_ci 69562306a36Sopenharmony_ci return mask; 69662306a36Sopenharmony_ci} 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_cistatic void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe) 69962306a36Sopenharmony_ci{ 70062306a36Sopenharmony_ci int kill = 0; 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_ci spin_lock(&inode->i_lock); 70362306a36Sopenharmony_ci if (!--pipe->files) { 70462306a36Sopenharmony_ci inode->i_pipe = NULL; 70562306a36Sopenharmony_ci kill = 1; 70662306a36Sopenharmony_ci } 70762306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 70862306a36Sopenharmony_ci 70962306a36Sopenharmony_ci if (kill) 71062306a36Sopenharmony_ci free_pipe_info(pipe); 71162306a36Sopenharmony_ci} 71262306a36Sopenharmony_ci 71362306a36Sopenharmony_cistatic int 71462306a36Sopenharmony_cipipe_release(struct inode *inode, struct file *file) 71562306a36Sopenharmony_ci{ 71662306a36Sopenharmony_ci struct pipe_inode_info *pipe = file->private_data; 71762306a36Sopenharmony_ci 71862306a36Sopenharmony_ci __pipe_lock(pipe); 71962306a36Sopenharmony_ci if (file->f_mode & FMODE_READ) 72062306a36Sopenharmony_ci pipe->readers--; 72162306a36Sopenharmony_ci if (file->f_mode & FMODE_WRITE) 72262306a36Sopenharmony_ci pipe->writers--; 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci /* Was that the last reader or writer, but not the other side? */ 72562306a36Sopenharmony_ci if (!pipe->readers != !pipe->writers) { 72662306a36Sopenharmony_ci wake_up_interruptible_all(&pipe->rd_wait); 72762306a36Sopenharmony_ci wake_up_interruptible_all(&pipe->wr_wait); 72862306a36Sopenharmony_ci kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 72962306a36Sopenharmony_ci kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 73062306a36Sopenharmony_ci } 73162306a36Sopenharmony_ci __pipe_unlock(pipe); 73262306a36Sopenharmony_ci 73362306a36Sopenharmony_ci put_pipe_info(inode, pipe); 73462306a36Sopenharmony_ci return 0; 73562306a36Sopenharmony_ci} 73662306a36Sopenharmony_ci 73762306a36Sopenharmony_cistatic int 73862306a36Sopenharmony_cipipe_fasync(int fd, struct file *filp, int on) 73962306a36Sopenharmony_ci{ 74062306a36Sopenharmony_ci struct pipe_inode_info *pipe = filp->private_data; 74162306a36Sopenharmony_ci int retval = 0; 74262306a36Sopenharmony_ci 74362306a36Sopenharmony_ci __pipe_lock(pipe); 74462306a36Sopenharmony_ci if (filp->f_mode & FMODE_READ) 74562306a36Sopenharmony_ci retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); 74662306a36Sopenharmony_ci if ((filp->f_mode & FMODE_WRITE) && retval >= 0) { 74762306a36Sopenharmony_ci retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); 74862306a36Sopenharmony_ci if (retval < 0 && (filp->f_mode & FMODE_READ)) 74962306a36Sopenharmony_ci /* this can happen only if on == T */ 75062306a36Sopenharmony_ci fasync_helper(-1, filp, 0, &pipe->fasync_readers); 75162306a36Sopenharmony_ci } 75262306a36Sopenharmony_ci __pipe_unlock(pipe); 75362306a36Sopenharmony_ci return retval; 75462306a36Sopenharmony_ci} 75562306a36Sopenharmony_ci 75662306a36Sopenharmony_ciunsigned long account_pipe_buffers(struct user_struct *user, 75762306a36Sopenharmony_ci unsigned long old, unsigned long new) 75862306a36Sopenharmony_ci{ 75962306a36Sopenharmony_ci return atomic_long_add_return(new - old, &user->pipe_bufs); 76062306a36Sopenharmony_ci} 76162306a36Sopenharmony_ci 76262306a36Sopenharmony_cibool too_many_pipe_buffers_soft(unsigned long user_bufs) 76362306a36Sopenharmony_ci{ 76462306a36Sopenharmony_ci unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft); 76562306a36Sopenharmony_ci 76662306a36Sopenharmony_ci return soft_limit && user_bufs > soft_limit; 76762306a36Sopenharmony_ci} 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_cibool too_many_pipe_buffers_hard(unsigned long user_bufs) 77062306a36Sopenharmony_ci{ 77162306a36Sopenharmony_ci unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard); 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci return hard_limit && user_bufs > hard_limit; 77462306a36Sopenharmony_ci} 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_cibool pipe_is_unprivileged_user(void) 77762306a36Sopenharmony_ci{ 77862306a36Sopenharmony_ci return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); 77962306a36Sopenharmony_ci} 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_cistruct pipe_inode_info *alloc_pipe_info(void) 78262306a36Sopenharmony_ci{ 78362306a36Sopenharmony_ci struct pipe_inode_info *pipe; 78462306a36Sopenharmony_ci unsigned long pipe_bufs = PIPE_DEF_BUFFERS; 78562306a36Sopenharmony_ci struct user_struct *user = get_current_user(); 78662306a36Sopenharmony_ci unsigned long user_bufs; 78762306a36Sopenharmony_ci unsigned int max_size = READ_ONCE(pipe_max_size); 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_ci pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT); 79062306a36Sopenharmony_ci if (pipe == NULL) 79162306a36Sopenharmony_ci goto out_free_uid; 79262306a36Sopenharmony_ci 79362306a36Sopenharmony_ci if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE)) 79462306a36Sopenharmony_ci pipe_bufs = max_size >> PAGE_SHIFT; 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci user_bufs = account_pipe_buffers(user, 0, pipe_bufs); 79762306a36Sopenharmony_ci 79862306a36Sopenharmony_ci if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) { 79962306a36Sopenharmony_ci user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS); 80062306a36Sopenharmony_ci pipe_bufs = PIPE_MIN_DEF_BUFFERS; 80162306a36Sopenharmony_ci } 80262306a36Sopenharmony_ci 80362306a36Sopenharmony_ci if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user()) 80462306a36Sopenharmony_ci goto out_revert_acct; 80562306a36Sopenharmony_ci 80662306a36Sopenharmony_ci pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer), 80762306a36Sopenharmony_ci GFP_KERNEL_ACCOUNT); 80862306a36Sopenharmony_ci 80962306a36Sopenharmony_ci if (pipe->bufs) { 81062306a36Sopenharmony_ci init_waitqueue_head(&pipe->rd_wait); 81162306a36Sopenharmony_ci init_waitqueue_head(&pipe->wr_wait); 81262306a36Sopenharmony_ci pipe->r_counter = pipe->w_counter = 1; 81362306a36Sopenharmony_ci pipe->max_usage = pipe_bufs; 81462306a36Sopenharmony_ci pipe->ring_size = pipe_bufs; 81562306a36Sopenharmony_ci pipe->nr_accounted = pipe_bufs; 81662306a36Sopenharmony_ci pipe->user = user; 81762306a36Sopenharmony_ci mutex_init(&pipe->mutex); 81862306a36Sopenharmony_ci return pipe; 81962306a36Sopenharmony_ci } 82062306a36Sopenharmony_ci 82162306a36Sopenharmony_ciout_revert_acct: 82262306a36Sopenharmony_ci (void) account_pipe_buffers(user, pipe_bufs, 0); 82362306a36Sopenharmony_ci kfree(pipe); 82462306a36Sopenharmony_ciout_free_uid: 82562306a36Sopenharmony_ci free_uid(user); 82662306a36Sopenharmony_ci return NULL; 82762306a36Sopenharmony_ci} 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_civoid free_pipe_info(struct pipe_inode_info *pipe) 83062306a36Sopenharmony_ci{ 83162306a36Sopenharmony_ci unsigned int i; 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_ci#ifdef CONFIG_WATCH_QUEUE 83462306a36Sopenharmony_ci if (pipe->watch_queue) 83562306a36Sopenharmony_ci watch_queue_clear(pipe->watch_queue); 83662306a36Sopenharmony_ci#endif 83762306a36Sopenharmony_ci 83862306a36Sopenharmony_ci (void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0); 83962306a36Sopenharmony_ci free_uid(pipe->user); 84062306a36Sopenharmony_ci for (i = 0; i < pipe->ring_size; i++) { 84162306a36Sopenharmony_ci struct pipe_buffer *buf = pipe->bufs + i; 84262306a36Sopenharmony_ci if (buf->ops) 84362306a36Sopenharmony_ci pipe_buf_release(pipe, buf); 84462306a36Sopenharmony_ci } 84562306a36Sopenharmony_ci#ifdef CONFIG_WATCH_QUEUE 84662306a36Sopenharmony_ci if (pipe->watch_queue) 84762306a36Sopenharmony_ci put_watch_queue(pipe->watch_queue); 84862306a36Sopenharmony_ci#endif 84962306a36Sopenharmony_ci if (pipe->tmp_page) 85062306a36Sopenharmony_ci __free_page(pipe->tmp_page); 85162306a36Sopenharmony_ci kfree(pipe->bufs); 85262306a36Sopenharmony_ci kfree(pipe); 85362306a36Sopenharmony_ci} 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_cistatic struct vfsmount *pipe_mnt __read_mostly; 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_ci/* 85862306a36Sopenharmony_ci * pipefs_dname() is called from d_path(). 85962306a36Sopenharmony_ci */ 86062306a36Sopenharmony_cistatic char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen) 86162306a36Sopenharmony_ci{ 86262306a36Sopenharmony_ci return dynamic_dname(buffer, buflen, "pipe:[%lu]", 86362306a36Sopenharmony_ci d_inode(dentry)->i_ino); 86462306a36Sopenharmony_ci} 86562306a36Sopenharmony_ci 86662306a36Sopenharmony_cistatic const struct dentry_operations pipefs_dentry_operations = { 86762306a36Sopenharmony_ci .d_dname = pipefs_dname, 86862306a36Sopenharmony_ci}; 86962306a36Sopenharmony_ci 87062306a36Sopenharmony_cistatic struct inode * get_pipe_inode(void) 87162306a36Sopenharmony_ci{ 87262306a36Sopenharmony_ci struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb); 87362306a36Sopenharmony_ci struct pipe_inode_info *pipe; 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_ci if (!inode) 87662306a36Sopenharmony_ci goto fail_inode; 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_ci inode->i_ino = get_next_ino(); 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci pipe = alloc_pipe_info(); 88162306a36Sopenharmony_ci if (!pipe) 88262306a36Sopenharmony_ci goto fail_iput; 88362306a36Sopenharmony_ci 88462306a36Sopenharmony_ci inode->i_pipe = pipe; 88562306a36Sopenharmony_ci pipe->files = 2; 88662306a36Sopenharmony_ci pipe->readers = pipe->writers = 1; 88762306a36Sopenharmony_ci inode->i_fop = &pipefifo_fops; 88862306a36Sopenharmony_ci 88962306a36Sopenharmony_ci /* 89062306a36Sopenharmony_ci * Mark the inode dirty from the very beginning, 89162306a36Sopenharmony_ci * that way it will never be moved to the dirty 89262306a36Sopenharmony_ci * list because "mark_inode_dirty()" will think 89362306a36Sopenharmony_ci * that it already _is_ on the dirty list. 89462306a36Sopenharmony_ci */ 89562306a36Sopenharmony_ci inode->i_state = I_DIRTY; 89662306a36Sopenharmony_ci inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR; 89762306a36Sopenharmony_ci inode->i_uid = current_fsuid(); 89862306a36Sopenharmony_ci inode->i_gid = current_fsgid(); 89962306a36Sopenharmony_ci inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); 90062306a36Sopenharmony_ci 90162306a36Sopenharmony_ci return inode; 90262306a36Sopenharmony_ci 90362306a36Sopenharmony_cifail_iput: 90462306a36Sopenharmony_ci iput(inode); 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_cifail_inode: 90762306a36Sopenharmony_ci return NULL; 90862306a36Sopenharmony_ci} 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_ciint create_pipe_files(struct file **res, int flags) 91162306a36Sopenharmony_ci{ 91262306a36Sopenharmony_ci struct inode *inode = get_pipe_inode(); 91362306a36Sopenharmony_ci struct file *f; 91462306a36Sopenharmony_ci int error; 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci if (!inode) 91762306a36Sopenharmony_ci return -ENFILE; 91862306a36Sopenharmony_ci 91962306a36Sopenharmony_ci if (flags & O_NOTIFICATION_PIPE) { 92062306a36Sopenharmony_ci error = watch_queue_init(inode->i_pipe); 92162306a36Sopenharmony_ci if (error) { 92262306a36Sopenharmony_ci free_pipe_info(inode->i_pipe); 92362306a36Sopenharmony_ci iput(inode); 92462306a36Sopenharmony_ci return error; 92562306a36Sopenharmony_ci } 92662306a36Sopenharmony_ci } 92762306a36Sopenharmony_ci 92862306a36Sopenharmony_ci f = alloc_file_pseudo(inode, pipe_mnt, "", 92962306a36Sopenharmony_ci O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)), 93062306a36Sopenharmony_ci &pipefifo_fops); 93162306a36Sopenharmony_ci if (IS_ERR(f)) { 93262306a36Sopenharmony_ci free_pipe_info(inode->i_pipe); 93362306a36Sopenharmony_ci iput(inode); 93462306a36Sopenharmony_ci return PTR_ERR(f); 93562306a36Sopenharmony_ci } 93662306a36Sopenharmony_ci 93762306a36Sopenharmony_ci f->private_data = inode->i_pipe; 93862306a36Sopenharmony_ci 93962306a36Sopenharmony_ci res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK), 94062306a36Sopenharmony_ci &pipefifo_fops); 94162306a36Sopenharmony_ci if (IS_ERR(res[0])) { 94262306a36Sopenharmony_ci put_pipe_info(inode, inode->i_pipe); 94362306a36Sopenharmony_ci fput(f); 94462306a36Sopenharmony_ci return PTR_ERR(res[0]); 94562306a36Sopenharmony_ci } 94662306a36Sopenharmony_ci res[0]->private_data = inode->i_pipe; 94762306a36Sopenharmony_ci res[1] = f; 94862306a36Sopenharmony_ci stream_open(inode, res[0]); 94962306a36Sopenharmony_ci stream_open(inode, res[1]); 95062306a36Sopenharmony_ci return 0; 95162306a36Sopenharmony_ci} 95262306a36Sopenharmony_ci 95362306a36Sopenharmony_cistatic int __do_pipe_flags(int *fd, struct file **files, int flags) 95462306a36Sopenharmony_ci{ 95562306a36Sopenharmony_ci int error; 95662306a36Sopenharmony_ci int fdw, fdr; 95762306a36Sopenharmony_ci 95862306a36Sopenharmony_ci if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE)) 95962306a36Sopenharmony_ci return -EINVAL; 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ci error = create_pipe_files(files, flags); 96262306a36Sopenharmony_ci if (error) 96362306a36Sopenharmony_ci return error; 96462306a36Sopenharmony_ci 96562306a36Sopenharmony_ci error = get_unused_fd_flags(flags); 96662306a36Sopenharmony_ci if (error < 0) 96762306a36Sopenharmony_ci goto err_read_pipe; 96862306a36Sopenharmony_ci fdr = error; 96962306a36Sopenharmony_ci 97062306a36Sopenharmony_ci error = get_unused_fd_flags(flags); 97162306a36Sopenharmony_ci if (error < 0) 97262306a36Sopenharmony_ci goto err_fdr; 97362306a36Sopenharmony_ci fdw = error; 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_ci audit_fd_pair(fdr, fdw); 97662306a36Sopenharmony_ci fd[0] = fdr; 97762306a36Sopenharmony_ci fd[1] = fdw; 97862306a36Sopenharmony_ci /* pipe groks IOCB_NOWAIT */ 97962306a36Sopenharmony_ci files[0]->f_mode |= FMODE_NOWAIT; 98062306a36Sopenharmony_ci files[1]->f_mode |= FMODE_NOWAIT; 98162306a36Sopenharmony_ci return 0; 98262306a36Sopenharmony_ci 98362306a36Sopenharmony_ci err_fdr: 98462306a36Sopenharmony_ci put_unused_fd(fdr); 98562306a36Sopenharmony_ci err_read_pipe: 98662306a36Sopenharmony_ci fput(files[0]); 98762306a36Sopenharmony_ci fput(files[1]); 98862306a36Sopenharmony_ci return error; 98962306a36Sopenharmony_ci} 99062306a36Sopenharmony_ci 99162306a36Sopenharmony_ciint do_pipe_flags(int *fd, int flags) 99262306a36Sopenharmony_ci{ 99362306a36Sopenharmony_ci struct file *files[2]; 99462306a36Sopenharmony_ci int error = __do_pipe_flags(fd, files, flags); 99562306a36Sopenharmony_ci if (!error) { 99662306a36Sopenharmony_ci fd_install(fd[0], files[0]); 99762306a36Sopenharmony_ci fd_install(fd[1], files[1]); 99862306a36Sopenharmony_ci } 99962306a36Sopenharmony_ci return error; 100062306a36Sopenharmony_ci} 100162306a36Sopenharmony_ci 100262306a36Sopenharmony_ci/* 100362306a36Sopenharmony_ci * sys_pipe() is the normal C calling standard for creating 100462306a36Sopenharmony_ci * a pipe. It's not the way Unix traditionally does this, though. 100562306a36Sopenharmony_ci */ 100662306a36Sopenharmony_cistatic int do_pipe2(int __user *fildes, int flags) 100762306a36Sopenharmony_ci{ 100862306a36Sopenharmony_ci struct file *files[2]; 100962306a36Sopenharmony_ci int fd[2]; 101062306a36Sopenharmony_ci int error; 101162306a36Sopenharmony_ci 101262306a36Sopenharmony_ci error = __do_pipe_flags(fd, files, flags); 101362306a36Sopenharmony_ci if (!error) { 101462306a36Sopenharmony_ci if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) { 101562306a36Sopenharmony_ci fput(files[0]); 101662306a36Sopenharmony_ci fput(files[1]); 101762306a36Sopenharmony_ci put_unused_fd(fd[0]); 101862306a36Sopenharmony_ci put_unused_fd(fd[1]); 101962306a36Sopenharmony_ci error = -EFAULT; 102062306a36Sopenharmony_ci } else { 102162306a36Sopenharmony_ci fd_install(fd[0], files[0]); 102262306a36Sopenharmony_ci fd_install(fd[1], files[1]); 102362306a36Sopenharmony_ci } 102462306a36Sopenharmony_ci } 102562306a36Sopenharmony_ci return error; 102662306a36Sopenharmony_ci} 102762306a36Sopenharmony_ci 102862306a36Sopenharmony_ciSYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) 102962306a36Sopenharmony_ci{ 103062306a36Sopenharmony_ci return do_pipe2(fildes, flags); 103162306a36Sopenharmony_ci} 103262306a36Sopenharmony_ci 103362306a36Sopenharmony_ciSYSCALL_DEFINE1(pipe, int __user *, fildes) 103462306a36Sopenharmony_ci{ 103562306a36Sopenharmony_ci return do_pipe2(fildes, 0); 103662306a36Sopenharmony_ci} 103762306a36Sopenharmony_ci 103862306a36Sopenharmony_ci/* 103962306a36Sopenharmony_ci * This is the stupid "wait for pipe to be readable or writable" 104062306a36Sopenharmony_ci * model. 104162306a36Sopenharmony_ci * 104262306a36Sopenharmony_ci * See pipe_read/write() for the proper kind of exclusive wait, 104362306a36Sopenharmony_ci * but that requires that we wake up any other readers/writers 104462306a36Sopenharmony_ci * if we then do not end up reading everything (ie the whole 104562306a36Sopenharmony_ci * "wake_next_reader/writer" logic in pipe_read/write()). 104662306a36Sopenharmony_ci */ 104762306a36Sopenharmony_civoid pipe_wait_readable(struct pipe_inode_info *pipe) 104862306a36Sopenharmony_ci{ 104962306a36Sopenharmony_ci pipe_unlock(pipe); 105062306a36Sopenharmony_ci wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe)); 105162306a36Sopenharmony_ci pipe_lock(pipe); 105262306a36Sopenharmony_ci} 105362306a36Sopenharmony_ci 105462306a36Sopenharmony_civoid pipe_wait_writable(struct pipe_inode_info *pipe) 105562306a36Sopenharmony_ci{ 105662306a36Sopenharmony_ci pipe_unlock(pipe); 105762306a36Sopenharmony_ci wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe)); 105862306a36Sopenharmony_ci pipe_lock(pipe); 105962306a36Sopenharmony_ci} 106062306a36Sopenharmony_ci 106162306a36Sopenharmony_ci/* 106262306a36Sopenharmony_ci * This depends on both the wait (here) and the wakeup (wake_up_partner) 106362306a36Sopenharmony_ci * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot 106462306a36Sopenharmony_ci * race with the count check and waitqueue prep. 106562306a36Sopenharmony_ci * 106662306a36Sopenharmony_ci * Normally in order to avoid races, you'd do the prepare_to_wait() first, 106762306a36Sopenharmony_ci * then check the condition you're waiting for, and only then sleep. But 106862306a36Sopenharmony_ci * because of the pipe lock, we can check the condition before being on 106962306a36Sopenharmony_ci * the wait queue. 107062306a36Sopenharmony_ci * 107162306a36Sopenharmony_ci * We use the 'rd_wait' waitqueue for pipe partner waiting. 107262306a36Sopenharmony_ci */ 107362306a36Sopenharmony_cistatic int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt) 107462306a36Sopenharmony_ci{ 107562306a36Sopenharmony_ci DEFINE_WAIT(rdwait); 107662306a36Sopenharmony_ci int cur = *cnt; 107762306a36Sopenharmony_ci 107862306a36Sopenharmony_ci while (cur == *cnt) { 107962306a36Sopenharmony_ci prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE); 108062306a36Sopenharmony_ci pipe_unlock(pipe); 108162306a36Sopenharmony_ci schedule(); 108262306a36Sopenharmony_ci finish_wait(&pipe->rd_wait, &rdwait); 108362306a36Sopenharmony_ci pipe_lock(pipe); 108462306a36Sopenharmony_ci if (signal_pending(current)) 108562306a36Sopenharmony_ci break; 108662306a36Sopenharmony_ci } 108762306a36Sopenharmony_ci return cur == *cnt ? -ERESTARTSYS : 0; 108862306a36Sopenharmony_ci} 108962306a36Sopenharmony_ci 109062306a36Sopenharmony_cistatic void wake_up_partner(struct pipe_inode_info *pipe) 109162306a36Sopenharmony_ci{ 109262306a36Sopenharmony_ci wake_up_interruptible_all(&pipe->rd_wait); 109362306a36Sopenharmony_ci} 109462306a36Sopenharmony_ci 109562306a36Sopenharmony_cistatic int fifo_open(struct inode *inode, struct file *filp) 109662306a36Sopenharmony_ci{ 109762306a36Sopenharmony_ci struct pipe_inode_info *pipe; 109862306a36Sopenharmony_ci bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; 109962306a36Sopenharmony_ci int ret; 110062306a36Sopenharmony_ci 110162306a36Sopenharmony_ci filp->f_version = 0; 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci spin_lock(&inode->i_lock); 110462306a36Sopenharmony_ci if (inode->i_pipe) { 110562306a36Sopenharmony_ci pipe = inode->i_pipe; 110662306a36Sopenharmony_ci pipe->files++; 110762306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 110862306a36Sopenharmony_ci } else { 110962306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 111062306a36Sopenharmony_ci pipe = alloc_pipe_info(); 111162306a36Sopenharmony_ci if (!pipe) 111262306a36Sopenharmony_ci return -ENOMEM; 111362306a36Sopenharmony_ci pipe->files = 1; 111462306a36Sopenharmony_ci spin_lock(&inode->i_lock); 111562306a36Sopenharmony_ci if (unlikely(inode->i_pipe)) { 111662306a36Sopenharmony_ci inode->i_pipe->files++; 111762306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 111862306a36Sopenharmony_ci free_pipe_info(pipe); 111962306a36Sopenharmony_ci pipe = inode->i_pipe; 112062306a36Sopenharmony_ci } else { 112162306a36Sopenharmony_ci inode->i_pipe = pipe; 112262306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 112362306a36Sopenharmony_ci } 112462306a36Sopenharmony_ci } 112562306a36Sopenharmony_ci filp->private_data = pipe; 112662306a36Sopenharmony_ci /* OK, we have a pipe and it's pinned down */ 112762306a36Sopenharmony_ci 112862306a36Sopenharmony_ci __pipe_lock(pipe); 112962306a36Sopenharmony_ci 113062306a36Sopenharmony_ci /* We can only do regular read/write on fifos */ 113162306a36Sopenharmony_ci stream_open(inode, filp); 113262306a36Sopenharmony_ci 113362306a36Sopenharmony_ci switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) { 113462306a36Sopenharmony_ci case FMODE_READ: 113562306a36Sopenharmony_ci /* 113662306a36Sopenharmony_ci * O_RDONLY 113762306a36Sopenharmony_ci * POSIX.1 says that O_NONBLOCK means return with the FIFO 113862306a36Sopenharmony_ci * opened, even when there is no process writing the FIFO. 113962306a36Sopenharmony_ci */ 114062306a36Sopenharmony_ci pipe->r_counter++; 114162306a36Sopenharmony_ci if (pipe->readers++ == 0) 114262306a36Sopenharmony_ci wake_up_partner(pipe); 114362306a36Sopenharmony_ci 114462306a36Sopenharmony_ci if (!is_pipe && !pipe->writers) { 114562306a36Sopenharmony_ci if ((filp->f_flags & O_NONBLOCK)) { 114662306a36Sopenharmony_ci /* suppress EPOLLHUP until we have 114762306a36Sopenharmony_ci * seen a writer */ 114862306a36Sopenharmony_ci filp->f_version = pipe->w_counter; 114962306a36Sopenharmony_ci } else { 115062306a36Sopenharmony_ci if (wait_for_partner(pipe, &pipe->w_counter)) 115162306a36Sopenharmony_ci goto err_rd; 115262306a36Sopenharmony_ci } 115362306a36Sopenharmony_ci } 115462306a36Sopenharmony_ci break; 115562306a36Sopenharmony_ci 115662306a36Sopenharmony_ci case FMODE_WRITE: 115762306a36Sopenharmony_ci /* 115862306a36Sopenharmony_ci * O_WRONLY 115962306a36Sopenharmony_ci * POSIX.1 says that O_NONBLOCK means return -1 with 116062306a36Sopenharmony_ci * errno=ENXIO when there is no process reading the FIFO. 116162306a36Sopenharmony_ci */ 116262306a36Sopenharmony_ci ret = -ENXIO; 116362306a36Sopenharmony_ci if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers) 116462306a36Sopenharmony_ci goto err; 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_ci pipe->w_counter++; 116762306a36Sopenharmony_ci if (!pipe->writers++) 116862306a36Sopenharmony_ci wake_up_partner(pipe); 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci if (!is_pipe && !pipe->readers) { 117162306a36Sopenharmony_ci if (wait_for_partner(pipe, &pipe->r_counter)) 117262306a36Sopenharmony_ci goto err_wr; 117362306a36Sopenharmony_ci } 117462306a36Sopenharmony_ci break; 117562306a36Sopenharmony_ci 117662306a36Sopenharmony_ci case FMODE_READ | FMODE_WRITE: 117762306a36Sopenharmony_ci /* 117862306a36Sopenharmony_ci * O_RDWR 117962306a36Sopenharmony_ci * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set. 118062306a36Sopenharmony_ci * This implementation will NEVER block on a O_RDWR open, since 118162306a36Sopenharmony_ci * the process can at least talk to itself. 118262306a36Sopenharmony_ci */ 118362306a36Sopenharmony_ci 118462306a36Sopenharmony_ci pipe->readers++; 118562306a36Sopenharmony_ci pipe->writers++; 118662306a36Sopenharmony_ci pipe->r_counter++; 118762306a36Sopenharmony_ci pipe->w_counter++; 118862306a36Sopenharmony_ci if (pipe->readers == 1 || pipe->writers == 1) 118962306a36Sopenharmony_ci wake_up_partner(pipe); 119062306a36Sopenharmony_ci break; 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_ci default: 119362306a36Sopenharmony_ci ret = -EINVAL; 119462306a36Sopenharmony_ci goto err; 119562306a36Sopenharmony_ci } 119662306a36Sopenharmony_ci 119762306a36Sopenharmony_ci /* Ok! */ 119862306a36Sopenharmony_ci __pipe_unlock(pipe); 119962306a36Sopenharmony_ci return 0; 120062306a36Sopenharmony_ci 120162306a36Sopenharmony_cierr_rd: 120262306a36Sopenharmony_ci if (!--pipe->readers) 120362306a36Sopenharmony_ci wake_up_interruptible(&pipe->wr_wait); 120462306a36Sopenharmony_ci ret = -ERESTARTSYS; 120562306a36Sopenharmony_ci goto err; 120662306a36Sopenharmony_ci 120762306a36Sopenharmony_cierr_wr: 120862306a36Sopenharmony_ci if (!--pipe->writers) 120962306a36Sopenharmony_ci wake_up_interruptible_all(&pipe->rd_wait); 121062306a36Sopenharmony_ci ret = -ERESTARTSYS; 121162306a36Sopenharmony_ci goto err; 121262306a36Sopenharmony_ci 121362306a36Sopenharmony_cierr: 121462306a36Sopenharmony_ci __pipe_unlock(pipe); 121562306a36Sopenharmony_ci 121662306a36Sopenharmony_ci put_pipe_info(inode, pipe); 121762306a36Sopenharmony_ci return ret; 121862306a36Sopenharmony_ci} 121962306a36Sopenharmony_ci 122062306a36Sopenharmony_ciconst struct file_operations pipefifo_fops = { 122162306a36Sopenharmony_ci .open = fifo_open, 122262306a36Sopenharmony_ci .llseek = no_llseek, 122362306a36Sopenharmony_ci .read_iter = pipe_read, 122462306a36Sopenharmony_ci .write_iter = pipe_write, 122562306a36Sopenharmony_ci .poll = pipe_poll, 122662306a36Sopenharmony_ci .unlocked_ioctl = pipe_ioctl, 122762306a36Sopenharmony_ci .release = pipe_release, 122862306a36Sopenharmony_ci .fasync = pipe_fasync, 122962306a36Sopenharmony_ci .splice_write = iter_file_splice_write, 123062306a36Sopenharmony_ci}; 123162306a36Sopenharmony_ci 123262306a36Sopenharmony_ci/* 123362306a36Sopenharmony_ci * Currently we rely on the pipe array holding a power-of-2 number 123462306a36Sopenharmony_ci * of pages. Returns 0 on error. 123562306a36Sopenharmony_ci */ 123662306a36Sopenharmony_ciunsigned int round_pipe_size(unsigned int size) 123762306a36Sopenharmony_ci{ 123862306a36Sopenharmony_ci if (size > (1U << 31)) 123962306a36Sopenharmony_ci return 0; 124062306a36Sopenharmony_ci 124162306a36Sopenharmony_ci /* Minimum pipe size, as required by POSIX */ 124262306a36Sopenharmony_ci if (size < PAGE_SIZE) 124362306a36Sopenharmony_ci return PAGE_SIZE; 124462306a36Sopenharmony_ci 124562306a36Sopenharmony_ci return roundup_pow_of_two(size); 124662306a36Sopenharmony_ci} 124762306a36Sopenharmony_ci 124862306a36Sopenharmony_ci/* 124962306a36Sopenharmony_ci * Resize the pipe ring to a number of slots. 125062306a36Sopenharmony_ci * 125162306a36Sopenharmony_ci * Note the pipe can be reduced in capacity, but only if the current 125262306a36Sopenharmony_ci * occupancy doesn't exceed nr_slots; if it does, EBUSY will be 125362306a36Sopenharmony_ci * returned instead. 125462306a36Sopenharmony_ci */ 125562306a36Sopenharmony_ciint pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) 125662306a36Sopenharmony_ci{ 125762306a36Sopenharmony_ci struct pipe_buffer *bufs; 125862306a36Sopenharmony_ci unsigned int head, tail, mask, n; 125962306a36Sopenharmony_ci 126062306a36Sopenharmony_ci bufs = kcalloc(nr_slots, sizeof(*bufs), 126162306a36Sopenharmony_ci GFP_KERNEL_ACCOUNT | __GFP_NOWARN); 126262306a36Sopenharmony_ci if (unlikely(!bufs)) 126362306a36Sopenharmony_ci return -ENOMEM; 126462306a36Sopenharmony_ci 126562306a36Sopenharmony_ci spin_lock_irq(&pipe->rd_wait.lock); 126662306a36Sopenharmony_ci mask = pipe->ring_size - 1; 126762306a36Sopenharmony_ci head = pipe->head; 126862306a36Sopenharmony_ci tail = pipe->tail; 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_ci n = pipe_occupancy(head, tail); 127162306a36Sopenharmony_ci if (nr_slots < n) { 127262306a36Sopenharmony_ci spin_unlock_irq(&pipe->rd_wait.lock); 127362306a36Sopenharmony_ci kfree(bufs); 127462306a36Sopenharmony_ci return -EBUSY; 127562306a36Sopenharmony_ci } 127662306a36Sopenharmony_ci 127762306a36Sopenharmony_ci /* 127862306a36Sopenharmony_ci * The pipe array wraps around, so just start the new one at zero 127962306a36Sopenharmony_ci * and adjust the indices. 128062306a36Sopenharmony_ci */ 128162306a36Sopenharmony_ci if (n > 0) { 128262306a36Sopenharmony_ci unsigned int h = head & mask; 128362306a36Sopenharmony_ci unsigned int t = tail & mask; 128462306a36Sopenharmony_ci if (h > t) { 128562306a36Sopenharmony_ci memcpy(bufs, pipe->bufs + t, 128662306a36Sopenharmony_ci n * sizeof(struct pipe_buffer)); 128762306a36Sopenharmony_ci } else { 128862306a36Sopenharmony_ci unsigned int tsize = pipe->ring_size - t; 128962306a36Sopenharmony_ci if (h > 0) 129062306a36Sopenharmony_ci memcpy(bufs + tsize, pipe->bufs, 129162306a36Sopenharmony_ci h * sizeof(struct pipe_buffer)); 129262306a36Sopenharmony_ci memcpy(bufs, pipe->bufs + t, 129362306a36Sopenharmony_ci tsize * sizeof(struct pipe_buffer)); 129462306a36Sopenharmony_ci } 129562306a36Sopenharmony_ci } 129662306a36Sopenharmony_ci 129762306a36Sopenharmony_ci head = n; 129862306a36Sopenharmony_ci tail = 0; 129962306a36Sopenharmony_ci 130062306a36Sopenharmony_ci kfree(pipe->bufs); 130162306a36Sopenharmony_ci pipe->bufs = bufs; 130262306a36Sopenharmony_ci pipe->ring_size = nr_slots; 130362306a36Sopenharmony_ci if (pipe->max_usage > nr_slots) 130462306a36Sopenharmony_ci pipe->max_usage = nr_slots; 130562306a36Sopenharmony_ci pipe->tail = tail; 130662306a36Sopenharmony_ci pipe->head = head; 130762306a36Sopenharmony_ci 130862306a36Sopenharmony_ci if (!pipe_has_watch_queue(pipe)) { 130962306a36Sopenharmony_ci pipe->max_usage = nr_slots; 131062306a36Sopenharmony_ci pipe->nr_accounted = nr_slots; 131162306a36Sopenharmony_ci } 131262306a36Sopenharmony_ci 131362306a36Sopenharmony_ci spin_unlock_irq(&pipe->rd_wait.lock); 131462306a36Sopenharmony_ci 131562306a36Sopenharmony_ci /* This might have made more room for writers */ 131662306a36Sopenharmony_ci wake_up_interruptible(&pipe->wr_wait); 131762306a36Sopenharmony_ci return 0; 131862306a36Sopenharmony_ci} 131962306a36Sopenharmony_ci 132062306a36Sopenharmony_ci/* 132162306a36Sopenharmony_ci * Allocate a new array of pipe buffers and copy the info over. Returns the 132262306a36Sopenharmony_ci * pipe size if successful, or return -ERROR on error. 132362306a36Sopenharmony_ci */ 132462306a36Sopenharmony_cistatic long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg) 132562306a36Sopenharmony_ci{ 132662306a36Sopenharmony_ci unsigned long user_bufs; 132762306a36Sopenharmony_ci unsigned int nr_slots, size; 132862306a36Sopenharmony_ci long ret = 0; 132962306a36Sopenharmony_ci 133062306a36Sopenharmony_ci if (pipe_has_watch_queue(pipe)) 133162306a36Sopenharmony_ci return -EBUSY; 133262306a36Sopenharmony_ci 133362306a36Sopenharmony_ci size = round_pipe_size(arg); 133462306a36Sopenharmony_ci nr_slots = size >> PAGE_SHIFT; 133562306a36Sopenharmony_ci 133662306a36Sopenharmony_ci if (!nr_slots) 133762306a36Sopenharmony_ci return -EINVAL; 133862306a36Sopenharmony_ci 133962306a36Sopenharmony_ci /* 134062306a36Sopenharmony_ci * If trying to increase the pipe capacity, check that an 134162306a36Sopenharmony_ci * unprivileged user is not trying to exceed various limits 134262306a36Sopenharmony_ci * (soft limit check here, hard limit check just below). 134362306a36Sopenharmony_ci * Decreasing the pipe capacity is always permitted, even 134462306a36Sopenharmony_ci * if the user is currently over a limit. 134562306a36Sopenharmony_ci */ 134662306a36Sopenharmony_ci if (nr_slots > pipe->max_usage && 134762306a36Sopenharmony_ci size > pipe_max_size && !capable(CAP_SYS_RESOURCE)) 134862306a36Sopenharmony_ci return -EPERM; 134962306a36Sopenharmony_ci 135062306a36Sopenharmony_ci user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots); 135162306a36Sopenharmony_ci 135262306a36Sopenharmony_ci if (nr_slots > pipe->max_usage && 135362306a36Sopenharmony_ci (too_many_pipe_buffers_hard(user_bufs) || 135462306a36Sopenharmony_ci too_many_pipe_buffers_soft(user_bufs)) && 135562306a36Sopenharmony_ci pipe_is_unprivileged_user()) { 135662306a36Sopenharmony_ci ret = -EPERM; 135762306a36Sopenharmony_ci goto out_revert_acct; 135862306a36Sopenharmony_ci } 135962306a36Sopenharmony_ci 136062306a36Sopenharmony_ci ret = pipe_resize_ring(pipe, nr_slots); 136162306a36Sopenharmony_ci if (ret < 0) 136262306a36Sopenharmony_ci goto out_revert_acct; 136362306a36Sopenharmony_ci 136462306a36Sopenharmony_ci return pipe->max_usage * PAGE_SIZE; 136562306a36Sopenharmony_ci 136662306a36Sopenharmony_ciout_revert_acct: 136762306a36Sopenharmony_ci (void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted); 136862306a36Sopenharmony_ci return ret; 136962306a36Sopenharmony_ci} 137062306a36Sopenharmony_ci 137162306a36Sopenharmony_ci/* 137262306a36Sopenharmony_ci * Note that i_pipe and i_cdev share the same location, so checking ->i_pipe is 137362306a36Sopenharmony_ci * not enough to verify that this is a pipe. 137462306a36Sopenharmony_ci */ 137562306a36Sopenharmony_cistruct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice) 137662306a36Sopenharmony_ci{ 137762306a36Sopenharmony_ci struct pipe_inode_info *pipe = file->private_data; 137862306a36Sopenharmony_ci 137962306a36Sopenharmony_ci if (file->f_op != &pipefifo_fops || !pipe) 138062306a36Sopenharmony_ci return NULL; 138162306a36Sopenharmony_ci if (for_splice && pipe_has_watch_queue(pipe)) 138262306a36Sopenharmony_ci return NULL; 138362306a36Sopenharmony_ci return pipe; 138462306a36Sopenharmony_ci} 138562306a36Sopenharmony_ci 138662306a36Sopenharmony_cilong pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg) 138762306a36Sopenharmony_ci{ 138862306a36Sopenharmony_ci struct pipe_inode_info *pipe; 138962306a36Sopenharmony_ci long ret; 139062306a36Sopenharmony_ci 139162306a36Sopenharmony_ci pipe = get_pipe_info(file, false); 139262306a36Sopenharmony_ci if (!pipe) 139362306a36Sopenharmony_ci return -EBADF; 139462306a36Sopenharmony_ci 139562306a36Sopenharmony_ci __pipe_lock(pipe); 139662306a36Sopenharmony_ci 139762306a36Sopenharmony_ci switch (cmd) { 139862306a36Sopenharmony_ci case F_SETPIPE_SZ: 139962306a36Sopenharmony_ci ret = pipe_set_size(pipe, arg); 140062306a36Sopenharmony_ci break; 140162306a36Sopenharmony_ci case F_GETPIPE_SZ: 140262306a36Sopenharmony_ci ret = pipe->max_usage * PAGE_SIZE; 140362306a36Sopenharmony_ci break; 140462306a36Sopenharmony_ci default: 140562306a36Sopenharmony_ci ret = -EINVAL; 140662306a36Sopenharmony_ci break; 140762306a36Sopenharmony_ci } 140862306a36Sopenharmony_ci 140962306a36Sopenharmony_ci __pipe_unlock(pipe); 141062306a36Sopenharmony_ci return ret; 141162306a36Sopenharmony_ci} 141262306a36Sopenharmony_ci 141362306a36Sopenharmony_cistatic const struct super_operations pipefs_ops = { 141462306a36Sopenharmony_ci .destroy_inode = free_inode_nonrcu, 141562306a36Sopenharmony_ci .statfs = simple_statfs, 141662306a36Sopenharmony_ci}; 141762306a36Sopenharmony_ci 141862306a36Sopenharmony_ci/* 141962306a36Sopenharmony_ci * pipefs should _never_ be mounted by userland - too much of security hassle, 142062306a36Sopenharmony_ci * no real gain from having the whole whorehouse mounted. So we don't need 142162306a36Sopenharmony_ci * any operations on the root directory. However, we need a non-trivial 142262306a36Sopenharmony_ci * d_name - pipe: will go nicely and kill the special-casing in procfs. 142362306a36Sopenharmony_ci */ 142462306a36Sopenharmony_ci 142562306a36Sopenharmony_cistatic int pipefs_init_fs_context(struct fs_context *fc) 142662306a36Sopenharmony_ci{ 142762306a36Sopenharmony_ci struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC); 142862306a36Sopenharmony_ci if (!ctx) 142962306a36Sopenharmony_ci return -ENOMEM; 143062306a36Sopenharmony_ci ctx->ops = &pipefs_ops; 143162306a36Sopenharmony_ci ctx->dops = &pipefs_dentry_operations; 143262306a36Sopenharmony_ci return 0; 143362306a36Sopenharmony_ci} 143462306a36Sopenharmony_ci 143562306a36Sopenharmony_cistatic struct file_system_type pipe_fs_type = { 143662306a36Sopenharmony_ci .name = "pipefs", 143762306a36Sopenharmony_ci .init_fs_context = pipefs_init_fs_context, 143862306a36Sopenharmony_ci .kill_sb = kill_anon_super, 143962306a36Sopenharmony_ci}; 144062306a36Sopenharmony_ci 144162306a36Sopenharmony_ci#ifdef CONFIG_SYSCTL 144262306a36Sopenharmony_cistatic int do_proc_dopipe_max_size_conv(unsigned long *lvalp, 144362306a36Sopenharmony_ci unsigned int *valp, 144462306a36Sopenharmony_ci int write, void *data) 144562306a36Sopenharmony_ci{ 144662306a36Sopenharmony_ci if (write) { 144762306a36Sopenharmony_ci unsigned int val; 144862306a36Sopenharmony_ci 144962306a36Sopenharmony_ci val = round_pipe_size(*lvalp); 145062306a36Sopenharmony_ci if (val == 0) 145162306a36Sopenharmony_ci return -EINVAL; 145262306a36Sopenharmony_ci 145362306a36Sopenharmony_ci *valp = val; 145462306a36Sopenharmony_ci } else { 145562306a36Sopenharmony_ci unsigned int val = *valp; 145662306a36Sopenharmony_ci *lvalp = (unsigned long) val; 145762306a36Sopenharmony_ci } 145862306a36Sopenharmony_ci 145962306a36Sopenharmony_ci return 0; 146062306a36Sopenharmony_ci} 146162306a36Sopenharmony_ci 146262306a36Sopenharmony_cistatic int proc_dopipe_max_size(struct ctl_table *table, int write, 146362306a36Sopenharmony_ci void *buffer, size_t *lenp, loff_t *ppos) 146462306a36Sopenharmony_ci{ 146562306a36Sopenharmony_ci return do_proc_douintvec(table, write, buffer, lenp, ppos, 146662306a36Sopenharmony_ci do_proc_dopipe_max_size_conv, NULL); 146762306a36Sopenharmony_ci} 146862306a36Sopenharmony_ci 146962306a36Sopenharmony_cistatic struct ctl_table fs_pipe_sysctls[] = { 147062306a36Sopenharmony_ci { 147162306a36Sopenharmony_ci .procname = "pipe-max-size", 147262306a36Sopenharmony_ci .data = &pipe_max_size, 147362306a36Sopenharmony_ci .maxlen = sizeof(pipe_max_size), 147462306a36Sopenharmony_ci .mode = 0644, 147562306a36Sopenharmony_ci .proc_handler = proc_dopipe_max_size, 147662306a36Sopenharmony_ci }, 147762306a36Sopenharmony_ci { 147862306a36Sopenharmony_ci .procname = "pipe-user-pages-hard", 147962306a36Sopenharmony_ci .data = &pipe_user_pages_hard, 148062306a36Sopenharmony_ci .maxlen = sizeof(pipe_user_pages_hard), 148162306a36Sopenharmony_ci .mode = 0644, 148262306a36Sopenharmony_ci .proc_handler = proc_doulongvec_minmax, 148362306a36Sopenharmony_ci }, 148462306a36Sopenharmony_ci { 148562306a36Sopenharmony_ci .procname = "pipe-user-pages-soft", 148662306a36Sopenharmony_ci .data = &pipe_user_pages_soft, 148762306a36Sopenharmony_ci .maxlen = sizeof(pipe_user_pages_soft), 148862306a36Sopenharmony_ci .mode = 0644, 148962306a36Sopenharmony_ci .proc_handler = proc_doulongvec_minmax, 149062306a36Sopenharmony_ci }, 149162306a36Sopenharmony_ci { } 149262306a36Sopenharmony_ci}; 149362306a36Sopenharmony_ci#endif 149462306a36Sopenharmony_ci 149562306a36Sopenharmony_cistatic int __init init_pipe_fs(void) 149662306a36Sopenharmony_ci{ 149762306a36Sopenharmony_ci int err = register_filesystem(&pipe_fs_type); 149862306a36Sopenharmony_ci 149962306a36Sopenharmony_ci if (!err) { 150062306a36Sopenharmony_ci pipe_mnt = kern_mount(&pipe_fs_type); 150162306a36Sopenharmony_ci if (IS_ERR(pipe_mnt)) { 150262306a36Sopenharmony_ci err = PTR_ERR(pipe_mnt); 150362306a36Sopenharmony_ci unregister_filesystem(&pipe_fs_type); 150462306a36Sopenharmony_ci } 150562306a36Sopenharmony_ci } 150662306a36Sopenharmony_ci#ifdef CONFIG_SYSCTL 150762306a36Sopenharmony_ci register_sysctl_init("fs", fs_pipe_sysctls); 150862306a36Sopenharmony_ci#endif 150962306a36Sopenharmony_ci return err; 151062306a36Sopenharmony_ci} 151162306a36Sopenharmony_ci 151262306a36Sopenharmony_cifs_initcall(init_pipe_fs); 1513