162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * linux/fs/file.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * Manage the dynamic fd arrays in the process files_struct. 862306a36Sopenharmony_ci */ 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include <linux/syscalls.h> 1162306a36Sopenharmony_ci#include <linux/export.h> 1262306a36Sopenharmony_ci#include <linux/fs.h> 1362306a36Sopenharmony_ci#include <linux/kernel.h> 1462306a36Sopenharmony_ci#include <linux/mm.h> 1562306a36Sopenharmony_ci#include <linux/sched/signal.h> 1662306a36Sopenharmony_ci#include <linux/slab.h> 1762306a36Sopenharmony_ci#include <linux/file.h> 1862306a36Sopenharmony_ci#include <linux/fdtable.h> 1962306a36Sopenharmony_ci#include <linux/bitops.h> 2062306a36Sopenharmony_ci#include <linux/spinlock.h> 2162306a36Sopenharmony_ci#include <linux/rcupdate.h> 2262306a36Sopenharmony_ci#include <linux/close_range.h> 2362306a36Sopenharmony_ci#include <net/sock.h> 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci#include "internal.h" 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ciunsigned int sysctl_nr_open __read_mostly = 1024*1024; 2862306a36Sopenharmony_ciunsigned int sysctl_nr_open_min = BITS_PER_LONG; 2962306a36Sopenharmony_ci/* our min() is unusable in constant expressions ;-/ */ 3062306a36Sopenharmony_ci#define __const_min(x, y) ((x) < (y) ? (x) : (y)) 3162306a36Sopenharmony_ciunsigned int sysctl_nr_open_max = 3262306a36Sopenharmony_ci __const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG; 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_cistatic void __free_fdtable(struct fdtable *fdt) 3562306a36Sopenharmony_ci{ 3662306a36Sopenharmony_ci kvfree(fdt->fd); 3762306a36Sopenharmony_ci kvfree(fdt->open_fds); 3862306a36Sopenharmony_ci kfree(fdt); 3962306a36Sopenharmony_ci} 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_cistatic void free_fdtable_rcu(struct rcu_head *rcu) 4262306a36Sopenharmony_ci{ 4362306a36Sopenharmony_ci __free_fdtable(container_of(rcu, struct fdtable, rcu)); 4462306a36Sopenharmony_ci} 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci#define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr)) 4762306a36Sopenharmony_ci#define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long)) 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_ci/* 5062306a36Sopenharmony_ci * Copy 'count' fd bits from the old table to the new table and clear the extra 5162306a36Sopenharmony_ci * space if any. This does not copy the file pointers. Called with the files 5262306a36Sopenharmony_ci * spinlock held for write. 5362306a36Sopenharmony_ci */ 5462306a36Sopenharmony_cistatic void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, 5562306a36Sopenharmony_ci unsigned int count) 5662306a36Sopenharmony_ci{ 5762306a36Sopenharmony_ci unsigned int cpy, set; 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci cpy = count / BITS_PER_BYTE; 6062306a36Sopenharmony_ci set = (nfdt->max_fds - count) / BITS_PER_BYTE; 6162306a36Sopenharmony_ci memcpy(nfdt->open_fds, ofdt->open_fds, cpy); 6262306a36Sopenharmony_ci memset((char *)nfdt->open_fds + cpy, 0, set); 6362306a36Sopenharmony_ci memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); 6462306a36Sopenharmony_ci memset((char *)nfdt->close_on_exec + cpy, 0, set); 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci cpy = BITBIT_SIZE(count); 6762306a36Sopenharmony_ci set = BITBIT_SIZE(nfdt->max_fds) - cpy; 6862306a36Sopenharmony_ci memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy); 6962306a36Sopenharmony_ci memset((char *)nfdt->full_fds_bits + cpy, 0, set); 7062306a36Sopenharmony_ci} 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci/* 7362306a36Sopenharmony_ci * Copy all file descriptors from the old table to the new, expanded table and 7462306a36Sopenharmony_ci * clear the extra space. Called with the files spinlock held for write. 7562306a36Sopenharmony_ci */ 7662306a36Sopenharmony_cistatic void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) 7762306a36Sopenharmony_ci{ 7862306a36Sopenharmony_ci size_t cpy, set; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci BUG_ON(nfdt->max_fds < ofdt->max_fds); 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci cpy = ofdt->max_fds * sizeof(struct file *); 8362306a36Sopenharmony_ci set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); 8462306a36Sopenharmony_ci memcpy(nfdt->fd, ofdt->fd, cpy); 8562306a36Sopenharmony_ci memset((char *)nfdt->fd + cpy, 0, set); 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds); 8862306a36Sopenharmony_ci} 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci/* 9162306a36Sopenharmony_ci * Note how the fdtable bitmap allocations very much have to be a multiple of 9262306a36Sopenharmony_ci * BITS_PER_LONG. This is not only because we walk those things in chunks of 9362306a36Sopenharmony_ci * 'unsigned long' in some places, but simply because that is how the Linux 9462306a36Sopenharmony_ci * kernel bitmaps are defined to work: they are not "bits in an array of bytes", 9562306a36Sopenharmony_ci * they are very much "bits in an array of unsigned long". 9662306a36Sopenharmony_ci * 9762306a36Sopenharmony_ci * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied 9862306a36Sopenharmony_ci * by that "1024/sizeof(ptr)" before, we already know there are sufficient 9962306a36Sopenharmony_ci * clear low bits. Clang seems to realize that, gcc ends up being confused. 10062306a36Sopenharmony_ci * 10162306a36Sopenharmony_ci * On a 128-bit machine, the ALIGN() would actually matter. In the meantime, 10262306a36Sopenharmony_ci * let's consider it documentation (and maybe a test-case for gcc to improve 10362306a36Sopenharmony_ci * its code generation ;) 10462306a36Sopenharmony_ci */ 10562306a36Sopenharmony_cistatic struct fdtable * alloc_fdtable(unsigned int nr) 10662306a36Sopenharmony_ci{ 10762306a36Sopenharmony_ci struct fdtable *fdt; 10862306a36Sopenharmony_ci void *data; 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci /* 11162306a36Sopenharmony_ci * Figure out how many fds we actually want to support in this fdtable. 11262306a36Sopenharmony_ci * Allocation steps are keyed to the size of the fdarray, since it 11362306a36Sopenharmony_ci * grows far faster than any of the other dynamic data. We try to fit 11462306a36Sopenharmony_ci * the fdarray into comfortable page-tuned chunks: starting at 1024B 11562306a36Sopenharmony_ci * and growing in powers of two from there on. 11662306a36Sopenharmony_ci */ 11762306a36Sopenharmony_ci nr /= (1024 / sizeof(struct file *)); 11862306a36Sopenharmony_ci nr = roundup_pow_of_two(nr + 1); 11962306a36Sopenharmony_ci nr *= (1024 / sizeof(struct file *)); 12062306a36Sopenharmony_ci nr = ALIGN(nr, BITS_PER_LONG); 12162306a36Sopenharmony_ci /* 12262306a36Sopenharmony_ci * Note that this can drive nr *below* what we had passed if sysctl_nr_open 12362306a36Sopenharmony_ci * had been set lower between the check in expand_files() and here. Deal 12462306a36Sopenharmony_ci * with that in caller, it's cheaper that way. 12562306a36Sopenharmony_ci * 12662306a36Sopenharmony_ci * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise 12762306a36Sopenharmony_ci * bitmaps handling below becomes unpleasant, to put it mildly... 12862306a36Sopenharmony_ci */ 12962306a36Sopenharmony_ci if (unlikely(nr > sysctl_nr_open)) 13062306a36Sopenharmony_ci nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1; 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT); 13362306a36Sopenharmony_ci if (!fdt) 13462306a36Sopenharmony_ci goto out; 13562306a36Sopenharmony_ci fdt->max_fds = nr; 13662306a36Sopenharmony_ci data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT); 13762306a36Sopenharmony_ci if (!data) 13862306a36Sopenharmony_ci goto out_fdt; 13962306a36Sopenharmony_ci fdt->fd = data; 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci data = kvmalloc(max_t(size_t, 14262306a36Sopenharmony_ci 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES), 14362306a36Sopenharmony_ci GFP_KERNEL_ACCOUNT); 14462306a36Sopenharmony_ci if (!data) 14562306a36Sopenharmony_ci goto out_arr; 14662306a36Sopenharmony_ci fdt->open_fds = data; 14762306a36Sopenharmony_ci data += nr / BITS_PER_BYTE; 14862306a36Sopenharmony_ci fdt->close_on_exec = data; 14962306a36Sopenharmony_ci data += nr / BITS_PER_BYTE; 15062306a36Sopenharmony_ci fdt->full_fds_bits = data; 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci return fdt; 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ciout_arr: 15562306a36Sopenharmony_ci kvfree(fdt->fd); 15662306a36Sopenharmony_ciout_fdt: 15762306a36Sopenharmony_ci kfree(fdt); 15862306a36Sopenharmony_ciout: 15962306a36Sopenharmony_ci return NULL; 16062306a36Sopenharmony_ci} 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_ci/* 16362306a36Sopenharmony_ci * Expand the file descriptor table. 16462306a36Sopenharmony_ci * This function will allocate a new fdtable and both fd array and fdset, of 16562306a36Sopenharmony_ci * the given size. 16662306a36Sopenharmony_ci * Return <0 error code on error; 1 on successful completion. 16762306a36Sopenharmony_ci * The files->file_lock should be held on entry, and will be held on exit. 16862306a36Sopenharmony_ci */ 16962306a36Sopenharmony_cistatic int expand_fdtable(struct files_struct *files, unsigned int nr) 17062306a36Sopenharmony_ci __releases(files->file_lock) 17162306a36Sopenharmony_ci __acquires(files->file_lock) 17262306a36Sopenharmony_ci{ 17362306a36Sopenharmony_ci struct fdtable *new_fdt, *cur_fdt; 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci spin_unlock(&files->file_lock); 17662306a36Sopenharmony_ci new_fdt = alloc_fdtable(nr); 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci /* make sure all fd_install() have seen resize_in_progress 17962306a36Sopenharmony_ci * or have finished their rcu_read_lock_sched() section. 18062306a36Sopenharmony_ci */ 18162306a36Sopenharmony_ci if (atomic_read(&files->count) > 1) 18262306a36Sopenharmony_ci synchronize_rcu(); 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci spin_lock(&files->file_lock); 18562306a36Sopenharmony_ci if (!new_fdt) 18662306a36Sopenharmony_ci return -ENOMEM; 18762306a36Sopenharmony_ci /* 18862306a36Sopenharmony_ci * extremely unlikely race - sysctl_nr_open decreased between the check in 18962306a36Sopenharmony_ci * caller and alloc_fdtable(). Cheaper to catch it here... 19062306a36Sopenharmony_ci */ 19162306a36Sopenharmony_ci if (unlikely(new_fdt->max_fds <= nr)) { 19262306a36Sopenharmony_ci __free_fdtable(new_fdt); 19362306a36Sopenharmony_ci return -EMFILE; 19462306a36Sopenharmony_ci } 19562306a36Sopenharmony_ci cur_fdt = files_fdtable(files); 19662306a36Sopenharmony_ci BUG_ON(nr < cur_fdt->max_fds); 19762306a36Sopenharmony_ci copy_fdtable(new_fdt, cur_fdt); 19862306a36Sopenharmony_ci rcu_assign_pointer(files->fdt, new_fdt); 19962306a36Sopenharmony_ci if (cur_fdt != &files->fdtab) 20062306a36Sopenharmony_ci call_rcu(&cur_fdt->rcu, free_fdtable_rcu); 20162306a36Sopenharmony_ci /* coupled with smp_rmb() in fd_install() */ 20262306a36Sopenharmony_ci smp_wmb(); 20362306a36Sopenharmony_ci return 1; 20462306a36Sopenharmony_ci} 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci/* 20762306a36Sopenharmony_ci * Expand files. 20862306a36Sopenharmony_ci * This function will expand the file structures, if the requested size exceeds 20962306a36Sopenharmony_ci * the current capacity and there is room for expansion. 21062306a36Sopenharmony_ci * Return <0 error code on error; 0 when nothing done; 1 when files were 21162306a36Sopenharmony_ci * expanded and execution may have blocked. 21262306a36Sopenharmony_ci * The files->file_lock should be held on entry, and will be held on exit. 21362306a36Sopenharmony_ci */ 21462306a36Sopenharmony_cistatic int expand_files(struct files_struct *files, unsigned int nr) 21562306a36Sopenharmony_ci __releases(files->file_lock) 21662306a36Sopenharmony_ci __acquires(files->file_lock) 21762306a36Sopenharmony_ci{ 21862306a36Sopenharmony_ci struct fdtable *fdt; 21962306a36Sopenharmony_ci int expanded = 0; 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_cirepeat: 22262306a36Sopenharmony_ci fdt = files_fdtable(files); 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci /* Do we need to expand? */ 22562306a36Sopenharmony_ci if (nr < fdt->max_fds) 22662306a36Sopenharmony_ci return expanded; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci /* Can we expand? */ 22962306a36Sopenharmony_ci if (nr >= sysctl_nr_open) 23062306a36Sopenharmony_ci return -EMFILE; 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci if (unlikely(files->resize_in_progress)) { 23362306a36Sopenharmony_ci spin_unlock(&files->file_lock); 23462306a36Sopenharmony_ci expanded = 1; 23562306a36Sopenharmony_ci wait_event(files->resize_wait, !files->resize_in_progress); 23662306a36Sopenharmony_ci spin_lock(&files->file_lock); 23762306a36Sopenharmony_ci goto repeat; 23862306a36Sopenharmony_ci } 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci /* All good, so we try */ 24162306a36Sopenharmony_ci files->resize_in_progress = true; 24262306a36Sopenharmony_ci expanded = expand_fdtable(files, nr); 24362306a36Sopenharmony_ci files->resize_in_progress = false; 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci wake_up_all(&files->resize_wait); 24662306a36Sopenharmony_ci return expanded; 24762306a36Sopenharmony_ci} 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_cistatic inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt) 25062306a36Sopenharmony_ci{ 25162306a36Sopenharmony_ci __set_bit(fd, fdt->close_on_exec); 25262306a36Sopenharmony_ci} 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_cistatic inline void __clear_close_on_exec(unsigned int fd, struct fdtable *fdt) 25562306a36Sopenharmony_ci{ 25662306a36Sopenharmony_ci if (test_bit(fd, fdt->close_on_exec)) 25762306a36Sopenharmony_ci __clear_bit(fd, fdt->close_on_exec); 25862306a36Sopenharmony_ci} 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_cistatic inline void __set_open_fd(unsigned int fd, struct fdtable *fdt) 26162306a36Sopenharmony_ci{ 26262306a36Sopenharmony_ci __set_bit(fd, fdt->open_fds); 26362306a36Sopenharmony_ci fd /= BITS_PER_LONG; 26462306a36Sopenharmony_ci if (!~fdt->open_fds[fd]) 26562306a36Sopenharmony_ci __set_bit(fd, fdt->full_fds_bits); 26662306a36Sopenharmony_ci} 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_cistatic inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt) 26962306a36Sopenharmony_ci{ 27062306a36Sopenharmony_ci __clear_bit(fd, fdt->open_fds); 27162306a36Sopenharmony_ci __clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits); 27262306a36Sopenharmony_ci} 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_cistatic unsigned int count_open_files(struct fdtable *fdt) 27562306a36Sopenharmony_ci{ 27662306a36Sopenharmony_ci unsigned int size = fdt->max_fds; 27762306a36Sopenharmony_ci unsigned int i; 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci /* Find the last open fd */ 28062306a36Sopenharmony_ci for (i = size / BITS_PER_LONG; i > 0; ) { 28162306a36Sopenharmony_ci if (fdt->open_fds[--i]) 28262306a36Sopenharmony_ci break; 28362306a36Sopenharmony_ci } 28462306a36Sopenharmony_ci i = (i + 1) * BITS_PER_LONG; 28562306a36Sopenharmony_ci return i; 28662306a36Sopenharmony_ci} 28762306a36Sopenharmony_ci 28862306a36Sopenharmony_ci/* 28962306a36Sopenharmony_ci * Note that a sane fdtable size always has to be a multiple of 29062306a36Sopenharmony_ci * BITS_PER_LONG, since we have bitmaps that are sized by this. 29162306a36Sopenharmony_ci * 29262306a36Sopenharmony_ci * 'max_fds' will normally already be properly aligned, but it 29362306a36Sopenharmony_ci * turns out that in the close_range() -> __close_range() -> 29462306a36Sopenharmony_ci * unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end 29562306a36Sopenharmony_ci * up having a 'max_fds' value that isn't already aligned. 29662306a36Sopenharmony_ci * 29762306a36Sopenharmony_ci * Rather than make close_range() have to worry about this, 29862306a36Sopenharmony_ci * just make that BITS_PER_LONG alignment be part of a sane 29962306a36Sopenharmony_ci * fdtable size. Becuase that's really what it is. 30062306a36Sopenharmony_ci */ 30162306a36Sopenharmony_cistatic unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds) 30262306a36Sopenharmony_ci{ 30362306a36Sopenharmony_ci unsigned int count; 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci count = count_open_files(fdt); 30662306a36Sopenharmony_ci if (max_fds < NR_OPEN_DEFAULT) 30762306a36Sopenharmony_ci max_fds = NR_OPEN_DEFAULT; 30862306a36Sopenharmony_ci return ALIGN(min(count, max_fds), BITS_PER_LONG); 30962306a36Sopenharmony_ci} 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci/* 31262306a36Sopenharmony_ci * Allocate a new files structure and copy contents from the 31362306a36Sopenharmony_ci * passed in files structure. 31462306a36Sopenharmony_ci * errorp will be valid only when the returned files_struct is NULL. 31562306a36Sopenharmony_ci */ 31662306a36Sopenharmony_cistruct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp) 31762306a36Sopenharmony_ci{ 31862306a36Sopenharmony_ci struct files_struct *newf; 31962306a36Sopenharmony_ci struct file **old_fds, **new_fds; 32062306a36Sopenharmony_ci unsigned int open_files, i; 32162306a36Sopenharmony_ci struct fdtable *old_fdt, *new_fdt; 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci *errorp = -ENOMEM; 32462306a36Sopenharmony_ci newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); 32562306a36Sopenharmony_ci if (!newf) 32662306a36Sopenharmony_ci goto out; 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci atomic_set(&newf->count, 1); 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ci spin_lock_init(&newf->file_lock); 33162306a36Sopenharmony_ci newf->resize_in_progress = false; 33262306a36Sopenharmony_ci init_waitqueue_head(&newf->resize_wait); 33362306a36Sopenharmony_ci newf->next_fd = 0; 33462306a36Sopenharmony_ci new_fdt = &newf->fdtab; 33562306a36Sopenharmony_ci new_fdt->max_fds = NR_OPEN_DEFAULT; 33662306a36Sopenharmony_ci new_fdt->close_on_exec = newf->close_on_exec_init; 33762306a36Sopenharmony_ci new_fdt->open_fds = newf->open_fds_init; 33862306a36Sopenharmony_ci new_fdt->full_fds_bits = newf->full_fds_bits_init; 33962306a36Sopenharmony_ci new_fdt->fd = &newf->fd_array[0]; 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci spin_lock(&oldf->file_lock); 34262306a36Sopenharmony_ci old_fdt = files_fdtable(oldf); 34362306a36Sopenharmony_ci open_files = sane_fdtable_size(old_fdt, max_fds); 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ci /* 34662306a36Sopenharmony_ci * Check whether we need to allocate a larger fd array and fd set. 34762306a36Sopenharmony_ci */ 34862306a36Sopenharmony_ci while (unlikely(open_files > new_fdt->max_fds)) { 34962306a36Sopenharmony_ci spin_unlock(&oldf->file_lock); 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci if (new_fdt != &newf->fdtab) 35262306a36Sopenharmony_ci __free_fdtable(new_fdt); 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci new_fdt = alloc_fdtable(open_files - 1); 35562306a36Sopenharmony_ci if (!new_fdt) { 35662306a36Sopenharmony_ci *errorp = -ENOMEM; 35762306a36Sopenharmony_ci goto out_release; 35862306a36Sopenharmony_ci } 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci /* beyond sysctl_nr_open; nothing to do */ 36162306a36Sopenharmony_ci if (unlikely(new_fdt->max_fds < open_files)) { 36262306a36Sopenharmony_ci __free_fdtable(new_fdt); 36362306a36Sopenharmony_ci *errorp = -EMFILE; 36462306a36Sopenharmony_ci goto out_release; 36562306a36Sopenharmony_ci } 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci /* 36862306a36Sopenharmony_ci * Reacquire the oldf lock and a pointer to its fd table 36962306a36Sopenharmony_ci * who knows it may have a new bigger fd table. We need 37062306a36Sopenharmony_ci * the latest pointer. 37162306a36Sopenharmony_ci */ 37262306a36Sopenharmony_ci spin_lock(&oldf->file_lock); 37362306a36Sopenharmony_ci old_fdt = files_fdtable(oldf); 37462306a36Sopenharmony_ci open_files = sane_fdtable_size(old_fdt, max_fds); 37562306a36Sopenharmony_ci } 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_ci copy_fd_bitmaps(new_fdt, old_fdt, open_files); 37862306a36Sopenharmony_ci 37962306a36Sopenharmony_ci old_fds = old_fdt->fd; 38062306a36Sopenharmony_ci new_fds = new_fdt->fd; 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci for (i = open_files; i != 0; i--) { 38362306a36Sopenharmony_ci struct file *f = *old_fds++; 38462306a36Sopenharmony_ci if (f) { 38562306a36Sopenharmony_ci get_file(f); 38662306a36Sopenharmony_ci } else { 38762306a36Sopenharmony_ci /* 38862306a36Sopenharmony_ci * The fd may be claimed in the fd bitmap but not yet 38962306a36Sopenharmony_ci * instantiated in the files array if a sibling thread 39062306a36Sopenharmony_ci * is partway through open(). So make sure that this 39162306a36Sopenharmony_ci * fd is available to the new process. 39262306a36Sopenharmony_ci */ 39362306a36Sopenharmony_ci __clear_open_fd(open_files - i, new_fdt); 39462306a36Sopenharmony_ci } 39562306a36Sopenharmony_ci rcu_assign_pointer(*new_fds++, f); 39662306a36Sopenharmony_ci } 39762306a36Sopenharmony_ci spin_unlock(&oldf->file_lock); 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci /* clear the remainder */ 40062306a36Sopenharmony_ci memset(new_fds, 0, (new_fdt->max_fds - open_files) * sizeof(struct file *)); 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_ci rcu_assign_pointer(newf->fdt, new_fdt); 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ci return newf; 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ciout_release: 40762306a36Sopenharmony_ci kmem_cache_free(files_cachep, newf); 40862306a36Sopenharmony_ciout: 40962306a36Sopenharmony_ci return NULL; 41062306a36Sopenharmony_ci} 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_cistatic struct fdtable *close_files(struct files_struct * files) 41362306a36Sopenharmony_ci{ 41462306a36Sopenharmony_ci /* 41562306a36Sopenharmony_ci * It is safe to dereference the fd table without RCU or 41662306a36Sopenharmony_ci * ->file_lock because this is the last reference to the 41762306a36Sopenharmony_ci * files structure. 41862306a36Sopenharmony_ci */ 41962306a36Sopenharmony_ci struct fdtable *fdt = rcu_dereference_raw(files->fdt); 42062306a36Sopenharmony_ci unsigned int i, j = 0; 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_ci for (;;) { 42362306a36Sopenharmony_ci unsigned long set; 42462306a36Sopenharmony_ci i = j * BITS_PER_LONG; 42562306a36Sopenharmony_ci if (i >= fdt->max_fds) 42662306a36Sopenharmony_ci break; 42762306a36Sopenharmony_ci set = fdt->open_fds[j++]; 42862306a36Sopenharmony_ci while (set) { 42962306a36Sopenharmony_ci if (set & 1) { 43062306a36Sopenharmony_ci struct file * file = xchg(&fdt->fd[i], NULL); 43162306a36Sopenharmony_ci if (file) { 43262306a36Sopenharmony_ci filp_close(file, files); 43362306a36Sopenharmony_ci cond_resched(); 43462306a36Sopenharmony_ci } 43562306a36Sopenharmony_ci } 43662306a36Sopenharmony_ci i++; 43762306a36Sopenharmony_ci set >>= 1; 43862306a36Sopenharmony_ci } 43962306a36Sopenharmony_ci } 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci return fdt; 44262306a36Sopenharmony_ci} 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_civoid put_files_struct(struct files_struct *files) 44562306a36Sopenharmony_ci{ 44662306a36Sopenharmony_ci if (atomic_dec_and_test(&files->count)) { 44762306a36Sopenharmony_ci struct fdtable *fdt = close_files(files); 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci /* free the arrays if they are not embedded */ 45062306a36Sopenharmony_ci if (fdt != &files->fdtab) 45162306a36Sopenharmony_ci __free_fdtable(fdt); 45262306a36Sopenharmony_ci kmem_cache_free(files_cachep, files); 45362306a36Sopenharmony_ci } 45462306a36Sopenharmony_ci} 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_civoid exit_files(struct task_struct *tsk) 45762306a36Sopenharmony_ci{ 45862306a36Sopenharmony_ci struct files_struct * files = tsk->files; 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci if (files) { 46162306a36Sopenharmony_ci task_lock(tsk); 46262306a36Sopenharmony_ci tsk->files = NULL; 46362306a36Sopenharmony_ci task_unlock(tsk); 46462306a36Sopenharmony_ci put_files_struct(files); 46562306a36Sopenharmony_ci } 46662306a36Sopenharmony_ci} 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_cistruct files_struct init_files = { 46962306a36Sopenharmony_ci .count = ATOMIC_INIT(1), 47062306a36Sopenharmony_ci .fdt = &init_files.fdtab, 47162306a36Sopenharmony_ci .fdtab = { 47262306a36Sopenharmony_ci .max_fds = NR_OPEN_DEFAULT, 47362306a36Sopenharmony_ci .fd = &init_files.fd_array[0], 47462306a36Sopenharmony_ci .close_on_exec = init_files.close_on_exec_init, 47562306a36Sopenharmony_ci .open_fds = init_files.open_fds_init, 47662306a36Sopenharmony_ci .full_fds_bits = init_files.full_fds_bits_init, 47762306a36Sopenharmony_ci }, 47862306a36Sopenharmony_ci .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock), 47962306a36Sopenharmony_ci .resize_wait = __WAIT_QUEUE_HEAD_INITIALIZER(init_files.resize_wait), 48062306a36Sopenharmony_ci}; 48162306a36Sopenharmony_ci 48262306a36Sopenharmony_cistatic unsigned int find_next_fd(struct fdtable *fdt, unsigned int start) 48362306a36Sopenharmony_ci{ 48462306a36Sopenharmony_ci unsigned int maxfd = fdt->max_fds; 48562306a36Sopenharmony_ci unsigned int maxbit = maxfd / BITS_PER_LONG; 48662306a36Sopenharmony_ci unsigned int bitbit = start / BITS_PER_LONG; 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG; 48962306a36Sopenharmony_ci if (bitbit > maxfd) 49062306a36Sopenharmony_ci return maxfd; 49162306a36Sopenharmony_ci if (bitbit > start) 49262306a36Sopenharmony_ci start = bitbit; 49362306a36Sopenharmony_ci return find_next_zero_bit(fdt->open_fds, maxfd, start); 49462306a36Sopenharmony_ci} 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci/* 49762306a36Sopenharmony_ci * allocate a file descriptor, mark it busy. 49862306a36Sopenharmony_ci */ 49962306a36Sopenharmony_cistatic int alloc_fd(unsigned start, unsigned end, unsigned flags) 50062306a36Sopenharmony_ci{ 50162306a36Sopenharmony_ci struct files_struct *files = current->files; 50262306a36Sopenharmony_ci unsigned int fd; 50362306a36Sopenharmony_ci int error; 50462306a36Sopenharmony_ci struct fdtable *fdt; 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ci spin_lock(&files->file_lock); 50762306a36Sopenharmony_cirepeat: 50862306a36Sopenharmony_ci fdt = files_fdtable(files); 50962306a36Sopenharmony_ci fd = start; 51062306a36Sopenharmony_ci if (fd < files->next_fd) 51162306a36Sopenharmony_ci fd = files->next_fd; 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_ci if (fd < fdt->max_fds) 51462306a36Sopenharmony_ci fd = find_next_fd(fdt, fd); 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci /* 51762306a36Sopenharmony_ci * N.B. For clone tasks sharing a files structure, this test 51862306a36Sopenharmony_ci * will limit the total number of files that can be opened. 51962306a36Sopenharmony_ci */ 52062306a36Sopenharmony_ci error = -EMFILE; 52162306a36Sopenharmony_ci if (fd >= end) 52262306a36Sopenharmony_ci goto out; 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci error = expand_files(files, fd); 52562306a36Sopenharmony_ci if (error < 0) 52662306a36Sopenharmony_ci goto out; 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_ci /* 52962306a36Sopenharmony_ci * If we needed to expand the fs array we 53062306a36Sopenharmony_ci * might have blocked - try again. 53162306a36Sopenharmony_ci */ 53262306a36Sopenharmony_ci if (error) 53362306a36Sopenharmony_ci goto repeat; 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_ci if (start <= files->next_fd) 53662306a36Sopenharmony_ci files->next_fd = fd + 1; 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci __set_open_fd(fd, fdt); 53962306a36Sopenharmony_ci if (flags & O_CLOEXEC) 54062306a36Sopenharmony_ci __set_close_on_exec(fd, fdt); 54162306a36Sopenharmony_ci else 54262306a36Sopenharmony_ci __clear_close_on_exec(fd, fdt); 54362306a36Sopenharmony_ci error = fd; 54462306a36Sopenharmony_ci#if 1 54562306a36Sopenharmony_ci /* Sanity check */ 54662306a36Sopenharmony_ci if (rcu_access_pointer(fdt->fd[fd]) != NULL) { 54762306a36Sopenharmony_ci printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); 54862306a36Sopenharmony_ci rcu_assign_pointer(fdt->fd[fd], NULL); 54962306a36Sopenharmony_ci } 55062306a36Sopenharmony_ci#endif 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_ciout: 55362306a36Sopenharmony_ci spin_unlock(&files->file_lock); 55462306a36Sopenharmony_ci return error; 55562306a36Sopenharmony_ci} 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ciint __get_unused_fd_flags(unsigned flags, unsigned long nofile) 55862306a36Sopenharmony_ci{ 55962306a36Sopenharmony_ci return alloc_fd(0, nofile, flags); 56062306a36Sopenharmony_ci} 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_ciint get_unused_fd_flags(unsigned flags) 56362306a36Sopenharmony_ci{ 56462306a36Sopenharmony_ci return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE)); 56562306a36Sopenharmony_ci} 56662306a36Sopenharmony_ciEXPORT_SYMBOL(get_unused_fd_flags); 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_cistatic void __put_unused_fd(struct files_struct *files, unsigned int fd) 56962306a36Sopenharmony_ci{ 57062306a36Sopenharmony_ci struct fdtable *fdt = files_fdtable(files); 57162306a36Sopenharmony_ci __clear_open_fd(fd, fdt); 57262306a36Sopenharmony_ci if (fd < files->next_fd) 57362306a36Sopenharmony_ci files->next_fd = fd; 57462306a36Sopenharmony_ci} 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_civoid put_unused_fd(unsigned int fd) 57762306a36Sopenharmony_ci{ 57862306a36Sopenharmony_ci struct files_struct *files = current->files; 57962306a36Sopenharmony_ci spin_lock(&files->file_lock); 58062306a36Sopenharmony_ci __put_unused_fd(files, fd); 58162306a36Sopenharmony_ci spin_unlock(&files->file_lock); 58262306a36Sopenharmony_ci} 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ciEXPORT_SYMBOL(put_unused_fd); 58562306a36Sopenharmony_ci 58662306a36Sopenharmony_ci/* 58762306a36Sopenharmony_ci * Install a file pointer in the fd array. 58862306a36Sopenharmony_ci * 58962306a36Sopenharmony_ci * The VFS is full of places where we drop the files lock between 59062306a36Sopenharmony_ci * setting the open_fds bitmap and installing the file in the file 59162306a36Sopenharmony_ci * array. At any such point, we are vulnerable to a dup2() race 59262306a36Sopenharmony_ci * installing a file in the array before us. We need to detect this and 59362306a36Sopenharmony_ci * fput() the struct file we are about to overwrite in this case. 59462306a36Sopenharmony_ci * 59562306a36Sopenharmony_ci * It should never happen - if we allow dup2() do it, _really_ bad things 59662306a36Sopenharmony_ci * will follow. 59762306a36Sopenharmony_ci * 59862306a36Sopenharmony_ci * This consumes the "file" refcount, so callers should treat it 59962306a36Sopenharmony_ci * as if they had called fput(file). 60062306a36Sopenharmony_ci */ 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_civoid fd_install(unsigned int fd, struct file *file) 60362306a36Sopenharmony_ci{ 60462306a36Sopenharmony_ci struct files_struct *files = current->files; 60562306a36Sopenharmony_ci struct fdtable *fdt; 60662306a36Sopenharmony_ci 60762306a36Sopenharmony_ci rcu_read_lock_sched(); 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ci if (unlikely(files->resize_in_progress)) { 61062306a36Sopenharmony_ci rcu_read_unlock_sched(); 61162306a36Sopenharmony_ci spin_lock(&files->file_lock); 61262306a36Sopenharmony_ci fdt = files_fdtable(files); 61362306a36Sopenharmony_ci BUG_ON(fdt->fd[fd] != NULL); 61462306a36Sopenharmony_ci rcu_assign_pointer(fdt->fd[fd], file); 61562306a36Sopenharmony_ci spin_unlock(&files->file_lock); 61662306a36Sopenharmony_ci return; 61762306a36Sopenharmony_ci } 61862306a36Sopenharmony_ci /* coupled with smp_wmb() in expand_fdtable() */ 61962306a36Sopenharmony_ci smp_rmb(); 62062306a36Sopenharmony_ci fdt = rcu_dereference_sched(files->fdt); 62162306a36Sopenharmony_ci BUG_ON(fdt->fd[fd] != NULL); 62262306a36Sopenharmony_ci rcu_assign_pointer(fdt->fd[fd], file); 62362306a36Sopenharmony_ci rcu_read_unlock_sched(); 62462306a36Sopenharmony_ci} 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_ciEXPORT_SYMBOL(fd_install); 62762306a36Sopenharmony_ci 62862306a36Sopenharmony_ci/** 62962306a36Sopenharmony_ci * pick_file - return file associatd with fd 63062306a36Sopenharmony_ci * @files: file struct to retrieve file from 63162306a36Sopenharmony_ci * @fd: file descriptor to retrieve file for 63262306a36Sopenharmony_ci * 63362306a36Sopenharmony_ci * Context: files_lock must be held. 63462306a36Sopenharmony_ci * 63562306a36Sopenharmony_ci * Returns: The file associated with @fd (NULL if @fd is not open) 63662306a36Sopenharmony_ci */ 63762306a36Sopenharmony_cistatic struct file *pick_file(struct files_struct *files, unsigned fd) 63862306a36Sopenharmony_ci{ 63962306a36Sopenharmony_ci struct fdtable *fdt = files_fdtable(files); 64062306a36Sopenharmony_ci struct file *file; 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_ci if (fd >= fdt->max_fds) 64362306a36Sopenharmony_ci return NULL; 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci fd = array_index_nospec(fd, fdt->max_fds); 64662306a36Sopenharmony_ci file = fdt->fd[fd]; 64762306a36Sopenharmony_ci if (file) { 64862306a36Sopenharmony_ci rcu_assign_pointer(fdt->fd[fd], NULL); 64962306a36Sopenharmony_ci __put_unused_fd(files, fd); 65062306a36Sopenharmony_ci } 65162306a36Sopenharmony_ci return file; 65262306a36Sopenharmony_ci} 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ciint close_fd(unsigned fd) 65562306a36Sopenharmony_ci{ 65662306a36Sopenharmony_ci struct files_struct *files = current->files; 65762306a36Sopenharmony_ci struct file *file; 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci spin_lock(&files->file_lock); 66062306a36Sopenharmony_ci file = pick_file(files, fd); 66162306a36Sopenharmony_ci spin_unlock(&files->file_lock); 66262306a36Sopenharmony_ci if (!file) 66362306a36Sopenharmony_ci return -EBADF; 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_ci return filp_close(file, files); 66662306a36Sopenharmony_ci} 66762306a36Sopenharmony_ciEXPORT_SYMBOL(close_fd); /* for ksys_close() */ 66862306a36Sopenharmony_ci 66962306a36Sopenharmony_ci/** 67062306a36Sopenharmony_ci * last_fd - return last valid index into fd table 67162306a36Sopenharmony_ci * @fdt: File descriptor table. 67262306a36Sopenharmony_ci * 67362306a36Sopenharmony_ci * Context: Either rcu read lock or files_lock must be held. 67462306a36Sopenharmony_ci * 67562306a36Sopenharmony_ci * Returns: Last valid index into fdtable. 67662306a36Sopenharmony_ci */ 67762306a36Sopenharmony_cistatic inline unsigned last_fd(struct fdtable *fdt) 67862306a36Sopenharmony_ci{ 67962306a36Sopenharmony_ci return fdt->max_fds - 1; 68062306a36Sopenharmony_ci} 68162306a36Sopenharmony_ci 68262306a36Sopenharmony_cistatic inline void __range_cloexec(struct files_struct *cur_fds, 68362306a36Sopenharmony_ci unsigned int fd, unsigned int max_fd) 68462306a36Sopenharmony_ci{ 68562306a36Sopenharmony_ci struct fdtable *fdt; 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_ci /* make sure we're using the correct maximum value */ 68862306a36Sopenharmony_ci spin_lock(&cur_fds->file_lock); 68962306a36Sopenharmony_ci fdt = files_fdtable(cur_fds); 69062306a36Sopenharmony_ci max_fd = min(last_fd(fdt), max_fd); 69162306a36Sopenharmony_ci if (fd <= max_fd) 69262306a36Sopenharmony_ci bitmap_set(fdt->close_on_exec, fd, max_fd - fd + 1); 69362306a36Sopenharmony_ci spin_unlock(&cur_fds->file_lock); 69462306a36Sopenharmony_ci} 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_cistatic inline void __range_close(struct files_struct *files, unsigned int fd, 69762306a36Sopenharmony_ci unsigned int max_fd) 69862306a36Sopenharmony_ci{ 69962306a36Sopenharmony_ci struct file *file; 70062306a36Sopenharmony_ci unsigned n; 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_ci spin_lock(&files->file_lock); 70362306a36Sopenharmony_ci n = last_fd(files_fdtable(files)); 70462306a36Sopenharmony_ci max_fd = min(max_fd, n); 70562306a36Sopenharmony_ci 70662306a36Sopenharmony_ci for (; fd <= max_fd; fd++) { 70762306a36Sopenharmony_ci file = pick_file(files, fd); 70862306a36Sopenharmony_ci if (file) { 70962306a36Sopenharmony_ci spin_unlock(&files->file_lock); 71062306a36Sopenharmony_ci filp_close(file, files); 71162306a36Sopenharmony_ci cond_resched(); 71262306a36Sopenharmony_ci spin_lock(&files->file_lock); 71362306a36Sopenharmony_ci } else if (need_resched()) { 71462306a36Sopenharmony_ci spin_unlock(&files->file_lock); 71562306a36Sopenharmony_ci cond_resched(); 71662306a36Sopenharmony_ci spin_lock(&files->file_lock); 71762306a36Sopenharmony_ci } 71862306a36Sopenharmony_ci } 71962306a36Sopenharmony_ci spin_unlock(&files->file_lock); 72062306a36Sopenharmony_ci} 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci/** 72362306a36Sopenharmony_ci * __close_range() - Close all file descriptors in a given range. 72462306a36Sopenharmony_ci * 72562306a36Sopenharmony_ci * @fd: starting file descriptor to close 72662306a36Sopenharmony_ci * @max_fd: last file descriptor to close 72762306a36Sopenharmony_ci * @flags: CLOSE_RANGE flags. 72862306a36Sopenharmony_ci * 72962306a36Sopenharmony_ci * This closes a range of file descriptors. All file descriptors 73062306a36Sopenharmony_ci * from @fd up to and including @max_fd are closed. 73162306a36Sopenharmony_ci */ 73262306a36Sopenharmony_ciint __close_range(unsigned fd, unsigned max_fd, unsigned int flags) 73362306a36Sopenharmony_ci{ 73462306a36Sopenharmony_ci struct task_struct *me = current; 73562306a36Sopenharmony_ci struct files_struct *cur_fds = me->files, *fds = NULL; 73662306a36Sopenharmony_ci 73762306a36Sopenharmony_ci if (flags & ~(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC)) 73862306a36Sopenharmony_ci return -EINVAL; 73962306a36Sopenharmony_ci 74062306a36Sopenharmony_ci if (fd > max_fd) 74162306a36Sopenharmony_ci return -EINVAL; 74262306a36Sopenharmony_ci 74362306a36Sopenharmony_ci if (flags & CLOSE_RANGE_UNSHARE) { 74462306a36Sopenharmony_ci int ret; 74562306a36Sopenharmony_ci unsigned int max_unshare_fds = NR_OPEN_MAX; 74662306a36Sopenharmony_ci 74762306a36Sopenharmony_ci /* 74862306a36Sopenharmony_ci * If the caller requested all fds to be made cloexec we always 74962306a36Sopenharmony_ci * copy all of the file descriptors since they still want to 75062306a36Sopenharmony_ci * use them. 75162306a36Sopenharmony_ci */ 75262306a36Sopenharmony_ci if (!(flags & CLOSE_RANGE_CLOEXEC)) { 75362306a36Sopenharmony_ci /* 75462306a36Sopenharmony_ci * If the requested range is greater than the current 75562306a36Sopenharmony_ci * maximum, we're closing everything so only copy all 75662306a36Sopenharmony_ci * file descriptors beneath the lowest file descriptor. 75762306a36Sopenharmony_ci */ 75862306a36Sopenharmony_ci rcu_read_lock(); 75962306a36Sopenharmony_ci if (max_fd >= last_fd(files_fdtable(cur_fds))) 76062306a36Sopenharmony_ci max_unshare_fds = fd; 76162306a36Sopenharmony_ci rcu_read_unlock(); 76262306a36Sopenharmony_ci } 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ci ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds); 76562306a36Sopenharmony_ci if (ret) 76662306a36Sopenharmony_ci return ret; 76762306a36Sopenharmony_ci 76862306a36Sopenharmony_ci /* 76962306a36Sopenharmony_ci * We used to share our file descriptor table, and have now 77062306a36Sopenharmony_ci * created a private one, make sure we're using it below. 77162306a36Sopenharmony_ci */ 77262306a36Sopenharmony_ci if (fds) 77362306a36Sopenharmony_ci swap(cur_fds, fds); 77462306a36Sopenharmony_ci } 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_ci if (flags & CLOSE_RANGE_CLOEXEC) 77762306a36Sopenharmony_ci __range_cloexec(cur_fds, fd, max_fd); 77862306a36Sopenharmony_ci else 77962306a36Sopenharmony_ci __range_close(cur_fds, fd, max_fd); 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci if (fds) { 78262306a36Sopenharmony_ci /* 78362306a36Sopenharmony_ci * We're done closing the files we were supposed to. Time to install 78462306a36Sopenharmony_ci * the new file descriptor table and drop the old one. 78562306a36Sopenharmony_ci */ 78662306a36Sopenharmony_ci task_lock(me); 78762306a36Sopenharmony_ci me->files = cur_fds; 78862306a36Sopenharmony_ci task_unlock(me); 78962306a36Sopenharmony_ci put_files_struct(fds); 79062306a36Sopenharmony_ci } 79162306a36Sopenharmony_ci 79262306a36Sopenharmony_ci return 0; 79362306a36Sopenharmony_ci} 79462306a36Sopenharmony_ci 79562306a36Sopenharmony_ci/* 79662306a36Sopenharmony_ci * See close_fd_get_file() below, this variant assumes current->files->file_lock 79762306a36Sopenharmony_ci * is held. 79862306a36Sopenharmony_ci */ 79962306a36Sopenharmony_cistruct file *__close_fd_get_file(unsigned int fd) 80062306a36Sopenharmony_ci{ 80162306a36Sopenharmony_ci return pick_file(current->files, fd); 80262306a36Sopenharmony_ci} 80362306a36Sopenharmony_ci 80462306a36Sopenharmony_ci/* 80562306a36Sopenharmony_ci * variant of close_fd that gets a ref on the file for later fput. 80662306a36Sopenharmony_ci * The caller must ensure that filp_close() called on the file. 80762306a36Sopenharmony_ci */ 80862306a36Sopenharmony_cistruct file *close_fd_get_file(unsigned int fd) 80962306a36Sopenharmony_ci{ 81062306a36Sopenharmony_ci struct files_struct *files = current->files; 81162306a36Sopenharmony_ci struct file *file; 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_ci spin_lock(&files->file_lock); 81462306a36Sopenharmony_ci file = pick_file(files, fd); 81562306a36Sopenharmony_ci spin_unlock(&files->file_lock); 81662306a36Sopenharmony_ci 81762306a36Sopenharmony_ci return file; 81862306a36Sopenharmony_ci} 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_civoid do_close_on_exec(struct files_struct *files) 82162306a36Sopenharmony_ci{ 82262306a36Sopenharmony_ci unsigned i; 82362306a36Sopenharmony_ci struct fdtable *fdt; 82462306a36Sopenharmony_ci 82562306a36Sopenharmony_ci /* exec unshares first */ 82662306a36Sopenharmony_ci spin_lock(&files->file_lock); 82762306a36Sopenharmony_ci for (i = 0; ; i++) { 82862306a36Sopenharmony_ci unsigned long set; 82962306a36Sopenharmony_ci unsigned fd = i * BITS_PER_LONG; 83062306a36Sopenharmony_ci fdt = files_fdtable(files); 83162306a36Sopenharmony_ci if (fd >= fdt->max_fds) 83262306a36Sopenharmony_ci break; 83362306a36Sopenharmony_ci set = fdt->close_on_exec[i]; 83462306a36Sopenharmony_ci if (!set) 83562306a36Sopenharmony_ci continue; 83662306a36Sopenharmony_ci fdt->close_on_exec[i] = 0; 83762306a36Sopenharmony_ci for ( ; set ; fd++, set >>= 1) { 83862306a36Sopenharmony_ci struct file *file; 83962306a36Sopenharmony_ci if (!(set & 1)) 84062306a36Sopenharmony_ci continue; 84162306a36Sopenharmony_ci file = fdt->fd[fd]; 84262306a36Sopenharmony_ci if (!file) 84362306a36Sopenharmony_ci continue; 84462306a36Sopenharmony_ci rcu_assign_pointer(fdt->fd[fd], NULL); 84562306a36Sopenharmony_ci __put_unused_fd(files, fd); 84662306a36Sopenharmony_ci spin_unlock(&files->file_lock); 84762306a36Sopenharmony_ci filp_close(file, files); 84862306a36Sopenharmony_ci cond_resched(); 84962306a36Sopenharmony_ci spin_lock(&files->file_lock); 85062306a36Sopenharmony_ci } 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci } 85362306a36Sopenharmony_ci spin_unlock(&files->file_lock); 85462306a36Sopenharmony_ci} 85562306a36Sopenharmony_ci 85662306a36Sopenharmony_cistatic inline struct file *__fget_files_rcu(struct files_struct *files, 85762306a36Sopenharmony_ci unsigned int fd, fmode_t mask) 85862306a36Sopenharmony_ci{ 85962306a36Sopenharmony_ci for (;;) { 86062306a36Sopenharmony_ci struct file *file; 86162306a36Sopenharmony_ci struct fdtable *fdt = rcu_dereference_raw(files->fdt); 86262306a36Sopenharmony_ci struct file __rcu **fdentry; 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_ci if (unlikely(fd >= fdt->max_fds)) 86562306a36Sopenharmony_ci return NULL; 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ci fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds); 86862306a36Sopenharmony_ci file = rcu_dereference_raw(*fdentry); 86962306a36Sopenharmony_ci if (unlikely(!file)) 87062306a36Sopenharmony_ci return NULL; 87162306a36Sopenharmony_ci 87262306a36Sopenharmony_ci if (unlikely(file->f_mode & mask)) 87362306a36Sopenharmony_ci return NULL; 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_ci /* 87662306a36Sopenharmony_ci * Ok, we have a file pointer. However, because we do 87762306a36Sopenharmony_ci * this all locklessly under RCU, we may be racing with 87862306a36Sopenharmony_ci * that file being closed. 87962306a36Sopenharmony_ci * 88062306a36Sopenharmony_ci * Such a race can take two forms: 88162306a36Sopenharmony_ci * 88262306a36Sopenharmony_ci * (a) the file ref already went down to zero, 88362306a36Sopenharmony_ci * and get_file_rcu() fails. Just try again: 88462306a36Sopenharmony_ci */ 88562306a36Sopenharmony_ci if (unlikely(!get_file_rcu(file))) 88662306a36Sopenharmony_ci continue; 88762306a36Sopenharmony_ci 88862306a36Sopenharmony_ci /* 88962306a36Sopenharmony_ci * (b) the file table entry has changed under us. 89062306a36Sopenharmony_ci * Note that we don't need to re-check the 'fdt->fd' 89162306a36Sopenharmony_ci * pointer having changed, because it always goes 89262306a36Sopenharmony_ci * hand-in-hand with 'fdt'. 89362306a36Sopenharmony_ci * 89462306a36Sopenharmony_ci * If so, we need to put our ref and try again. 89562306a36Sopenharmony_ci */ 89662306a36Sopenharmony_ci if (unlikely(rcu_dereference_raw(files->fdt) != fdt) || 89762306a36Sopenharmony_ci unlikely(rcu_dereference_raw(*fdentry) != file)) { 89862306a36Sopenharmony_ci fput(file); 89962306a36Sopenharmony_ci continue; 90062306a36Sopenharmony_ci } 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci /* 90362306a36Sopenharmony_ci * Ok, we have a ref to the file, and checked that it 90462306a36Sopenharmony_ci * still exists. 90562306a36Sopenharmony_ci */ 90662306a36Sopenharmony_ci return file; 90762306a36Sopenharmony_ci } 90862306a36Sopenharmony_ci} 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_cistatic struct file *__fget_files(struct files_struct *files, unsigned int fd, 91162306a36Sopenharmony_ci fmode_t mask) 91262306a36Sopenharmony_ci{ 91362306a36Sopenharmony_ci struct file *file; 91462306a36Sopenharmony_ci 91562306a36Sopenharmony_ci rcu_read_lock(); 91662306a36Sopenharmony_ci file = __fget_files_rcu(files, fd, mask); 91762306a36Sopenharmony_ci rcu_read_unlock(); 91862306a36Sopenharmony_ci 91962306a36Sopenharmony_ci return file; 92062306a36Sopenharmony_ci} 92162306a36Sopenharmony_ci 92262306a36Sopenharmony_cistatic inline struct file *__fget(unsigned int fd, fmode_t mask) 92362306a36Sopenharmony_ci{ 92462306a36Sopenharmony_ci return __fget_files(current->files, fd, mask); 92562306a36Sopenharmony_ci} 92662306a36Sopenharmony_ci 92762306a36Sopenharmony_cistruct file *fget(unsigned int fd) 92862306a36Sopenharmony_ci{ 92962306a36Sopenharmony_ci return __fget(fd, FMODE_PATH); 93062306a36Sopenharmony_ci} 93162306a36Sopenharmony_ciEXPORT_SYMBOL(fget); 93262306a36Sopenharmony_ci 93362306a36Sopenharmony_cistruct file *fget_raw(unsigned int fd) 93462306a36Sopenharmony_ci{ 93562306a36Sopenharmony_ci return __fget(fd, 0); 93662306a36Sopenharmony_ci} 93762306a36Sopenharmony_ciEXPORT_SYMBOL(fget_raw); 93862306a36Sopenharmony_ci 93962306a36Sopenharmony_cistruct file *fget_task(struct task_struct *task, unsigned int fd) 94062306a36Sopenharmony_ci{ 94162306a36Sopenharmony_ci struct file *file = NULL; 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_ci task_lock(task); 94462306a36Sopenharmony_ci if (task->files) 94562306a36Sopenharmony_ci file = __fget_files(task->files, fd, 0); 94662306a36Sopenharmony_ci task_unlock(task); 94762306a36Sopenharmony_ci 94862306a36Sopenharmony_ci return file; 94962306a36Sopenharmony_ci} 95062306a36Sopenharmony_ci 95162306a36Sopenharmony_cistruct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd) 95262306a36Sopenharmony_ci{ 95362306a36Sopenharmony_ci /* Must be called with rcu_read_lock held */ 95462306a36Sopenharmony_ci struct files_struct *files; 95562306a36Sopenharmony_ci struct file *file = NULL; 95662306a36Sopenharmony_ci 95762306a36Sopenharmony_ci task_lock(task); 95862306a36Sopenharmony_ci files = task->files; 95962306a36Sopenharmony_ci if (files) 96062306a36Sopenharmony_ci file = files_lookup_fd_rcu(files, fd); 96162306a36Sopenharmony_ci task_unlock(task); 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_ci return file; 96462306a36Sopenharmony_ci} 96562306a36Sopenharmony_ci 96662306a36Sopenharmony_cistruct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret_fd) 96762306a36Sopenharmony_ci{ 96862306a36Sopenharmony_ci /* Must be called with rcu_read_lock held */ 96962306a36Sopenharmony_ci struct files_struct *files; 97062306a36Sopenharmony_ci unsigned int fd = *ret_fd; 97162306a36Sopenharmony_ci struct file *file = NULL; 97262306a36Sopenharmony_ci 97362306a36Sopenharmony_ci task_lock(task); 97462306a36Sopenharmony_ci files = task->files; 97562306a36Sopenharmony_ci if (files) { 97662306a36Sopenharmony_ci for (; fd < files_fdtable(files)->max_fds; fd++) { 97762306a36Sopenharmony_ci file = files_lookup_fd_rcu(files, fd); 97862306a36Sopenharmony_ci if (file) 97962306a36Sopenharmony_ci break; 98062306a36Sopenharmony_ci } 98162306a36Sopenharmony_ci } 98262306a36Sopenharmony_ci task_unlock(task); 98362306a36Sopenharmony_ci *ret_fd = fd; 98462306a36Sopenharmony_ci return file; 98562306a36Sopenharmony_ci} 98662306a36Sopenharmony_ciEXPORT_SYMBOL(task_lookup_next_fd_rcu); 98762306a36Sopenharmony_ci 98862306a36Sopenharmony_ci/* 98962306a36Sopenharmony_ci * Lightweight file lookup - no refcnt increment if fd table isn't shared. 99062306a36Sopenharmony_ci * 99162306a36Sopenharmony_ci * You can use this instead of fget if you satisfy all of the following 99262306a36Sopenharmony_ci * conditions: 99362306a36Sopenharmony_ci * 1) You must call fput_light before exiting the syscall and returning control 99462306a36Sopenharmony_ci * to userspace (i.e. you cannot remember the returned struct file * after 99562306a36Sopenharmony_ci * returning to userspace). 99662306a36Sopenharmony_ci * 2) You must not call filp_close on the returned struct file * in between 99762306a36Sopenharmony_ci * calls to fget_light and fput_light. 99862306a36Sopenharmony_ci * 3) You must not clone the current task in between the calls to fget_light 99962306a36Sopenharmony_ci * and fput_light. 100062306a36Sopenharmony_ci * 100162306a36Sopenharmony_ci * The fput_needed flag returned by fget_light should be passed to the 100262306a36Sopenharmony_ci * corresponding fput_light. 100362306a36Sopenharmony_ci */ 100462306a36Sopenharmony_cistatic unsigned long __fget_light(unsigned int fd, fmode_t mask) 100562306a36Sopenharmony_ci{ 100662306a36Sopenharmony_ci struct files_struct *files = current->files; 100762306a36Sopenharmony_ci struct file *file; 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci /* 101062306a36Sopenharmony_ci * If another thread is concurrently calling close_fd() followed 101162306a36Sopenharmony_ci * by put_files_struct(), we must not observe the old table 101262306a36Sopenharmony_ci * entry combined with the new refcount - otherwise we could 101362306a36Sopenharmony_ci * return a file that is concurrently being freed. 101462306a36Sopenharmony_ci * 101562306a36Sopenharmony_ci * atomic_read_acquire() pairs with atomic_dec_and_test() in 101662306a36Sopenharmony_ci * put_files_struct(). 101762306a36Sopenharmony_ci */ 101862306a36Sopenharmony_ci if (atomic_read_acquire(&files->count) == 1) { 101962306a36Sopenharmony_ci file = files_lookup_fd_raw(files, fd); 102062306a36Sopenharmony_ci if (!file || unlikely(file->f_mode & mask)) 102162306a36Sopenharmony_ci return 0; 102262306a36Sopenharmony_ci return (unsigned long)file; 102362306a36Sopenharmony_ci } else { 102462306a36Sopenharmony_ci file = __fget(fd, mask); 102562306a36Sopenharmony_ci if (!file) 102662306a36Sopenharmony_ci return 0; 102762306a36Sopenharmony_ci return FDPUT_FPUT | (unsigned long)file; 102862306a36Sopenharmony_ci } 102962306a36Sopenharmony_ci} 103062306a36Sopenharmony_ciunsigned long __fdget(unsigned int fd) 103162306a36Sopenharmony_ci{ 103262306a36Sopenharmony_ci return __fget_light(fd, FMODE_PATH); 103362306a36Sopenharmony_ci} 103462306a36Sopenharmony_ciEXPORT_SYMBOL(__fdget); 103562306a36Sopenharmony_ci 103662306a36Sopenharmony_ciunsigned long __fdget_raw(unsigned int fd) 103762306a36Sopenharmony_ci{ 103862306a36Sopenharmony_ci return __fget_light(fd, 0); 103962306a36Sopenharmony_ci} 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ci/* 104262306a36Sopenharmony_ci * Try to avoid f_pos locking. We only need it if the 104362306a36Sopenharmony_ci * file is marked for FMODE_ATOMIC_POS, and it can be 104462306a36Sopenharmony_ci * accessed multiple ways. 104562306a36Sopenharmony_ci * 104662306a36Sopenharmony_ci * Always do it for directories, because pidfd_getfd() 104762306a36Sopenharmony_ci * can make a file accessible even if it otherwise would 104862306a36Sopenharmony_ci * not be, and for directories this is a correctness 104962306a36Sopenharmony_ci * issue, not a "POSIX requirement". 105062306a36Sopenharmony_ci */ 105162306a36Sopenharmony_cistatic inline bool file_needs_f_pos_lock(struct file *file) 105262306a36Sopenharmony_ci{ 105362306a36Sopenharmony_ci return (file->f_mode & FMODE_ATOMIC_POS) && 105462306a36Sopenharmony_ci (file_count(file) > 1 || file->f_op->iterate_shared); 105562306a36Sopenharmony_ci} 105662306a36Sopenharmony_ci 105762306a36Sopenharmony_ciunsigned long __fdget_pos(unsigned int fd) 105862306a36Sopenharmony_ci{ 105962306a36Sopenharmony_ci unsigned long v = __fdget(fd); 106062306a36Sopenharmony_ci struct file *file = (struct file *)(v & ~3); 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_ci if (file && file_needs_f_pos_lock(file)) { 106362306a36Sopenharmony_ci v |= FDPUT_POS_UNLOCK; 106462306a36Sopenharmony_ci mutex_lock(&file->f_pos_lock); 106562306a36Sopenharmony_ci } 106662306a36Sopenharmony_ci return v; 106762306a36Sopenharmony_ci} 106862306a36Sopenharmony_ci 106962306a36Sopenharmony_civoid __f_unlock_pos(struct file *f) 107062306a36Sopenharmony_ci{ 107162306a36Sopenharmony_ci mutex_unlock(&f->f_pos_lock); 107262306a36Sopenharmony_ci} 107362306a36Sopenharmony_ci 107462306a36Sopenharmony_ci/* 107562306a36Sopenharmony_ci * We only lock f_pos if we have threads or if the file might be 107662306a36Sopenharmony_ci * shared with another process. In both cases we'll have an elevated 107762306a36Sopenharmony_ci * file count (done either by fdget() or by fork()). 107862306a36Sopenharmony_ci */ 107962306a36Sopenharmony_ci 108062306a36Sopenharmony_civoid set_close_on_exec(unsigned int fd, int flag) 108162306a36Sopenharmony_ci{ 108262306a36Sopenharmony_ci struct files_struct *files = current->files; 108362306a36Sopenharmony_ci struct fdtable *fdt; 108462306a36Sopenharmony_ci spin_lock(&files->file_lock); 108562306a36Sopenharmony_ci fdt = files_fdtable(files); 108662306a36Sopenharmony_ci if (flag) 108762306a36Sopenharmony_ci __set_close_on_exec(fd, fdt); 108862306a36Sopenharmony_ci else 108962306a36Sopenharmony_ci __clear_close_on_exec(fd, fdt); 109062306a36Sopenharmony_ci spin_unlock(&files->file_lock); 109162306a36Sopenharmony_ci} 109262306a36Sopenharmony_ci 109362306a36Sopenharmony_cibool get_close_on_exec(unsigned int fd) 109462306a36Sopenharmony_ci{ 109562306a36Sopenharmony_ci struct files_struct *files = current->files; 109662306a36Sopenharmony_ci struct fdtable *fdt; 109762306a36Sopenharmony_ci bool res; 109862306a36Sopenharmony_ci rcu_read_lock(); 109962306a36Sopenharmony_ci fdt = files_fdtable(files); 110062306a36Sopenharmony_ci res = close_on_exec(fd, fdt); 110162306a36Sopenharmony_ci rcu_read_unlock(); 110262306a36Sopenharmony_ci return res; 110362306a36Sopenharmony_ci} 110462306a36Sopenharmony_ci 110562306a36Sopenharmony_cistatic int do_dup2(struct files_struct *files, 110662306a36Sopenharmony_ci struct file *file, unsigned fd, unsigned flags) 110762306a36Sopenharmony_ci__releases(&files->file_lock) 110862306a36Sopenharmony_ci{ 110962306a36Sopenharmony_ci struct file *tofree; 111062306a36Sopenharmony_ci struct fdtable *fdt; 111162306a36Sopenharmony_ci 111262306a36Sopenharmony_ci /* 111362306a36Sopenharmony_ci * We need to detect attempts to do dup2() over allocated but still 111462306a36Sopenharmony_ci * not finished descriptor. NB: OpenBSD avoids that at the price of 111562306a36Sopenharmony_ci * extra work in their equivalent of fget() - they insert struct 111662306a36Sopenharmony_ci * file immediately after grabbing descriptor, mark it larval if 111762306a36Sopenharmony_ci * more work (e.g. actual opening) is needed and make sure that 111862306a36Sopenharmony_ci * fget() treats larval files as absent. Potentially interesting, 111962306a36Sopenharmony_ci * but while extra work in fget() is trivial, locking implications 112062306a36Sopenharmony_ci * and amount of surgery on open()-related paths in VFS are not. 112162306a36Sopenharmony_ci * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" 112262306a36Sopenharmony_ci * deadlocks in rather amusing ways, AFAICS. All of that is out of 112362306a36Sopenharmony_ci * scope of POSIX or SUS, since neither considers shared descriptor 112462306a36Sopenharmony_ci * tables and this condition does not arise without those. 112562306a36Sopenharmony_ci */ 112662306a36Sopenharmony_ci fdt = files_fdtable(files); 112762306a36Sopenharmony_ci tofree = fdt->fd[fd]; 112862306a36Sopenharmony_ci if (!tofree && fd_is_open(fd, fdt)) 112962306a36Sopenharmony_ci goto Ebusy; 113062306a36Sopenharmony_ci get_file(file); 113162306a36Sopenharmony_ci rcu_assign_pointer(fdt->fd[fd], file); 113262306a36Sopenharmony_ci __set_open_fd(fd, fdt); 113362306a36Sopenharmony_ci if (flags & O_CLOEXEC) 113462306a36Sopenharmony_ci __set_close_on_exec(fd, fdt); 113562306a36Sopenharmony_ci else 113662306a36Sopenharmony_ci __clear_close_on_exec(fd, fdt); 113762306a36Sopenharmony_ci spin_unlock(&files->file_lock); 113862306a36Sopenharmony_ci 113962306a36Sopenharmony_ci if (tofree) 114062306a36Sopenharmony_ci filp_close(tofree, files); 114162306a36Sopenharmony_ci 114262306a36Sopenharmony_ci return fd; 114362306a36Sopenharmony_ci 114462306a36Sopenharmony_ciEbusy: 114562306a36Sopenharmony_ci spin_unlock(&files->file_lock); 114662306a36Sopenharmony_ci return -EBUSY; 114762306a36Sopenharmony_ci} 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ciint replace_fd(unsigned fd, struct file *file, unsigned flags) 115062306a36Sopenharmony_ci{ 115162306a36Sopenharmony_ci int err; 115262306a36Sopenharmony_ci struct files_struct *files = current->files; 115362306a36Sopenharmony_ci 115462306a36Sopenharmony_ci if (!file) 115562306a36Sopenharmony_ci return close_fd(fd); 115662306a36Sopenharmony_ci 115762306a36Sopenharmony_ci if (fd >= rlimit(RLIMIT_NOFILE)) 115862306a36Sopenharmony_ci return -EBADF; 115962306a36Sopenharmony_ci 116062306a36Sopenharmony_ci spin_lock(&files->file_lock); 116162306a36Sopenharmony_ci err = expand_files(files, fd); 116262306a36Sopenharmony_ci if (unlikely(err < 0)) 116362306a36Sopenharmony_ci goto out_unlock; 116462306a36Sopenharmony_ci return do_dup2(files, file, fd, flags); 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_ciout_unlock: 116762306a36Sopenharmony_ci spin_unlock(&files->file_lock); 116862306a36Sopenharmony_ci return err; 116962306a36Sopenharmony_ci} 117062306a36Sopenharmony_ci 117162306a36Sopenharmony_ci/** 117262306a36Sopenharmony_ci * __receive_fd() - Install received file into file descriptor table 117362306a36Sopenharmony_ci * @file: struct file that was received from another process 117462306a36Sopenharmony_ci * @ufd: __user pointer to write new fd number to 117562306a36Sopenharmony_ci * @o_flags: the O_* flags to apply to the new fd entry 117662306a36Sopenharmony_ci * 117762306a36Sopenharmony_ci * Installs a received file into the file descriptor table, with appropriate 117862306a36Sopenharmony_ci * checks and count updates. Optionally writes the fd number to userspace, if 117962306a36Sopenharmony_ci * @ufd is non-NULL. 118062306a36Sopenharmony_ci * 118162306a36Sopenharmony_ci * This helper handles its own reference counting of the incoming 118262306a36Sopenharmony_ci * struct file. 118362306a36Sopenharmony_ci * 118462306a36Sopenharmony_ci * Returns newly install fd or -ve on error. 118562306a36Sopenharmony_ci */ 118662306a36Sopenharmony_ciint __receive_fd(struct file *file, int __user *ufd, unsigned int o_flags) 118762306a36Sopenharmony_ci{ 118862306a36Sopenharmony_ci int new_fd; 118962306a36Sopenharmony_ci int error; 119062306a36Sopenharmony_ci 119162306a36Sopenharmony_ci error = security_file_receive(file); 119262306a36Sopenharmony_ci if (error) 119362306a36Sopenharmony_ci return error; 119462306a36Sopenharmony_ci 119562306a36Sopenharmony_ci new_fd = get_unused_fd_flags(o_flags); 119662306a36Sopenharmony_ci if (new_fd < 0) 119762306a36Sopenharmony_ci return new_fd; 119862306a36Sopenharmony_ci 119962306a36Sopenharmony_ci if (ufd) { 120062306a36Sopenharmony_ci error = put_user(new_fd, ufd); 120162306a36Sopenharmony_ci if (error) { 120262306a36Sopenharmony_ci put_unused_fd(new_fd); 120362306a36Sopenharmony_ci return error; 120462306a36Sopenharmony_ci } 120562306a36Sopenharmony_ci } 120662306a36Sopenharmony_ci 120762306a36Sopenharmony_ci fd_install(new_fd, get_file(file)); 120862306a36Sopenharmony_ci __receive_sock(file); 120962306a36Sopenharmony_ci return new_fd; 121062306a36Sopenharmony_ci} 121162306a36Sopenharmony_ci 121262306a36Sopenharmony_ciint receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags) 121362306a36Sopenharmony_ci{ 121462306a36Sopenharmony_ci int error; 121562306a36Sopenharmony_ci 121662306a36Sopenharmony_ci error = security_file_receive(file); 121762306a36Sopenharmony_ci if (error) 121862306a36Sopenharmony_ci return error; 121962306a36Sopenharmony_ci error = replace_fd(new_fd, file, o_flags); 122062306a36Sopenharmony_ci if (error) 122162306a36Sopenharmony_ci return error; 122262306a36Sopenharmony_ci __receive_sock(file); 122362306a36Sopenharmony_ci return new_fd; 122462306a36Sopenharmony_ci} 122562306a36Sopenharmony_ci 122662306a36Sopenharmony_ciint receive_fd(struct file *file, unsigned int o_flags) 122762306a36Sopenharmony_ci{ 122862306a36Sopenharmony_ci return __receive_fd(file, NULL, o_flags); 122962306a36Sopenharmony_ci} 123062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(receive_fd); 123162306a36Sopenharmony_ci 123262306a36Sopenharmony_cistatic int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) 123362306a36Sopenharmony_ci{ 123462306a36Sopenharmony_ci int err = -EBADF; 123562306a36Sopenharmony_ci struct file *file; 123662306a36Sopenharmony_ci struct files_struct *files = current->files; 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ci if ((flags & ~O_CLOEXEC) != 0) 123962306a36Sopenharmony_ci return -EINVAL; 124062306a36Sopenharmony_ci 124162306a36Sopenharmony_ci if (unlikely(oldfd == newfd)) 124262306a36Sopenharmony_ci return -EINVAL; 124362306a36Sopenharmony_ci 124462306a36Sopenharmony_ci if (newfd >= rlimit(RLIMIT_NOFILE)) 124562306a36Sopenharmony_ci return -EBADF; 124662306a36Sopenharmony_ci 124762306a36Sopenharmony_ci spin_lock(&files->file_lock); 124862306a36Sopenharmony_ci err = expand_files(files, newfd); 124962306a36Sopenharmony_ci file = files_lookup_fd_locked(files, oldfd); 125062306a36Sopenharmony_ci if (unlikely(!file)) 125162306a36Sopenharmony_ci goto Ebadf; 125262306a36Sopenharmony_ci if (unlikely(err < 0)) { 125362306a36Sopenharmony_ci if (err == -EMFILE) 125462306a36Sopenharmony_ci goto Ebadf; 125562306a36Sopenharmony_ci goto out_unlock; 125662306a36Sopenharmony_ci } 125762306a36Sopenharmony_ci return do_dup2(files, file, newfd, flags); 125862306a36Sopenharmony_ci 125962306a36Sopenharmony_ciEbadf: 126062306a36Sopenharmony_ci err = -EBADF; 126162306a36Sopenharmony_ciout_unlock: 126262306a36Sopenharmony_ci spin_unlock(&files->file_lock); 126362306a36Sopenharmony_ci return err; 126462306a36Sopenharmony_ci} 126562306a36Sopenharmony_ci 126662306a36Sopenharmony_ciSYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) 126762306a36Sopenharmony_ci{ 126862306a36Sopenharmony_ci return ksys_dup3(oldfd, newfd, flags); 126962306a36Sopenharmony_ci} 127062306a36Sopenharmony_ci 127162306a36Sopenharmony_ciSYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) 127262306a36Sopenharmony_ci{ 127362306a36Sopenharmony_ci if (unlikely(newfd == oldfd)) { /* corner case */ 127462306a36Sopenharmony_ci struct files_struct *files = current->files; 127562306a36Sopenharmony_ci int retval = oldfd; 127662306a36Sopenharmony_ci 127762306a36Sopenharmony_ci rcu_read_lock(); 127862306a36Sopenharmony_ci if (!files_lookup_fd_rcu(files, oldfd)) 127962306a36Sopenharmony_ci retval = -EBADF; 128062306a36Sopenharmony_ci rcu_read_unlock(); 128162306a36Sopenharmony_ci return retval; 128262306a36Sopenharmony_ci } 128362306a36Sopenharmony_ci return ksys_dup3(oldfd, newfd, 0); 128462306a36Sopenharmony_ci} 128562306a36Sopenharmony_ci 128662306a36Sopenharmony_ciSYSCALL_DEFINE1(dup, unsigned int, fildes) 128762306a36Sopenharmony_ci{ 128862306a36Sopenharmony_ci int ret = -EBADF; 128962306a36Sopenharmony_ci struct file *file = fget_raw(fildes); 129062306a36Sopenharmony_ci 129162306a36Sopenharmony_ci if (file) { 129262306a36Sopenharmony_ci ret = get_unused_fd_flags(0); 129362306a36Sopenharmony_ci if (ret >= 0) 129462306a36Sopenharmony_ci fd_install(ret, file); 129562306a36Sopenharmony_ci else 129662306a36Sopenharmony_ci fput(file); 129762306a36Sopenharmony_ci } 129862306a36Sopenharmony_ci return ret; 129962306a36Sopenharmony_ci} 130062306a36Sopenharmony_ci 130162306a36Sopenharmony_ciint f_dupfd(unsigned int from, struct file *file, unsigned flags) 130262306a36Sopenharmony_ci{ 130362306a36Sopenharmony_ci unsigned long nofile = rlimit(RLIMIT_NOFILE); 130462306a36Sopenharmony_ci int err; 130562306a36Sopenharmony_ci if (from >= nofile) 130662306a36Sopenharmony_ci return -EINVAL; 130762306a36Sopenharmony_ci err = alloc_fd(from, nofile, flags); 130862306a36Sopenharmony_ci if (err >= 0) { 130962306a36Sopenharmony_ci get_file(file); 131062306a36Sopenharmony_ci fd_install(err, file); 131162306a36Sopenharmony_ci } 131262306a36Sopenharmony_ci return err; 131362306a36Sopenharmony_ci} 131462306a36Sopenharmony_ci 131562306a36Sopenharmony_ciint iterate_fd(struct files_struct *files, unsigned n, 131662306a36Sopenharmony_ci int (*f)(const void *, struct file *, unsigned), 131762306a36Sopenharmony_ci const void *p) 131862306a36Sopenharmony_ci{ 131962306a36Sopenharmony_ci struct fdtable *fdt; 132062306a36Sopenharmony_ci int res = 0; 132162306a36Sopenharmony_ci if (!files) 132262306a36Sopenharmony_ci return 0; 132362306a36Sopenharmony_ci spin_lock(&files->file_lock); 132462306a36Sopenharmony_ci for (fdt = files_fdtable(files); n < fdt->max_fds; n++) { 132562306a36Sopenharmony_ci struct file *file; 132662306a36Sopenharmony_ci file = rcu_dereference_check_fdtable(files, fdt->fd[n]); 132762306a36Sopenharmony_ci if (!file) 132862306a36Sopenharmony_ci continue; 132962306a36Sopenharmony_ci res = f(p, file, n); 133062306a36Sopenharmony_ci if (res) 133162306a36Sopenharmony_ci break; 133262306a36Sopenharmony_ci } 133362306a36Sopenharmony_ci spin_unlock(&files->file_lock); 133462306a36Sopenharmony_ci return res; 133562306a36Sopenharmony_ci} 133662306a36Sopenharmony_ciEXPORT_SYMBOL(iterate_fd); 1337