18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * linux/fs/file.c 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Manage the dynamic fd arrays in the process files_struct. 88c2ecf20Sopenharmony_ci */ 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_ci#include <linux/syscalls.h> 118c2ecf20Sopenharmony_ci#include <linux/export.h> 128c2ecf20Sopenharmony_ci#include <linux/fs.h> 138c2ecf20Sopenharmony_ci#include <linux/kernel.h> 148c2ecf20Sopenharmony_ci#include <linux/mm.h> 158c2ecf20Sopenharmony_ci#include <linux/sched/signal.h> 168c2ecf20Sopenharmony_ci#include <linux/slab.h> 178c2ecf20Sopenharmony_ci#include <linux/file.h> 188c2ecf20Sopenharmony_ci#include <linux/fdtable.h> 198c2ecf20Sopenharmony_ci#include <linux/bitops.h> 208c2ecf20Sopenharmony_ci#include <linux/spinlock.h> 218c2ecf20Sopenharmony_ci#include <linux/rcupdate.h> 228c2ecf20Sopenharmony_ci#include <linux/close_range.h> 238c2ecf20Sopenharmony_ci#include <net/sock.h> 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci#include "internal.h" 268c2ecf20Sopenharmony_ci 278c2ecf20Sopenharmony_ciunsigned int sysctl_nr_open __read_mostly = 1024*1024; 288c2ecf20Sopenharmony_ciunsigned int sysctl_nr_open_min = BITS_PER_LONG; 298c2ecf20Sopenharmony_ci/* our min() is unusable in constant expressions ;-/ */ 308c2ecf20Sopenharmony_ci#define __const_min(x, y) ((x) < (y) ? (x) : (y)) 318c2ecf20Sopenharmony_ciunsigned int sysctl_nr_open_max = 328c2ecf20Sopenharmony_ci __const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG; 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_cistatic void __free_fdtable(struct fdtable *fdt) 358c2ecf20Sopenharmony_ci{ 368c2ecf20Sopenharmony_ci kvfree(fdt->fd); 378c2ecf20Sopenharmony_ci kvfree(fdt->open_fds); 388c2ecf20Sopenharmony_ci kfree(fdt); 398c2ecf20Sopenharmony_ci} 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_cistatic void free_fdtable_rcu(struct rcu_head *rcu) 428c2ecf20Sopenharmony_ci{ 438c2ecf20Sopenharmony_ci __free_fdtable(container_of(rcu, struct fdtable, rcu)); 448c2ecf20Sopenharmony_ci} 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_ci#define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr)) 478c2ecf20Sopenharmony_ci#define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long)) 488c2ecf20Sopenharmony_ci 498c2ecf20Sopenharmony_ci/* 508c2ecf20Sopenharmony_ci * Copy 'count' fd bits from the old table to the new table and clear the extra 518c2ecf20Sopenharmony_ci * space if any. This does not copy the file pointers. Called with the files 528c2ecf20Sopenharmony_ci * spinlock held for write. 538c2ecf20Sopenharmony_ci */ 548c2ecf20Sopenharmony_cistatic void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, 558c2ecf20Sopenharmony_ci unsigned int count) 568c2ecf20Sopenharmony_ci{ 578c2ecf20Sopenharmony_ci unsigned int cpy, set; 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci cpy = count / BITS_PER_BYTE; 608c2ecf20Sopenharmony_ci set = (nfdt->max_fds - count) / BITS_PER_BYTE; 618c2ecf20Sopenharmony_ci memcpy(nfdt->open_fds, ofdt->open_fds, cpy); 628c2ecf20Sopenharmony_ci memset((char *)nfdt->open_fds + cpy, 0, set); 638c2ecf20Sopenharmony_ci memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); 648c2ecf20Sopenharmony_ci memset((char *)nfdt->close_on_exec + cpy, 0, set); 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci cpy = BITBIT_SIZE(count); 678c2ecf20Sopenharmony_ci set = BITBIT_SIZE(nfdt->max_fds) - cpy; 688c2ecf20Sopenharmony_ci memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy); 698c2ecf20Sopenharmony_ci memset((char *)nfdt->full_fds_bits + cpy, 0, set); 708c2ecf20Sopenharmony_ci} 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci/* 738c2ecf20Sopenharmony_ci * Copy all file descriptors from the old table to the new, expanded table and 748c2ecf20Sopenharmony_ci * clear the extra space. Called with the files spinlock held for write. 758c2ecf20Sopenharmony_ci */ 768c2ecf20Sopenharmony_cistatic void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) 778c2ecf20Sopenharmony_ci{ 788c2ecf20Sopenharmony_ci size_t cpy, set; 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci BUG_ON(nfdt->max_fds < ofdt->max_fds); 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci cpy = ofdt->max_fds * sizeof(struct file *); 838c2ecf20Sopenharmony_ci set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); 848c2ecf20Sopenharmony_ci memcpy(nfdt->fd, ofdt->fd, cpy); 858c2ecf20Sopenharmony_ci memset((char *)nfdt->fd + cpy, 0, set); 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_ci copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds); 888c2ecf20Sopenharmony_ci} 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci/* 918c2ecf20Sopenharmony_ci * Note how the fdtable bitmap allocations very much have to be a multiple of 928c2ecf20Sopenharmony_ci * BITS_PER_LONG. This is not only because we walk those things in chunks of 938c2ecf20Sopenharmony_ci * 'unsigned long' in some places, but simply because that is how the Linux 948c2ecf20Sopenharmony_ci * kernel bitmaps are defined to work: they are not "bits in an array of bytes", 958c2ecf20Sopenharmony_ci * they are very much "bits in an array of unsigned long". 968c2ecf20Sopenharmony_ci * 978c2ecf20Sopenharmony_ci * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied 988c2ecf20Sopenharmony_ci * by that "1024/sizeof(ptr)" before, we already know there are sufficient 998c2ecf20Sopenharmony_ci * clear low bits. Clang seems to realize that, gcc ends up being confused. 1008c2ecf20Sopenharmony_ci * 1018c2ecf20Sopenharmony_ci * On a 128-bit machine, the ALIGN() would actually matter. In the meantime, 1028c2ecf20Sopenharmony_ci * let's consider it documentation (and maybe a test-case for gcc to improve 1038c2ecf20Sopenharmony_ci * its code generation ;) 1048c2ecf20Sopenharmony_ci */ 1058c2ecf20Sopenharmony_cistatic struct fdtable * alloc_fdtable(unsigned int nr) 1068c2ecf20Sopenharmony_ci{ 1078c2ecf20Sopenharmony_ci struct fdtable *fdt; 1088c2ecf20Sopenharmony_ci void *data; 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_ci /* 1118c2ecf20Sopenharmony_ci * Figure out how many fds we actually want to support in this fdtable. 1128c2ecf20Sopenharmony_ci * Allocation steps are keyed to the size of the fdarray, since it 1138c2ecf20Sopenharmony_ci * grows far faster than any of the other dynamic data. We try to fit 1148c2ecf20Sopenharmony_ci * the fdarray into comfortable page-tuned chunks: starting at 1024B 1158c2ecf20Sopenharmony_ci * and growing in powers of two from there on. 1168c2ecf20Sopenharmony_ci */ 1178c2ecf20Sopenharmony_ci nr /= (1024 / sizeof(struct file *)); 1188c2ecf20Sopenharmony_ci nr = roundup_pow_of_two(nr + 1); 1198c2ecf20Sopenharmony_ci nr *= (1024 / sizeof(struct file *)); 1208c2ecf20Sopenharmony_ci nr = ALIGN(nr, BITS_PER_LONG); 1218c2ecf20Sopenharmony_ci /* 1228c2ecf20Sopenharmony_ci * Note that this can drive nr *below* what we had passed if sysctl_nr_open 1238c2ecf20Sopenharmony_ci * had been set lower between the check in expand_files() and here. Deal 1248c2ecf20Sopenharmony_ci * with that in caller, it's cheaper that way. 1258c2ecf20Sopenharmony_ci * 1268c2ecf20Sopenharmony_ci * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise 1278c2ecf20Sopenharmony_ci * bitmaps handling below becomes unpleasant, to put it mildly... 1288c2ecf20Sopenharmony_ci */ 1298c2ecf20Sopenharmony_ci if (unlikely(nr > sysctl_nr_open)) 1308c2ecf20Sopenharmony_ci nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1; 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ci fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT); 1338c2ecf20Sopenharmony_ci if (!fdt) 1348c2ecf20Sopenharmony_ci goto out; 1358c2ecf20Sopenharmony_ci fdt->max_fds = nr; 1368c2ecf20Sopenharmony_ci data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT); 1378c2ecf20Sopenharmony_ci if (!data) 1388c2ecf20Sopenharmony_ci goto out_fdt; 1398c2ecf20Sopenharmony_ci fdt->fd = data; 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_ci data = kvmalloc(max_t(size_t, 1428c2ecf20Sopenharmony_ci 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES), 1438c2ecf20Sopenharmony_ci GFP_KERNEL_ACCOUNT); 1448c2ecf20Sopenharmony_ci if (!data) 1458c2ecf20Sopenharmony_ci goto out_arr; 1468c2ecf20Sopenharmony_ci fdt->open_fds = data; 1478c2ecf20Sopenharmony_ci data += nr / BITS_PER_BYTE; 1488c2ecf20Sopenharmony_ci fdt->close_on_exec = data; 1498c2ecf20Sopenharmony_ci data += nr / BITS_PER_BYTE; 1508c2ecf20Sopenharmony_ci fdt->full_fds_bits = data; 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci return fdt; 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_ciout_arr: 1558c2ecf20Sopenharmony_ci kvfree(fdt->fd); 1568c2ecf20Sopenharmony_ciout_fdt: 1578c2ecf20Sopenharmony_ci kfree(fdt); 1588c2ecf20Sopenharmony_ciout: 1598c2ecf20Sopenharmony_ci return NULL; 1608c2ecf20Sopenharmony_ci} 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci/* 1638c2ecf20Sopenharmony_ci * Expand the file descriptor table. 1648c2ecf20Sopenharmony_ci * This function will allocate a new fdtable and both fd array and fdset, of 1658c2ecf20Sopenharmony_ci * the given size. 1668c2ecf20Sopenharmony_ci * Return <0 error code on error; 1 on successful completion. 1678c2ecf20Sopenharmony_ci * The files->file_lock should be held on entry, and will be held on exit. 1688c2ecf20Sopenharmony_ci */ 1698c2ecf20Sopenharmony_cistatic int expand_fdtable(struct files_struct *files, unsigned int nr) 1708c2ecf20Sopenharmony_ci __releases(files->file_lock) 1718c2ecf20Sopenharmony_ci __acquires(files->file_lock) 1728c2ecf20Sopenharmony_ci{ 1738c2ecf20Sopenharmony_ci struct fdtable *new_fdt, *cur_fdt; 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 1768c2ecf20Sopenharmony_ci new_fdt = alloc_fdtable(nr); 1778c2ecf20Sopenharmony_ci 1788c2ecf20Sopenharmony_ci /* make sure all __fd_install() have seen resize_in_progress 1798c2ecf20Sopenharmony_ci * or have finished their rcu_read_lock_sched() section. 1808c2ecf20Sopenharmony_ci */ 1818c2ecf20Sopenharmony_ci if (atomic_read(&files->count) > 1) 1828c2ecf20Sopenharmony_ci synchronize_rcu(); 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_ci spin_lock(&files->file_lock); 1858c2ecf20Sopenharmony_ci if (!new_fdt) 1868c2ecf20Sopenharmony_ci return -ENOMEM; 1878c2ecf20Sopenharmony_ci /* 1888c2ecf20Sopenharmony_ci * extremely unlikely race - sysctl_nr_open decreased between the check in 1898c2ecf20Sopenharmony_ci * caller and alloc_fdtable(). Cheaper to catch it here... 1908c2ecf20Sopenharmony_ci */ 1918c2ecf20Sopenharmony_ci if (unlikely(new_fdt->max_fds <= nr)) { 1928c2ecf20Sopenharmony_ci __free_fdtable(new_fdt); 1938c2ecf20Sopenharmony_ci return -EMFILE; 1948c2ecf20Sopenharmony_ci } 1958c2ecf20Sopenharmony_ci cur_fdt = files_fdtable(files); 1968c2ecf20Sopenharmony_ci BUG_ON(nr < cur_fdt->max_fds); 1978c2ecf20Sopenharmony_ci copy_fdtable(new_fdt, cur_fdt); 1988c2ecf20Sopenharmony_ci rcu_assign_pointer(files->fdt, new_fdt); 1998c2ecf20Sopenharmony_ci if (cur_fdt != &files->fdtab) 2008c2ecf20Sopenharmony_ci call_rcu(&cur_fdt->rcu, free_fdtable_rcu); 2018c2ecf20Sopenharmony_ci /* coupled with smp_rmb() in __fd_install() */ 2028c2ecf20Sopenharmony_ci smp_wmb(); 2038c2ecf20Sopenharmony_ci return 1; 2048c2ecf20Sopenharmony_ci} 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci/* 2078c2ecf20Sopenharmony_ci * Expand files. 2088c2ecf20Sopenharmony_ci * This function will expand the file structures, if the requested size exceeds 2098c2ecf20Sopenharmony_ci * the current capacity and there is room for expansion. 2108c2ecf20Sopenharmony_ci * Return <0 error code on error; 0 when nothing done; 1 when files were 2118c2ecf20Sopenharmony_ci * expanded and execution may have blocked. 2128c2ecf20Sopenharmony_ci * The files->file_lock should be held on entry, and will be held on exit. 2138c2ecf20Sopenharmony_ci */ 2148c2ecf20Sopenharmony_cistatic int expand_files(struct files_struct *files, unsigned int nr) 2158c2ecf20Sopenharmony_ci __releases(files->file_lock) 2168c2ecf20Sopenharmony_ci __acquires(files->file_lock) 2178c2ecf20Sopenharmony_ci{ 2188c2ecf20Sopenharmony_ci struct fdtable *fdt; 2198c2ecf20Sopenharmony_ci int expanded = 0; 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_cirepeat: 2228c2ecf20Sopenharmony_ci fdt = files_fdtable(files); 2238c2ecf20Sopenharmony_ci 2248c2ecf20Sopenharmony_ci /* Do we need to expand? */ 2258c2ecf20Sopenharmony_ci if (nr < fdt->max_fds) 2268c2ecf20Sopenharmony_ci return expanded; 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci /* Can we expand? */ 2298c2ecf20Sopenharmony_ci if (nr >= sysctl_nr_open) 2308c2ecf20Sopenharmony_ci return -EMFILE; 2318c2ecf20Sopenharmony_ci 2328c2ecf20Sopenharmony_ci if (unlikely(files->resize_in_progress)) { 2338c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 2348c2ecf20Sopenharmony_ci expanded = 1; 2358c2ecf20Sopenharmony_ci wait_event(files->resize_wait, !files->resize_in_progress); 2368c2ecf20Sopenharmony_ci spin_lock(&files->file_lock); 2378c2ecf20Sopenharmony_ci goto repeat; 2388c2ecf20Sopenharmony_ci } 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_ci /* All good, so we try */ 2418c2ecf20Sopenharmony_ci files->resize_in_progress = true; 2428c2ecf20Sopenharmony_ci expanded = expand_fdtable(files, nr); 2438c2ecf20Sopenharmony_ci files->resize_in_progress = false; 2448c2ecf20Sopenharmony_ci 2458c2ecf20Sopenharmony_ci wake_up_all(&files->resize_wait); 2468c2ecf20Sopenharmony_ci return expanded; 2478c2ecf20Sopenharmony_ci} 2488c2ecf20Sopenharmony_ci 2498c2ecf20Sopenharmony_cistatic inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt) 2508c2ecf20Sopenharmony_ci{ 2518c2ecf20Sopenharmony_ci __set_bit(fd, fdt->close_on_exec); 2528c2ecf20Sopenharmony_ci} 2538c2ecf20Sopenharmony_ci 2548c2ecf20Sopenharmony_cistatic inline void __clear_close_on_exec(unsigned int fd, struct fdtable *fdt) 2558c2ecf20Sopenharmony_ci{ 2568c2ecf20Sopenharmony_ci if (test_bit(fd, fdt->close_on_exec)) 2578c2ecf20Sopenharmony_ci __clear_bit(fd, fdt->close_on_exec); 2588c2ecf20Sopenharmony_ci} 2598c2ecf20Sopenharmony_ci 2608c2ecf20Sopenharmony_cistatic inline void __set_open_fd(unsigned int fd, struct fdtable *fdt) 2618c2ecf20Sopenharmony_ci{ 2628c2ecf20Sopenharmony_ci __set_bit(fd, fdt->open_fds); 2638c2ecf20Sopenharmony_ci fd /= BITS_PER_LONG; 2648c2ecf20Sopenharmony_ci if (!~fdt->open_fds[fd]) 2658c2ecf20Sopenharmony_ci __set_bit(fd, fdt->full_fds_bits); 2668c2ecf20Sopenharmony_ci} 2678c2ecf20Sopenharmony_ci 2688c2ecf20Sopenharmony_cistatic inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt) 2698c2ecf20Sopenharmony_ci{ 2708c2ecf20Sopenharmony_ci __clear_bit(fd, fdt->open_fds); 2718c2ecf20Sopenharmony_ci __clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits); 2728c2ecf20Sopenharmony_ci} 2738c2ecf20Sopenharmony_ci 2748c2ecf20Sopenharmony_cistatic unsigned int count_open_files(struct fdtable *fdt) 2758c2ecf20Sopenharmony_ci{ 2768c2ecf20Sopenharmony_ci unsigned int size = fdt->max_fds; 2778c2ecf20Sopenharmony_ci unsigned int i; 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ci /* Find the last open fd */ 2808c2ecf20Sopenharmony_ci for (i = size / BITS_PER_LONG; i > 0; ) { 2818c2ecf20Sopenharmony_ci if (fdt->open_fds[--i]) 2828c2ecf20Sopenharmony_ci break; 2838c2ecf20Sopenharmony_ci } 2848c2ecf20Sopenharmony_ci i = (i + 1) * BITS_PER_LONG; 2858c2ecf20Sopenharmony_ci return i; 2868c2ecf20Sopenharmony_ci} 2878c2ecf20Sopenharmony_ci 2888c2ecf20Sopenharmony_ci/* 2898c2ecf20Sopenharmony_ci * Note that a sane fdtable size always has to be a multiple of 2908c2ecf20Sopenharmony_ci * BITS_PER_LONG, since we have bitmaps that are sized by this. 2918c2ecf20Sopenharmony_ci * 2928c2ecf20Sopenharmony_ci * 'max_fds' will normally already be properly aligned, but it 2938c2ecf20Sopenharmony_ci * turns out that in the close_range() -> __close_range() -> 2948c2ecf20Sopenharmony_ci * unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end 2958c2ecf20Sopenharmony_ci * up having a 'max_fds' value that isn't already aligned. 2968c2ecf20Sopenharmony_ci * 2978c2ecf20Sopenharmony_ci * Rather than make close_range() have to worry about this, 2988c2ecf20Sopenharmony_ci * just make that BITS_PER_LONG alignment be part of a sane 2998c2ecf20Sopenharmony_ci * fdtable size. Becuase that's really what it is. 3008c2ecf20Sopenharmony_ci */ 3018c2ecf20Sopenharmony_cistatic unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds) 3028c2ecf20Sopenharmony_ci{ 3038c2ecf20Sopenharmony_ci unsigned int count; 3048c2ecf20Sopenharmony_ci 3058c2ecf20Sopenharmony_ci count = count_open_files(fdt); 3068c2ecf20Sopenharmony_ci if (max_fds < NR_OPEN_DEFAULT) 3078c2ecf20Sopenharmony_ci max_fds = NR_OPEN_DEFAULT; 3088c2ecf20Sopenharmony_ci return ALIGN(min(count, max_fds), BITS_PER_LONG); 3098c2ecf20Sopenharmony_ci} 3108c2ecf20Sopenharmony_ci 3118c2ecf20Sopenharmony_ci/* 3128c2ecf20Sopenharmony_ci * Allocate a new files structure and copy contents from the 3138c2ecf20Sopenharmony_ci * passed in files structure. 3148c2ecf20Sopenharmony_ci * errorp will be valid only when the returned files_struct is NULL. 3158c2ecf20Sopenharmony_ci */ 3168c2ecf20Sopenharmony_cistruct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp) 3178c2ecf20Sopenharmony_ci{ 3188c2ecf20Sopenharmony_ci struct files_struct *newf; 3198c2ecf20Sopenharmony_ci struct file **old_fds, **new_fds; 3208c2ecf20Sopenharmony_ci unsigned int open_files, i; 3218c2ecf20Sopenharmony_ci struct fdtable *old_fdt, *new_fdt; 3228c2ecf20Sopenharmony_ci 3238c2ecf20Sopenharmony_ci *errorp = -ENOMEM; 3248c2ecf20Sopenharmony_ci newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); 3258c2ecf20Sopenharmony_ci if (!newf) 3268c2ecf20Sopenharmony_ci goto out; 3278c2ecf20Sopenharmony_ci 3288c2ecf20Sopenharmony_ci atomic_set(&newf->count, 1); 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_ci spin_lock_init(&newf->file_lock); 3318c2ecf20Sopenharmony_ci newf->resize_in_progress = false; 3328c2ecf20Sopenharmony_ci init_waitqueue_head(&newf->resize_wait); 3338c2ecf20Sopenharmony_ci newf->next_fd = 0; 3348c2ecf20Sopenharmony_ci new_fdt = &newf->fdtab; 3358c2ecf20Sopenharmony_ci new_fdt->max_fds = NR_OPEN_DEFAULT; 3368c2ecf20Sopenharmony_ci new_fdt->close_on_exec = newf->close_on_exec_init; 3378c2ecf20Sopenharmony_ci new_fdt->open_fds = newf->open_fds_init; 3388c2ecf20Sopenharmony_ci new_fdt->full_fds_bits = newf->full_fds_bits_init; 3398c2ecf20Sopenharmony_ci new_fdt->fd = &newf->fd_array[0]; 3408c2ecf20Sopenharmony_ci 3418c2ecf20Sopenharmony_ci spin_lock(&oldf->file_lock); 3428c2ecf20Sopenharmony_ci old_fdt = files_fdtable(oldf); 3438c2ecf20Sopenharmony_ci open_files = sane_fdtable_size(old_fdt, max_fds); 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ci /* 3468c2ecf20Sopenharmony_ci * Check whether we need to allocate a larger fd array and fd set. 3478c2ecf20Sopenharmony_ci */ 3488c2ecf20Sopenharmony_ci while (unlikely(open_files > new_fdt->max_fds)) { 3498c2ecf20Sopenharmony_ci spin_unlock(&oldf->file_lock); 3508c2ecf20Sopenharmony_ci 3518c2ecf20Sopenharmony_ci if (new_fdt != &newf->fdtab) 3528c2ecf20Sopenharmony_ci __free_fdtable(new_fdt); 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci new_fdt = alloc_fdtable(open_files - 1); 3558c2ecf20Sopenharmony_ci if (!new_fdt) { 3568c2ecf20Sopenharmony_ci *errorp = -ENOMEM; 3578c2ecf20Sopenharmony_ci goto out_release; 3588c2ecf20Sopenharmony_ci } 3598c2ecf20Sopenharmony_ci 3608c2ecf20Sopenharmony_ci /* beyond sysctl_nr_open; nothing to do */ 3618c2ecf20Sopenharmony_ci if (unlikely(new_fdt->max_fds < open_files)) { 3628c2ecf20Sopenharmony_ci __free_fdtable(new_fdt); 3638c2ecf20Sopenharmony_ci *errorp = -EMFILE; 3648c2ecf20Sopenharmony_ci goto out_release; 3658c2ecf20Sopenharmony_ci } 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_ci /* 3688c2ecf20Sopenharmony_ci * Reacquire the oldf lock and a pointer to its fd table 3698c2ecf20Sopenharmony_ci * who knows it may have a new bigger fd table. We need 3708c2ecf20Sopenharmony_ci * the latest pointer. 3718c2ecf20Sopenharmony_ci */ 3728c2ecf20Sopenharmony_ci spin_lock(&oldf->file_lock); 3738c2ecf20Sopenharmony_ci old_fdt = files_fdtable(oldf); 3748c2ecf20Sopenharmony_ci open_files = sane_fdtable_size(old_fdt, max_fds); 3758c2ecf20Sopenharmony_ci } 3768c2ecf20Sopenharmony_ci 3778c2ecf20Sopenharmony_ci copy_fd_bitmaps(new_fdt, old_fdt, open_files); 3788c2ecf20Sopenharmony_ci 3798c2ecf20Sopenharmony_ci old_fds = old_fdt->fd; 3808c2ecf20Sopenharmony_ci new_fds = new_fdt->fd; 3818c2ecf20Sopenharmony_ci 3828c2ecf20Sopenharmony_ci for (i = open_files; i != 0; i--) { 3838c2ecf20Sopenharmony_ci struct file *f = *old_fds++; 3848c2ecf20Sopenharmony_ci if (f) { 3858c2ecf20Sopenharmony_ci get_file(f); 3868c2ecf20Sopenharmony_ci } else { 3878c2ecf20Sopenharmony_ci /* 3888c2ecf20Sopenharmony_ci * The fd may be claimed in the fd bitmap but not yet 3898c2ecf20Sopenharmony_ci * instantiated in the files array if a sibling thread 3908c2ecf20Sopenharmony_ci * is partway through open(). So make sure that this 3918c2ecf20Sopenharmony_ci * fd is available to the new process. 3928c2ecf20Sopenharmony_ci */ 3938c2ecf20Sopenharmony_ci __clear_open_fd(open_files - i, new_fdt); 3948c2ecf20Sopenharmony_ci } 3958c2ecf20Sopenharmony_ci rcu_assign_pointer(*new_fds++, f); 3968c2ecf20Sopenharmony_ci } 3978c2ecf20Sopenharmony_ci spin_unlock(&oldf->file_lock); 3988c2ecf20Sopenharmony_ci 3998c2ecf20Sopenharmony_ci /* clear the remainder */ 4008c2ecf20Sopenharmony_ci memset(new_fds, 0, (new_fdt->max_fds - open_files) * sizeof(struct file *)); 4018c2ecf20Sopenharmony_ci 4028c2ecf20Sopenharmony_ci rcu_assign_pointer(newf->fdt, new_fdt); 4038c2ecf20Sopenharmony_ci 4048c2ecf20Sopenharmony_ci return newf; 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ciout_release: 4078c2ecf20Sopenharmony_ci kmem_cache_free(files_cachep, newf); 4088c2ecf20Sopenharmony_ciout: 4098c2ecf20Sopenharmony_ci return NULL; 4108c2ecf20Sopenharmony_ci} 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_cistatic struct fdtable *close_files(struct files_struct * files) 4138c2ecf20Sopenharmony_ci{ 4148c2ecf20Sopenharmony_ci /* 4158c2ecf20Sopenharmony_ci * It is safe to dereference the fd table without RCU or 4168c2ecf20Sopenharmony_ci * ->file_lock because this is the last reference to the 4178c2ecf20Sopenharmony_ci * files structure. 4188c2ecf20Sopenharmony_ci */ 4198c2ecf20Sopenharmony_ci struct fdtable *fdt = rcu_dereference_raw(files->fdt); 4208c2ecf20Sopenharmony_ci unsigned int i, j = 0; 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_ci for (;;) { 4238c2ecf20Sopenharmony_ci unsigned long set; 4248c2ecf20Sopenharmony_ci i = j * BITS_PER_LONG; 4258c2ecf20Sopenharmony_ci if (i >= fdt->max_fds) 4268c2ecf20Sopenharmony_ci break; 4278c2ecf20Sopenharmony_ci set = fdt->open_fds[j++]; 4288c2ecf20Sopenharmony_ci while (set) { 4298c2ecf20Sopenharmony_ci if (set & 1) { 4308c2ecf20Sopenharmony_ci struct file * file = xchg(&fdt->fd[i], NULL); 4318c2ecf20Sopenharmony_ci if (file) { 4328c2ecf20Sopenharmony_ci filp_close(file, files); 4338c2ecf20Sopenharmony_ci cond_resched(); 4348c2ecf20Sopenharmony_ci } 4358c2ecf20Sopenharmony_ci } 4368c2ecf20Sopenharmony_ci i++; 4378c2ecf20Sopenharmony_ci set >>= 1; 4388c2ecf20Sopenharmony_ci } 4398c2ecf20Sopenharmony_ci } 4408c2ecf20Sopenharmony_ci 4418c2ecf20Sopenharmony_ci return fdt; 4428c2ecf20Sopenharmony_ci} 4438c2ecf20Sopenharmony_ci 4448c2ecf20Sopenharmony_cistruct files_struct *get_files_struct(struct task_struct *task) 4458c2ecf20Sopenharmony_ci{ 4468c2ecf20Sopenharmony_ci struct files_struct *files; 4478c2ecf20Sopenharmony_ci 4488c2ecf20Sopenharmony_ci task_lock(task); 4498c2ecf20Sopenharmony_ci files = task->files; 4508c2ecf20Sopenharmony_ci if (files) 4518c2ecf20Sopenharmony_ci atomic_inc(&files->count); 4528c2ecf20Sopenharmony_ci task_unlock(task); 4538c2ecf20Sopenharmony_ci 4548c2ecf20Sopenharmony_ci return files; 4558c2ecf20Sopenharmony_ci} 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_civoid put_files_struct(struct files_struct *files) 4588c2ecf20Sopenharmony_ci{ 4598c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&files->count)) { 4608c2ecf20Sopenharmony_ci struct fdtable *fdt = close_files(files); 4618c2ecf20Sopenharmony_ci 4628c2ecf20Sopenharmony_ci /* free the arrays if they are not embedded */ 4638c2ecf20Sopenharmony_ci if (fdt != &files->fdtab) 4648c2ecf20Sopenharmony_ci __free_fdtable(fdt); 4658c2ecf20Sopenharmony_ci kmem_cache_free(files_cachep, files); 4668c2ecf20Sopenharmony_ci } 4678c2ecf20Sopenharmony_ci} 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_civoid reset_files_struct(struct files_struct *files) 4708c2ecf20Sopenharmony_ci{ 4718c2ecf20Sopenharmony_ci struct task_struct *tsk = current; 4728c2ecf20Sopenharmony_ci struct files_struct *old; 4738c2ecf20Sopenharmony_ci 4748c2ecf20Sopenharmony_ci old = tsk->files; 4758c2ecf20Sopenharmony_ci task_lock(tsk); 4768c2ecf20Sopenharmony_ci tsk->files = files; 4778c2ecf20Sopenharmony_ci task_unlock(tsk); 4788c2ecf20Sopenharmony_ci put_files_struct(old); 4798c2ecf20Sopenharmony_ci} 4808c2ecf20Sopenharmony_ci 4818c2ecf20Sopenharmony_civoid exit_files(struct task_struct *tsk) 4828c2ecf20Sopenharmony_ci{ 4838c2ecf20Sopenharmony_ci struct files_struct * files = tsk->files; 4848c2ecf20Sopenharmony_ci 4858c2ecf20Sopenharmony_ci if (files) { 4868c2ecf20Sopenharmony_ci task_lock(tsk); 4878c2ecf20Sopenharmony_ci tsk->files = NULL; 4888c2ecf20Sopenharmony_ci task_unlock(tsk); 4898c2ecf20Sopenharmony_ci put_files_struct(files); 4908c2ecf20Sopenharmony_ci } 4918c2ecf20Sopenharmony_ci} 4928c2ecf20Sopenharmony_ci 4938c2ecf20Sopenharmony_cistruct files_struct init_files = { 4948c2ecf20Sopenharmony_ci .count = ATOMIC_INIT(1), 4958c2ecf20Sopenharmony_ci .fdt = &init_files.fdtab, 4968c2ecf20Sopenharmony_ci .fdtab = { 4978c2ecf20Sopenharmony_ci .max_fds = NR_OPEN_DEFAULT, 4988c2ecf20Sopenharmony_ci .fd = &init_files.fd_array[0], 4998c2ecf20Sopenharmony_ci .close_on_exec = init_files.close_on_exec_init, 5008c2ecf20Sopenharmony_ci .open_fds = init_files.open_fds_init, 5018c2ecf20Sopenharmony_ci .full_fds_bits = init_files.full_fds_bits_init, 5028c2ecf20Sopenharmony_ci }, 5038c2ecf20Sopenharmony_ci .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock), 5048c2ecf20Sopenharmony_ci .resize_wait = __WAIT_QUEUE_HEAD_INITIALIZER(init_files.resize_wait), 5058c2ecf20Sopenharmony_ci}; 5068c2ecf20Sopenharmony_ci 5078c2ecf20Sopenharmony_cistatic unsigned int find_next_fd(struct fdtable *fdt, unsigned int start) 5088c2ecf20Sopenharmony_ci{ 5098c2ecf20Sopenharmony_ci unsigned int maxfd = fdt->max_fds; 5108c2ecf20Sopenharmony_ci unsigned int maxbit = maxfd / BITS_PER_LONG; 5118c2ecf20Sopenharmony_ci unsigned int bitbit = start / BITS_PER_LONG; 5128c2ecf20Sopenharmony_ci 5138c2ecf20Sopenharmony_ci bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG; 5148c2ecf20Sopenharmony_ci if (bitbit > maxfd) 5158c2ecf20Sopenharmony_ci return maxfd; 5168c2ecf20Sopenharmony_ci if (bitbit > start) 5178c2ecf20Sopenharmony_ci start = bitbit; 5188c2ecf20Sopenharmony_ci return find_next_zero_bit(fdt->open_fds, maxfd, start); 5198c2ecf20Sopenharmony_ci} 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_ci/* 5228c2ecf20Sopenharmony_ci * allocate a file descriptor, mark it busy. 5238c2ecf20Sopenharmony_ci */ 5248c2ecf20Sopenharmony_ciint __alloc_fd(struct files_struct *files, 5258c2ecf20Sopenharmony_ci unsigned start, unsigned end, unsigned flags) 5268c2ecf20Sopenharmony_ci{ 5278c2ecf20Sopenharmony_ci unsigned int fd; 5288c2ecf20Sopenharmony_ci int error; 5298c2ecf20Sopenharmony_ci struct fdtable *fdt; 5308c2ecf20Sopenharmony_ci 5318c2ecf20Sopenharmony_ci spin_lock(&files->file_lock); 5328c2ecf20Sopenharmony_cirepeat: 5338c2ecf20Sopenharmony_ci fdt = files_fdtable(files); 5348c2ecf20Sopenharmony_ci fd = start; 5358c2ecf20Sopenharmony_ci if (fd < files->next_fd) 5368c2ecf20Sopenharmony_ci fd = files->next_fd; 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_ci if (fd < fdt->max_fds) 5398c2ecf20Sopenharmony_ci fd = find_next_fd(fdt, fd); 5408c2ecf20Sopenharmony_ci 5418c2ecf20Sopenharmony_ci /* 5428c2ecf20Sopenharmony_ci * N.B. For clone tasks sharing a files structure, this test 5438c2ecf20Sopenharmony_ci * will limit the total number of files that can be opened. 5448c2ecf20Sopenharmony_ci */ 5458c2ecf20Sopenharmony_ci error = -EMFILE; 5468c2ecf20Sopenharmony_ci if (fd >= end) 5478c2ecf20Sopenharmony_ci goto out; 5488c2ecf20Sopenharmony_ci 5498c2ecf20Sopenharmony_ci error = expand_files(files, fd); 5508c2ecf20Sopenharmony_ci if (error < 0) 5518c2ecf20Sopenharmony_ci goto out; 5528c2ecf20Sopenharmony_ci 5538c2ecf20Sopenharmony_ci /* 5548c2ecf20Sopenharmony_ci * If we needed to expand the fs array we 5558c2ecf20Sopenharmony_ci * might have blocked - try again. 5568c2ecf20Sopenharmony_ci */ 5578c2ecf20Sopenharmony_ci if (error) 5588c2ecf20Sopenharmony_ci goto repeat; 5598c2ecf20Sopenharmony_ci 5608c2ecf20Sopenharmony_ci if (start <= files->next_fd) 5618c2ecf20Sopenharmony_ci files->next_fd = fd + 1; 5628c2ecf20Sopenharmony_ci 5638c2ecf20Sopenharmony_ci __set_open_fd(fd, fdt); 5648c2ecf20Sopenharmony_ci if (flags & O_CLOEXEC) 5658c2ecf20Sopenharmony_ci __set_close_on_exec(fd, fdt); 5668c2ecf20Sopenharmony_ci else 5678c2ecf20Sopenharmony_ci __clear_close_on_exec(fd, fdt); 5688c2ecf20Sopenharmony_ci error = fd; 5698c2ecf20Sopenharmony_ci#if 1 5708c2ecf20Sopenharmony_ci /* Sanity check */ 5718c2ecf20Sopenharmony_ci if (rcu_access_pointer(fdt->fd[fd]) != NULL) { 5728c2ecf20Sopenharmony_ci printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); 5738c2ecf20Sopenharmony_ci rcu_assign_pointer(fdt->fd[fd], NULL); 5748c2ecf20Sopenharmony_ci } 5758c2ecf20Sopenharmony_ci#endif 5768c2ecf20Sopenharmony_ci 5778c2ecf20Sopenharmony_ciout: 5788c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 5798c2ecf20Sopenharmony_ci return error; 5808c2ecf20Sopenharmony_ci} 5818c2ecf20Sopenharmony_ci 5828c2ecf20Sopenharmony_cistatic int alloc_fd(unsigned start, unsigned flags) 5838c2ecf20Sopenharmony_ci{ 5848c2ecf20Sopenharmony_ci return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); 5858c2ecf20Sopenharmony_ci} 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_ciint __get_unused_fd_flags(unsigned flags, unsigned long nofile) 5888c2ecf20Sopenharmony_ci{ 5898c2ecf20Sopenharmony_ci return __alloc_fd(current->files, 0, nofile, flags); 5908c2ecf20Sopenharmony_ci} 5918c2ecf20Sopenharmony_ci 5928c2ecf20Sopenharmony_ciint get_unused_fd_flags(unsigned flags) 5938c2ecf20Sopenharmony_ci{ 5948c2ecf20Sopenharmony_ci return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE)); 5958c2ecf20Sopenharmony_ci} 5968c2ecf20Sopenharmony_ciEXPORT_SYMBOL(get_unused_fd_flags); 5978c2ecf20Sopenharmony_ci 5988c2ecf20Sopenharmony_cistatic void __put_unused_fd(struct files_struct *files, unsigned int fd) 5998c2ecf20Sopenharmony_ci{ 6008c2ecf20Sopenharmony_ci struct fdtable *fdt = files_fdtable(files); 6018c2ecf20Sopenharmony_ci __clear_open_fd(fd, fdt); 6028c2ecf20Sopenharmony_ci if (fd < files->next_fd) 6038c2ecf20Sopenharmony_ci files->next_fd = fd; 6048c2ecf20Sopenharmony_ci} 6058c2ecf20Sopenharmony_ci 6068c2ecf20Sopenharmony_civoid put_unused_fd(unsigned int fd) 6078c2ecf20Sopenharmony_ci{ 6088c2ecf20Sopenharmony_ci struct files_struct *files = current->files; 6098c2ecf20Sopenharmony_ci spin_lock(&files->file_lock); 6108c2ecf20Sopenharmony_ci __put_unused_fd(files, fd); 6118c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 6128c2ecf20Sopenharmony_ci} 6138c2ecf20Sopenharmony_ci 6148c2ecf20Sopenharmony_ciEXPORT_SYMBOL(put_unused_fd); 6158c2ecf20Sopenharmony_ci 6168c2ecf20Sopenharmony_ci/* 6178c2ecf20Sopenharmony_ci * Install a file pointer in the fd array. 6188c2ecf20Sopenharmony_ci * 6198c2ecf20Sopenharmony_ci * The VFS is full of places where we drop the files lock between 6208c2ecf20Sopenharmony_ci * setting the open_fds bitmap and installing the file in the file 6218c2ecf20Sopenharmony_ci * array. At any such point, we are vulnerable to a dup2() race 6228c2ecf20Sopenharmony_ci * installing a file in the array before us. We need to detect this and 6238c2ecf20Sopenharmony_ci * fput() the struct file we are about to overwrite in this case. 6248c2ecf20Sopenharmony_ci * 6258c2ecf20Sopenharmony_ci * It should never happen - if we allow dup2() do it, _really_ bad things 6268c2ecf20Sopenharmony_ci * will follow. 6278c2ecf20Sopenharmony_ci * 6288c2ecf20Sopenharmony_ci * NOTE: __fd_install() variant is really, really low-level; don't 6298c2ecf20Sopenharmony_ci * use it unless you are forced to by truly lousy API shoved down 6308c2ecf20Sopenharmony_ci * your throat. 'files' *MUST* be either current->files or obtained 6318c2ecf20Sopenharmony_ci * by get_files_struct(current) done by whoever had given it to you, 6328c2ecf20Sopenharmony_ci * or really bad things will happen. Normally you want to use 6338c2ecf20Sopenharmony_ci * fd_install() instead. 6348c2ecf20Sopenharmony_ci */ 6358c2ecf20Sopenharmony_ci 6368c2ecf20Sopenharmony_civoid __fd_install(struct files_struct *files, unsigned int fd, 6378c2ecf20Sopenharmony_ci struct file *file) 6388c2ecf20Sopenharmony_ci{ 6398c2ecf20Sopenharmony_ci struct fdtable *fdt; 6408c2ecf20Sopenharmony_ci 6418c2ecf20Sopenharmony_ci rcu_read_lock_sched(); 6428c2ecf20Sopenharmony_ci 6438c2ecf20Sopenharmony_ci if (unlikely(files->resize_in_progress)) { 6448c2ecf20Sopenharmony_ci rcu_read_unlock_sched(); 6458c2ecf20Sopenharmony_ci spin_lock(&files->file_lock); 6468c2ecf20Sopenharmony_ci fdt = files_fdtable(files); 6478c2ecf20Sopenharmony_ci BUG_ON(fdt->fd[fd] != NULL); 6488c2ecf20Sopenharmony_ci rcu_assign_pointer(fdt->fd[fd], file); 6498c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 6508c2ecf20Sopenharmony_ci return; 6518c2ecf20Sopenharmony_ci } 6528c2ecf20Sopenharmony_ci /* coupled with smp_wmb() in expand_fdtable() */ 6538c2ecf20Sopenharmony_ci smp_rmb(); 6548c2ecf20Sopenharmony_ci fdt = rcu_dereference_sched(files->fdt); 6558c2ecf20Sopenharmony_ci BUG_ON(fdt->fd[fd] != NULL); 6568c2ecf20Sopenharmony_ci rcu_assign_pointer(fdt->fd[fd], file); 6578c2ecf20Sopenharmony_ci rcu_read_unlock_sched(); 6588c2ecf20Sopenharmony_ci} 6598c2ecf20Sopenharmony_ci 6608c2ecf20Sopenharmony_ci/* 6618c2ecf20Sopenharmony_ci * This consumes the "file" refcount, so callers should treat it 6628c2ecf20Sopenharmony_ci * as if they had called fput(file). 6638c2ecf20Sopenharmony_ci */ 6648c2ecf20Sopenharmony_civoid fd_install(unsigned int fd, struct file *file) 6658c2ecf20Sopenharmony_ci{ 6668c2ecf20Sopenharmony_ci __fd_install(current->files, fd, file); 6678c2ecf20Sopenharmony_ci} 6688c2ecf20Sopenharmony_ci 6698c2ecf20Sopenharmony_ciEXPORT_SYMBOL(fd_install); 6708c2ecf20Sopenharmony_ci 6718c2ecf20Sopenharmony_cistatic struct file *pick_file(struct files_struct *files, unsigned fd) 6728c2ecf20Sopenharmony_ci{ 6738c2ecf20Sopenharmony_ci struct file *file = NULL; 6748c2ecf20Sopenharmony_ci struct fdtable *fdt; 6758c2ecf20Sopenharmony_ci 6768c2ecf20Sopenharmony_ci spin_lock(&files->file_lock); 6778c2ecf20Sopenharmony_ci fdt = files_fdtable(files); 6788c2ecf20Sopenharmony_ci if (fd >= fdt->max_fds) 6798c2ecf20Sopenharmony_ci goto out_unlock; 6808c2ecf20Sopenharmony_ci fd = array_index_nospec(fd, fdt->max_fds); 6818c2ecf20Sopenharmony_ci file = fdt->fd[fd]; 6828c2ecf20Sopenharmony_ci if (!file) 6838c2ecf20Sopenharmony_ci goto out_unlock; 6848c2ecf20Sopenharmony_ci rcu_assign_pointer(fdt->fd[fd], NULL); 6858c2ecf20Sopenharmony_ci __put_unused_fd(files, fd); 6868c2ecf20Sopenharmony_ci 6878c2ecf20Sopenharmony_ciout_unlock: 6888c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 6898c2ecf20Sopenharmony_ci return file; 6908c2ecf20Sopenharmony_ci} 6918c2ecf20Sopenharmony_ci 6928c2ecf20Sopenharmony_ci/* 6938c2ecf20Sopenharmony_ci * The same warnings as for __alloc_fd()/__fd_install() apply here... 6948c2ecf20Sopenharmony_ci */ 6958c2ecf20Sopenharmony_ciint __close_fd(struct files_struct *files, unsigned fd) 6968c2ecf20Sopenharmony_ci{ 6978c2ecf20Sopenharmony_ci struct file *file; 6988c2ecf20Sopenharmony_ci 6998c2ecf20Sopenharmony_ci file = pick_file(files, fd); 7008c2ecf20Sopenharmony_ci if (!file) 7018c2ecf20Sopenharmony_ci return -EBADF; 7028c2ecf20Sopenharmony_ci 7038c2ecf20Sopenharmony_ci return filp_close(file, files); 7048c2ecf20Sopenharmony_ci} 7058c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__close_fd); /* for ksys_close() */ 7068c2ecf20Sopenharmony_ci 7078c2ecf20Sopenharmony_ci/** 7088c2ecf20Sopenharmony_ci * __close_range() - Close all file descriptors in a given range. 7098c2ecf20Sopenharmony_ci * 7108c2ecf20Sopenharmony_ci * @fd: starting file descriptor to close 7118c2ecf20Sopenharmony_ci * @max_fd: last file descriptor to close 7128c2ecf20Sopenharmony_ci * 7138c2ecf20Sopenharmony_ci * This closes a range of file descriptors. All file descriptors 7148c2ecf20Sopenharmony_ci * from @fd up to and including @max_fd are closed. 7158c2ecf20Sopenharmony_ci */ 7168c2ecf20Sopenharmony_ciint __close_range(unsigned fd, unsigned max_fd, unsigned int flags) 7178c2ecf20Sopenharmony_ci{ 7188c2ecf20Sopenharmony_ci unsigned int cur_max; 7198c2ecf20Sopenharmony_ci struct task_struct *me = current; 7208c2ecf20Sopenharmony_ci struct files_struct *cur_fds = me->files, *fds = NULL; 7218c2ecf20Sopenharmony_ci 7228c2ecf20Sopenharmony_ci if (flags & ~CLOSE_RANGE_UNSHARE) 7238c2ecf20Sopenharmony_ci return -EINVAL; 7248c2ecf20Sopenharmony_ci 7258c2ecf20Sopenharmony_ci if (fd > max_fd) 7268c2ecf20Sopenharmony_ci return -EINVAL; 7278c2ecf20Sopenharmony_ci 7288c2ecf20Sopenharmony_ci rcu_read_lock(); 7298c2ecf20Sopenharmony_ci cur_max = files_fdtable(cur_fds)->max_fds; 7308c2ecf20Sopenharmony_ci rcu_read_unlock(); 7318c2ecf20Sopenharmony_ci 7328c2ecf20Sopenharmony_ci /* cap to last valid index into fdtable */ 7338c2ecf20Sopenharmony_ci cur_max--; 7348c2ecf20Sopenharmony_ci 7358c2ecf20Sopenharmony_ci if (flags & CLOSE_RANGE_UNSHARE) { 7368c2ecf20Sopenharmony_ci int ret; 7378c2ecf20Sopenharmony_ci unsigned int max_unshare_fds = NR_OPEN_MAX; 7388c2ecf20Sopenharmony_ci 7398c2ecf20Sopenharmony_ci /* 7408c2ecf20Sopenharmony_ci * If the requested range is greater than the current maximum, 7418c2ecf20Sopenharmony_ci * we're closing everything so only copy all file descriptors 7428c2ecf20Sopenharmony_ci * beneath the lowest file descriptor. 7438c2ecf20Sopenharmony_ci */ 7448c2ecf20Sopenharmony_ci if (max_fd >= cur_max) 7458c2ecf20Sopenharmony_ci max_unshare_fds = fd; 7468c2ecf20Sopenharmony_ci 7478c2ecf20Sopenharmony_ci ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds); 7488c2ecf20Sopenharmony_ci if (ret) 7498c2ecf20Sopenharmony_ci return ret; 7508c2ecf20Sopenharmony_ci 7518c2ecf20Sopenharmony_ci /* 7528c2ecf20Sopenharmony_ci * We used to share our file descriptor table, and have now 7538c2ecf20Sopenharmony_ci * created a private one, make sure we're using it below. 7548c2ecf20Sopenharmony_ci */ 7558c2ecf20Sopenharmony_ci if (fds) 7568c2ecf20Sopenharmony_ci swap(cur_fds, fds); 7578c2ecf20Sopenharmony_ci } 7588c2ecf20Sopenharmony_ci 7598c2ecf20Sopenharmony_ci max_fd = min(max_fd, cur_max); 7608c2ecf20Sopenharmony_ci while (fd <= max_fd) { 7618c2ecf20Sopenharmony_ci struct file *file; 7628c2ecf20Sopenharmony_ci 7638c2ecf20Sopenharmony_ci file = pick_file(cur_fds, fd++); 7648c2ecf20Sopenharmony_ci if (!file) 7658c2ecf20Sopenharmony_ci continue; 7668c2ecf20Sopenharmony_ci 7678c2ecf20Sopenharmony_ci filp_close(file, cur_fds); 7688c2ecf20Sopenharmony_ci cond_resched(); 7698c2ecf20Sopenharmony_ci } 7708c2ecf20Sopenharmony_ci 7718c2ecf20Sopenharmony_ci if (fds) { 7728c2ecf20Sopenharmony_ci /* 7738c2ecf20Sopenharmony_ci * We're done closing the files we were supposed to. Time to install 7748c2ecf20Sopenharmony_ci * the new file descriptor table and drop the old one. 7758c2ecf20Sopenharmony_ci */ 7768c2ecf20Sopenharmony_ci task_lock(me); 7778c2ecf20Sopenharmony_ci me->files = cur_fds; 7788c2ecf20Sopenharmony_ci task_unlock(me); 7798c2ecf20Sopenharmony_ci put_files_struct(fds); 7808c2ecf20Sopenharmony_ci } 7818c2ecf20Sopenharmony_ci 7828c2ecf20Sopenharmony_ci return 0; 7838c2ecf20Sopenharmony_ci} 7848c2ecf20Sopenharmony_ci 7858c2ecf20Sopenharmony_ci/* 7868c2ecf20Sopenharmony_ci * See close_fd_get_file() below, this variant assumes current->files->file_lock 7878c2ecf20Sopenharmony_ci * is held. 7888c2ecf20Sopenharmony_ci */ 7898c2ecf20Sopenharmony_ciint __close_fd_get_file(unsigned int fd, struct file **res) 7908c2ecf20Sopenharmony_ci{ 7918c2ecf20Sopenharmony_ci struct files_struct *files = current->files; 7928c2ecf20Sopenharmony_ci struct file *file; 7938c2ecf20Sopenharmony_ci struct fdtable *fdt; 7948c2ecf20Sopenharmony_ci 7958c2ecf20Sopenharmony_ci fdt = files_fdtable(files); 7968c2ecf20Sopenharmony_ci if (fd >= fdt->max_fds) 7978c2ecf20Sopenharmony_ci goto out_err; 7988c2ecf20Sopenharmony_ci file = fdt->fd[fd]; 7998c2ecf20Sopenharmony_ci if (!file) 8008c2ecf20Sopenharmony_ci goto out_err; 8018c2ecf20Sopenharmony_ci rcu_assign_pointer(fdt->fd[fd], NULL); 8028c2ecf20Sopenharmony_ci __put_unused_fd(files, fd); 8038c2ecf20Sopenharmony_ci get_file(file); 8048c2ecf20Sopenharmony_ci *res = file; 8058c2ecf20Sopenharmony_ci return 0; 8068c2ecf20Sopenharmony_ciout_err: 8078c2ecf20Sopenharmony_ci *res = NULL; 8088c2ecf20Sopenharmony_ci return -ENOENT; 8098c2ecf20Sopenharmony_ci} 8108c2ecf20Sopenharmony_ci 8118c2ecf20Sopenharmony_ci/* 8128c2ecf20Sopenharmony_ci * variant of close_fd that gets a ref on the file for later fput. 8138c2ecf20Sopenharmony_ci * The caller must ensure that filp_close() called on the file, and then 8148c2ecf20Sopenharmony_ci * an fput(). 8158c2ecf20Sopenharmony_ci */ 8168c2ecf20Sopenharmony_ciint close_fd_get_file(unsigned int fd, struct file **res) 8178c2ecf20Sopenharmony_ci{ 8188c2ecf20Sopenharmony_ci struct files_struct *files = current->files; 8198c2ecf20Sopenharmony_ci int ret; 8208c2ecf20Sopenharmony_ci 8218c2ecf20Sopenharmony_ci spin_lock(&files->file_lock); 8228c2ecf20Sopenharmony_ci ret = __close_fd_get_file(fd, res); 8238c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 8248c2ecf20Sopenharmony_ci 8258c2ecf20Sopenharmony_ci return ret; 8268c2ecf20Sopenharmony_ci} 8278c2ecf20Sopenharmony_ci 8288c2ecf20Sopenharmony_civoid do_close_on_exec(struct files_struct *files) 8298c2ecf20Sopenharmony_ci{ 8308c2ecf20Sopenharmony_ci unsigned i; 8318c2ecf20Sopenharmony_ci struct fdtable *fdt; 8328c2ecf20Sopenharmony_ci 8338c2ecf20Sopenharmony_ci /* exec unshares first */ 8348c2ecf20Sopenharmony_ci spin_lock(&files->file_lock); 8358c2ecf20Sopenharmony_ci for (i = 0; ; i++) { 8368c2ecf20Sopenharmony_ci unsigned long set; 8378c2ecf20Sopenharmony_ci unsigned fd = i * BITS_PER_LONG; 8388c2ecf20Sopenharmony_ci fdt = files_fdtable(files); 8398c2ecf20Sopenharmony_ci if (fd >= fdt->max_fds) 8408c2ecf20Sopenharmony_ci break; 8418c2ecf20Sopenharmony_ci set = fdt->close_on_exec[i]; 8428c2ecf20Sopenharmony_ci if (!set) 8438c2ecf20Sopenharmony_ci continue; 8448c2ecf20Sopenharmony_ci fdt->close_on_exec[i] = 0; 8458c2ecf20Sopenharmony_ci for ( ; set ; fd++, set >>= 1) { 8468c2ecf20Sopenharmony_ci struct file *file; 8478c2ecf20Sopenharmony_ci if (!(set & 1)) 8488c2ecf20Sopenharmony_ci continue; 8498c2ecf20Sopenharmony_ci file = fdt->fd[fd]; 8508c2ecf20Sopenharmony_ci if (!file) 8518c2ecf20Sopenharmony_ci continue; 8528c2ecf20Sopenharmony_ci rcu_assign_pointer(fdt->fd[fd], NULL); 8538c2ecf20Sopenharmony_ci __put_unused_fd(files, fd); 8548c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 8558c2ecf20Sopenharmony_ci filp_close(file, files); 8568c2ecf20Sopenharmony_ci cond_resched(); 8578c2ecf20Sopenharmony_ci spin_lock(&files->file_lock); 8588c2ecf20Sopenharmony_ci } 8598c2ecf20Sopenharmony_ci 8608c2ecf20Sopenharmony_ci } 8618c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 8628c2ecf20Sopenharmony_ci} 8638c2ecf20Sopenharmony_ci 8648c2ecf20Sopenharmony_cistatic inline struct file *__fget_files_rcu(struct files_struct *files, 8658c2ecf20Sopenharmony_ci unsigned int fd, fmode_t mask, unsigned int refs) 8668c2ecf20Sopenharmony_ci{ 8678c2ecf20Sopenharmony_ci for (;;) { 8688c2ecf20Sopenharmony_ci struct file *file; 8698c2ecf20Sopenharmony_ci struct fdtable *fdt = rcu_dereference_raw(files->fdt); 8708c2ecf20Sopenharmony_ci struct file __rcu **fdentry; 8718c2ecf20Sopenharmony_ci 8728c2ecf20Sopenharmony_ci if (unlikely(fd >= fdt->max_fds)) 8738c2ecf20Sopenharmony_ci return NULL; 8748c2ecf20Sopenharmony_ci 8758c2ecf20Sopenharmony_ci fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds); 8768c2ecf20Sopenharmony_ci file = rcu_dereference_raw(*fdentry); 8778c2ecf20Sopenharmony_ci if (unlikely(!file)) 8788c2ecf20Sopenharmony_ci return NULL; 8798c2ecf20Sopenharmony_ci 8808c2ecf20Sopenharmony_ci if (unlikely(file->f_mode & mask)) 8818c2ecf20Sopenharmony_ci return NULL; 8828c2ecf20Sopenharmony_ci 8838c2ecf20Sopenharmony_ci /* 8848c2ecf20Sopenharmony_ci * Ok, we have a file pointer. However, because we do 8858c2ecf20Sopenharmony_ci * this all locklessly under RCU, we may be racing with 8868c2ecf20Sopenharmony_ci * that file being closed. 8878c2ecf20Sopenharmony_ci * 8888c2ecf20Sopenharmony_ci * Such a race can take two forms: 8898c2ecf20Sopenharmony_ci * 8908c2ecf20Sopenharmony_ci * (a) the file ref already went down to zero, 8918c2ecf20Sopenharmony_ci * and get_file_rcu_many() fails. Just try 8928c2ecf20Sopenharmony_ci * again: 8938c2ecf20Sopenharmony_ci */ 8948c2ecf20Sopenharmony_ci if (unlikely(!get_file_rcu_many(file, refs))) 8958c2ecf20Sopenharmony_ci continue; 8968c2ecf20Sopenharmony_ci 8978c2ecf20Sopenharmony_ci /* 8988c2ecf20Sopenharmony_ci * (b) the file table entry has changed under us. 8998c2ecf20Sopenharmony_ci * Note that we don't need to re-check the 'fdt->fd' 9008c2ecf20Sopenharmony_ci * pointer having changed, because it always goes 9018c2ecf20Sopenharmony_ci * hand-in-hand with 'fdt'. 9028c2ecf20Sopenharmony_ci * 9038c2ecf20Sopenharmony_ci * If so, we need to put our refs and try again. 9048c2ecf20Sopenharmony_ci */ 9058c2ecf20Sopenharmony_ci if (unlikely(rcu_dereference_raw(files->fdt) != fdt) || 9068c2ecf20Sopenharmony_ci unlikely(rcu_dereference_raw(*fdentry) != file)) { 9078c2ecf20Sopenharmony_ci fput_many(file, refs); 9088c2ecf20Sopenharmony_ci continue; 9098c2ecf20Sopenharmony_ci } 9108c2ecf20Sopenharmony_ci 9118c2ecf20Sopenharmony_ci /* 9128c2ecf20Sopenharmony_ci * Ok, we have a ref to the file, and checked that it 9138c2ecf20Sopenharmony_ci * still exists. 9148c2ecf20Sopenharmony_ci */ 9158c2ecf20Sopenharmony_ci return file; 9168c2ecf20Sopenharmony_ci } 9178c2ecf20Sopenharmony_ci} 9188c2ecf20Sopenharmony_ci 9198c2ecf20Sopenharmony_cistatic struct file *__fget_files(struct files_struct *files, unsigned int fd, 9208c2ecf20Sopenharmony_ci fmode_t mask, unsigned int refs) 9218c2ecf20Sopenharmony_ci{ 9228c2ecf20Sopenharmony_ci struct file *file; 9238c2ecf20Sopenharmony_ci 9248c2ecf20Sopenharmony_ci rcu_read_lock(); 9258c2ecf20Sopenharmony_ci file = __fget_files_rcu(files, fd, mask, refs); 9268c2ecf20Sopenharmony_ci rcu_read_unlock(); 9278c2ecf20Sopenharmony_ci 9288c2ecf20Sopenharmony_ci return file; 9298c2ecf20Sopenharmony_ci} 9308c2ecf20Sopenharmony_ci 9318c2ecf20Sopenharmony_cistatic inline struct file *__fget(unsigned int fd, fmode_t mask, 9328c2ecf20Sopenharmony_ci unsigned int refs) 9338c2ecf20Sopenharmony_ci{ 9348c2ecf20Sopenharmony_ci return __fget_files(current->files, fd, mask, refs); 9358c2ecf20Sopenharmony_ci} 9368c2ecf20Sopenharmony_ci 9378c2ecf20Sopenharmony_cistruct file *fget_many(unsigned int fd, unsigned int refs) 9388c2ecf20Sopenharmony_ci{ 9398c2ecf20Sopenharmony_ci return __fget(fd, FMODE_PATH, refs); 9408c2ecf20Sopenharmony_ci} 9418c2ecf20Sopenharmony_ci 9428c2ecf20Sopenharmony_cistruct file *fget(unsigned int fd) 9438c2ecf20Sopenharmony_ci{ 9448c2ecf20Sopenharmony_ci return __fget(fd, FMODE_PATH, 1); 9458c2ecf20Sopenharmony_ci} 9468c2ecf20Sopenharmony_ciEXPORT_SYMBOL(fget); 9478c2ecf20Sopenharmony_ci 9488c2ecf20Sopenharmony_cistruct file *fget_raw(unsigned int fd) 9498c2ecf20Sopenharmony_ci{ 9508c2ecf20Sopenharmony_ci return __fget(fd, 0, 1); 9518c2ecf20Sopenharmony_ci} 9528c2ecf20Sopenharmony_ciEXPORT_SYMBOL(fget_raw); 9538c2ecf20Sopenharmony_ci 9548c2ecf20Sopenharmony_cistruct file *fget_task(struct task_struct *task, unsigned int fd) 9558c2ecf20Sopenharmony_ci{ 9568c2ecf20Sopenharmony_ci struct file *file = NULL; 9578c2ecf20Sopenharmony_ci 9588c2ecf20Sopenharmony_ci task_lock(task); 9598c2ecf20Sopenharmony_ci if (task->files) 9608c2ecf20Sopenharmony_ci file = __fget_files(task->files, fd, 0, 1); 9618c2ecf20Sopenharmony_ci task_unlock(task); 9628c2ecf20Sopenharmony_ci 9638c2ecf20Sopenharmony_ci return file; 9648c2ecf20Sopenharmony_ci} 9658c2ecf20Sopenharmony_ci 9668c2ecf20Sopenharmony_ci/* 9678c2ecf20Sopenharmony_ci * Lightweight file lookup - no refcnt increment if fd table isn't shared. 9688c2ecf20Sopenharmony_ci * 9698c2ecf20Sopenharmony_ci * You can use this instead of fget if you satisfy all of the following 9708c2ecf20Sopenharmony_ci * conditions: 9718c2ecf20Sopenharmony_ci * 1) You must call fput_light before exiting the syscall and returning control 9728c2ecf20Sopenharmony_ci * to userspace (i.e. you cannot remember the returned struct file * after 9738c2ecf20Sopenharmony_ci * returning to userspace). 9748c2ecf20Sopenharmony_ci * 2) You must not call filp_close on the returned struct file * in between 9758c2ecf20Sopenharmony_ci * calls to fget_light and fput_light. 9768c2ecf20Sopenharmony_ci * 3) You must not clone the current task in between the calls to fget_light 9778c2ecf20Sopenharmony_ci * and fput_light. 9788c2ecf20Sopenharmony_ci * 9798c2ecf20Sopenharmony_ci * The fput_needed flag returned by fget_light should be passed to the 9808c2ecf20Sopenharmony_ci * corresponding fput_light. 9818c2ecf20Sopenharmony_ci */ 9828c2ecf20Sopenharmony_cistatic unsigned long __fget_light(unsigned int fd, fmode_t mask) 9838c2ecf20Sopenharmony_ci{ 9848c2ecf20Sopenharmony_ci struct files_struct *files = current->files; 9858c2ecf20Sopenharmony_ci struct file *file; 9868c2ecf20Sopenharmony_ci 9878c2ecf20Sopenharmony_ci if (atomic_read(&files->count) == 1) { 9888c2ecf20Sopenharmony_ci file = files_lookup_fd_raw(files, fd); 9898c2ecf20Sopenharmony_ci if (!file || unlikely(file->f_mode & mask)) 9908c2ecf20Sopenharmony_ci return 0; 9918c2ecf20Sopenharmony_ci return (unsigned long)file; 9928c2ecf20Sopenharmony_ci } else { 9938c2ecf20Sopenharmony_ci file = __fget(fd, mask, 1); 9948c2ecf20Sopenharmony_ci if (!file) 9958c2ecf20Sopenharmony_ci return 0; 9968c2ecf20Sopenharmony_ci return FDPUT_FPUT | (unsigned long)file; 9978c2ecf20Sopenharmony_ci } 9988c2ecf20Sopenharmony_ci} 9998c2ecf20Sopenharmony_ciunsigned long __fdget(unsigned int fd) 10008c2ecf20Sopenharmony_ci{ 10018c2ecf20Sopenharmony_ci return __fget_light(fd, FMODE_PATH); 10028c2ecf20Sopenharmony_ci} 10038c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__fdget); 10048c2ecf20Sopenharmony_ci 10058c2ecf20Sopenharmony_ciunsigned long __fdget_raw(unsigned int fd) 10068c2ecf20Sopenharmony_ci{ 10078c2ecf20Sopenharmony_ci return __fget_light(fd, 0); 10088c2ecf20Sopenharmony_ci} 10098c2ecf20Sopenharmony_ci 10108c2ecf20Sopenharmony_ci/* 10118c2ecf20Sopenharmony_ci * Try to avoid f_pos locking. We only need it if the 10128c2ecf20Sopenharmony_ci * file is marked for FMODE_ATOMIC_POS, and it can be 10138c2ecf20Sopenharmony_ci * accessed multiple ways. 10148c2ecf20Sopenharmony_ci * 10158c2ecf20Sopenharmony_ci * Always do it for directories, because pidfd_getfd() 10168c2ecf20Sopenharmony_ci * can make a file accessible even if it otherwise would 10178c2ecf20Sopenharmony_ci * not be, and for directories this is a correctness 10188c2ecf20Sopenharmony_ci * issue, not a "POSIX requirement". 10198c2ecf20Sopenharmony_ci */ 10208c2ecf20Sopenharmony_cistatic inline bool file_needs_f_pos_lock(struct file *file) 10218c2ecf20Sopenharmony_ci{ 10228c2ecf20Sopenharmony_ci return (file->f_mode & FMODE_ATOMIC_POS) && 10238c2ecf20Sopenharmony_ci (file_count(file) > 1 || S_ISDIR(file_inode(file)->i_mode)); 10248c2ecf20Sopenharmony_ci} 10258c2ecf20Sopenharmony_ci 10268c2ecf20Sopenharmony_ciunsigned long __fdget_pos(unsigned int fd) 10278c2ecf20Sopenharmony_ci{ 10288c2ecf20Sopenharmony_ci unsigned long v = __fdget(fd); 10298c2ecf20Sopenharmony_ci struct file *file = (struct file *)(v & ~3); 10308c2ecf20Sopenharmony_ci 10318c2ecf20Sopenharmony_ci if (file && file_needs_f_pos_lock(file)) { 10328c2ecf20Sopenharmony_ci v |= FDPUT_POS_UNLOCK; 10338c2ecf20Sopenharmony_ci mutex_lock(&file->f_pos_lock); 10348c2ecf20Sopenharmony_ci } 10358c2ecf20Sopenharmony_ci return v; 10368c2ecf20Sopenharmony_ci} 10378c2ecf20Sopenharmony_ci 10388c2ecf20Sopenharmony_civoid __f_unlock_pos(struct file *f) 10398c2ecf20Sopenharmony_ci{ 10408c2ecf20Sopenharmony_ci mutex_unlock(&f->f_pos_lock); 10418c2ecf20Sopenharmony_ci} 10428c2ecf20Sopenharmony_ci 10438c2ecf20Sopenharmony_ci/* 10448c2ecf20Sopenharmony_ci * We only lock f_pos if we have threads or if the file might be 10458c2ecf20Sopenharmony_ci * shared with another process. In both cases we'll have an elevated 10468c2ecf20Sopenharmony_ci * file count (done either by fdget() or by fork()). 10478c2ecf20Sopenharmony_ci */ 10488c2ecf20Sopenharmony_ci 10498c2ecf20Sopenharmony_civoid set_close_on_exec(unsigned int fd, int flag) 10508c2ecf20Sopenharmony_ci{ 10518c2ecf20Sopenharmony_ci struct files_struct *files = current->files; 10528c2ecf20Sopenharmony_ci struct fdtable *fdt; 10538c2ecf20Sopenharmony_ci spin_lock(&files->file_lock); 10548c2ecf20Sopenharmony_ci fdt = files_fdtable(files); 10558c2ecf20Sopenharmony_ci if (flag) 10568c2ecf20Sopenharmony_ci __set_close_on_exec(fd, fdt); 10578c2ecf20Sopenharmony_ci else 10588c2ecf20Sopenharmony_ci __clear_close_on_exec(fd, fdt); 10598c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 10608c2ecf20Sopenharmony_ci} 10618c2ecf20Sopenharmony_ci 10628c2ecf20Sopenharmony_cibool get_close_on_exec(unsigned int fd) 10638c2ecf20Sopenharmony_ci{ 10648c2ecf20Sopenharmony_ci struct files_struct *files = current->files; 10658c2ecf20Sopenharmony_ci struct fdtable *fdt; 10668c2ecf20Sopenharmony_ci bool res; 10678c2ecf20Sopenharmony_ci rcu_read_lock(); 10688c2ecf20Sopenharmony_ci fdt = files_fdtable(files); 10698c2ecf20Sopenharmony_ci res = close_on_exec(fd, fdt); 10708c2ecf20Sopenharmony_ci rcu_read_unlock(); 10718c2ecf20Sopenharmony_ci return res; 10728c2ecf20Sopenharmony_ci} 10738c2ecf20Sopenharmony_ci 10748c2ecf20Sopenharmony_cistatic int do_dup2(struct files_struct *files, 10758c2ecf20Sopenharmony_ci struct file *file, unsigned fd, unsigned flags) 10768c2ecf20Sopenharmony_ci__releases(&files->file_lock) 10778c2ecf20Sopenharmony_ci{ 10788c2ecf20Sopenharmony_ci struct file *tofree; 10798c2ecf20Sopenharmony_ci struct fdtable *fdt; 10808c2ecf20Sopenharmony_ci 10818c2ecf20Sopenharmony_ci /* 10828c2ecf20Sopenharmony_ci * We need to detect attempts to do dup2() over allocated but still 10838c2ecf20Sopenharmony_ci * not finished descriptor. NB: OpenBSD avoids that at the price of 10848c2ecf20Sopenharmony_ci * extra work in their equivalent of fget() - they insert struct 10858c2ecf20Sopenharmony_ci * file immediately after grabbing descriptor, mark it larval if 10868c2ecf20Sopenharmony_ci * more work (e.g. actual opening) is needed and make sure that 10878c2ecf20Sopenharmony_ci * fget() treats larval files as absent. Potentially interesting, 10888c2ecf20Sopenharmony_ci * but while extra work in fget() is trivial, locking implications 10898c2ecf20Sopenharmony_ci * and amount of surgery on open()-related paths in VFS are not. 10908c2ecf20Sopenharmony_ci * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" 10918c2ecf20Sopenharmony_ci * deadlocks in rather amusing ways, AFAICS. All of that is out of 10928c2ecf20Sopenharmony_ci * scope of POSIX or SUS, since neither considers shared descriptor 10938c2ecf20Sopenharmony_ci * tables and this condition does not arise without those. 10948c2ecf20Sopenharmony_ci */ 10958c2ecf20Sopenharmony_ci fdt = files_fdtable(files); 10968c2ecf20Sopenharmony_ci tofree = fdt->fd[fd]; 10978c2ecf20Sopenharmony_ci if (!tofree && fd_is_open(fd, fdt)) 10988c2ecf20Sopenharmony_ci goto Ebusy; 10998c2ecf20Sopenharmony_ci get_file(file); 11008c2ecf20Sopenharmony_ci rcu_assign_pointer(fdt->fd[fd], file); 11018c2ecf20Sopenharmony_ci __set_open_fd(fd, fdt); 11028c2ecf20Sopenharmony_ci if (flags & O_CLOEXEC) 11038c2ecf20Sopenharmony_ci __set_close_on_exec(fd, fdt); 11048c2ecf20Sopenharmony_ci else 11058c2ecf20Sopenharmony_ci __clear_close_on_exec(fd, fdt); 11068c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 11078c2ecf20Sopenharmony_ci 11088c2ecf20Sopenharmony_ci if (tofree) 11098c2ecf20Sopenharmony_ci filp_close(tofree, files); 11108c2ecf20Sopenharmony_ci 11118c2ecf20Sopenharmony_ci return fd; 11128c2ecf20Sopenharmony_ci 11138c2ecf20Sopenharmony_ciEbusy: 11148c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 11158c2ecf20Sopenharmony_ci return -EBUSY; 11168c2ecf20Sopenharmony_ci} 11178c2ecf20Sopenharmony_ci 11188c2ecf20Sopenharmony_ciint replace_fd(unsigned fd, struct file *file, unsigned flags) 11198c2ecf20Sopenharmony_ci{ 11208c2ecf20Sopenharmony_ci int err; 11218c2ecf20Sopenharmony_ci struct files_struct *files = current->files; 11228c2ecf20Sopenharmony_ci 11238c2ecf20Sopenharmony_ci if (!file) 11248c2ecf20Sopenharmony_ci return __close_fd(files, fd); 11258c2ecf20Sopenharmony_ci 11268c2ecf20Sopenharmony_ci if (fd >= rlimit(RLIMIT_NOFILE)) 11278c2ecf20Sopenharmony_ci return -EBADF; 11288c2ecf20Sopenharmony_ci 11298c2ecf20Sopenharmony_ci spin_lock(&files->file_lock); 11308c2ecf20Sopenharmony_ci err = expand_files(files, fd); 11318c2ecf20Sopenharmony_ci if (unlikely(err < 0)) 11328c2ecf20Sopenharmony_ci goto out_unlock; 11338c2ecf20Sopenharmony_ci return do_dup2(files, file, fd, flags); 11348c2ecf20Sopenharmony_ci 11358c2ecf20Sopenharmony_ciout_unlock: 11368c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 11378c2ecf20Sopenharmony_ci return err; 11388c2ecf20Sopenharmony_ci} 11398c2ecf20Sopenharmony_ci 11408c2ecf20Sopenharmony_ci/** 11418c2ecf20Sopenharmony_ci * __receive_fd() - Install received file into file descriptor table 11428c2ecf20Sopenharmony_ci * 11438c2ecf20Sopenharmony_ci * @fd: fd to install into (if negative, a new fd will be allocated) 11448c2ecf20Sopenharmony_ci * @file: struct file that was received from another process 11458c2ecf20Sopenharmony_ci * @ufd: __user pointer to write new fd number to 11468c2ecf20Sopenharmony_ci * @o_flags: the O_* flags to apply to the new fd entry 11478c2ecf20Sopenharmony_ci * 11488c2ecf20Sopenharmony_ci * Installs a received file into the file descriptor table, with appropriate 11498c2ecf20Sopenharmony_ci * checks and count updates. Optionally writes the fd number to userspace, if 11508c2ecf20Sopenharmony_ci * @ufd is non-NULL. 11518c2ecf20Sopenharmony_ci * 11528c2ecf20Sopenharmony_ci * This helper handles its own reference counting of the incoming 11538c2ecf20Sopenharmony_ci * struct file. 11548c2ecf20Sopenharmony_ci * 11558c2ecf20Sopenharmony_ci * Returns newly install fd or -ve on error. 11568c2ecf20Sopenharmony_ci */ 11578c2ecf20Sopenharmony_ciint __receive_fd(int fd, struct file *file, int __user *ufd, unsigned int o_flags) 11588c2ecf20Sopenharmony_ci{ 11598c2ecf20Sopenharmony_ci int new_fd; 11608c2ecf20Sopenharmony_ci int error; 11618c2ecf20Sopenharmony_ci 11628c2ecf20Sopenharmony_ci error = security_file_receive(file); 11638c2ecf20Sopenharmony_ci if (error) 11648c2ecf20Sopenharmony_ci return error; 11658c2ecf20Sopenharmony_ci 11668c2ecf20Sopenharmony_ci if (fd < 0) { 11678c2ecf20Sopenharmony_ci new_fd = get_unused_fd_flags(o_flags); 11688c2ecf20Sopenharmony_ci if (new_fd < 0) 11698c2ecf20Sopenharmony_ci return new_fd; 11708c2ecf20Sopenharmony_ci } else { 11718c2ecf20Sopenharmony_ci new_fd = fd; 11728c2ecf20Sopenharmony_ci } 11738c2ecf20Sopenharmony_ci 11748c2ecf20Sopenharmony_ci if (ufd) { 11758c2ecf20Sopenharmony_ci error = put_user(new_fd, ufd); 11768c2ecf20Sopenharmony_ci if (error) { 11778c2ecf20Sopenharmony_ci if (fd < 0) 11788c2ecf20Sopenharmony_ci put_unused_fd(new_fd); 11798c2ecf20Sopenharmony_ci return error; 11808c2ecf20Sopenharmony_ci } 11818c2ecf20Sopenharmony_ci } 11828c2ecf20Sopenharmony_ci 11838c2ecf20Sopenharmony_ci if (fd < 0) { 11848c2ecf20Sopenharmony_ci fd_install(new_fd, get_file(file)); 11858c2ecf20Sopenharmony_ci } else { 11868c2ecf20Sopenharmony_ci error = replace_fd(new_fd, file, o_flags); 11878c2ecf20Sopenharmony_ci if (error) 11888c2ecf20Sopenharmony_ci return error; 11898c2ecf20Sopenharmony_ci } 11908c2ecf20Sopenharmony_ci 11918c2ecf20Sopenharmony_ci /* Bump the sock usage counts, if any. */ 11928c2ecf20Sopenharmony_ci __receive_sock(file); 11938c2ecf20Sopenharmony_ci return new_fd; 11948c2ecf20Sopenharmony_ci} 11958c2ecf20Sopenharmony_ci 11968c2ecf20Sopenharmony_cistatic int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) 11978c2ecf20Sopenharmony_ci{ 11988c2ecf20Sopenharmony_ci int err = -EBADF; 11998c2ecf20Sopenharmony_ci struct file *file; 12008c2ecf20Sopenharmony_ci struct files_struct *files = current->files; 12018c2ecf20Sopenharmony_ci 12028c2ecf20Sopenharmony_ci if ((flags & ~O_CLOEXEC) != 0) 12038c2ecf20Sopenharmony_ci return -EINVAL; 12048c2ecf20Sopenharmony_ci 12058c2ecf20Sopenharmony_ci if (unlikely(oldfd == newfd)) 12068c2ecf20Sopenharmony_ci return -EINVAL; 12078c2ecf20Sopenharmony_ci 12088c2ecf20Sopenharmony_ci if (newfd >= rlimit(RLIMIT_NOFILE)) 12098c2ecf20Sopenharmony_ci return -EBADF; 12108c2ecf20Sopenharmony_ci 12118c2ecf20Sopenharmony_ci spin_lock(&files->file_lock); 12128c2ecf20Sopenharmony_ci err = expand_files(files, newfd); 12138c2ecf20Sopenharmony_ci file = files_lookup_fd_locked(files, oldfd); 12148c2ecf20Sopenharmony_ci if (unlikely(!file)) 12158c2ecf20Sopenharmony_ci goto Ebadf; 12168c2ecf20Sopenharmony_ci if (unlikely(err < 0)) { 12178c2ecf20Sopenharmony_ci if (err == -EMFILE) 12188c2ecf20Sopenharmony_ci goto Ebadf; 12198c2ecf20Sopenharmony_ci goto out_unlock; 12208c2ecf20Sopenharmony_ci } 12218c2ecf20Sopenharmony_ci return do_dup2(files, file, newfd, flags); 12228c2ecf20Sopenharmony_ci 12238c2ecf20Sopenharmony_ciEbadf: 12248c2ecf20Sopenharmony_ci err = -EBADF; 12258c2ecf20Sopenharmony_ciout_unlock: 12268c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 12278c2ecf20Sopenharmony_ci return err; 12288c2ecf20Sopenharmony_ci} 12298c2ecf20Sopenharmony_ci 12308c2ecf20Sopenharmony_ciSYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) 12318c2ecf20Sopenharmony_ci{ 12328c2ecf20Sopenharmony_ci return ksys_dup3(oldfd, newfd, flags); 12338c2ecf20Sopenharmony_ci} 12348c2ecf20Sopenharmony_ci 12358c2ecf20Sopenharmony_ciSYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) 12368c2ecf20Sopenharmony_ci{ 12378c2ecf20Sopenharmony_ci if (unlikely(newfd == oldfd)) { /* corner case */ 12388c2ecf20Sopenharmony_ci struct files_struct *files = current->files; 12398c2ecf20Sopenharmony_ci int retval = oldfd; 12408c2ecf20Sopenharmony_ci 12418c2ecf20Sopenharmony_ci rcu_read_lock(); 12428c2ecf20Sopenharmony_ci if (!fcheck_files(files, oldfd)) 12438c2ecf20Sopenharmony_ci retval = -EBADF; 12448c2ecf20Sopenharmony_ci rcu_read_unlock(); 12458c2ecf20Sopenharmony_ci return retval; 12468c2ecf20Sopenharmony_ci } 12478c2ecf20Sopenharmony_ci return ksys_dup3(oldfd, newfd, 0); 12488c2ecf20Sopenharmony_ci} 12498c2ecf20Sopenharmony_ci 12508c2ecf20Sopenharmony_ciSYSCALL_DEFINE1(dup, unsigned int, fildes) 12518c2ecf20Sopenharmony_ci{ 12528c2ecf20Sopenharmony_ci int ret = -EBADF; 12538c2ecf20Sopenharmony_ci struct file *file = fget_raw(fildes); 12548c2ecf20Sopenharmony_ci 12558c2ecf20Sopenharmony_ci if (file) { 12568c2ecf20Sopenharmony_ci ret = get_unused_fd_flags(0); 12578c2ecf20Sopenharmony_ci if (ret >= 0) 12588c2ecf20Sopenharmony_ci fd_install(ret, file); 12598c2ecf20Sopenharmony_ci else 12608c2ecf20Sopenharmony_ci fput(file); 12618c2ecf20Sopenharmony_ci } 12628c2ecf20Sopenharmony_ci return ret; 12638c2ecf20Sopenharmony_ci} 12648c2ecf20Sopenharmony_ci 12658c2ecf20Sopenharmony_ciint f_dupfd(unsigned int from, struct file *file, unsigned flags) 12668c2ecf20Sopenharmony_ci{ 12678c2ecf20Sopenharmony_ci int err; 12688c2ecf20Sopenharmony_ci if (from >= rlimit(RLIMIT_NOFILE)) 12698c2ecf20Sopenharmony_ci return -EINVAL; 12708c2ecf20Sopenharmony_ci err = alloc_fd(from, flags); 12718c2ecf20Sopenharmony_ci if (err >= 0) { 12728c2ecf20Sopenharmony_ci get_file(file); 12738c2ecf20Sopenharmony_ci fd_install(err, file); 12748c2ecf20Sopenharmony_ci } 12758c2ecf20Sopenharmony_ci return err; 12768c2ecf20Sopenharmony_ci} 12778c2ecf20Sopenharmony_ci 12788c2ecf20Sopenharmony_ciint iterate_fd(struct files_struct *files, unsigned n, 12798c2ecf20Sopenharmony_ci int (*f)(const void *, struct file *, unsigned), 12808c2ecf20Sopenharmony_ci const void *p) 12818c2ecf20Sopenharmony_ci{ 12828c2ecf20Sopenharmony_ci struct fdtable *fdt; 12838c2ecf20Sopenharmony_ci int res = 0; 12848c2ecf20Sopenharmony_ci if (!files) 12858c2ecf20Sopenharmony_ci return 0; 12868c2ecf20Sopenharmony_ci spin_lock(&files->file_lock); 12878c2ecf20Sopenharmony_ci for (fdt = files_fdtable(files); n < fdt->max_fds; n++) { 12888c2ecf20Sopenharmony_ci struct file *file; 12898c2ecf20Sopenharmony_ci file = rcu_dereference_check_fdtable(files, fdt->fd[n]); 12908c2ecf20Sopenharmony_ci if (!file) 12918c2ecf20Sopenharmony_ci continue; 12928c2ecf20Sopenharmony_ci res = f(p, file, n); 12938c2ecf20Sopenharmony_ci if (res) 12948c2ecf20Sopenharmony_ci break; 12958c2ecf20Sopenharmony_ci } 12968c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 12978c2ecf20Sopenharmony_ci return res; 12988c2ecf20Sopenharmony_ci} 12998c2ecf20Sopenharmony_ciEXPORT_SYMBOL(iterate_fd); 1300