18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci *  linux/fs/file.c
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci *  Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci *  Manage the dynamic fd arrays in the process files_struct.
88c2ecf20Sopenharmony_ci */
98c2ecf20Sopenharmony_ci
108c2ecf20Sopenharmony_ci#include <linux/syscalls.h>
118c2ecf20Sopenharmony_ci#include <linux/export.h>
128c2ecf20Sopenharmony_ci#include <linux/fs.h>
138c2ecf20Sopenharmony_ci#include <linux/kernel.h>
148c2ecf20Sopenharmony_ci#include <linux/mm.h>
158c2ecf20Sopenharmony_ci#include <linux/sched/signal.h>
168c2ecf20Sopenharmony_ci#include <linux/slab.h>
178c2ecf20Sopenharmony_ci#include <linux/file.h>
188c2ecf20Sopenharmony_ci#include <linux/fdtable.h>
198c2ecf20Sopenharmony_ci#include <linux/bitops.h>
208c2ecf20Sopenharmony_ci#include <linux/spinlock.h>
218c2ecf20Sopenharmony_ci#include <linux/rcupdate.h>
228c2ecf20Sopenharmony_ci#include <linux/close_range.h>
238c2ecf20Sopenharmony_ci#include <net/sock.h>
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci#include "internal.h"
268c2ecf20Sopenharmony_ci
278c2ecf20Sopenharmony_ciunsigned int sysctl_nr_open __read_mostly = 1024*1024;
288c2ecf20Sopenharmony_ciunsigned int sysctl_nr_open_min = BITS_PER_LONG;
298c2ecf20Sopenharmony_ci/* our min() is unusable in constant expressions ;-/ */
308c2ecf20Sopenharmony_ci#define __const_min(x, y) ((x) < (y) ? (x) : (y))
318c2ecf20Sopenharmony_ciunsigned int sysctl_nr_open_max =
328c2ecf20Sopenharmony_ci	__const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG;
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_cistatic void __free_fdtable(struct fdtable *fdt)
358c2ecf20Sopenharmony_ci{
368c2ecf20Sopenharmony_ci	kvfree(fdt->fd);
378c2ecf20Sopenharmony_ci	kvfree(fdt->open_fds);
388c2ecf20Sopenharmony_ci	kfree(fdt);
398c2ecf20Sopenharmony_ci}
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_cistatic void free_fdtable_rcu(struct rcu_head *rcu)
428c2ecf20Sopenharmony_ci{
438c2ecf20Sopenharmony_ci	__free_fdtable(container_of(rcu, struct fdtable, rcu));
448c2ecf20Sopenharmony_ci}
458c2ecf20Sopenharmony_ci
/*
 * Number of longs needed for a bitmap that holds one bit per long of an
 * nr-bit bitmap (the layout used for ->full_fds_bits).
 */
#define BITBIT_NR(nr)	BITS_TO_LONGS(BITS_TO_LONGS(nr))
/* Size in bytes of that second-level bitmap. */
#define BITBIT_SIZE(nr)	(BITBIT_NR(nr) * sizeof(long))
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_ci/*
508c2ecf20Sopenharmony_ci * Copy 'count' fd bits from the old table to the new table and clear the extra
518c2ecf20Sopenharmony_ci * space if any.  This does not copy the file pointers.  Called with the files
528c2ecf20Sopenharmony_ci * spinlock held for write.
538c2ecf20Sopenharmony_ci */
548c2ecf20Sopenharmony_cistatic void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
558c2ecf20Sopenharmony_ci			    unsigned int count)
568c2ecf20Sopenharmony_ci{
578c2ecf20Sopenharmony_ci	unsigned int cpy, set;
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci	cpy = count / BITS_PER_BYTE;
608c2ecf20Sopenharmony_ci	set = (nfdt->max_fds - count) / BITS_PER_BYTE;
618c2ecf20Sopenharmony_ci	memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
628c2ecf20Sopenharmony_ci	memset((char *)nfdt->open_fds + cpy, 0, set);
638c2ecf20Sopenharmony_ci	memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
648c2ecf20Sopenharmony_ci	memset((char *)nfdt->close_on_exec + cpy, 0, set);
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	cpy = BITBIT_SIZE(count);
678c2ecf20Sopenharmony_ci	set = BITBIT_SIZE(nfdt->max_fds) - cpy;
688c2ecf20Sopenharmony_ci	memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy);
698c2ecf20Sopenharmony_ci	memset((char *)nfdt->full_fds_bits + cpy, 0, set);
708c2ecf20Sopenharmony_ci}
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_ci/*
738c2ecf20Sopenharmony_ci * Copy all file descriptors from the old table to the new, expanded table and
748c2ecf20Sopenharmony_ci * clear the extra space.  Called with the files spinlock held for write.
758c2ecf20Sopenharmony_ci */
768c2ecf20Sopenharmony_cistatic void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
778c2ecf20Sopenharmony_ci{
788c2ecf20Sopenharmony_ci	size_t cpy, set;
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	BUG_ON(nfdt->max_fds < ofdt->max_fds);
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci	cpy = ofdt->max_fds * sizeof(struct file *);
838c2ecf20Sopenharmony_ci	set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
848c2ecf20Sopenharmony_ci	memcpy(nfdt->fd, ofdt->fd, cpy);
858c2ecf20Sopenharmony_ci	memset((char *)nfdt->fd + cpy, 0, set);
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_ci	copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
888c2ecf20Sopenharmony_ci}
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci/*
918c2ecf20Sopenharmony_ci * Note how the fdtable bitmap allocations very much have to be a multiple of
928c2ecf20Sopenharmony_ci * BITS_PER_LONG. This is not only because we walk those things in chunks of
938c2ecf20Sopenharmony_ci * 'unsigned long' in some places, but simply because that is how the Linux
948c2ecf20Sopenharmony_ci * kernel bitmaps are defined to work: they are not "bits in an array of bytes",
958c2ecf20Sopenharmony_ci * they are very much "bits in an array of unsigned long".
968c2ecf20Sopenharmony_ci *
978c2ecf20Sopenharmony_ci * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied
988c2ecf20Sopenharmony_ci * by that "1024/sizeof(ptr)" before, we already know there are sufficient
998c2ecf20Sopenharmony_ci * clear low bits. Clang seems to realize that, gcc ends up being confused.
1008c2ecf20Sopenharmony_ci *
1018c2ecf20Sopenharmony_ci * On a 128-bit machine, the ALIGN() would actually matter. In the meantime,
1028c2ecf20Sopenharmony_ci * let's consider it documentation (and maybe a test-case for gcc to improve
1038c2ecf20Sopenharmony_ci * its code generation ;)
1048c2ecf20Sopenharmony_ci */
1058c2ecf20Sopenharmony_cistatic struct fdtable * alloc_fdtable(unsigned int nr)
1068c2ecf20Sopenharmony_ci{
1078c2ecf20Sopenharmony_ci	struct fdtable *fdt;
1088c2ecf20Sopenharmony_ci	void *data;
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci	/*
1118c2ecf20Sopenharmony_ci	 * Figure out how many fds we actually want to support in this fdtable.
1128c2ecf20Sopenharmony_ci	 * Allocation steps are keyed to the size of the fdarray, since it
1138c2ecf20Sopenharmony_ci	 * grows far faster than any of the other dynamic data. We try to fit
1148c2ecf20Sopenharmony_ci	 * the fdarray into comfortable page-tuned chunks: starting at 1024B
1158c2ecf20Sopenharmony_ci	 * and growing in powers of two from there on.
1168c2ecf20Sopenharmony_ci	 */
1178c2ecf20Sopenharmony_ci	nr /= (1024 / sizeof(struct file *));
1188c2ecf20Sopenharmony_ci	nr = roundup_pow_of_two(nr + 1);
1198c2ecf20Sopenharmony_ci	nr *= (1024 / sizeof(struct file *));
1208c2ecf20Sopenharmony_ci	nr = ALIGN(nr, BITS_PER_LONG);
1218c2ecf20Sopenharmony_ci	/*
1228c2ecf20Sopenharmony_ci	 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
1238c2ecf20Sopenharmony_ci	 * had been set lower between the check in expand_files() and here.  Deal
1248c2ecf20Sopenharmony_ci	 * with that in caller, it's cheaper that way.
1258c2ecf20Sopenharmony_ci	 *
1268c2ecf20Sopenharmony_ci	 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
1278c2ecf20Sopenharmony_ci	 * bitmaps handling below becomes unpleasant, to put it mildly...
1288c2ecf20Sopenharmony_ci	 */
1298c2ecf20Sopenharmony_ci	if (unlikely(nr > sysctl_nr_open))
1308c2ecf20Sopenharmony_ci		nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci	fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
1338c2ecf20Sopenharmony_ci	if (!fdt)
1348c2ecf20Sopenharmony_ci		goto out;
1358c2ecf20Sopenharmony_ci	fdt->max_fds = nr;
1368c2ecf20Sopenharmony_ci	data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT);
1378c2ecf20Sopenharmony_ci	if (!data)
1388c2ecf20Sopenharmony_ci		goto out_fdt;
1398c2ecf20Sopenharmony_ci	fdt->fd = data;
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci	data = kvmalloc(max_t(size_t,
1428c2ecf20Sopenharmony_ci				 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES),
1438c2ecf20Sopenharmony_ci				 GFP_KERNEL_ACCOUNT);
1448c2ecf20Sopenharmony_ci	if (!data)
1458c2ecf20Sopenharmony_ci		goto out_arr;
1468c2ecf20Sopenharmony_ci	fdt->open_fds = data;
1478c2ecf20Sopenharmony_ci	data += nr / BITS_PER_BYTE;
1488c2ecf20Sopenharmony_ci	fdt->close_on_exec = data;
1498c2ecf20Sopenharmony_ci	data += nr / BITS_PER_BYTE;
1508c2ecf20Sopenharmony_ci	fdt->full_fds_bits = data;
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ci	return fdt;
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_ciout_arr:
1558c2ecf20Sopenharmony_ci	kvfree(fdt->fd);
1568c2ecf20Sopenharmony_ciout_fdt:
1578c2ecf20Sopenharmony_ci	kfree(fdt);
1588c2ecf20Sopenharmony_ciout:
1598c2ecf20Sopenharmony_ci	return NULL;
1608c2ecf20Sopenharmony_ci}
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci/*
1638c2ecf20Sopenharmony_ci * Expand the file descriptor table.
1648c2ecf20Sopenharmony_ci * This function will allocate a new fdtable and both fd array and fdset, of
1658c2ecf20Sopenharmony_ci * the given size.
1668c2ecf20Sopenharmony_ci * Return <0 error code on error; 1 on successful completion.
1678c2ecf20Sopenharmony_ci * The files->file_lock should be held on entry, and will be held on exit.
1688c2ecf20Sopenharmony_ci */
1698c2ecf20Sopenharmony_cistatic int expand_fdtable(struct files_struct *files, unsigned int nr)
1708c2ecf20Sopenharmony_ci	__releases(files->file_lock)
1718c2ecf20Sopenharmony_ci	__acquires(files->file_lock)
1728c2ecf20Sopenharmony_ci{
1738c2ecf20Sopenharmony_ci	struct fdtable *new_fdt, *cur_fdt;
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci	spin_unlock(&files->file_lock);
1768c2ecf20Sopenharmony_ci	new_fdt = alloc_fdtable(nr);
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci	/* make sure all __fd_install() have seen resize_in_progress
1798c2ecf20Sopenharmony_ci	 * or have finished their rcu_read_lock_sched() section.
1808c2ecf20Sopenharmony_ci	 */
1818c2ecf20Sopenharmony_ci	if (atomic_read(&files->count) > 1)
1828c2ecf20Sopenharmony_ci		synchronize_rcu();
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci	spin_lock(&files->file_lock);
1858c2ecf20Sopenharmony_ci	if (!new_fdt)
1868c2ecf20Sopenharmony_ci		return -ENOMEM;
1878c2ecf20Sopenharmony_ci	/*
1888c2ecf20Sopenharmony_ci	 * extremely unlikely race - sysctl_nr_open decreased between the check in
1898c2ecf20Sopenharmony_ci	 * caller and alloc_fdtable().  Cheaper to catch it here...
1908c2ecf20Sopenharmony_ci	 */
1918c2ecf20Sopenharmony_ci	if (unlikely(new_fdt->max_fds <= nr)) {
1928c2ecf20Sopenharmony_ci		__free_fdtable(new_fdt);
1938c2ecf20Sopenharmony_ci		return -EMFILE;
1948c2ecf20Sopenharmony_ci	}
1958c2ecf20Sopenharmony_ci	cur_fdt = files_fdtable(files);
1968c2ecf20Sopenharmony_ci	BUG_ON(nr < cur_fdt->max_fds);
1978c2ecf20Sopenharmony_ci	copy_fdtable(new_fdt, cur_fdt);
1988c2ecf20Sopenharmony_ci	rcu_assign_pointer(files->fdt, new_fdt);
1998c2ecf20Sopenharmony_ci	if (cur_fdt != &files->fdtab)
2008c2ecf20Sopenharmony_ci		call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
2018c2ecf20Sopenharmony_ci	/* coupled with smp_rmb() in __fd_install() */
2028c2ecf20Sopenharmony_ci	smp_wmb();
2038c2ecf20Sopenharmony_ci	return 1;
2048c2ecf20Sopenharmony_ci}
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci/*
2078c2ecf20Sopenharmony_ci * Expand files.
2088c2ecf20Sopenharmony_ci * This function will expand the file structures, if the requested size exceeds
2098c2ecf20Sopenharmony_ci * the current capacity and there is room for expansion.
2108c2ecf20Sopenharmony_ci * Return <0 error code on error; 0 when nothing done; 1 when files were
2118c2ecf20Sopenharmony_ci * expanded and execution may have blocked.
2128c2ecf20Sopenharmony_ci * The files->file_lock should be held on entry, and will be held on exit.
2138c2ecf20Sopenharmony_ci */
2148c2ecf20Sopenharmony_cistatic int expand_files(struct files_struct *files, unsigned int nr)
2158c2ecf20Sopenharmony_ci	__releases(files->file_lock)
2168c2ecf20Sopenharmony_ci	__acquires(files->file_lock)
2178c2ecf20Sopenharmony_ci{
2188c2ecf20Sopenharmony_ci	struct fdtable *fdt;
2198c2ecf20Sopenharmony_ci	int expanded = 0;
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_cirepeat:
2228c2ecf20Sopenharmony_ci	fdt = files_fdtable(files);
2238c2ecf20Sopenharmony_ci
2248c2ecf20Sopenharmony_ci	/* Do we need to expand? */
2258c2ecf20Sopenharmony_ci	if (nr < fdt->max_fds)
2268c2ecf20Sopenharmony_ci		return expanded;
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci	/* Can we expand? */
2298c2ecf20Sopenharmony_ci	if (nr >= sysctl_nr_open)
2308c2ecf20Sopenharmony_ci		return -EMFILE;
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_ci	if (unlikely(files->resize_in_progress)) {
2338c2ecf20Sopenharmony_ci		spin_unlock(&files->file_lock);
2348c2ecf20Sopenharmony_ci		expanded = 1;
2358c2ecf20Sopenharmony_ci		wait_event(files->resize_wait, !files->resize_in_progress);
2368c2ecf20Sopenharmony_ci		spin_lock(&files->file_lock);
2378c2ecf20Sopenharmony_ci		goto repeat;
2388c2ecf20Sopenharmony_ci	}
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ci	/* All good, so we try */
2418c2ecf20Sopenharmony_ci	files->resize_in_progress = true;
2428c2ecf20Sopenharmony_ci	expanded = expand_fdtable(files, nr);
2438c2ecf20Sopenharmony_ci	files->resize_in_progress = false;
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_ci	wake_up_all(&files->resize_wait);
2468c2ecf20Sopenharmony_ci	return expanded;
2478c2ecf20Sopenharmony_ci}
2488c2ecf20Sopenharmony_ci
2498c2ecf20Sopenharmony_cistatic inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt)
2508c2ecf20Sopenharmony_ci{
2518c2ecf20Sopenharmony_ci	__set_bit(fd, fdt->close_on_exec);
2528c2ecf20Sopenharmony_ci}
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_cistatic inline void __clear_close_on_exec(unsigned int fd, struct fdtable *fdt)
2558c2ecf20Sopenharmony_ci{
2568c2ecf20Sopenharmony_ci	if (test_bit(fd, fdt->close_on_exec))
2578c2ecf20Sopenharmony_ci		__clear_bit(fd, fdt->close_on_exec);
2588c2ecf20Sopenharmony_ci}
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_cistatic inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
2618c2ecf20Sopenharmony_ci{
2628c2ecf20Sopenharmony_ci	__set_bit(fd, fdt->open_fds);
2638c2ecf20Sopenharmony_ci	fd /= BITS_PER_LONG;
2648c2ecf20Sopenharmony_ci	if (!~fdt->open_fds[fd])
2658c2ecf20Sopenharmony_ci		__set_bit(fd, fdt->full_fds_bits);
2668c2ecf20Sopenharmony_ci}
2678c2ecf20Sopenharmony_ci
2688c2ecf20Sopenharmony_cistatic inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
2698c2ecf20Sopenharmony_ci{
2708c2ecf20Sopenharmony_ci	__clear_bit(fd, fdt->open_fds);
2718c2ecf20Sopenharmony_ci	__clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits);
2728c2ecf20Sopenharmony_ci}
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_cistatic unsigned int count_open_files(struct fdtable *fdt)
2758c2ecf20Sopenharmony_ci{
2768c2ecf20Sopenharmony_ci	unsigned int size = fdt->max_fds;
2778c2ecf20Sopenharmony_ci	unsigned int i;
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_ci	/* Find the last open fd */
2808c2ecf20Sopenharmony_ci	for (i = size / BITS_PER_LONG; i > 0; ) {
2818c2ecf20Sopenharmony_ci		if (fdt->open_fds[--i])
2828c2ecf20Sopenharmony_ci			break;
2838c2ecf20Sopenharmony_ci	}
2848c2ecf20Sopenharmony_ci	i = (i + 1) * BITS_PER_LONG;
2858c2ecf20Sopenharmony_ci	return i;
2868c2ecf20Sopenharmony_ci}
2878c2ecf20Sopenharmony_ci
2888c2ecf20Sopenharmony_ci/*
2898c2ecf20Sopenharmony_ci * Note that a sane fdtable size always has to be a multiple of
2908c2ecf20Sopenharmony_ci * BITS_PER_LONG, since we have bitmaps that are sized by this.
2918c2ecf20Sopenharmony_ci *
2928c2ecf20Sopenharmony_ci * 'max_fds' will normally already be properly aligned, but it
2938c2ecf20Sopenharmony_ci * turns out that in the close_range() -> __close_range() ->
2948c2ecf20Sopenharmony_ci * unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end
2958c2ecf20Sopenharmony_ci * up having a 'max_fds' value that isn't already aligned.
2968c2ecf20Sopenharmony_ci *
2978c2ecf20Sopenharmony_ci * Rather than make close_range() have to worry about this,
2988c2ecf20Sopenharmony_ci * just make that BITS_PER_LONG alignment be part of a sane
2998c2ecf20Sopenharmony_ci * fdtable size. Becuase that's really what it is.
3008c2ecf20Sopenharmony_ci */
3018c2ecf20Sopenharmony_cistatic unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds)
3028c2ecf20Sopenharmony_ci{
3038c2ecf20Sopenharmony_ci	unsigned int count;
3048c2ecf20Sopenharmony_ci
3058c2ecf20Sopenharmony_ci	count = count_open_files(fdt);
3068c2ecf20Sopenharmony_ci	if (max_fds < NR_OPEN_DEFAULT)
3078c2ecf20Sopenharmony_ci		max_fds = NR_OPEN_DEFAULT;
3088c2ecf20Sopenharmony_ci	return ALIGN(min(count, max_fds), BITS_PER_LONG);
3098c2ecf20Sopenharmony_ci}
3108c2ecf20Sopenharmony_ci
3118c2ecf20Sopenharmony_ci/*
3128c2ecf20Sopenharmony_ci * Allocate a new files structure and copy contents from the
3138c2ecf20Sopenharmony_ci * passed in files structure.
3148c2ecf20Sopenharmony_ci * errorp will be valid only when the returned files_struct is NULL.
3158c2ecf20Sopenharmony_ci */
3168c2ecf20Sopenharmony_cistruct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
3178c2ecf20Sopenharmony_ci{
3188c2ecf20Sopenharmony_ci	struct files_struct *newf;
3198c2ecf20Sopenharmony_ci	struct file **old_fds, **new_fds;
3208c2ecf20Sopenharmony_ci	unsigned int open_files, i;
3218c2ecf20Sopenharmony_ci	struct fdtable *old_fdt, *new_fdt;
3228c2ecf20Sopenharmony_ci
3238c2ecf20Sopenharmony_ci	*errorp = -ENOMEM;
3248c2ecf20Sopenharmony_ci	newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
3258c2ecf20Sopenharmony_ci	if (!newf)
3268c2ecf20Sopenharmony_ci		goto out;
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_ci	atomic_set(&newf->count, 1);
3298c2ecf20Sopenharmony_ci
3308c2ecf20Sopenharmony_ci	spin_lock_init(&newf->file_lock);
3318c2ecf20Sopenharmony_ci	newf->resize_in_progress = false;
3328c2ecf20Sopenharmony_ci	init_waitqueue_head(&newf->resize_wait);
3338c2ecf20Sopenharmony_ci	newf->next_fd = 0;
3348c2ecf20Sopenharmony_ci	new_fdt = &newf->fdtab;
3358c2ecf20Sopenharmony_ci	new_fdt->max_fds = NR_OPEN_DEFAULT;
3368c2ecf20Sopenharmony_ci	new_fdt->close_on_exec = newf->close_on_exec_init;
3378c2ecf20Sopenharmony_ci	new_fdt->open_fds = newf->open_fds_init;
3388c2ecf20Sopenharmony_ci	new_fdt->full_fds_bits = newf->full_fds_bits_init;
3398c2ecf20Sopenharmony_ci	new_fdt->fd = &newf->fd_array[0];
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_ci	spin_lock(&oldf->file_lock);
3428c2ecf20Sopenharmony_ci	old_fdt = files_fdtable(oldf);
3438c2ecf20Sopenharmony_ci	open_files = sane_fdtable_size(old_fdt, max_fds);
3448c2ecf20Sopenharmony_ci
3458c2ecf20Sopenharmony_ci	/*
3468c2ecf20Sopenharmony_ci	 * Check whether we need to allocate a larger fd array and fd set.
3478c2ecf20Sopenharmony_ci	 */
3488c2ecf20Sopenharmony_ci	while (unlikely(open_files > new_fdt->max_fds)) {
3498c2ecf20Sopenharmony_ci		spin_unlock(&oldf->file_lock);
3508c2ecf20Sopenharmony_ci
3518c2ecf20Sopenharmony_ci		if (new_fdt != &newf->fdtab)
3528c2ecf20Sopenharmony_ci			__free_fdtable(new_fdt);
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci		new_fdt = alloc_fdtable(open_files - 1);
3558c2ecf20Sopenharmony_ci		if (!new_fdt) {
3568c2ecf20Sopenharmony_ci			*errorp = -ENOMEM;
3578c2ecf20Sopenharmony_ci			goto out_release;
3588c2ecf20Sopenharmony_ci		}
3598c2ecf20Sopenharmony_ci
3608c2ecf20Sopenharmony_ci		/* beyond sysctl_nr_open; nothing to do */
3618c2ecf20Sopenharmony_ci		if (unlikely(new_fdt->max_fds < open_files)) {
3628c2ecf20Sopenharmony_ci			__free_fdtable(new_fdt);
3638c2ecf20Sopenharmony_ci			*errorp = -EMFILE;
3648c2ecf20Sopenharmony_ci			goto out_release;
3658c2ecf20Sopenharmony_ci		}
3668c2ecf20Sopenharmony_ci
3678c2ecf20Sopenharmony_ci		/*
3688c2ecf20Sopenharmony_ci		 * Reacquire the oldf lock and a pointer to its fd table
3698c2ecf20Sopenharmony_ci		 * who knows it may have a new bigger fd table. We need
3708c2ecf20Sopenharmony_ci		 * the latest pointer.
3718c2ecf20Sopenharmony_ci		 */
3728c2ecf20Sopenharmony_ci		spin_lock(&oldf->file_lock);
3738c2ecf20Sopenharmony_ci		old_fdt = files_fdtable(oldf);
3748c2ecf20Sopenharmony_ci		open_files = sane_fdtable_size(old_fdt, max_fds);
3758c2ecf20Sopenharmony_ci	}
3768c2ecf20Sopenharmony_ci
3778c2ecf20Sopenharmony_ci	copy_fd_bitmaps(new_fdt, old_fdt, open_files);
3788c2ecf20Sopenharmony_ci
3798c2ecf20Sopenharmony_ci	old_fds = old_fdt->fd;
3808c2ecf20Sopenharmony_ci	new_fds = new_fdt->fd;
3818c2ecf20Sopenharmony_ci
3828c2ecf20Sopenharmony_ci	for (i = open_files; i != 0; i--) {
3838c2ecf20Sopenharmony_ci		struct file *f = *old_fds++;
3848c2ecf20Sopenharmony_ci		if (f) {
3858c2ecf20Sopenharmony_ci			get_file(f);
3868c2ecf20Sopenharmony_ci		} else {
3878c2ecf20Sopenharmony_ci			/*
3888c2ecf20Sopenharmony_ci			 * The fd may be claimed in the fd bitmap but not yet
3898c2ecf20Sopenharmony_ci			 * instantiated in the files array if a sibling thread
3908c2ecf20Sopenharmony_ci			 * is partway through open().  So make sure that this
3918c2ecf20Sopenharmony_ci			 * fd is available to the new process.
3928c2ecf20Sopenharmony_ci			 */
3938c2ecf20Sopenharmony_ci			__clear_open_fd(open_files - i, new_fdt);
3948c2ecf20Sopenharmony_ci		}
3958c2ecf20Sopenharmony_ci		rcu_assign_pointer(*new_fds++, f);
3968c2ecf20Sopenharmony_ci	}
3978c2ecf20Sopenharmony_ci	spin_unlock(&oldf->file_lock);
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci	/* clear the remainder */
4008c2ecf20Sopenharmony_ci	memset(new_fds, 0, (new_fdt->max_fds - open_files) * sizeof(struct file *));
4018c2ecf20Sopenharmony_ci
4028c2ecf20Sopenharmony_ci	rcu_assign_pointer(newf->fdt, new_fdt);
4038c2ecf20Sopenharmony_ci
4048c2ecf20Sopenharmony_ci	return newf;
4058c2ecf20Sopenharmony_ci
4068c2ecf20Sopenharmony_ciout_release:
4078c2ecf20Sopenharmony_ci	kmem_cache_free(files_cachep, newf);
4088c2ecf20Sopenharmony_ciout:
4098c2ecf20Sopenharmony_ci	return NULL;
4108c2ecf20Sopenharmony_ci}
4118c2ecf20Sopenharmony_ci
4128c2ecf20Sopenharmony_cistatic struct fdtable *close_files(struct files_struct * files)
4138c2ecf20Sopenharmony_ci{
4148c2ecf20Sopenharmony_ci	/*
4158c2ecf20Sopenharmony_ci	 * It is safe to dereference the fd table without RCU or
4168c2ecf20Sopenharmony_ci	 * ->file_lock because this is the last reference to the
4178c2ecf20Sopenharmony_ci	 * files structure.
4188c2ecf20Sopenharmony_ci	 */
4198c2ecf20Sopenharmony_ci	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
4208c2ecf20Sopenharmony_ci	unsigned int i, j = 0;
4218c2ecf20Sopenharmony_ci
4228c2ecf20Sopenharmony_ci	for (;;) {
4238c2ecf20Sopenharmony_ci		unsigned long set;
4248c2ecf20Sopenharmony_ci		i = j * BITS_PER_LONG;
4258c2ecf20Sopenharmony_ci		if (i >= fdt->max_fds)
4268c2ecf20Sopenharmony_ci			break;
4278c2ecf20Sopenharmony_ci		set = fdt->open_fds[j++];
4288c2ecf20Sopenharmony_ci		while (set) {
4298c2ecf20Sopenharmony_ci			if (set & 1) {
4308c2ecf20Sopenharmony_ci				struct file * file = xchg(&fdt->fd[i], NULL);
4318c2ecf20Sopenharmony_ci				if (file) {
4328c2ecf20Sopenharmony_ci					filp_close(file, files);
4338c2ecf20Sopenharmony_ci					cond_resched();
4348c2ecf20Sopenharmony_ci				}
4358c2ecf20Sopenharmony_ci			}
4368c2ecf20Sopenharmony_ci			i++;
4378c2ecf20Sopenharmony_ci			set >>= 1;
4388c2ecf20Sopenharmony_ci		}
4398c2ecf20Sopenharmony_ci	}
4408c2ecf20Sopenharmony_ci
4418c2ecf20Sopenharmony_ci	return fdt;
4428c2ecf20Sopenharmony_ci}
4438c2ecf20Sopenharmony_ci
4448c2ecf20Sopenharmony_cistruct files_struct *get_files_struct(struct task_struct *task)
4458c2ecf20Sopenharmony_ci{
4468c2ecf20Sopenharmony_ci	struct files_struct *files;
4478c2ecf20Sopenharmony_ci
4488c2ecf20Sopenharmony_ci	task_lock(task);
4498c2ecf20Sopenharmony_ci	files = task->files;
4508c2ecf20Sopenharmony_ci	if (files)
4518c2ecf20Sopenharmony_ci		atomic_inc(&files->count);
4528c2ecf20Sopenharmony_ci	task_unlock(task);
4538c2ecf20Sopenharmony_ci
4548c2ecf20Sopenharmony_ci	return files;
4558c2ecf20Sopenharmony_ci}
4568c2ecf20Sopenharmony_ci
4578c2ecf20Sopenharmony_civoid put_files_struct(struct files_struct *files)
4588c2ecf20Sopenharmony_ci{
4598c2ecf20Sopenharmony_ci	if (atomic_dec_and_test(&files->count)) {
4608c2ecf20Sopenharmony_ci		struct fdtable *fdt = close_files(files);
4618c2ecf20Sopenharmony_ci
4628c2ecf20Sopenharmony_ci		/* free the arrays if they are not embedded */
4638c2ecf20Sopenharmony_ci		if (fdt != &files->fdtab)
4648c2ecf20Sopenharmony_ci			__free_fdtable(fdt);
4658c2ecf20Sopenharmony_ci		kmem_cache_free(files_cachep, files);
4668c2ecf20Sopenharmony_ci	}
4678c2ecf20Sopenharmony_ci}
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_civoid reset_files_struct(struct files_struct *files)
4708c2ecf20Sopenharmony_ci{
4718c2ecf20Sopenharmony_ci	struct task_struct *tsk = current;
4728c2ecf20Sopenharmony_ci	struct files_struct *old;
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	old = tsk->files;
4758c2ecf20Sopenharmony_ci	task_lock(tsk);
4768c2ecf20Sopenharmony_ci	tsk->files = files;
4778c2ecf20Sopenharmony_ci	task_unlock(tsk);
4788c2ecf20Sopenharmony_ci	put_files_struct(old);
4798c2ecf20Sopenharmony_ci}
4808c2ecf20Sopenharmony_ci
4818c2ecf20Sopenharmony_civoid exit_files(struct task_struct *tsk)
4828c2ecf20Sopenharmony_ci{
4838c2ecf20Sopenharmony_ci	struct files_struct * files = tsk->files;
4848c2ecf20Sopenharmony_ci
4858c2ecf20Sopenharmony_ci	if (files) {
4868c2ecf20Sopenharmony_ci		task_lock(tsk);
4878c2ecf20Sopenharmony_ci		tsk->files = NULL;
4888c2ecf20Sopenharmony_ci		task_unlock(tsk);
4898c2ecf20Sopenharmony_ci		put_files_struct(files);
4908c2ecf20Sopenharmony_ci	}
4918c2ecf20Sopenharmony_ci}
4928c2ecf20Sopenharmony_ci
4938c2ecf20Sopenharmony_cistruct files_struct init_files = {
4948c2ecf20Sopenharmony_ci	.count		= ATOMIC_INIT(1),
4958c2ecf20Sopenharmony_ci	.fdt		= &init_files.fdtab,
4968c2ecf20Sopenharmony_ci	.fdtab		= {
4978c2ecf20Sopenharmony_ci		.max_fds	= NR_OPEN_DEFAULT,
4988c2ecf20Sopenharmony_ci		.fd		= &init_files.fd_array[0],
4998c2ecf20Sopenharmony_ci		.close_on_exec	= init_files.close_on_exec_init,
5008c2ecf20Sopenharmony_ci		.open_fds	= init_files.open_fds_init,
5018c2ecf20Sopenharmony_ci		.full_fds_bits	= init_files.full_fds_bits_init,
5028c2ecf20Sopenharmony_ci	},
5038c2ecf20Sopenharmony_ci	.file_lock	= __SPIN_LOCK_UNLOCKED(init_files.file_lock),
5048c2ecf20Sopenharmony_ci	.resize_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(init_files.resize_wait),
5058c2ecf20Sopenharmony_ci};
5068c2ecf20Sopenharmony_ci
5078c2ecf20Sopenharmony_cistatic unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
5088c2ecf20Sopenharmony_ci{
5098c2ecf20Sopenharmony_ci	unsigned int maxfd = fdt->max_fds;
5108c2ecf20Sopenharmony_ci	unsigned int maxbit = maxfd / BITS_PER_LONG;
5118c2ecf20Sopenharmony_ci	unsigned int bitbit = start / BITS_PER_LONG;
5128c2ecf20Sopenharmony_ci
5138c2ecf20Sopenharmony_ci	bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG;
5148c2ecf20Sopenharmony_ci	if (bitbit > maxfd)
5158c2ecf20Sopenharmony_ci		return maxfd;
5168c2ecf20Sopenharmony_ci	if (bitbit > start)
5178c2ecf20Sopenharmony_ci		start = bitbit;
5188c2ecf20Sopenharmony_ci	return find_next_zero_bit(fdt->open_fds, maxfd, start);
5198c2ecf20Sopenharmony_ci}
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci/*
5228c2ecf20Sopenharmony_ci * allocate a file descriptor, mark it busy.
5238c2ecf20Sopenharmony_ci */
5248c2ecf20Sopenharmony_ciint __alloc_fd(struct files_struct *files,
5258c2ecf20Sopenharmony_ci	       unsigned start, unsigned end, unsigned flags)
5268c2ecf20Sopenharmony_ci{
5278c2ecf20Sopenharmony_ci	unsigned int fd;
5288c2ecf20Sopenharmony_ci	int error;
5298c2ecf20Sopenharmony_ci	struct fdtable *fdt;
5308c2ecf20Sopenharmony_ci
5318c2ecf20Sopenharmony_ci	spin_lock(&files->file_lock);
5328c2ecf20Sopenharmony_cirepeat:
5338c2ecf20Sopenharmony_ci	fdt = files_fdtable(files);
5348c2ecf20Sopenharmony_ci	fd = start;
5358c2ecf20Sopenharmony_ci	if (fd < files->next_fd)
5368c2ecf20Sopenharmony_ci		fd = files->next_fd;
5378c2ecf20Sopenharmony_ci
5388c2ecf20Sopenharmony_ci	if (fd < fdt->max_fds)
5398c2ecf20Sopenharmony_ci		fd = find_next_fd(fdt, fd);
5408c2ecf20Sopenharmony_ci
5418c2ecf20Sopenharmony_ci	/*
5428c2ecf20Sopenharmony_ci	 * N.B. For clone tasks sharing a files structure, this test
5438c2ecf20Sopenharmony_ci	 * will limit the total number of files that can be opened.
5448c2ecf20Sopenharmony_ci	 */
5458c2ecf20Sopenharmony_ci	error = -EMFILE;
5468c2ecf20Sopenharmony_ci	if (fd >= end)
5478c2ecf20Sopenharmony_ci		goto out;
5488c2ecf20Sopenharmony_ci
5498c2ecf20Sopenharmony_ci	error = expand_files(files, fd);
5508c2ecf20Sopenharmony_ci	if (error < 0)
5518c2ecf20Sopenharmony_ci		goto out;
5528c2ecf20Sopenharmony_ci
5538c2ecf20Sopenharmony_ci	/*
5548c2ecf20Sopenharmony_ci	 * If we needed to expand the fs array we
5558c2ecf20Sopenharmony_ci	 * might have blocked - try again.
5568c2ecf20Sopenharmony_ci	 */
5578c2ecf20Sopenharmony_ci	if (error)
5588c2ecf20Sopenharmony_ci		goto repeat;
5598c2ecf20Sopenharmony_ci
5608c2ecf20Sopenharmony_ci	if (start <= files->next_fd)
5618c2ecf20Sopenharmony_ci		files->next_fd = fd + 1;
5628c2ecf20Sopenharmony_ci
5638c2ecf20Sopenharmony_ci	__set_open_fd(fd, fdt);
5648c2ecf20Sopenharmony_ci	if (flags & O_CLOEXEC)
5658c2ecf20Sopenharmony_ci		__set_close_on_exec(fd, fdt);
5668c2ecf20Sopenharmony_ci	else
5678c2ecf20Sopenharmony_ci		__clear_close_on_exec(fd, fdt);
5688c2ecf20Sopenharmony_ci	error = fd;
5698c2ecf20Sopenharmony_ci#if 1
5708c2ecf20Sopenharmony_ci	/* Sanity check */
5718c2ecf20Sopenharmony_ci	if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
5728c2ecf20Sopenharmony_ci		printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
5738c2ecf20Sopenharmony_ci		rcu_assign_pointer(fdt->fd[fd], NULL);
5748c2ecf20Sopenharmony_ci	}
5758c2ecf20Sopenharmony_ci#endif
5768c2ecf20Sopenharmony_ci
5778c2ecf20Sopenharmony_ciout:
5788c2ecf20Sopenharmony_ci	spin_unlock(&files->file_lock);
5798c2ecf20Sopenharmony_ci	return error;
5808c2ecf20Sopenharmony_ci}
5818c2ecf20Sopenharmony_ci
5828c2ecf20Sopenharmony_cistatic int alloc_fd(unsigned start, unsigned flags)
5838c2ecf20Sopenharmony_ci{
5848c2ecf20Sopenharmony_ci	return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
5858c2ecf20Sopenharmony_ci}
5868c2ecf20Sopenharmony_ci
5878c2ecf20Sopenharmony_ciint __get_unused_fd_flags(unsigned flags, unsigned long nofile)
5888c2ecf20Sopenharmony_ci{
5898c2ecf20Sopenharmony_ci	return __alloc_fd(current->files, 0, nofile, flags);
5908c2ecf20Sopenharmony_ci}
5918c2ecf20Sopenharmony_ci
5928c2ecf20Sopenharmony_ciint get_unused_fd_flags(unsigned flags)
5938c2ecf20Sopenharmony_ci{
5948c2ecf20Sopenharmony_ci	return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE));
5958c2ecf20Sopenharmony_ci}
5968c2ecf20Sopenharmony_ciEXPORT_SYMBOL(get_unused_fd_flags);
5978c2ecf20Sopenharmony_ci
5988c2ecf20Sopenharmony_cistatic void __put_unused_fd(struct files_struct *files, unsigned int fd)
5998c2ecf20Sopenharmony_ci{
6008c2ecf20Sopenharmony_ci	struct fdtable *fdt = files_fdtable(files);
6018c2ecf20Sopenharmony_ci	__clear_open_fd(fd, fdt);
6028c2ecf20Sopenharmony_ci	if (fd < files->next_fd)
6038c2ecf20Sopenharmony_ci		files->next_fd = fd;
6048c2ecf20Sopenharmony_ci}
6058c2ecf20Sopenharmony_ci
6068c2ecf20Sopenharmony_civoid put_unused_fd(unsigned int fd)
6078c2ecf20Sopenharmony_ci{
6088c2ecf20Sopenharmony_ci	struct files_struct *files = current->files;
6098c2ecf20Sopenharmony_ci	spin_lock(&files->file_lock);
6108c2ecf20Sopenharmony_ci	__put_unused_fd(files, fd);
6118c2ecf20Sopenharmony_ci	spin_unlock(&files->file_lock);
6128c2ecf20Sopenharmony_ci}
6138c2ecf20Sopenharmony_ci
6148c2ecf20Sopenharmony_ciEXPORT_SYMBOL(put_unused_fd);
6158c2ecf20Sopenharmony_ci
6168c2ecf20Sopenharmony_ci/*
6178c2ecf20Sopenharmony_ci * Install a file pointer in the fd array.
6188c2ecf20Sopenharmony_ci *
6198c2ecf20Sopenharmony_ci * The VFS is full of places where we drop the files lock between
6208c2ecf20Sopenharmony_ci * setting the open_fds bitmap and installing the file in the file
6218c2ecf20Sopenharmony_ci * array.  At any such point, we are vulnerable to a dup2() race
6228c2ecf20Sopenharmony_ci * installing a file in the array before us.  We need to detect this and
6238c2ecf20Sopenharmony_ci * fput() the struct file we are about to overwrite in this case.
6248c2ecf20Sopenharmony_ci *
6258c2ecf20Sopenharmony_ci * It should never happen - if we allow dup2() do it, _really_ bad things
6268c2ecf20Sopenharmony_ci * will follow.
6278c2ecf20Sopenharmony_ci *
6288c2ecf20Sopenharmony_ci * NOTE: __fd_install() variant is really, really low-level; don't
6298c2ecf20Sopenharmony_ci * use it unless you are forced to by truly lousy API shoved down
6308c2ecf20Sopenharmony_ci * your throat.  'files' *MUST* be either current->files or obtained
6318c2ecf20Sopenharmony_ci * by get_files_struct(current) done by whoever had given it to you,
6328c2ecf20Sopenharmony_ci * or really bad things will happen.  Normally you want to use
6338c2ecf20Sopenharmony_ci * fd_install() instead.
6348c2ecf20Sopenharmony_ci */
6358c2ecf20Sopenharmony_ci
6368c2ecf20Sopenharmony_civoid __fd_install(struct files_struct *files, unsigned int fd,
6378c2ecf20Sopenharmony_ci		struct file *file)
6388c2ecf20Sopenharmony_ci{
6398c2ecf20Sopenharmony_ci	struct fdtable *fdt;
6408c2ecf20Sopenharmony_ci
6418c2ecf20Sopenharmony_ci	rcu_read_lock_sched();
6428c2ecf20Sopenharmony_ci
6438c2ecf20Sopenharmony_ci	if (unlikely(files->resize_in_progress)) {
6448c2ecf20Sopenharmony_ci		rcu_read_unlock_sched();
6458c2ecf20Sopenharmony_ci		spin_lock(&files->file_lock);
6468c2ecf20Sopenharmony_ci		fdt = files_fdtable(files);
6478c2ecf20Sopenharmony_ci		BUG_ON(fdt->fd[fd] != NULL);
6488c2ecf20Sopenharmony_ci		rcu_assign_pointer(fdt->fd[fd], file);
6498c2ecf20Sopenharmony_ci		spin_unlock(&files->file_lock);
6508c2ecf20Sopenharmony_ci		return;
6518c2ecf20Sopenharmony_ci	}
6528c2ecf20Sopenharmony_ci	/* coupled with smp_wmb() in expand_fdtable() */
6538c2ecf20Sopenharmony_ci	smp_rmb();
6548c2ecf20Sopenharmony_ci	fdt = rcu_dereference_sched(files->fdt);
6558c2ecf20Sopenharmony_ci	BUG_ON(fdt->fd[fd] != NULL);
6568c2ecf20Sopenharmony_ci	rcu_assign_pointer(fdt->fd[fd], file);
6578c2ecf20Sopenharmony_ci	rcu_read_unlock_sched();
6588c2ecf20Sopenharmony_ci}
6598c2ecf20Sopenharmony_ci
6608c2ecf20Sopenharmony_ci/*
6618c2ecf20Sopenharmony_ci * This consumes the "file" refcount, so callers should treat it
6628c2ecf20Sopenharmony_ci * as if they had called fput(file).
6638c2ecf20Sopenharmony_ci */
6648c2ecf20Sopenharmony_civoid fd_install(unsigned int fd, struct file *file)
6658c2ecf20Sopenharmony_ci{
6668c2ecf20Sopenharmony_ci	__fd_install(current->files, fd, file);
6678c2ecf20Sopenharmony_ci}
6688c2ecf20Sopenharmony_ci
6698c2ecf20Sopenharmony_ciEXPORT_SYMBOL(fd_install);
6708c2ecf20Sopenharmony_ci
6718c2ecf20Sopenharmony_cistatic struct file *pick_file(struct files_struct *files, unsigned fd)
6728c2ecf20Sopenharmony_ci{
6738c2ecf20Sopenharmony_ci	struct file *file = NULL;
6748c2ecf20Sopenharmony_ci	struct fdtable *fdt;
6758c2ecf20Sopenharmony_ci
6768c2ecf20Sopenharmony_ci	spin_lock(&files->file_lock);
6778c2ecf20Sopenharmony_ci	fdt = files_fdtable(files);
6788c2ecf20Sopenharmony_ci	if (fd >= fdt->max_fds)
6798c2ecf20Sopenharmony_ci		goto out_unlock;
6808c2ecf20Sopenharmony_ci	fd = array_index_nospec(fd, fdt->max_fds);
6818c2ecf20Sopenharmony_ci	file = fdt->fd[fd];
6828c2ecf20Sopenharmony_ci	if (!file)
6838c2ecf20Sopenharmony_ci		goto out_unlock;
6848c2ecf20Sopenharmony_ci	rcu_assign_pointer(fdt->fd[fd], NULL);
6858c2ecf20Sopenharmony_ci	__put_unused_fd(files, fd);
6868c2ecf20Sopenharmony_ci
6878c2ecf20Sopenharmony_ciout_unlock:
6888c2ecf20Sopenharmony_ci	spin_unlock(&files->file_lock);
6898c2ecf20Sopenharmony_ci	return file;
6908c2ecf20Sopenharmony_ci}
6918c2ecf20Sopenharmony_ci
6928c2ecf20Sopenharmony_ci/*
6938c2ecf20Sopenharmony_ci * The same warnings as for __alloc_fd()/__fd_install() apply here...
6948c2ecf20Sopenharmony_ci */
6958c2ecf20Sopenharmony_ciint __close_fd(struct files_struct *files, unsigned fd)
6968c2ecf20Sopenharmony_ci{
6978c2ecf20Sopenharmony_ci	struct file *file;
6988c2ecf20Sopenharmony_ci
6998c2ecf20Sopenharmony_ci	file = pick_file(files, fd);
7008c2ecf20Sopenharmony_ci	if (!file)
7018c2ecf20Sopenharmony_ci		return -EBADF;
7028c2ecf20Sopenharmony_ci
7038c2ecf20Sopenharmony_ci	return filp_close(file, files);
7048c2ecf20Sopenharmony_ci}
7058c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__close_fd); /* for ksys_close() */
7068c2ecf20Sopenharmony_ci
7078c2ecf20Sopenharmony_ci/**
7088c2ecf20Sopenharmony_ci * __close_range() - Close all file descriptors in a given range.
7098c2ecf20Sopenharmony_ci *
7108c2ecf20Sopenharmony_ci * @fd:     starting file descriptor to close
7118c2ecf20Sopenharmony_ci * @max_fd: last file descriptor to close
7128c2ecf20Sopenharmony_ci *
7138c2ecf20Sopenharmony_ci * This closes a range of file descriptors. All file descriptors
7148c2ecf20Sopenharmony_ci * from @fd up to and including @max_fd are closed.
7158c2ecf20Sopenharmony_ci */
7168c2ecf20Sopenharmony_ciint __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
7178c2ecf20Sopenharmony_ci{
7188c2ecf20Sopenharmony_ci	unsigned int cur_max;
7198c2ecf20Sopenharmony_ci	struct task_struct *me = current;
7208c2ecf20Sopenharmony_ci	struct files_struct *cur_fds = me->files, *fds = NULL;
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_ci	if (flags & ~CLOSE_RANGE_UNSHARE)
7238c2ecf20Sopenharmony_ci		return -EINVAL;
7248c2ecf20Sopenharmony_ci
7258c2ecf20Sopenharmony_ci	if (fd > max_fd)
7268c2ecf20Sopenharmony_ci		return -EINVAL;
7278c2ecf20Sopenharmony_ci
7288c2ecf20Sopenharmony_ci	rcu_read_lock();
7298c2ecf20Sopenharmony_ci	cur_max = files_fdtable(cur_fds)->max_fds;
7308c2ecf20Sopenharmony_ci	rcu_read_unlock();
7318c2ecf20Sopenharmony_ci
7328c2ecf20Sopenharmony_ci	/* cap to last valid index into fdtable */
7338c2ecf20Sopenharmony_ci	cur_max--;
7348c2ecf20Sopenharmony_ci
7358c2ecf20Sopenharmony_ci	if (flags & CLOSE_RANGE_UNSHARE) {
7368c2ecf20Sopenharmony_ci		int ret;
7378c2ecf20Sopenharmony_ci		unsigned int max_unshare_fds = NR_OPEN_MAX;
7388c2ecf20Sopenharmony_ci
7398c2ecf20Sopenharmony_ci		/*
7408c2ecf20Sopenharmony_ci		 * If the requested range is greater than the current maximum,
7418c2ecf20Sopenharmony_ci		 * we're closing everything so only copy all file descriptors
7428c2ecf20Sopenharmony_ci		 * beneath the lowest file descriptor.
7438c2ecf20Sopenharmony_ci		 */
7448c2ecf20Sopenharmony_ci		if (max_fd >= cur_max)
7458c2ecf20Sopenharmony_ci			max_unshare_fds = fd;
7468c2ecf20Sopenharmony_ci
7478c2ecf20Sopenharmony_ci		ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds);
7488c2ecf20Sopenharmony_ci		if (ret)
7498c2ecf20Sopenharmony_ci			return ret;
7508c2ecf20Sopenharmony_ci
7518c2ecf20Sopenharmony_ci		/*
7528c2ecf20Sopenharmony_ci		 * We used to share our file descriptor table, and have now
7538c2ecf20Sopenharmony_ci		 * created a private one, make sure we're using it below.
7548c2ecf20Sopenharmony_ci		 */
7558c2ecf20Sopenharmony_ci		if (fds)
7568c2ecf20Sopenharmony_ci			swap(cur_fds, fds);
7578c2ecf20Sopenharmony_ci	}
7588c2ecf20Sopenharmony_ci
7598c2ecf20Sopenharmony_ci	max_fd = min(max_fd, cur_max);
7608c2ecf20Sopenharmony_ci	while (fd <= max_fd) {
7618c2ecf20Sopenharmony_ci		struct file *file;
7628c2ecf20Sopenharmony_ci
7638c2ecf20Sopenharmony_ci		file = pick_file(cur_fds, fd++);
7648c2ecf20Sopenharmony_ci		if (!file)
7658c2ecf20Sopenharmony_ci			continue;
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_ci		filp_close(file, cur_fds);
7688c2ecf20Sopenharmony_ci		cond_resched();
7698c2ecf20Sopenharmony_ci	}
7708c2ecf20Sopenharmony_ci
7718c2ecf20Sopenharmony_ci	if (fds) {
7728c2ecf20Sopenharmony_ci		/*
7738c2ecf20Sopenharmony_ci		 * We're done closing the files we were supposed to. Time to install
7748c2ecf20Sopenharmony_ci		 * the new file descriptor table and drop the old one.
7758c2ecf20Sopenharmony_ci		 */
7768c2ecf20Sopenharmony_ci		task_lock(me);
7778c2ecf20Sopenharmony_ci		me->files = cur_fds;
7788c2ecf20Sopenharmony_ci		task_unlock(me);
7798c2ecf20Sopenharmony_ci		put_files_struct(fds);
7808c2ecf20Sopenharmony_ci	}
7818c2ecf20Sopenharmony_ci
7828c2ecf20Sopenharmony_ci	return 0;
7838c2ecf20Sopenharmony_ci}
7848c2ecf20Sopenharmony_ci
7858c2ecf20Sopenharmony_ci/*
7868c2ecf20Sopenharmony_ci * See close_fd_get_file() below, this variant assumes current->files->file_lock
7878c2ecf20Sopenharmony_ci * is held.
7888c2ecf20Sopenharmony_ci */
7898c2ecf20Sopenharmony_ciint __close_fd_get_file(unsigned int fd, struct file **res)
7908c2ecf20Sopenharmony_ci{
7918c2ecf20Sopenharmony_ci	struct files_struct *files = current->files;
7928c2ecf20Sopenharmony_ci	struct file *file;
7938c2ecf20Sopenharmony_ci	struct fdtable *fdt;
7948c2ecf20Sopenharmony_ci
7958c2ecf20Sopenharmony_ci	fdt = files_fdtable(files);
7968c2ecf20Sopenharmony_ci	if (fd >= fdt->max_fds)
7978c2ecf20Sopenharmony_ci		goto out_err;
7988c2ecf20Sopenharmony_ci	file = fdt->fd[fd];
7998c2ecf20Sopenharmony_ci	if (!file)
8008c2ecf20Sopenharmony_ci		goto out_err;
8018c2ecf20Sopenharmony_ci	rcu_assign_pointer(fdt->fd[fd], NULL);
8028c2ecf20Sopenharmony_ci	__put_unused_fd(files, fd);
8038c2ecf20Sopenharmony_ci	get_file(file);
8048c2ecf20Sopenharmony_ci	*res = file;
8058c2ecf20Sopenharmony_ci	return 0;
8068c2ecf20Sopenharmony_ciout_err:
8078c2ecf20Sopenharmony_ci	*res = NULL;
8088c2ecf20Sopenharmony_ci	return -ENOENT;
8098c2ecf20Sopenharmony_ci}
8108c2ecf20Sopenharmony_ci
8118c2ecf20Sopenharmony_ci/*
8128c2ecf20Sopenharmony_ci * variant of close_fd that gets a ref on the file for later fput.
8138c2ecf20Sopenharmony_ci * The caller must ensure that filp_close() called on the file, and then
8148c2ecf20Sopenharmony_ci * an fput().
8158c2ecf20Sopenharmony_ci */
8168c2ecf20Sopenharmony_ciint close_fd_get_file(unsigned int fd, struct file **res)
8178c2ecf20Sopenharmony_ci{
8188c2ecf20Sopenharmony_ci	struct files_struct *files = current->files;
8198c2ecf20Sopenharmony_ci	int ret;
8208c2ecf20Sopenharmony_ci
8218c2ecf20Sopenharmony_ci	spin_lock(&files->file_lock);
8228c2ecf20Sopenharmony_ci	ret = __close_fd_get_file(fd, res);
8238c2ecf20Sopenharmony_ci	spin_unlock(&files->file_lock);
8248c2ecf20Sopenharmony_ci
8258c2ecf20Sopenharmony_ci	return ret;
8268c2ecf20Sopenharmony_ci}
8278c2ecf20Sopenharmony_ci
8288c2ecf20Sopenharmony_civoid do_close_on_exec(struct files_struct *files)
8298c2ecf20Sopenharmony_ci{
8308c2ecf20Sopenharmony_ci	unsigned i;
8318c2ecf20Sopenharmony_ci	struct fdtable *fdt;
8328c2ecf20Sopenharmony_ci
8338c2ecf20Sopenharmony_ci	/* exec unshares first */
8348c2ecf20Sopenharmony_ci	spin_lock(&files->file_lock);
8358c2ecf20Sopenharmony_ci	for (i = 0; ; i++) {
8368c2ecf20Sopenharmony_ci		unsigned long set;
8378c2ecf20Sopenharmony_ci		unsigned fd = i * BITS_PER_LONG;
8388c2ecf20Sopenharmony_ci		fdt = files_fdtable(files);
8398c2ecf20Sopenharmony_ci		if (fd >= fdt->max_fds)
8408c2ecf20Sopenharmony_ci			break;
8418c2ecf20Sopenharmony_ci		set = fdt->close_on_exec[i];
8428c2ecf20Sopenharmony_ci		if (!set)
8438c2ecf20Sopenharmony_ci			continue;
8448c2ecf20Sopenharmony_ci		fdt->close_on_exec[i] = 0;
8458c2ecf20Sopenharmony_ci		for ( ; set ; fd++, set >>= 1) {
8468c2ecf20Sopenharmony_ci			struct file *file;
8478c2ecf20Sopenharmony_ci			if (!(set & 1))
8488c2ecf20Sopenharmony_ci				continue;
8498c2ecf20Sopenharmony_ci			file = fdt->fd[fd];
8508c2ecf20Sopenharmony_ci			if (!file)
8518c2ecf20Sopenharmony_ci				continue;
8528c2ecf20Sopenharmony_ci			rcu_assign_pointer(fdt->fd[fd], NULL);
8538c2ecf20Sopenharmony_ci			__put_unused_fd(files, fd);
8548c2ecf20Sopenharmony_ci			spin_unlock(&files->file_lock);
8558c2ecf20Sopenharmony_ci			filp_close(file, files);
8568c2ecf20Sopenharmony_ci			cond_resched();
8578c2ecf20Sopenharmony_ci			spin_lock(&files->file_lock);
8588c2ecf20Sopenharmony_ci		}
8598c2ecf20Sopenharmony_ci
8608c2ecf20Sopenharmony_ci	}
8618c2ecf20Sopenharmony_ci	spin_unlock(&files->file_lock);
8628c2ecf20Sopenharmony_ci}
8638c2ecf20Sopenharmony_ci
8648c2ecf20Sopenharmony_cistatic inline struct file *__fget_files_rcu(struct files_struct *files,
8658c2ecf20Sopenharmony_ci	unsigned int fd, fmode_t mask, unsigned int refs)
8668c2ecf20Sopenharmony_ci{
8678c2ecf20Sopenharmony_ci	for (;;) {
8688c2ecf20Sopenharmony_ci		struct file *file;
8698c2ecf20Sopenharmony_ci		struct fdtable *fdt = rcu_dereference_raw(files->fdt);
8708c2ecf20Sopenharmony_ci		struct file __rcu **fdentry;
8718c2ecf20Sopenharmony_ci
8728c2ecf20Sopenharmony_ci		if (unlikely(fd >= fdt->max_fds))
8738c2ecf20Sopenharmony_ci			return NULL;
8748c2ecf20Sopenharmony_ci
8758c2ecf20Sopenharmony_ci		fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds);
8768c2ecf20Sopenharmony_ci		file = rcu_dereference_raw(*fdentry);
8778c2ecf20Sopenharmony_ci		if (unlikely(!file))
8788c2ecf20Sopenharmony_ci			return NULL;
8798c2ecf20Sopenharmony_ci
8808c2ecf20Sopenharmony_ci		if (unlikely(file->f_mode & mask))
8818c2ecf20Sopenharmony_ci			return NULL;
8828c2ecf20Sopenharmony_ci
8838c2ecf20Sopenharmony_ci		/*
8848c2ecf20Sopenharmony_ci		 * Ok, we have a file pointer. However, because we do
8858c2ecf20Sopenharmony_ci		 * this all locklessly under RCU, we may be racing with
8868c2ecf20Sopenharmony_ci		 * that file being closed.
8878c2ecf20Sopenharmony_ci		 *
8888c2ecf20Sopenharmony_ci		 * Such a race can take two forms:
8898c2ecf20Sopenharmony_ci		 *
8908c2ecf20Sopenharmony_ci		 *  (a) the file ref already went down to zero,
8918c2ecf20Sopenharmony_ci		 *      and get_file_rcu_many() fails. Just try
8928c2ecf20Sopenharmony_ci		 *      again:
8938c2ecf20Sopenharmony_ci		 */
8948c2ecf20Sopenharmony_ci		if (unlikely(!get_file_rcu_many(file, refs)))
8958c2ecf20Sopenharmony_ci			continue;
8968c2ecf20Sopenharmony_ci
8978c2ecf20Sopenharmony_ci		/*
8988c2ecf20Sopenharmony_ci		 *  (b) the file table entry has changed under us.
8998c2ecf20Sopenharmony_ci		 *       Note that we don't need to re-check the 'fdt->fd'
9008c2ecf20Sopenharmony_ci		 *       pointer having changed, because it always goes
9018c2ecf20Sopenharmony_ci		 *       hand-in-hand with 'fdt'.
9028c2ecf20Sopenharmony_ci		 *
9038c2ecf20Sopenharmony_ci		 * If so, we need to put our refs and try again.
9048c2ecf20Sopenharmony_ci		 */
9058c2ecf20Sopenharmony_ci		if (unlikely(rcu_dereference_raw(files->fdt) != fdt) ||
9068c2ecf20Sopenharmony_ci		    unlikely(rcu_dereference_raw(*fdentry) != file)) {
9078c2ecf20Sopenharmony_ci			fput_many(file, refs);
9088c2ecf20Sopenharmony_ci			continue;
9098c2ecf20Sopenharmony_ci		}
9108c2ecf20Sopenharmony_ci
9118c2ecf20Sopenharmony_ci		/*
9128c2ecf20Sopenharmony_ci		 * Ok, we have a ref to the file, and checked that it
9138c2ecf20Sopenharmony_ci		 * still exists.
9148c2ecf20Sopenharmony_ci		 */
9158c2ecf20Sopenharmony_ci		return file;
9168c2ecf20Sopenharmony_ci	}
9178c2ecf20Sopenharmony_ci}
9188c2ecf20Sopenharmony_ci
9198c2ecf20Sopenharmony_cistatic struct file *__fget_files(struct files_struct *files, unsigned int fd,
9208c2ecf20Sopenharmony_ci				 fmode_t mask, unsigned int refs)
9218c2ecf20Sopenharmony_ci{
9228c2ecf20Sopenharmony_ci	struct file *file;
9238c2ecf20Sopenharmony_ci
9248c2ecf20Sopenharmony_ci	rcu_read_lock();
9258c2ecf20Sopenharmony_ci	file = __fget_files_rcu(files, fd, mask, refs);
9268c2ecf20Sopenharmony_ci	rcu_read_unlock();
9278c2ecf20Sopenharmony_ci
9288c2ecf20Sopenharmony_ci	return file;
9298c2ecf20Sopenharmony_ci}
9308c2ecf20Sopenharmony_ci
9318c2ecf20Sopenharmony_cistatic inline struct file *__fget(unsigned int fd, fmode_t mask,
9328c2ecf20Sopenharmony_ci				  unsigned int refs)
9338c2ecf20Sopenharmony_ci{
9348c2ecf20Sopenharmony_ci	return __fget_files(current->files, fd, mask, refs);
9358c2ecf20Sopenharmony_ci}
9368c2ecf20Sopenharmony_ci
9378c2ecf20Sopenharmony_cistruct file *fget_many(unsigned int fd, unsigned int refs)
9388c2ecf20Sopenharmony_ci{
9398c2ecf20Sopenharmony_ci	return __fget(fd, FMODE_PATH, refs);
9408c2ecf20Sopenharmony_ci}
9418c2ecf20Sopenharmony_ci
9428c2ecf20Sopenharmony_cistruct file *fget(unsigned int fd)
9438c2ecf20Sopenharmony_ci{
9448c2ecf20Sopenharmony_ci	return __fget(fd, FMODE_PATH, 1);
9458c2ecf20Sopenharmony_ci}
9468c2ecf20Sopenharmony_ciEXPORT_SYMBOL(fget);
9478c2ecf20Sopenharmony_ci
9488c2ecf20Sopenharmony_cistruct file *fget_raw(unsigned int fd)
9498c2ecf20Sopenharmony_ci{
9508c2ecf20Sopenharmony_ci	return __fget(fd, 0, 1);
9518c2ecf20Sopenharmony_ci}
9528c2ecf20Sopenharmony_ciEXPORT_SYMBOL(fget_raw);
9538c2ecf20Sopenharmony_ci
9548c2ecf20Sopenharmony_cistruct file *fget_task(struct task_struct *task, unsigned int fd)
9558c2ecf20Sopenharmony_ci{
9568c2ecf20Sopenharmony_ci	struct file *file = NULL;
9578c2ecf20Sopenharmony_ci
9588c2ecf20Sopenharmony_ci	task_lock(task);
9598c2ecf20Sopenharmony_ci	if (task->files)
9608c2ecf20Sopenharmony_ci		file = __fget_files(task->files, fd, 0, 1);
9618c2ecf20Sopenharmony_ci	task_unlock(task);
9628c2ecf20Sopenharmony_ci
9638c2ecf20Sopenharmony_ci	return file;
9648c2ecf20Sopenharmony_ci}
9658c2ecf20Sopenharmony_ci
9668c2ecf20Sopenharmony_ci/*
9678c2ecf20Sopenharmony_ci * Lightweight file lookup - no refcnt increment if fd table isn't shared.
9688c2ecf20Sopenharmony_ci *
9698c2ecf20Sopenharmony_ci * You can use this instead of fget if you satisfy all of the following
9708c2ecf20Sopenharmony_ci * conditions:
9718c2ecf20Sopenharmony_ci * 1) You must call fput_light before exiting the syscall and returning control
9728c2ecf20Sopenharmony_ci *    to userspace (i.e. you cannot remember the returned struct file * after
9738c2ecf20Sopenharmony_ci *    returning to userspace).
9748c2ecf20Sopenharmony_ci * 2) You must not call filp_close on the returned struct file * in between
9758c2ecf20Sopenharmony_ci *    calls to fget_light and fput_light.
9768c2ecf20Sopenharmony_ci * 3) You must not clone the current task in between the calls to fget_light
9778c2ecf20Sopenharmony_ci *    and fput_light.
9788c2ecf20Sopenharmony_ci *
9798c2ecf20Sopenharmony_ci * The fput_needed flag returned by fget_light should be passed to the
9808c2ecf20Sopenharmony_ci * corresponding fput_light.
9818c2ecf20Sopenharmony_ci */
9828c2ecf20Sopenharmony_cistatic unsigned long __fget_light(unsigned int fd, fmode_t mask)
9838c2ecf20Sopenharmony_ci{
9848c2ecf20Sopenharmony_ci	struct files_struct *files = current->files;
9858c2ecf20Sopenharmony_ci	struct file *file;
9868c2ecf20Sopenharmony_ci
9878c2ecf20Sopenharmony_ci	if (atomic_read(&files->count) == 1) {
9888c2ecf20Sopenharmony_ci		file = files_lookup_fd_raw(files, fd);
9898c2ecf20Sopenharmony_ci		if (!file || unlikely(file->f_mode & mask))
9908c2ecf20Sopenharmony_ci			return 0;
9918c2ecf20Sopenharmony_ci		return (unsigned long)file;
9928c2ecf20Sopenharmony_ci	} else {
9938c2ecf20Sopenharmony_ci		file = __fget(fd, mask, 1);
9948c2ecf20Sopenharmony_ci		if (!file)
9958c2ecf20Sopenharmony_ci			return 0;
9968c2ecf20Sopenharmony_ci		return FDPUT_FPUT | (unsigned long)file;
9978c2ecf20Sopenharmony_ci	}
9988c2ecf20Sopenharmony_ci}
9998c2ecf20Sopenharmony_ciunsigned long __fdget(unsigned int fd)
10008c2ecf20Sopenharmony_ci{
10018c2ecf20Sopenharmony_ci	return __fget_light(fd, FMODE_PATH);
10028c2ecf20Sopenharmony_ci}
10038c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__fdget);
10048c2ecf20Sopenharmony_ci
10058c2ecf20Sopenharmony_ciunsigned long __fdget_raw(unsigned int fd)
10068c2ecf20Sopenharmony_ci{
10078c2ecf20Sopenharmony_ci	return __fget_light(fd, 0);
10088c2ecf20Sopenharmony_ci}
10098c2ecf20Sopenharmony_ci
10108c2ecf20Sopenharmony_ci/*
10118c2ecf20Sopenharmony_ci * Try to avoid f_pos locking. We only need it if the
10128c2ecf20Sopenharmony_ci * file is marked for FMODE_ATOMIC_POS, and it can be
10138c2ecf20Sopenharmony_ci * accessed multiple ways.
10148c2ecf20Sopenharmony_ci *
10158c2ecf20Sopenharmony_ci * Always do it for directories, because pidfd_getfd()
10168c2ecf20Sopenharmony_ci * can make a file accessible even if it otherwise would
10178c2ecf20Sopenharmony_ci * not be, and for directories this is a correctness
10188c2ecf20Sopenharmony_ci * issue, not a "POSIX requirement".
10198c2ecf20Sopenharmony_ci */
10208c2ecf20Sopenharmony_cistatic inline bool file_needs_f_pos_lock(struct file *file)
10218c2ecf20Sopenharmony_ci{
10228c2ecf20Sopenharmony_ci	return (file->f_mode & FMODE_ATOMIC_POS) &&
10238c2ecf20Sopenharmony_ci		(file_count(file) > 1 || S_ISDIR(file_inode(file)->i_mode));
10248c2ecf20Sopenharmony_ci}
10258c2ecf20Sopenharmony_ci
10268c2ecf20Sopenharmony_ciunsigned long __fdget_pos(unsigned int fd)
10278c2ecf20Sopenharmony_ci{
10288c2ecf20Sopenharmony_ci	unsigned long v = __fdget(fd);
10298c2ecf20Sopenharmony_ci	struct file *file = (struct file *)(v & ~3);
10308c2ecf20Sopenharmony_ci
10318c2ecf20Sopenharmony_ci	if (file && file_needs_f_pos_lock(file)) {
10328c2ecf20Sopenharmony_ci		v |= FDPUT_POS_UNLOCK;
10338c2ecf20Sopenharmony_ci		mutex_lock(&file->f_pos_lock);
10348c2ecf20Sopenharmony_ci	}
10358c2ecf20Sopenharmony_ci	return v;
10368c2ecf20Sopenharmony_ci}
10378c2ecf20Sopenharmony_ci
10388c2ecf20Sopenharmony_civoid __f_unlock_pos(struct file *f)
10398c2ecf20Sopenharmony_ci{
10408c2ecf20Sopenharmony_ci	mutex_unlock(&f->f_pos_lock);
10418c2ecf20Sopenharmony_ci}
10428c2ecf20Sopenharmony_ci
/*
 * Note on the f_pos locking helpers above: we only lock f_pos if we
 * have threads or if the file might be shared with another process.
 * In both cases we'll have an elevated file count (done either by
 * fdget() or by fork()).
 */
10488c2ecf20Sopenharmony_ci
10498c2ecf20Sopenharmony_civoid set_close_on_exec(unsigned int fd, int flag)
10508c2ecf20Sopenharmony_ci{
10518c2ecf20Sopenharmony_ci	struct files_struct *files = current->files;
10528c2ecf20Sopenharmony_ci	struct fdtable *fdt;
10538c2ecf20Sopenharmony_ci	spin_lock(&files->file_lock);
10548c2ecf20Sopenharmony_ci	fdt = files_fdtable(files);
10558c2ecf20Sopenharmony_ci	if (flag)
10568c2ecf20Sopenharmony_ci		__set_close_on_exec(fd, fdt);
10578c2ecf20Sopenharmony_ci	else
10588c2ecf20Sopenharmony_ci		__clear_close_on_exec(fd, fdt);
10598c2ecf20Sopenharmony_ci	spin_unlock(&files->file_lock);
10608c2ecf20Sopenharmony_ci}
10618c2ecf20Sopenharmony_ci
10628c2ecf20Sopenharmony_cibool get_close_on_exec(unsigned int fd)
10638c2ecf20Sopenharmony_ci{
10648c2ecf20Sopenharmony_ci	struct files_struct *files = current->files;
10658c2ecf20Sopenharmony_ci	struct fdtable *fdt;
10668c2ecf20Sopenharmony_ci	bool res;
10678c2ecf20Sopenharmony_ci	rcu_read_lock();
10688c2ecf20Sopenharmony_ci	fdt = files_fdtable(files);
10698c2ecf20Sopenharmony_ci	res = close_on_exec(fd, fdt);
10708c2ecf20Sopenharmony_ci	rcu_read_unlock();
10718c2ecf20Sopenharmony_ci	return res;
10728c2ecf20Sopenharmony_ci}
10738c2ecf20Sopenharmony_ci
10748c2ecf20Sopenharmony_cistatic int do_dup2(struct files_struct *files,
10758c2ecf20Sopenharmony_ci	struct file *file, unsigned fd, unsigned flags)
10768c2ecf20Sopenharmony_ci__releases(&files->file_lock)
10778c2ecf20Sopenharmony_ci{
10788c2ecf20Sopenharmony_ci	struct file *tofree;
10798c2ecf20Sopenharmony_ci	struct fdtable *fdt;
10808c2ecf20Sopenharmony_ci
10818c2ecf20Sopenharmony_ci	/*
10828c2ecf20Sopenharmony_ci	 * We need to detect attempts to do dup2() over allocated but still
10838c2ecf20Sopenharmony_ci	 * not finished descriptor.  NB: OpenBSD avoids that at the price of
10848c2ecf20Sopenharmony_ci	 * extra work in their equivalent of fget() - they insert struct
10858c2ecf20Sopenharmony_ci	 * file immediately after grabbing descriptor, mark it larval if
10868c2ecf20Sopenharmony_ci	 * more work (e.g. actual opening) is needed and make sure that
10878c2ecf20Sopenharmony_ci	 * fget() treats larval files as absent.  Potentially interesting,
10888c2ecf20Sopenharmony_ci	 * but while extra work in fget() is trivial, locking implications
10898c2ecf20Sopenharmony_ci	 * and amount of surgery on open()-related paths in VFS are not.
10908c2ecf20Sopenharmony_ci	 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
10918c2ecf20Sopenharmony_ci	 * deadlocks in rather amusing ways, AFAICS.  All of that is out of
10928c2ecf20Sopenharmony_ci	 * scope of POSIX or SUS, since neither considers shared descriptor
10938c2ecf20Sopenharmony_ci	 * tables and this condition does not arise without those.
10948c2ecf20Sopenharmony_ci	 */
10958c2ecf20Sopenharmony_ci	fdt = files_fdtable(files);
10968c2ecf20Sopenharmony_ci	tofree = fdt->fd[fd];
10978c2ecf20Sopenharmony_ci	if (!tofree && fd_is_open(fd, fdt))
10988c2ecf20Sopenharmony_ci		goto Ebusy;
10998c2ecf20Sopenharmony_ci	get_file(file);
11008c2ecf20Sopenharmony_ci	rcu_assign_pointer(fdt->fd[fd], file);
11018c2ecf20Sopenharmony_ci	__set_open_fd(fd, fdt);
11028c2ecf20Sopenharmony_ci	if (flags & O_CLOEXEC)
11038c2ecf20Sopenharmony_ci		__set_close_on_exec(fd, fdt);
11048c2ecf20Sopenharmony_ci	else
11058c2ecf20Sopenharmony_ci		__clear_close_on_exec(fd, fdt);
11068c2ecf20Sopenharmony_ci	spin_unlock(&files->file_lock);
11078c2ecf20Sopenharmony_ci
11088c2ecf20Sopenharmony_ci	if (tofree)
11098c2ecf20Sopenharmony_ci		filp_close(tofree, files);
11108c2ecf20Sopenharmony_ci
11118c2ecf20Sopenharmony_ci	return fd;
11128c2ecf20Sopenharmony_ci
11138c2ecf20Sopenharmony_ciEbusy:
11148c2ecf20Sopenharmony_ci	spin_unlock(&files->file_lock);
11158c2ecf20Sopenharmony_ci	return -EBUSY;
11168c2ecf20Sopenharmony_ci}
11178c2ecf20Sopenharmony_ci
11188c2ecf20Sopenharmony_ciint replace_fd(unsigned fd, struct file *file, unsigned flags)
11198c2ecf20Sopenharmony_ci{
11208c2ecf20Sopenharmony_ci	int err;
11218c2ecf20Sopenharmony_ci	struct files_struct *files = current->files;
11228c2ecf20Sopenharmony_ci
11238c2ecf20Sopenharmony_ci	if (!file)
11248c2ecf20Sopenharmony_ci		return __close_fd(files, fd);
11258c2ecf20Sopenharmony_ci
11268c2ecf20Sopenharmony_ci	if (fd >= rlimit(RLIMIT_NOFILE))
11278c2ecf20Sopenharmony_ci		return -EBADF;
11288c2ecf20Sopenharmony_ci
11298c2ecf20Sopenharmony_ci	spin_lock(&files->file_lock);
11308c2ecf20Sopenharmony_ci	err = expand_files(files, fd);
11318c2ecf20Sopenharmony_ci	if (unlikely(err < 0))
11328c2ecf20Sopenharmony_ci		goto out_unlock;
11338c2ecf20Sopenharmony_ci	return do_dup2(files, file, fd, flags);
11348c2ecf20Sopenharmony_ci
11358c2ecf20Sopenharmony_ciout_unlock:
11368c2ecf20Sopenharmony_ci	spin_unlock(&files->file_lock);
11378c2ecf20Sopenharmony_ci	return err;
11388c2ecf20Sopenharmony_ci}
11398c2ecf20Sopenharmony_ci
11408c2ecf20Sopenharmony_ci/**
11418c2ecf20Sopenharmony_ci * __receive_fd() - Install received file into file descriptor table
11428c2ecf20Sopenharmony_ci *
11438c2ecf20Sopenharmony_ci * @fd: fd to install into (if negative, a new fd will be allocated)
11448c2ecf20Sopenharmony_ci * @file: struct file that was received from another process
11458c2ecf20Sopenharmony_ci * @ufd: __user pointer to write new fd number to
11468c2ecf20Sopenharmony_ci * @o_flags: the O_* flags to apply to the new fd entry
11478c2ecf20Sopenharmony_ci *
11488c2ecf20Sopenharmony_ci * Installs a received file into the file descriptor table, with appropriate
11498c2ecf20Sopenharmony_ci * checks and count updates. Optionally writes the fd number to userspace, if
11508c2ecf20Sopenharmony_ci * @ufd is non-NULL.
11518c2ecf20Sopenharmony_ci *
11528c2ecf20Sopenharmony_ci * This helper handles its own reference counting of the incoming
11538c2ecf20Sopenharmony_ci * struct file.
11548c2ecf20Sopenharmony_ci *
11558c2ecf20Sopenharmony_ci * Returns newly install fd or -ve on error.
11568c2ecf20Sopenharmony_ci */
11578c2ecf20Sopenharmony_ciint __receive_fd(int fd, struct file *file, int __user *ufd, unsigned int o_flags)
11588c2ecf20Sopenharmony_ci{
11598c2ecf20Sopenharmony_ci	int new_fd;
11608c2ecf20Sopenharmony_ci	int error;
11618c2ecf20Sopenharmony_ci
11628c2ecf20Sopenharmony_ci	error = security_file_receive(file);
11638c2ecf20Sopenharmony_ci	if (error)
11648c2ecf20Sopenharmony_ci		return error;
11658c2ecf20Sopenharmony_ci
11668c2ecf20Sopenharmony_ci	if (fd < 0) {
11678c2ecf20Sopenharmony_ci		new_fd = get_unused_fd_flags(o_flags);
11688c2ecf20Sopenharmony_ci		if (new_fd < 0)
11698c2ecf20Sopenharmony_ci			return new_fd;
11708c2ecf20Sopenharmony_ci	} else {
11718c2ecf20Sopenharmony_ci		new_fd = fd;
11728c2ecf20Sopenharmony_ci	}
11738c2ecf20Sopenharmony_ci
11748c2ecf20Sopenharmony_ci	if (ufd) {
11758c2ecf20Sopenharmony_ci		error = put_user(new_fd, ufd);
11768c2ecf20Sopenharmony_ci		if (error) {
11778c2ecf20Sopenharmony_ci			if (fd < 0)
11788c2ecf20Sopenharmony_ci				put_unused_fd(new_fd);
11798c2ecf20Sopenharmony_ci			return error;
11808c2ecf20Sopenharmony_ci		}
11818c2ecf20Sopenharmony_ci	}
11828c2ecf20Sopenharmony_ci
11838c2ecf20Sopenharmony_ci	if (fd < 0) {
11848c2ecf20Sopenharmony_ci		fd_install(new_fd, get_file(file));
11858c2ecf20Sopenharmony_ci	} else {
11868c2ecf20Sopenharmony_ci		error = replace_fd(new_fd, file, o_flags);
11878c2ecf20Sopenharmony_ci		if (error)
11888c2ecf20Sopenharmony_ci			return error;
11898c2ecf20Sopenharmony_ci	}
11908c2ecf20Sopenharmony_ci
11918c2ecf20Sopenharmony_ci	/* Bump the sock usage counts, if any. */
11928c2ecf20Sopenharmony_ci	__receive_sock(file);
11938c2ecf20Sopenharmony_ci	return new_fd;
11948c2ecf20Sopenharmony_ci}
11958c2ecf20Sopenharmony_ci
11968c2ecf20Sopenharmony_cistatic int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
11978c2ecf20Sopenharmony_ci{
11988c2ecf20Sopenharmony_ci	int err = -EBADF;
11998c2ecf20Sopenharmony_ci	struct file *file;
12008c2ecf20Sopenharmony_ci	struct files_struct *files = current->files;
12018c2ecf20Sopenharmony_ci
12028c2ecf20Sopenharmony_ci	if ((flags & ~O_CLOEXEC) != 0)
12038c2ecf20Sopenharmony_ci		return -EINVAL;
12048c2ecf20Sopenharmony_ci
12058c2ecf20Sopenharmony_ci	if (unlikely(oldfd == newfd))
12068c2ecf20Sopenharmony_ci		return -EINVAL;
12078c2ecf20Sopenharmony_ci
12088c2ecf20Sopenharmony_ci	if (newfd >= rlimit(RLIMIT_NOFILE))
12098c2ecf20Sopenharmony_ci		return -EBADF;
12108c2ecf20Sopenharmony_ci
12118c2ecf20Sopenharmony_ci	spin_lock(&files->file_lock);
12128c2ecf20Sopenharmony_ci	err = expand_files(files, newfd);
12138c2ecf20Sopenharmony_ci	file = files_lookup_fd_locked(files, oldfd);
12148c2ecf20Sopenharmony_ci	if (unlikely(!file))
12158c2ecf20Sopenharmony_ci		goto Ebadf;
12168c2ecf20Sopenharmony_ci	if (unlikely(err < 0)) {
12178c2ecf20Sopenharmony_ci		if (err == -EMFILE)
12188c2ecf20Sopenharmony_ci			goto Ebadf;
12198c2ecf20Sopenharmony_ci		goto out_unlock;
12208c2ecf20Sopenharmony_ci	}
12218c2ecf20Sopenharmony_ci	return do_dup2(files, file, newfd, flags);
12228c2ecf20Sopenharmony_ci
12238c2ecf20Sopenharmony_ciEbadf:
12248c2ecf20Sopenharmony_ci	err = -EBADF;
12258c2ecf20Sopenharmony_ciout_unlock:
12268c2ecf20Sopenharmony_ci	spin_unlock(&files->file_lock);
12278c2ecf20Sopenharmony_ci	return err;
12288c2ecf20Sopenharmony_ci}
12298c2ecf20Sopenharmony_ci
12308c2ecf20Sopenharmony_ciSYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
12318c2ecf20Sopenharmony_ci{
12328c2ecf20Sopenharmony_ci	return ksys_dup3(oldfd, newfd, flags);
12338c2ecf20Sopenharmony_ci}
12348c2ecf20Sopenharmony_ci
12358c2ecf20Sopenharmony_ciSYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
12368c2ecf20Sopenharmony_ci{
12378c2ecf20Sopenharmony_ci	if (unlikely(newfd == oldfd)) { /* corner case */
12388c2ecf20Sopenharmony_ci		struct files_struct *files = current->files;
12398c2ecf20Sopenharmony_ci		int retval = oldfd;
12408c2ecf20Sopenharmony_ci
12418c2ecf20Sopenharmony_ci		rcu_read_lock();
12428c2ecf20Sopenharmony_ci		if (!fcheck_files(files, oldfd))
12438c2ecf20Sopenharmony_ci			retval = -EBADF;
12448c2ecf20Sopenharmony_ci		rcu_read_unlock();
12458c2ecf20Sopenharmony_ci		return retval;
12468c2ecf20Sopenharmony_ci	}
12478c2ecf20Sopenharmony_ci	return ksys_dup3(oldfd, newfd, 0);
12488c2ecf20Sopenharmony_ci}
12498c2ecf20Sopenharmony_ci
12508c2ecf20Sopenharmony_ciSYSCALL_DEFINE1(dup, unsigned int, fildes)
12518c2ecf20Sopenharmony_ci{
12528c2ecf20Sopenharmony_ci	int ret = -EBADF;
12538c2ecf20Sopenharmony_ci	struct file *file = fget_raw(fildes);
12548c2ecf20Sopenharmony_ci
12558c2ecf20Sopenharmony_ci	if (file) {
12568c2ecf20Sopenharmony_ci		ret = get_unused_fd_flags(0);
12578c2ecf20Sopenharmony_ci		if (ret >= 0)
12588c2ecf20Sopenharmony_ci			fd_install(ret, file);
12598c2ecf20Sopenharmony_ci		else
12608c2ecf20Sopenharmony_ci			fput(file);
12618c2ecf20Sopenharmony_ci	}
12628c2ecf20Sopenharmony_ci	return ret;
12638c2ecf20Sopenharmony_ci}
12648c2ecf20Sopenharmony_ci
12658c2ecf20Sopenharmony_ciint f_dupfd(unsigned int from, struct file *file, unsigned flags)
12668c2ecf20Sopenharmony_ci{
12678c2ecf20Sopenharmony_ci	int err;
12688c2ecf20Sopenharmony_ci	if (from >= rlimit(RLIMIT_NOFILE))
12698c2ecf20Sopenharmony_ci		return -EINVAL;
12708c2ecf20Sopenharmony_ci	err = alloc_fd(from, flags);
12718c2ecf20Sopenharmony_ci	if (err >= 0) {
12728c2ecf20Sopenharmony_ci		get_file(file);
12738c2ecf20Sopenharmony_ci		fd_install(err, file);
12748c2ecf20Sopenharmony_ci	}
12758c2ecf20Sopenharmony_ci	return err;
12768c2ecf20Sopenharmony_ci}
12778c2ecf20Sopenharmony_ci
12788c2ecf20Sopenharmony_ciint iterate_fd(struct files_struct *files, unsigned n,
12798c2ecf20Sopenharmony_ci		int (*f)(const void *, struct file *, unsigned),
12808c2ecf20Sopenharmony_ci		const void *p)
12818c2ecf20Sopenharmony_ci{
12828c2ecf20Sopenharmony_ci	struct fdtable *fdt;
12838c2ecf20Sopenharmony_ci	int res = 0;
12848c2ecf20Sopenharmony_ci	if (!files)
12858c2ecf20Sopenharmony_ci		return 0;
12868c2ecf20Sopenharmony_ci	spin_lock(&files->file_lock);
12878c2ecf20Sopenharmony_ci	for (fdt = files_fdtable(files); n < fdt->max_fds; n++) {
12888c2ecf20Sopenharmony_ci		struct file *file;
12898c2ecf20Sopenharmony_ci		file = rcu_dereference_check_fdtable(files, fdt->fd[n]);
12908c2ecf20Sopenharmony_ci		if (!file)
12918c2ecf20Sopenharmony_ci			continue;
12928c2ecf20Sopenharmony_ci		res = f(p, file, n);
12938c2ecf20Sopenharmony_ci		if (res)
12948c2ecf20Sopenharmony_ci			break;
12958c2ecf20Sopenharmony_ci	}
12968c2ecf20Sopenharmony_ci	spin_unlock(&files->file_lock);
12978c2ecf20Sopenharmony_ci	return res;
12988c2ecf20Sopenharmony_ci}
12998c2ecf20Sopenharmony_ciEXPORT_SYMBOL(iterate_fd);
1300