162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * fs/kernfs/file.c - kernfs file implementation
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (c) 2001-3 Patrick Mochel
662306a36Sopenharmony_ci * Copyright (c) 2007 SUSE Linux Products GmbH
762306a36Sopenharmony_ci * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
862306a36Sopenharmony_ci */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/fs.h>
1162306a36Sopenharmony_ci#include <linux/seq_file.h>
1262306a36Sopenharmony_ci#include <linux/slab.h>
1362306a36Sopenharmony_ci#include <linux/poll.h>
1462306a36Sopenharmony_ci#include <linux/pagemap.h>
1562306a36Sopenharmony_ci#include <linux/sched/mm.h>
1662306a36Sopenharmony_ci#include <linux/fsnotify.h>
1762306a36Sopenharmony_ci#include <linux/uio.h>
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci#include "kernfs-internal.h"
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_cistruct kernfs_open_node {
2262306a36Sopenharmony_ci	struct rcu_head		rcu_head;
2362306a36Sopenharmony_ci	atomic_t		event;
2462306a36Sopenharmony_ci	wait_queue_head_t	poll;
2562306a36Sopenharmony_ci	struct list_head	files; /* goes through kernfs_open_file.list */
2662306a36Sopenharmony_ci	unsigned int		nr_mmapped;
2762306a36Sopenharmony_ci	unsigned int		nr_to_release;
2862306a36Sopenharmony_ci};
2962306a36Sopenharmony_ci
/*
 * kernfs_notify() can be invoked from any context, so notifications are
 * bounced through a work item.  To keep the space overhead in kernfs_node
 * minimal, the pending queue is a singly linked list threaded through the
 * kernfs_nodes themselves.
 *
 * The list is terminated with a pointer to the list head itself (see
 * KERNFS_NOTIFY_EOL) rather than %NULL.  That way, whether a kernfs_node
 * is queued or not can be determined simply by testing its next pointer
 * for %NULL.
 */
static DEFINE_SPINLOCK(kernfs_notify_lock);

#define KERNFS_NOTIFY_EOL	((void *)&kernfs_notify_list)

static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_cistatic inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn)
4462306a36Sopenharmony_ci{
4562306a36Sopenharmony_ci	int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci	return &kernfs_locks->open_file_mutex[idx];
4862306a36Sopenharmony_ci}
4962306a36Sopenharmony_ci
/*
 * Acquire the open-file mutex covering @kn and hand it back so that the
 * caller can release it with mutex_unlock() later.
 */
static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn)
{
	struct mutex *m = kernfs_open_file_mutex_ptr(kn);

	mutex_lock(m);
	return m;
}
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci/**
6262306a36Sopenharmony_ci * of_on - Get the kernfs_open_node of the specified kernfs_open_file
6362306a36Sopenharmony_ci * @of: target kernfs_open_file
6462306a36Sopenharmony_ci *
6562306a36Sopenharmony_ci * Return: the kernfs_open_node of the kernfs_open_file
6662306a36Sopenharmony_ci */
6762306a36Sopenharmony_cistatic struct kernfs_open_node *of_on(struct kernfs_open_file *of)
6862306a36Sopenharmony_ci{
6962306a36Sopenharmony_ci	return rcu_dereference_protected(of->kn->attr.open,
7062306a36Sopenharmony_ci					 !list_empty(&of->list));
7162306a36Sopenharmony_ci}
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci/**
7462306a36Sopenharmony_ci * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn
7562306a36Sopenharmony_ci *
7662306a36Sopenharmony_ci * @kn: target kernfs_node.
7762306a36Sopenharmony_ci *
7862306a36Sopenharmony_ci * Fetch and return ->attr.open of @kn when caller holds the
7962306a36Sopenharmony_ci * kernfs_open_file_mutex_ptr(kn).
8062306a36Sopenharmony_ci *
8162306a36Sopenharmony_ci * Update of ->attr.open happens under kernfs_open_file_mutex_ptr(kn). So when
8262306a36Sopenharmony_ci * the caller guarantees that this mutex is being held, other updaters can't
8362306a36Sopenharmony_ci * change ->attr.open and this means that we can safely deref ->attr.open
8462306a36Sopenharmony_ci * outside RCU read-side critical section.
8562306a36Sopenharmony_ci *
8662306a36Sopenharmony_ci * The caller needs to make sure that kernfs_open_file_mutex is held.
8762306a36Sopenharmony_ci *
8862306a36Sopenharmony_ci * Return: @kn->attr.open when kernfs_open_file_mutex is held.
8962306a36Sopenharmony_ci */
9062306a36Sopenharmony_cistatic struct kernfs_open_node *
9162306a36Sopenharmony_cikernfs_deref_open_node_locked(struct kernfs_node *kn)
9262306a36Sopenharmony_ci{
9362306a36Sopenharmony_ci	return rcu_dereference_protected(kn->attr.open,
9462306a36Sopenharmony_ci				lockdep_is_held(kernfs_open_file_mutex_ptr(kn)));
9562306a36Sopenharmony_ci}
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_cistatic struct kernfs_open_file *kernfs_of(struct file *file)
9862306a36Sopenharmony_ci{
9962306a36Sopenharmony_ci	return ((struct seq_file *)file->private_data)->private;
10062306a36Sopenharmony_ci}
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci/*
10362306a36Sopenharmony_ci * Determine the kernfs_ops for the given kernfs_node.  This function must
10462306a36Sopenharmony_ci * be called while holding an active reference.
10562306a36Sopenharmony_ci */
10662306a36Sopenharmony_cistatic const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn)
10762306a36Sopenharmony_ci{
10862306a36Sopenharmony_ci	if (kn->flags & KERNFS_LOCKDEP)
10962306a36Sopenharmony_ci		lockdep_assert_held(kn);
11062306a36Sopenharmony_ci	return kn->attr.ops;
11162306a36Sopenharmony_ci}
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci/*
11462306a36Sopenharmony_ci * As kernfs_seq_stop() is also called after kernfs_seq_start() or
11562306a36Sopenharmony_ci * kernfs_seq_next() failure, it needs to distinguish whether it's stopping
11662306a36Sopenharmony_ci * a seq_file iteration which is fully initialized with an active reference
11762306a36Sopenharmony_ci * or an aborted kernfs_seq_start() due to get_active failure.  The
11862306a36Sopenharmony_ci * position pointer is the only context for each seq_file iteration and
11962306a36Sopenharmony_ci * thus the stop condition should be encoded in it.  As the return value is
12062306a36Sopenharmony_ci * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable
12162306a36Sopenharmony_ci * choice to indicate get_active failure.
12262306a36Sopenharmony_ci *
12362306a36Sopenharmony_ci * Unfortunately, this is complicated due to the optional custom seq_file
12462306a36Sopenharmony_ci * operations which may return ERR_PTR(-ENODEV) too.  kernfs_seq_stop()
12562306a36Sopenharmony_ci * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or
12662306a36Sopenharmony_ci * custom seq_file operations and thus can't decide whether put_active
12762306a36Sopenharmony_ci * should be performed or not only on ERR_PTR(-ENODEV).
12862306a36Sopenharmony_ci *
12962306a36Sopenharmony_ci * This is worked around by factoring out the custom seq_stop() and
13062306a36Sopenharmony_ci * put_active part into kernfs_seq_stop_active(), skipping it from
13162306a36Sopenharmony_ci * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after
13262306a36Sopenharmony_ci * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures
13362306a36Sopenharmony_ci * that kernfs_seq_stop_active() is skipped only after get_active failure.
13462306a36Sopenharmony_ci */
13562306a36Sopenharmony_cistatic void kernfs_seq_stop_active(struct seq_file *sf, void *v)
13662306a36Sopenharmony_ci{
13762306a36Sopenharmony_ci	struct kernfs_open_file *of = sf->private;
13862306a36Sopenharmony_ci	const struct kernfs_ops *ops = kernfs_ops(of->kn);
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci	if (ops->seq_stop)
14162306a36Sopenharmony_ci		ops->seq_stop(sf, v);
14262306a36Sopenharmony_ci	kernfs_put_active(of->kn);
14362306a36Sopenharmony_ci}
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_cistatic void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
14662306a36Sopenharmony_ci{
14762306a36Sopenharmony_ci	struct kernfs_open_file *of = sf->private;
14862306a36Sopenharmony_ci	const struct kernfs_ops *ops;
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci	/*
15162306a36Sopenharmony_ci	 * @of->mutex nests outside active ref and is primarily to ensure that
15262306a36Sopenharmony_ci	 * the ops aren't called concurrently for the same open file.
15362306a36Sopenharmony_ci	 */
15462306a36Sopenharmony_ci	mutex_lock(&of->mutex);
15562306a36Sopenharmony_ci	if (!kernfs_get_active(of->kn))
15662306a36Sopenharmony_ci		return ERR_PTR(-ENODEV);
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci	ops = kernfs_ops(of->kn);
15962306a36Sopenharmony_ci	if (ops->seq_start) {
16062306a36Sopenharmony_ci		void *next = ops->seq_start(sf, ppos);
16162306a36Sopenharmony_ci		/* see the comment above kernfs_seq_stop_active() */
16262306a36Sopenharmony_ci		if (next == ERR_PTR(-ENODEV))
16362306a36Sopenharmony_ci			kernfs_seq_stop_active(sf, next);
16462306a36Sopenharmony_ci		return next;
16562306a36Sopenharmony_ci	}
16662306a36Sopenharmony_ci	return single_start(sf, ppos);
16762306a36Sopenharmony_ci}
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_cistatic void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
17062306a36Sopenharmony_ci{
17162306a36Sopenharmony_ci	struct kernfs_open_file *of = sf->private;
17262306a36Sopenharmony_ci	const struct kernfs_ops *ops = kernfs_ops(of->kn);
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci	if (ops->seq_next) {
17562306a36Sopenharmony_ci		void *next = ops->seq_next(sf, v, ppos);
17662306a36Sopenharmony_ci		/* see the comment above kernfs_seq_stop_active() */
17762306a36Sopenharmony_ci		if (next == ERR_PTR(-ENODEV))
17862306a36Sopenharmony_ci			kernfs_seq_stop_active(sf, next);
17962306a36Sopenharmony_ci		return next;
18062306a36Sopenharmony_ci	} else {
18162306a36Sopenharmony_ci		/*
18262306a36Sopenharmony_ci		 * The same behavior and code as single_open(), always
18362306a36Sopenharmony_ci		 * terminate after the initial read.
18462306a36Sopenharmony_ci		 */
18562306a36Sopenharmony_ci		++*ppos;
18662306a36Sopenharmony_ci		return NULL;
18762306a36Sopenharmony_ci	}
18862306a36Sopenharmony_ci}
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_cistatic void kernfs_seq_stop(struct seq_file *sf, void *v)
19162306a36Sopenharmony_ci{
19262306a36Sopenharmony_ci	struct kernfs_open_file *of = sf->private;
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci	if (v != ERR_PTR(-ENODEV))
19562306a36Sopenharmony_ci		kernfs_seq_stop_active(sf, v);
19662306a36Sopenharmony_ci	mutex_unlock(&of->mutex);
19762306a36Sopenharmony_ci}
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_cistatic int kernfs_seq_show(struct seq_file *sf, void *v)
20062306a36Sopenharmony_ci{
20162306a36Sopenharmony_ci	struct kernfs_open_file *of = sf->private;
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	of->event = atomic_read(&of_on(of)->event);
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci	return of->kn->attr.ops->seq_show(sf, v);
20662306a36Sopenharmony_ci}
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_cistatic const struct seq_operations kernfs_seq_ops = {
20962306a36Sopenharmony_ci	.start = kernfs_seq_start,
21062306a36Sopenharmony_ci	.next = kernfs_seq_next,
21162306a36Sopenharmony_ci	.stop = kernfs_seq_stop,
21262306a36Sopenharmony_ci	.show = kernfs_seq_show,
21362306a36Sopenharmony_ci};
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci/*
21662306a36Sopenharmony_ci * As reading a bin file can have side-effects, the exact offset and bytes
21762306a36Sopenharmony_ci * specified in read(2) call should be passed to the read callback making
21862306a36Sopenharmony_ci * it difficult to use seq_file.  Implement simplistic custom buffering for
21962306a36Sopenharmony_ci * bin files.
22062306a36Sopenharmony_ci */
22162306a36Sopenharmony_cistatic ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
22262306a36Sopenharmony_ci{
22362306a36Sopenharmony_ci	struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
22462306a36Sopenharmony_ci	ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE);
22562306a36Sopenharmony_ci	const struct kernfs_ops *ops;
22662306a36Sopenharmony_ci	char *buf;
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci	buf = of->prealloc_buf;
22962306a36Sopenharmony_ci	if (buf)
23062306a36Sopenharmony_ci		mutex_lock(&of->prealloc_mutex);
23162306a36Sopenharmony_ci	else
23262306a36Sopenharmony_ci		buf = kmalloc(len, GFP_KERNEL);
23362306a36Sopenharmony_ci	if (!buf)
23462306a36Sopenharmony_ci		return -ENOMEM;
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	/*
23762306a36Sopenharmony_ci	 * @of->mutex nests outside active ref and is used both to ensure that
23862306a36Sopenharmony_ci	 * the ops aren't called concurrently for the same open file.
23962306a36Sopenharmony_ci	 */
24062306a36Sopenharmony_ci	mutex_lock(&of->mutex);
24162306a36Sopenharmony_ci	if (!kernfs_get_active(of->kn)) {
24262306a36Sopenharmony_ci		len = -ENODEV;
24362306a36Sopenharmony_ci		mutex_unlock(&of->mutex);
24462306a36Sopenharmony_ci		goto out_free;
24562306a36Sopenharmony_ci	}
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci	of->event = atomic_read(&of_on(of)->event);
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci	ops = kernfs_ops(of->kn);
25062306a36Sopenharmony_ci	if (ops->read)
25162306a36Sopenharmony_ci		len = ops->read(of, buf, len, iocb->ki_pos);
25262306a36Sopenharmony_ci	else
25362306a36Sopenharmony_ci		len = -EINVAL;
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_ci	kernfs_put_active(of->kn);
25662306a36Sopenharmony_ci	mutex_unlock(&of->mutex);
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_ci	if (len < 0)
25962306a36Sopenharmony_ci		goto out_free;
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci	if (copy_to_iter(buf, len, iter) != len) {
26262306a36Sopenharmony_ci		len = -EFAULT;
26362306a36Sopenharmony_ci		goto out_free;
26462306a36Sopenharmony_ci	}
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_ci	iocb->ki_pos += len;
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci out_free:
26962306a36Sopenharmony_ci	if (buf == of->prealloc_buf)
27062306a36Sopenharmony_ci		mutex_unlock(&of->prealloc_mutex);
27162306a36Sopenharmony_ci	else
27262306a36Sopenharmony_ci		kfree(buf);
27362306a36Sopenharmony_ci	return len;
27462306a36Sopenharmony_ci}
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_cistatic ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter)
27762306a36Sopenharmony_ci{
27862306a36Sopenharmony_ci	if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW)
27962306a36Sopenharmony_ci		return seq_read_iter(iocb, iter);
28062306a36Sopenharmony_ci	return kernfs_file_read_iter(iocb, iter);
28162306a36Sopenharmony_ci}
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci/*
28462306a36Sopenharmony_ci * Copy data in from userland and pass it to the matching kernfs write
28562306a36Sopenharmony_ci * operation.
28662306a36Sopenharmony_ci *
28762306a36Sopenharmony_ci * There is no easy way for us to know if userspace is only doing a partial
28862306a36Sopenharmony_ci * write, so we don't support them. We expect the entire buffer to come on
28962306a36Sopenharmony_ci * the first write.  Hint: if you're writing a value, first read the file,
29062306a36Sopenharmony_ci * modify only the value you're changing, then write entire buffer
29162306a36Sopenharmony_ci * back.
29262306a36Sopenharmony_ci */
29362306a36Sopenharmony_cistatic ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
29462306a36Sopenharmony_ci{
29562306a36Sopenharmony_ci	struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
29662306a36Sopenharmony_ci	ssize_t len = iov_iter_count(iter);
29762306a36Sopenharmony_ci	const struct kernfs_ops *ops;
29862306a36Sopenharmony_ci	char *buf;
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci	if (of->atomic_write_len) {
30162306a36Sopenharmony_ci		if (len > of->atomic_write_len)
30262306a36Sopenharmony_ci			return -E2BIG;
30362306a36Sopenharmony_ci	} else {
30462306a36Sopenharmony_ci		len = min_t(size_t, len, PAGE_SIZE);
30562306a36Sopenharmony_ci	}
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_ci	buf = of->prealloc_buf;
30862306a36Sopenharmony_ci	if (buf)
30962306a36Sopenharmony_ci		mutex_lock(&of->prealloc_mutex);
31062306a36Sopenharmony_ci	else
31162306a36Sopenharmony_ci		buf = kmalloc(len + 1, GFP_KERNEL);
31262306a36Sopenharmony_ci	if (!buf)
31362306a36Sopenharmony_ci		return -ENOMEM;
31462306a36Sopenharmony_ci
31562306a36Sopenharmony_ci	if (copy_from_iter(buf, len, iter) != len) {
31662306a36Sopenharmony_ci		len = -EFAULT;
31762306a36Sopenharmony_ci		goto out_free;
31862306a36Sopenharmony_ci	}
31962306a36Sopenharmony_ci	buf[len] = '\0';	/* guarantee string termination */
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci	/*
32262306a36Sopenharmony_ci	 * @of->mutex nests outside active ref and is used both to ensure that
32362306a36Sopenharmony_ci	 * the ops aren't called concurrently for the same open file.
32462306a36Sopenharmony_ci	 */
32562306a36Sopenharmony_ci	mutex_lock(&of->mutex);
32662306a36Sopenharmony_ci	if (!kernfs_get_active(of->kn)) {
32762306a36Sopenharmony_ci		mutex_unlock(&of->mutex);
32862306a36Sopenharmony_ci		len = -ENODEV;
32962306a36Sopenharmony_ci		goto out_free;
33062306a36Sopenharmony_ci	}
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_ci	ops = kernfs_ops(of->kn);
33362306a36Sopenharmony_ci	if (ops->write)
33462306a36Sopenharmony_ci		len = ops->write(of, buf, len, iocb->ki_pos);
33562306a36Sopenharmony_ci	else
33662306a36Sopenharmony_ci		len = -EINVAL;
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci	kernfs_put_active(of->kn);
33962306a36Sopenharmony_ci	mutex_unlock(&of->mutex);
34062306a36Sopenharmony_ci
34162306a36Sopenharmony_ci	if (len > 0)
34262306a36Sopenharmony_ci		iocb->ki_pos += len;
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ciout_free:
34562306a36Sopenharmony_ci	if (buf == of->prealloc_buf)
34662306a36Sopenharmony_ci		mutex_unlock(&of->prealloc_mutex);
34762306a36Sopenharmony_ci	else
34862306a36Sopenharmony_ci		kfree(buf);
34962306a36Sopenharmony_ci	return len;
35062306a36Sopenharmony_ci}
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_cistatic void kernfs_vma_open(struct vm_area_struct *vma)
35362306a36Sopenharmony_ci{
35462306a36Sopenharmony_ci	struct file *file = vma->vm_file;
35562306a36Sopenharmony_ci	struct kernfs_open_file *of = kernfs_of(file);
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_ci	if (!of->vm_ops)
35862306a36Sopenharmony_ci		return;
35962306a36Sopenharmony_ci
36062306a36Sopenharmony_ci	if (!kernfs_get_active(of->kn))
36162306a36Sopenharmony_ci		return;
36262306a36Sopenharmony_ci
36362306a36Sopenharmony_ci	if (of->vm_ops->open)
36462306a36Sopenharmony_ci		of->vm_ops->open(vma);
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ci	kernfs_put_active(of->kn);
36762306a36Sopenharmony_ci}
36862306a36Sopenharmony_ci
36962306a36Sopenharmony_cistatic vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
37062306a36Sopenharmony_ci{
37162306a36Sopenharmony_ci	struct file *file = vmf->vma->vm_file;
37262306a36Sopenharmony_ci	struct kernfs_open_file *of = kernfs_of(file);
37362306a36Sopenharmony_ci	vm_fault_t ret;
37462306a36Sopenharmony_ci
37562306a36Sopenharmony_ci	if (!of->vm_ops)
37662306a36Sopenharmony_ci		return VM_FAULT_SIGBUS;
37762306a36Sopenharmony_ci
37862306a36Sopenharmony_ci	if (!kernfs_get_active(of->kn))
37962306a36Sopenharmony_ci		return VM_FAULT_SIGBUS;
38062306a36Sopenharmony_ci
38162306a36Sopenharmony_ci	ret = VM_FAULT_SIGBUS;
38262306a36Sopenharmony_ci	if (of->vm_ops->fault)
38362306a36Sopenharmony_ci		ret = of->vm_ops->fault(vmf);
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_ci	kernfs_put_active(of->kn);
38662306a36Sopenharmony_ci	return ret;
38762306a36Sopenharmony_ci}
38862306a36Sopenharmony_ci
38962306a36Sopenharmony_cistatic vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
39062306a36Sopenharmony_ci{
39162306a36Sopenharmony_ci	struct file *file = vmf->vma->vm_file;
39262306a36Sopenharmony_ci	struct kernfs_open_file *of = kernfs_of(file);
39362306a36Sopenharmony_ci	vm_fault_t ret;
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_ci	if (!of->vm_ops)
39662306a36Sopenharmony_ci		return VM_FAULT_SIGBUS;
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_ci	if (!kernfs_get_active(of->kn))
39962306a36Sopenharmony_ci		return VM_FAULT_SIGBUS;
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci	ret = 0;
40262306a36Sopenharmony_ci	if (of->vm_ops->page_mkwrite)
40362306a36Sopenharmony_ci		ret = of->vm_ops->page_mkwrite(vmf);
40462306a36Sopenharmony_ci	else
40562306a36Sopenharmony_ci		file_update_time(file);
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_ci	kernfs_put_active(of->kn);
40862306a36Sopenharmony_ci	return ret;
40962306a36Sopenharmony_ci}
41062306a36Sopenharmony_ci
41162306a36Sopenharmony_cistatic int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
41262306a36Sopenharmony_ci			     void *buf, int len, int write)
41362306a36Sopenharmony_ci{
41462306a36Sopenharmony_ci	struct file *file = vma->vm_file;
41562306a36Sopenharmony_ci	struct kernfs_open_file *of = kernfs_of(file);
41662306a36Sopenharmony_ci	int ret;
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_ci	if (!of->vm_ops)
41962306a36Sopenharmony_ci		return -EINVAL;
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_ci	if (!kernfs_get_active(of->kn))
42262306a36Sopenharmony_ci		return -EINVAL;
42362306a36Sopenharmony_ci
42462306a36Sopenharmony_ci	ret = -EINVAL;
42562306a36Sopenharmony_ci	if (of->vm_ops->access)
42662306a36Sopenharmony_ci		ret = of->vm_ops->access(vma, addr, buf, len, write);
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_ci	kernfs_put_active(of->kn);
42962306a36Sopenharmony_ci	return ret;
43062306a36Sopenharmony_ci}
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci#ifdef CONFIG_NUMA
43362306a36Sopenharmony_cistatic int kernfs_vma_set_policy(struct vm_area_struct *vma,
43462306a36Sopenharmony_ci				 struct mempolicy *new)
43562306a36Sopenharmony_ci{
43662306a36Sopenharmony_ci	struct file *file = vma->vm_file;
43762306a36Sopenharmony_ci	struct kernfs_open_file *of = kernfs_of(file);
43862306a36Sopenharmony_ci	int ret;
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci	if (!of->vm_ops)
44162306a36Sopenharmony_ci		return 0;
44262306a36Sopenharmony_ci
44362306a36Sopenharmony_ci	if (!kernfs_get_active(of->kn))
44462306a36Sopenharmony_ci		return -EINVAL;
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_ci	ret = 0;
44762306a36Sopenharmony_ci	if (of->vm_ops->set_policy)
44862306a36Sopenharmony_ci		ret = of->vm_ops->set_policy(vma, new);
44962306a36Sopenharmony_ci
45062306a36Sopenharmony_ci	kernfs_put_active(of->kn);
45162306a36Sopenharmony_ci	return ret;
45262306a36Sopenharmony_ci}
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_cistatic struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
45562306a36Sopenharmony_ci					       unsigned long addr)
45662306a36Sopenharmony_ci{
45762306a36Sopenharmony_ci	struct file *file = vma->vm_file;
45862306a36Sopenharmony_ci	struct kernfs_open_file *of = kernfs_of(file);
45962306a36Sopenharmony_ci	struct mempolicy *pol;
46062306a36Sopenharmony_ci
46162306a36Sopenharmony_ci	if (!of->vm_ops)
46262306a36Sopenharmony_ci		return vma->vm_policy;
46362306a36Sopenharmony_ci
46462306a36Sopenharmony_ci	if (!kernfs_get_active(of->kn))
46562306a36Sopenharmony_ci		return vma->vm_policy;
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci	pol = vma->vm_policy;
46862306a36Sopenharmony_ci	if (of->vm_ops->get_policy)
46962306a36Sopenharmony_ci		pol = of->vm_ops->get_policy(vma, addr);
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_ci	kernfs_put_active(of->kn);
47262306a36Sopenharmony_ci	return pol;
47362306a36Sopenharmony_ci}
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_ci#endif
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_cistatic const struct vm_operations_struct kernfs_vm_ops = {
47862306a36Sopenharmony_ci	.open		= kernfs_vma_open,
47962306a36Sopenharmony_ci	.fault		= kernfs_vma_fault,
48062306a36Sopenharmony_ci	.page_mkwrite	= kernfs_vma_page_mkwrite,
48162306a36Sopenharmony_ci	.access		= kernfs_vma_access,
48262306a36Sopenharmony_ci#ifdef CONFIG_NUMA
48362306a36Sopenharmony_ci	.set_policy	= kernfs_vma_set_policy,
48462306a36Sopenharmony_ci	.get_policy	= kernfs_vma_get_policy,
48562306a36Sopenharmony_ci#endif
48662306a36Sopenharmony_ci};
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_cistatic int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
48962306a36Sopenharmony_ci{
49062306a36Sopenharmony_ci	struct kernfs_open_file *of = kernfs_of(file);
49162306a36Sopenharmony_ci	const struct kernfs_ops *ops;
49262306a36Sopenharmony_ci	int rc;
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_ci	/*
49562306a36Sopenharmony_ci	 * mmap path and of->mutex are prone to triggering spurious lockdep
49662306a36Sopenharmony_ci	 * warnings and we don't want to add spurious locking dependency
49762306a36Sopenharmony_ci	 * between the two.  Check whether mmap is actually implemented
49862306a36Sopenharmony_ci	 * without grabbing @of->mutex by testing HAS_MMAP flag.  See the
49962306a36Sopenharmony_ci	 * comment in kernfs_file_open() for more details.
50062306a36Sopenharmony_ci	 */
50162306a36Sopenharmony_ci	if (!(of->kn->flags & KERNFS_HAS_MMAP))
50262306a36Sopenharmony_ci		return -ENODEV;
50362306a36Sopenharmony_ci
50462306a36Sopenharmony_ci	mutex_lock(&of->mutex);
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_ci	rc = -ENODEV;
50762306a36Sopenharmony_ci	if (!kernfs_get_active(of->kn))
50862306a36Sopenharmony_ci		goto out_unlock;
50962306a36Sopenharmony_ci
51062306a36Sopenharmony_ci	ops = kernfs_ops(of->kn);
51162306a36Sopenharmony_ci	rc = ops->mmap(of, vma);
51262306a36Sopenharmony_ci	if (rc)
51362306a36Sopenharmony_ci		goto out_put;
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci	/*
51662306a36Sopenharmony_ci	 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
51762306a36Sopenharmony_ci	 * to satisfy versions of X which crash if the mmap fails: that
51862306a36Sopenharmony_ci	 * substitutes a new vm_file, and we don't then want bin_vm_ops.
51962306a36Sopenharmony_ci	 */
52062306a36Sopenharmony_ci	if (vma->vm_file != file)
52162306a36Sopenharmony_ci		goto out_put;
52262306a36Sopenharmony_ci
52362306a36Sopenharmony_ci	rc = -EINVAL;
52462306a36Sopenharmony_ci	if (of->mmapped && of->vm_ops != vma->vm_ops)
52562306a36Sopenharmony_ci		goto out_put;
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci	/*
52862306a36Sopenharmony_ci	 * It is not possible to successfully wrap close.
52962306a36Sopenharmony_ci	 * So error if someone is trying to use close.
53062306a36Sopenharmony_ci	 */
53162306a36Sopenharmony_ci	if (vma->vm_ops && vma->vm_ops->close)
53262306a36Sopenharmony_ci		goto out_put;
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_ci	rc = 0;
53562306a36Sopenharmony_ci	of->mmapped = true;
53662306a36Sopenharmony_ci	of_on(of)->nr_mmapped++;
53762306a36Sopenharmony_ci	of->vm_ops = vma->vm_ops;
53862306a36Sopenharmony_ci	vma->vm_ops = &kernfs_vm_ops;
53962306a36Sopenharmony_ciout_put:
54062306a36Sopenharmony_ci	kernfs_put_active(of->kn);
54162306a36Sopenharmony_ciout_unlock:
54262306a36Sopenharmony_ci	mutex_unlock(&of->mutex);
54362306a36Sopenharmony_ci
54462306a36Sopenharmony_ci	return rc;
54562306a36Sopenharmony_ci}
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ci/**
54862306a36Sopenharmony_ci *	kernfs_get_open_node - get or create kernfs_open_node
54962306a36Sopenharmony_ci *	@kn: target kernfs_node
55062306a36Sopenharmony_ci *	@of: kernfs_open_file for this instance of open
55162306a36Sopenharmony_ci *
55262306a36Sopenharmony_ci *	If @kn->attr.open exists, increment its reference count; otherwise,
55362306a36Sopenharmony_ci *	create one.  @of is chained to the files list.
55462306a36Sopenharmony_ci *
55562306a36Sopenharmony_ci *	Locking:
55662306a36Sopenharmony_ci *	Kernel thread context (may sleep).
55762306a36Sopenharmony_ci *
55862306a36Sopenharmony_ci *	Return:
55962306a36Sopenharmony_ci *	%0 on success, -errno on failure.
56062306a36Sopenharmony_ci */
56162306a36Sopenharmony_cistatic int kernfs_get_open_node(struct kernfs_node *kn,
56262306a36Sopenharmony_ci				struct kernfs_open_file *of)
56362306a36Sopenharmony_ci{
56462306a36Sopenharmony_ci	struct kernfs_open_node *on;
56562306a36Sopenharmony_ci	struct mutex *mutex;
56662306a36Sopenharmony_ci
56762306a36Sopenharmony_ci	mutex = kernfs_open_file_mutex_lock(kn);
56862306a36Sopenharmony_ci	on = kernfs_deref_open_node_locked(kn);
56962306a36Sopenharmony_ci
57062306a36Sopenharmony_ci	if (!on) {
57162306a36Sopenharmony_ci		/* not there, initialize a new one */
57262306a36Sopenharmony_ci		on = kzalloc(sizeof(*on), GFP_KERNEL);
57362306a36Sopenharmony_ci		if (!on) {
57462306a36Sopenharmony_ci			mutex_unlock(mutex);
57562306a36Sopenharmony_ci			return -ENOMEM;
57662306a36Sopenharmony_ci		}
57762306a36Sopenharmony_ci		atomic_set(&on->event, 1);
57862306a36Sopenharmony_ci		init_waitqueue_head(&on->poll);
57962306a36Sopenharmony_ci		INIT_LIST_HEAD(&on->files);
58062306a36Sopenharmony_ci		rcu_assign_pointer(kn->attr.open, on);
58162306a36Sopenharmony_ci	}
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci	list_add_tail(&of->list, &on->files);
58462306a36Sopenharmony_ci	if (kn->flags & KERNFS_HAS_RELEASE)
58562306a36Sopenharmony_ci		on->nr_to_release++;
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci	mutex_unlock(mutex);
58862306a36Sopenharmony_ci	return 0;
58962306a36Sopenharmony_ci}
59062306a36Sopenharmony_ci
59162306a36Sopenharmony_ci/**
59262306a36Sopenharmony_ci *	kernfs_unlink_open_file - Unlink @of from @kn.
59362306a36Sopenharmony_ci *
59462306a36Sopenharmony_ci *	@kn: target kernfs_node
59562306a36Sopenharmony_ci *	@of: associated kernfs_open_file
59662306a36Sopenharmony_ci *	@open_failed: ->open() failed, cancel ->release()
59762306a36Sopenharmony_ci *
59862306a36Sopenharmony_ci *	Unlink @of from list of @kn's associated open files. If list of
59962306a36Sopenharmony_ci *	associated open files becomes empty, disassociate and free
60062306a36Sopenharmony_ci *	kernfs_open_node.
60162306a36Sopenharmony_ci *
60262306a36Sopenharmony_ci *	LOCKING:
60362306a36Sopenharmony_ci *	None.
60462306a36Sopenharmony_ci */
60562306a36Sopenharmony_cistatic void kernfs_unlink_open_file(struct kernfs_node *kn,
60662306a36Sopenharmony_ci				    struct kernfs_open_file *of,
60762306a36Sopenharmony_ci				    bool open_failed)
60862306a36Sopenharmony_ci{
60962306a36Sopenharmony_ci	struct kernfs_open_node *on;
61062306a36Sopenharmony_ci	struct mutex *mutex;
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_ci	mutex = kernfs_open_file_mutex_lock(kn);
61362306a36Sopenharmony_ci
61462306a36Sopenharmony_ci	on = kernfs_deref_open_node_locked(kn);
61562306a36Sopenharmony_ci	if (!on) {
61662306a36Sopenharmony_ci		mutex_unlock(mutex);
61762306a36Sopenharmony_ci		return;
61862306a36Sopenharmony_ci	}
61962306a36Sopenharmony_ci
62062306a36Sopenharmony_ci	if (of) {
62162306a36Sopenharmony_ci		if (kn->flags & KERNFS_HAS_RELEASE) {
62262306a36Sopenharmony_ci			WARN_ON_ONCE(of->released == open_failed);
62362306a36Sopenharmony_ci			if (open_failed)
62462306a36Sopenharmony_ci				on->nr_to_release--;
62562306a36Sopenharmony_ci		}
62662306a36Sopenharmony_ci		if (of->mmapped)
62762306a36Sopenharmony_ci			on->nr_mmapped--;
62862306a36Sopenharmony_ci		list_del(&of->list);
62962306a36Sopenharmony_ci	}
63062306a36Sopenharmony_ci
63162306a36Sopenharmony_ci	if (list_empty(&on->files)) {
63262306a36Sopenharmony_ci		rcu_assign_pointer(kn->attr.open, NULL);
63362306a36Sopenharmony_ci		kfree_rcu(on, rcu_head);
63462306a36Sopenharmony_ci	}
63562306a36Sopenharmony_ci
63662306a36Sopenharmony_ci	mutex_unlock(mutex);
63762306a36Sopenharmony_ci}
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_cistatic int kernfs_fop_open(struct inode *inode, struct file *file)
64062306a36Sopenharmony_ci{
64162306a36Sopenharmony_ci	struct kernfs_node *kn = inode->i_private;
64262306a36Sopenharmony_ci	struct kernfs_root *root = kernfs_root(kn);
64362306a36Sopenharmony_ci	const struct kernfs_ops *ops;
64462306a36Sopenharmony_ci	struct kernfs_open_file *of;
64562306a36Sopenharmony_ci	bool has_read, has_write, has_mmap;
64662306a36Sopenharmony_ci	int error = -EACCES;
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci	if (!kernfs_get_active(kn))
64962306a36Sopenharmony_ci		return -ENODEV;
65062306a36Sopenharmony_ci
65162306a36Sopenharmony_ci	ops = kernfs_ops(kn);
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ci	has_read = ops->seq_show || ops->read || ops->mmap;
65462306a36Sopenharmony_ci	has_write = ops->write || ops->mmap;
65562306a36Sopenharmony_ci	has_mmap = ops->mmap;
65662306a36Sopenharmony_ci
65762306a36Sopenharmony_ci	/* see the flag definition for details */
65862306a36Sopenharmony_ci	if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) {
65962306a36Sopenharmony_ci		if ((file->f_mode & FMODE_WRITE) &&
66062306a36Sopenharmony_ci		    (!(inode->i_mode & S_IWUGO) || !has_write))
66162306a36Sopenharmony_ci			goto err_out;
66262306a36Sopenharmony_ci
66362306a36Sopenharmony_ci		if ((file->f_mode & FMODE_READ) &&
66462306a36Sopenharmony_ci		    (!(inode->i_mode & S_IRUGO) || !has_read))
66562306a36Sopenharmony_ci			goto err_out;
66662306a36Sopenharmony_ci	}
66762306a36Sopenharmony_ci
66862306a36Sopenharmony_ci	/* allocate a kernfs_open_file for the file */
66962306a36Sopenharmony_ci	error = -ENOMEM;
67062306a36Sopenharmony_ci	of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL);
67162306a36Sopenharmony_ci	if (!of)
67262306a36Sopenharmony_ci		goto err_out;
67362306a36Sopenharmony_ci
67462306a36Sopenharmony_ci	/*
67562306a36Sopenharmony_ci	 * The following is done to give a different lockdep key to
67662306a36Sopenharmony_ci	 * @of->mutex for files which implement mmap.  This is a rather
67762306a36Sopenharmony_ci	 * crude way to avoid false positive lockdep warning around
67862306a36Sopenharmony_ci	 * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and
67962306a36Sopenharmony_ci	 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
68062306a36Sopenharmony_ci	 * which mm->mmap_lock nests, while holding @of->mutex.  As each
68162306a36Sopenharmony_ci	 * open file has a separate mutex, it's okay as long as those don't
68262306a36Sopenharmony_ci	 * happen on the same file.  At this point, we can't easily give
68362306a36Sopenharmony_ci	 * each file a separate locking class.  Let's differentiate on
68462306a36Sopenharmony_ci	 * whether the file has mmap or not for now.
68562306a36Sopenharmony_ci	 *
68662306a36Sopenharmony_ci	 * Both paths of the branch look the same.  They're supposed to
68762306a36Sopenharmony_ci	 * look that way and give @of->mutex different static lockdep keys.
68862306a36Sopenharmony_ci	 */
68962306a36Sopenharmony_ci	if (has_mmap)
69062306a36Sopenharmony_ci		mutex_init(&of->mutex);
69162306a36Sopenharmony_ci	else
69262306a36Sopenharmony_ci		mutex_init(&of->mutex);
69362306a36Sopenharmony_ci
69462306a36Sopenharmony_ci	of->kn = kn;
69562306a36Sopenharmony_ci	of->file = file;
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci	/*
69862306a36Sopenharmony_ci	 * Write path needs to atomic_write_len outside active reference.
69962306a36Sopenharmony_ci	 * Cache it in open_file.  See kernfs_fop_write_iter() for details.
70062306a36Sopenharmony_ci	 */
70162306a36Sopenharmony_ci	of->atomic_write_len = ops->atomic_write_len;
70262306a36Sopenharmony_ci
70362306a36Sopenharmony_ci	error = -EINVAL;
70462306a36Sopenharmony_ci	/*
70562306a36Sopenharmony_ci	 * ->seq_show is incompatible with ->prealloc,
70662306a36Sopenharmony_ci	 * as seq_read does its own allocation.
70762306a36Sopenharmony_ci	 * ->read must be used instead.
70862306a36Sopenharmony_ci	 */
70962306a36Sopenharmony_ci	if (ops->prealloc && ops->seq_show)
71062306a36Sopenharmony_ci		goto err_free;
71162306a36Sopenharmony_ci	if (ops->prealloc) {
71262306a36Sopenharmony_ci		int len = of->atomic_write_len ?: PAGE_SIZE;
71362306a36Sopenharmony_ci		of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL);
71462306a36Sopenharmony_ci		error = -ENOMEM;
71562306a36Sopenharmony_ci		if (!of->prealloc_buf)
71662306a36Sopenharmony_ci			goto err_free;
71762306a36Sopenharmony_ci		mutex_init(&of->prealloc_mutex);
71862306a36Sopenharmony_ci	}
71962306a36Sopenharmony_ci
72062306a36Sopenharmony_ci	/*
72162306a36Sopenharmony_ci	 * Always instantiate seq_file even if read access doesn't use
72262306a36Sopenharmony_ci	 * seq_file or is not requested.  This unifies private data access
72362306a36Sopenharmony_ci	 * and readable regular files are the vast majority anyway.
72462306a36Sopenharmony_ci	 */
72562306a36Sopenharmony_ci	if (ops->seq_show)
72662306a36Sopenharmony_ci		error = seq_open(file, &kernfs_seq_ops);
72762306a36Sopenharmony_ci	else
72862306a36Sopenharmony_ci		error = seq_open(file, NULL);
72962306a36Sopenharmony_ci	if (error)
73062306a36Sopenharmony_ci		goto err_free;
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci	of->seq_file = file->private_data;
73362306a36Sopenharmony_ci	of->seq_file->private = of;
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_ci	/* seq_file clears PWRITE unconditionally, restore it if WRITE */
73662306a36Sopenharmony_ci	if (file->f_mode & FMODE_WRITE)
73762306a36Sopenharmony_ci		file->f_mode |= FMODE_PWRITE;
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci	/* make sure we have open node struct */
74062306a36Sopenharmony_ci	error = kernfs_get_open_node(kn, of);
74162306a36Sopenharmony_ci	if (error)
74262306a36Sopenharmony_ci		goto err_seq_release;
74362306a36Sopenharmony_ci
74462306a36Sopenharmony_ci	if (ops->open) {
74562306a36Sopenharmony_ci		/* nobody has access to @of yet, skip @of->mutex */
74662306a36Sopenharmony_ci		error = ops->open(of);
74762306a36Sopenharmony_ci		if (error)
74862306a36Sopenharmony_ci			goto err_put_node;
74962306a36Sopenharmony_ci	}
75062306a36Sopenharmony_ci
75162306a36Sopenharmony_ci	/* open succeeded, put active references */
75262306a36Sopenharmony_ci	kernfs_put_active(kn);
75362306a36Sopenharmony_ci	return 0;
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_cierr_put_node:
75662306a36Sopenharmony_ci	kernfs_unlink_open_file(kn, of, true);
75762306a36Sopenharmony_cierr_seq_release:
75862306a36Sopenharmony_ci	seq_release(inode, file);
75962306a36Sopenharmony_cierr_free:
76062306a36Sopenharmony_ci	kfree(of->prealloc_buf);
76162306a36Sopenharmony_ci	kfree(of);
76262306a36Sopenharmony_cierr_out:
76362306a36Sopenharmony_ci	kernfs_put_active(kn);
76462306a36Sopenharmony_ci	return error;
76562306a36Sopenharmony_ci}
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci/* used from release/drain to ensure that ->release() is called exactly once */
76862306a36Sopenharmony_cistatic void kernfs_release_file(struct kernfs_node *kn,
76962306a36Sopenharmony_ci				struct kernfs_open_file *of)
77062306a36Sopenharmony_ci{
77162306a36Sopenharmony_ci	/*
77262306a36Sopenharmony_ci	 * @of is guaranteed to have no other file operations in flight and
77362306a36Sopenharmony_ci	 * we just want to synchronize release and drain paths.
77462306a36Sopenharmony_ci	 * @kernfs_open_file_mutex_ptr(kn) is enough. @of->mutex can't be used
77562306a36Sopenharmony_ci	 * here because drain path may be called from places which can
77662306a36Sopenharmony_ci	 * cause circular dependency.
77762306a36Sopenharmony_ci	 */
77862306a36Sopenharmony_ci	lockdep_assert_held(kernfs_open_file_mutex_ptr(kn));
77962306a36Sopenharmony_ci
78062306a36Sopenharmony_ci	if (!of->released) {
78162306a36Sopenharmony_ci		/*
78262306a36Sopenharmony_ci		 * A file is never detached without being released and we
78362306a36Sopenharmony_ci		 * need to be able to release files which are deactivated
78462306a36Sopenharmony_ci		 * and being drained.  Don't use kernfs_ops().
78562306a36Sopenharmony_ci		 */
78662306a36Sopenharmony_ci		kn->attr.ops->release(of);
78762306a36Sopenharmony_ci		of->released = true;
78862306a36Sopenharmony_ci		of_on(of)->nr_to_release--;
78962306a36Sopenharmony_ci	}
79062306a36Sopenharmony_ci}
79162306a36Sopenharmony_ci
79262306a36Sopenharmony_cistatic int kernfs_fop_release(struct inode *inode, struct file *filp)
79362306a36Sopenharmony_ci{
79462306a36Sopenharmony_ci	struct kernfs_node *kn = inode->i_private;
79562306a36Sopenharmony_ci	struct kernfs_open_file *of = kernfs_of(filp);
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci	if (kn->flags & KERNFS_HAS_RELEASE) {
79862306a36Sopenharmony_ci		struct mutex *mutex;
79962306a36Sopenharmony_ci
80062306a36Sopenharmony_ci		mutex = kernfs_open_file_mutex_lock(kn);
80162306a36Sopenharmony_ci		kernfs_release_file(kn, of);
80262306a36Sopenharmony_ci		mutex_unlock(mutex);
80362306a36Sopenharmony_ci	}
80462306a36Sopenharmony_ci
80562306a36Sopenharmony_ci	kernfs_unlink_open_file(kn, of, false);
80662306a36Sopenharmony_ci	seq_release(inode, filp);
80762306a36Sopenharmony_ci	kfree(of->prealloc_buf);
80862306a36Sopenharmony_ci	kfree(of);
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_ci	return 0;
81162306a36Sopenharmony_ci}
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_cibool kernfs_should_drain_open_files(struct kernfs_node *kn)
81462306a36Sopenharmony_ci{
81562306a36Sopenharmony_ci	struct kernfs_open_node *on;
81662306a36Sopenharmony_ci	bool ret;
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci	/*
81962306a36Sopenharmony_ci	 * @kn being deactivated guarantees that @kn->attr.open can't change
82062306a36Sopenharmony_ci	 * beneath us making the lockless test below safe.
82162306a36Sopenharmony_ci	 */
82262306a36Sopenharmony_ci	WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_ci	rcu_read_lock();
82562306a36Sopenharmony_ci	on = rcu_dereference(kn->attr.open);
82662306a36Sopenharmony_ci	ret = on && (on->nr_mmapped || on->nr_to_release);
82762306a36Sopenharmony_ci	rcu_read_unlock();
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_ci	return ret;
83062306a36Sopenharmony_ci}
83162306a36Sopenharmony_ci
83262306a36Sopenharmony_civoid kernfs_drain_open_files(struct kernfs_node *kn)
83362306a36Sopenharmony_ci{
83462306a36Sopenharmony_ci	struct kernfs_open_node *on;
83562306a36Sopenharmony_ci	struct kernfs_open_file *of;
83662306a36Sopenharmony_ci	struct mutex *mutex;
83762306a36Sopenharmony_ci
83862306a36Sopenharmony_ci	mutex = kernfs_open_file_mutex_lock(kn);
83962306a36Sopenharmony_ci	on = kernfs_deref_open_node_locked(kn);
84062306a36Sopenharmony_ci	if (!on) {
84162306a36Sopenharmony_ci		mutex_unlock(mutex);
84262306a36Sopenharmony_ci		return;
84362306a36Sopenharmony_ci	}
84462306a36Sopenharmony_ci
84562306a36Sopenharmony_ci	list_for_each_entry(of, &on->files, list) {
84662306a36Sopenharmony_ci		struct inode *inode = file_inode(of->file);
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ci		if (of->mmapped) {
84962306a36Sopenharmony_ci			unmap_mapping_range(inode->i_mapping, 0, 0, 1);
85062306a36Sopenharmony_ci			of->mmapped = false;
85162306a36Sopenharmony_ci			on->nr_mmapped--;
85262306a36Sopenharmony_ci		}
85362306a36Sopenharmony_ci
85462306a36Sopenharmony_ci		if (kn->flags & KERNFS_HAS_RELEASE)
85562306a36Sopenharmony_ci			kernfs_release_file(kn, of);
85662306a36Sopenharmony_ci	}
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_ci	WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release);
85962306a36Sopenharmony_ci	mutex_unlock(mutex);
86062306a36Sopenharmony_ci}
86162306a36Sopenharmony_ci
86262306a36Sopenharmony_ci/*
86362306a36Sopenharmony_ci * Kernfs attribute files are pollable.  The idea is that you read
86462306a36Sopenharmony_ci * the content and then you use 'poll' or 'select' to wait for
86562306a36Sopenharmony_ci * the content to change.  When the content changes (assuming the
86662306a36Sopenharmony_ci * manager for the kobject supports notification), poll will
86762306a36Sopenharmony_ci * return EPOLLERR|EPOLLPRI, and select will return the fd whether
86862306a36Sopenharmony_ci * it is waiting for read, write, or exceptions.
86962306a36Sopenharmony_ci * Once poll/select indicates that the value has changed, you
87062306a36Sopenharmony_ci * need to close and re-open the file, or seek to 0 and read again.
87162306a36Sopenharmony_ci * Reminder: this only works for attributes which actively support
87262306a36Sopenharmony_ci * it, and it is not possible to test an attribute from userspace
87362306a36Sopenharmony_ci * to see if it supports poll (Neither 'poll' nor 'select' return
87462306a36Sopenharmony_ci * an appropriate error code).  When in doubt, set a suitable timeout value.
87562306a36Sopenharmony_ci */
87662306a36Sopenharmony_ci__poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
87762306a36Sopenharmony_ci{
87862306a36Sopenharmony_ci	struct kernfs_open_node *on = of_on(of);
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_ci	poll_wait(of->file, &on->poll, wait);
88162306a36Sopenharmony_ci
88262306a36Sopenharmony_ci	if (of->event != atomic_read(&on->event))
88362306a36Sopenharmony_ci		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci	return DEFAULT_POLLMASK;
88662306a36Sopenharmony_ci}
88762306a36Sopenharmony_ci
88862306a36Sopenharmony_cistatic __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
88962306a36Sopenharmony_ci{
89062306a36Sopenharmony_ci	struct kernfs_open_file *of = kernfs_of(filp);
89162306a36Sopenharmony_ci	struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
89262306a36Sopenharmony_ci	__poll_t ret;
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci	if (!kernfs_get_active(kn))
89562306a36Sopenharmony_ci		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_ci	if (kn->attr.ops->poll)
89862306a36Sopenharmony_ci		ret = kn->attr.ops->poll(of, wait);
89962306a36Sopenharmony_ci	else
90062306a36Sopenharmony_ci		ret = kernfs_generic_poll(of, wait);
90162306a36Sopenharmony_ci
90262306a36Sopenharmony_ci	kernfs_put_active(kn);
90362306a36Sopenharmony_ci	return ret;
90462306a36Sopenharmony_ci}
90562306a36Sopenharmony_ci
90662306a36Sopenharmony_cistatic void kernfs_notify_workfn(struct work_struct *work)
90762306a36Sopenharmony_ci{
90862306a36Sopenharmony_ci	struct kernfs_node *kn;
90962306a36Sopenharmony_ci	struct kernfs_super_info *info;
91062306a36Sopenharmony_ci	struct kernfs_root *root;
91162306a36Sopenharmony_cirepeat:
91262306a36Sopenharmony_ci	/* pop one off the notify_list */
91362306a36Sopenharmony_ci	spin_lock_irq(&kernfs_notify_lock);
91462306a36Sopenharmony_ci	kn = kernfs_notify_list;
91562306a36Sopenharmony_ci	if (kn == KERNFS_NOTIFY_EOL) {
91662306a36Sopenharmony_ci		spin_unlock_irq(&kernfs_notify_lock);
91762306a36Sopenharmony_ci		return;
91862306a36Sopenharmony_ci	}
91962306a36Sopenharmony_ci	kernfs_notify_list = kn->attr.notify_next;
92062306a36Sopenharmony_ci	kn->attr.notify_next = NULL;
92162306a36Sopenharmony_ci	spin_unlock_irq(&kernfs_notify_lock);
92262306a36Sopenharmony_ci
92362306a36Sopenharmony_ci	root = kernfs_root(kn);
92462306a36Sopenharmony_ci	/* kick fsnotify */
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci	down_read(&root->kernfs_supers_rwsem);
92762306a36Sopenharmony_ci	list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
92862306a36Sopenharmony_ci		struct kernfs_node *parent;
92962306a36Sopenharmony_ci		struct inode *p_inode = NULL;
93062306a36Sopenharmony_ci		struct inode *inode;
93162306a36Sopenharmony_ci		struct qstr name;
93262306a36Sopenharmony_ci
93362306a36Sopenharmony_ci		/*
93462306a36Sopenharmony_ci		 * We want fsnotify_modify() on @kn but as the
93562306a36Sopenharmony_ci		 * modifications aren't originating from userland don't
93662306a36Sopenharmony_ci		 * have the matching @file available.  Look up the inodes
93762306a36Sopenharmony_ci		 * and generate the events manually.
93862306a36Sopenharmony_ci		 */
93962306a36Sopenharmony_ci		inode = ilookup(info->sb, kernfs_ino(kn));
94062306a36Sopenharmony_ci		if (!inode)
94162306a36Sopenharmony_ci			continue;
94262306a36Sopenharmony_ci
94362306a36Sopenharmony_ci		name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name));
94462306a36Sopenharmony_ci		parent = kernfs_get_parent(kn);
94562306a36Sopenharmony_ci		if (parent) {
94662306a36Sopenharmony_ci			p_inode = ilookup(info->sb, kernfs_ino(parent));
94762306a36Sopenharmony_ci			if (p_inode) {
94862306a36Sopenharmony_ci				fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD,
94962306a36Sopenharmony_ci					 inode, FSNOTIFY_EVENT_INODE,
95062306a36Sopenharmony_ci					 p_inode, &name, inode, 0);
95162306a36Sopenharmony_ci				iput(p_inode);
95262306a36Sopenharmony_ci			}
95362306a36Sopenharmony_ci
95462306a36Sopenharmony_ci			kernfs_put(parent);
95562306a36Sopenharmony_ci		}
95662306a36Sopenharmony_ci
95762306a36Sopenharmony_ci		if (!p_inode)
95862306a36Sopenharmony_ci			fsnotify_inode(inode, FS_MODIFY);
95962306a36Sopenharmony_ci
96062306a36Sopenharmony_ci		iput(inode);
96162306a36Sopenharmony_ci	}
96262306a36Sopenharmony_ci
96362306a36Sopenharmony_ci	up_read(&root->kernfs_supers_rwsem);
96462306a36Sopenharmony_ci	kernfs_put(kn);
96562306a36Sopenharmony_ci	goto repeat;
96662306a36Sopenharmony_ci}
96762306a36Sopenharmony_ci
96862306a36Sopenharmony_ci/**
96962306a36Sopenharmony_ci * kernfs_notify - notify a kernfs file
97062306a36Sopenharmony_ci * @kn: file to notify
97162306a36Sopenharmony_ci *
97262306a36Sopenharmony_ci * Notify @kn such that poll(2) on @kn wakes up.  Maybe be called from any
97362306a36Sopenharmony_ci * context.
97462306a36Sopenharmony_ci */
97562306a36Sopenharmony_civoid kernfs_notify(struct kernfs_node *kn)
97662306a36Sopenharmony_ci{
97762306a36Sopenharmony_ci	static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
97862306a36Sopenharmony_ci	unsigned long flags;
97962306a36Sopenharmony_ci	struct kernfs_open_node *on;
98062306a36Sopenharmony_ci
98162306a36Sopenharmony_ci	if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
98262306a36Sopenharmony_ci		return;
98362306a36Sopenharmony_ci
98462306a36Sopenharmony_ci	/* kick poll immediately */
98562306a36Sopenharmony_ci	rcu_read_lock();
98662306a36Sopenharmony_ci	on = rcu_dereference(kn->attr.open);
98762306a36Sopenharmony_ci	if (on) {
98862306a36Sopenharmony_ci		atomic_inc(&on->event);
98962306a36Sopenharmony_ci		wake_up_interruptible(&on->poll);
99062306a36Sopenharmony_ci	}
99162306a36Sopenharmony_ci	rcu_read_unlock();
99262306a36Sopenharmony_ci
99362306a36Sopenharmony_ci	/* schedule work to kick fsnotify */
99462306a36Sopenharmony_ci	spin_lock_irqsave(&kernfs_notify_lock, flags);
99562306a36Sopenharmony_ci	if (!kn->attr.notify_next) {
99662306a36Sopenharmony_ci		kernfs_get(kn);
99762306a36Sopenharmony_ci		kn->attr.notify_next = kernfs_notify_list;
99862306a36Sopenharmony_ci		kernfs_notify_list = kn;
99962306a36Sopenharmony_ci		schedule_work(&kernfs_notify_work);
100062306a36Sopenharmony_ci	}
100162306a36Sopenharmony_ci	spin_unlock_irqrestore(&kernfs_notify_lock, flags);
100262306a36Sopenharmony_ci}
100362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kernfs_notify);
100462306a36Sopenharmony_ci
100562306a36Sopenharmony_ciconst struct file_operations kernfs_file_fops = {
100662306a36Sopenharmony_ci	.read_iter	= kernfs_fop_read_iter,
100762306a36Sopenharmony_ci	.write_iter	= kernfs_fop_write_iter,
100862306a36Sopenharmony_ci	.llseek		= generic_file_llseek,
100962306a36Sopenharmony_ci	.mmap		= kernfs_fop_mmap,
101062306a36Sopenharmony_ci	.open		= kernfs_fop_open,
101162306a36Sopenharmony_ci	.release	= kernfs_fop_release,
101262306a36Sopenharmony_ci	.poll		= kernfs_fop_poll,
101362306a36Sopenharmony_ci	.fsync		= noop_fsync,
101462306a36Sopenharmony_ci	.splice_read	= copy_splice_read,
101562306a36Sopenharmony_ci	.splice_write	= iter_file_splice_write,
101662306a36Sopenharmony_ci};
101762306a36Sopenharmony_ci
101862306a36Sopenharmony_ci/**
101962306a36Sopenharmony_ci * __kernfs_create_file - kernfs internal function to create a file
102062306a36Sopenharmony_ci * @parent: directory to create the file in
102162306a36Sopenharmony_ci * @name: name of the file
102262306a36Sopenharmony_ci * @mode: mode of the file
102362306a36Sopenharmony_ci * @uid: uid of the file
102462306a36Sopenharmony_ci * @gid: gid of the file
102562306a36Sopenharmony_ci * @size: size of the file
102662306a36Sopenharmony_ci * @ops: kernfs operations for the file
102762306a36Sopenharmony_ci * @priv: private data for the file
102862306a36Sopenharmony_ci * @ns: optional namespace tag of the file
102962306a36Sopenharmony_ci * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
103062306a36Sopenharmony_ci *
103162306a36Sopenharmony_ci * Return: the created node on success, ERR_PTR() value on error.
103262306a36Sopenharmony_ci */
103362306a36Sopenharmony_cistruct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
103462306a36Sopenharmony_ci					 const char *name,
103562306a36Sopenharmony_ci					 umode_t mode, kuid_t uid, kgid_t gid,
103662306a36Sopenharmony_ci					 loff_t size,
103762306a36Sopenharmony_ci					 const struct kernfs_ops *ops,
103862306a36Sopenharmony_ci					 void *priv, const void *ns,
103962306a36Sopenharmony_ci					 struct lock_class_key *key)
104062306a36Sopenharmony_ci{
104162306a36Sopenharmony_ci	struct kernfs_node *kn;
104262306a36Sopenharmony_ci	unsigned flags;
104362306a36Sopenharmony_ci	int rc;
104462306a36Sopenharmony_ci
104562306a36Sopenharmony_ci	flags = KERNFS_FILE;
104662306a36Sopenharmony_ci
104762306a36Sopenharmony_ci	kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
104862306a36Sopenharmony_ci			     uid, gid, flags);
104962306a36Sopenharmony_ci	if (!kn)
105062306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
105162306a36Sopenharmony_ci
105262306a36Sopenharmony_ci	kn->attr.ops = ops;
105362306a36Sopenharmony_ci	kn->attr.size = size;
105462306a36Sopenharmony_ci	kn->ns = ns;
105562306a36Sopenharmony_ci	kn->priv = priv;
105662306a36Sopenharmony_ci
105762306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_LOCK_ALLOC
105862306a36Sopenharmony_ci	if (key) {
105962306a36Sopenharmony_ci		lockdep_init_map(&kn->dep_map, "kn->active", key, 0);
106062306a36Sopenharmony_ci		kn->flags |= KERNFS_LOCKDEP;
106162306a36Sopenharmony_ci	}
106262306a36Sopenharmony_ci#endif
106362306a36Sopenharmony_ci
106462306a36Sopenharmony_ci	/*
106562306a36Sopenharmony_ci	 * kn->attr.ops is accessible only while holding active ref.  We
106662306a36Sopenharmony_ci	 * need to know whether some ops are implemented outside active
106762306a36Sopenharmony_ci	 * ref.  Cache their existence in flags.
106862306a36Sopenharmony_ci	 */
106962306a36Sopenharmony_ci	if (ops->seq_show)
107062306a36Sopenharmony_ci		kn->flags |= KERNFS_HAS_SEQ_SHOW;
107162306a36Sopenharmony_ci	if (ops->mmap)
107262306a36Sopenharmony_ci		kn->flags |= KERNFS_HAS_MMAP;
107362306a36Sopenharmony_ci	if (ops->release)
107462306a36Sopenharmony_ci		kn->flags |= KERNFS_HAS_RELEASE;
107562306a36Sopenharmony_ci
107662306a36Sopenharmony_ci	rc = kernfs_add_one(kn);
107762306a36Sopenharmony_ci	if (rc) {
107862306a36Sopenharmony_ci		kernfs_put(kn);
107962306a36Sopenharmony_ci		return ERR_PTR(rc);
108062306a36Sopenharmony_ci	}
108162306a36Sopenharmony_ci	return kn;
108262306a36Sopenharmony_ci}
1083