xref: /kernel/linux/linux-6.6/mm/mmap_lock.c (revision 62306a36)
162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci#define CREATE_TRACE_POINTS
362306a36Sopenharmony_ci#include <trace/events/mmap_lock.h>
462306a36Sopenharmony_ci
562306a36Sopenharmony_ci#include <linux/mm.h>
662306a36Sopenharmony_ci#include <linux/cgroup.h>
762306a36Sopenharmony_ci#include <linux/memcontrol.h>
862306a36Sopenharmony_ci#include <linux/mmap_lock.h>
962306a36Sopenharmony_ci#include <linux/mutex.h>
1062306a36Sopenharmony_ci#include <linux/percpu.h>
1162306a36Sopenharmony_ci#include <linux/rcupdate.h>
1262306a36Sopenharmony_ci#include <linux/smp.h>
1362306a36Sopenharmony_ci#include <linux/trace_events.h>
1462306a36Sopenharmony_ci#include <linux/local_lock.h>
1562306a36Sopenharmony_ci
/* Export the three mmap_lock tracepoints defined by CREATE_TRACE_POINTS. */
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_released);
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ci#ifdef CONFIG_MEMCG
2162306a36Sopenharmony_ci
/*
 * All of our events share one set of path buffers (there is no need for a
 * separate set *per event type*). Registration and unregistration can race
 * with each other, so both are serialized by reg_lock, and reg_refcount
 * records how many _reg() calls are currently outstanding.
 */
static DEFINE_MUTEX(reg_lock);
static int reg_refcount; /* Only read or written with reg_lock held. */
3062306a36Sopenharmony_ci
/*
 * Bytes reserved for a single memcg path name. trace_events_hist.c uses
 * MAX_FILTER_STR_VAL for this (ignoring stack trace support), so the same
 * limit is used here.
 */
#define MEMCG_PATH_BUF_SIZE MAX_FILTER_STR_VAL
3662306a36Sopenharmony_ci
/*
 * Number of execution contexts a trace event may fire in on one CPU:
 * normal, softirq, irq, and NMI. Each per-CPU buffer is sized to hold one
 * MEMCG_PATH_BUF_SIZE slot per context.
 */
#define CONTEXT_COUNT 4
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_cistruct memcg_path {
4462306a36Sopenharmony_ci	local_lock_t lock;
4562306a36Sopenharmony_ci	char __rcu *buf;
4662306a36Sopenharmony_ci	local_t buf_idx;
4762306a36Sopenharmony_ci};
4862306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct memcg_path, memcg_paths) = {
4962306a36Sopenharmony_ci	.lock = INIT_LOCAL_LOCK(lock),
5062306a36Sopenharmony_ci	.buf_idx = LOCAL_INIT(0),
5162306a36Sopenharmony_ci};
5262306a36Sopenharmony_ci
/*
 * Scratch array with one entry per possible CPU. It is allocated in
 * trace_mmap_lock_reg() and used by free_memcg_path_bufs() to remember the
 * unpublished per-CPU buffers across synchronize_rcu() until they are freed.
 */
static char **tmp_bufs;
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci/* Called with reg_lock held. */
5662306a36Sopenharmony_cistatic void free_memcg_path_bufs(void)
5762306a36Sopenharmony_ci{
5862306a36Sopenharmony_ci	struct memcg_path *memcg_path;
5962306a36Sopenharmony_ci	int cpu;
6062306a36Sopenharmony_ci	char **old = tmp_bufs;
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci	for_each_possible_cpu(cpu) {
6362306a36Sopenharmony_ci		memcg_path = per_cpu_ptr(&memcg_paths, cpu);
6462306a36Sopenharmony_ci		*(old++) = rcu_dereference_protected(memcg_path->buf,
6562306a36Sopenharmony_ci			lockdep_is_held(&reg_lock));
6662306a36Sopenharmony_ci		rcu_assign_pointer(memcg_path->buf, NULL);
6762306a36Sopenharmony_ci	}
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci	/* Wait for inflight memcg_path_buf users to finish. */
7062306a36Sopenharmony_ci	synchronize_rcu();
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci	old = tmp_bufs;
7362306a36Sopenharmony_ci	for_each_possible_cpu(cpu) {
7462306a36Sopenharmony_ci		kfree(*(old++));
7562306a36Sopenharmony_ci	}
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci	kfree(tmp_bufs);
7862306a36Sopenharmony_ci	tmp_bufs = NULL;
7962306a36Sopenharmony_ci}
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_ciint trace_mmap_lock_reg(void)
8262306a36Sopenharmony_ci{
8362306a36Sopenharmony_ci	int cpu;
8462306a36Sopenharmony_ci	char *new;
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_ci	mutex_lock(&reg_lock);
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	/* If the refcount is going 0->1, proceed with allocating buffers. */
8962306a36Sopenharmony_ci	if (reg_refcount++)
9062306a36Sopenharmony_ci		goto out;
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_ci	tmp_bufs = kmalloc_array(num_possible_cpus(), sizeof(*tmp_bufs),
9362306a36Sopenharmony_ci				 GFP_KERNEL);
9462306a36Sopenharmony_ci	if (tmp_bufs == NULL)
9562306a36Sopenharmony_ci		goto out_fail;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci	for_each_possible_cpu(cpu) {
9862306a36Sopenharmony_ci		new = kmalloc(MEMCG_PATH_BUF_SIZE * CONTEXT_COUNT, GFP_KERNEL);
9962306a36Sopenharmony_ci		if (new == NULL)
10062306a36Sopenharmony_ci			goto out_fail_free;
10162306a36Sopenharmony_ci		rcu_assign_pointer(per_cpu_ptr(&memcg_paths, cpu)->buf, new);
10262306a36Sopenharmony_ci		/* Don't need to wait for inflights, they'd have gotten NULL. */
10362306a36Sopenharmony_ci	}
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ciout:
10662306a36Sopenharmony_ci	mutex_unlock(&reg_lock);
10762306a36Sopenharmony_ci	return 0;
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_ciout_fail_free:
11062306a36Sopenharmony_ci	free_memcg_path_bufs();
11162306a36Sopenharmony_ciout_fail:
11262306a36Sopenharmony_ci	/* Since we failed, undo the earlier ref increment. */
11362306a36Sopenharmony_ci	--reg_refcount;
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	mutex_unlock(&reg_lock);
11662306a36Sopenharmony_ci	return -ENOMEM;
11762306a36Sopenharmony_ci}
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_civoid trace_mmap_lock_unreg(void)
12062306a36Sopenharmony_ci{
12162306a36Sopenharmony_ci	mutex_lock(&reg_lock);
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	/* If the refcount is going 1->0, proceed with freeing buffers. */
12462306a36Sopenharmony_ci	if (--reg_refcount)
12562306a36Sopenharmony_ci		goto out;
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	free_memcg_path_bufs();
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ciout:
13062306a36Sopenharmony_ci	mutex_unlock(&reg_lock);
13162306a36Sopenharmony_ci}
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_cistatic inline char *get_memcg_path_buf(void)
13462306a36Sopenharmony_ci{
13562306a36Sopenharmony_ci	struct memcg_path *memcg_path = this_cpu_ptr(&memcg_paths);
13662306a36Sopenharmony_ci	char *buf;
13762306a36Sopenharmony_ci	int idx;
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	rcu_read_lock();
14062306a36Sopenharmony_ci	buf = rcu_dereference(memcg_path->buf);
14162306a36Sopenharmony_ci	if (buf == NULL) {
14262306a36Sopenharmony_ci		rcu_read_unlock();
14362306a36Sopenharmony_ci		return NULL;
14462306a36Sopenharmony_ci	}
14562306a36Sopenharmony_ci	idx = local_add_return(MEMCG_PATH_BUF_SIZE, &memcg_path->buf_idx) -
14662306a36Sopenharmony_ci	      MEMCG_PATH_BUF_SIZE;
14762306a36Sopenharmony_ci	return &buf[idx];
14862306a36Sopenharmony_ci}
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_cistatic inline void put_memcg_path_buf(void)
15162306a36Sopenharmony_ci{
15262306a36Sopenharmony_ci	local_sub(MEMCG_PATH_BUF_SIZE, &this_cpu_ptr(&memcg_paths)->buf_idx);
15362306a36Sopenharmony_ci	rcu_read_unlock();
15462306a36Sopenharmony_ci}
15562306a36Sopenharmony_ci
/*
 * Emit trace_mmap_lock_<type>() for @mm, passing the mm's memcg path (or ""
 * when none could be obtained) followed by any extra event arguments.
 *
 * The per-CPU local lock is held from the path lookup until after the event
 * has been emitted, and the path slot is returned only when one was actually
 * handed out.
 *
 * NOTE(review): CONTEXT_COUNT anticipates nested use from softirq/irq/NMI,
 * yet plain local_lock() is held across the event -- confirm that taking it
 * again from interrupt context on the same CPU is acceptable.
 */
#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)				\
	do {								\
		const char *__path;					\
									\
		local_lock(&memcg_paths.lock);				\
		__path = get_mm_memcg_path(mm);				\
		trace_mmap_lock_##type(mm, __path ? __path : "",	\
				       ##__VA_ARGS__);			\
		if (likely(__path))					\
			put_memcg_path_buf();				\
		local_unlock(&memcg_paths.lock);			\
	} while (0)
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci#else /* !CONFIG_MEMCG */
17062306a36Sopenharmony_ci
/*
 * !CONFIG_MEMCG variant: no path buffers exist, so there is nothing to
 * allocate. Always returns 0.
 */
int trace_mmap_lock_reg(void)
{
	return 0;
}
17562306a36Sopenharmony_ci
/* !CONFIG_MEMCG variant: nothing was allocated, so nothing to release. */
void trace_mmap_lock_unreg(void)
{
}
17962306a36Sopenharmony_ci
/*
 * !CONFIG_MEMCG variant: emit trace_mmap_lock_<type>() with an empty string
 * in place of the memcg path.
 */
#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)				\
	trace_mmap_lock_##type(mm, "", ##__VA_ARGS__)
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci#endif /* CONFIG_MEMCG */
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci#ifdef CONFIG_TRACING
18662306a36Sopenharmony_ci#ifdef CONFIG_MEMCG
18762306a36Sopenharmony_ci/*
18862306a36Sopenharmony_ci * Write the given mm_struct's memcg path to a percpu buffer, and return a
18962306a36Sopenharmony_ci * pointer to it. If the path cannot be determined, or no buffer was available
19062306a36Sopenharmony_ci * (because the trace event is being unregistered), NULL is returned.
19162306a36Sopenharmony_ci *
19262306a36Sopenharmony_ci * Note: buffers are allocated per-cpu to avoid locking, so preemption must be
19362306a36Sopenharmony_ci * disabled by the caller before calling us, and re-enabled only after the
19462306a36Sopenharmony_ci * caller is done with the pointer.
19562306a36Sopenharmony_ci *
19662306a36Sopenharmony_ci * The caller must call put_memcg_path_buf() once the buffer is no longer
19762306a36Sopenharmony_ci * needed. This must be done while preemption is still disabled.
19862306a36Sopenharmony_ci */
19962306a36Sopenharmony_cistatic const char *get_mm_memcg_path(struct mm_struct *mm)
20062306a36Sopenharmony_ci{
20162306a36Sopenharmony_ci	char *buf = NULL;
20262306a36Sopenharmony_ci	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci	if (memcg == NULL)
20562306a36Sopenharmony_ci		goto out;
20662306a36Sopenharmony_ci	if (unlikely(memcg->css.cgroup == NULL))
20762306a36Sopenharmony_ci		goto out_put;
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_ci	buf = get_memcg_path_buf();
21062306a36Sopenharmony_ci	if (buf == NULL)
21162306a36Sopenharmony_ci		goto out_put;
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci	cgroup_path(memcg->css.cgroup, buf, MEMCG_PATH_BUF_SIZE);
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ciout_put:
21662306a36Sopenharmony_ci	css_put(&memcg->css);
21762306a36Sopenharmony_ciout:
21862306a36Sopenharmony_ci	return buf;
21962306a36Sopenharmony_ci}
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci#endif /* CONFIG_MEMCG */
22262306a36Sopenharmony_ci
/*
 * Trace calls must be in a separate file, as otherwise there's a circular
 * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
 */
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_civoid __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write)
22962306a36Sopenharmony_ci{
23062306a36Sopenharmony_ci	TRACE_MMAP_LOCK_EVENT(start_locking, mm, write);
23162306a36Sopenharmony_ci}
23262306a36Sopenharmony_ciEXPORT_SYMBOL(__mmap_lock_do_trace_start_locking);
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_civoid __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
23562306a36Sopenharmony_ci					   bool success)
23662306a36Sopenharmony_ci{
23762306a36Sopenharmony_ci	TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, write, success);
23862306a36Sopenharmony_ci}
23962306a36Sopenharmony_ciEXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned);
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_civoid __mmap_lock_do_trace_released(struct mm_struct *mm, bool write)
24262306a36Sopenharmony_ci{
24362306a36Sopenharmony_ci	TRACE_MMAP_LOCK_EVENT(released, mm, write);
24462306a36Sopenharmony_ci}
24562306a36Sopenharmony_ciEXPORT_SYMBOL(__mmap_lock_do_trace_released);
24662306a36Sopenharmony_ci#endif /* CONFIG_TRACING */
247