// SPDX-License-Identifier: GPL-2.0
/*
 * Tracepoint implementations for mmap_lock acquire/release events, kept in
 * their own translation unit (see the note near the bottom of this file).
 * With CONFIG_MEMCG, each event is annotated with the memcg path of the mm's
 * owning cgroup, rendered into per-CPU buffers that are allocated on first
 * tracepoint registration and freed on last unregistration.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/mmap_lock.h>

#include <linux/mm.h>
#include <linux/cgroup.h>
#include <linux/memcontrol.h>
#include <linux/mmap_lock.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/trace_events.h>
#include <linux/local_lock.h>

EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_released);

#ifdef CONFIG_MEMCG

/*
 * Our various events all share the same buffer (because we don't want or need
 * to allocate a set of buffers *per event type*), so we need to protect against
 * concurrent _reg() and _unreg() calls, and count how many _reg() calls have
 * been made.
 */
static DEFINE_MUTEX(reg_lock);
static int reg_refcount; /* Protected by reg_lock. */

/*
 * Size of the buffer for memcg path names. Ignoring stack trace support,
 * trace_events_hist.c uses MAX_FILTER_STR_VAL for this, so we also use it.
 */
#define MEMCG_PATH_BUF_SIZE MAX_FILTER_STR_VAL

/*
 * How many contexts our trace events might be called in: normal, softirq, irq,
 * and NMI.
 */
#define CONTEXT_COUNT 4

/*
 * Per-CPU scratch state for rendering memcg path names. Each CPU owns one
 * buffer holding CONTEXT_COUNT slots of MEMCG_PATH_BUF_SIZE bytes each, so
 * events nested in different contexts on the same CPU get disjoint slots.
 */
struct memcg_path {
	local_lock_t lock;	/* Serializes same-context users on this CPU. */
	char __rcu *buf;	/* CONTEXT_COUNT * MEMCG_PATH_BUF_SIZE bytes, or NULL. */
	local_t buf_idx;	/* Byte offset of the next free slot in @buf. */
};
static DEFINE_PER_CPU(struct memcg_path, memcg_paths) = {
	.lock = INIT_LOCAL_LOCK(lock),
	.buf_idx = LOCAL_INIT(0),
};

/*
 * Scratch array (one slot per possible CPU) used to stash the old buffer
 * pointers across the synchronize_rcu() in free_memcg_path_bufs().
 */
static char **tmp_bufs;

/* Called with reg_lock held. */
static void free_memcg_path_bufs(void)
{
	struct memcg_path *memcg_path;
	int cpu;
	char **old = tmp_bufs;

	/*
	 * Detach every CPU's buffer first, saving the old pointers; new
	 * readers will observe NULL and bail out.
	 */
	for_each_possible_cpu(cpu) {
		memcg_path = per_cpu_ptr(&memcg_paths, cpu);
		*(old++) = rcu_dereference_protected(memcg_path->buf,
			lockdep_is_held(&reg_lock));
		rcu_assign_pointer(memcg_path->buf, NULL);
	}

	/* Wait for inflight memcg_path_buf users to finish. */
	synchronize_rcu();

	/* Now no reader can still hold a pointer into the old buffers. */
	old = tmp_bufs;
	for_each_possible_cpu(cpu) {
		kfree(*(old++));
	}

	kfree(tmp_bufs);
	tmp_bufs = NULL;
}

/*
 * Tracepoint registration callback: on the first registration, allocate the
 * per-CPU path buffers. Returns 0 on success, -ENOMEM on allocation failure
 * (in which case the refcount increment is rolled back).
 */
int trace_mmap_lock_reg(void)
{
	int cpu;
	char *new;

	mutex_lock(&reg_lock);

	/* If the refcount is going 0->1, proceed with allocating buffers. */
	if (reg_refcount++)
		goto out;

	tmp_bufs = kmalloc_array(num_possible_cpus(), sizeof(*tmp_bufs),
				 GFP_KERNEL);
	if (tmp_bufs == NULL)
		goto out_fail;

	for_each_possible_cpu(cpu) {
		new = kmalloc(MEMCG_PATH_BUF_SIZE * CONTEXT_COUNT, GFP_KERNEL);
		if (new == NULL)
			goto out_fail_free;
		rcu_assign_pointer(per_cpu_ptr(&memcg_paths, cpu)->buf, new);
		/* Don't need to wait for inflights, they'd have gotten NULL. */
	}

out:
	mutex_unlock(&reg_lock);
	return 0;

out_fail_free:
	/* Frees the CPUs already populated above; the rest are still NULL. */
	free_memcg_path_bufs();
out_fail:
	/* Since we failed, undo the earlier ref increment. */
	--reg_refcount;

	mutex_unlock(&reg_lock);
	return -ENOMEM;
}

/*
 * Tracepoint unregistration callback: on the last unregistration, tear down
 * the per-CPU path buffers (waiting out in-flight readers via RCU).
 */
void trace_mmap_lock_unreg(void)
{
	mutex_lock(&reg_lock);

	/* If the refcount is going 1->0, proceed with freeing buffers. */
	if (--reg_refcount)
		goto out;

	free_memcg_path_bufs();

out:
	mutex_unlock(&reg_lock);
}

/*
 * Claim this CPU's next free MEMCG_PATH_BUF_SIZE-byte slot, or return NULL if
 * the buffers are gone (tracepoints being unregistered). On success, returns
 * with the RCU read lock held; the caller must balance with
 * put_memcg_path_buf(), which releases the slot and drops the RCU read lock.
 */
static inline char *get_memcg_path_buf(void)
{
	struct memcg_path *memcg_path = this_cpu_ptr(&memcg_paths);
	char *buf;
	int idx;

	rcu_read_lock();
	buf = rcu_dereference(memcg_path->buf);
	if (buf == NULL) {
		rcu_read_unlock();
		return NULL;
	}
	/* Reserve a slot: bump the index, then point at the slot's start. */
	idx = local_add_return(MEMCG_PATH_BUF_SIZE, &memcg_path->buf_idx) -
	      MEMCG_PATH_BUF_SIZE;
	return &buf[idx];
}

/* Release the slot taken by get_memcg_path_buf() and exit the RCU section. */
static inline void put_memcg_path_buf(void)
{
	local_sub(MEMCG_PATH_BUF_SIZE, &this_cpu_ptr(&memcg_paths)->buf_idx);
	rcu_read_unlock();
}

/*
 * Emit the trace_mmap_lock_<type> event with the mm's memcg path (or "" if no
 * path buffer is available). The local_lock keeps us on this CPU and
 * serializes same-context users of the per-CPU buffer for the duration.
 */
#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)				\
	do {								\
		const char *memcg_path;					\
		local_lock(&memcg_paths.lock);				\
		memcg_path = get_mm_memcg_path(mm);			\
		trace_mmap_lock_##type(mm,				\
				       memcg_path != NULL ? memcg_path : "", \
				       ##__VA_ARGS__);			\
		if (likely(memcg_path != NULL))				\
			put_memcg_path_buf();				\
		local_unlock(&memcg_paths.lock);			\
	} while (0)

#else /* !CONFIG_MEMCG */

/* Without memcg there are no buffers to manage; registration is trivial. */
int trace_mmap_lock_reg(void)
{
	return 0;
}

void trace_mmap_lock_unreg(void)
{
}

/* No memcg path available: always emit the event with an empty path. */
#define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \
	trace_mmap_lock_##type(mm, "", ##__VA_ARGS__)

#endif /* CONFIG_MEMCG */

#ifdef CONFIG_TRACING
#ifdef CONFIG_MEMCG
/*
 * Write the given mm_struct's memcg path to a percpu buffer, and return a
 * pointer to it. If the path cannot be determined, or no buffer was available
 * (because the trace event is being unregistered), NULL is returned.
 *
 * Note: buffers are allocated per-cpu to avoid locking, so preemption must be
 * disabled by the caller before calling us, and re-enabled only after the
 * caller is done with the pointer.
 *
 * The caller must call put_memcg_path_buf() once the buffer is no longer
 * needed. This must be done while preemption is still disabled.
 */
static const char *get_mm_memcg_path(struct mm_struct *mm)
{
	char *buf = NULL;
	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);

	if (memcg == NULL)
		goto out;
	/* NOTE(review): css.cgroup can apparently be NULL here (presumably
	 * during cgroup teardown) — skip the path lookup in that case. */
	if (unlikely(memcg->css.cgroup == NULL))
		goto out_put;

	/* Takes the RCU read lock on success; released in put_memcg_path_buf(). */
	buf = get_memcg_path_buf();
	if (buf == NULL)
		goto out_put;

	cgroup_path(memcg->css.cgroup, buf, MEMCG_PATH_BUF_SIZE);

out_put:
	/* Drop the reference taken by get_mem_cgroup_from_mm(). */
	css_put(&memcg->css);
out:
	return buf;
}

#endif /* CONFIG_MEMCG */

/*
 * Trace calls must be in a separate file, as otherwise there's a circular
 * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
 */

/* Emit the "start_locking" event for @mm; @write = exclusive (write) lock. */
void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write)
{
	TRACE_MMAP_LOCK_EVENT(start_locking, mm, write);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_start_locking);

/* Emit the "acquire_returned" event; @success = whether the lock was taken. */
void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
					   bool success)
{
	TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, write, success);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned);

/* Emit the "released" event for @mm; @write = exclusive (write) lock. */
void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write)
{
	TRACE_MMAP_LOCK_EVENT(released, mm, write);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_released);
#endif /* CONFIG_TRACING */