1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2020 Facebook */
3#include <vmlinux.h>
4#include <bpf/bpf_core_read.h>
5#include <bpf/bpf_helpers.h>
6#include <bpf/bpf_tracing.h>
7
8#include "profiler.h"
9
#ifndef NULL
#define NULL 0
#endif

/*
 * Open-flag and file-mode constants used by the probes below, written in
 * octal. They are spelled out here as macros; presumably because the
 * generated vmlinux.h does not provide them (TODO confirm).
 */
#define O_WRONLY 00000001
#define O_RDWR 00000002
#define O_DIRECTORY 00200000
#define __O_TMPFILE 020000000
/* Note: this is a two-bit mask (__O_TMPFILE and O_DIRECTORY together). */
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
/* Largest errno magnitude that IS_ERR_VALUE() treats as an encoded error. */
#define MAX_ERRNO 4095
/* File-type field of a mode value and the individual type codes. */
#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
#define S_IFREG 0100000
#define S_IFBLK 0060000
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFIFO 0010000
/* Set-uid, set-gid and sticky permission bits. */
#define S_ISUID 0004000
#define S_ISGID 0002000
#define S_ISVTX 0001000
/* File-type predicates: compare the S_IFMT field of m with one type code. */
#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
/*
 * True when x, taken as an unsigned address, falls within the top MAX_ERRNO
 * values of the unsigned long range. The whole expansion is parenthesized so
 * the macro composes with surrounding operators (e.g. `!IS_ERR_VALUE(p)`);
 * without the outer parentheses the `!` would bind to the cast operand only.
 */
#define IS_ERR_VALUE(x) ((unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO)
38
/*
 * Number of kill records held in one var_kill_data_arr_t; the same constant
 * is used as max_entries of the var_tpid_to_data map.
 */
#define KILL_DATA_ARRAY_SIZE 8

/*
 * Fixed-size group of kill records. This is the value type of the
 * var_tpid_to_data map, so each map entry carries KILL_DATA_ARRAY_SIZE slots.
 */
struct var_kill_data_arr_t {
	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
};
44
/*
 * Union of every record type the probes build. It is the value type of the
 * data_heap map, which makes a single scratch slot large enough for whichever
 * record a given probe needs.
 */
union any_profiler_data_t {
	struct var_exec_data_t var_exec;
	struct var_kill_data_t var_kill;
	struct var_sysctl_data_t var_sysctl;
	struct var_filemod_data_t var_filemod;
	struct var_fork_data_t var_fork;
	struct var_kill_data_arr_t var_kill_data_arr;
};
53
/*
 * Global configuration consulted by the probes, zero-initialized here.
 * NOTE(review): presumably the loader overwrites it before the programs are
 * attached; nothing in this file writes to it - confirm against the loader.
 */
volatile struct profiler_config_struct bpf_config = {};

/* Shorthand accessors for the individual bpf_config fields. */
#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
#define CGROUP_LOGIN_SESSION_INODE \
	(bpf_config.cgroup_login_session_inode)
#define KILL_SIGNALS (bpf_config.kill_signals_mask)
#define STALE_INFO (bpf_config.stale_info_secs)
#define INODE_FILTER (bpf_config.inode_filter)
#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
65
/*
 * Alternative local definition of the kernfs attribute structure in which the
 * timestamps sit inside an embedded `struct iattr`. populate_cgroup_info()
 * falls back to it when `iattr->ia_mtime` does not exist on the running
 * kernel. NOTE(review): the `___52` suffix presumably names the kernel
 * version this layout comes from - confirm.
 */
struct kernfs_iattrs___52 {
	struct iattr ia_iattr;
};
69
/*
 * Alternative local definition of kernfs_node whose `id` member is a union
 * exposing a 32-bit `ino`. get_inode_from_kernfs() probes for `id.ino` with
 * bpf_core_field_exists() and reads through this layout when it is present.
 */
struct kernfs_node___52 {
	union /* kernfs_node_id */ {
		struct {
			u32 ino;
			u32 generation;
		};
		u64 id;
	} id;
};
79
/*
 * Single-entry per-CPU array used as scratch space: every probe in this file
 * looks up key 0 and builds its event record in place in the returned slot.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, union any_profiler_data_t);
} data_heap SEC(".maps");
86
/* Perf event array that all probes publish to via bpf_perf_event_output(). */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");
92
/*
 * Pending kill records keyed by the target pid passed to kill(). Entries are
 * written by trace_var_sys_kill() and consumed then deleted by the
 * sched_process_exit handler when the process with that pid exits.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
	__type(key, u32);
	__type(value, struct var_kill_data_arr_t);
} var_tpid_to_data SEC(".maps");
99
/*
 * Per-CPU self-profiling counters, one slot per enum bpf_function_id value;
 * updated by bpf_stats_enter(), bpf_stats_exit() and
 * bpf_stats_pre_submit_var_perf_event().
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, profiler_bpf_max_function_id);
	__type(key, u32);
	__type(value, struct bpf_func_stats_data);
} bpf_func_stats SEC(".maps");
106
/*
 * Set of device ids (super_block s_dev) whose files are eligible for
 * file-modification events. Only key presence is tested; the bool value is
 * never read in this file.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} allowed_devices SEC(".maps");
113
/*
 * Set of inode numbers of individual files to report on. Only key presence
 * is tested (see is_dentry_allowed_for_filemod()).
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_file_inodes SEC(".maps");
120
/*
 * Set of directory inode numbers; a file is reported when any dentry on its
 * parent chain has an inode listed here (see is_ancestor_in_allowed_inodes()).
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_directory_inodes SEC(".maps");
127
/*
 * Set of executable inode numbers for which exec events are suppressed.
 * NOTE(review): the key is declared u32, but the exec handler passes the
 * address of a u64 inode to the lookup, so only the first four bytes of that
 * variable take part in the match - confirm this is intended.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} disallowed_exec_inodes SEC(".maps");
134
#ifndef ARRAY_SIZE
/*
 * Number of elements in a true array (not a pointer). The argument is
 * parenthesized before indexing so that any array-valued expression can be
 * passed safely.
 */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
138
139static INLINE bool IS_ERR(const void* ptr)
140{
141	return IS_ERR_VALUE((unsigned long)ptr);
142}
143
144static INLINE u32 get_userspace_pid()
145{
146	return bpf_get_current_pid_tgid() >> 32;
147}
148
149static INLINE bool is_init_process(u32 tgid)
150{
151	return tgid == 1 || tgid == 0;
152}
153
154static INLINE unsigned long
155probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
156{
157	len = len < max ? len : max;
158	if (len > 1) {
159		if (bpf_probe_read(dst, len, src))
160			return 0;
161	} else if (len == 1) {
162		if (bpf_probe_read(dst, 1, src))
163			return 0;
164	}
165	return len;
166}
167
168static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
169				     int spid)
170{
171#ifdef UNROLL
172#pragma unroll
173#endif
174	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
175		if (arr_struct->array[i].meta.pid == spid)
176			return i;
177	return -1;
178}
179
180static INLINE void populate_ancestors(struct task_struct* task,
181				      struct ancestors_data_t* ancestors_data)
182{
183	struct task_struct* parent = task;
184	u32 num_ancestors, ppid;
185
186	ancestors_data->num_ancestors = 0;
187#ifdef UNROLL
188#pragma unroll
189#endif
190	for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
191		parent = BPF_CORE_READ(parent, real_parent);
192		if (parent == NULL)
193			break;
194		ppid = BPF_CORE_READ(parent, tgid);
195		if (is_init_process(ppid))
196			break;
197		ancestors_data->ancestor_pids[num_ancestors] = ppid;
198		ancestors_data->ancestor_exec_ids[num_ancestors] =
199			BPF_CORE_READ(parent, self_exec_id);
200		ancestors_data->ancestor_start_times[num_ancestors] =
201			BPF_CORE_READ(parent, start_time);
202		ancestors_data->num_ancestors = num_ancestors;
203	}
204}
205
/*
 * Append the names of cgroup_node and its ancestors (following `parent`
 * links, at most MAX_CGROUPS_PATH_DEPTH nodes) to `payload`, one string after
 * another, leaf first.
 *
 * @cgroup_node:      node to start from
 * @cgroup_root_node: node whose position in the output should be reported
 * @payload:          write cursor
 * @root_pos:         set to the byte offset (from the initial payload) at
 *                    which cgroup_root_node's name starts, if it is met
 *
 * Returns the write cursor after the last name that was stored.
 */
static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
					  struct kernfs_node* cgroup_root_node,
					  void* payload,
					  int* root_pos)
{
	void* payload_start = payload;
	size_t filepart_length;

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
		/* The name read is issued before the NULL test on cgroup_node below. */
		filepart_length =
			bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name));
		if (!cgroup_node)
			return payload;
		if (cgroup_node == cgroup_root_node)
			*root_pos = payload - payload_start;
		/*
		 * Advance only for an in-range length; a negative helper result
		 * becomes a huge size_t and fails this test.
		 */
		if (filepart_length <= MAX_PATH) {
			barrier_var(filepart_length);
			payload += filepart_length;
		}
		cgroup_node = BPF_CORE_READ(cgroup_node, parent);
	}
	return payload;
}
232
/*
 * Return the inode number stored in a kernfs node, coping with two layouts:
 * when the running kernel's kernfs_node has an `id.ino` member it is read
 * through the kernfs_node___52 view, otherwise the whole `id` field is read
 * and converted to u64.
 */
static ino_t get_inode_from_kernfs(struct kernfs_node* node)
{
	struct kernfs_node___52* node52 = (void*)node;

	if (bpf_core_field_exists(node52->id.ino)) {
		barrier_var(node52);
		return BPF_CORE_READ(node52, id.ino);
	} else {
		barrier_var(node);
		return (u64)BPF_CORE_READ(node, id);
	}
}
245
/* Kernel config option, declared weak so the symbol may be absent. */
extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
/*
 * Local stand-in for the kernel's cgroup subsystem id enum. It is used only
 * as an argument to bpf_core_enum_value() in populate_cgroup_info(), so the
 * numeric value written here is a placeholder.
 */
enum cgroup_subsys_id___local {
	pids_cgrp_id___local = 123, /* value doesn't matter */
};
250
/*
 * Fill cgroup_data for `task` and append the related names to `payload`.
 *
 * Steps, in order:
 *  - pick the root kernfs node (via the task's cgroup namespace) and the
 *    task's own cgroup kernfs node (via task->cgroups->dfl_cgrp);
 *  - optionally, when the v1 resolver is enabled and CONFIG_CGROUP_PIDS is
 *    set, replace both with the nodes of the subsystem whose id matches the
 *    kernel's pids subsystem id;
 *  - store both inode numbers and the tv_nsec part of both mtimes;
 *  - append the root name, then the task's cgroup name, recording lengths;
 *  - when FETCH_CGROUPS_FROM_BPF is set, append the full ancestor chain.
 *
 * Returns the payload cursor after everything that was appended.
 */
static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
					 struct task_struct* task,
					 void* payload)
{
	struct kernfs_node* root_kernfs =
		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

#if __has_builtin(__builtin_preserve_enum_value)
	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
		/* Resolve the running kernel's value of the pids subsystem id. */
		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
						  pids_cgrp_id___local);
#ifdef UNROLL
#pragma unroll
#endif
		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys_state* subsys =
				BPF_CORE_READ(task, cgroups, subsys[i]);
			if (subsys != NULL) {
				int subsys_id = BPF_CORE_READ(subsys, ss, id);
				if (subsys_id == cgrp_id) {
					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
					break;
				}
			}
		}
	}
#endif

	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);

	/* Two attribute layouts are handled; only the nanosecond field is kept. */
	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
	} else {
		struct kernfs_iattrs___52* root_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);

		struct kernfs_iattrs___52* proc_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
	}

	/* Lengths default to 0 and are overwritten only after a good read. */
	cgroup_data->cgroup_root_length = 0;
	cgroup_data->cgroup_proc_length = 0;
	cgroup_data->cgroup_full_length = 0;

	size_t cgroup_root_length =
		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name));
	barrier_var(cgroup_root_length);
	if (cgroup_root_length <= MAX_PATH) {
		barrier_var(cgroup_root_length);
		cgroup_data->cgroup_root_length = cgroup_root_length;
		payload += cgroup_root_length;
	}

	size_t cgroup_proc_length =
		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name));
	barrier_var(cgroup_proc_length);
	if (cgroup_proc_length <= MAX_PATH) {
		barrier_var(cgroup_proc_length);
		cgroup_data->cgroup_proc_length = cgroup_proc_length;
		payload += cgroup_proc_length;
	}

	if (FETCH_CGROUPS_FROM_BPF) {
		/* -1 means the root node was not met while walking the chain. */
		cgroup_data->cgroup_full_path_root_pos = -1;
		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
							      &cgroup_data->cgroup_full_path_root_pos);
		cgroup_data->cgroup_full_length = payload_end_pos - payload;
		payload = payload_end_pos;
	}

	return (void*)payload;
}
333
/*
 * Fill the common metadata header and append the task's comm to `payload`.
 *
 * uid and gid come from bpf_get_current_uid_gid() (low and high 32 bits),
 * i.e. from the current task, while exec_id, start_time and comm are read
 * from `task`. `pid` is stored as given. meta->type is not touched here.
 *
 * Returns the payload cursor, advanced past comm when the read succeeded.
 */
static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
					  struct task_struct* task,
					  u32 pid, void* payload)
{
	u64 uid_gid = bpf_get_current_uid_gid();

	metadata->uid = (u32)uid_gid;
	metadata->gid = uid_gid >> 32;
	metadata->pid = pid;
	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
	metadata->start_time = BPF_CORE_READ(task, start_time);
	metadata->comm_length = 0;

	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
	barrier_var(comm_length);
	/* Record and skip the string only when the length is in range. */
	if (comm_length <= TASK_COMM_LEN) {
		barrier_var(comm_length);
		metadata->comm_length = comm_length;
		payload += comm_length;
	}

	return (void*)payload;
}
357
358static INLINE struct var_kill_data_t*
359get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
360{
361	int zero = 0;
362	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
363
364	if (kill_data == NULL)
365		return NULL;
366	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
367
368	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
369	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
370	size_t payload_length = payload - (void*)kill_data->payload;
371	kill_data->payload_length = payload_length;
372	populate_ancestors(task, &kill_data->ancestors_info);
373	kill_data->meta.type = KILL_EVENT;
374	kill_data->kill_target_pid = tpid;
375	kill_data->kill_sig = sig;
376	kill_data->kill_count = 1;
377	kill_data->last_kill_time = bpf_ktime_get_ns();
378	return kill_data;
379}
380
/*
 * Record a kill(tpid, sig) issued by the current process into
 * var_tpid_to_data, keyed by the target pid. Always returns 0.
 *
 * Signals whose bit is not set in KILL_SIGNALS are ignored. Otherwise:
 *  - no entry for tpid yet: build a record and store the scratch buffer,
 *    viewed as an array, as the new entry;
 *  - entry exists but has no record from this sender: build a record and
 *    copy it into the first slot whose meta.pid is 0 (dropped if none);
 *  - entry has a record from this sender: bump its count and timestamp when
 *    it is younger than STALE_INFO seconds, otherwise replace it.
 *
 * NOTE(review): `sig` is used as a shift count without a range check -
 * confirm callers only pass valid signal numbers.
 */
static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
{
	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
		return 0;

	u32 spid = get_userspace_pid();
	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);

	if (arr_struct == NULL) {
		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
		int zero = 0;

		if (kill_data == NULL)
			return 0;
		/*
		 * Same map and key as get_var_kill_data() used, so arr_struct and
		 * kill_data refer to the same scratch slot; slots 1.. of the array
		 * keep whatever bytes the scratch slot already held.
		 */
		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
		if (arr_struct == NULL)
			return 0;
		bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data);
	} else {
		int index = get_var_spid_index(arr_struct, spid);

		if (index == -1) {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
#ifdef UNROLL
#pragma unroll
#endif
			/* Take the first slot whose meta.pid is 0, i.e. unused. */
			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
				if (arr_struct->array[i].meta.pid == 0) {
					bpf_probe_read(&arr_struct->array[i],
						       sizeof(arr_struct->array[i]), kill_data);
					bpf_map_update_elem(&var_tpid_to_data, &tpid,
							    arr_struct, 0);

					return 0;
				}
			/* Every slot is taken: the new record is not stored. */
			return 0;
		}

		struct var_kill_data_t* kill_data = &arr_struct->array[index];

		u64 delta_sec =
			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;

		if (delta_sec < STALE_INFO) {
			kill_data->kill_count++;
			kill_data->last_kill_time = bpf_ktime_get_ns();
			/* Source and destination are the same element here. */
			bpf_probe_read(&arr_struct->array[index],
				       sizeof(arr_struct->array[index]),
				       kill_data);
		} else {
			/* Shadows the outer kill_data with a freshly built record. */
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
			bpf_probe_read(&arr_struct->array[index],
				       sizeof(arr_struct->array[index]),
				       kill_data);
		}
	}
	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
	return 0;
}
446
447static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
448				   enum bpf_function_id func_id)
449{
450	int func_id_key = func_id;
451
452	bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
453	bpf_stat_ctx->bpf_func_stats_data_val =
454		bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
455	if (bpf_stat_ctx->bpf_func_stats_data_val)
456		bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
457}
458
459static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
460{
461	if (bpf_stat_ctx->bpf_func_stats_data_val)
462		bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
463			bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
464}
465
466static INLINE void
467bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
468				    struct var_metadata_t* meta)
469{
470	if (bpf_stat_ctx->bpf_func_stats_data_val) {
471		bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
472		meta->bpf_stats_num_perf_events =
473			bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
474	}
475	meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
476	meta->cpu_id = bpf_get_smp_processor_id();
477}
478
/*
 * Append the name of filp_dentry and of each of its ancestors (following
 * d_parent, at most MAX_PATH_DEPTH components) to `payload`, leaf first,
 * one string after another.
 *
 * The walk ends when a name read yields an out-of-range length or when a
 * dentry is its own parent. Returns the total number of bytes appended.
 */
static INLINE size_t
read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
{
	size_t length = 0;
	size_t filepart_length;
	struct dentry* parent_dentry;

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		filepart_length = bpf_probe_read_str(payload, MAX_PATH,
						     BPF_CORE_READ(filp_dentry, d_name.name));
		barrier_var(filepart_length);
		/* A negative helper result is a huge size_t and ends the walk. */
		if (filepart_length > MAX_PATH)
			break;
		barrier_var(filepart_length);
		payload += filepart_length;
		length += filepart_length;

		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		/* A dentry that is its own parent marks the top of the chain. */
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}

	return length;
}
507
508static INLINE bool
509is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
510{
511	struct dentry* parent_dentry;
512#ifdef UNROLL
513#pragma unroll
514#endif
515	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
516		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
517		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);
518
519		if (allowed_dir != NULL)
520			return true;
521		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
522		if (filp_dentry == parent_dentry)
523			break;
524		filp_dentry = parent_dentry;
525	}
526	return false;
527}
528
529static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
530						 u32* device_id,
531						 u64* file_ino)
532{
533	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
534	*device_id = dev_id;
535	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);
536
537	if (allowed_device == NULL)
538		return false;
539
540	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
541	*file_ino = ino;
542	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);
543
544	if (allowed_file == NULL)
545		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
546			return false;
547	return true;
548}
549
/*
 * kprobe on proc_sys_write: emit a SYSCTL_EVENT describing the writer
 * (metadata, cgroup info, ancestors) followed by the written value and the
 * last path component of the target file. `count` and `ppos` are unused.
 * Always returns 0.
 */
SEC("kprobe/proc_sys_write")
ssize_t BPF_KPROBE(kprobe__proc_sys_write,
		   struct file* filp, const char* buf,
		   size_t count, loff_t* ppos)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);

	u32 pid = get_userspace_pid();
	int zero = 0;
	/* The event is assembled in place in the per-CPU scratch slot. */
	struct var_sysctl_data_t* sysctl_data =
		bpf_map_lookup_elem(&data_heap, &zero);
	if (!sysctl_data)
		goto out;

	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	sysctl_data->meta.type = SYSCTL_EVENT;
	void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
	payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);

	populate_ancestors(task, &sysctl_data->ancestors_info);

	sysctl_data->sysctl_val_length = 0;
	sysctl_data->sysctl_path_length = 0;

	/* Written value, read as a string of at most CTL_MAXNAME bytes. */
	size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf);
	barrier_var(sysctl_val_length);
	if (sysctl_val_length <= CTL_MAXNAME) {
		barrier_var(sysctl_val_length);
		sysctl_data->sysctl_val_length = sysctl_val_length;
		payload += sysctl_val_length;
	}

	/* Only the dentry's own name is recorded, not the full path. */
	size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH,
						       BPF_CORE_READ(filp, f_path.dentry, d_name.name));
	barrier_var(sysctl_path_length);
	if (sysctl_path_length <= MAX_PATH) {
		barrier_var(sysctl_path_length);
		sysctl_data->sysctl_path_length = sysctl_path_length;
		payload += sysctl_path_length;
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
	/* Send only the used prefix of the record, capped at the struct size. */
	unsigned long data_len = payload - (void*)sysctl_data;
	data_len = data_len > sizeof(struct var_sysctl_data_t)
		? sizeof(struct var_sysctl_data_t)
		: data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
602
603SEC("tracepoint/syscalls/sys_enter_kill")
604int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
605{
606	struct bpf_func_stats_ctx stats_ctx;
607
608	bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
609	int pid = ctx->args[0];
610	int sig = ctx->args[1];
611	int ret = trace_var_sys_kill(ctx, pid, sig);
612	bpf_stats_exit(&stats_ctx);
613	return ret;
614};
615
616SEC("raw_tracepoint/sched_process_exit")
617int raw_tracepoint__sched_process_exit(void* ctx)
618{
619	int zero = 0;
620	struct bpf_func_stats_ctx stats_ctx;
621	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
622
623	u32 tpid = get_userspace_pid();
624
625	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
626	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
627
628	if (arr_struct == NULL || kill_data == NULL)
629		goto out;
630
631	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
632	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
633
634#ifdef UNROLL
635#pragma unroll
636#endif
637	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
638		struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
639
640		if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
641			bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data);
642			void* payload = kill_data->payload;
643			size_t offset = kill_data->payload_length;
644			if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
645				return 0;
646			payload += offset;
647
648			kill_data->kill_target_name_length = 0;
649			kill_data->kill_target_cgroup_proc_length = 0;
650
651			size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
652			barrier_var(comm_length);
653			if (comm_length <= TASK_COMM_LEN) {
654				barrier_var(comm_length);
655				kill_data->kill_target_name_length = comm_length;
656				payload += comm_length;
657			}
658
659			size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN,
660								       BPF_CORE_READ(proc_kernfs, name));
661			barrier_var(cgroup_proc_length);
662			if (cgroup_proc_length <= KILL_TARGET_LEN) {
663				barrier_var(cgroup_proc_length);
664				kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
665				payload += cgroup_proc_length;
666			}
667
668			bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
669			unsigned long data_len = (void*)payload - (void*)kill_data;
670			data_len = data_len > sizeof(struct var_kill_data_t)
671				? sizeof(struct var_kill_data_t)
672				: data_len;
673			bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
674		}
675	}
676	bpf_map_delete_elem(&var_tpid_to_data, &tpid);
677out:
678	bpf_stats_exit(&stats_ctx);
679	return 0;
680}
681
682SEC("raw_tracepoint/sched_process_exec")
683int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
684{
685	struct bpf_func_stats_ctx stats_ctx;
686	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
687
688	struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
689	u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
690
691	bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
692	if (should_filter_binprm != NULL)
693		goto out;
694
695	int zero = 0;
696	struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
697	if (!proc_exec_data)
698		goto out;
699
700	if (INODE_FILTER && inode != INODE_FILTER)
701		return 0;
702
703	u32 pid = get_userspace_pid();
704	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
705
706	proc_exec_data->meta.type = EXEC_EVENT;
707	proc_exec_data->bin_path_length = 0;
708	proc_exec_data->cmdline_length = 0;
709	proc_exec_data->environment_length = 0;
710	void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
711					      proc_exec_data->payload);
712	payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
713
714	struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
715	proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
716	proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
717	proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
718	proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
719
720	const char* filename = BPF_CORE_READ(bprm, filename);
721	size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename);
722	barrier_var(bin_path_length);
723	if (bin_path_length <= MAX_FILENAME_LEN) {
724		barrier_var(bin_path_length);
725		proc_exec_data->bin_path_length = bin_path_length;
726		payload += bin_path_length;
727	}
728
729	void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
730	void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
731	unsigned int cmdline_length = probe_read_lim(payload, arg_start,
732						     arg_end - arg_start, MAX_ARGS_LEN);
733
734	if (cmdline_length <= MAX_ARGS_LEN) {
735		barrier_var(cmdline_length);
736		proc_exec_data->cmdline_length = cmdline_length;
737		payload += cmdline_length;
738	}
739
740	if (READ_ENVIRON_FROM_EXEC) {
741		void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
742		void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
743		unsigned long env_len = probe_read_lim(payload, env_start,
744						       env_end - env_start, MAX_ENVIRON_LEN);
745		if (cmdline_length <= MAX_ENVIRON_LEN) {
746			proc_exec_data->environment_length = env_len;
747			payload += env_len;
748		}
749	}
750
751	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
752	unsigned long data_len = payload - (void*)proc_exec_data;
753	data_len = data_len > sizeof(struct var_exec_data_t)
754		? sizeof(struct var_exec_data_t)
755		: data_len;
756	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
757out:
758	bpf_stats_exit(&stats_ctx);
759	return 0;
760}
761
/*
 * kretprobe on do_filp_open: emit a FILEMOD_EVENT (FMOD_OPEN) when a file on
 * an allowed device/inode is opened for writing.
 *
 * Skipped: NULL or error return values, opens without O_WRONLY/O_RDWR, opens
 * matching the O_TMPFILE mask, and directories, character/block devices,
 * FIFOs and sockets. Always returns 0.
 */
SEC("kretprobe/do_filp_open")
int kprobe_ret__do_filp_open(struct pt_regs* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);

	/* The probed function's return value is treated as a struct file*. */
	struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);

	if (filp == NULL || IS_ERR(filp))
		goto out;
	unsigned int flags = BPF_CORE_READ(filp, f_flags);
	if ((flags & (O_RDWR | O_WRONLY)) == 0)
		goto out;
	/*
	 * NOTE(review): O_TMPFILE is a two-bit mask that includes O_DIRECTORY,
	 * so this test also fires when only O_DIRECTORY is set - confirm that
	 * is intended.
	 */
	if ((flags & O_TMPFILE) > 0)
		goto out;
	struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
	umode_t mode = BPF_CORE_READ(file_inode, i_mode);
	if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
	    S_ISSOCK(mode))
		goto out;

	struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
	u32 device_id = 0;
	u64 file_ino = 0;
	if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	/* An open has no source side: the src_* fields are zeroed. */
	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_OPEN;
	filemod_data->dst_flags = flags;
	filemod_data->src_inode = 0;
	filemod_data->dst_inode = file_ino;
	filemod_data->src_device_id = 0;
	filemod_data->dst_device_id = device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->dst_filepath_length = len;
	}
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	/* Send only the used prefix of the record, capped at the struct size. */
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
826
/*
 * kprobe on vfs_link: emit a FILEMOD_EVENT (FMOD_LINK) carrying the paths of
 * both the existing and the new dentry when at least one of them passes
 * is_dentry_allowed_for_filemod(). `dir` and `delegated_inode` are unused.
 * Always returns 0.
 */
SEC("kprobe/vfs_link")
int BPF_KPROBE(kprobe__vfs_link,
	       struct dentry* old_dentry, struct inode* dir,
	       struct dentry* new_dentry, struct inode** delegated_inode)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);

	u32 src_device_id = 0;
	u64 src_file_ino = 0;
	u32 dst_device_id = 0;
	u64 dst_file_ino = 0;
	/*
	 * NOTE(review): because of short-circuit evaluation the second call is
	 * skipped when old_dentry is allowed, leaving dst_device_id and
	 * dst_file_ino at 0 in that case - confirm this is intended.
	 */
	if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
	    !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_LINK;
	filemod_data->dst_flags = 0;
	filemod_data->src_inode = src_file_ino;
	filemod_data->dst_inode = dst_file_ino;
	filemod_data->src_device_id = src_device_id;
	filemod_data->dst_device_id = dst_device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	/* Source path first, then the destination path right after it. */
	size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->src_filepath_length = len;
	}

	len = read_absolute_file_path_from_dentry(new_dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->dst_filepath_length = len;
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	/* Send only the used prefix of the record, capped at the struct size. */
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
889
/*
 * kprobe on vfs_symlink: emit a FILEMOD_EVENT (FMOD_SYMLINK) when the new
 * link's dentry passes is_dentry_allowed_for_filemod(). The link target
 * string (`oldname`) is stored as the source path and the dentry's path as
 * the destination path. `dir` is unused. Always returns 0.
 */
SEC("kprobe/vfs_symlink")
int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
	       const char* oldname)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);

	u32 dst_device_id = 0;
	u64 dst_file_ino = 0;
	if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	/* The source is only a string here, so src inode/device stay 0. */
	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_SYMLINK;
	filemod_data->dst_flags = 0;
	filemod_data->src_inode = 0;
	filemod_data->dst_inode = dst_file_ino;
	filemod_data->src_device_id = 0;
	filemod_data->dst_device_id = dst_device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->src_filepath_length = len;
	}
	len = read_absolute_file_path_from_dentry(dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->dst_filepath_length = len;
	}
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	/* Send only the used prefix of the record, capped at the struct size. */
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
946
/*
 * Raw tracepoint on fork: emit a FORK_EVENT whose metadata describes the
 * child (tracepoint argument 1) and whose parent_* fields describe the
 * parent (argument 0). No cgroup or ancestor information is collected here.
 * Always returns 0.
 */
SEC("raw_tracepoint/sched_process_fork")
int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);

	int zero = 0;
	struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!fork_data)
		goto out;

	struct task_struct* parent = (struct task_struct*)ctx->args[0];
	struct task_struct* child = (struct task_struct*)ctx->args[1];
	fork_data->meta.type = FORK_EVENT;

	/*
	 * The metadata pid is the child's `pid` field (not tgid). The uid/gid
	 * inside populate_var_metadata() come from the current task.
	 */
	void* payload = populate_var_metadata(&fork_data->meta, child,
					      BPF_CORE_READ(child, pid), fork_data->payload);
	fork_data->parent_pid = BPF_CORE_READ(parent, pid);
	fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
	fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);

	/* Send only the used prefix of the record, capped at the struct size. */
	unsigned long data_len = payload - (void*)fork_data;
	data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
/* License string placed in the "license" section of the object file. */
char _license[] SEC("license") = "GPL";
977