1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2020 Facebook */
3#include <vmlinux.h>
4#include <bpf/bpf_core_read.h>
5#include <bpf/bpf_helpers.h>
6#include <bpf/bpf_tracing.h>
7
8#include "profiler.h"
9#include "err.h"
10
/* Fallback null-pointer constant for builds whose headers do not provide one. */
#if !defined(NULL)
#define NULL 0
#endif
14
/* Octal open(2) flag bits consulted by the file-open probe in this file. */
#define O_WRONLY	00000001
#define O_RDWR		00000002
#define O_DIRECTORY	00200000
#define __O_TMPFILE	020000000
#define O_TMPFILE	(__O_TMPFILE | O_DIRECTORY)

/* Mask selecting the file-type field of an inode mode, and its values. */
#define S_IFMT		00170000
#define S_IFSOCK	0140000
#define S_IFLNK		0120000
#define S_IFREG		0100000
#define S_IFBLK		0060000
#define S_IFDIR		0040000
#define S_IFCHR		0020000
#define S_IFIFO		0010000

/* Set-uid, set-gid and sticky bits. */
#define S_ISUID		0004000
#define S_ISGID		0002000
#define S_ISVTX		0001000

/* Type predicates: true when the masked type field of (m) equals the type. */
#define S_ISLNK(m)	(((m) & S_IFMT) == S_IFLNK)
#define S_ISDIR(m)	(((m) & S_IFMT) == S_IFDIR)
#define S_ISCHR(m)	(((m) & S_IFMT) == S_IFCHR)
#define S_ISBLK(m)	(((m) & S_IFMT) == S_IFBLK)
#define S_ISFIFO(m)	(((m) & S_IFMT) == S_IFIFO)
#define S_ISSOCK(m)	(((m) & S_IFMT) == S_IFSOCK)
37
38#define KILL_DATA_ARRAY_SIZE 8
39
40struct var_kill_data_arr_t {
41	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
42};
43
44union any_profiler_data_t {
45	struct var_exec_data_t var_exec;
46	struct var_kill_data_t var_kill;
47	struct var_sysctl_data_t var_sysctl;
48	struct var_filemod_data_t var_filemod;
49	struct var_fork_data_t var_fork;
50	struct var_kill_data_arr_t var_kill_data_arr;
51};
52
53volatile struct profiler_config_struct bpf_config = {};
54
55#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
56#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
57#define CGROUP_LOGIN_SESSION_INODE \
58	(bpf_config.cgroup_login_session_inode)
59#define KILL_SIGNALS (bpf_config.kill_signals_mask)
60#define STALE_INFO (bpf_config.stale_info_secs)
61#define INODE_FILTER (bpf_config.inode_filter)
62#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
63#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
64
65struct kernfs_iattrs___52 {
66	struct iattr ia_iattr;
67};
68
69struct kernfs_node___52 {
70	union /* kernfs_node_id */ {
71		struct {
72			u32 ino;
73			u32 generation;
74		};
75		u64 id;
76	} id;
77};
78
79struct {
80	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
81	__uint(max_entries, 1);
82	__type(key, u32);
83	__type(value, union any_profiler_data_t);
84} data_heap SEC(".maps");
85
86struct {
87	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
88	__uint(key_size, sizeof(int));
89	__uint(value_size, sizeof(int));
90} events SEC(".maps");
91
92struct {
93	__uint(type, BPF_MAP_TYPE_HASH);
94	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
95	__type(key, u32);
96	__type(value, struct var_kill_data_arr_t);
97} var_tpid_to_data SEC(".maps");
98
99struct {
100	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
101	__uint(max_entries, profiler_bpf_max_function_id);
102	__type(key, u32);
103	__type(value, struct bpf_func_stats_data);
104} bpf_func_stats SEC(".maps");
105
106struct {
107	__uint(type, BPF_MAP_TYPE_HASH);
108	__type(key, u32);
109	__type(value, bool);
110	__uint(max_entries, 16);
111} allowed_devices SEC(".maps");
112
113struct {
114	__uint(type, BPF_MAP_TYPE_HASH);
115	__type(key, u64);
116	__type(value, bool);
117	__uint(max_entries, 1024);
118} allowed_file_inodes SEC(".maps");
119
120struct {
121	__uint(type, BPF_MAP_TYPE_HASH);
122	__type(key, u64);
123	__type(value, bool);
124	__uint(max_entries, 1024);
125} allowed_directory_inodes SEC(".maps");
126
127struct {
128	__uint(type, BPF_MAP_TYPE_HASH);
129	__type(key, u32);
130	__type(value, bool);
131	__uint(max_entries, 16);
132} disallowed_exec_inodes SEC(".maps");
133
/*
 * Element count of a true array (not a pointer). The argument is
 * parenthesized before indexing so that any array-valued expression can be
 * passed without precedence surprises.
 */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
137
138static INLINE bool IS_ERR(const void* ptr)
139{
140	return IS_ERR_VALUE((unsigned long)ptr);
141}
142
143static INLINE u32 get_userspace_pid()
144{
145	return bpf_get_current_pid_tgid() >> 32;
146}
147
148static INLINE bool is_init_process(u32 tgid)
149{
150	return tgid == 1 || tgid == 0;
151}
152
153static INLINE unsigned long
154probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
155{
156	len = len < max ? len : max;
157	if (len > 1) {
158		if (bpf_probe_read_kernel(dst, len, src))
159			return 0;
160	} else if (len == 1) {
161		if (bpf_probe_read_kernel(dst, 1, src))
162			return 0;
163	}
164	return len;
165}
166
167static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
168				     int spid)
169{
170#ifdef UNROLL
171#pragma unroll
172#endif
173	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
174		if (arr_struct->array[i].meta.pid == spid)
175			return i;
176	return -1;
177}
178
179static INLINE void populate_ancestors(struct task_struct* task,
180				      struct ancestors_data_t* ancestors_data)
181{
182	struct task_struct* parent = task;
183	u32 num_ancestors, ppid;
184
185	ancestors_data->num_ancestors = 0;
186#ifdef UNROLL
187#pragma unroll
188#endif
189	for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
190		parent = BPF_CORE_READ(parent, real_parent);
191		if (parent == NULL)
192			break;
193		ppid = BPF_CORE_READ(parent, tgid);
194		if (is_init_process(ppid))
195			break;
196		ancestors_data->ancestor_pids[num_ancestors] = ppid;
197		ancestors_data->ancestor_exec_ids[num_ancestors] =
198			BPF_CORE_READ(parent, self_exec_id);
199		ancestors_data->ancestor_start_times[num_ancestors] =
200			BPF_CORE_READ(parent, start_time);
201		ancestors_data->num_ancestors = num_ancestors;
202	}
203}
204
205static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
206					  struct kernfs_node* cgroup_root_node,
207					  void* payload,
208					  int* root_pos)
209{
210	void* payload_start = payload;
211	size_t filepart_length;
212
213#ifdef UNROLL
214#pragma unroll
215#endif
216	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
217		filepart_length =
218			bpf_probe_read_kernel_str(payload, MAX_PATH,
219						  BPF_CORE_READ(cgroup_node, name));
220		if (!cgroup_node)
221			return payload;
222		if (cgroup_node == cgroup_root_node)
223			*root_pos = payload - payload_start;
224		if (filepart_length <= MAX_PATH) {
225			barrier_var(filepart_length);
226			payload += filepart_length;
227		}
228		cgroup_node = BPF_CORE_READ(cgroup_node, parent);
229	}
230	return payload;
231}
232
233static ino_t get_inode_from_kernfs(struct kernfs_node* node)
234{
235	struct kernfs_node___52* node52 = (void*)node;
236
237	if (bpf_core_field_exists(node52->id.ino)) {
238		barrier_var(node52);
239		return BPF_CORE_READ(node52, id.ino);
240	} else {
241		barrier_var(node);
242		return (u64)BPF_CORE_READ(node, id);
243	}
244}
245
246extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
247enum cgroup_subsys_id___local {
248	pids_cgrp_id___local = 123, /* value doesn't matter */
249};
250
251static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
252					 struct task_struct* task,
253					 void* payload)
254{
255	struct kernfs_node* root_kernfs =
256		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
257	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
258
259#if __has_builtin(__builtin_preserve_enum_value)
260	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
261		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
262						  pids_cgrp_id___local);
263#ifdef UNROLL
264#pragma unroll
265#endif
266		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
267			struct cgroup_subsys_state* subsys =
268				BPF_CORE_READ(task, cgroups, subsys[i]);
269			if (subsys != NULL) {
270				int subsys_id = BPF_CORE_READ(subsys, ss, id);
271				if (subsys_id == cgrp_id) {
272					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
273					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
274					break;
275				}
276			}
277		}
278	}
279#endif
280
281	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
282	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);
283
284	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
285		cgroup_data->cgroup_root_mtime =
286			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
287		cgroup_data->cgroup_proc_mtime =
288			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
289	} else {
290		struct kernfs_iattrs___52* root_iattr =
291			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
292		cgroup_data->cgroup_root_mtime =
293			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);
294
295		struct kernfs_iattrs___52* proc_iattr =
296			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
297		cgroup_data->cgroup_proc_mtime =
298			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
299	}
300
301	cgroup_data->cgroup_root_length = 0;
302	cgroup_data->cgroup_proc_length = 0;
303	cgroup_data->cgroup_full_length = 0;
304
305	size_t cgroup_root_length =
306		bpf_probe_read_kernel_str(payload, MAX_PATH,
307					  BPF_CORE_READ(root_kernfs, name));
308	barrier_var(cgroup_root_length);
309	if (cgroup_root_length <= MAX_PATH) {
310		barrier_var(cgroup_root_length);
311		cgroup_data->cgroup_root_length = cgroup_root_length;
312		payload += cgroup_root_length;
313	}
314
315	size_t cgroup_proc_length =
316		bpf_probe_read_kernel_str(payload, MAX_PATH,
317					  BPF_CORE_READ(proc_kernfs, name));
318	barrier_var(cgroup_proc_length);
319	if (cgroup_proc_length <= MAX_PATH) {
320		barrier_var(cgroup_proc_length);
321		cgroup_data->cgroup_proc_length = cgroup_proc_length;
322		payload += cgroup_proc_length;
323	}
324
325	if (FETCH_CGROUPS_FROM_BPF) {
326		cgroup_data->cgroup_full_path_root_pos = -1;
327		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
328							      &cgroup_data->cgroup_full_path_root_pos);
329		cgroup_data->cgroup_full_length = payload_end_pos - payload;
330		payload = payload_end_pos;
331	}
332
333	return (void*)payload;
334}
335
336static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
337					  struct task_struct* task,
338					  u32 pid, void* payload)
339{
340	u64 uid_gid = bpf_get_current_uid_gid();
341
342	metadata->uid = (u32)uid_gid;
343	metadata->gid = uid_gid >> 32;
344	metadata->pid = pid;
345	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
346	metadata->start_time = BPF_CORE_READ(task, start_time);
347	metadata->comm_length = 0;
348
349	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
350	barrier_var(comm_length);
351	if (comm_length <= TASK_COMM_LEN) {
352		barrier_var(comm_length);
353		metadata->comm_length = comm_length;
354		payload += comm_length;
355	}
356
357	return (void*)payload;
358}
359
360static INLINE struct var_kill_data_t*
361get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
362{
363	int zero = 0;
364	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
365
366	if (kill_data == NULL)
367		return NULL;
368	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
369
370	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
371	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
372	size_t payload_length = payload - (void*)kill_data->payload;
373	kill_data->payload_length = payload_length;
374	populate_ancestors(task, &kill_data->ancestors_info);
375	kill_data->meta.type = KILL_EVENT;
376	kill_data->kill_target_pid = tpid;
377	kill_data->kill_sig = sig;
378	kill_data->kill_count = 1;
379	kill_data->last_kill_time = bpf_ktime_get_ns();
380	return kill_data;
381}
382
383static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
384{
385	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
386		return 0;
387
388	u32 spid = get_userspace_pid();
389	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
390
391	if (arr_struct == NULL) {
392		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
393		int zero = 0;
394
395		if (kill_data == NULL)
396			return 0;
397		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
398		if (arr_struct == NULL)
399			return 0;
400		bpf_probe_read_kernel(&arr_struct->array[0],
401				      sizeof(arr_struct->array[0]), kill_data);
402	} else {
403		int index = get_var_spid_index(arr_struct, spid);
404
405		if (index == -1) {
406			struct var_kill_data_t* kill_data =
407				get_var_kill_data(ctx, spid, tpid, sig);
408			if (kill_data == NULL)
409				return 0;
410#ifdef UNROLL
411#pragma unroll
412#endif
413			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
414				if (arr_struct->array[i].meta.pid == 0) {
415					bpf_probe_read_kernel(&arr_struct->array[i],
416							      sizeof(arr_struct->array[i]),
417							      kill_data);
418					bpf_map_update_elem(&var_tpid_to_data, &tpid,
419							    arr_struct, 0);
420
421					return 0;
422				}
423			return 0;
424		}
425
426		struct var_kill_data_t* kill_data = &arr_struct->array[index];
427
428		u64 delta_sec =
429			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;
430
431		if (delta_sec < STALE_INFO) {
432			kill_data->kill_count++;
433			kill_data->last_kill_time = bpf_ktime_get_ns();
434			bpf_probe_read_kernel(&arr_struct->array[index],
435					      sizeof(arr_struct->array[index]),
436					      kill_data);
437		} else {
438			struct var_kill_data_t* kill_data =
439				get_var_kill_data(ctx, spid, tpid, sig);
440			if (kill_data == NULL)
441				return 0;
442			bpf_probe_read_kernel(&arr_struct->array[index],
443					      sizeof(arr_struct->array[index]),
444					      kill_data);
445		}
446	}
447	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
448	return 0;
449}
450
451static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
452				   enum bpf_function_id func_id)
453{
454	int func_id_key = func_id;
455
456	bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
457	bpf_stat_ctx->bpf_func_stats_data_val =
458		bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
459	if (bpf_stat_ctx->bpf_func_stats_data_val)
460		bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
461}
462
463static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
464{
465	if (bpf_stat_ctx->bpf_func_stats_data_val)
466		bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
467			bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
468}
469
470static INLINE void
471bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
472				    struct var_metadata_t* meta)
473{
474	if (bpf_stat_ctx->bpf_func_stats_data_val) {
475		bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
476		meta->bpf_stats_num_perf_events =
477			bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
478	}
479	meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
480	meta->cpu_id = bpf_get_smp_processor_id();
481}
482
483static INLINE size_t
484read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
485{
486	size_t length = 0;
487	size_t filepart_length;
488	struct dentry* parent_dentry;
489
490#ifdef UNROLL
491#pragma unroll
492#endif
493	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
494		filepart_length =
495			bpf_probe_read_kernel_str(payload, MAX_PATH,
496						  BPF_CORE_READ(filp_dentry, d_name.name));
497		barrier_var(filepart_length);
498		if (filepart_length > MAX_PATH)
499			break;
500		barrier_var(filepart_length);
501		payload += filepart_length;
502		length += filepart_length;
503
504		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
505		if (filp_dentry == parent_dentry)
506			break;
507		filp_dentry = parent_dentry;
508	}
509
510	return length;
511}
512
513static INLINE bool
514is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
515{
516	struct dentry* parent_dentry;
517#ifdef UNROLL
518#pragma unroll
519#endif
520	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
521		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
522		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);
523
524		if (allowed_dir != NULL)
525			return true;
526		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
527		if (filp_dentry == parent_dentry)
528			break;
529		filp_dentry = parent_dentry;
530	}
531	return false;
532}
533
534static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
535						 u32* device_id,
536						 u64* file_ino)
537{
538	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
539	*device_id = dev_id;
540	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);
541
542	if (allowed_device == NULL)
543		return false;
544
545	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
546	*file_ino = ino;
547	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);
548
549	if (allowed_file == NULL)
550		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
551			return false;
552	return true;
553}
554
555SEC("kprobe/proc_sys_write")
556ssize_t BPF_KPROBE(kprobe__proc_sys_write,
557		   struct file* filp, const char* buf,
558		   size_t count, loff_t* ppos)
559{
560	struct bpf_func_stats_ctx stats_ctx;
561	bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);
562
563	u32 pid = get_userspace_pid();
564	int zero = 0;
565	struct var_sysctl_data_t* sysctl_data =
566		bpf_map_lookup_elem(&data_heap, &zero);
567	if (!sysctl_data)
568		goto out;
569
570	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
571	sysctl_data->meta.type = SYSCTL_EVENT;
572	void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
573	payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);
574
575	populate_ancestors(task, &sysctl_data->ancestors_info);
576
577	sysctl_data->sysctl_val_length = 0;
578	sysctl_data->sysctl_path_length = 0;
579
580	size_t sysctl_val_length = bpf_probe_read_kernel_str(payload,
581							     CTL_MAXNAME, buf);
582	barrier_var(sysctl_val_length);
583	if (sysctl_val_length <= CTL_MAXNAME) {
584		barrier_var(sysctl_val_length);
585		sysctl_data->sysctl_val_length = sysctl_val_length;
586		payload += sysctl_val_length;
587	}
588
589	size_t sysctl_path_length =
590		bpf_probe_read_kernel_str(payload, MAX_PATH,
591					  BPF_CORE_READ(filp, f_path.dentry,
592							d_name.name));
593	barrier_var(sysctl_path_length);
594	if (sysctl_path_length <= MAX_PATH) {
595		barrier_var(sysctl_path_length);
596		sysctl_data->sysctl_path_length = sysctl_path_length;
597		payload += sysctl_path_length;
598	}
599
600	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
601	unsigned long data_len = payload - (void*)sysctl_data;
602	data_len = data_len > sizeof(struct var_sysctl_data_t)
603		? sizeof(struct var_sysctl_data_t)
604		: data_len;
605	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
606out:
607	bpf_stats_exit(&stats_ctx);
608	return 0;
609}
610
611SEC("tracepoint/syscalls/sys_enter_kill")
612int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
613{
614	struct bpf_func_stats_ctx stats_ctx;
615
616	bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
617	int pid = ctx->args[0];
618	int sig = ctx->args[1];
619	int ret = trace_var_sys_kill(ctx, pid, sig);
620	bpf_stats_exit(&stats_ctx);
621	return ret;
622};
623
624SEC("raw_tracepoint/sched_process_exit")
625int raw_tracepoint__sched_process_exit(void* ctx)
626{
627	int zero = 0;
628	struct bpf_func_stats_ctx stats_ctx;
629	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
630
631	u32 tpid = get_userspace_pid();
632
633	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
634	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
635
636	if (arr_struct == NULL || kill_data == NULL)
637		goto out;
638
639	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
640	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
641
642#ifdef UNROLL
643#pragma unroll
644#endif
645	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
646		struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
647
648		if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
649			bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data),
650					      past_kill_data);
651			void* payload = kill_data->payload;
652			size_t offset = kill_data->payload_length;
653			if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
654				return 0;
655			payload += offset;
656
657			kill_data->kill_target_name_length = 0;
658			kill_data->kill_target_cgroup_proc_length = 0;
659
660			size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
661			barrier_var(comm_length);
662			if (comm_length <= TASK_COMM_LEN) {
663				barrier_var(comm_length);
664				kill_data->kill_target_name_length = comm_length;
665				payload += comm_length;
666			}
667
668			size_t cgroup_proc_length =
669				bpf_probe_read_kernel_str(payload,
670							  KILL_TARGET_LEN,
671							  BPF_CORE_READ(proc_kernfs, name));
672			barrier_var(cgroup_proc_length);
673			if (cgroup_proc_length <= KILL_TARGET_LEN) {
674				barrier_var(cgroup_proc_length);
675				kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
676				payload += cgroup_proc_length;
677			}
678
679			bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
680			unsigned long data_len = (void*)payload - (void*)kill_data;
681			data_len = data_len > sizeof(struct var_kill_data_t)
682				? sizeof(struct var_kill_data_t)
683				: data_len;
684			bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
685		}
686	}
687	bpf_map_delete_elem(&var_tpid_to_data, &tpid);
688out:
689	bpf_stats_exit(&stats_ctx);
690	return 0;
691}
692
693SEC("raw_tracepoint/sched_process_exec")
694int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
695{
696	struct bpf_func_stats_ctx stats_ctx;
697	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
698
699	struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
700	u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
701
702	bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
703	if (should_filter_binprm != NULL)
704		goto out;
705
706	int zero = 0;
707	struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
708	if (!proc_exec_data)
709		goto out;
710
711	if (INODE_FILTER && inode != INODE_FILTER)
712		return 0;
713
714	u32 pid = get_userspace_pid();
715	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
716
717	proc_exec_data->meta.type = EXEC_EVENT;
718	proc_exec_data->bin_path_length = 0;
719	proc_exec_data->cmdline_length = 0;
720	proc_exec_data->environment_length = 0;
721	void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
722					      proc_exec_data->payload);
723	payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
724
725	struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
726	proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
727	proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
728	proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
729	proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
730
731	const char* filename = BPF_CORE_READ(bprm, filename);
732	size_t bin_path_length =
733		bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename);
734	barrier_var(bin_path_length);
735	if (bin_path_length <= MAX_FILENAME_LEN) {
736		barrier_var(bin_path_length);
737		proc_exec_data->bin_path_length = bin_path_length;
738		payload += bin_path_length;
739	}
740
741	void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
742	void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
743	unsigned int cmdline_length = probe_read_lim(payload, arg_start,
744						     arg_end - arg_start, MAX_ARGS_LEN);
745
746	if (cmdline_length <= MAX_ARGS_LEN) {
747		barrier_var(cmdline_length);
748		proc_exec_data->cmdline_length = cmdline_length;
749		payload += cmdline_length;
750	}
751
752	if (READ_ENVIRON_FROM_EXEC) {
753		void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
754		void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
755		unsigned long env_len = probe_read_lim(payload, env_start,
756						       env_end - env_start, MAX_ENVIRON_LEN);
757		if (cmdline_length <= MAX_ENVIRON_LEN) {
758			proc_exec_data->environment_length = env_len;
759			payload += env_len;
760		}
761	}
762
763	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
764	unsigned long data_len = payload - (void*)proc_exec_data;
765	data_len = data_len > sizeof(struct var_exec_data_t)
766		? sizeof(struct var_exec_data_t)
767		: data_len;
768	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
769out:
770	bpf_stats_exit(&stats_ctx);
771	return 0;
772}
773
774SEC("kretprobe/do_filp_open")
775int kprobe_ret__do_filp_open(struct pt_regs* ctx)
776{
777	struct bpf_func_stats_ctx stats_ctx;
778	bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
779
780	struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
781
782	if (filp == NULL || IS_ERR(filp))
783		goto out;
784	unsigned int flags = BPF_CORE_READ(filp, f_flags);
785	if ((flags & (O_RDWR | O_WRONLY)) == 0)
786		goto out;
787	if ((flags & O_TMPFILE) > 0)
788		goto out;
789	struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
790	umode_t mode = BPF_CORE_READ(file_inode, i_mode);
791	if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
792	    S_ISSOCK(mode))
793		goto out;
794
795	struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
796	u32 device_id = 0;
797	u64 file_ino = 0;
798	if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
799		goto out;
800
801	int zero = 0;
802	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
803	if (!filemod_data)
804		goto out;
805
806	u32 pid = get_userspace_pid();
807	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
808
809	filemod_data->meta.type = FILEMOD_EVENT;
810	filemod_data->fmod_type = FMOD_OPEN;
811	filemod_data->dst_flags = flags;
812	filemod_data->src_inode = 0;
813	filemod_data->dst_inode = file_ino;
814	filemod_data->src_device_id = 0;
815	filemod_data->dst_device_id = device_id;
816	filemod_data->src_filepath_length = 0;
817	filemod_data->dst_filepath_length = 0;
818
819	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
820					      filemod_data->payload);
821	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
822
823	size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
824	barrier_var(len);
825	if (len <= MAX_FILEPATH_LENGTH) {
826		barrier_var(len);
827		payload += len;
828		filemod_data->dst_filepath_length = len;
829	}
830	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
831	unsigned long data_len = payload - (void*)filemod_data;
832	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
833	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
834out:
835	bpf_stats_exit(&stats_ctx);
836	return 0;
837}
838
839SEC("kprobe/vfs_link")
840int BPF_KPROBE(kprobe__vfs_link,
841	       struct dentry* old_dentry, struct mnt_idmap *idmap,
842	       struct inode* dir, struct dentry* new_dentry,
843	       struct inode** delegated_inode)
844{
845	struct bpf_func_stats_ctx stats_ctx;
846	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
847
848	u32 src_device_id = 0;
849	u64 src_file_ino = 0;
850	u32 dst_device_id = 0;
851	u64 dst_file_ino = 0;
852	if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
853	    !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
854		goto out;
855
856	int zero = 0;
857	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
858	if (!filemod_data)
859		goto out;
860
861	u32 pid = get_userspace_pid();
862	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
863
864	filemod_data->meta.type = FILEMOD_EVENT;
865	filemod_data->fmod_type = FMOD_LINK;
866	filemod_data->dst_flags = 0;
867	filemod_data->src_inode = src_file_ino;
868	filemod_data->dst_inode = dst_file_ino;
869	filemod_data->src_device_id = src_device_id;
870	filemod_data->dst_device_id = dst_device_id;
871	filemod_data->src_filepath_length = 0;
872	filemod_data->dst_filepath_length = 0;
873
874	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
875					      filemod_data->payload);
876	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
877
878	size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
879	barrier_var(len);
880	if (len <= MAX_FILEPATH_LENGTH) {
881		barrier_var(len);
882		payload += len;
883		filemod_data->src_filepath_length = len;
884	}
885
886	len = read_absolute_file_path_from_dentry(new_dentry, payload);
887	barrier_var(len);
888	if (len <= MAX_FILEPATH_LENGTH) {
889		barrier_var(len);
890		payload += len;
891		filemod_data->dst_filepath_length = len;
892	}
893
894	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
895	unsigned long data_len = payload - (void*)filemod_data;
896	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
897	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
898out:
899	bpf_stats_exit(&stats_ctx);
900	return 0;
901}
902
903SEC("kprobe/vfs_symlink")
904int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
905	       const char* oldname)
906{
907	struct bpf_func_stats_ctx stats_ctx;
908	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);
909
910	u32 dst_device_id = 0;
911	u64 dst_file_ino = 0;
912	if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
913		goto out;
914
915	int zero = 0;
916	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
917	if (!filemod_data)
918		goto out;
919
920	u32 pid = get_userspace_pid();
921	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
922
923	filemod_data->meta.type = FILEMOD_EVENT;
924	filemod_data->fmod_type = FMOD_SYMLINK;
925	filemod_data->dst_flags = 0;
926	filemod_data->src_inode = 0;
927	filemod_data->dst_inode = dst_file_ino;
928	filemod_data->src_device_id = 0;
929	filemod_data->dst_device_id = dst_device_id;
930	filemod_data->src_filepath_length = 0;
931	filemod_data->dst_filepath_length = 0;
932
933	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
934					      filemod_data->payload);
935	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
936
937	size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH,
938					       oldname);
939	barrier_var(len);
940	if (len <= MAX_FILEPATH_LENGTH) {
941		barrier_var(len);
942		payload += len;
943		filemod_data->src_filepath_length = len;
944	}
945	len = read_absolute_file_path_from_dentry(dentry, payload);
946	barrier_var(len);
947	if (len <= MAX_FILEPATH_LENGTH) {
948		barrier_var(len);
949		payload += len;
950		filemod_data->dst_filepath_length = len;
951	}
952	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
953	unsigned long data_len = payload - (void*)filemod_data;
954	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
955	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
956out:
957	bpf_stats_exit(&stats_ctx);
958	return 0;
959}
960
961SEC("raw_tracepoint/sched_process_fork")
962int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
963{
964	struct bpf_func_stats_ctx stats_ctx;
965	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
966
967	int zero = 0;
968	struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
969	if (!fork_data)
970		goto out;
971
972	struct task_struct* parent = (struct task_struct*)ctx->args[0];
973	struct task_struct* child = (struct task_struct*)ctx->args[1];
974	fork_data->meta.type = FORK_EVENT;
975
976	void* payload = populate_var_metadata(&fork_data->meta, child,
977					      BPF_CORE_READ(child, pid), fork_data->payload);
978	fork_data->parent_pid = BPF_CORE_READ(parent, pid);
979	fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
980	fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
981	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
982
983	unsigned long data_len = payload - (void*)fork_data;
984	data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
985	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
986out:
987	bpf_stats_exit(&stats_ctx);
988	return 0;
989}
990char _license[] SEC("license") = "GPL";
991