162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci// Copyright (c) 2019 Facebook
362306a36Sopenharmony_ci#include <linux/sched.h>
462306a36Sopenharmony_ci#include <linux/ptrace.h>
562306a36Sopenharmony_ci#include <stdint.h>
662306a36Sopenharmony_ci#include <stddef.h>
762306a36Sopenharmony_ci#include <stdbool.h>
862306a36Sopenharmony_ci#include <linux/bpf.h>
962306a36Sopenharmony_ci#include <bpf/bpf_helpers.h>
1062306a36Sopenharmony_ci#include "bpf_misc.h"
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci#define FUNCTION_NAME_LEN 64
1362306a36Sopenharmony_ci#define FILE_NAME_LEN 128
1462306a36Sopenharmony_ci#define TASK_COMM_LEN 16
1562306a36Sopenharmony_ci
/*
 * Offsets that this file adds to pointers into the traced process in order
 * to locate individual fields. The pointers involved are `void *`, so the
 * additions are byte-wise.
 */
typedef struct {
	/* Added to the thread-state pointer to read the current frame pointer. */
	int PyThreadState_frame;
	/* Added to the thread-state pointer to read the 64-bit thread identity. */
	int PyThreadState_thread;
	/* Added to a frame pointer to read the previous frame (f_back). */
	int PyFrameObject_back;
	/* Added to a frame pointer to read the code object pointer (f_code). */
	int PyFrameObject_code;
	/* Not referenced by the code visible in this file. */
	int PyFrameObject_lineno;
	/* Added to a code object pointer to read co_filename. */
	int PyCodeObject_filename;
	/* Added to a code object pointer to read co_name. */
	int PyCodeObject_name;
	/* Added to a string object pointer to reach its character data. */
	int String_data;
	/* Not referenced by the code visible in this file. */
	int String_size;
} OffsetConfig;
2762306a36Sopenharmony_ci
/* Per-process configuration; the value type of `pidmap`. */
typedef struct {
	/* User address from which the "current" thread-state pointer is read. */
	uintptr_t current_state_addr;
	/* User address from which the int TLS key is read. */
	uintptr_t tls_key_addr;
	/* Field offsets used when dereferencing interpreter structures. */
	OffsetConfig offsets;
	/* When set, the thread state is looked up via get_thread_state(). */
	bool use_tls;
} PidData;
3462306a36Sopenharmony_ci
/* Counters; the value type of `statsmap`. */
typedef struct {
	/* Incremented once per __on_event() call that reaches the output step. */
	uint32_t success;
} Stats;
3862306a36Sopenharmony_ci
/*
 * Function name / file name pair filled in by get_frame_data(); used as the
 * key type of `symbolmap`.
 */
typedef struct {
	char name[FUNCTION_NAME_LEN];
	char file[FILE_NAME_LEN];
} Symbol;
4362306a36Sopenharmony_ci
/*
 * One sample record; the value type of `eventmap`. __on_event() emits the
 * first offsetof(Event, metadata) bytes through `perfmap`, so `metadata` and
 * `dummy_safeguard` are not part of the emitted payload.
 *
 * NOTE(review): STACK_MAX_LEN is not defined in the visible part of this
 * file; presumably the including source defines it -- confirm.
 */
typedef struct {
	/* Upper 32 bits of bpf_get_current_pid_tgid(). */
	uint32_t pid;
	/* Lower 32 bits of bpf_get_current_pid_tgid(). */
	uint32_t tid;
	/* Filled by bpf_get_current_comm(). */
	char comm[TASK_COMM_LEN];
	/* Results of bpf_get_stackid() against `stackmap`. */
	int32_t kernel_stack_id;
	int32_t user_stack_id;
	/* True when the resolved thread state equals the "current" one. */
	bool thread_current;
	/* True when the thread identities match, or when use_tls is off. */
	bool pthread_match;
	/* True when the walk ended on a NULL frame pointer, or was skipped. */
	bool stack_complete;
	/* Index of the last recorded frame plus one. */
	int16_t stack_len;
	/* Symbol ids (values from `symbolmap`), one per recorded frame. */
	int32_t stack[STACK_MAX_LEN];

	/* Set to 0 by __on_event() before the record is emitted. */
	int has_meta;
	int metadata;
	char dummy_safeguard;
} Event;
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci
/* Local pid type used for the halves of the value from bpf_get_current_pid_tgid(). */
typedef int pid_t;
6362306a36Sopenharmony_ci
/*
 * Pointers read out of one frame of the traced process by get_frame_data().
 * All of them are addresses in the traced process, not directly
 * dereferenceable here; they are only passed to bpf_probe_read_user*().
 */
typedef struct {
	void* f_back; // PyFrameObject.f_back, previous frame
	void* f_code; // PyFrameObject.f_code, pointer to PyCodeObject
	void* co_filename; // PyCodeObject.co_filename
	void* co_name; // PyCodeObject.co_name
} FrameData;
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci#ifdef SUBPROGS
7262306a36Sopenharmony_ci__noinline
7362306a36Sopenharmony_ci#else
7462306a36Sopenharmony_ci__always_inline
7562306a36Sopenharmony_ci#endif
7662306a36Sopenharmony_cistatic void *get_thread_state(void *tls_base, PidData *pidData)
7762306a36Sopenharmony_ci{
7862306a36Sopenharmony_ci	void* thread_state;
7962306a36Sopenharmony_ci	int key;
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_ci	bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
8262306a36Sopenharmony_ci	bpf_probe_read_user(&thread_state, sizeof(thread_state),
8362306a36Sopenharmony_ci			    tls_base + 0x310 + key * 0x10 + 0x08);
8462306a36Sopenharmony_ci	return thread_state;
8562306a36Sopenharmony_ci}
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_cistatic __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
8862306a36Sopenharmony_ci					   FrameData *frame, Symbol *symbol)
8962306a36Sopenharmony_ci{
9062306a36Sopenharmony_ci	// read data from PyFrameObject
9162306a36Sopenharmony_ci	bpf_probe_read_user(&frame->f_back,
9262306a36Sopenharmony_ci			    sizeof(frame->f_back),
9362306a36Sopenharmony_ci			    frame_ptr + pidData->offsets.PyFrameObject_back);
9462306a36Sopenharmony_ci	bpf_probe_read_user(&frame->f_code,
9562306a36Sopenharmony_ci			    sizeof(frame->f_code),
9662306a36Sopenharmony_ci			    frame_ptr + pidData->offsets.PyFrameObject_code);
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	// read data from PyCodeObject
9962306a36Sopenharmony_ci	if (!frame->f_code)
10062306a36Sopenharmony_ci		return false;
10162306a36Sopenharmony_ci	bpf_probe_read_user(&frame->co_filename,
10262306a36Sopenharmony_ci			    sizeof(frame->co_filename),
10362306a36Sopenharmony_ci			    frame->f_code + pidData->offsets.PyCodeObject_filename);
10462306a36Sopenharmony_ci	bpf_probe_read_user(&frame->co_name,
10562306a36Sopenharmony_ci			    sizeof(frame->co_name),
10662306a36Sopenharmony_ci			    frame->f_code + pidData->offsets.PyCodeObject_name);
10762306a36Sopenharmony_ci	// read actual names into symbol
10862306a36Sopenharmony_ci	if (frame->co_filename)
10962306a36Sopenharmony_ci		bpf_probe_read_user_str(&symbol->file,
11062306a36Sopenharmony_ci					sizeof(symbol->file),
11162306a36Sopenharmony_ci					frame->co_filename +
11262306a36Sopenharmony_ci					pidData->offsets.String_data);
11362306a36Sopenharmony_ci	if (frame->co_name)
11462306a36Sopenharmony_ci		bpf_probe_read_user_str(&symbol->name,
11562306a36Sopenharmony_ci					sizeof(symbol->name),
11662306a36Sopenharmony_ci					frame->co_name +
11762306a36Sopenharmony_ci					pidData->offsets.String_data);
11862306a36Sopenharmony_ci	return true;
11962306a36Sopenharmony_ci}
12062306a36Sopenharmony_ci
/*
 * Hash map from int key to PidData, capacity 1. __on_event() looks it up with
 * the upper 32 bits of bpf_get_current_pid_tgid() and returns early when
 * there is no entry.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, PidData);
} pidmap SEC(".maps");
12762306a36Sopenharmony_ci
/*
 * Hash map holding a single Event, looked up with key 0 by __on_event() and
 * filled in place before being emitted.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Event);
} eventmap SEC(".maps");
13462306a36Sopenharmony_ci
/*
 * Hash map from Symbol to an int id. The entry keyed by an all-zero Symbol is
 * also used by __on_event() as the symbol counter.
 * NOTE(review): max_entries is 1, so inserting any further symbol cannot
 * succeed once one entry exists -- presumably acceptable for a load-only
 * test; confirm.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, Symbol);
	__type(value, int);
} symbolmap SEC(".maps");
14162306a36Sopenharmony_ci
/* Single-slot array holding the Stats counters; read with key 0. */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Stats);
} statsmap SEC(".maps");
14862306a36Sopenharmony_ci
/* Perf event array (32 slots) that __on_event() writes Event records to. */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(max_entries, 32);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} perfmap SEC(".maps");
15562306a36Sopenharmony_ci
/*
 * Stack trace map passed to bpf_get_stackid() for both the user and kernel
 * stacks: up to 1000 traces, each value sized for 127 long long entries.
 */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(max_entries, 1000);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(long long) * 127);
} stackmap SEC(".maps");
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci#ifdef USE_BPF_LOOP
/* State handed to process_frame_callback() through bpf_loop(). */
struct process_frame_ctx {
	/* CPU id, folded into newly computed symbol ids. */
	int cur_cpu;
	/* Pointer to the counter value stored in symbolmap. */
	int32_t *symbol_counter;
	/* Frame (address in the traced process) the callback inspects. */
	void *frame_ptr;
	/* Scratch FrameData the callback fills via get_frame_data(). */
	FrameData *frame;
	PidData *pidData;
	/* Scratch Symbol used as the symbolmap key. */
	Symbol *sym;
	/* Event whose stack[] / stack_len the callback updates. */
	Event *event;
	/* Set by the callback when a symbol could not be inserted/looked up. */
	bool done;
};
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_cistatic int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
17662306a36Sopenharmony_ci{
17762306a36Sopenharmony_ci	int zero = 0;
17862306a36Sopenharmony_ci	void *frame_ptr = ctx->frame_ptr;
17962306a36Sopenharmony_ci	PidData *pidData = ctx->pidData;
18062306a36Sopenharmony_ci	FrameData *frame = ctx->frame;
18162306a36Sopenharmony_ci	int32_t *symbol_counter = ctx->symbol_counter;
18262306a36Sopenharmony_ci	int cur_cpu = ctx->cur_cpu;
18362306a36Sopenharmony_ci	Event *event = ctx->event;
18462306a36Sopenharmony_ci	Symbol *sym = ctx->sym;
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
18762306a36Sopenharmony_ci		int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
18862306a36Sopenharmony_ci		int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci		if (!symbol_id) {
19162306a36Sopenharmony_ci			bpf_map_update_elem(&symbolmap, sym, &zero, 0);
19262306a36Sopenharmony_ci			symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
19362306a36Sopenharmony_ci			if (!symbol_id) {
19462306a36Sopenharmony_ci				ctx->done = true;
19562306a36Sopenharmony_ci				return 1;
19662306a36Sopenharmony_ci			}
19762306a36Sopenharmony_ci		}
19862306a36Sopenharmony_ci		if (*symbol_id == new_symbol_id)
19962306a36Sopenharmony_ci			(*symbol_counter)++;
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci		barrier_var(i);
20262306a36Sopenharmony_ci		if (i >= STACK_MAX_LEN)
20362306a36Sopenharmony_ci			return 1;
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci		event->stack[i] = *symbol_id;
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci		event->stack_len = i + 1;
20862306a36Sopenharmony_ci		frame_ptr = frame->f_back;
20962306a36Sopenharmony_ci	}
21062306a36Sopenharmony_ci	return 0;
21162306a36Sopenharmony_ci}
21262306a36Sopenharmony_ci#endif /* USE_BPF_LOOP */
21362306a36Sopenharmony_ci
/*
 * Collect one sample for the current task and emit it through perfmap.
 *
 * Steps: look up the per-process configuration, fill the shared Event with
 * pid/tid/comm and the user/kernel stack ids, resolve the thread state,
 * optionally check that the thread identities match, walk up to
 * STACK_MAX_LEN frames translating each into a symbol id, bump the success
 * counter and output the event.
 *
 * Always returns 0. The early returns (no PidData, no Event, no symbol
 * counter, symbol insertion failure) skip both the stats update and the
 * perf output.
 *
 * Linkage/inlining is chosen at build time: global non-inlined function with
 * GLOBAL_FUNC, static non-inlined subprogram with SUBPROGS, otherwise static
 * and force-inlined. The frame walk is likewise selected by USE_BPF_LOOP,
 * USE_ITER, NO_UNROLL and UNROLL_COUNT.
 */
#ifdef GLOBAL_FUNC
__noinline
#elif defined(SUBPROGS)
static __noinline
#else
static __always_inline
#endif
int __on_event(struct bpf_raw_tracepoint_args *ctx)
{
	/* Upper half of pid_tgid is the key into pidmap; lower half is the tid. */
	uint64_t pid_tgid = bpf_get_current_pid_tgid();
	pid_t pid = (pid_t)(pid_tgid >> 32);
	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
	if (!pidData)
		return 0;

	/* `zero` doubles as the key for eventmap/statsmap and as the initial
	 * value stored for newly inserted symbols.
	 */
	int zero = 0;
	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
	if (!event)
		return 0;

	event->pid = pid;

	event->tid = (pid_t)pid_tgid;
	bpf_get_current_comm(&event->comm, sizeof(event->comm));

	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);

	/* Thread state the process currently advertises as "current". */
	void* thread_state_current = (void*)0;
	bpf_probe_read_user(&thread_state_current,
			    sizeof(thread_state_current),
			    (void*)(long)pidData->current_state_addr);

	/* NOTE(review): tls_base is the task_struct pointer cast to void*, yet
	 * it is used below as the base of user-space reads -- presumably a
	 * stand-in for the real TLS base; confirm.
	 */
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	void* tls_base = (void*)task;

	void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
		: thread_state_current;
	event->thread_current = thread_state == thread_state_current;

	if (pidData->use_tls) {
		/* Compare the identity read at tls_base + 0x10 with the one
		 * recorded in the thread state.
		 */
		uint64_t pthread_created;
		uint64_t pthread_self;
		bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
				    tls_base + 0x10);

		bpf_probe_read_user(&pthread_created,
				    sizeof(pthread_created),
				    thread_state +
				    pidData->offsets.PyThreadState_thread);
		event->pthread_match = pthread_created == pthread_self;
	} else {
		event->pthread_match = 1;
	}

	/* pthread_match is already 1 whenever use_tls is false, so the second
	 * operand does not change the outcome.
	 */
	if (event->pthread_match || !pidData->use_tls) {
		void* frame_ptr;
		FrameData frame;
		Symbol sym = {};
		int cur_cpu = bpf_get_smp_processor_id();

		/* Innermost frame of the resolved thread state. */
		bpf_probe_read_user(&frame_ptr,
				    sizeof(frame_ptr),
				    thread_state +
				    pidData->offsets.PyThreadState_frame);

		/* The entry keyed by the all-zero Symbol serves as the counter;
		 * without it no frames are processed and nothing is emitted.
		 */
		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
		if (symbol_counter == NULL)
			return 0;
#ifdef USE_BPF_LOOP
	/* This `ctx` shadows the function parameter until the enclosing block
	 * ends; the perf output call below sees the parameter again. Members
	 * not listed (done) are zero-initialized.
	 */
	struct process_frame_ctx ctx = {
		.cur_cpu = cur_cpu,
		.symbol_counter = symbol_counter,
		.frame_ptr = frame_ptr,
		.frame = &frame,
		.pidData = pidData,
		.sym = &sym,
		.event = event,
	};

	bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
	if (ctx.done)
		return 0;
#else
#if defined(USE_ITER)
/* no for loop, no unrolling */
#elif defined(NO_UNROLL)
#pragma clang loop unroll(disable)
#elif defined(UNROLL_COUNT)
#pragma clang loop unroll_count(UNROLL_COUNT)
#else
#pragma clang loop unroll(full)
#endif /* NO_UNROLL */
		/* Unwind python stack */
#ifdef USE_ITER
		int i;
		bpf_for(i, 0, STACK_MAX_LEN) {
#else /* !USE_ITER */
		for (int i = 0; i < STACK_MAX_LEN; ++i) {
#endif
			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
				if (!symbol_id) {
					/* Unknown symbol: insert, then re-read the stored value. */
					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
					if (!symbol_id)
						return 0;
				}
				if (*symbol_id == new_symbol_id)
					(*symbol_counter)++;
				event->stack[i] = *symbol_id;
				event->stack_len = i + 1;
				/* Step to the caller's frame. */
				frame_ptr = frame.f_back;
			}
		}
#endif /* USE_BPF_LOOP */
		/* NOTE(review): in the USE_BPF_LOOP build the local frame_ptr is
		 * only copied into the loop context and never written here
		 * afterwards, so this tests the initial frame pointer.
		 */
		event->stack_complete = frame_ptr == NULL;
	} else {
		event->stack_complete = 1;
	}

	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
	if (stats)
		stats->success++;

	/* Emit everything up to, but not including, `metadata`. */
	event->has_meta = 0;
	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
	return 0;
}
34462306a36Sopenharmony_ci
34562306a36Sopenharmony_ciSEC("raw_tracepoint/kfree_skb")
34662306a36Sopenharmony_ciint on_event(struct bpf_raw_tracepoint_args* ctx)
34762306a36Sopenharmony_ci{
34862306a36Sopenharmony_ci	int ret = 0;
34962306a36Sopenharmony_ci	ret |= __on_event(ctx);
35062306a36Sopenharmony_ci	ret |= __on_event(ctx);
35162306a36Sopenharmony_ci	ret |= __on_event(ctx);
35262306a36Sopenharmony_ci	ret |= __on_event(ctx);
35362306a36Sopenharmony_ci	ret |= __on_event(ctx);
35462306a36Sopenharmony_ci	return ret;
35562306a36Sopenharmony_ci}
35662306a36Sopenharmony_ci
/* License string placed in the "license" section of the object. */
char _license[] SEC("license") = "GPL";
358