18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci// Copyright (c) 2019 Facebook
38c2ecf20Sopenharmony_ci#include <linux/sched.h>
48c2ecf20Sopenharmony_ci#include <linux/ptrace.h>
58c2ecf20Sopenharmony_ci#include <stdint.h>
68c2ecf20Sopenharmony_ci#include <stddef.h>
78c2ecf20Sopenharmony_ci#include <stdbool.h>
88c2ecf20Sopenharmony_ci#include <linux/bpf.h>
98c2ecf20Sopenharmony_ci#include <bpf/bpf_helpers.h>
108c2ecf20Sopenharmony_ci
/* Capacities, in bytes, of the fixed-size character buffers declared below. */
#define FUNCTION_NAME_LEN	64	/* Symbol.name */
#define FILE_NAME_LEN		128	/* Symbol.file */
#define TASK_COMM_LEN		16	/* Event.comm */
148c2ecf20Sopenharmony_ci
/*
 * Table of offsets into the traced process's Python runtime structures.
 *
 * The readers in this file add these values, as byte displacements, to
 * user-space addresses before calling bpf_probe_read_user().  Each field is
 * named <structure>_<member>.
 */
typedef struct {
	/* PyThreadState: added to the thread-state pointer */
	int PyThreadState_frame;	/* where the top frame pointer is read from */
	int PyThreadState_thread;	/* where the creating pthread id is read from */

	/* PyFrameObject: added to a frame pointer */
	int PyFrameObject_back;		/* link to the previous frame */
	int PyFrameObject_code;		/* pointer to the code object */
	int PyFrameObject_lineno;	/* not read by the code visible in this file */

	/* PyCodeObject: added to the code-object pointer */
	int PyCodeObject_filename;
	int PyCodeObject_name;

	/* String object: added to a name/filename object pointer */
	int String_data;		/* start of the character data */
	int String_size;		/* not read by the code visible in this file */
} OffsetConfig;
268c2ecf20Sopenharmony_ci
278c2ecf20Sopenharmony_citypedef struct {
288c2ecf20Sopenharmony_ci	uintptr_t current_state_addr;
298c2ecf20Sopenharmony_ci	uintptr_t tls_key_addr;
308c2ecf20Sopenharmony_ci	OffsetConfig offsets;
318c2ecf20Sopenharmony_ci	bool use_tls;
328c2ecf20Sopenharmony_ci} PidData;
338c2ecf20Sopenharmony_ci
/* Value type of statsmap: counters maintained by the BPF program. */
typedef struct {
	/* bumped by __on_event() each time it gets as far as emitting an event */
	uint32_t success;
} Stats;
378c2ecf20Sopenharmony_ci
388c2ecf20Sopenharmony_citypedef struct {
398c2ecf20Sopenharmony_ci	char name[FUNCTION_NAME_LEN];
408c2ecf20Sopenharmony_ci	char file[FILE_NAME_LEN];
418c2ecf20Sopenharmony_ci} Symbol;
428c2ecf20Sopenharmony_ci
438c2ecf20Sopenharmony_citypedef struct {
448c2ecf20Sopenharmony_ci	uint32_t pid;
458c2ecf20Sopenharmony_ci	uint32_t tid;
468c2ecf20Sopenharmony_ci	char comm[TASK_COMM_LEN];
478c2ecf20Sopenharmony_ci	int32_t kernel_stack_id;
488c2ecf20Sopenharmony_ci	int32_t user_stack_id;
498c2ecf20Sopenharmony_ci	bool thread_current;
508c2ecf20Sopenharmony_ci	bool pthread_match;
518c2ecf20Sopenharmony_ci	bool stack_complete;
528c2ecf20Sopenharmony_ci	int16_t stack_len;
538c2ecf20Sopenharmony_ci	int32_t stack[STACK_MAX_LEN];
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_ci	int has_meta;
568c2ecf20Sopenharmony_ci	int metadata;
578c2ecf20Sopenharmony_ci	char dummy_safeguard;
588c2ecf20Sopenharmony_ci} Event;
598c2ecf20Sopenharmony_ci
608c2ecf20Sopenharmony_ci
/*
 * Local pid_t, used for the pid/tid values in __on_event().
 * NOTE(review): presumably declared here because no header providing pid_t
 * is included — confirm.
 */
typedef int pid_t;
628c2ecf20Sopenharmony_ci
/*
 * Raw pointers collected for a single Python frame by get_frame_data().
 * All of them are addresses in the traced process, never dereferenced
 * directly.
 */
typedef struct {
	void *f_back;		/* PyFrameObject.f_back: the previous frame */
	void *f_code;		/* PyFrameObject.f_code: the PyCodeObject */
	void *co_filename;	/* PyCodeObject.co_filename */
	void *co_name;		/* PyCodeObject.co_name */
} FrameData;
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci#ifdef SUBPROGS
718c2ecf20Sopenharmony_ci__noinline
728c2ecf20Sopenharmony_ci#else
738c2ecf20Sopenharmony_ci__always_inline
748c2ecf20Sopenharmony_ci#endif
758c2ecf20Sopenharmony_cistatic void *get_thread_state(void *tls_base, PidData *pidData)
768c2ecf20Sopenharmony_ci{
778c2ecf20Sopenharmony_ci	void* thread_state;
788c2ecf20Sopenharmony_ci	int key;
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
818c2ecf20Sopenharmony_ci	bpf_probe_read_user(&thread_state, sizeof(thread_state),
828c2ecf20Sopenharmony_ci			    tls_base + 0x310 + key * 0x10 + 0x08);
838c2ecf20Sopenharmony_ci	return thread_state;
848c2ecf20Sopenharmony_ci}
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_cistatic __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
878c2ecf20Sopenharmony_ci					   FrameData *frame, Symbol *symbol)
888c2ecf20Sopenharmony_ci{
898c2ecf20Sopenharmony_ci	// read data from PyFrameObject
908c2ecf20Sopenharmony_ci	bpf_probe_read_user(&frame->f_back,
918c2ecf20Sopenharmony_ci			    sizeof(frame->f_back),
928c2ecf20Sopenharmony_ci			    frame_ptr + pidData->offsets.PyFrameObject_back);
938c2ecf20Sopenharmony_ci	bpf_probe_read_user(&frame->f_code,
948c2ecf20Sopenharmony_ci			    sizeof(frame->f_code),
958c2ecf20Sopenharmony_ci			    frame_ptr + pidData->offsets.PyFrameObject_code);
968c2ecf20Sopenharmony_ci
978c2ecf20Sopenharmony_ci	// read data from PyCodeObject
988c2ecf20Sopenharmony_ci	if (!frame->f_code)
998c2ecf20Sopenharmony_ci		return false;
1008c2ecf20Sopenharmony_ci	bpf_probe_read_user(&frame->co_filename,
1018c2ecf20Sopenharmony_ci			    sizeof(frame->co_filename),
1028c2ecf20Sopenharmony_ci			    frame->f_code + pidData->offsets.PyCodeObject_filename);
1038c2ecf20Sopenharmony_ci	bpf_probe_read_user(&frame->co_name,
1048c2ecf20Sopenharmony_ci			    sizeof(frame->co_name),
1058c2ecf20Sopenharmony_ci			    frame->f_code + pidData->offsets.PyCodeObject_name);
1068c2ecf20Sopenharmony_ci	// read actual names into symbol
1078c2ecf20Sopenharmony_ci	if (frame->co_filename)
1088c2ecf20Sopenharmony_ci		bpf_probe_read_user_str(&symbol->file,
1098c2ecf20Sopenharmony_ci					sizeof(symbol->file),
1108c2ecf20Sopenharmony_ci					frame->co_filename +
1118c2ecf20Sopenharmony_ci					pidData->offsets.String_data);
1128c2ecf20Sopenharmony_ci	if (frame->co_name)
1138c2ecf20Sopenharmony_ci		bpf_probe_read_user_str(&symbol->name,
1148c2ecf20Sopenharmony_ci					sizeof(symbol->name),
1158c2ecf20Sopenharmony_ci					frame->co_name +
1168c2ecf20Sopenharmony_ci					pidData->offsets.String_data);
1178c2ecf20Sopenharmony_ci	return true;
1188c2ecf20Sopenharmony_ci}
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_cistruct {
1218c2ecf20Sopenharmony_ci	__uint(type, BPF_MAP_TYPE_HASH);
1228c2ecf20Sopenharmony_ci	__uint(max_entries, 1);
1238c2ecf20Sopenharmony_ci	__type(key, int);
1248c2ecf20Sopenharmony_ci	__type(value, PidData);
1258c2ecf20Sopenharmony_ci} pidmap SEC(".maps");
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_cistruct {
1288c2ecf20Sopenharmony_ci	__uint(type, BPF_MAP_TYPE_HASH);
1298c2ecf20Sopenharmony_ci	__uint(max_entries, 1);
1308c2ecf20Sopenharmony_ci	__type(key, int);
1318c2ecf20Sopenharmony_ci	__type(value, Event);
1328c2ecf20Sopenharmony_ci} eventmap SEC(".maps");
1338c2ecf20Sopenharmony_ci
1348c2ecf20Sopenharmony_cistruct {
1358c2ecf20Sopenharmony_ci	__uint(type, BPF_MAP_TYPE_HASH);
1368c2ecf20Sopenharmony_ci	__uint(max_entries, 1);
1378c2ecf20Sopenharmony_ci	__type(key, Symbol);
1388c2ecf20Sopenharmony_ci	__type(value, int);
1398c2ecf20Sopenharmony_ci} symbolmap SEC(".maps");
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_cistruct {
1428c2ecf20Sopenharmony_ci	__uint(type, BPF_MAP_TYPE_ARRAY);
1438c2ecf20Sopenharmony_ci	__uint(max_entries, 1);
1448c2ecf20Sopenharmony_ci	__type(key, int);
1458c2ecf20Sopenharmony_ci	__type(value, Stats);
1468c2ecf20Sopenharmony_ci} statsmap SEC(".maps");
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_cistruct {
1498c2ecf20Sopenharmony_ci	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
1508c2ecf20Sopenharmony_ci	__uint(max_entries, 32);
1518c2ecf20Sopenharmony_ci	__uint(key_size, sizeof(int));
1528c2ecf20Sopenharmony_ci	__uint(value_size, sizeof(int));
1538c2ecf20Sopenharmony_ci} perfmap SEC(".maps");
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_cistruct {
1568c2ecf20Sopenharmony_ci	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
1578c2ecf20Sopenharmony_ci	__uint(max_entries, 1000);
1588c2ecf20Sopenharmony_ci	__uint(key_size, sizeof(int));
1598c2ecf20Sopenharmony_ci	__uint(value_size, sizeof(long long) * 127);
1608c2ecf20Sopenharmony_ci} stackmap SEC(".maps");
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci#ifdef GLOBAL_FUNC
1638c2ecf20Sopenharmony_ci__noinline
1648c2ecf20Sopenharmony_ci#elif defined(SUBPROGS)
1658c2ecf20Sopenharmony_cistatic __noinline
1668c2ecf20Sopenharmony_ci#else
1678c2ecf20Sopenharmony_cistatic __always_inline
1688c2ecf20Sopenharmony_ci#endif
1698c2ecf20Sopenharmony_ciint __on_event(struct bpf_raw_tracepoint_args *ctx)
1708c2ecf20Sopenharmony_ci{
1718c2ecf20Sopenharmony_ci	uint64_t pid_tgid = bpf_get_current_pid_tgid();
1728c2ecf20Sopenharmony_ci	pid_t pid = (pid_t)(pid_tgid >> 32);
1738c2ecf20Sopenharmony_ci	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
1748c2ecf20Sopenharmony_ci	if (!pidData)
1758c2ecf20Sopenharmony_ci		return 0;
1768c2ecf20Sopenharmony_ci
1778c2ecf20Sopenharmony_ci	int zero = 0;
1788c2ecf20Sopenharmony_ci	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
1798c2ecf20Sopenharmony_ci	if (!event)
1808c2ecf20Sopenharmony_ci		return 0;
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ci	event->pid = pid;
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci	event->tid = (pid_t)pid_tgid;
1858c2ecf20Sopenharmony_ci	bpf_get_current_comm(&event->comm, sizeof(event->comm));
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_ci	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
1888c2ecf20Sopenharmony_ci	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_ci	void* thread_state_current = (void*)0;
1918c2ecf20Sopenharmony_ci	bpf_probe_read_user(&thread_state_current,
1928c2ecf20Sopenharmony_ci			    sizeof(thread_state_current),
1938c2ecf20Sopenharmony_ci			    (void*)(long)pidData->current_state_addr);
1948c2ecf20Sopenharmony_ci
1958c2ecf20Sopenharmony_ci	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
1968c2ecf20Sopenharmony_ci	void* tls_base = (void*)task;
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci	void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
1998c2ecf20Sopenharmony_ci		: thread_state_current;
2008c2ecf20Sopenharmony_ci	event->thread_current = thread_state == thread_state_current;
2018c2ecf20Sopenharmony_ci
2028c2ecf20Sopenharmony_ci	if (pidData->use_tls) {
2038c2ecf20Sopenharmony_ci		uint64_t pthread_created;
2048c2ecf20Sopenharmony_ci		uint64_t pthread_self;
2058c2ecf20Sopenharmony_ci		bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
2068c2ecf20Sopenharmony_ci				    tls_base + 0x10);
2078c2ecf20Sopenharmony_ci
2088c2ecf20Sopenharmony_ci		bpf_probe_read_user(&pthread_created,
2098c2ecf20Sopenharmony_ci				    sizeof(pthread_created),
2108c2ecf20Sopenharmony_ci				    thread_state +
2118c2ecf20Sopenharmony_ci				    pidData->offsets.PyThreadState_thread);
2128c2ecf20Sopenharmony_ci		event->pthread_match = pthread_created == pthread_self;
2138c2ecf20Sopenharmony_ci	} else {
2148c2ecf20Sopenharmony_ci		event->pthread_match = 1;
2158c2ecf20Sopenharmony_ci	}
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_ci	if (event->pthread_match || !pidData->use_tls) {
2188c2ecf20Sopenharmony_ci		void* frame_ptr;
2198c2ecf20Sopenharmony_ci		FrameData frame;
2208c2ecf20Sopenharmony_ci		Symbol sym = {};
2218c2ecf20Sopenharmony_ci		int cur_cpu = bpf_get_smp_processor_id();
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_ci		bpf_probe_read_user(&frame_ptr,
2248c2ecf20Sopenharmony_ci				    sizeof(frame_ptr),
2258c2ecf20Sopenharmony_ci				    thread_state +
2268c2ecf20Sopenharmony_ci				    pidData->offsets.PyThreadState_frame);
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
2298c2ecf20Sopenharmony_ci		if (symbol_counter == NULL)
2308c2ecf20Sopenharmony_ci			return 0;
2318c2ecf20Sopenharmony_ci#ifdef NO_UNROLL
2328c2ecf20Sopenharmony_ci#pragma clang loop unroll(disable)
2338c2ecf20Sopenharmony_ci#else
2348c2ecf20Sopenharmony_ci#pragma clang loop unroll(full)
2358c2ecf20Sopenharmony_ci#endif
2368c2ecf20Sopenharmony_ci		/* Unwind python stack */
2378c2ecf20Sopenharmony_ci		for (int i = 0; i < STACK_MAX_LEN; ++i) {
2388c2ecf20Sopenharmony_ci			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
2398c2ecf20Sopenharmony_ci				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
2408c2ecf20Sopenharmony_ci				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
2418c2ecf20Sopenharmony_ci				if (!symbol_id) {
2428c2ecf20Sopenharmony_ci					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
2438c2ecf20Sopenharmony_ci					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
2448c2ecf20Sopenharmony_ci					if (!symbol_id)
2458c2ecf20Sopenharmony_ci						return 0;
2468c2ecf20Sopenharmony_ci				}
2478c2ecf20Sopenharmony_ci				if (*symbol_id == new_symbol_id)
2488c2ecf20Sopenharmony_ci					(*symbol_counter)++;
2498c2ecf20Sopenharmony_ci				event->stack[i] = *symbol_id;
2508c2ecf20Sopenharmony_ci				event->stack_len = i + 1;
2518c2ecf20Sopenharmony_ci				frame_ptr = frame.f_back;
2528c2ecf20Sopenharmony_ci			}
2538c2ecf20Sopenharmony_ci		}
2548c2ecf20Sopenharmony_ci		event->stack_complete = frame_ptr == NULL;
2558c2ecf20Sopenharmony_ci	} else {
2568c2ecf20Sopenharmony_ci		event->stack_complete = 1;
2578c2ecf20Sopenharmony_ci	}
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_ci	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
2608c2ecf20Sopenharmony_ci	if (stats)
2618c2ecf20Sopenharmony_ci		stats->success++;
2628c2ecf20Sopenharmony_ci
2638c2ecf20Sopenharmony_ci	event->has_meta = 0;
2648c2ecf20Sopenharmony_ci	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
2658c2ecf20Sopenharmony_ci	return 0;
2668c2ecf20Sopenharmony_ci}
2678c2ecf20Sopenharmony_ci
2688c2ecf20Sopenharmony_ciSEC("raw_tracepoint/kfree_skb")
2698c2ecf20Sopenharmony_ciint on_event(struct bpf_raw_tracepoint_args* ctx)
2708c2ecf20Sopenharmony_ci{
2718c2ecf20Sopenharmony_ci	int i, ret = 0;
2728c2ecf20Sopenharmony_ci	ret |= __on_event(ctx);
2738c2ecf20Sopenharmony_ci	ret |= __on_event(ctx);
2748c2ecf20Sopenharmony_ci	ret |= __on_event(ctx);
2758c2ecf20Sopenharmony_ci	ret |= __on_event(ctx);
2768c2ecf20Sopenharmony_ci	ret |= __on_event(ctx);
2778c2ecf20Sopenharmony_ci	return ret;
2788c2ecf20Sopenharmony_ci}
2798c2ecf20Sopenharmony_ci
2808c2ecf20Sopenharmony_cichar _license[] SEC("license") = "GPL";
281