1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3#include <linux/sched.h>
4#include <linux/ptrace.h>
5#include <stdint.h>
6#include <stddef.h>
7#include <stdbool.h>
8#include <linux/bpf.h>
9#include <bpf/bpf_helpers.h>
10
/* Size in bytes of the Symbol.name buffer. */
#define FUNCTION_NAME_LEN 64
/* Size in bytes of the Symbol.file buffer. */
#define FILE_NAME_LEN 128
/* Size in bytes of the Event.comm buffer filled by bpf_get_current_comm(). */
#define TASK_COMM_LEN 16
14
/*
 * Byte offsets into structures of the traced process.  Each value is added
 * to a user-space base pointer to form the source address of a
 * bpf_probe_read_user*() call.
 */
typedef struct {
	int PyThreadState_frame; // thread state -> pointer to the first frame to unwind
	int PyThreadState_thread; // thread state -> value compared with the pthread_self read
	int PyFrameObject_back; // frame -> FrameData.f_back
	int PyFrameObject_code; // frame -> FrameData.f_code
	int PyFrameObject_lineno; // not read by the code visible here
	int PyCodeObject_filename; // code object -> FrameData.co_filename
	int PyCodeObject_name; // code object -> FrameData.co_name
	int String_data; // string object -> start of the characters copied into Symbol
	int String_size; // not read by the code visible here
} OffsetConfig;
26
/*
 * Per-process configuration; value type of pidmap, which __on_event() looks
 * up using the upper 32 bits of bpf_get_current_pid_tgid().
 */
typedef struct {
	uintptr_t current_state_addr; // user address from which the current thread-state pointer is read
	uintptr_t tls_key_addr; // user address of the int key read by get_thread_state()
	OffsetConfig offsets; // structure offsets used for every user-memory read
	bool use_tls; // true: obtain the thread state via get_thread_state() and check pthread_match
} PidData;
33
/* Value type of statsmap. */
typedef struct {
	uint32_t success; // incremented by __on_event() just before it emits an event
} Stats;
37
/* Function-name / file-name pair; key type of symbolmap. */
typedef struct {
	char name[FUNCTION_NAME_LEN]; // filled by get_frame_data() from the co_name string
	char file[FILE_NAME_LEN]; // filled by get_frame_data() from the co_filename string
} Symbol;
42
/*
 * Sample record filled in by __on_event() and emitted through perfmap.
 * Only the bytes preceding `metadata` are passed to bpf_perf_event_output().
 * STACK_MAX_LEN is not defined in the code visible here; it has to be
 * provided before this definition is compiled.
 */
typedef struct {
	uint32_t pid; // upper 32 bits of bpf_get_current_pid_tgid()
	uint32_t tid; // lower 32 bits of bpf_get_current_pid_tgid()
	char comm[TASK_COMM_LEN]; // filled by bpf_get_current_comm()
	int32_t kernel_stack_id; // bpf_get_stackid() result with flags 0
	int32_t user_stack_id; // bpf_get_stackid() result with BPF_F_USER_STACK
	bool thread_current; // selected thread state equals the one read from current_state_addr
	bool pthread_match; // thread-state thread id equals the pthread_self read; 1 when TLS is unused
	bool stack_complete; // frame pointer was NULL after unwinding; 1 when unwinding was skipped
	int16_t stack_len; // index + 1 of the last stack[] slot written
	int32_t stack[STACK_MAX_LEN]; // per-frame values taken from symbolmap

	int has_meta; // set to 0 by __on_event()
	int metadata; // never written here; its offset is the size of the emitted record
	char dummy_safeguard; // not referenced by the code visible here
} Event;
59
60
/* Local pid_t; used for the halves of bpf_get_current_pid_tgid(). */
typedef int pid_t;
62
/*
 * User-space pointers collected by get_frame_data() for a single frame.
 * They are addresses in the traced process and are only ever used as
 * sources for further bpf_probe_read_user*() calls.
 */
typedef struct {
	void* f_back; // PyFrameObject.f_back, previous frame
	void* f_code; // PyFrameObject.f_code, pointer to PyCodeObject
	void* co_filename; // PyCodeObject.co_filename
	void* co_name; // PyCodeObject.co_name
} FrameData;
69
70#ifdef SUBPROGS
71__noinline
72#else
73__always_inline
74#endif
75static void *get_thread_state(void *tls_base, PidData *pidData)
76{
77	void* thread_state;
78	int key;
79
80	bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
81	bpf_probe_read_user(&thread_state, sizeof(thread_state),
82			    tls_base + 0x310 + key * 0x10 + 0x08);
83	return thread_state;
84}
85
86static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
87					   FrameData *frame, Symbol *symbol)
88{
89	// read data from PyFrameObject
90	bpf_probe_read_user(&frame->f_back,
91			    sizeof(frame->f_back),
92			    frame_ptr + pidData->offsets.PyFrameObject_back);
93	bpf_probe_read_user(&frame->f_code,
94			    sizeof(frame->f_code),
95			    frame_ptr + pidData->offsets.PyFrameObject_code);
96
97	// read data from PyCodeObject
98	if (!frame->f_code)
99		return false;
100	bpf_probe_read_user(&frame->co_filename,
101			    sizeof(frame->co_filename),
102			    frame->f_code + pidData->offsets.PyCodeObject_filename);
103	bpf_probe_read_user(&frame->co_name,
104			    sizeof(frame->co_name),
105			    frame->f_code + pidData->offsets.PyCodeObject_name);
106	// read actual names into symbol
107	if (frame->co_filename)
108		bpf_probe_read_user_str(&symbol->file,
109					sizeof(symbol->file),
110					frame->co_filename +
111					pidData->offsets.String_data);
112	if (frame->co_name)
113		bpf_probe_read_user_str(&symbol->name,
114					sizeof(symbol->name),
115					frame->co_name +
116					pidData->offsets.String_data);
117	return true;
118}
119
/*
 * Single-entry hash map from an int key to PidData.  __on_event() looks up
 * the upper 32 bits of bpf_get_current_pid_tgid() here and returns early
 * when there is no entry.  Nothing in the visible code inserts entries;
 * presumably user space populates it -- confirm against the loader.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, PidData);
} pidmap SEC(".maps");
126
/*
 * Single-entry hash map holding the Event record that __on_event() fills in
 * and emits.  It is looked up with key 0; __on_event() returns early when
 * the entry is missing.  Nothing in the visible code inserts that entry.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Event);
} eventmap SEC(".maps");
133
/*
 * Hash map keyed by Symbol with an int value.  __on_event() stores the value
 * found for each frame's symbol in Event.stack, and also looks up an
 * all-zero Symbol key whose value it uses as a counter.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, Symbol);
	__type(value, int);
} symbolmap SEC(".maps");
140
/* One-element array map; slot 0 holds the Stats updated by __on_event(). */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Stats);
} statsmap SEC(".maps");
147
/*
 * Perf event array with 32 slots; the map __on_event() passes to
 * bpf_perf_event_output() when emitting an Event.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(max_entries, 32);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} perfmap SEC(".maps");
154
/*
 * Stack-trace map used by both bpf_get_stackid() calls in __on_event().
 * Holds up to 1000 entries; each value has room for 127 long long items.
 */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(max_entries, 1000);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(long long) * 127);
} stackmap SEC(".maps");
161
/*
 * Build one Event for the current task and emit it through perfmap.
 *
 * Linkage depends on the build: a non-static, non-inlined function with
 * GLOBAL_FUNC, a static non-inlined one with SUBPROGS, and a static
 * always-inlined one otherwise.
 *
 * Steps: look up the PidData for the current tgid and the Event at key 0
 * (returning 0 if either is missing), record pid/tid/comm and the user and
 * kernel stack ids, select the thread state, optionally unwind up to
 * STACK_MAX_LEN frames into event->stack, bump Stats.success, and output
 * the event.  Helper return codes are not checked.
 *
 * Always returns 0.
 */
#ifdef GLOBAL_FUNC
__noinline
#elif defined(SUBPROGS)
static __noinline
#else
static __always_inline
#endif
int __on_event(struct bpf_raw_tracepoint_args *ctx)
{
	uint64_t pid_tgid = bpf_get_current_pid_tgid();
	/* Upper half is used as the process id and as the pidmap key. */
	pid_t pid = (pid_t)(pid_tgid >> 32);
	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
	if (!pidData)
		return 0;

	int zero = 0;
	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
	if (!event)
		return 0;

	event->pid = pid;

	/* Lower half of pid_tgid. */
	event->tid = (pid_t)pid_tgid;
	bpf_get_current_comm(&event->comm, sizeof(event->comm));

	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);

	/* Thread-state pointer stored at the configured user address. */
	void* thread_state_current = (void*)0;
	bpf_probe_read_user(&thread_state_current,
			    sizeof(thread_state_current),
			    (void*)(long)pidData->current_state_addr);

	/*
	 * NOTE(review): tls_base is the task_struct address returned by
	 * bpf_get_current_task(), yet it is used below as the base of
	 * bpf_probe_read_user() source addresses -- confirm this is intended.
	 */
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	void* tls_base = (void*)task;

	void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
		: thread_state_current;
	event->thread_current = thread_state == thread_state_current;

	if (pidData->use_tls) {
		uint64_t pthread_created;
		uint64_t pthread_self;
		bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
				    tls_base + 0x10);

		bpf_probe_read_user(&pthread_created,
				    sizeof(pthread_created),
				    thread_state +
				    pidData->offsets.PyThreadState_thread);
		event->pthread_match = pthread_created == pthread_self;
	} else {
		event->pthread_match = 1;
	}

	/* pthread_match is already 1 whenever use_tls is false. */
	if (event->pthread_match || !pidData->use_tls) {
		void* frame_ptr;
		FrameData frame;
		Symbol sym = {};
		int cur_cpu = bpf_get_smp_processor_id();

		bpf_probe_read_user(&frame_ptr,
				    sizeof(frame_ptr),
				    thread_state +
				    pidData->offsets.PyThreadState_frame);

		/* sym is still all-zero here: that key's value acts as the counter. */
		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
		if (symbol_counter == NULL)
			return 0;
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
#else
#pragma clang loop unroll(full)
#endif
		/* Unwind python stack */
		for (int i = 0; i < STACK_MAX_LEN; ++i) {
			/* Once frame_ptr is NULL the remaining iterations do nothing. */
			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
				if (!symbol_id) {
					/*
					 * NOTE(review): the new entry is created with value
					 * 0, not new_symbol_id, so the comparison below only
					 * matches when new_symbol_id is itself 0 -- confirm
					 * this is intended.
					 */
					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
					if (!symbol_id)
						return 0;
				}
				if (*symbol_id == new_symbol_id)
					(*symbol_counter)++;
				event->stack[i] = *symbol_id;
				event->stack_len = i + 1;
				frame_ptr = frame.f_back;
			}
		}
		/* Complete only if the walk reached a NULL frame pointer. */
		event->stack_complete = frame_ptr == NULL;
	} else {
		event->stack_complete = 1;
	}

	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
	if (stats)
		stats->success++;

	event->has_meta = 0;
	/* Emit only the part of Event that precedes the metadata field. */
	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
	return 0;
}
267
268SEC("raw_tracepoint/kfree_skb")
269int on_event(struct bpf_raw_tracepoint_args* ctx)
270{
271	int i, ret = 0;
272	ret |= __on_event(ctx);
273	ret |= __on_event(ctx);
274	ret |= __on_event(ctx);
275	ret |= __on_event(ctx);
276	ret |= __on_event(ctx);
277	return ret;
278}
279
280char _license[] SEC("license") = "GPL";
281