1// SPDX-License-Identifier: GPL-2.0
2#include <string.h>
3#include <linux/tcp.h>
4#include <linux/bpf.h>
5#include <netinet/in.h>
6#include <bpf/bpf_helpers.h>
7
/* License string stored in the "license" section; this object declares GPL. */
char _license[] SEC("license") = "GPL";
/* Version word stored in the "version" section; fixed at 1. */
__u32 _version SEC("version") = 1;
10
/* Fallback page size, used only when no included header defines PAGE_SIZE.
 * The IP_FREEBIND branches of both programs compare the visible optval
 * window (optval_end - optval) against this value.
 */
#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif

/* Fallback for SOL_TCP when the included headers do not provide it. */
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
#endif

/* Socket option level that the programs in this file service themselves:
 * setsockopt stores one byte per socket, getsockopt returns it. Any level
 * that is neither special-cased nor SOL_CUSTOM is rejected by both programs.
 */
#define SOL_CUSTOM			0xdeadbeef
20
/* Per-socket value kept in socket_storage_map.
 * _setsockopt() fills val from optval[0] for SOL_CUSTOM and
 * _getsockopt() copies it back into optval[0].
 */
struct sockopt_sk {
	__u8 val; /* the single byte exchanged through SOL_CUSTOM */
};
24
/* Socket-local storage map (BPF_MAP_TYPE_SK_STORAGE) holding one
 * struct sockopt_sk per socket. Both programs look entries up with
 * bpf_sk_storage_get(..., BPF_SK_STORAGE_GET_F_CREATE), so an entry is
 * created on first use.
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct sockopt_sk);
} socket_storage_map SEC(".maps");
31
32SEC("cgroup/getsockopt")
33int _getsockopt(struct bpf_sockopt *ctx)
34{
35	__u8 *optval_end = ctx->optval_end;
36	__u8 *optval = ctx->optval;
37	struct sockopt_sk *storage;
38
39	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
40		/* Not interested in SOL_IP:IP_TOS;
41		 * let next BPF program in the cgroup chain or kernel
42		 * handle it.
43		 */
44		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
45		return 1;
46	}
47
48	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
49		/* Not interested in SOL_SOCKET:SO_SNDBUF;
50		 * let next BPF program in the cgroup chain or kernel
51		 * handle it.
52		 */
53		return 1;
54	}
55
56	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
57		/* Not interested in SOL_TCP:TCP_CONGESTION;
58		 * let next BPF program in the cgroup chain or kernel
59		 * handle it.
60		 */
61		return 1;
62	}
63
64	if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
65		/* Verify that TCP_ZEROCOPY_RECEIVE triggers.
66		 * It has a custom implementation for performance
67		 * reasons.
68		 */
69
70		if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
71			return 0; /* EPERM, bounds check */
72
73		if (((struct tcp_zerocopy_receive *)optval)->address != 0)
74			return 0; /* EPERM, unexpected data */
75
76		return 1;
77	}
78
79	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
80		if (optval + 1 > optval_end)
81			return 0; /* EPERM, bounds check */
82
83		ctx->retval = 0; /* Reset system call return value to zero */
84
85		/* Always export 0x55 */
86		optval[0] = 0x55;
87		ctx->optlen = 1;
88
89		/* Userspace buffer is PAGE_SIZE * 2, but BPF
90		 * program can only see the first PAGE_SIZE
91		 * bytes of data.
92		 */
93		if (optval_end - optval != PAGE_SIZE)
94			return 0; /* EPERM, unexpected data size */
95
96		return 1;
97	}
98
99	if (ctx->level != SOL_CUSTOM)
100		return 0; /* EPERM, deny everything except custom level */
101
102	if (optval + 1 > optval_end)
103		return 0; /* EPERM, bounds check */
104
105	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
106				     BPF_SK_STORAGE_GET_F_CREATE);
107	if (!storage)
108		return 0; /* EPERM, couldn't get sk storage */
109
110	if (!ctx->retval)
111		return 0; /* EPERM, kernel should not have handled
112			   * SOL_CUSTOM, something is wrong!
113			   */
114	ctx->retval = 0; /* Reset system call return value to zero */
115
116	optval[0] = storage->val;
117	ctx->optlen = 1;
118
119	return 1;
120}
121
122SEC("cgroup/setsockopt")
123int _setsockopt(struct bpf_sockopt *ctx)
124{
125	__u8 *optval_end = ctx->optval_end;
126	__u8 *optval = ctx->optval;
127	struct sockopt_sk *storage;
128
129	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
130		/* Not interested in SOL_IP:IP_TOS;
131		 * let next BPF program in the cgroup chain or kernel
132		 * handle it.
133		 */
134		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
135		return 1;
136	}
137
138	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
139		/* Overwrite SO_SNDBUF value */
140
141		if (optval + sizeof(__u32) > optval_end)
142			return 0; /* EPERM, bounds check */
143
144		*(__u32 *)optval = 0x55AA;
145		ctx->optlen = 4;
146
147		return 1;
148	}
149
150	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
151		/* Always use cubic */
152
153		if (optval + 5 > optval_end)
154			return 0; /* EPERM, bounds check */
155
156		memcpy(optval, "cubic", 5);
157		ctx->optlen = 5;
158
159		return 1;
160	}
161
162	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
163		/* Original optlen is larger than PAGE_SIZE. */
164		if (ctx->optlen != PAGE_SIZE * 2)
165			return 0; /* EPERM, unexpected data size */
166
167		if (optval + 1 > optval_end)
168			return 0; /* EPERM, bounds check */
169
170		/* Make sure we can trim the buffer. */
171		optval[0] = 0;
172		ctx->optlen = 1;
173
174		/* Usepace buffer is PAGE_SIZE * 2, but BPF
175		 * program can only see the first PAGE_SIZE
176		 * bytes of data.
177		 */
178		if (optval_end - optval != PAGE_SIZE)
179			return 0; /* EPERM, unexpected data size */
180
181		return 1;
182	}
183
184	if (ctx->level != SOL_CUSTOM)
185		return 0; /* EPERM, deny everything except custom level */
186
187	if (optval + 1 > optval_end)
188		return 0; /* EPERM, bounds check */
189
190	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
191				     BPF_SK_STORAGE_GET_F_CREATE);
192	if (!storage)
193		return 0; /* EPERM, couldn't get sk storage */
194
195	storage->val = optval[0];
196	ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
197			   * setsockopt handler.
198			   */
199
200	return 1;
201}
202