// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci#include <string.h>
38c2ecf20Sopenharmony_ci#include <linux/tcp.h>
48c2ecf20Sopenharmony_ci#include <linux/bpf.h>
58c2ecf20Sopenharmony_ci#include <netinet/in.h>
68c2ecf20Sopenharmony_ci#include <bpf/bpf_helpers.h>
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_cichar _license[] SEC("license") = "GPL";
98c2ecf20Sopenharmony_ci__u32 _version SEC("version") = 1;
108c2ecf20Sopenharmony_ci
/* Fallback page size, used only when no included header defines PAGE_SIZE.
 * The IP_FREEBIND branches of the sockopt hooks in this file compare the
 * visible buffer size and the original optlen against it.
 */
#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif

/* Fallback for environments whose headers do not provide SOL_TCP. */
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
#endif

/* Socket option level handled by the BPF hooks in this file; any level
 * other than this one (and the few explicitly listed options) is denied.
 */
#define SOL_CUSTOM			0xdeadbeef
208c2ecf20Sopenharmony_ci
/* Per-socket value kept in socket_storage_map. */
struct sockopt_sk {
	/* Byte stored by the SOL_CUSTOM setsockopt path and exported
	 * again by the SOL_CUSTOM getsockopt path.
	 */
	__u8 val;
};
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_cistruct {
268c2ecf20Sopenharmony_ci	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
278c2ecf20Sopenharmony_ci	__uint(map_flags, BPF_F_NO_PREALLOC);
288c2ecf20Sopenharmony_ci	__type(key, int);
298c2ecf20Sopenharmony_ci	__type(value, struct sockopt_sk);
308c2ecf20Sopenharmony_ci} socket_storage_map SEC(".maps");
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ciSEC("cgroup/getsockopt")
338c2ecf20Sopenharmony_ciint _getsockopt(struct bpf_sockopt *ctx)
348c2ecf20Sopenharmony_ci{
358c2ecf20Sopenharmony_ci	__u8 *optval_end = ctx->optval_end;
368c2ecf20Sopenharmony_ci	__u8 *optval = ctx->optval;
378c2ecf20Sopenharmony_ci	struct sockopt_sk *storage;
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_ci	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
408c2ecf20Sopenharmony_ci		/* Not interested in SOL_IP:IP_TOS;
418c2ecf20Sopenharmony_ci		 * let next BPF program in the cgroup chain or kernel
428c2ecf20Sopenharmony_ci		 * handle it.
438c2ecf20Sopenharmony_ci		 */
448c2ecf20Sopenharmony_ci		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
458c2ecf20Sopenharmony_ci		return 1;
468c2ecf20Sopenharmony_ci	}
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ci	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
498c2ecf20Sopenharmony_ci		/* Not interested in SOL_SOCKET:SO_SNDBUF;
508c2ecf20Sopenharmony_ci		 * let next BPF program in the cgroup chain or kernel
518c2ecf20Sopenharmony_ci		 * handle it.
528c2ecf20Sopenharmony_ci		 */
538c2ecf20Sopenharmony_ci		return 1;
548c2ecf20Sopenharmony_ci	}
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_ci	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
578c2ecf20Sopenharmony_ci		/* Not interested in SOL_TCP:TCP_CONGESTION;
588c2ecf20Sopenharmony_ci		 * let next BPF program in the cgroup chain or kernel
598c2ecf20Sopenharmony_ci		 * handle it.
608c2ecf20Sopenharmony_ci		 */
618c2ecf20Sopenharmony_ci		return 1;
628c2ecf20Sopenharmony_ci	}
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
658c2ecf20Sopenharmony_ci		/* Verify that TCP_ZEROCOPY_RECEIVE triggers.
668c2ecf20Sopenharmony_ci		 * It has a custom implementation for performance
678c2ecf20Sopenharmony_ci		 * reasons.
688c2ecf20Sopenharmony_ci		 */
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci		if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
718c2ecf20Sopenharmony_ci			return 0; /* EPERM, bounds check */
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci		if (((struct tcp_zerocopy_receive *)optval)->address != 0)
748c2ecf20Sopenharmony_ci			return 0; /* EPERM, unexpected data */
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci		return 1;
778c2ecf20Sopenharmony_ci	}
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_ci	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
808c2ecf20Sopenharmony_ci		if (optval + 1 > optval_end)
818c2ecf20Sopenharmony_ci			return 0; /* EPERM, bounds check */
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_ci		ctx->retval = 0; /* Reset system call return value to zero */
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci		/* Always export 0x55 */
868c2ecf20Sopenharmony_ci		optval[0] = 0x55;
878c2ecf20Sopenharmony_ci		ctx->optlen = 1;
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_ci		/* Userspace buffer is PAGE_SIZE * 2, but BPF
908c2ecf20Sopenharmony_ci		 * program can only see the first PAGE_SIZE
918c2ecf20Sopenharmony_ci		 * bytes of data.
928c2ecf20Sopenharmony_ci		 */
938c2ecf20Sopenharmony_ci		if (optval_end - optval != PAGE_SIZE)
948c2ecf20Sopenharmony_ci			return 0; /* EPERM, unexpected data size */
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_ci		return 1;
978c2ecf20Sopenharmony_ci	}
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci	if (ctx->level != SOL_CUSTOM)
1008c2ecf20Sopenharmony_ci		return 0; /* EPERM, deny everything except custom level */
1018c2ecf20Sopenharmony_ci
1028c2ecf20Sopenharmony_ci	if (optval + 1 > optval_end)
1038c2ecf20Sopenharmony_ci		return 0; /* EPERM, bounds check */
1048c2ecf20Sopenharmony_ci
1058c2ecf20Sopenharmony_ci	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
1068c2ecf20Sopenharmony_ci				     BPF_SK_STORAGE_GET_F_CREATE);
1078c2ecf20Sopenharmony_ci	if (!storage)
1088c2ecf20Sopenharmony_ci		return 0; /* EPERM, couldn't get sk storage */
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci	if (!ctx->retval)
1118c2ecf20Sopenharmony_ci		return 0; /* EPERM, kernel should not have handled
1128c2ecf20Sopenharmony_ci			   * SOL_CUSTOM, something is wrong!
1138c2ecf20Sopenharmony_ci			   */
1148c2ecf20Sopenharmony_ci	ctx->retval = 0; /* Reset system call return value to zero */
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_ci	optval[0] = storage->val;
1178c2ecf20Sopenharmony_ci	ctx->optlen = 1;
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_ci	return 1;
1208c2ecf20Sopenharmony_ci}
1218c2ecf20Sopenharmony_ci
1228c2ecf20Sopenharmony_ciSEC("cgroup/setsockopt")
1238c2ecf20Sopenharmony_ciint _setsockopt(struct bpf_sockopt *ctx)
1248c2ecf20Sopenharmony_ci{
1258c2ecf20Sopenharmony_ci	__u8 *optval_end = ctx->optval_end;
1268c2ecf20Sopenharmony_ci	__u8 *optval = ctx->optval;
1278c2ecf20Sopenharmony_ci	struct sockopt_sk *storage;
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
1308c2ecf20Sopenharmony_ci		/* Not interested in SOL_IP:IP_TOS;
1318c2ecf20Sopenharmony_ci		 * let next BPF program in the cgroup chain or kernel
1328c2ecf20Sopenharmony_ci		 * handle it.
1338c2ecf20Sopenharmony_ci		 */
1348c2ecf20Sopenharmony_ci		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
1358c2ecf20Sopenharmony_ci		return 1;
1368c2ecf20Sopenharmony_ci	}
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
1398c2ecf20Sopenharmony_ci		/* Overwrite SO_SNDBUF value */
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci		if (optval + sizeof(__u32) > optval_end)
1428c2ecf20Sopenharmony_ci			return 0; /* EPERM, bounds check */
1438c2ecf20Sopenharmony_ci
1448c2ecf20Sopenharmony_ci		*(__u32 *)optval = 0x55AA;
1458c2ecf20Sopenharmony_ci		ctx->optlen = 4;
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci		return 1;
1488c2ecf20Sopenharmony_ci	}
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
1518c2ecf20Sopenharmony_ci		/* Always use cubic */
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci		if (optval + 5 > optval_end)
1548c2ecf20Sopenharmony_ci			return 0; /* EPERM, bounds check */
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_ci		memcpy(optval, "cubic", 5);
1578c2ecf20Sopenharmony_ci		ctx->optlen = 5;
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_ci		return 1;
1608c2ecf20Sopenharmony_ci	}
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
1638c2ecf20Sopenharmony_ci		/* Original optlen is larger than PAGE_SIZE. */
1648c2ecf20Sopenharmony_ci		if (ctx->optlen != PAGE_SIZE * 2)
1658c2ecf20Sopenharmony_ci			return 0; /* EPERM, unexpected data size */
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_ci		if (optval + 1 > optval_end)
1688c2ecf20Sopenharmony_ci			return 0; /* EPERM, bounds check */
1698c2ecf20Sopenharmony_ci
1708c2ecf20Sopenharmony_ci		/* Make sure we can trim the buffer. */
1718c2ecf20Sopenharmony_ci		optval[0] = 0;
1728c2ecf20Sopenharmony_ci		ctx->optlen = 1;
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_ci		/* Usepace buffer is PAGE_SIZE * 2, but BPF
1758c2ecf20Sopenharmony_ci		 * program can only see the first PAGE_SIZE
1768c2ecf20Sopenharmony_ci		 * bytes of data.
1778c2ecf20Sopenharmony_ci		 */
1788c2ecf20Sopenharmony_ci		if (optval_end - optval != PAGE_SIZE)
1798c2ecf20Sopenharmony_ci			return 0; /* EPERM, unexpected data size */
1808c2ecf20Sopenharmony_ci
1818c2ecf20Sopenharmony_ci		return 1;
1828c2ecf20Sopenharmony_ci	}
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci	if (ctx->level != SOL_CUSTOM)
1858c2ecf20Sopenharmony_ci		return 0; /* EPERM, deny everything except custom level */
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_ci	if (optval + 1 > optval_end)
1888c2ecf20Sopenharmony_ci		return 0; /* EPERM, bounds check */
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_ci	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
1918c2ecf20Sopenharmony_ci				     BPF_SK_STORAGE_GET_F_CREATE);
1928c2ecf20Sopenharmony_ci	if (!storage)
1938c2ecf20Sopenharmony_ci		return 0; /* EPERM, couldn't get sk storage */
1948c2ecf20Sopenharmony_ci
1958c2ecf20Sopenharmony_ci	storage->val = optval[0];
1968c2ecf20Sopenharmony_ci	ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
1978c2ecf20Sopenharmony_ci			   * setsockopt handler.
1988c2ecf20Sopenharmony_ci			   */
1998c2ecf20Sopenharmony_ci
2008c2ecf20Sopenharmony_ci	return 1;
2018c2ecf20Sopenharmony_ci}
202