162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci// Copyright (c) 2018 Facebook
362306a36Sopenharmony_ci
462306a36Sopenharmony_ci#include <string.h>
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/stddef.h>
762306a36Sopenharmony_ci#include <linux/bpf.h>
862306a36Sopenharmony_ci#include <linux/in.h>
962306a36Sopenharmony_ci#include <linux/in6.h>
1062306a36Sopenharmony_ci#include <linux/tcp.h>
1162306a36Sopenharmony_ci#include <linux/if.h>
1262306a36Sopenharmony_ci#include <errno.h>
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci#include <bpf/bpf_helpers.h>
1562306a36Sopenharmony_ci#include <bpf/bpf_endian.h>
1662306a36Sopenharmony_ci
1762306a36Sopenharmony_ci#include "bpf_tcp_helpers.h"
1862306a36Sopenharmony_ci
/* Source address the program binds the connecting socket to: 127.0.0.4. */
#define SRC_REWRITE_IP4		0x7f000004U
/* Destination address connect() calls are rewritten to: 127.0.0.1. */
#define DST_REWRITE_IP4		0x7f000001U
/* Destination port connect() calls are rewritten to (host byte order). */
#define DST_REWRITE_PORT4	4444

/* Fallback definitions, used only if the included headers lack them. */

/* Size of the buffers holding a congestion-control algorithm name. */
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX 16
#endif

/* Option number passed to bpf_setsockopt() at level SOL_TCP. */
#ifndef TCP_NOTSENT_LOWAT
#define TCP_NOTSENT_LOWAT 25
#endif

/* Size of the buffers holding a network interface name. */
#ifndef IFNAMSIZ
#define IFNAMSIZ 16
#endif
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci__attribute__ ((noinline)) __weak
3662306a36Sopenharmony_ciint do_bind(struct bpf_sock_addr *ctx)
3762306a36Sopenharmony_ci{
3862306a36Sopenharmony_ci	struct sockaddr_in sa = {};
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci	sa.sin_family = AF_INET;
4162306a36Sopenharmony_ci	sa.sin_port = bpf_htons(0);
4262306a36Sopenharmony_ci	sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
4562306a36Sopenharmony_ci		return 0;
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci	return 1;
4862306a36Sopenharmony_ci}
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_cistatic __inline int verify_cc(struct bpf_sock_addr *ctx,
5162306a36Sopenharmony_ci			      char expected[TCP_CA_NAME_MAX])
5262306a36Sopenharmony_ci{
5362306a36Sopenharmony_ci	char buf[TCP_CA_NAME_MAX];
5462306a36Sopenharmony_ci	int i;
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci	if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
5762306a36Sopenharmony_ci		return 1;
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ci	for (i = 0; i < TCP_CA_NAME_MAX; i++) {
6062306a36Sopenharmony_ci		if (buf[i] != expected[i])
6162306a36Sopenharmony_ci			return 1;
6262306a36Sopenharmony_ci		if (buf[i] == 0)
6362306a36Sopenharmony_ci			break;
6462306a36Sopenharmony_ci	}
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci	return 0;
6762306a36Sopenharmony_ci}
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_cistatic __inline int set_cc(struct bpf_sock_addr *ctx)
7062306a36Sopenharmony_ci{
7162306a36Sopenharmony_ci	char reno[TCP_CA_NAME_MAX] = "reno";
7262306a36Sopenharmony_ci	char cubic[TCP_CA_NAME_MAX] = "cubic";
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno)))
7562306a36Sopenharmony_ci		return 1;
7662306a36Sopenharmony_ci	if (verify_cc(ctx, reno))
7762306a36Sopenharmony_ci		return 1;
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
8062306a36Sopenharmony_ci		return 1;
8162306a36Sopenharmony_ci	if (verify_cc(ctx, cubic))
8262306a36Sopenharmony_ci		return 1;
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci	return 0;
8562306a36Sopenharmony_ci}
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_cistatic __inline int bind_to_device(struct bpf_sock_addr *ctx)
8862306a36Sopenharmony_ci{
8962306a36Sopenharmony_ci	char veth1[IFNAMSIZ] = "test_sock_addr1";
9062306a36Sopenharmony_ci	char veth2[IFNAMSIZ] = "test_sock_addr2";
9162306a36Sopenharmony_ci	char missing[IFNAMSIZ] = "nonexistent_dev";
9262306a36Sopenharmony_ci	char del_bind[IFNAMSIZ] = "";
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
9562306a36Sopenharmony_ci				&veth1, sizeof(veth1)))
9662306a36Sopenharmony_ci		return 1;
9762306a36Sopenharmony_ci	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
9862306a36Sopenharmony_ci				&veth2, sizeof(veth2)))
9962306a36Sopenharmony_ci		return 1;
10062306a36Sopenharmony_ci	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
10162306a36Sopenharmony_ci				&missing, sizeof(missing)) != -ENODEV)
10262306a36Sopenharmony_ci		return 1;
10362306a36Sopenharmony_ci	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
10462306a36Sopenharmony_ci				&del_bind, sizeof(del_bind)))
10562306a36Sopenharmony_ci		return 1;
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci	return 0;
10862306a36Sopenharmony_ci}
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_cistatic __inline int set_keepalive(struct bpf_sock_addr *ctx)
11162306a36Sopenharmony_ci{
11262306a36Sopenharmony_ci	int zero = 0, one = 1;
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)))
11562306a36Sopenharmony_ci		return 1;
11662306a36Sopenharmony_ci	if (ctx->type == SOCK_STREAM) {
11762306a36Sopenharmony_ci		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one)))
11862306a36Sopenharmony_ci			return 1;
11962306a36Sopenharmony_ci		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one)))
12062306a36Sopenharmony_ci			return 1;
12162306a36Sopenharmony_ci		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one)))
12262306a36Sopenharmony_ci			return 1;
12362306a36Sopenharmony_ci		if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one)))
12462306a36Sopenharmony_ci			return 1;
12562306a36Sopenharmony_ci		if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one)))
12662306a36Sopenharmony_ci			return 1;
12762306a36Sopenharmony_ci	}
12862306a36Sopenharmony_ci	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero)))
12962306a36Sopenharmony_ci		return 1;
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci	return 0;
13262306a36Sopenharmony_ci}
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_cistatic __inline int set_notsent_lowat(struct bpf_sock_addr *ctx)
13562306a36Sopenharmony_ci{
13662306a36Sopenharmony_ci	int lowat = 65535;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	if (ctx->type == SOCK_STREAM) {
13962306a36Sopenharmony_ci		if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)))
14062306a36Sopenharmony_ci			return 1;
14162306a36Sopenharmony_ci	}
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci	return 0;
14462306a36Sopenharmony_ci}
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ciSEC("cgroup/connect4")
14762306a36Sopenharmony_ciint connect_v4_prog(struct bpf_sock_addr *ctx)
14862306a36Sopenharmony_ci{
14962306a36Sopenharmony_ci	struct bpf_sock_tuple tuple = {};
15062306a36Sopenharmony_ci	struct bpf_sock *sk;
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci	/* Verify that new destination is available. */
15362306a36Sopenharmony_ci	memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr));
15462306a36Sopenharmony_ci	memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport));
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci	tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
15762306a36Sopenharmony_ci	tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci	/* Bind to device and unbind it. */
16062306a36Sopenharmony_ci	if (bind_to_device(ctx))
16162306a36Sopenharmony_ci		return 0;
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci	if (set_keepalive(ctx))
16462306a36Sopenharmony_ci		return 0;
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci	if (set_notsent_lowat(ctx))
16762306a36Sopenharmony_ci		return 0;
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
17062306a36Sopenharmony_ci		return 0;
17162306a36Sopenharmony_ci	else if (ctx->type == SOCK_STREAM)
17262306a36Sopenharmony_ci		sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4),
17362306a36Sopenharmony_ci				       BPF_F_CURRENT_NETNS, 0);
17462306a36Sopenharmony_ci	else
17562306a36Sopenharmony_ci		sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4),
17662306a36Sopenharmony_ci				       BPF_F_CURRENT_NETNS, 0);
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci	if (!sk)
17962306a36Sopenharmony_ci		return 0;
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	if (sk->src_ip4 != tuple.ipv4.daddr ||
18262306a36Sopenharmony_ci	    sk->src_port != DST_REWRITE_PORT4) {
18362306a36Sopenharmony_ci		bpf_sk_release(sk);
18462306a36Sopenharmony_ci		return 0;
18562306a36Sopenharmony_ci	}
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci	bpf_sk_release(sk);
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci	/* Rewrite congestion control. */
19062306a36Sopenharmony_ci	if (ctx->type == SOCK_STREAM && set_cc(ctx))
19162306a36Sopenharmony_ci		return 0;
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_ci	/* Rewrite destination. */
19462306a36Sopenharmony_ci	ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
19562306a36Sopenharmony_ci	ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_ci	return do_bind(ctx) ? 1 : 0;
19862306a36Sopenharmony_ci}
19962306a36Sopenharmony_ci
/* License string placed in the object's "license" section. */
char _license[] SEC("license") = "GPL";
201