162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci// Copyright (c) 2018 Facebook 362306a36Sopenharmony_ci 462306a36Sopenharmony_ci#include <string.h> 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/stddef.h> 762306a36Sopenharmony_ci#include <linux/bpf.h> 862306a36Sopenharmony_ci#include <linux/in.h> 962306a36Sopenharmony_ci#include <linux/in6.h> 1062306a36Sopenharmony_ci#include <linux/tcp.h> 1162306a36Sopenharmony_ci#include <linux/if.h> 1262306a36Sopenharmony_ci#include <errno.h> 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include <bpf/bpf_helpers.h> 1562306a36Sopenharmony_ci#include <bpf/bpf_endian.h> 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci#include "bpf_tcp_helpers.h" 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci#define SRC_REWRITE_IP4 0x7f000004U 2062306a36Sopenharmony_ci#define DST_REWRITE_IP4 0x7f000001U 2162306a36Sopenharmony_ci#define DST_REWRITE_PORT4 4444 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ci#ifndef TCP_CA_NAME_MAX 2462306a36Sopenharmony_ci#define TCP_CA_NAME_MAX 16 2562306a36Sopenharmony_ci#endif 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci#ifndef TCP_NOTSENT_LOWAT 2862306a36Sopenharmony_ci#define TCP_NOTSENT_LOWAT 25 2962306a36Sopenharmony_ci#endif 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci#ifndef IFNAMSIZ 3262306a36Sopenharmony_ci#define IFNAMSIZ 16 3362306a36Sopenharmony_ci#endif 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci__attribute__ ((noinline)) __weak 3662306a36Sopenharmony_ciint do_bind(struct bpf_sock_addr *ctx) 3762306a36Sopenharmony_ci{ 3862306a36Sopenharmony_ci struct sockaddr_in sa = {}; 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_ci sa.sin_family = AF_INET; 4162306a36Sopenharmony_ci sa.sin_port = bpf_htons(0); 4262306a36Sopenharmony_ci sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) 4562306a36Sopenharmony_ci return 0; 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_ci return 1; 4862306a36Sopenharmony_ci} 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_cistatic __inline int verify_cc(struct bpf_sock_addr *ctx, 5162306a36Sopenharmony_ci char expected[TCP_CA_NAME_MAX]) 5262306a36Sopenharmony_ci{ 5362306a36Sopenharmony_ci char buf[TCP_CA_NAME_MAX]; 5462306a36Sopenharmony_ci int i; 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf))) 5762306a36Sopenharmony_ci return 1; 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci for (i = 0; i < TCP_CA_NAME_MAX; i++) { 6062306a36Sopenharmony_ci if (buf[i] != expected[i]) 6162306a36Sopenharmony_ci return 1; 6262306a36Sopenharmony_ci if (buf[i] == 0) 6362306a36Sopenharmony_ci break; 6462306a36Sopenharmony_ci } 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci return 0; 6762306a36Sopenharmony_ci} 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_cistatic __inline int set_cc(struct bpf_sock_addr *ctx) 7062306a36Sopenharmony_ci{ 7162306a36Sopenharmony_ci char reno[TCP_CA_NAME_MAX] = "reno"; 7262306a36Sopenharmony_ci char cubic[TCP_CA_NAME_MAX] = "cubic"; 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno))) 7562306a36Sopenharmony_ci return 1; 7662306a36Sopenharmony_ci if (verify_cc(ctx, reno)) 7762306a36Sopenharmony_ci return 1; 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic))) 8062306a36Sopenharmony_ci return 1; 8162306a36Sopenharmony_ci if (verify_cc(ctx, cubic)) 8262306a36Sopenharmony_ci return 1; 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_ci return 0; 8562306a36Sopenharmony_ci} 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_cistatic __inline int bind_to_device(struct bpf_sock_addr *ctx) 8862306a36Sopenharmony_ci{ 8962306a36Sopenharmony_ci char veth1[IFNAMSIZ] = "test_sock_addr1"; 9062306a36Sopenharmony_ci char veth2[IFNAMSIZ] = "test_sock_addr2"; 9162306a36Sopenharmony_ci char missing[IFNAMSIZ] = "nonexistent_dev"; 9262306a36Sopenharmony_ci char del_bind[IFNAMSIZ] = ""; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, 9562306a36Sopenharmony_ci &veth1, sizeof(veth1))) 9662306a36Sopenharmony_ci return 1; 9762306a36Sopenharmony_ci if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, 9862306a36Sopenharmony_ci &veth2, sizeof(veth2))) 9962306a36Sopenharmony_ci return 1; 10062306a36Sopenharmony_ci if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, 10162306a36Sopenharmony_ci &missing, sizeof(missing)) != -ENODEV) 10262306a36Sopenharmony_ci return 1; 10362306a36Sopenharmony_ci if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, 10462306a36Sopenharmony_ci &del_bind, sizeof(del_bind))) 10562306a36Sopenharmony_ci return 1; 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci return 0; 10862306a36Sopenharmony_ci} 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_cistatic __inline int set_keepalive(struct bpf_sock_addr *ctx) 11162306a36Sopenharmony_ci{ 11262306a36Sopenharmony_ci int zero = 0, one = 1; 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one))) 11562306a36Sopenharmony_ci return 1; 11662306a36Sopenharmony_ci if (ctx->type == SOCK_STREAM) { 11762306a36Sopenharmony_ci if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one))) 11862306a36Sopenharmony_ci return 1; 11962306a36Sopenharmony_ci if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one))) 12062306a36Sopenharmony_ci return 1; 12162306a36Sopenharmony_ci if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one))) 12262306a36Sopenharmony_ci return 1; 12362306a36Sopenharmony_ci if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one))) 12462306a36Sopenharmony_ci return 1; 12562306a36Sopenharmony_ci if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one))) 12662306a36Sopenharmony_ci return 1; 12762306a36Sopenharmony_ci } 12862306a36Sopenharmony_ci if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero))) 12962306a36Sopenharmony_ci return 1; 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci return 0; 13262306a36Sopenharmony_ci} 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_cistatic __inline int set_notsent_lowat(struct bpf_sock_addr *ctx) 13562306a36Sopenharmony_ci{ 13662306a36Sopenharmony_ci int lowat = 65535; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci if (ctx->type == SOCK_STREAM) { 13962306a36Sopenharmony_ci if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat))) 14062306a36Sopenharmony_ci return 1; 14162306a36Sopenharmony_ci } 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci return 0; 14462306a36Sopenharmony_ci} 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_ciSEC("cgroup/connect4") 14762306a36Sopenharmony_ciint connect_v4_prog(struct bpf_sock_addr *ctx) 14862306a36Sopenharmony_ci{ 14962306a36Sopenharmony_ci struct bpf_sock_tuple tuple = {}; 15062306a36Sopenharmony_ci struct bpf_sock *sk; 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci /* Verify that new destination is available. */ 15362306a36Sopenharmony_ci memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr)); 15462306a36Sopenharmony_ci memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport)); 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4); 15762306a36Sopenharmony_ci tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4); 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci /* Bind to device and unbind it. */ 16062306a36Sopenharmony_ci if (bind_to_device(ctx)) 16162306a36Sopenharmony_ci return 0; 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci if (set_keepalive(ctx)) 16462306a36Sopenharmony_ci return 0; 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci if (set_notsent_lowat(ctx)) 16762306a36Sopenharmony_ci return 0; 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) 17062306a36Sopenharmony_ci return 0; 17162306a36Sopenharmony_ci else if (ctx->type == SOCK_STREAM) 17262306a36Sopenharmony_ci sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4), 17362306a36Sopenharmony_ci BPF_F_CURRENT_NETNS, 0); 17462306a36Sopenharmony_ci else 17562306a36Sopenharmony_ci sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4), 17662306a36Sopenharmony_ci BPF_F_CURRENT_NETNS, 0); 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci if (!sk) 17962306a36Sopenharmony_ci return 0; 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci if (sk->src_ip4 != tuple.ipv4.daddr || 18262306a36Sopenharmony_ci sk->src_port != DST_REWRITE_PORT4) { 18362306a36Sopenharmony_ci bpf_sk_release(sk); 18462306a36Sopenharmony_ci return 0; 18562306a36Sopenharmony_ci } 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci bpf_sk_release(sk); 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci /* Rewrite congestion control. */ 19062306a36Sopenharmony_ci if (ctx->type == SOCK_STREAM && set_cc(ctx)) 19162306a36Sopenharmony_ci return 0; 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ci /* Rewrite destination. */ 19462306a36Sopenharmony_ci ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4); 19562306a36Sopenharmony_ci ctx->user_port = bpf_htons(DST_REWRITE_PORT4); 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci return do_bind(ctx) ? 1 : 0; 19862306a36Sopenharmony_ci} 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_cichar _license[] SEC("license") = "GPL"; 201