18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/* Copyright (c) 2016 Facebook
38c2ecf20Sopenharmony_ci */
48c2ecf20Sopenharmony_ci#include <linux/bpf.h>
58c2ecf20Sopenharmony_ci#include <linux/if_link.h>
68c2ecf20Sopenharmony_ci#include <assert.h>
78c2ecf20Sopenharmony_ci#include <errno.h>
88c2ecf20Sopenharmony_ci#include <signal.h>
98c2ecf20Sopenharmony_ci#include <stdio.h>
108c2ecf20Sopenharmony_ci#include <stdlib.h>
118c2ecf20Sopenharmony_ci#include <string.h>
128c2ecf20Sopenharmony_ci#include <net/if.h>
138c2ecf20Sopenharmony_ci#include <sys/resource.h>
148c2ecf20Sopenharmony_ci#include <arpa/inet.h>
158c2ecf20Sopenharmony_ci#include <netinet/ether.h>
168c2ecf20Sopenharmony_ci#include <unistd.h>
178c2ecf20Sopenharmony_ci#include <time.h>
188c2ecf20Sopenharmony_ci#include <bpf/libbpf.h>
198c2ecf20Sopenharmony_ci#include <bpf/bpf.h>
208c2ecf20Sopenharmony_ci#include "bpf_util.h"
218c2ecf20Sopenharmony_ci#include "xdp_tx_iptunnel_common.h"
228c2ecf20Sopenharmony_ci
/* Seconds poll_stats() sleeps between two passes; also the rate divisor. */
238c2ecf20Sopenharmony_ci#define STATS_INTERVAL_S 2U
248c2ecf20Sopenharmony_ci
/* Interface index taken from -i; int_exit() only acts when it is > -1. */
258c2ecf20Sopenharmony_cistatic int ifindex = -1;
/*
 * Flags handed to bpf_set_link_xdp_fd()/bpf_get_link_xdp_id().  -F clears
 * XDP_FLAGS_UPDATE_IF_NOEXIST, -S adds XDP_FLAGS_SKB_MODE, and main() adds
 * XDP_FLAGS_DRV_MODE when skb mode was not requested.
 */
268c2ecf20Sopenharmony_cistatic __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
/* fd of the "rxcnt" map of the loaded object; read by poll_stats(). */
278c2ecf20Sopenharmony_cistatic int rxcnt_map_fd;
/*
 * Id of the program loaded by main(); int_exit() compares it with the id
 * currently attached to the interface before detaching.
 */
288c2ecf20Sopenharmony_cistatic __u32 prog_id;
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_cistatic void int_exit(int sig)
318c2ecf20Sopenharmony_ci{
328c2ecf20Sopenharmony_ci	__u32 curr_prog_id = 0;
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_ci	if (ifindex > -1) {
358c2ecf20Sopenharmony_ci		if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
368c2ecf20Sopenharmony_ci			printf("bpf_get_link_xdp_id failed\n");
378c2ecf20Sopenharmony_ci			exit(1);
388c2ecf20Sopenharmony_ci		}
398c2ecf20Sopenharmony_ci		if (prog_id == curr_prog_id)
408c2ecf20Sopenharmony_ci			bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
418c2ecf20Sopenharmony_ci		else if (!curr_prog_id)
428c2ecf20Sopenharmony_ci			printf("couldn't find a prog id on a given iface\n");
438c2ecf20Sopenharmony_ci		else
448c2ecf20Sopenharmony_ci			printf("program on interface changed, not removing\n");
458c2ecf20Sopenharmony_ci	}
468c2ecf20Sopenharmony_ci	exit(0);
478c2ecf20Sopenharmony_ci}
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_ci/* simple per-protocol drop counter
508c2ecf20Sopenharmony_ci */
518c2ecf20Sopenharmony_cistatic void poll_stats(unsigned int kill_after_s)
528c2ecf20Sopenharmony_ci{
538c2ecf20Sopenharmony_ci	const unsigned int nr_protos = 256;
548c2ecf20Sopenharmony_ci	unsigned int nr_cpus = bpf_num_possible_cpus();
558c2ecf20Sopenharmony_ci	time_t started_at = time(NULL);
568c2ecf20Sopenharmony_ci	__u64 values[nr_cpus], prev[nr_protos][nr_cpus];
578c2ecf20Sopenharmony_ci	__u32 proto;
588c2ecf20Sopenharmony_ci	int i;
598c2ecf20Sopenharmony_ci
608c2ecf20Sopenharmony_ci	memset(prev, 0, sizeof(prev));
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci	while (!kill_after_s || time(NULL) - started_at <= kill_after_s) {
638c2ecf20Sopenharmony_ci		sleep(STATS_INTERVAL_S);
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_ci		for (proto = 0; proto < nr_protos; proto++) {
668c2ecf20Sopenharmony_ci			__u64 sum = 0;
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_ci			assert(bpf_map_lookup_elem(rxcnt_map_fd, &proto,
698c2ecf20Sopenharmony_ci						   values) == 0);
708c2ecf20Sopenharmony_ci			for (i = 0; i < nr_cpus; i++)
718c2ecf20Sopenharmony_ci				sum += (values[i] - prev[proto][i]);
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci			if (sum)
748c2ecf20Sopenharmony_ci				printf("proto %u: sum:%10llu pkts, rate:%10llu pkts/s\n",
758c2ecf20Sopenharmony_ci				       proto, sum, sum / STATS_INTERVAL_S);
768c2ecf20Sopenharmony_ci			memcpy(prev[proto], values, sizeof(values));
778c2ecf20Sopenharmony_ci		}
788c2ecf20Sopenharmony_ci	}
798c2ecf20Sopenharmony_ci}
808c2ecf20Sopenharmony_ci
/*
 * Print the help text to stdout.
 *
 * cmd: program name shown on the "Usage:" line.
 */
static void usage(const char *cmd)
{
	static const char *const option_help[] = {
		"    -i <ifname|ifindex> Interface\n",
		"    -a <vip-service-address> IPv4 or IPv6\n",
		"    -p <vip-service-port> A port range (e.g. 433-444) is also allowed\n",
		"    -s <source-ip> Used in the IPTunnel header\n",
		"    -d <dest-ip> Used in the IPTunnel header\n",
		"    -m <dest-MAC> Used in sending the IP Tunneled pkt\n",
		"    -T <stop-after-X-seconds> Default: 0 (forever)\n",
		"    -P <IP-Protocol> Default is TCP\n",
		"    -S use skb-mode\n",
		"    -N enforce native mode\n",
		"    -F Force loading the XDP prog\n",
		"    -h Display this help\n",
	};
	const size_t nr_lines = sizeof(option_help) / sizeof(option_help[0]);
	size_t line;

	fputs("Start a XDP prog which encapsulates incoming packets\n"
	      "in an IPv4/v6 header and XDP_TX it out.  The dst <VIP:PORT>\n"
	      "is used to select packets to encapsulate\n\n", stdout);
	printf("Usage: %s [...]\n", cmd);

	/* One line per option, in the order they are listed above. */
	for (line = 0; line < nr_lines; line++)
		fputs(option_help[line], stdout);
}
1008c2ecf20Sopenharmony_ci
/*
 * Convert a textual IPv4 or IPv6 address to binary.
 *
 * ipstr: NUL-terminated address text; IPv6 is tried first, then IPv4.
 * addr:  output buffer of four 32-bit words.  An IPv6 address fills all
 *        four; an IPv4 address is stored in addr[0] and addr[1..3] are
 *        cleared.
 *
 * Returns AF_INET6 or AF_INET according to what was parsed, or AF_UNSPEC
 * after printing a diagnostic to stderr when the text is neither.
 */
static int parse_ipstr(const char *ipstr, unsigned int *addr)
{
	int word;

	if (inet_pton(AF_INET6, ipstr, addr) == 1)
		return AF_INET6;

	if (inet_pton(AF_INET, ipstr, addr) != 1) {
		fprintf(stderr, "%s is an invalid IP\n", ipstr);
		return AF_UNSPEC;
	}

	/* IPv4 occupies only the first word; zero the remaining three. */
	for (word = 1; word < 4; word++)
		addr[word] = 0;

	return AF_INET;
}
1138c2ecf20Sopenharmony_ci
1148c2ecf20Sopenharmony_cistatic int parse_ports(const char *port_str, int *min_port, int *max_port)
1158c2ecf20Sopenharmony_ci{
1168c2ecf20Sopenharmony_ci	char *end;
1178c2ecf20Sopenharmony_ci	long tmp_min_port;
1188c2ecf20Sopenharmony_ci	long tmp_max_port;
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci	tmp_min_port = strtol(optarg, &end, 10);
1218c2ecf20Sopenharmony_ci	if (tmp_min_port < 1 || tmp_min_port > 65535) {
1228c2ecf20Sopenharmony_ci		fprintf(stderr, "Invalid port(s):%s\n", optarg);
1238c2ecf20Sopenharmony_ci		return 1;
1248c2ecf20Sopenharmony_ci	}
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci	if (*end == '-') {
1278c2ecf20Sopenharmony_ci		end++;
1288c2ecf20Sopenharmony_ci		tmp_max_port = strtol(end, NULL, 10);
1298c2ecf20Sopenharmony_ci		if (tmp_max_port < 1 || tmp_max_port > 65535) {
1308c2ecf20Sopenharmony_ci			fprintf(stderr, "Invalid port(s):%s\n", optarg);
1318c2ecf20Sopenharmony_ci			return 1;
1328c2ecf20Sopenharmony_ci		}
1338c2ecf20Sopenharmony_ci	} else {
1348c2ecf20Sopenharmony_ci		tmp_max_port = tmp_min_port;
1358c2ecf20Sopenharmony_ci	}
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_ci	if (tmp_min_port > tmp_max_port) {
1388c2ecf20Sopenharmony_ci		fprintf(stderr, "Invalid port(s):%s\n", optarg);
1398c2ecf20Sopenharmony_ci		return 1;
1408c2ecf20Sopenharmony_ci	}
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_ci	if (tmp_max_port - tmp_min_port + 1 > MAX_IPTNL_ENTRIES) {
1438c2ecf20Sopenharmony_ci		fprintf(stderr, "Port range (%s) is larger than %u\n",
1448c2ecf20Sopenharmony_ci			port_str, MAX_IPTNL_ENTRIES);
1458c2ecf20Sopenharmony_ci		return 1;
1468c2ecf20Sopenharmony_ci	}
1478c2ecf20Sopenharmony_ci	*min_port = tmp_min_port;
1488c2ecf20Sopenharmony_ci	*max_port = tmp_max_port;
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci	return 0;
1518c2ecf20Sopenharmony_ci}
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ciint main(int argc, char **argv)
1548c2ecf20Sopenharmony_ci{
1558c2ecf20Sopenharmony_ci	struct bpf_prog_load_attr prog_load_attr = {
1568c2ecf20Sopenharmony_ci		.prog_type	= BPF_PROG_TYPE_XDP,
1578c2ecf20Sopenharmony_ci	};
1588c2ecf20Sopenharmony_ci	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
1598c2ecf20Sopenharmony_ci	int min_port = 0, max_port = 0, vip2tnl_map_fd;
1608c2ecf20Sopenharmony_ci	const char *optstr = "i:a:p:s:d:m:T:P:FSNh";
1618c2ecf20Sopenharmony_ci	unsigned char opt_flags[256] = {};
1628c2ecf20Sopenharmony_ci	struct bpf_prog_info info = {};
1638c2ecf20Sopenharmony_ci	__u32 info_len = sizeof(info);
1648c2ecf20Sopenharmony_ci	unsigned int kill_after_s = 0;
1658c2ecf20Sopenharmony_ci	struct iptnl_info tnl = {};
1668c2ecf20Sopenharmony_ci	struct bpf_object *obj;
1678c2ecf20Sopenharmony_ci	struct vip vip = {};
1688c2ecf20Sopenharmony_ci	char filename[256];
1698c2ecf20Sopenharmony_ci	int opt, prog_fd;
1708c2ecf20Sopenharmony_ci	int i, err;
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ci	tnl.family = AF_UNSPEC;
1738c2ecf20Sopenharmony_ci	vip.protocol = IPPROTO_TCP;
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci	for (i = 0; i < strlen(optstr); i++)
1768c2ecf20Sopenharmony_ci		if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z')
1778c2ecf20Sopenharmony_ci			opt_flags[(unsigned char)optstr[i]] = 1;
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci	while ((opt = getopt(argc, argv, optstr)) != -1) {
1808c2ecf20Sopenharmony_ci		unsigned short family;
1818c2ecf20Sopenharmony_ci		unsigned int *v6;
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci		switch (opt) {
1848c2ecf20Sopenharmony_ci		case 'i':
1858c2ecf20Sopenharmony_ci			ifindex = if_nametoindex(optarg);
1868c2ecf20Sopenharmony_ci			if (!ifindex)
1878c2ecf20Sopenharmony_ci				ifindex = atoi(optarg);
1888c2ecf20Sopenharmony_ci			break;
1898c2ecf20Sopenharmony_ci		case 'a':
1908c2ecf20Sopenharmony_ci			vip.family = parse_ipstr(optarg, vip.daddr.v6);
1918c2ecf20Sopenharmony_ci			if (vip.family == AF_UNSPEC)
1928c2ecf20Sopenharmony_ci				return 1;
1938c2ecf20Sopenharmony_ci			break;
1948c2ecf20Sopenharmony_ci		case 'p':
1958c2ecf20Sopenharmony_ci			if (parse_ports(optarg, &min_port, &max_port))
1968c2ecf20Sopenharmony_ci				return 1;
1978c2ecf20Sopenharmony_ci			break;
1988c2ecf20Sopenharmony_ci		case 'P':
1998c2ecf20Sopenharmony_ci			vip.protocol = atoi(optarg);
2008c2ecf20Sopenharmony_ci			break;
2018c2ecf20Sopenharmony_ci		case 's':
2028c2ecf20Sopenharmony_ci		case 'd':
2038c2ecf20Sopenharmony_ci			if (opt == 's')
2048c2ecf20Sopenharmony_ci				v6 = tnl.saddr.v6;
2058c2ecf20Sopenharmony_ci			else
2068c2ecf20Sopenharmony_ci				v6 = tnl.daddr.v6;
2078c2ecf20Sopenharmony_ci
2088c2ecf20Sopenharmony_ci			family = parse_ipstr(optarg, v6);
2098c2ecf20Sopenharmony_ci			if (family == AF_UNSPEC)
2108c2ecf20Sopenharmony_ci				return 1;
2118c2ecf20Sopenharmony_ci			if (tnl.family == AF_UNSPEC) {
2128c2ecf20Sopenharmony_ci				tnl.family = family;
2138c2ecf20Sopenharmony_ci			} else if (tnl.family != family) {
2148c2ecf20Sopenharmony_ci				fprintf(stderr,
2158c2ecf20Sopenharmony_ci					"The IP version of the src and dst addresses used in the IP encapsulation does not match\n");
2168c2ecf20Sopenharmony_ci				return 1;
2178c2ecf20Sopenharmony_ci			}
2188c2ecf20Sopenharmony_ci			break;
2198c2ecf20Sopenharmony_ci		case 'm':
2208c2ecf20Sopenharmony_ci			if (!ether_aton_r(optarg,
2218c2ecf20Sopenharmony_ci					  (struct ether_addr *)tnl.dmac)) {
2228c2ecf20Sopenharmony_ci				fprintf(stderr, "Invalid mac address:%s\n",
2238c2ecf20Sopenharmony_ci					optarg);
2248c2ecf20Sopenharmony_ci				return 1;
2258c2ecf20Sopenharmony_ci			}
2268c2ecf20Sopenharmony_ci			break;
2278c2ecf20Sopenharmony_ci		case 'T':
2288c2ecf20Sopenharmony_ci			kill_after_s = atoi(optarg);
2298c2ecf20Sopenharmony_ci			break;
2308c2ecf20Sopenharmony_ci		case 'S':
2318c2ecf20Sopenharmony_ci			xdp_flags |= XDP_FLAGS_SKB_MODE;
2328c2ecf20Sopenharmony_ci			break;
2338c2ecf20Sopenharmony_ci		case 'N':
2348c2ecf20Sopenharmony_ci			/* default, set below */
2358c2ecf20Sopenharmony_ci			break;
2368c2ecf20Sopenharmony_ci		case 'F':
2378c2ecf20Sopenharmony_ci			xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
2388c2ecf20Sopenharmony_ci			break;
2398c2ecf20Sopenharmony_ci		default:
2408c2ecf20Sopenharmony_ci			usage(argv[0]);
2418c2ecf20Sopenharmony_ci			return 1;
2428c2ecf20Sopenharmony_ci		}
2438c2ecf20Sopenharmony_ci		opt_flags[opt] = 0;
2448c2ecf20Sopenharmony_ci	}
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_ci	if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
2478c2ecf20Sopenharmony_ci		xdp_flags |= XDP_FLAGS_DRV_MODE;
2488c2ecf20Sopenharmony_ci
2498c2ecf20Sopenharmony_ci	for (i = 0; i < strlen(optstr); i++) {
2508c2ecf20Sopenharmony_ci		if (opt_flags[(unsigned int)optstr[i]]) {
2518c2ecf20Sopenharmony_ci			fprintf(stderr, "Missing argument -%c\n", optstr[i]);
2528c2ecf20Sopenharmony_ci			usage(argv[0]);
2538c2ecf20Sopenharmony_ci			return 1;
2548c2ecf20Sopenharmony_ci		}
2558c2ecf20Sopenharmony_ci	}
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
2588c2ecf20Sopenharmony_ci		perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
2598c2ecf20Sopenharmony_ci		return 1;
2608c2ecf20Sopenharmony_ci	}
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ci	if (!ifindex) {
2638c2ecf20Sopenharmony_ci		fprintf(stderr, "Invalid ifname\n");
2648c2ecf20Sopenharmony_ci		return 1;
2658c2ecf20Sopenharmony_ci	}
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_ci	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
2688c2ecf20Sopenharmony_ci	prog_load_attr.file = filename;
2698c2ecf20Sopenharmony_ci
2708c2ecf20Sopenharmony_ci	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
2718c2ecf20Sopenharmony_ci		return 1;
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_ci	if (!prog_fd) {
2748c2ecf20Sopenharmony_ci		printf("bpf_prog_load_xattr: %s\n", strerror(errno));
2758c2ecf20Sopenharmony_ci		return 1;
2768c2ecf20Sopenharmony_ci	}
2778c2ecf20Sopenharmony_ci
2788c2ecf20Sopenharmony_ci	rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
2798c2ecf20Sopenharmony_ci	vip2tnl_map_fd = bpf_object__find_map_fd_by_name(obj, "vip2tnl");
2808c2ecf20Sopenharmony_ci	if (vip2tnl_map_fd < 0 || rxcnt_map_fd < 0) {
2818c2ecf20Sopenharmony_ci		printf("bpf_object__find_map_fd_by_name failed\n");
2828c2ecf20Sopenharmony_ci		return 1;
2838c2ecf20Sopenharmony_ci	}
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_ci	signal(SIGINT, int_exit);
2868c2ecf20Sopenharmony_ci	signal(SIGTERM, int_exit);
2878c2ecf20Sopenharmony_ci
2888c2ecf20Sopenharmony_ci	while (min_port <= max_port) {
2898c2ecf20Sopenharmony_ci		vip.dport = htons(min_port++);
2908c2ecf20Sopenharmony_ci		if (bpf_map_update_elem(vip2tnl_map_fd, &vip, &tnl,
2918c2ecf20Sopenharmony_ci					BPF_NOEXIST)) {
2928c2ecf20Sopenharmony_ci			perror("bpf_map_update_elem(&vip2tnl)");
2938c2ecf20Sopenharmony_ci			return 1;
2948c2ecf20Sopenharmony_ci		}
2958c2ecf20Sopenharmony_ci	}
2968c2ecf20Sopenharmony_ci
2978c2ecf20Sopenharmony_ci	if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
2988c2ecf20Sopenharmony_ci		printf("link set xdp fd failed\n");
2998c2ecf20Sopenharmony_ci		return 1;
3008c2ecf20Sopenharmony_ci	}
3018c2ecf20Sopenharmony_ci
3028c2ecf20Sopenharmony_ci	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
3038c2ecf20Sopenharmony_ci	if (err) {
3048c2ecf20Sopenharmony_ci		printf("can't get prog info - %s\n", strerror(errno));
3058c2ecf20Sopenharmony_ci		return err;
3068c2ecf20Sopenharmony_ci	}
3078c2ecf20Sopenharmony_ci	prog_id = info.id;
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_ci	poll_stats(kill_after_s);
3108c2ecf20Sopenharmony_ci
3118c2ecf20Sopenharmony_ci	bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_ci	return 0;
3148c2ecf20Sopenharmony_ci}
315