1// SPDX-License-Identifier: GPL-2.0
2
3/* Reference program for verifying XDP metadata on real HW. Functional test
4 * only, doesn't test the performance.
5 *
6 * RX:
7 * - UDP 9091 packets are diverted into AF_XDP
8 * - Metadata verified:
9 *   - rx_timestamp
10 *   - rx_hash
11 *
12 * TX:
13 * - TBD
14 */
15
16#include <test_progs.h>
17#include <network_helpers.h>
18#include "xdp_hw_metadata.skel.h"
19#include "xsk.h"
20
21#include <error.h>
22#include <linux/errqueue.h>
23#include <linux/if_link.h>
24#include <linux/net_tstamp.h>
25#include <linux/udp.h>
26#include <linux/sockios.h>
27#include <sys/mman.h>
28#include <net/if.h>
29#include <poll.h>
30#include <time.h>
31
32#include "xdp_metadata.h"
33
/* Number of frames in each socket's UMEM. open_xsk() uses the first half of
 * them for TX and posts the second half to the fill ring for RX.
 */
#define UMEM_NUM 16
/* Size in bytes of a single UMEM frame. */
#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
/* Total size in bytes of the mmap()ed UMEM area. */
#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
/* Flags passed to bpf_xdp_detach() in cleanup() and bpf_xdp_attach() in main(). */
#define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE)
38
/* State of one AF_XDP socket: the UMEM backing memory plus the rings and the
 * socket that open_xsk() creates on top of it. main() keeps one instance per
 * RX queue.
 */
struct xsk {
	void *umem_area;		/* mmap()ed frame memory, UMEM_SIZE bytes */
	struct xsk_umem *umem;		/* handle returned by xsk_umem__create() */
	struct xsk_ring_prod fill;	/* fill ring, set up by xsk_umem__create() */
	struct xsk_ring_cons comp;	/* completion ring, set up by xsk_umem__create() */
	struct xsk_ring_prod tx;	/* TX ring, set up by xsk_socket__create() */
	struct xsk_ring_cons rx;	/* RX ring, set up by xsk_socket__create() */
	struct xsk_socket *socket;	/* handle returned by xsk_socket__create() */
};
48
/* Program-wide state. It is filled in by main() and read back by cleanup(). */
struct xdp_hw_metadata *bpf_obj;	/* BPF skeleton, NULL until opened */
struct xsk *rx_xsk;			/* array of rxq AF_XDP sockets */
const char *ifname;			/* device name taken from argv[1] */
int ifindex;				/* interface index of ifname */
int rxq;				/* queue count reported by rxq_num() */

void test__fail(void) { /* for network_helpers.c */ }
56
57static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
58{
59	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
60	const struct xsk_socket_config socket_config = {
61		.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
62		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
63		.bind_flags = XDP_COPY,
64	};
65	const struct xsk_umem_config umem_config = {
66		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
67		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
68		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
69		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
70	};
71	__u32 idx = 0;
72	u64 addr;
73	int ret;
74	int i;
75
76	xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
77	if (xsk->umem_area == MAP_FAILED)
78		return -ENOMEM;
79
80	ret = xsk_umem__create(&xsk->umem,
81			       xsk->umem_area, UMEM_SIZE,
82			       &xsk->fill,
83			       &xsk->comp,
84			       &umem_config);
85	if (ret)
86		return ret;
87
88	ret = xsk_socket__create(&xsk->socket, ifindex, queue_id,
89				 xsk->umem,
90				 &xsk->rx,
91				 &xsk->tx,
92				 &socket_config);
93	if (ret)
94		return ret;
95
96	/* First half of umem is for TX. This way address matches 1-to-1
97	 * to the completion queue index.
98	 */
99
100	for (i = 0; i < UMEM_NUM / 2; i++) {
101		addr = i * UMEM_FRAME_SIZE;
102		printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
103	}
104
105	/* Second half of umem is for RX. */
106
107	ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
108	for (i = 0; i < UMEM_NUM / 2; i++) {
109		addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
110		printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
111		*xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
112	}
113	xsk_ring_prod__submit(&xsk->fill, ret);
114
115	return 0;
116}
117
118static void close_xsk(struct xsk *xsk)
119{
120	if (xsk->umem)
121		xsk_umem__delete(xsk->umem);
122	if (xsk->socket)
123		xsk_socket__delete(xsk->socket);
124	munmap(xsk->umem_area, UMEM_SIZE);
125}
126
127static void refill_rx(struct xsk *xsk, __u64 addr)
128{
129	__u32 idx;
130
131	if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) {
132		printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
133		*xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
134		xsk_ring_prod__submit(&xsk->fill, 1);
135	}
136}
137
#define NANOSEC_PER_SEC 1000000000 /* 10^9 */

/* Read @clock_id and return its current value in nanoseconds.
 *
 * Terminates the program with exit status 1, like the other fatal errors in
 * this file, if the clock cannot be read.
 */
static __u64 gettime(clockid_t clock_id)
{
	struct timespec t;

	/* See man clock_gettime(2) for type of clock_id's */
	if (clock_gettime(clock_id, &t) < 0)
		error(1, errno, "Error with clock_gettime()");

	/* Widen before multiplying so the seconds part cannot overflow int. */
	return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
}
152
153static void verify_xdp_metadata(void *data, clockid_t clock_id)
154{
155	struct xdp_meta *meta;
156
157	meta = data - sizeof(*meta);
158
159	if (meta->rx_hash_err < 0)
160		printf("No rx_hash err=%d\n", meta->rx_hash_err);
161	else
162		printf("rx_hash: 0x%X with RSS type:0x%X\n",
163		       meta->rx_hash, meta->rx_hash_type);
164
165	printf("rx_timestamp:  %llu (sec:%0.4f)\n", meta->rx_timestamp,
166	       (double)meta->rx_timestamp / NANOSEC_PER_SEC);
167	if (meta->rx_timestamp) {
168		__u64 usr_clock = gettime(clock_id);
169		__u64 xdp_clock = meta->xdp_timestamp;
170		__s64 delta_X = xdp_clock - meta->rx_timestamp;
171		__s64 delta_X2U = usr_clock - xdp_clock;
172
173		printf("XDP RX-time:   %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
174		       xdp_clock, (double)xdp_clock / NANOSEC_PER_SEC,
175		       (double)delta_X / NANOSEC_PER_SEC,
176		       (double)delta_X / 1000);
177
178		printf("AF_XDP time:   %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
179		       usr_clock, (double)usr_clock / NANOSEC_PER_SEC,
180		       (double)delta_X2U / NANOSEC_PER_SEC,
181		       (double)delta_X2U / 1000);
182	}
183
184}
185
186static void verify_skb_metadata(int fd)
187{
188	char cmsg_buf[1024];
189	char packet_buf[128];
190
191	struct scm_timestamping *ts;
192	struct iovec packet_iov;
193	struct cmsghdr *cmsg;
194	struct msghdr hdr;
195
196	memset(&hdr, 0, sizeof(hdr));
197	hdr.msg_iov = &packet_iov;
198	hdr.msg_iovlen = 1;
199	packet_iov.iov_base = packet_buf;
200	packet_iov.iov_len = sizeof(packet_buf);
201
202	hdr.msg_control = cmsg_buf;
203	hdr.msg_controllen = sizeof(cmsg_buf);
204
205	if (recvmsg(fd, &hdr, 0) < 0)
206		error(1, errno, "recvmsg");
207
208	for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
209	     cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
210
211		if (cmsg->cmsg_level != SOL_SOCKET)
212			continue;
213
214		switch (cmsg->cmsg_type) {
215		case SCM_TIMESTAMPING:
216			ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
217			if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) {
218				printf("found skb hwtstamp = %lu.%lu\n",
219				       ts->ts[2].tv_sec, ts->ts[2].tv_nsec);
220				return;
221			}
222			break;
223		default:
224			break;
225		}
226	}
227
228	printf("skb hwtstamp is not found!\n");
229}
230
231static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
232{
233	const struct xdp_desc *rx_desc;
234	struct pollfd fds[rxq + 1];
235	__u64 comp_addr;
236	__u64 addr;
237	__u32 idx;
238	int ret;
239	int i;
240
241	for (i = 0; i < rxq; i++) {
242		fds[i].fd = xsk_socket__fd(rx_xsk[i].socket);
243		fds[i].events = POLLIN;
244		fds[i].revents = 0;
245	}
246
247	fds[rxq].fd = server_fd;
248	fds[rxq].events = POLLIN;
249	fds[rxq].revents = 0;
250
251	while (true) {
252		errno = 0;
253		ret = poll(fds, rxq + 1, 1000);
254		printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n",
255		       ret, errno, bpf_obj->bss->pkts_skip,
256		       bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir);
257		if (ret < 0)
258			break;
259		if (ret == 0)
260			continue;
261
262		if (fds[rxq].revents)
263			verify_skb_metadata(server_fd);
264
265		for (i = 0; i < rxq; i++) {
266			if (fds[i].revents == 0)
267				continue;
268
269			struct xsk *xsk = &rx_xsk[i];
270
271			ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
272			printf("xsk_ring_cons__peek: %d\n", ret);
273			if (ret != 1)
274				continue;
275
276			rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
277			comp_addr = xsk_umem__extract_addr(rx_desc->addr);
278			addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
279			printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
280			       xsk, idx, rx_desc->addr, addr, comp_addr);
281			verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
282					    clock_id);
283			xsk_ring_cons__release(&xsk->rx, 1);
284			refill_rx(xsk, comp_addr);
285		}
286	}
287
288	return 0;
289}
290
/* Request/response buffer for the ETHTOOL_GCHANNELS ioctl issued by
 * rxq_num(). Defined locally instead of being pulled in from a header.
 * NOTE(review): presumably mirrors struct ethtool_channels from
 * <linux/ethtool.h>; the field order must match the kernel's - confirm.
 */
struct ethtool_channels {
	__u32	cmd;		/* set to ETHTOOL_GCHANNELS by rxq_num() */
	__u32	max_rx;
	__u32	max_tx;
	__u32	max_other;
	__u32	max_combined;
	__u32	rx_count;	/* summed with combined_count by rxq_num() */
	__u32	tx_count;
	__u32	other_count;
	__u32	combined_count;	/* summed with rx_count by rxq_num() */
};

#define ETHTOOL_GCHANNELS	0x0000003c /* Get no of channels */
304
305static int rxq_num(const char *ifname)
306{
307	struct ethtool_channels ch = {
308		.cmd = ETHTOOL_GCHANNELS,
309	};
310
311	struct ifreq ifr = {
312		.ifr_data = (void *)&ch,
313	};
314	strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
315	int fd, ret;
316
317	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
318	if (fd < 0)
319		error(1, errno, "socket");
320
321	ret = ioctl(fd, SIOCETHTOOL, &ifr);
322	if (ret < 0)
323		error(1, errno, "ioctl(SIOCETHTOOL)");
324
325	close(fd);
326
327	return ch.rx_count + ch.combined_count;
328}
329
330static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg)
331{
332	struct ifreq ifr = {
333		.ifr_data = (void *)cfg,
334	};
335	strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
336	int fd, ret;
337
338	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
339	if (fd < 0)
340		error(1, errno, "socket");
341
342	ret = ioctl(fd, op, &ifr);
343	if (ret < 0)
344		error(1, errno, "ioctl(%d)", op);
345
346	close(fd);
347}
348
/* Timestamping configuration read by hwtstamp_enable() before it installs
 * its own, and the name of the interface that configuration belongs to.
 */
static struct hwtstamp_config saved_hwtstamp_cfg;
static const char *saved_hwtstamp_ifname;

/* atexit() handler registered by hwtstamp_enable(): writes the saved
 * configuration back to the interface with SIOCSHWTSTAMP.
 */
static void hwtstamp_restore(void)
{
	hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, &saved_hwtstamp_cfg);
}
356
357static void hwtstamp_enable(const char *ifname)
358{
359	struct hwtstamp_config cfg = {
360		.rx_filter = HWTSTAMP_FILTER_ALL,
361	};
362
363	hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg);
364	saved_hwtstamp_ifname = strdup(ifname);
365	atexit(hwtstamp_restore);
366
367	hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg);
368}
369
370static void cleanup(void)
371{
372	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
373	int ret;
374	int i;
375
376	if (bpf_obj) {
377		opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx);
378		if (opts.old_prog_fd >= 0) {
379			printf("detaching bpf program....\n");
380			ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts);
381			if (ret)
382				printf("failed to detach XDP program: %d\n", ret);
383		}
384	}
385
386	for (i = 0; i < rxq; i++)
387		close_xsk(&rx_xsk[i]);
388
389	if (bpf_obj)
390		xdp_hw_metadata__destroy(bpf_obj);
391}
392
/* SIGINT handler installed by main(). It deliberately does nothing: the
 * delivery of the signal alone makes the poll() in verify_metadata() return.
 */
static void handle_signal(int sig)
{
	/* interrupting poll() is all we need */
}
397
/* Set the SO_TIMESTAMPING socket option on @fd to the flag mask @val.
 * Exits the program if the option cannot be set.
 */
static void timestamping_enable(int fd, int val)
{
	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)) < 0)
		error(1, errno, "setsockopt(SO_TIMESTAMPING)");
}
406
407int main(int argc, char *argv[])
408{
409	clockid_t clock_id = CLOCK_TAI;
410	int server_fd = -1;
411	int ret;
412	int i;
413
414	struct bpf_program *prog;
415
416	if (argc != 2) {
417		fprintf(stderr, "pass device name\n");
418		return -1;
419	}
420
421	ifname = argv[1];
422	ifindex = if_nametoindex(ifname);
423	rxq = rxq_num(ifname);
424
425	printf("rxq: %d\n", rxq);
426
427	hwtstamp_enable(ifname);
428
429	rx_xsk = malloc(sizeof(struct xsk) * rxq);
430	if (!rx_xsk)
431		error(1, ENOMEM, "malloc");
432
433	for (i = 0; i < rxq; i++) {
434		printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i);
435		ret = open_xsk(ifindex, &rx_xsk[i], i);
436		if (ret)
437			error(1, -ret, "open_xsk");
438
439		printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket));
440	}
441
442	printf("open bpf program...\n");
443	bpf_obj = xdp_hw_metadata__open();
444	if (libbpf_get_error(bpf_obj))
445		error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open");
446
447	prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
448	bpf_program__set_ifindex(prog, ifindex);
449	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
450
451	printf("load bpf program...\n");
452	ret = xdp_hw_metadata__load(bpf_obj);
453	if (ret)
454		error(1, -ret, "xdp_hw_metadata__load");
455
456	printf("prepare skb endpoint...\n");
457	server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000);
458	if (server_fd < 0)
459		error(1, errno, "start_server");
460	timestamping_enable(server_fd,
461			    SOF_TIMESTAMPING_SOFTWARE |
462			    SOF_TIMESTAMPING_RAW_HARDWARE);
463
464	printf("prepare xsk map...\n");
465	for (i = 0; i < rxq; i++) {
466		int sock_fd = xsk_socket__fd(rx_xsk[i].socket);
467		__u32 queue_id = i;
468
469		printf("map[%d] = %d\n", queue_id, sock_fd);
470		ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
471		if (ret)
472			error(1, -ret, "bpf_map_update_elem");
473	}
474
475	printf("attach bpf program...\n");
476	ret = bpf_xdp_attach(ifindex,
477			     bpf_program__fd(bpf_obj->progs.rx),
478			     XDP_FLAGS, NULL);
479	if (ret)
480		error(1, -ret, "bpf_xdp_attach");
481
482	signal(SIGINT, handle_signal);
483	ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id);
484	close(server_fd);
485	cleanup();
486	if (ret)
487		error(1, -ret, "verify_metadata");
488}
489