1// SPDX-License-Identifier: GPL-2.0 2/* Copyright(c) 2017 - 2018 Intel Corporation. */ 3 4#include <asm/barrier.h> 5#include <errno.h> 6#include <getopt.h> 7#include <libgen.h> 8#include <linux/bpf.h> 9#include <linux/compiler.h> 10#include <linux/if_link.h> 11#include <linux/if_xdp.h> 12#include <linux/if_ether.h> 13#include <linux/ip.h> 14#include <linux/limits.h> 15#include <linux/udp.h> 16#include <arpa/inet.h> 17#include <locale.h> 18#include <net/ethernet.h> 19#include <net/if.h> 20#include <poll.h> 21#include <pthread.h> 22#include <signal.h> 23#include <stdbool.h> 24#include <stdio.h> 25#include <stdlib.h> 26#include <string.h> 27#include <sys/mman.h> 28#include <sys/resource.h> 29#include <sys/socket.h> 30#include <sys/types.h> 31#include <time.h> 32#include <unistd.h> 33 34#include <bpf/libbpf.h> 35#include <bpf/xsk.h> 36#include <bpf/bpf.h> 37#include "xdpsock.h" 38 39#ifndef SOL_XDP 40#define SOL_XDP 283 41#endif 42 43#ifndef AF_XDP 44#define AF_XDP 44 45#endif 46 47#ifndef PF_XDP 48#define PF_XDP AF_XDP 49#endif 50 51#define NUM_FRAMES (4 * 1024) 52#define MIN_PKT_SIZE 64 53 54#define DEBUG_HEXDUMP 0 55 56typedef __u64 u64; 57typedef __u32 u32; 58typedef __u16 u16; 59typedef __u8 u8; 60 61static unsigned long prev_time; 62 63enum benchmark_type { 64 BENCH_RXDROP = 0, 65 BENCH_TXONLY = 1, 66 BENCH_L2FWD = 2, 67}; 68 69static enum benchmark_type opt_bench = BENCH_RXDROP; 70static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; 71static const char *opt_if = ""; 72static int opt_ifindex; 73static int opt_queue; 74static unsigned long opt_duration; 75static unsigned long start_time; 76static bool benchmark_done; 77static u32 opt_batch_size = 64; 78static int opt_pkt_count; 79static u16 opt_pkt_size = MIN_PKT_SIZE; 80static u32 opt_pkt_fill_pattern = 0x12345678; 81static bool opt_extra_stats; 82static bool opt_quiet; 83static bool opt_app_stats; 84static const char *opt_irq_str = ""; 85static u32 irq_no; 86static int irqs_at_init = -1; 87static int opt_poll; 88static int opt_interval = 1; 89static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; 90static u32 opt_umem_flags; 91static int opt_unaligned_chunks; 92static int opt_mmap_flags; 93static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; 94static int opt_timeout = 1000; 95static bool opt_need_wakeup = true; 96static u32 opt_num_xsks = 1; 97static u32 prog_id; 98 99struct xsk_ring_stats { 100 unsigned long rx_npkts; 101 unsigned long tx_npkts; 102 unsigned long rx_dropped_npkts; 103 unsigned long rx_invalid_npkts; 104 unsigned long tx_invalid_npkts; 105 unsigned long rx_full_npkts; 106 unsigned long rx_fill_empty_npkts; 107 unsigned long tx_empty_npkts; 108 unsigned long prev_rx_npkts; 109 unsigned long prev_tx_npkts; 110 unsigned long prev_rx_dropped_npkts; 111 unsigned long prev_rx_invalid_npkts; 112 unsigned long prev_tx_invalid_npkts; 113 unsigned long prev_rx_full_npkts; 114 unsigned long prev_rx_fill_empty_npkts; 115 unsigned long prev_tx_empty_npkts; 116}; 117 118struct xsk_driver_stats { 119 unsigned long intrs; 120 unsigned long prev_intrs; 121}; 122 123struct xsk_app_stats { 124 unsigned long rx_empty_polls; 125 unsigned long fill_fail_polls; 126 unsigned long copy_tx_sendtos; 127 unsigned long tx_wakeup_sendtos; 128 unsigned long opt_polls; 129 unsigned long prev_rx_empty_polls; 130 unsigned long prev_fill_fail_polls; 131 unsigned long prev_copy_tx_sendtos; 132 unsigned long prev_tx_wakeup_sendtos; 133 unsigned long prev_opt_polls; 134}; 135 136struct xsk_umem_info { 137 struct xsk_ring_prod fq; 138 struct xsk_ring_cons cq; 139 struct xsk_umem *umem; 140 void *buffer; 141}; 142 143struct xsk_socket_info { 144 struct xsk_ring_cons rx; 145 struct xsk_ring_prod tx; 146 struct xsk_umem_info *umem; 147 struct xsk_socket *xsk; 148 struct xsk_ring_stats ring_stats; 149 struct xsk_app_stats app_stats; 150 struct xsk_driver_stats drv_stats; 151 u32 outstanding_tx; 152}; 153 154static int num_socks; 155struct xsk_socket_info *xsks[MAX_SOCKS]; 156 157static unsigned long get_nsecs(void) 158{ 159 struct timespec ts; 160 161 clock_gettime(CLOCK_MONOTONIC, &ts); 162 return ts.tv_sec * 1000000000UL + ts.tv_nsec; 163} 164 165static void print_benchmark(bool running) 166{ 167 const char *bench_str = "INVALID"; 168 169 if (opt_bench == BENCH_RXDROP) 170 bench_str = "rxdrop"; 171 else if (opt_bench == BENCH_TXONLY) 172 bench_str = "txonly"; 173 else if (opt_bench == BENCH_L2FWD) 174 bench_str = "l2fwd"; 175 176 printf("%s:%d %s ", opt_if, opt_queue, bench_str); 177 if (opt_xdp_flags & XDP_FLAGS_SKB_MODE) 178 printf("xdp-skb "); 179 else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE) 180 printf("xdp-drv "); 181 else 182 printf(" "); 183 184 if (opt_poll) 185 printf("poll() "); 186 187 if (running) { 188 printf("running..."); 189 fflush(stdout); 190 } 191} 192 193static int xsk_get_xdp_stats(int fd, struct xsk_socket_info *xsk) 194{ 195 struct xdp_statistics stats; 196 socklen_t optlen; 197 int err; 198 199 optlen = sizeof(stats); 200 err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); 201 if (err) 202 return err; 203 204 if (optlen == sizeof(struct xdp_statistics)) { 205 xsk->ring_stats.rx_dropped_npkts = stats.rx_dropped; 206 xsk->ring_stats.rx_invalid_npkts = stats.rx_invalid_descs; 207 xsk->ring_stats.tx_invalid_npkts = stats.tx_invalid_descs; 208 xsk->ring_stats.rx_full_npkts = stats.rx_ring_full; 209 xsk->ring_stats.rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs; 210 xsk->ring_stats.tx_empty_npkts = stats.tx_ring_empty_descs; 211 return 0; 212 } 213 214 return -EINVAL; 215} 216 217static void dump_app_stats(long dt) 218{ 219 int i; 220 221 for (i = 0; i < num_socks && xsks[i]; i++) { 222 char *fmt = "%-18s %'-14.0f %'-14lu\n"; 223 double rx_empty_polls_ps, fill_fail_polls_ps, copy_tx_sendtos_ps, 224 tx_wakeup_sendtos_ps, opt_polls_ps; 225 226 rx_empty_polls_ps = (xsks[i]->app_stats.rx_empty_polls - 227 xsks[i]->app_stats.prev_rx_empty_polls) * 1000000000. / dt; 228 fill_fail_polls_ps = (xsks[i]->app_stats.fill_fail_polls - 229 xsks[i]->app_stats.prev_fill_fail_polls) * 1000000000. / dt; 230 copy_tx_sendtos_ps = (xsks[i]->app_stats.copy_tx_sendtos - 231 xsks[i]->app_stats.prev_copy_tx_sendtos) * 1000000000. / dt; 232 tx_wakeup_sendtos_ps = (xsks[i]->app_stats.tx_wakeup_sendtos - 233 xsks[i]->app_stats.prev_tx_wakeup_sendtos) 234 * 1000000000. / dt; 235 opt_polls_ps = (xsks[i]->app_stats.opt_polls - 236 xsks[i]->app_stats.prev_opt_polls) * 1000000000. / dt; 237 238 printf("\n%-18s %-14s %-14s\n", "", "calls/s", "count"); 239 printf(fmt, "rx empty polls", rx_empty_polls_ps, xsks[i]->app_stats.rx_empty_polls); 240 printf(fmt, "fill fail polls", fill_fail_polls_ps, 241 xsks[i]->app_stats.fill_fail_polls); 242 printf(fmt, "copy tx sendtos", copy_tx_sendtos_ps, 243 xsks[i]->app_stats.copy_tx_sendtos); 244 printf(fmt, "tx wakeup sendtos", tx_wakeup_sendtos_ps, 245 xsks[i]->app_stats.tx_wakeup_sendtos); 246 printf(fmt, "opt polls", opt_polls_ps, xsks[i]->app_stats.opt_polls); 247 248 xsks[i]->app_stats.prev_rx_empty_polls = xsks[i]->app_stats.rx_empty_polls; 249 xsks[i]->app_stats.prev_fill_fail_polls = xsks[i]->app_stats.fill_fail_polls; 250 xsks[i]->app_stats.prev_copy_tx_sendtos = xsks[i]->app_stats.copy_tx_sendtos; 251 xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos; 252 xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls; 253 } 254} 255 256static bool get_interrupt_number(void) 257{ 258 FILE *f_int_proc; 259 char line[4096]; 260 bool found = false; 261 262 f_int_proc = fopen("/proc/interrupts", "r"); 263 if (f_int_proc == NULL) { 264 printf("Failed to open /proc/interrupts.\n"); 265 return found; 266 } 267 268 while (!feof(f_int_proc) && !found) { 269 /* Make sure to read a full line at a time */ 270 if (fgets(line, sizeof(line), f_int_proc) == NULL || 271 line[strlen(line) - 1] != '\n') { 272 printf("Error reading from interrupts file\n"); 273 break; 274 } 275 276 /* Extract interrupt number from line */ 277 if (strstr(line, opt_irq_str) != NULL) { 278 irq_no = atoi(line); 279 found = true; 280 break; 281 } 282 } 283 284 fclose(f_int_proc); 285 286 return found; 287} 288 289static int get_irqs(void) 290{ 291 char count_path[PATH_MAX]; 292 int total_intrs = -1; 293 FILE *f_count_proc; 294 char line[4096]; 295 296 snprintf(count_path, sizeof(count_path), 297 "/sys/kernel/irq/%i/per_cpu_count", irq_no); 298 f_count_proc = fopen(count_path, "r"); 299 if (f_count_proc == NULL) { 300 printf("Failed to open %s\n", count_path); 301 return total_intrs; 302 } 303 304 if (fgets(line, sizeof(line), f_count_proc) == NULL || 305 line[strlen(line) - 1] != '\n') { 306 printf("Error reading from %s\n", count_path); 307 } else { 308 static const char com[2] = ","; 309 char *token; 310 311 total_intrs = 0; 312 token = strtok(line, com); 313 while (token != NULL) { 314 /* sum up interrupts across all cores */ 315 total_intrs += atoi(token); 316 token = strtok(NULL, com); 317 } 318 } 319 320 fclose(f_count_proc); 321 322 return total_intrs; 323} 324 325static void dump_driver_stats(long dt) 326{ 327 int i; 328 329 for (i = 0; i < num_socks && xsks[i]; i++) { 330 char *fmt = "%-18s %'-14.0f %'-14lu\n"; 331 double intrs_ps; 332 int n_ints = get_irqs(); 333 334 if (n_ints < 0) { 335 printf("error getting intr info for intr %i\n", irq_no); 336 return; 337 } 338 xsks[i]->drv_stats.intrs = n_ints - irqs_at_init; 339 340 intrs_ps = (xsks[i]->drv_stats.intrs - xsks[i]->drv_stats.prev_intrs) * 341 1000000000. / dt; 342 343 printf("\n%-18s %-14s %-14s\n", "", "intrs/s", "count"); 344 printf(fmt, "irqs", intrs_ps, xsks[i]->drv_stats.intrs); 345 346 xsks[i]->drv_stats.prev_intrs = xsks[i]->drv_stats.intrs; 347 } 348} 349 350static void dump_stats(void) 351{ 352 unsigned long now = get_nsecs(); 353 long dt = now - prev_time; 354 int i; 355 356 prev_time = now; 357 358 for (i = 0; i < num_socks && xsks[i]; i++) { 359 char *fmt = "%-18s %'-14.0f %'-14lu\n"; 360 double rx_pps, tx_pps, dropped_pps, rx_invalid_pps, full_pps, fill_empty_pps, 361 tx_invalid_pps, tx_empty_pps; 362 363 rx_pps = (xsks[i]->ring_stats.rx_npkts - xsks[i]->ring_stats.prev_rx_npkts) * 364 1000000000. / dt; 365 tx_pps = (xsks[i]->ring_stats.tx_npkts - xsks[i]->ring_stats.prev_tx_npkts) * 366 1000000000. / dt; 367 368 printf("\n sock%d@", i); 369 print_benchmark(false); 370 printf("\n"); 371 372 printf("%-18s %-14s %-14s %-14.2f\n", "", "pps", "pkts", 373 dt / 1000000000.); 374 printf(fmt, "rx", rx_pps, xsks[i]->ring_stats.rx_npkts); 375 printf(fmt, "tx", tx_pps, xsks[i]->ring_stats.tx_npkts); 376 377 xsks[i]->ring_stats.prev_rx_npkts = xsks[i]->ring_stats.rx_npkts; 378 xsks[i]->ring_stats.prev_tx_npkts = xsks[i]->ring_stats.tx_npkts; 379 380 if (opt_extra_stats) { 381 if (!xsk_get_xdp_stats(xsk_socket__fd(xsks[i]->xsk), xsks[i])) { 382 dropped_pps = (xsks[i]->ring_stats.rx_dropped_npkts - 383 xsks[i]->ring_stats.prev_rx_dropped_npkts) * 384 1000000000. / dt; 385 rx_invalid_pps = (xsks[i]->ring_stats.rx_invalid_npkts - 386 xsks[i]->ring_stats.prev_rx_invalid_npkts) * 387 1000000000. / dt; 388 tx_invalid_pps = (xsks[i]->ring_stats.tx_invalid_npkts - 389 xsks[i]->ring_stats.prev_tx_invalid_npkts) * 390 1000000000. / dt; 391 full_pps = (xsks[i]->ring_stats.rx_full_npkts - 392 xsks[i]->ring_stats.prev_rx_full_npkts) * 393 1000000000. / dt; 394 fill_empty_pps = (xsks[i]->ring_stats.rx_fill_empty_npkts - 395 xsks[i]->ring_stats.prev_rx_fill_empty_npkts) * 396 1000000000. / dt; 397 tx_empty_pps = (xsks[i]->ring_stats.tx_empty_npkts - 398 xsks[i]->ring_stats.prev_tx_empty_npkts) * 399 1000000000. / dt; 400 401 printf(fmt, "rx dropped", dropped_pps, 402 xsks[i]->ring_stats.rx_dropped_npkts); 403 printf(fmt, "rx invalid", rx_invalid_pps, 404 xsks[i]->ring_stats.rx_invalid_npkts); 405 printf(fmt, "tx invalid", tx_invalid_pps, 406 xsks[i]->ring_stats.tx_invalid_npkts); 407 printf(fmt, "rx queue full", full_pps, 408 xsks[i]->ring_stats.rx_full_npkts); 409 printf(fmt, "fill ring empty", fill_empty_pps, 410 xsks[i]->ring_stats.rx_fill_empty_npkts); 411 printf(fmt, "tx ring empty", tx_empty_pps, 412 xsks[i]->ring_stats.tx_empty_npkts); 413 414 xsks[i]->ring_stats.prev_rx_dropped_npkts = 415 xsks[i]->ring_stats.rx_dropped_npkts; 416 xsks[i]->ring_stats.prev_rx_invalid_npkts = 417 xsks[i]->ring_stats.rx_invalid_npkts; 418 xsks[i]->ring_stats.prev_tx_invalid_npkts = 419 xsks[i]->ring_stats.tx_invalid_npkts; 420 xsks[i]->ring_stats.prev_rx_full_npkts = 421 xsks[i]->ring_stats.rx_full_npkts; 422 xsks[i]->ring_stats.prev_rx_fill_empty_npkts = 423 xsks[i]->ring_stats.rx_fill_empty_npkts; 424 xsks[i]->ring_stats.prev_tx_empty_npkts = 425 xsks[i]->ring_stats.tx_empty_npkts; 426 } else { 427 printf("%-15s\n", "Error retrieving extra stats"); 428 } 429 } 430 } 431 432 if (opt_app_stats) 433 dump_app_stats(dt); 434 if (irq_no) 435 dump_driver_stats(dt); 436} 437 438static bool is_benchmark_done(void) 439{ 440 if (opt_duration > 0) { 441 unsigned long dt = (get_nsecs() - start_time); 442 443 if (dt >= opt_duration) 444 benchmark_done = true; 445 } 446 return benchmark_done; 447} 448 449static void *poller(void *arg) 450{ 451 (void)arg; 452 while (!is_benchmark_done()) { 453 sleep(opt_interval); 454 dump_stats(); 455 } 456 457 return NULL; 458} 459 460static void remove_xdp_program(void) 461{ 462 u32 curr_prog_id = 0; 463 464 if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { 465 printf("bpf_get_link_xdp_id failed\n"); 466 exit(EXIT_FAILURE); 467 } 468 if (prog_id == curr_prog_id) 469 bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); 470 else if (!curr_prog_id) 471 printf("couldn't find a prog id on a given interface\n"); 472 else 473 printf("program on interface changed, not removing\n"); 474} 475 476static void int_exit(int sig) 477{ 478 benchmark_done = true; 479} 480 481static void xdpsock_cleanup(void) 482{ 483 struct xsk_umem *umem = xsks[0]->umem->umem; 484 int i; 485 486 dump_stats(); 487 for (i = 0; i < num_socks; i++) 488 xsk_socket__delete(xsks[i]->xsk); 489 (void)xsk_umem__delete(umem); 490 remove_xdp_program(); 491} 492 493static void __exit_with_error(int error, const char *file, const char *func, 494 int line) 495{ 496 fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func, 497 line, error, strerror(error)); 498 dump_stats(); 499 remove_xdp_program(); 500 exit(EXIT_FAILURE); 501} 502 503#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \ 504 __LINE__) 505static void swap_mac_addresses(void *data) 506{ 507 struct ether_header *eth = (struct ether_header *)data; 508 struct ether_addr *src_addr = (struct ether_addr *)ð->ether_shost; 509 struct ether_addr *dst_addr = (struct ether_addr *)ð->ether_dhost; 510 struct ether_addr tmp; 511 512 tmp = *src_addr; 513 *src_addr = *dst_addr; 514 *dst_addr = tmp; 515} 516 517static void hex_dump(void *pkt, size_t length, u64 addr) 518{ 519 const unsigned char *address = (unsigned char *)pkt; 520 const unsigned char *line = address; 521 size_t line_size = 32; 522 unsigned char c; 523 char buf[32]; 524 int i = 0; 525 526 if (!DEBUG_HEXDUMP) 527 return; 528 529 sprintf(buf, "addr=%llu", addr); 530 printf("length = %zu\n", length); 531 printf("%s | ", buf); 532 while (length-- > 0) { 533 printf("%02X ", *address++); 534 if (!(++i % line_size) || (length == 0 && i % line_size)) { 535 if (length == 0) { 536 while (i++ % line_size) 537 printf("__ "); 538 } 539 printf(" | "); /* right close */ 540 while (line < address) { 541 c = *line++; 542 printf("%c", (c < 33 || c == 255) ? 0x2E : c); 543 } 544 printf("\n"); 545 if (length > 0) 546 printf("%s | ", buf); 547 } 548 } 549 printf("\n"); 550} 551 552static void *memset32_htonl(void *dest, u32 val, u32 size) 553{ 554 u32 *ptr = (u32 *)dest; 555 int i; 556 557 val = htonl(val); 558 559 for (i = 0; i < (size & (~0x3)); i += 4) 560 ptr[i >> 2] = val; 561 562 for (; i < size; i++) 563 ((char *)dest)[i] = ((char *)&val)[i & 3]; 564 565 return dest; 566} 567 568/* 569 * This function code has been taken from 570 * Linux kernel lib/checksum.c 571 */ 572static inline unsigned short from32to16(unsigned int x) 573{ 574 /* add up 16-bit and 16-bit for 16+c bit */ 575 x = (x & 0xffff) + (x >> 16); 576 /* add up carry.. */ 577 x = (x & 0xffff) + (x >> 16); 578 return x; 579} 580 581/* 582 * This function code has been taken from 583 * Linux kernel lib/checksum.c 584 */ 585static unsigned int do_csum(const unsigned char *buff, int len) 586{ 587 unsigned int result = 0; 588 int odd; 589 590 if (len <= 0) 591 goto out; 592 odd = 1 & (unsigned long)buff; 593 if (odd) { 594#ifdef __LITTLE_ENDIAN 595 result += (*buff << 8); 596#else 597 result = *buff; 598#endif 599 len--; 600 buff++; 601 } 602 if (len >= 2) { 603 if (2 & (unsigned long)buff) { 604 result += *(unsigned short *)buff; 605 len -= 2; 606 buff += 2; 607 } 608 if (len >= 4) { 609 const unsigned char *end = buff + 610 ((unsigned int)len & ~3); 611 unsigned int carry = 0; 612 613 do { 614 unsigned int w = *(unsigned int *)buff; 615 616 buff += 4; 617 result += carry; 618 result += w; 619 carry = (w > result); 620 } while (buff < end); 621 result += carry; 622 result = (result & 0xffff) + (result >> 16); 623 } 624 if (len & 2) { 625 result += *(unsigned short *)buff; 626 buff += 2; 627 } 628 } 629 if (len & 1) 630#ifdef __LITTLE_ENDIAN 631 result += *buff; 632#else 633 result += (*buff << 8); 634#endif 635 result = from32to16(result); 636 if (odd) 637 result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); 638out: 639 return result; 640} 641 642__sum16 ip_fast_csum(const void *iph, unsigned int ihl); 643 644/* 645 * This is a version of ip_compute_csum() optimized for IP headers, 646 * which always checksum on 4 octet boundaries. 647 * This function code has been taken from 648 * Linux kernel lib/checksum.c 649 */ 650__sum16 ip_fast_csum(const void *iph, unsigned int ihl) 651{ 652 return (__force __sum16)~do_csum(iph, ihl * 4); 653} 654 655/* 656 * Fold a partial checksum 657 * This function code has been taken from 658 * Linux kernel include/asm-generic/checksum.h 659 */ 660static inline __sum16 csum_fold(__wsum csum) 661{ 662 u32 sum = (__force u32)csum; 663 664 sum = (sum & 0xffff) + (sum >> 16); 665 sum = (sum & 0xffff) + (sum >> 16); 666 return (__force __sum16)~sum; 667} 668 669/* 670 * This function code has been taken from 671 * Linux kernel lib/checksum.c 672 */ 673static inline u32 from64to32(u64 x) 674{ 675 /* add up 32-bit and 32-bit for 32+c bit */ 676 x = (x & 0xffffffff) + (x >> 32); 677 /* add up carry.. */ 678 x = (x & 0xffffffff) + (x >> 32); 679 return (u32)x; 680} 681 682__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, 683 __u32 len, __u8 proto, __wsum sum); 684 685/* 686 * This function code has been taken from 687 * Linux kernel lib/checksum.c 688 */ 689__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, 690 __u32 len, __u8 proto, __wsum sum) 691{ 692 unsigned long long s = (__force u32)sum; 693 694 s += (__force u32)saddr; 695 s += (__force u32)daddr; 696#ifdef __BIG_ENDIAN__ 697 s += proto + len; 698#else 699 s += (proto + len) << 8; 700#endif 701 return (__force __wsum)from64to32(s); 702} 703 704/* 705 * This function has been taken from 706 * Linux kernel include/asm-generic/checksum.h 707 */ 708static inline __sum16 709csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, 710 __u8 proto, __wsum sum) 711{ 712 return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); 713} 714 715static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, 716 u8 proto, u16 *udp_pkt) 717{ 718 u32 csum = 0; 719 u32 cnt = 0; 720 721 /* udp hdr and data */ 722 for (; cnt < len; cnt += 2) 723 csum += udp_pkt[cnt >> 1]; 724 725 return csum_tcpudp_magic(saddr, daddr, len, proto, csum); 726} 727 728#define ETH_FCS_SIZE 4 729 730#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ 731 sizeof(struct udphdr)) 732 733#define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE) 734#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) 735#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) 736#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) 737 738static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; 739 740static void gen_eth_hdr_data(void) 741{ 742 struct udphdr *udp_hdr = (struct udphdr *)(pkt_data + 743 sizeof(struct ethhdr) + 744 sizeof(struct iphdr)); 745 struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + 746 sizeof(struct ethhdr)); 747 struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; 748 749 /* ethernet header */ 750 memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN); 751 memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN); 752 eth_hdr->h_proto = htons(ETH_P_IP); 753 754 /* IP header */ 755 ip_hdr->version = IPVERSION; 756 ip_hdr->ihl = 0x5; /* 20 byte header */ 757 ip_hdr->tos = 0x0; 758 ip_hdr->tot_len = htons(IP_PKT_SIZE); 759 ip_hdr->id = 0; 760 ip_hdr->frag_off = 0; 761 ip_hdr->ttl = IPDEFTTL; 762 ip_hdr->protocol = IPPROTO_UDP; 763 ip_hdr->saddr = htonl(0x0a0a0a10); 764 ip_hdr->daddr = htonl(0x0a0a0a20); 765 766 /* IP header checksum */ 767 ip_hdr->check = 0; 768 ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl); 769 770 /* UDP header */ 771 udp_hdr->source = htons(0x1000); 772 udp_hdr->dest = htons(0x1000); 773 udp_hdr->len = htons(UDP_PKT_SIZE); 774 775 /* UDP data */ 776 memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern, 777 UDP_PKT_DATA_SIZE); 778 779 /* UDP header checksum */ 780 udp_hdr->check = 0; 781 udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, 782 IPPROTO_UDP, (u16 *)udp_hdr); 783} 784 785static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr) 786{ 787 memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, 788 PKT_SIZE); 789} 790 791static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) 792{ 793 struct xsk_umem_info *umem; 794 struct xsk_umem_config cfg = { 795 /* We recommend that you set the fill ring size >= HW RX ring size + 796 * AF_XDP RX ring size. Make sure you fill up the fill ring 797 * with buffers at regular intervals, and you will with this setting 798 * avoid allocation failures in the driver. These are usually quite 799 * expensive since drivers have not been written to assume that 800 * allocation failures are common. For regular sockets, kernel 801 * allocated memory is used that only runs out in OOM situations 802 * that should be rare. 803 */ 804 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, 805 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, 806 .frame_size = opt_xsk_frame_size, 807 .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM, 808 .flags = opt_umem_flags 809 }; 810 int ret; 811 812 umem = calloc(1, sizeof(*umem)); 813 if (!umem) 814 exit_with_error(errno); 815 816 ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq, 817 &cfg); 818 if (ret) 819 exit_with_error(-ret); 820 821 umem->buffer = buffer; 822 return umem; 823} 824 825static void xsk_populate_fill_ring(struct xsk_umem_info *umem) 826{ 827 int ret, i; 828 u32 idx; 829 830 ret = xsk_ring_prod__reserve(&umem->fq, 831 XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, &idx); 832 if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS * 2) 833 exit_with_error(-ret); 834 for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS * 2; i++) 835 *xsk_ring_prod__fill_addr(&umem->fq, idx++) = 836 i * opt_xsk_frame_size; 837 xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS * 2); 838} 839 840static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem, 841 bool rx, bool tx) 842{ 843 struct xsk_socket_config cfg; 844 struct xsk_socket_info *xsk; 845 struct xsk_ring_cons *rxr; 846 struct xsk_ring_prod *txr; 847 int ret; 848 849 xsk = calloc(1, sizeof(*xsk)); 850 if (!xsk) 851 exit_with_error(errno); 852 853 xsk->umem = umem; 854 cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; 855 cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; 856 if (opt_num_xsks > 1) 857 cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; 858 else 859 cfg.libbpf_flags = 0; 860 cfg.xdp_flags = opt_xdp_flags; 861 cfg.bind_flags = opt_xdp_bind_flags; 862 863 rxr = rx ? &xsk->rx : NULL; 864 txr = tx ? &xsk->tx : NULL; 865 ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem, 866 rxr, txr, &cfg); 867 if (ret) 868 exit_with_error(-ret); 869 870 ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags); 871 if (ret) 872 exit_with_error(-ret); 873 874 xsk->app_stats.rx_empty_polls = 0; 875 xsk->app_stats.fill_fail_polls = 0; 876 xsk->app_stats.copy_tx_sendtos = 0; 877 xsk->app_stats.tx_wakeup_sendtos = 0; 878 xsk->app_stats.opt_polls = 0; 879 xsk->app_stats.prev_rx_empty_polls = 0; 880 xsk->app_stats.prev_fill_fail_polls = 0; 881 xsk->app_stats.prev_copy_tx_sendtos = 0; 882 xsk->app_stats.prev_tx_wakeup_sendtos = 0; 883 xsk->app_stats.prev_opt_polls = 0; 884 885 return xsk; 886} 887 888static struct option long_options[] = { 889 {"rxdrop", no_argument, 0, 'r'}, 890 {"txonly", no_argument, 0, 't'}, 891 {"l2fwd", no_argument, 0, 'l'}, 892 {"interface", required_argument, 0, 'i'}, 893 {"queue", required_argument, 0, 'q'}, 894 {"poll", no_argument, 0, 'p'}, 895 {"xdp-skb", no_argument, 0, 'S'}, 896 {"xdp-native", no_argument, 0, 'N'}, 897 {"interval", required_argument, 0, 'n'}, 898 {"zero-copy", no_argument, 0, 'z'}, 899 {"copy", no_argument, 0, 'c'}, 900 {"frame-size", required_argument, 0, 'f'}, 901 {"no-need-wakeup", no_argument, 0, 'm'}, 902 {"unaligned", no_argument, 0, 'u'}, 903 {"shared-umem", no_argument, 0, 'M'}, 904 {"force", no_argument, 0, 'F'}, 905 {"duration", required_argument, 0, 'd'}, 906 {"batch-size", required_argument, 0, 'b'}, 907 {"tx-pkt-count", required_argument, 0, 'C'}, 908 {"tx-pkt-size", required_argument, 0, 's'}, 909 {"tx-pkt-pattern", required_argument, 0, 'P'}, 910 {"extra-stats", no_argument, 0, 'x'}, 911 {"quiet", no_argument, 0, 'Q'}, 912 {"app-stats", no_argument, 0, 'a'}, 913 {"irq-string", no_argument, 0, 'I'}, 914 {0, 0, 0, 0} 915}; 916 917static void usage(const char *prog) 918{ 919 const char *str = 920 " Usage: %s [OPTIONS]\n" 921 " Options:\n" 922 " -r, --rxdrop Discard all incoming packets (default)\n" 923 " -t, --txonly Only send packets\n" 924 " -l, --l2fwd MAC swap L2 forwarding\n" 925 " -i, --interface=n Run on interface n\n" 926 " -q, --queue=n Use queue n (default 0)\n" 927 " -p, --poll Use poll syscall\n" 928 " -S, --xdp-skb=n Use XDP skb-mod\n" 929 " -N, --xdp-native=n Enforce XDP native mode\n" 930 " -n, --interval=n Specify statistics update interval (default 1 sec).\n" 931 " -z, --zero-copy Force zero-copy mode.\n" 932 " -c, --copy Force copy mode.\n" 933 " -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n" 934 " -f, --frame-size=n Set the frame size (must be a power of two in aligned mode, default is %d).\n" 935 " -u, --unaligned Enable unaligned chunk placement\n" 936 " -M, --shared-umem Enable XDP_SHARED_UMEM\n" 937 " -F, --force Force loading the XDP prog\n" 938 " -d, --duration=n Duration in secs to run command.\n" 939 " Default: forever.\n" 940 " -b, --batch-size=n Batch size for sending or receiving\n" 941 " packets. Default: %d\n" 942 " -C, --tx-pkt-count=n Number of packets to send.\n" 943 " Default: Continuous packets.\n" 944 " -s, --tx-pkt-size=n Transmit packet size.\n" 945 " (Default: %d bytes)\n" 946 " Min size: %d, Max size %d.\n" 947 " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n" 948 " -x, --extra-stats Display extra statistics.\n" 949 " -Q, --quiet Do not display any stats.\n" 950 " -a, --app-stats Display application (syscall) statistics.\n" 951 " -I, --irq-string Display driver interrupt statistics for interface associated with irq-string.\n" 952 "\n"; 953 fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, 954 opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, 955 XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern); 956 957 exit(EXIT_FAILURE); 958} 959 960static void parse_command_line(int argc, char **argv) 961{ 962 int option_index, c; 963 964 opterr = 0; 965 966 for (;;) { 967 c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:", 968 long_options, &option_index); 969 if (c == -1) 970 break; 971 972 switch (c) { 973 case 'r': 974 opt_bench = BENCH_RXDROP; 975 break; 976 case 't': 977 opt_bench = BENCH_TXONLY; 978 break; 979 case 'l': 980 opt_bench = BENCH_L2FWD; 981 break; 982 case 'i': 983 opt_if = optarg; 984 break; 985 case 'q': 986 opt_queue = atoi(optarg); 987 break; 988 case 'p': 989 opt_poll = 1; 990 break; 991 case 'S': 992 opt_xdp_flags |= XDP_FLAGS_SKB_MODE; 993 opt_xdp_bind_flags |= XDP_COPY; 994 break; 995 case 'N': 996 /* default, set below */ 997 break; 998 case 'n': 999 opt_interval = atoi(optarg); 1000 break; 1001 case 'z': 1002 opt_xdp_bind_flags |= XDP_ZEROCOPY; 1003 break; 1004 case 'c': 1005 opt_xdp_bind_flags |= XDP_COPY; 1006 break; 1007 case 'u': 1008 opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG; 1009 opt_unaligned_chunks = 1; 1010 opt_mmap_flags = MAP_HUGETLB; 1011 break; 1012 case 'F': 1013 opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; 1014 break; 1015 case 'f': 1016 opt_xsk_frame_size = atoi(optarg); 1017 break; 1018 case 'm': 1019 opt_need_wakeup = false; 1020 opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP; 1021 break; 1022 case 'M': 1023 opt_num_xsks = MAX_SOCKS; 1024 break; 1025 case 'd': 1026 opt_duration = atoi(optarg); 1027 opt_duration *= 1000000000; 1028 break; 1029 case 'b': 1030 opt_batch_size = atoi(optarg); 1031 break; 1032 case 'C': 1033 opt_pkt_count = atoi(optarg); 1034 break; 1035 case 's': 1036 opt_pkt_size = atoi(optarg); 1037 if (opt_pkt_size > (XSK_UMEM__DEFAULT_FRAME_SIZE) || 1038 opt_pkt_size < MIN_PKT_SIZE) { 1039 fprintf(stderr, 1040 "ERROR: Invalid frame size %d\n", 1041 opt_pkt_size); 1042 usage(basename(argv[0])); 1043 } 1044 break; 1045 case 'P': 1046 opt_pkt_fill_pattern = strtol(optarg, NULL, 16); 1047 break; 1048 case 'x': 1049 opt_extra_stats = 1; 1050 break; 1051 case 'Q': 1052 opt_quiet = 1; 1053 break; 1054 case 'a': 1055 opt_app_stats = 1; 1056 break; 1057 case 'I': 1058 opt_irq_str = optarg; 1059 if (get_interrupt_number()) 1060 irqs_at_init = get_irqs(); 1061 if (irqs_at_init < 0) { 1062 fprintf(stderr, "ERROR: Failed to get irqs for %s\n", opt_irq_str); 1063 usage(basename(argv[0])); 1064 } 1065 1066 break; 1067 default: 1068 usage(basename(argv[0])); 1069 } 1070 } 1071 1072 if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE)) 1073 opt_xdp_flags |= XDP_FLAGS_DRV_MODE; 1074 1075 opt_ifindex = if_nametoindex(opt_if); 1076 if (!opt_ifindex) { 1077 fprintf(stderr, "ERROR: interface \"%s\" does not exist\n", 1078 opt_if); 1079 usage(basename(argv[0])); 1080 } 1081 1082 if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) && 1083 !opt_unaligned_chunks) { 1084 fprintf(stderr, "--frame-size=%d is not a power of two\n", 1085 opt_xsk_frame_size); 1086 usage(basename(argv[0])); 1087 } 1088} 1089 1090static void kick_tx(struct xsk_socket_info *xsk) 1091{ 1092 int ret; 1093 1094 ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); 1095 if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || 1096 errno == EBUSY || errno == ENETDOWN) 1097 return; 1098 exit_with_error(errno); 1099} 1100 1101static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, 1102 struct pollfd *fds) 1103{ 1104 struct xsk_umem_info *umem = xsk->umem; 1105 u32 idx_cq = 0, idx_fq = 0; 1106 unsigned int rcvd; 1107 size_t ndescs; 1108 1109 if (!xsk->outstanding_tx) 1110 return; 1111 1112 /* In copy mode, Tx is driven by a syscall so we need to use e.g. sendto() to 1113 * really send the packets. In zero-copy mode we do not have to do this, since Tx 1114 * is driven by the NAPI loop. So as an optimization, we do not have to call 1115 * sendto() all the time in zero-copy mode for l2fwd. 1116 */ 1117 if (opt_xdp_bind_flags & XDP_COPY) { 1118 xsk->app_stats.copy_tx_sendtos++; 1119 kick_tx(xsk); 1120 } 1121 1122 ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size : 1123 xsk->outstanding_tx; 1124 1125 /* re-add completed Tx buffers */ 1126 rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq); 1127 if (rcvd > 0) { 1128 unsigned int i; 1129 int ret; 1130 1131 ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); 1132 while (ret != rcvd) { 1133 if (ret < 0) 1134 exit_with_error(-ret); 1135 if (xsk_ring_prod__needs_wakeup(&umem->fq)) { 1136 xsk->app_stats.fill_fail_polls++; 1137 ret = poll(fds, num_socks, opt_timeout); 1138 } 1139 ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); 1140 } 1141 1142 for (i = 0; i < rcvd; i++) 1143 *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = 1144 *xsk_ring_cons__comp_addr(&umem->cq, idx_cq++); 1145 1146 xsk_ring_prod__submit(&xsk->umem->fq, rcvd); 1147 xsk_ring_cons__release(&xsk->umem->cq, rcvd); 1148 xsk->outstanding_tx -= rcvd; 1149 xsk->ring_stats.tx_npkts += rcvd; 1150 } 1151} 1152 1153static inline void complete_tx_only(struct xsk_socket_info *xsk, 1154 int batch_size) 1155{ 1156 unsigned int rcvd; 1157 u32 idx; 1158 1159 if (!xsk->outstanding_tx) 1160 return; 1161 1162 if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) { 1163 xsk->app_stats.tx_wakeup_sendtos++; 1164 kick_tx(xsk); 1165 } 1166 1167 rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); 1168 if (rcvd > 0) { 1169 xsk_ring_cons__release(&xsk->umem->cq, rcvd); 1170 xsk->outstanding_tx -= rcvd; 1171 xsk->ring_stats.tx_npkts += rcvd; 1172 } 1173} 1174 1175static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds) 1176{ 1177 unsigned int rcvd, i; 1178 u32 idx_rx = 0, idx_fq = 0; 1179 int ret; 1180 1181 rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); 1182 if (!rcvd) { 1183 if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { 1184 xsk->app_stats.rx_empty_polls++; 1185 ret = poll(fds, num_socks, opt_timeout); 1186 } 1187 return; 1188 } 1189 1190 ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); 1191 while (ret != rcvd) { 1192 if (ret < 0) 1193 exit_with_error(-ret); 1194 if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { 1195 xsk->app_stats.fill_fail_polls++; 1196 ret = poll(fds, num_socks, opt_timeout); 1197 } 1198 ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); 1199 } 1200 1201 for (i = 0; i < rcvd; i++) { 1202 u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; 1203 u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; 1204 u64 orig = xsk_umem__extract_addr(addr); 1205 1206 addr = xsk_umem__add_offset_to_addr(addr); 1207 char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); 1208 1209 hex_dump(pkt, len, addr); 1210 *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig; 1211 } 1212 1213 xsk_ring_prod__submit(&xsk->umem->fq, rcvd); 1214 xsk_ring_cons__release(&xsk->rx, rcvd); 1215 xsk->ring_stats.rx_npkts += rcvd; 1216} 1217 1218static void rx_drop_all(void) 1219{ 1220 struct pollfd fds[MAX_SOCKS] = {}; 1221 int i, ret; 1222 1223 for (i = 0; i < num_socks; i++) { 1224 fds[i].fd = xsk_socket__fd(xsks[i]->xsk); 1225 fds[i].events = POLLIN; 1226 } 1227 1228 for (;;) { 1229 if (opt_poll) { 1230 for (i = 0; i < num_socks; i++) 1231 xsks[i]->app_stats.opt_polls++; 1232 ret = poll(fds, num_socks, opt_timeout); 1233 if (ret <= 0) 1234 continue; 1235 } 1236 1237 for (i = 0; i < num_socks; i++) 1238 rx_drop(xsks[i], fds); 1239 1240 if (benchmark_done) 1241 break; 1242 } 1243} 1244 1245static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) 1246{ 1247 u32 idx; 1248 unsigned int i; 1249 1250 while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < 1251 batch_size) { 1252 complete_tx_only(xsk, batch_size); 1253 if (benchmark_done) 1254 return; 1255 } 1256 1257 for (i = 0; i < batch_size; i++) { 1258 struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, 1259 idx + i); 1260 tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size; 1261 tx_desc->len = PKT_SIZE; 1262 } 1263 1264 xsk_ring_prod__submit(&xsk->tx, batch_size); 1265 xsk->outstanding_tx += batch_size; 1266 *frame_nb += batch_size; 1267 *frame_nb %= NUM_FRAMES; 1268 complete_tx_only(xsk, batch_size); 1269} 1270 1271static inline int get_batch_size(int pkt_cnt) 1272{ 1273 if (!opt_pkt_count) 1274 return opt_batch_size; 1275 1276 if (pkt_cnt + opt_batch_size <= opt_pkt_count) 1277 return opt_batch_size; 1278 1279 return opt_pkt_count - pkt_cnt; 1280} 1281 1282static void complete_tx_only_all(void) 1283{ 1284 bool pending; 1285 int i; 1286 1287 do { 1288 pending = false; 1289 for (i = 0; i < num_socks; i++) { 1290 if (xsks[i]->outstanding_tx) { 1291 complete_tx_only(xsks[i], opt_batch_size); 1292 pending = !!xsks[i]->outstanding_tx; 1293 } 1294 } 1295 } while (pending); 1296} 1297 1298static void tx_only_all(void) 1299{ 1300 struct pollfd fds[MAX_SOCKS] = {}; 1301 u32 frame_nb[MAX_SOCKS] = {}; 1302 int pkt_cnt = 0; 1303 int i, ret; 1304 1305 for (i = 0; i < num_socks; i++) { 1306 fds[0].fd = xsk_socket__fd(xsks[i]->xsk); 1307 fds[0].events = POLLOUT; 1308 } 1309 1310 while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { 1311 int batch_size = get_batch_size(pkt_cnt); 1312 1313 if (opt_poll) { 1314 for (i = 0; i < num_socks; i++) 1315 xsks[i]->app_stats.opt_polls++; 1316 ret = poll(fds, num_socks, opt_timeout); 1317 if (ret <= 0) 1318 continue; 1319 1320 if (!(fds[0].revents & POLLOUT)) 1321 continue; 1322 } 1323 1324 for (i = 0; i < num_socks; i++) 1325 tx_only(xsks[i], &frame_nb[i], batch_size); 1326 1327 pkt_cnt += batch_size; 1328 1329 if (benchmark_done) 1330 break; 1331 } 1332 1333 if (opt_pkt_count) 1334 complete_tx_only_all(); 1335} 1336 1337static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) 1338{ 1339 unsigned int rcvd, i; 1340 u32 idx_rx = 0, idx_tx = 0; 1341 int ret; 1342 1343 complete_tx_l2fwd(xsk, fds); 1344 1345 rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); 1346 if (!rcvd) { 1347 if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { 1348 xsk->app_stats.rx_empty_polls++; 1349 ret = poll(fds, num_socks, opt_timeout); 1350 } 1351 return; 1352 } 1353 1354 ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); 1355 while (ret != rcvd) { 1356 if (ret < 0) 1357 exit_with_error(-ret); 1358 complete_tx_l2fwd(xsk, fds); 1359 if (xsk_ring_prod__needs_wakeup(&xsk->tx)) { 1360 xsk->app_stats.tx_wakeup_sendtos++; 1361 kick_tx(xsk); 1362 } 1363 ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); 1364 } 1365 1366 for (i = 0; i < rcvd; i++) { 1367 u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; 1368 u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; 1369 u64 orig = addr; 1370 1371 addr = xsk_umem__add_offset_to_addr(addr); 1372 char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); 1373 1374 swap_mac_addresses(pkt); 1375 1376 hex_dump(pkt, len, addr); 1377 xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig; 1378 xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len; 1379 } 1380 1381 xsk_ring_prod__submit(&xsk->tx, rcvd); 1382 xsk_ring_cons__release(&xsk->rx, rcvd); 1383 1384 xsk->ring_stats.rx_npkts += rcvd; 1385 xsk->outstanding_tx += rcvd; 1386} 1387 1388static void l2fwd_all(void) 1389{ 1390 struct pollfd fds[MAX_SOCKS] = {}; 1391 int i, ret; 1392 1393 for (i = 0; i < num_socks; i++) { 1394 fds[i].fd = xsk_socket__fd(xsks[i]->xsk); 1395 fds[i].events = POLLOUT | POLLIN; 1396 } 1397 1398 for (;;) { 1399 if (opt_poll) { 1400 for (i = 0; i < num_socks; i++) 1401 xsks[i]->app_stats.opt_polls++; 1402 ret = poll(fds, num_socks, opt_timeout); 1403 if (ret <= 0) 1404 continue; 1405 } 1406 1407 for (i = 0; i < num_socks; i++) 1408 l2fwd(xsks[i], fds); 1409 1410 if (benchmark_done) 1411 break; 1412 } 1413} 1414 1415static void load_xdp_program(char **argv, struct bpf_object **obj) 1416{ 1417 struct bpf_prog_load_attr prog_load_attr = { 1418 .prog_type = BPF_PROG_TYPE_XDP, 1419 }; 1420 char xdp_filename[256]; 1421 int prog_fd; 1422 1423 snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]); 1424 prog_load_attr.file = xdp_filename; 1425 1426 if (bpf_prog_load_xattr(&prog_load_attr, obj, &prog_fd)) 1427 exit(EXIT_FAILURE); 1428 if (prog_fd < 0) { 1429 fprintf(stderr, "ERROR: no program found: %s\n", 1430 strerror(prog_fd)); 1431 exit(EXIT_FAILURE); 1432 } 1433 1434 if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) { 1435 fprintf(stderr, "ERROR: link set xdp fd failed\n"); 1436 exit(EXIT_FAILURE); 1437 } 1438} 1439 1440static void enter_xsks_into_map(struct bpf_object *obj) 1441{ 1442 struct bpf_map *map; 1443 int i, xsks_map; 1444 1445 map = bpf_object__find_map_by_name(obj, "xsks_map"); 1446 xsks_map = bpf_map__fd(map); 1447 if (xsks_map < 0) { 1448 fprintf(stderr, "ERROR: no xsks map found: %s\n", 1449 strerror(xsks_map)); 1450 exit(EXIT_FAILURE); 1451 } 1452 1453 for (i = 0; i < num_socks; i++) { 1454 int fd = xsk_socket__fd(xsks[i]->xsk); 1455 int key, ret; 1456 1457 key = i; 1458 ret = bpf_map_update_elem(xsks_map, &key, &fd, 0); 1459 if (ret) { 1460 fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i); 1461 exit(EXIT_FAILURE); 1462 } 1463 } 1464} 1465 1466int main(int argc, char **argv) 1467{ 1468 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; 1469 bool rx = false, tx = false; 1470 struct xsk_umem_info *umem; 1471 struct bpf_object *obj; 1472 pthread_t pt; 1473 int i, ret; 1474 void *bufs; 1475 1476 parse_command_line(argc, argv); 1477 1478 if (setrlimit(RLIMIT_MEMLOCK, &r)) { 1479 fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n", 1480 strerror(errno)); 1481 exit(EXIT_FAILURE); 1482 } 1483 1484 if (opt_num_xsks > 1) 1485 load_xdp_program(argv, &obj); 1486 1487 /* Reserve memory for the umem. Use hugepages if unaligned chunk mode */ 1488 bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size, 1489 PROT_READ | PROT_WRITE, 1490 MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0); 1491 if (bufs == MAP_FAILED) { 1492 printf("ERROR: mmap failed\n"); 1493 exit(EXIT_FAILURE); 1494 } 1495 1496 /* Create sockets... */ 1497 umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size); 1498 if (opt_bench == BENCH_RXDROP || opt_bench == BENCH_L2FWD) { 1499 rx = true; 1500 xsk_populate_fill_ring(umem); 1501 } 1502 if (opt_bench == BENCH_L2FWD || opt_bench == BENCH_TXONLY) 1503 tx = true; 1504 for (i = 0; i < opt_num_xsks; i++) 1505 xsks[num_socks++] = xsk_configure_socket(umem, rx, tx); 1506 1507 if (opt_bench == BENCH_TXONLY) { 1508 gen_eth_hdr_data(); 1509 1510 for (i = 0; i < NUM_FRAMES; i++) 1511 gen_eth_frame(umem, i * opt_xsk_frame_size); 1512 } 1513 1514 if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY) 1515 enter_xsks_into_map(obj); 1516 1517 signal(SIGINT, int_exit); 1518 signal(SIGTERM, int_exit); 1519 signal(SIGABRT, int_exit); 1520 1521 setlocale(LC_ALL, ""); 1522 1523 prev_time = get_nsecs(); 1524 start_time = prev_time; 1525 1526 if (!opt_quiet) { 1527 ret = pthread_create(&pt, NULL, poller, NULL); 1528 if (ret) 1529 exit_with_error(ret); 1530 } 1531 1532 1533 if (opt_bench == BENCH_RXDROP) 1534 rx_drop_all(); 1535 else if (opt_bench == BENCH_TXONLY) 1536 tx_only_all(); 1537 else 1538 l2fwd_all(); 1539 1540 benchmark_done = true; 1541 1542 if (!opt_quiet) 1543 pthread_join(pt, NULL); 1544 1545 xdpsock_cleanup(); 1546 1547 munmap(bufs, NUM_FRAMES * opt_xsk_frame_size); 1548 1549 return 0; 1550} 1551