// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * AF_XDP user-space access library.
 *
 * Copyright(c) 2018 - 2019 Intel Corporation.
 *
 * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
 */

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <asm/barrier.h>
#include <linux/compiler.h>
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_xdp.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>

#include "bpf.h"
#include "libbpf.h"
#include "libbpf_internal.h"
#include "xsk.h"

#ifndef SOL_XDP
 #define SOL_XDP 283
#endif

#ifndef AF_XDP
 #define AF_XDP 44
#endif

#ifndef PF_XDP
 #define PF_XDP AF_XDP
#endif

struct xsk_umem {
	struct xsk_ring_prod *fill_save;
	struct xsk_ring_cons *comp_save;
	char *umem_area;
	struct xsk_umem_config config;
	int fd;
	int refcount;
	struct list_head ctx_list;
	bool rx_ring_setup_done;
	bool tx_ring_setup_done;
};

struct xsk_ctx {
	struct xsk_ring_prod *fill;
	struct xsk_ring_cons *comp;
	__u32 queue_id;
	struct xsk_umem *umem;
	int refcount;
	int ifindex;
	struct list_head list;
	int prog_fd;
	int xsks_map_fd;
	char ifname[IFNAMSIZ];
};

struct xsk_socket {
	struct xsk_ring_cons *rx;
	struct xsk_ring_prod *tx;
	__u64 outstanding_tx;
	struct xsk_ctx *ctx;
	struct xsk_socket_config config;
	int fd;
};

struct xsk_nl_info {
	bool xdp_prog_attached;
	int ifindex;
	int fd;
};

/* Up until and including Linux 5.3 */
struct xdp_ring_offset_v1 {
	__u64 producer;
	__u64 consumer;
	__u64 desc;
};

/* Up until and including Linux 5.3 */
struct xdp_mmap_offsets_v1 {
	struct xdp_ring_offset_v1 rx;
	struct xdp_ring_offset_v1 tx;
	struct xdp_ring_offset_v1 fr;
	struct xdp_ring_offset_v1 cr;
};

int xsk_umem__fd(const struct xsk_umem *umem)
{
	return umem ? umem->fd : -EINVAL;
}

int xsk_socket__fd(const struct xsk_socket *xsk)
{
	return xsk ? xsk->fd : -EINVAL;
}

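/* Illustrative sketch (not part of the library): the fd returned by
 * xsk_socket__fd() is an ordinary socket descriptor, so a caller can wait
 * for traffic with poll()/select()/epoll. The timeout and the rx_batch()
 * helper below are assumptions made up for this example.
 *
 *	struct pollfd pfd = {
 *		.fd = xsk_socket__fd(xsk),
 *		.events = POLLIN,
 *	};
 *
 *	if (poll(&pfd, 1, timeout_ms) > 0 && (pfd.revents & POLLIN))
 *		rx_batch(xsk);	// hypothetical helper that drains the RX ring
 */
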
static bool xsk_page_aligned(void *buffer)
{
	unsigned long addr = (unsigned long)buffer;

	return !(addr & (getpagesize() - 1));
}

static void xsk_set_umem_config(struct xsk_umem_config *cfg,
				const struct xsk_umem_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
		cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
		cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
		return;
	}

	cfg->fill_size = usr_cfg->fill_size;
	cfg->comp_size = usr_cfg->comp_size;
	cfg->frame_size = usr_cfg->frame_size;
	cfg->frame_headroom = usr_cfg->frame_headroom;
	cfg->flags = usr_cfg->flags;
}

static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
				     const struct xsk_socket_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->libbpf_flags = 0;
		cfg->xdp_flags = 0;
		cfg->bind_flags = 0;
		return 0;
	}

	if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
		return -EINVAL;

	cfg->rx_size = usr_cfg->rx_size;
	cfg->tx_size = usr_cfg->tx_size;
	cfg->libbpf_flags = usr_cfg->libbpf_flags;
	cfg->xdp_flags = usr_cfg->xdp_flags;
	cfg->bind_flags = usr_cfg->bind_flags;

	return 0;
}

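/* Illustrative sketch (not part of the library): how a caller might override
 * only some of the defaults that the two helpers above would otherwise fill
 * in (passing NULL selects the defaults). The concrete values and the
 * XDP/bind flags chosen here are assumptions for the example only.
 *
 *	struct xsk_umem_config ucfg = {
 *		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
 *		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
 *		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
 *		.frame_headroom = 256,			// example value
 *		.flags = XSK_UMEM__DEFAULT_FLAGS,
 *	};
 *
 *	struct xsk_socket_config scfg = {
 *		.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
 *		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
 *		.libbpf_flags = 0,		// let libbpf load the default XDP program
 *		.xdp_flags = XDP_FLAGS_DRV_MODE,	// example value
 *		.bind_flags = XDP_USE_NEED_WAKEUP,	// example value
 *	};
 */
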
static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
{
	struct xdp_mmap_offsets_v1 off_v1;

	/* getsockopt on a kernel <= 5.3 has no flags fields.
	 * Copy over the offsets to the correct places in the >=5.4 format
	 * and put the flags where they would have been on that kernel.
	 */
	memcpy(&off_v1, off, sizeof(off_v1));

	off->rx.producer = off_v1.rx.producer;
	off->rx.consumer = off_v1.rx.consumer;
	off->rx.desc = off_v1.rx.desc;
	off->rx.flags = off_v1.rx.consumer + sizeof(__u32);

	off->tx.producer = off_v1.tx.producer;
	off->tx.consumer = off_v1.tx.consumer;
	off->tx.desc = off_v1.tx.desc;
	off->tx.flags = off_v1.tx.consumer + sizeof(__u32);

	off->fr.producer = off_v1.fr.producer;
	off->fr.consumer = off_v1.fr.consumer;
	off->fr.desc = off_v1.fr.desc;
	off->fr.flags = off_v1.fr.consumer + sizeof(__u32);

	off->cr.producer = off_v1.cr.producer;
	off->cr.consumer = off_v1.cr.consumer;
	off->cr.desc = off_v1.cr.desc;
	off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
}

static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
{
	socklen_t optlen;
	int err;

	optlen = sizeof(*off);
	err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
	if (err)
		return err;

	if (optlen == sizeof(*off))
		return 0;

	if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
		xsk_mmap_offsets_v1(off);
		return 0;
	}

	return -EINVAL;
}

static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
				 struct xsk_ring_prod *fill,
				 struct xsk_ring_cons *comp)
{
	struct xdp_mmap_offsets off;
	void *map;
	int err;

	err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
			 &umem->config.fill_size,
			 sizeof(umem->config.fill_size));
	if (err)
		return -errno;

	err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
			 &umem->config.comp_size,
			 sizeof(umem->config.comp_size));
	if (err)
		return -errno;

	err = xsk_get_mmap_offsets(fd, &off);
	if (err)
		return -errno;

	map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
		   XDP_UMEM_PGOFF_FILL_RING);
	if (map == MAP_FAILED)
		return -errno;

	fill->mask = umem->config.fill_size - 1;
	fill->size = umem->config.fill_size;
	fill->producer = map + off.fr.producer;
	fill->consumer = map + off.fr.consumer;
	fill->flags = map + off.fr.flags;
	fill->ring = map + off.fr.desc;
	fill->cached_cons = umem->config.fill_size;

	map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
		   XDP_UMEM_PGOFF_COMPLETION_RING);
	if (map == MAP_FAILED) {
		err = -errno;
		goto out_mmap;
	}

	comp->mask = umem->config.comp_size - 1;
	comp->size = umem->config.comp_size;
	comp->producer = map + off.cr.producer;
	comp->consumer = map + off.cr.consumer;
	comp->flags = map + off.cr.flags;
	comp->ring = map + off.cr.desc;

	return 0;

out_mmap:
	munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
	return err;
}

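/* Illustrative sketch (not part of the library): once the fill ring has been
 * created and mmap'ed as above, a caller hands frames to the kernel through
 * the xsk_ring_prod__* helpers from xsk.h. The frame count and the simple
 * linear addressing scheme are assumptions for the example.
 *
 *	__u32 idx;
 *
 *	if (xsk_ring_prod__reserve(fill, nframes, &idx) == nframes) {
 *		for (__u32 i = 0; i < nframes; i++)
 *			*xsk_ring_prod__fill_addr(fill, idx + i) =
 *				(__u64)i * frame_size;	// umem-relative address
 *		xsk_ring_prod__submit(fill, nframes);
 *	}
 */
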
int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
			    __u64 size, struct xsk_ring_prod *fill,
			    struct xsk_ring_cons *comp,
			    const struct xsk_umem_config *usr_config)
{
	struct xdp_umem_reg mr;
	struct xsk_umem *umem;
	int err;

	if (!umem_area || !umem_ptr || !fill || !comp)
		return -EFAULT;
	if (!size && !xsk_page_aligned(umem_area))
		return -EINVAL;

	umem = calloc(1, sizeof(*umem));
	if (!umem)
		return -ENOMEM;

	umem->fd = socket(AF_XDP, SOCK_RAW, 0);
	if (umem->fd < 0) {
		err = -errno;
		goto out_umem_alloc;
	}

	umem->umem_area = umem_area;
	INIT_LIST_HEAD(&umem->ctx_list);
	xsk_set_umem_config(&umem->config, usr_config);

	memset(&mr, 0, sizeof(mr));
	mr.addr = (uintptr_t)umem_area;
	mr.len = size;
	mr.chunk_size = umem->config.frame_size;
	mr.headroom = umem->config.frame_headroom;
	mr.flags = umem->config.flags;

	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
	if (err) {
		err = -errno;
		goto out_socket;
	}

	err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
	if (err)
		goto out_socket;

	umem->fill_save = fill;
	umem->comp_save = comp;
	*umem_ptr = umem;
	return 0;

out_socket:
	close(umem->fd);
out_umem_alloc:
	free(umem);
	return err;
}

struct xsk_umem_config_v1 {
	__u32 fill_size;
	__u32 comp_size;
	__u32 frame_size;
	__u32 frame_headroom;
};

int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
			    __u64 size, struct xsk_ring_prod *fill,
			    struct xsk_ring_cons *comp,
			    const struct xsk_umem_config *usr_config)
{
	struct xsk_umem_config config;

	memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1));
	config.flags = 0;

	return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
				       &config);
}
COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)

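/* Illustrative sketch (not part of the library): a typical way for a caller
 * to back a umem with page-aligned memory and register it. NUM_FRAMES, the
 * error handling and the choice of anonymous mmap are assumptions for the
 * example.
 *
 *	struct xsk_ring_prod fill;
 *	struct xsk_ring_cons comp;
 *	struct xsk_umem *umem;
 *	__u64 size = NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE;
 *	void *bufs;
 *
 *	bufs = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	if (bufs == MAP_FAILED)
 *		return -errno;
 *
 *	err = xsk_umem__create(&umem, bufs, size, &fill, &comp, NULL);
 */
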
static int xsk_load_xdp_prog(struct xsk_socket *xsk)
{
	static const int log_buf_size = 16 * 1024;
	struct xsk_ctx *ctx = xsk->ctx;
	char log_buf[log_buf_size];
	int err, prog_fd;

	/* This is the C-program:
	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
	 * {
	 *     int ret, index = ctx->rx_queue_index;
	 *
	 *     // A set entry here means that the corresponding queue_id
	 *     // has an active AF_XDP socket bound to it.
	 *     ret = bpf_redirect_map(&xsks_map, index, XDP_PASS);
	 *     if (ret > 0)
	 *         return ret;
	 *
	 *     // Fallback for pre-5.3 kernels, not supporting default
	 *     // action in the flags parameter.
	 *     if (bpf_map_lookup_elem(&xsks_map, &index))
	 *         return bpf_redirect_map(&xsks_map, index, 0);
	 *     return XDP_PASS;
	 * }
	 */
	struct bpf_insn prog[] = {
		/* r2 = *(u32 *)(r1 + 16) */
		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
		/* *(u32 *)(r10 - 4) = r2 */
		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
		/* r1 = xskmap[] */
		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
		/* r3 = XDP_PASS */
		BPF_MOV64_IMM(BPF_REG_3, 2),
		/* call bpf_redirect_map */
		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
		/* if w0 > 0 goto pc+13 */
		BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13),
		/* r2 = r10 */
		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
		/* r2 += -4 */
		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
		/* r1 = xskmap[] */
		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
		/* call bpf_map_lookup_elem */
		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
		/* r1 = r0 */
		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
		/* r0 = XDP_PASS */
		BPF_MOV64_IMM(BPF_REG_0, 2),
		/* if r1 == 0 goto pc+5 */
		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
		/* r2 = *(u32 *)(r10 - 4) */
		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
		/* r1 = xskmap[] */
		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
		/* r3 = 0 */
		BPF_MOV64_IMM(BPF_REG_3, 0),
		/* call bpf_redirect_map */
		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
		/* The jumps are to this instruction */
		BPF_EXIT_INSN(),
	};
	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);

	prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt,
				   "LGPL-2.1 or BSD-2-Clause", 0, log_buf,
				   log_buf_size);
	if (prog_fd < 0) {
		pr_warn("BPF log buffer:\n%s", log_buf);
		return prog_fd;
	}

	err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd,
				  xsk->config.xdp_flags);
	if (err) {
		close(prog_fd);
		return err;
	}

	ctx->prog_fd = prog_fd;
	return 0;
}

static int xsk_get_max_queues(struct xsk_socket *xsk)
{
	struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
	struct xsk_ctx *ctx = xsk->ctx;
	struct ifreq ifr = {};
	int fd, err, ret;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return -errno;

	ifr.ifr_data = (void *)&channels;
	memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1);
	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
	err = ioctl(fd, SIOCETHTOOL, &ifr);
	if (err && errno != EOPNOTSUPP) {
		ret = -errno;
		goto out;
	}

	if (err) {
		/* If the device says it has no channels, then all traffic
		 * is sent to a single stream, so max queues = 1.
		 */
		ret = 1;
	} else {
		/* Take the max of rx, tx, combined. Drivers return
		 * the number of channels in different ways.
		 */
		ret = max(channels.max_rx, channels.max_tx);
		ret = max(ret, (int)channels.max_combined);
	}

out:
	close(fd);
	return ret;
}

static int xsk_create_bpf_maps(struct xsk_socket *xsk)
{
	struct xsk_ctx *ctx = xsk->ctx;
	int max_queues;
	int fd;

	max_queues = xsk_get_max_queues(xsk);
	if (max_queues < 0)
		return max_queues;

	fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
				 sizeof(int), sizeof(int), max_queues, 0);
	if (fd < 0)
		return fd;

	ctx->xsks_map_fd = fd;

	return 0;
}

static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
{
	struct xsk_ctx *ctx = xsk->ctx;

	bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id);
	close(ctx->xsks_map_fd);
}

static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
{
	__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
	__u32 map_len = sizeof(struct bpf_map_info);
	struct bpf_prog_info prog_info = {};
	struct xsk_ctx *ctx = xsk->ctx;
	struct bpf_map_info map_info;
	int fd, err;

	err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
	if (err)
		return err;

	num_maps = prog_info.nr_map_ids;

	map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
	if (!map_ids)
		return -ENOMEM;

	memset(&prog_info, 0, prog_len);
	prog_info.nr_map_ids = num_maps;
	prog_info.map_ids = (__u64)(unsigned long)map_ids;

	err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
	if (err)
		goto out_map_ids;

	ctx->xsks_map_fd = -1;

	for (i = 0; i < prog_info.nr_map_ids; i++) {
		fd = bpf_map_get_fd_by_id(map_ids[i]);
		if (fd < 0)
			continue;

		memset(&map_info, 0, map_len);
		err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
		if (err) {
			close(fd);
			continue;
		}

		if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) {
			ctx->xsks_map_fd = fd;
			break;
		}

		close(fd);
	}

	err = 0;
	if (ctx->xsks_map_fd == -1)
		err = -ENOENT;

out_map_ids:
	free(map_ids);
	return err;
}

static int xsk_set_bpf_maps(struct xsk_socket *xsk)
{
	struct xsk_ctx *ctx = xsk->ctx;

	return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id,
				   &xsk->fd, 0);
}

static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
{
	struct xsk_ctx *ctx = xsk->ctx;
	__u32 prog_id = 0;
	int err;

	err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id,
				  xsk->config.xdp_flags);
	if (err)
		return err;

	if (!prog_id) {
		err = xsk_create_bpf_maps(xsk);
		if (err)
			return err;

		err = xsk_load_xdp_prog(xsk);
		if (err) {
			xsk_delete_bpf_maps(xsk);
			return err;
		}
	} else {
		ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
		if (ctx->prog_fd < 0)
			return -errno;
		err = xsk_lookup_bpf_maps(xsk);
		if (err) {
			close(ctx->prog_fd);
			return err;
		}
	}

	if (xsk->rx)
		err = xsk_set_bpf_maps(xsk);
	if (err) {
		xsk_delete_bpf_maps(xsk);
		close(ctx->prog_fd);
		return err;
	}

	return 0;
}

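/* Illustrative sketch (not part of the library): an application that ships
 * its own XDP program can skip the default one loaded above by setting
 * XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD in libbpf_flags, and then insert the
 * socket into its own XSKMAP itself, mirroring xsk_set_bpf_maps(). The map
 * fd variable and queue id below are assumptions for the example.
 *
 *	cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
 *	// ... attach a custom XDP program that redirects into my_xsks_map ...
 *
 *	int fd = xsk_socket__fd(xsk);
 *
 *	err = bpf_map_update_elem(my_xsks_map_fd, &queue_id, &fd, 0);
 */
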
static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
				   __u32 queue_id)
{
	struct xsk_ctx *ctx;

	if (list_empty(&umem->ctx_list))
		return NULL;

	list_for_each_entry(ctx, &umem->ctx_list, list) {
		if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
			ctx->refcount++;
			return ctx;
		}
	}

	return NULL;
}

static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
{
	struct xsk_umem *umem = ctx->umem;
	struct xdp_mmap_offsets off;
	int err;

	if (--ctx->refcount)
		return;

	if (!unmap)
		goto out_free;

	err = xsk_get_mmap_offsets(umem->fd, &off);
	if (err)
		goto out_free;

	munmap(ctx->fill->ring - off.fr.desc,
	       off.fr.desc + umem->config.fill_size * sizeof(__u64));
	munmap(ctx->comp->ring - off.cr.desc,
	       off.cr.desc + umem->config.comp_size * sizeof(__u64));

out_free:
	list_del(&ctx->list);
	free(ctx);
}

static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
				      struct xsk_umem *umem, int ifindex,
				      const char *ifname, __u32 queue_id,
				      struct xsk_ring_prod *fill,
				      struct xsk_ring_cons *comp)
{
	struct xsk_ctx *ctx;
	int err;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx)
		return NULL;

	if (!umem->fill_save) {
		err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
		if (err) {
			free(ctx);
			return NULL;
		}
	} else if (umem->fill_save != fill || umem->comp_save != comp) {
		/* Copy over rings to new structs. */
		memcpy(fill, umem->fill_save, sizeof(*fill));
		memcpy(comp, umem->comp_save, sizeof(*comp));
	}

	ctx->ifindex = ifindex;
	ctx->refcount = 1;
	ctx->umem = umem;
	ctx->queue_id = queue_id;
	memcpy(ctx->ifname, ifname, IFNAMSIZ - 1);
	ctx->ifname[IFNAMSIZ - 1] = '\0';

	ctx->fill = fill;
	ctx->comp = comp;
	list_add(&ctx->list, &umem->ctx_list);
	return ctx;
}

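/* Illustrative sketch (not part of the library): sharing one umem between two
 * sockets bound to different queues of the same netdev. Each additional queue
 * needs its own fill/completion ring pair, which is why the shared variant
 * below takes them explicitly. Variable names and the interface/queue numbers
 * are assumptions for the example.
 *
 *	struct xsk_ring_prod fill_q1;
 *	struct xsk_ring_cons comp_q1;
 *
 *	err = xsk_socket__create(&xsk_q0, "eth0", 0, umem, &rx0, &tx0, &cfg);
 *	if (!err)
 *		err = xsk_socket__create_shared(&xsk_q1, "eth0", 1, umem,
 *						&rx1, &tx1, &fill_q1,
 *						&comp_q1, &cfg);
 */
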
int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
			      const char *ifname,
			      __u32 queue_id, struct xsk_umem *umem,
			      struct xsk_ring_cons *rx,
			      struct xsk_ring_prod *tx,
			      struct xsk_ring_prod *fill,
			      struct xsk_ring_cons *comp,
			      const struct xsk_socket_config *usr_config)
{
	bool unmap, rx_setup_done = false, tx_setup_done = false;
	void *rx_map = NULL, *tx_map = NULL;
	struct sockaddr_xdp sxdp = {};
	struct xdp_mmap_offsets off;
	struct xsk_socket *xsk;
	struct xsk_ctx *ctx;
	int err, ifindex;

	if (!umem || !xsk_ptr || !(rx || tx))
		return -EFAULT;

	unmap = umem->fill_save != fill;

	xsk = calloc(1, sizeof(*xsk));
	if (!xsk)
		return -ENOMEM;

	err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
	if (err)
		goto out_xsk_alloc;

	xsk->outstanding_tx = 0;
	ifindex = if_nametoindex(ifname);
	if (!ifindex) {
		err = -errno;
		goto out_xsk_alloc;
	}

	if (umem->refcount++ > 0) {
		xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
		if (xsk->fd < 0) {
			err = -errno;
			goto out_xsk_alloc;
		}
	} else {
		xsk->fd = umem->fd;
		rx_setup_done = umem->rx_ring_setup_done;
		tx_setup_done = umem->tx_ring_setup_done;
	}

	ctx = xsk_get_ctx(umem, ifindex, queue_id);
	if (!ctx) {
		if (!fill || !comp) {
			err = -EFAULT;
			goto out_socket;
		}

		ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id,
				     fill, comp);
		if (!ctx) {
			err = -ENOMEM;
			goto out_socket;
		}
	}
	xsk->ctx = ctx;

	if (rx && !rx_setup_done) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
				 &xsk->config.rx_size,
				 sizeof(xsk->config.rx_size));
		if (err) {
			err = -errno;
			goto out_put_ctx;
		}
		if (xsk->fd == umem->fd)
			umem->rx_ring_setup_done = true;
	}
	if (tx && !tx_setup_done) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
				 &xsk->config.tx_size,
				 sizeof(xsk->config.tx_size));
		if (err) {
			err = -errno;
			goto out_put_ctx;
		}
		if (xsk->fd == umem->fd)
			umem->tx_ring_setup_done = true;
	}

	err = xsk_get_mmap_offsets(xsk->fd, &off);
	if (err) {
		err = -errno;
		goto out_put_ctx;
	}

	if (rx) {
		rx_map = mmap(NULL, off.rx.desc +
			      xsk->config.rx_size * sizeof(struct xdp_desc),
			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			      xsk->fd, XDP_PGOFF_RX_RING);
		if (rx_map == MAP_FAILED) {
			err = -errno;
			goto out_put_ctx;
		}

		rx->mask = xsk->config.rx_size - 1;
		rx->size = xsk->config.rx_size;
		rx->producer = rx_map + off.rx.producer;
		rx->consumer = rx_map + off.rx.consumer;
		rx->flags = rx_map + off.rx.flags;
		rx->ring = rx_map + off.rx.desc;
		rx->cached_prod = *rx->producer;
		rx->cached_cons = *rx->consumer;
	}
	xsk->rx = rx;

	if (tx) {
		tx_map = mmap(NULL, off.tx.desc +
			      xsk->config.tx_size * sizeof(struct xdp_desc),
			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			      xsk->fd, XDP_PGOFF_TX_RING);
		if (tx_map == MAP_FAILED) {
			err = -errno;
			goto out_mmap_rx;
		}

		tx->mask = xsk->config.tx_size - 1;
		tx->size = xsk->config.tx_size;
		tx->producer = tx_map + off.tx.producer;
		tx->consumer = tx_map + off.tx.consumer;
		tx->flags = tx_map + off.tx.flags;
		tx->ring = tx_map + off.tx.desc;
		tx->cached_prod = *tx->producer;
		/* cached_cons is r->size bigger than the real consumer pointer
		 * See xsk_prod_nb_free
		 */
		tx->cached_cons = *tx->consumer + xsk->config.tx_size;
	}
	xsk->tx = tx;

	sxdp.sxdp_family = PF_XDP;
	sxdp.sxdp_ifindex = ctx->ifindex;
	sxdp.sxdp_queue_id = ctx->queue_id;
	if (umem->refcount > 1) {
		sxdp.sxdp_flags |= XDP_SHARED_UMEM;
		sxdp.sxdp_shared_umem_fd = umem->fd;
	} else {
		sxdp.sxdp_flags = xsk->config.bind_flags;
	}

	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
	if (err) {
		err = -errno;
		goto out_mmap_tx;
	}

	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
		err = xsk_setup_xdp_prog(xsk);
		if (err)
			goto out_mmap_tx;
	}

	*xsk_ptr = xsk;
	umem->fill_save = NULL;
	umem->comp_save = NULL;
	return 0;

out_mmap_tx:
	if (tx)
		munmap(tx_map, off.tx.desc +
		       xsk->config.tx_size * sizeof(struct xdp_desc));
out_mmap_rx:
	if (rx)
		munmap(rx_map, off.rx.desc +
		       xsk->config.rx_size * sizeof(struct xdp_desc));
out_put_ctx:
	xsk_put_ctx(ctx, unmap);
out_socket:
	if (--umem->refcount)
		close(xsk->fd);
out_xsk_alloc:
	free(xsk);
	return err;
}

int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
		       __u32 queue_id, struct xsk_umem *umem,
		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
		       const struct xsk_socket_config *usr_config)
{
	if (!umem)
		return -EFAULT;

	return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem,
					 rx, tx, umem->fill_save,
					 umem->comp_save, usr_config);
}

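/* Illustrative sketch (not part of the library): a minimal RX path built on
 * the ring helpers from xsk.h once the socket exists. BATCH_SIZE, bufs (the
 * umem area) and process_packet() are assumptions for the example.
 *
 *	__u32 idx_rx = 0, rcvd;
 *
 *	rcvd = xsk_ring_cons__peek(&rx, BATCH_SIZE, &idx_rx);
 *	for (__u32 i = 0; i < rcvd; i++) {
 *		const struct xdp_desc *desc =
 *			xsk_ring_cons__rx_desc(&rx, idx_rx + i);
 *		void *pkt = xsk_umem__get_data(bufs, desc->addr);
 *
 *		process_packet(pkt, desc->len);	// hypothetical callback
 *	}
 *	xsk_ring_cons__release(&rx, rcvd);
 *	// Consumed frames are then handed back to the kernel via the fill
 *	// ring, as in the fill-ring sketch earlier in this file.
 */
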
int xsk_umem__delete(struct xsk_umem *umem)
{
	struct xdp_mmap_offsets off;
	int err;

	if (!umem)
		return 0;

	if (umem->refcount)
		return -EBUSY;

	err = xsk_get_mmap_offsets(umem->fd, &off);
	if (!err && umem->fill_save && umem->comp_save) {
		munmap(umem->fill_save->ring - off.fr.desc,
		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
		munmap(umem->comp_save->ring - off.cr.desc,
		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
	}

	close(umem->fd);
	free(umem);

	return 0;
}

void xsk_socket__delete(struct xsk_socket *xsk)
{
	size_t desc_sz = sizeof(struct xdp_desc);
	struct xdp_mmap_offsets off;
	struct xsk_umem *umem;
	struct xsk_ctx *ctx;
	int err;

	if (!xsk)
		return;

	ctx = xsk->ctx;
	umem = ctx->umem;

	if (ctx->refcount == 1) {
		xsk_delete_bpf_maps(xsk);
		close(ctx->prog_fd);
	}

	xsk_put_ctx(ctx, true);

	err = xsk_get_mmap_offsets(xsk->fd, &off);
	if (!err) {
		if (xsk->rx) {
			munmap(xsk->rx->ring - off.rx.desc,
			       off.rx.desc + xsk->config.rx_size * desc_sz);
		}
		if (xsk->tx) {
			munmap(xsk->tx->ring - off.tx.desc,
			       off.tx.desc + xsk->config.tx_size * desc_sz);
		}
	}

	umem->refcount--;
	/* Do not close an fd that also has an associated umem connected
	 * to it.
	 */
	if (xsk->fd != umem->fd)
		close(xsk->fd);
	free(xsk);
}
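
/* Illustrative sketch (not part of the library): teardown order matters.
 * All sockets sharing a umem must be deleted before the umem itself,
 * otherwise xsk_umem__delete() sees a non-zero refcount and returns -EBUSY.
 * The socket and umem variables are assumptions carried over from the
 * earlier sketches.
 *
 *	xsk_socket__delete(xsk_q1);
 *	xsk_socket__delete(xsk_q0);
 *	err = xsk_umem__delete(umem);	// only now does this succeed
 */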