1// SPDX-License-Identifier: GPL-2.0-only 2#include <linux/module.h> 3#include <linux/errno.h> 4#include <linux/socket.h> 5#include <linux/skbuff.h> 6#include <linux/ip.h> 7#include <linux/icmp.h> 8#include <linux/udp.h> 9#include <linux/types.h> 10#include <linux/kernel.h> 11#include <net/genetlink.h> 12#include <net/gue.h> 13#include <net/fou.h> 14#include <net/ip.h> 15#include <net/protocol.h> 16#include <net/udp.h> 17#include <net/udp_tunnel.h> 18#include <net/xfrm.h> 19#include <uapi/linux/fou.h> 20#include <uapi/linux/genetlink.h> 21 22struct fou { 23 struct socket *sock; 24 u8 protocol; 25 u8 flags; 26 __be16 port; 27 u8 family; 28 u16 type; 29 struct list_head list; 30 struct rcu_head rcu; 31}; 32 33#define FOU_F_REMCSUM_NOPARTIAL BIT(0) 34 35struct fou_cfg { 36 u16 type; 37 u8 protocol; 38 u8 flags; 39 struct udp_port_cfg udp_config; 40}; 41 42static unsigned int fou_net_id; 43 44struct fou_net { 45 struct list_head fou_list; 46 struct mutex fou_lock; 47}; 48 49static inline struct fou *fou_from_sock(struct sock *sk) 50{ 51 return sk->sk_user_data; 52} 53 54static int fou_recv_pull(struct sk_buff *skb, struct fou *fou, size_t len) 55{ 56 /* Remove 'len' bytes from the packet (UDP header and 57 * FOU header if present). 58 */ 59 if (fou->family == AF_INET) 60 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len); 61 else 62 ipv6_hdr(skb)->payload_len = 63 htons(ntohs(ipv6_hdr(skb)->payload_len) - len); 64 65 __skb_pull(skb, len); 66 skb_postpull_rcsum(skb, udp_hdr(skb), len); 67 skb_reset_transport_header(skb); 68 return iptunnel_pull_offloads(skb); 69} 70 71static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) 72{ 73 struct fou *fou = fou_from_sock(sk); 74 75 if (!fou) 76 return 1; 77 78 if (fou_recv_pull(skb, fou, sizeof(struct udphdr))) 79 goto drop; 80 81 return -fou->protocol; 82 83drop: 84 kfree_skb(skb); 85 return 0; 86} 87 88static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, 89 void *data, size_t hdrlen, u8 ipproto, 90 bool nopartial) 91{ 92 __be16 *pd = data; 93 size_t start = ntohs(pd[0]); 94 size_t offset = ntohs(pd[1]); 95 size_t plen = sizeof(struct udphdr) + hdrlen + 96 max_t(size_t, offset + sizeof(u16), start); 97 98 if (skb->remcsum_offload) 99 return guehdr; 100 101 if (!pskb_may_pull(skb, plen)) 102 return NULL; 103 guehdr = (struct guehdr *)&udp_hdr(skb)[1]; 104 105 skb_remcsum_process(skb, (void *)guehdr + hdrlen, 106 start, offset, nopartial); 107 108 return guehdr; 109} 110 111static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr) 112{ 113 /* No support yet */ 114 kfree_skb(skb); 115 return 0; 116} 117 118static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) 119{ 120 struct fou *fou = fou_from_sock(sk); 121 size_t len, optlen, hdrlen; 122 struct guehdr *guehdr; 123 void *data; 124 u16 doffset = 0; 125 u8 proto_ctype; 126 127 if (!fou) 128 return 1; 129 130 len = sizeof(struct udphdr) + sizeof(struct guehdr); 131 if (!pskb_may_pull(skb, len)) 132 goto drop; 133 134 guehdr = (struct guehdr *)&udp_hdr(skb)[1]; 135 136 switch (guehdr->version) { 137 case 0: /* Full GUE header present */ 138 break; 139 140 case 1: { 141 /* Direct encapsulation of IPv4 or IPv6 */ 142 143 int prot; 144 145 switch (((struct iphdr *)guehdr)->version) { 146 case 4: 147 prot = IPPROTO_IPIP; 148 break; 149 case 6: 150 prot = IPPROTO_IPV6; 151 break; 152 default: 153 goto drop; 154 } 155 156 if (fou_recv_pull(skb, fou, sizeof(struct udphdr))) 157 goto drop; 158 159 return -prot; 160 } 161 162 default: /* Undefined version */ 163 goto drop; 164 } 165 166 optlen = guehdr->hlen << 2; 167 len += optlen; 168 169 if (!pskb_may_pull(skb, len)) 170 goto drop; 171 172 /* guehdr may change after pull */ 173 guehdr = (struct guehdr *)&udp_hdr(skb)[1]; 174 175 if (validate_gue_flags(guehdr, optlen)) 176 goto drop; 177 178 hdrlen = sizeof(struct guehdr) + optlen; 179 180 if (fou->family == AF_INET) 181 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len); 182 else 183 ipv6_hdr(skb)->payload_len = 184 htons(ntohs(ipv6_hdr(skb)->payload_len) - len); 185 186 /* Pull csum through the guehdr now . This can be used if 187 * there is a remote checksum offload. 188 */ 189 skb_postpull_rcsum(skb, udp_hdr(skb), len); 190 191 data = &guehdr[1]; 192 193 if (guehdr->flags & GUE_FLAG_PRIV) { 194 __be32 flags = *(__be32 *)(data + doffset); 195 196 doffset += GUE_LEN_PRIV; 197 198 if (flags & GUE_PFLAG_REMCSUM) { 199 guehdr = gue_remcsum(skb, guehdr, data + doffset, 200 hdrlen, guehdr->proto_ctype, 201 !!(fou->flags & 202 FOU_F_REMCSUM_NOPARTIAL)); 203 if (!guehdr) 204 goto drop; 205 206 data = &guehdr[1]; 207 208 doffset += GUE_PLEN_REMCSUM; 209 } 210 } 211 212 if (unlikely(guehdr->control)) 213 return gue_control_message(skb, guehdr); 214 215 proto_ctype = guehdr->proto_ctype; 216 __skb_pull(skb, sizeof(struct udphdr) + hdrlen); 217 skb_reset_transport_header(skb); 218 219 if (iptunnel_pull_offloads(skb)) 220 goto drop; 221 222 return -proto_ctype; 223 224drop: 225 kfree_skb(skb); 226 return 0; 227} 228 229static struct sk_buff *fou_gro_receive(struct sock *sk, 230 struct list_head *head, 231 struct sk_buff *skb) 232{ 233 u8 proto = fou_from_sock(sk)->protocol; 234 const struct net_offload **offloads; 235 const struct net_offload *ops; 236 struct sk_buff *pp = NULL; 237 238 /* We can clear the encap_mark for FOU as we are essentially doing 239 * one of two possible things. We are either adding an L4 tunnel 240 * header to the outer L3 tunnel header, or we are simply 241 * treating the GRE tunnel header as though it is a UDP protocol 242 * specific header such as VXLAN or GENEVE. 243 */ 244 NAPI_GRO_CB(skb)->encap_mark = 0; 245 246 /* Flag this frame as already having an outer encap header */ 247 NAPI_GRO_CB(skb)->is_fou = 1; 248 249 rcu_read_lock(); 250 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 251 ops = rcu_dereference(offloads[proto]); 252 if (!ops || !ops->callbacks.gro_receive) 253 goto out_unlock; 254 255 pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); 256 257out_unlock: 258 rcu_read_unlock(); 259 260 return pp; 261} 262 263static int fou_gro_complete(struct sock *sk, struct sk_buff *skb, 264 int nhoff) 265{ 266 const struct net_offload *ops; 267 u8 proto = fou_from_sock(sk)->protocol; 268 int err = -ENOSYS; 269 const struct net_offload **offloads; 270 271 rcu_read_lock(); 272 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 273 ops = rcu_dereference(offloads[proto]); 274 if (WARN_ON(!ops || !ops->callbacks.gro_complete)) 275 goto out_unlock; 276 277 err = ops->callbacks.gro_complete(skb, nhoff); 278 279 skb_set_inner_mac_header(skb, nhoff); 280 281out_unlock: 282 rcu_read_unlock(); 283 284 return err; 285} 286 287static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, 288 struct guehdr *guehdr, void *data, 289 size_t hdrlen, struct gro_remcsum *grc, 290 bool nopartial) 291{ 292 __be16 *pd = data; 293 size_t start = ntohs(pd[0]); 294 size_t offset = ntohs(pd[1]); 295 296 if (skb->remcsum_offload) 297 return guehdr; 298 299 if (!NAPI_GRO_CB(skb)->csum_valid) 300 return NULL; 301 302 guehdr = skb_gro_remcsum_process(skb, (void *)guehdr, off, hdrlen, 303 start, offset, grc, nopartial); 304 305 skb->remcsum_offload = 1; 306 307 return guehdr; 308} 309 310static struct sk_buff *gue_gro_receive(struct sock *sk, 311 struct list_head *head, 312 struct sk_buff *skb) 313{ 314 const struct net_offload **offloads; 315 const struct net_offload *ops; 316 struct sk_buff *pp = NULL; 317 struct sk_buff *p; 318 struct guehdr *guehdr; 319 size_t len, optlen, hdrlen, off; 320 void *data; 321 u16 doffset = 0; 322 int flush = 1; 323 struct fou *fou = fou_from_sock(sk); 324 struct gro_remcsum grc; 325 u8 proto; 326 327 skb_gro_remcsum_init(&grc); 328 329 off = skb_gro_offset(skb); 330 len = off + sizeof(*guehdr); 331 332 guehdr = skb_gro_header_fast(skb, off); 333 if (skb_gro_header_hard(skb, len)) { 334 guehdr = skb_gro_header_slow(skb, len, off); 335 if (unlikely(!guehdr)) 336 goto out; 337 } 338 339 switch (guehdr->version) { 340 case 0: 341 break; 342 case 1: 343 switch (((struct iphdr *)guehdr)->version) { 344 case 4: 345 proto = IPPROTO_IPIP; 346 break; 347 case 6: 348 proto = IPPROTO_IPV6; 349 break; 350 default: 351 goto out; 352 } 353 goto next_proto; 354 default: 355 goto out; 356 } 357 358 optlen = guehdr->hlen << 2; 359 len += optlen; 360 361 if (skb_gro_header_hard(skb, len)) { 362 guehdr = skb_gro_header_slow(skb, len, off); 363 if (unlikely(!guehdr)) 364 goto out; 365 } 366 367 if (unlikely(guehdr->control) || guehdr->version != 0 || 368 validate_gue_flags(guehdr, optlen)) 369 goto out; 370 371 hdrlen = sizeof(*guehdr) + optlen; 372 373 /* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr, 374 * this is needed if there is a remote checkcsum offload. 375 */ 376 skb_gro_postpull_rcsum(skb, guehdr, hdrlen); 377 378 data = &guehdr[1]; 379 380 if (guehdr->flags & GUE_FLAG_PRIV) { 381 __be32 flags = *(__be32 *)(data + doffset); 382 383 doffset += GUE_LEN_PRIV; 384 385 if (flags & GUE_PFLAG_REMCSUM) { 386 guehdr = gue_gro_remcsum(skb, off, guehdr, 387 data + doffset, hdrlen, &grc, 388 !!(fou->flags & 389 FOU_F_REMCSUM_NOPARTIAL)); 390 391 if (!guehdr) 392 goto out; 393 394 data = &guehdr[1]; 395 396 doffset += GUE_PLEN_REMCSUM; 397 } 398 } 399 400 skb_gro_pull(skb, hdrlen); 401 402 list_for_each_entry(p, head, list) { 403 const struct guehdr *guehdr2; 404 405 if (!NAPI_GRO_CB(p)->same_flow) 406 continue; 407 408 guehdr2 = (struct guehdr *)(p->data + off); 409 410 /* Compare base GUE header to be equal (covers 411 * hlen, version, proto_ctype, and flags. 412 */ 413 if (guehdr->word != guehdr2->word) { 414 NAPI_GRO_CB(p)->same_flow = 0; 415 continue; 416 } 417 418 /* Compare optional fields are the same. */ 419 if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1], 420 guehdr->hlen << 2)) { 421 NAPI_GRO_CB(p)->same_flow = 0; 422 continue; 423 } 424 } 425 426 proto = guehdr->proto_ctype; 427 428next_proto: 429 430 /* We can clear the encap_mark for GUE as we are essentially doing 431 * one of two possible things. We are either adding an L4 tunnel 432 * header to the outer L3 tunnel header, or we are simply 433 * treating the GRE tunnel header as though it is a UDP protocol 434 * specific header such as VXLAN or GENEVE. 435 */ 436 NAPI_GRO_CB(skb)->encap_mark = 0; 437 438 /* Flag this frame as already having an outer encap header */ 439 NAPI_GRO_CB(skb)->is_fou = 1; 440 441 rcu_read_lock(); 442 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 443 ops = rcu_dereference(offloads[proto]); 444 if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive)) 445 goto out_unlock; 446 447 pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); 448 flush = 0; 449 450out_unlock: 451 rcu_read_unlock(); 452out: 453 skb_gro_flush_final_remcsum(skb, pp, flush, &grc); 454 455 return pp; 456} 457 458static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) 459{ 460 const struct net_offload **offloads; 461 struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff); 462 const struct net_offload *ops; 463 unsigned int guehlen = 0; 464 u8 proto; 465 int err = -ENOENT; 466 467 switch (guehdr->version) { 468 case 0: 469 proto = guehdr->proto_ctype; 470 guehlen = sizeof(*guehdr) + (guehdr->hlen << 2); 471 break; 472 case 1: 473 switch (((struct iphdr *)guehdr)->version) { 474 case 4: 475 proto = IPPROTO_IPIP; 476 break; 477 case 6: 478 proto = IPPROTO_IPV6; 479 break; 480 default: 481 return err; 482 } 483 break; 484 default: 485 return err; 486 } 487 488 rcu_read_lock(); 489 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 490 ops = rcu_dereference(offloads[proto]); 491 if (WARN_ON(!ops || !ops->callbacks.gro_complete)) 492 goto out_unlock; 493 494 err = ops->callbacks.gro_complete(skb, nhoff + guehlen); 495 496 skb_set_inner_mac_header(skb, nhoff + guehlen); 497 498out_unlock: 499 rcu_read_unlock(); 500 return err; 501} 502 503static bool fou_cfg_cmp(struct fou *fou, struct fou_cfg *cfg) 504{ 505 struct sock *sk = fou->sock->sk; 506 struct udp_port_cfg *udp_cfg = &cfg->udp_config; 507 508 if (fou->family != udp_cfg->family || 509 fou->port != udp_cfg->local_udp_port || 510 sk->sk_dport != udp_cfg->peer_udp_port || 511 sk->sk_bound_dev_if != udp_cfg->bind_ifindex) 512 return false; 513 514 if (fou->family == AF_INET) { 515 if (sk->sk_rcv_saddr != udp_cfg->local_ip.s_addr || 516 sk->sk_daddr != udp_cfg->peer_ip.s_addr) 517 return false; 518 else 519 return true; 520#if IS_ENABLED(CONFIG_IPV6) 521 } else { 522 if (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, &udp_cfg->local_ip6) || 523 ipv6_addr_cmp(&sk->sk_v6_daddr, &udp_cfg->peer_ip6)) 524 return false; 525 else 526 return true; 527#endif 528 } 529 530 return false; 531} 532 533static int fou_add_to_port_list(struct net *net, struct fou *fou, 534 struct fou_cfg *cfg) 535{ 536 struct fou_net *fn = net_generic(net, fou_net_id); 537 struct fou *fout; 538 539 mutex_lock(&fn->fou_lock); 540 list_for_each_entry(fout, &fn->fou_list, list) { 541 if (fou_cfg_cmp(fout, cfg)) { 542 mutex_unlock(&fn->fou_lock); 543 return -EALREADY; 544 } 545 } 546 547 list_add(&fou->list, &fn->fou_list); 548 mutex_unlock(&fn->fou_lock); 549 550 return 0; 551} 552 553static void fou_release(struct fou *fou) 554{ 555 struct socket *sock = fou->sock; 556 557 list_del(&fou->list); 558 udp_tunnel_sock_release(sock); 559 560 kfree_rcu(fou, rcu); 561} 562 563static int fou_create(struct net *net, struct fou_cfg *cfg, 564 struct socket **sockp) 565{ 566 struct socket *sock = NULL; 567 struct fou *fou = NULL; 568 struct sock *sk; 569 struct udp_tunnel_sock_cfg tunnel_cfg; 570 int err; 571 572 /* Open UDP socket */ 573 err = udp_sock_create(net, &cfg->udp_config, &sock); 574 if (err < 0) 575 goto error; 576 577 /* Allocate FOU port structure */ 578 fou = kzalloc(sizeof(*fou), GFP_KERNEL); 579 if (!fou) { 580 err = -ENOMEM; 581 goto error; 582 } 583 584 sk = sock->sk; 585 586 fou->port = cfg->udp_config.local_udp_port; 587 fou->family = cfg->udp_config.family; 588 fou->flags = cfg->flags; 589 fou->type = cfg->type; 590 fou->sock = sock; 591 592 memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); 593 tunnel_cfg.encap_type = 1; 594 tunnel_cfg.sk_user_data = fou; 595 tunnel_cfg.encap_destroy = NULL; 596 597 /* Initial for fou type */ 598 switch (cfg->type) { 599 case FOU_ENCAP_DIRECT: 600 tunnel_cfg.encap_rcv = fou_udp_recv; 601 tunnel_cfg.gro_receive = fou_gro_receive; 602 tunnel_cfg.gro_complete = fou_gro_complete; 603 fou->protocol = cfg->protocol; 604 break; 605 case FOU_ENCAP_GUE: 606 tunnel_cfg.encap_rcv = gue_udp_recv; 607 tunnel_cfg.gro_receive = gue_gro_receive; 608 tunnel_cfg.gro_complete = gue_gro_complete; 609 break; 610 default: 611 err = -EINVAL; 612 goto error; 613 } 614 615 setup_udp_tunnel_sock(net, sock, &tunnel_cfg); 616 617 sk->sk_allocation = GFP_ATOMIC; 618 619 err = fou_add_to_port_list(net, fou, cfg); 620 if (err) 621 goto error; 622 623 if (sockp) 624 *sockp = sock; 625 626 return 0; 627 628error: 629 kfree(fou); 630 if (sock) 631 udp_tunnel_sock_release(sock); 632 633 return err; 634} 635 636static int fou_destroy(struct net *net, struct fou_cfg *cfg) 637{ 638 struct fou_net *fn = net_generic(net, fou_net_id); 639 int err = -EINVAL; 640 struct fou *fou; 641 642 mutex_lock(&fn->fou_lock); 643 list_for_each_entry(fou, &fn->fou_list, list) { 644 if (fou_cfg_cmp(fou, cfg)) { 645 fou_release(fou); 646 err = 0; 647 break; 648 } 649 } 650 mutex_unlock(&fn->fou_lock); 651 652 return err; 653} 654 655static struct genl_family fou_nl_family; 656 657static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { 658 [FOU_ATTR_PORT] = { .type = NLA_U16, }, 659 [FOU_ATTR_AF] = { .type = NLA_U8, }, 660 [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, 661 [FOU_ATTR_TYPE] = { .type = NLA_U8, }, 662 [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, }, 663 [FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, }, 664 [FOU_ATTR_PEER_V4] = { .type = NLA_U32, }, 665 [FOU_ATTR_LOCAL_V6] = { .len = sizeof(struct in6_addr), }, 666 [FOU_ATTR_PEER_V6] = { .len = sizeof(struct in6_addr), }, 667 [FOU_ATTR_PEER_PORT] = { .type = NLA_U16, }, 668 [FOU_ATTR_IFINDEX] = { .type = NLA_S32, }, 669}; 670 671static int parse_nl_config(struct genl_info *info, 672 struct fou_cfg *cfg) 673{ 674 bool has_local = false, has_peer = false; 675 struct nlattr *attr; 676 int ifindex; 677 __be16 port; 678 679 memset(cfg, 0, sizeof(*cfg)); 680 681 cfg->udp_config.family = AF_INET; 682 683 if (info->attrs[FOU_ATTR_AF]) { 684 u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]); 685 686 switch (family) { 687 case AF_INET: 688 break; 689 case AF_INET6: 690 cfg->udp_config.ipv6_v6only = 1; 691 break; 692 default: 693 return -EAFNOSUPPORT; 694 } 695 696 cfg->udp_config.family = family; 697 } 698 699 if (info->attrs[FOU_ATTR_PORT]) { 700 port = nla_get_be16(info->attrs[FOU_ATTR_PORT]); 701 cfg->udp_config.local_udp_port = port; 702 } 703 704 if (info->attrs[FOU_ATTR_IPPROTO]) 705 cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]); 706 707 if (info->attrs[FOU_ATTR_TYPE]) 708 cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]); 709 710 if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL]) 711 cfg->flags |= FOU_F_REMCSUM_NOPARTIAL; 712 713 if (cfg->udp_config.family == AF_INET) { 714 if (info->attrs[FOU_ATTR_LOCAL_V4]) { 715 attr = info->attrs[FOU_ATTR_LOCAL_V4]; 716 cfg->udp_config.local_ip.s_addr = nla_get_in_addr(attr); 717 has_local = true; 718 } 719 720 if (info->attrs[FOU_ATTR_PEER_V4]) { 721 attr = info->attrs[FOU_ATTR_PEER_V4]; 722 cfg->udp_config.peer_ip.s_addr = nla_get_in_addr(attr); 723 has_peer = true; 724 } 725#if IS_ENABLED(CONFIG_IPV6) 726 } else { 727 if (info->attrs[FOU_ATTR_LOCAL_V6]) { 728 attr = info->attrs[FOU_ATTR_LOCAL_V6]; 729 cfg->udp_config.local_ip6 = nla_get_in6_addr(attr); 730 has_local = true; 731 } 732 733 if (info->attrs[FOU_ATTR_PEER_V6]) { 734 attr = info->attrs[FOU_ATTR_PEER_V6]; 735 cfg->udp_config.peer_ip6 = nla_get_in6_addr(attr); 736 has_peer = true; 737 } 738#endif 739 } 740 741 if (has_peer) { 742 if (info->attrs[FOU_ATTR_PEER_PORT]) { 743 port = nla_get_be16(info->attrs[FOU_ATTR_PEER_PORT]); 744 cfg->udp_config.peer_udp_port = port; 745 } else { 746 return -EINVAL; 747 } 748 } 749 750 if (info->attrs[FOU_ATTR_IFINDEX]) { 751 if (!has_local) 752 return -EINVAL; 753 754 ifindex = nla_get_s32(info->attrs[FOU_ATTR_IFINDEX]); 755 756 cfg->udp_config.bind_ifindex = ifindex; 757 } 758 759 return 0; 760} 761 762static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info) 763{ 764 struct net *net = genl_info_net(info); 765 struct fou_cfg cfg; 766 int err; 767 768 err = parse_nl_config(info, &cfg); 769 if (err) 770 return err; 771 772 return fou_create(net, &cfg, NULL); 773} 774 775static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info) 776{ 777 struct net *net = genl_info_net(info); 778 struct fou_cfg cfg; 779 int err; 780 781 err = parse_nl_config(info, &cfg); 782 if (err) 783 return err; 784 785 return fou_destroy(net, &cfg); 786} 787 788static int fou_fill_info(struct fou *fou, struct sk_buff *msg) 789{ 790 struct sock *sk = fou->sock->sk; 791 792 if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) || 793 nla_put_be16(msg, FOU_ATTR_PORT, fou->port) || 794 nla_put_be16(msg, FOU_ATTR_PEER_PORT, sk->sk_dport) || 795 nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) || 796 nla_put_u8(msg, FOU_ATTR_TYPE, fou->type) || 797 nla_put_s32(msg, FOU_ATTR_IFINDEX, sk->sk_bound_dev_if)) 798 return -1; 799 800 if (fou->flags & FOU_F_REMCSUM_NOPARTIAL) 801 if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL)) 802 return -1; 803 804 if (fou->sock->sk->sk_family == AF_INET) { 805 if (nla_put_in_addr(msg, FOU_ATTR_LOCAL_V4, sk->sk_rcv_saddr)) 806 return -1; 807 808 if (nla_put_in_addr(msg, FOU_ATTR_PEER_V4, sk->sk_daddr)) 809 return -1; 810#if IS_ENABLED(CONFIG_IPV6) 811 } else { 812 if (nla_put_in6_addr(msg, FOU_ATTR_LOCAL_V6, 813 &sk->sk_v6_rcv_saddr)) 814 return -1; 815 816 if (nla_put_in6_addr(msg, FOU_ATTR_PEER_V6, &sk->sk_v6_daddr)) 817 return -1; 818#endif 819 } 820 821 return 0; 822} 823 824static int fou_dump_info(struct fou *fou, u32 portid, u32 seq, 825 u32 flags, struct sk_buff *skb, u8 cmd) 826{ 827 void *hdr; 828 829 hdr = genlmsg_put(skb, portid, seq, &fou_nl_family, flags, cmd); 830 if (!hdr) 831 return -ENOMEM; 832 833 if (fou_fill_info(fou, skb) < 0) 834 goto nla_put_failure; 835 836 genlmsg_end(skb, hdr); 837 return 0; 838 839nla_put_failure: 840 genlmsg_cancel(skb, hdr); 841 return -EMSGSIZE; 842} 843 844static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) 845{ 846 struct net *net = genl_info_net(info); 847 struct fou_net *fn = net_generic(net, fou_net_id); 848 struct sk_buff *msg; 849 struct fou_cfg cfg; 850 struct fou *fout; 851 __be16 port; 852 u8 family; 853 int ret; 854 855 ret = parse_nl_config(info, &cfg); 856 if (ret) 857 return ret; 858 port = cfg.udp_config.local_udp_port; 859 if (port == 0) 860 return -EINVAL; 861 862 family = cfg.udp_config.family; 863 if (family != AF_INET && family != AF_INET6) 864 return -EINVAL; 865 866 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 867 if (!msg) 868 return -ENOMEM; 869 870 ret = -ESRCH; 871 mutex_lock(&fn->fou_lock); 872 list_for_each_entry(fout, &fn->fou_list, list) { 873 if (fou_cfg_cmp(fout, &cfg)) { 874 ret = fou_dump_info(fout, info->snd_portid, 875 info->snd_seq, 0, msg, 876 info->genlhdr->cmd); 877 break; 878 } 879 } 880 mutex_unlock(&fn->fou_lock); 881 if (ret < 0) 882 goto out_free; 883 884 return genlmsg_reply(msg, info); 885 886out_free: 887 nlmsg_free(msg); 888 return ret; 889} 890 891static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) 892{ 893 struct net *net = sock_net(skb->sk); 894 struct fou_net *fn = net_generic(net, fou_net_id); 895 struct fou *fout; 896 int idx = 0, ret; 897 898 mutex_lock(&fn->fou_lock); 899 list_for_each_entry(fout, &fn->fou_list, list) { 900 if (idx++ < cb->args[0]) 901 continue; 902 ret = fou_dump_info(fout, NETLINK_CB(cb->skb).portid, 903 cb->nlh->nlmsg_seq, NLM_F_MULTI, 904 skb, FOU_CMD_GET); 905 if (ret) 906 break; 907 } 908 mutex_unlock(&fn->fou_lock); 909 910 cb->args[0] = idx; 911 return skb->len; 912} 913 914static const struct genl_small_ops fou_nl_ops[] = { 915 { 916 .cmd = FOU_CMD_ADD, 917 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 918 .doit = fou_nl_cmd_add_port, 919 .flags = GENL_ADMIN_PERM, 920 }, 921 { 922 .cmd = FOU_CMD_DEL, 923 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 924 .doit = fou_nl_cmd_rm_port, 925 .flags = GENL_ADMIN_PERM, 926 }, 927 { 928 .cmd = FOU_CMD_GET, 929 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 930 .doit = fou_nl_cmd_get_port, 931 .dumpit = fou_nl_dump, 932 }, 933}; 934 935static struct genl_family fou_nl_family __ro_after_init = { 936 .hdrsize = 0, 937 .name = FOU_GENL_NAME, 938 .version = FOU_GENL_VERSION, 939 .maxattr = FOU_ATTR_MAX, 940 .policy = fou_nl_policy, 941 .netnsok = true, 942 .module = THIS_MODULE, 943 .small_ops = fou_nl_ops, 944 .n_small_ops = ARRAY_SIZE(fou_nl_ops), 945}; 946 947size_t fou_encap_hlen(struct ip_tunnel_encap *e) 948{ 949 return sizeof(struct udphdr); 950} 951EXPORT_SYMBOL(fou_encap_hlen); 952 953size_t gue_encap_hlen(struct ip_tunnel_encap *e) 954{ 955 size_t len; 956 bool need_priv = false; 957 958 len = sizeof(struct udphdr) + sizeof(struct guehdr); 959 960 if (e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) { 961 len += GUE_PLEN_REMCSUM; 962 need_priv = true; 963 } 964 965 len += need_priv ? GUE_LEN_PRIV : 0; 966 967 return len; 968} 969EXPORT_SYMBOL(gue_encap_hlen); 970 971int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, 972 u8 *protocol, __be16 *sport, int type) 973{ 974 int err; 975 976 err = iptunnel_handle_offloads(skb, type); 977 if (err) 978 return err; 979 980 *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), 981 skb, 0, 0, false); 982 983 return 0; 984} 985EXPORT_SYMBOL(__fou_build_header); 986 987int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, 988 u8 *protocol, __be16 *sport, int type) 989{ 990 struct guehdr *guehdr; 991 size_t hdrlen, optlen = 0; 992 void *data; 993 bool need_priv = false; 994 int err; 995 996 if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) && 997 skb->ip_summed == CHECKSUM_PARTIAL) { 998 optlen += GUE_PLEN_REMCSUM; 999 type |= SKB_GSO_TUNNEL_REMCSUM; 1000 need_priv = true; 1001 } 1002 1003 optlen += need_priv ? GUE_LEN_PRIV : 0; 1004 1005 err = iptunnel_handle_offloads(skb, type); 1006 if (err) 1007 return err; 1008 1009 /* Get source port (based on flow hash) before skb_push */ 1010 *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), 1011 skb, 0, 0, false); 1012 1013 hdrlen = sizeof(struct guehdr) + optlen; 1014 1015 skb_push(skb, hdrlen); 1016 1017 guehdr = (struct guehdr *)skb->data; 1018 1019 guehdr->control = 0; 1020 guehdr->version = 0; 1021 guehdr->hlen = optlen >> 2; 1022 guehdr->flags = 0; 1023 guehdr->proto_ctype = *protocol; 1024 1025 data = &guehdr[1]; 1026 1027 if (need_priv) { 1028 __be32 *flags = data; 1029 1030 guehdr->flags |= GUE_FLAG_PRIV; 1031 *flags = 0; 1032 data += GUE_LEN_PRIV; 1033 1034 if (type & SKB_GSO_TUNNEL_REMCSUM) { 1035 u16 csum_start = skb_checksum_start_offset(skb); 1036 __be16 *pd = data; 1037 1038 if (csum_start < hdrlen) 1039 return -EINVAL; 1040 1041 csum_start -= hdrlen; 1042 pd[0] = htons(csum_start); 1043 pd[1] = htons(csum_start + skb->csum_offset); 1044 1045 if (!skb_is_gso(skb)) { 1046 skb->ip_summed = CHECKSUM_NONE; 1047 skb->encapsulation = 0; 1048 } 1049 1050 *flags |= GUE_PFLAG_REMCSUM; 1051 data += GUE_PLEN_REMCSUM; 1052 } 1053 1054 } 1055 1056 return 0; 1057} 1058EXPORT_SYMBOL(__gue_build_header); 1059 1060#ifdef CONFIG_NET_FOU_IP_TUNNELS 1061 1062static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, 1063 struct flowi4 *fl4, u8 *protocol, __be16 sport) 1064{ 1065 struct udphdr *uh; 1066 1067 skb_push(skb, sizeof(struct udphdr)); 1068 skb_reset_transport_header(skb); 1069 1070 uh = udp_hdr(skb); 1071 1072 uh->dest = e->dport; 1073 uh->source = sport; 1074 uh->len = htons(skb->len); 1075 udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb, 1076 fl4->saddr, fl4->daddr, skb->len); 1077 1078 *protocol = IPPROTO_UDP; 1079} 1080 1081static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, 1082 u8 *protocol, struct flowi4 *fl4) 1083{ 1084 int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : 1085 SKB_GSO_UDP_TUNNEL; 1086 __be16 sport; 1087 int err; 1088 1089 err = __fou_build_header(skb, e, protocol, &sport, type); 1090 if (err) 1091 return err; 1092 1093 fou_build_udp(skb, e, fl4, protocol, sport); 1094 1095 return 0; 1096} 1097 1098static int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, 1099 u8 *protocol, struct flowi4 *fl4) 1100{ 1101 int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : 1102 SKB_GSO_UDP_TUNNEL; 1103 __be16 sport; 1104 int err; 1105 1106 err = __gue_build_header(skb, e, protocol, &sport, type); 1107 if (err) 1108 return err; 1109 1110 fou_build_udp(skb, e, fl4, protocol, sport); 1111 1112 return 0; 1113} 1114 1115static int gue_err_proto_handler(int proto, struct sk_buff *skb, u32 info) 1116{ 1117 const struct net_protocol *ipprot = rcu_dereference(inet_protos[proto]); 1118 1119 if (ipprot && ipprot->err_handler) { 1120 if (!ipprot->err_handler(skb, info)) 1121 return 0; 1122 } 1123 1124 return -ENOENT; 1125} 1126 1127static int gue_err(struct sk_buff *skb, u32 info) 1128{ 1129 int transport_offset = skb_transport_offset(skb); 1130 struct guehdr *guehdr; 1131 size_t len, optlen; 1132 int ret; 1133 1134 len = sizeof(struct udphdr) + sizeof(struct guehdr); 1135 if (!pskb_may_pull(skb, transport_offset + len)) 1136 return -EINVAL; 1137 1138 guehdr = (struct guehdr *)&udp_hdr(skb)[1]; 1139 1140 switch (guehdr->version) { 1141 case 0: /* Full GUE header present */ 1142 break; 1143 case 1: { 1144 /* Direct encapsulation of IPv4 or IPv6 */ 1145 skb_set_transport_header(skb, -(int)sizeof(struct icmphdr)); 1146 1147 switch (((struct iphdr *)guehdr)->version) { 1148 case 4: 1149 ret = gue_err_proto_handler(IPPROTO_IPIP, skb, info); 1150 goto out; 1151#if IS_ENABLED(CONFIG_IPV6) 1152 case 6: 1153 ret = gue_err_proto_handler(IPPROTO_IPV6, skb, info); 1154 goto out; 1155#endif 1156 default: 1157 ret = -EOPNOTSUPP; 1158 goto out; 1159 } 1160 } 1161 default: /* Undefined version */ 1162 return -EOPNOTSUPP; 1163 } 1164 1165 if (guehdr->control) 1166 return -ENOENT; 1167 1168 optlen = guehdr->hlen << 2; 1169 1170 if (!pskb_may_pull(skb, transport_offset + len + optlen)) 1171 return -EINVAL; 1172 1173 guehdr = (struct guehdr *)&udp_hdr(skb)[1]; 1174 if (validate_gue_flags(guehdr, optlen)) 1175 return -EINVAL; 1176 1177 /* Handling exceptions for direct UDP encapsulation in GUE would lead to 1178 * recursion. Besides, this kind of encapsulation can't even be 1179 * configured currently. Discard this. 1180 */ 1181 if (guehdr->proto_ctype == IPPROTO_UDP || 1182 guehdr->proto_ctype == IPPROTO_UDPLITE) 1183 return -EOPNOTSUPP; 1184 1185 skb_set_transport_header(skb, -(int)sizeof(struct icmphdr)); 1186 ret = gue_err_proto_handler(guehdr->proto_ctype, skb, info); 1187 1188out: 1189 skb_set_transport_header(skb, transport_offset); 1190 return ret; 1191} 1192 1193 1194static const struct ip_tunnel_encap_ops fou_iptun_ops = { 1195 .encap_hlen = fou_encap_hlen, 1196 .build_header = fou_build_header, 1197 .err_handler = gue_err, 1198}; 1199 1200static const struct ip_tunnel_encap_ops gue_iptun_ops = { 1201 .encap_hlen = gue_encap_hlen, 1202 .build_header = gue_build_header, 1203 .err_handler = gue_err, 1204}; 1205 1206static int ip_tunnel_encap_add_fou_ops(void) 1207{ 1208 int ret; 1209 1210 ret = ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU); 1211 if (ret < 0) { 1212 pr_err("can't add fou ops\n"); 1213 return ret; 1214 } 1215 1216 ret = ip_tunnel_encap_add_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE); 1217 if (ret < 0) { 1218 pr_err("can't add gue ops\n"); 1219 ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU); 1220 return ret; 1221 } 1222 1223 return 0; 1224} 1225 1226static void ip_tunnel_encap_del_fou_ops(void) 1227{ 1228 ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU); 1229 ip_tunnel_encap_del_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE); 1230} 1231 1232#else 1233 1234static int ip_tunnel_encap_add_fou_ops(void) 1235{ 1236 return 0; 1237} 1238 1239static void ip_tunnel_encap_del_fou_ops(void) 1240{ 1241} 1242 1243#endif 1244 1245static __net_init int fou_init_net(struct net *net) 1246{ 1247 struct fou_net *fn = net_generic(net, fou_net_id); 1248 1249 INIT_LIST_HEAD(&fn->fou_list); 1250 mutex_init(&fn->fou_lock); 1251 return 0; 1252} 1253 1254static __net_exit void fou_exit_net(struct net *net) 1255{ 1256 struct fou_net *fn = net_generic(net, fou_net_id); 1257 struct fou *fou, *next; 1258 1259 /* Close all the FOU sockets */ 1260 mutex_lock(&fn->fou_lock); 1261 list_for_each_entry_safe(fou, next, &fn->fou_list, list) 1262 fou_release(fou); 1263 mutex_unlock(&fn->fou_lock); 1264} 1265 1266static struct pernet_operations fou_net_ops = { 1267 .init = fou_init_net, 1268 .exit = fou_exit_net, 1269 .id = &fou_net_id, 1270 .size = sizeof(struct fou_net), 1271}; 1272 1273static int __init fou_init(void) 1274{ 1275 int ret; 1276 1277 ret = register_pernet_device(&fou_net_ops); 1278 if (ret) 1279 goto exit; 1280 1281 ret = genl_register_family(&fou_nl_family); 1282 if (ret < 0) 1283 goto unregister; 1284 1285 ret = ip_tunnel_encap_add_fou_ops(); 1286 if (ret == 0) 1287 return 0; 1288 1289 genl_unregister_family(&fou_nl_family); 1290unregister: 1291 unregister_pernet_device(&fou_net_ops); 1292exit: 1293 return ret; 1294} 1295 1296static void __exit fou_fini(void) 1297{ 1298 ip_tunnel_encap_del_fou_ops(); 1299 genl_unregister_family(&fou_nl_family); 1300 unregister_pernet_device(&fou_net_ops); 1301} 1302 1303module_init(fou_init); 1304module_exit(fou_fini); 1305MODULE_AUTHOR("Tom Herbert <therbert@google.com>"); 1306MODULE_LICENSE("GPL"); 1307MODULE_DESCRIPTION("Foo over UDP"); 1308