// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * IPv6 tunneling device
 * Linux INET6 implementation
 *
 * Authors:
 *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
 *	Yasuyuki Kozakai	<kozakai@linux-ipv6.org>
 *
 * Based on:
 *	linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
 *
 * RFC 2473
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/sockios.h>
#include <linux/icmp.h>
#include <linux/if.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
#include <linux/route.h>
#include <linux/rtnetlink.h>
#include <linux/netfilter_ipv6.h>
#include <linux/slab.h>
#include <linux/hash.h>
#include <linux/etherdevice.h>

#include <linux/uaccess.h>
#include <linux/atomic.h>

#include <net/icmp.h>
#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/ip6_tunnel.h>
#include <net/xfrm.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/dst_metadata.h>

MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");

#define IP6_TUNNEL_HASH_SIZE_SHIFT  5
#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT)

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
	u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);

	return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
}

static int ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);
static struct rtnl_link_ops ip6_link_ops __read_mostly;

static unsigned int ip6_tnl_net_id __read_mostly;
struct ip6_tnl_net {
	/* the IPv6 tunnel fallback device */
	struct net_device *fb_tnl_dev;
	/* lists for storing tunnels in use */
	struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
	struct ip6_tnl __rcu *tnls_wc[1];
	struct ip6_tnl __rcu **tnls[2];
	struct ip6_tnl __rcu *collect_md_tun;
};

static inline int ip6_tnl_mpls_supported(void)
{
	return IS_ENABLED(CONFIG_MPLS);
}
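
/* Fold the per-cpu rx/tx packet and byte counters into dev->stats;
 * the u64_stats retry loop guarantees readers see consistent values.
 */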
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
	struct pcpu_sw_netstats tmp, sum = { 0 };
	int i;

	for_each_possible_cpu(i) {
		unsigned int start;
		const struct pcpu_sw_netstats *tstats =
						per_cpu_ptr(dev->tstats, i);

		do {
			start = u64_stats_fetch_begin_irq(&tstats->syncp);
			tmp.rx_packets = tstats->rx_packets;
			tmp.rx_bytes = tstats->rx_bytes;
			tmp.tx_packets = tstats->tx_packets;
			tmp.tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_irq(&tstats->syncp, start));

		sum.rx_packets += tmp.rx_packets;
		sum.rx_bytes += tmp.rx_bytes;
		sum.tx_packets += tmp.tx_packets;
		sum.tx_bytes += tmp.tx_bytes;
	}
	dev->stats.rx_packets = sum.rx_packets;
	dev->stats.rx_bytes = sum.rx_bytes;
	dev->stats.tx_packets = sum.tx_packets;
	dev->stats.tx_bytes = sum.tx_bytes;
	return &dev->stats;
}

#define for_each_ip6_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

/**
 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
 * @net: network namespace
 * @link: ifindex of underlying interface
 * @remote: the address of the tunnel exit-point
 * @local: the address of the tunnel entry-point
 *
 * Return:
 *   tunnel matching given end-points if found,
 *   else fallback tunnel if its device is up,
 *   else %NULL
 **/

static struct ip6_tnl *
ip6_tnl_lookup(struct net *net, int link,
	       const struct in6_addr *remote, const struct in6_addr *local)
{
	unsigned int hash = HASH(remote, local);
	struct ip6_tnl *t, *cand = NULL;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct in6_addr any;

	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
		if (!ipv6_addr_equal(local, &t->parms.laddr) ||
		    !ipv6_addr_equal(remote, &t->parms.raddr) ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (link == t->parms.link)
			return t;
		else
			cand = t;
	}

	memset(&any, 0, sizeof(any));
	hash = HASH(&any, local);
	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
		if (!ipv6_addr_equal(local, &t->parms.laddr) ||
		    !ipv6_addr_any(&t->parms.raddr) ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (link == t->parms.link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = HASH(remote, &any);
	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
		if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
		    !ipv6_addr_any(&t->parms.laddr) ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (link == t->parms.link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (cand)
		return cand;

	t = rcu_dereference(ip6n->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	t = rcu_dereference(ip6n->tnls_wc[0]);
	if (t && (t->dev->flags & IFF_UP))
		return t;

	return NULL;
}
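
/* Tunnels live in two sets of hash buckets: tnls_r_l[], hashed over the
 * local/remote address pair, and the single-entry tnls_wc[] holding the
 * wildcard tunnel whose addresses are both unspecified.
 */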

/**
 * ip6_tnl_bucket - get head of list matching given tunnel parameters
 * @ip6n: the private data for ip6_tnl in the netns
 * @p: parameters containing tunnel end-points
 *
 * Description:
 *   ip6_tnl_bucket() returns the head of the list matching the
 *   &struct in6_addr entries laddr and raddr in @p.
 *
 * Return: head of IPv6 tunnel list
 **/

static struct ip6_tnl __rcu **
ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
{
	const struct in6_addr *remote = &p->raddr;
	const struct in6_addr *local = &p->laddr;
	unsigned int h = 0;
	int prio = 0;

	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
		prio = 1;
		h = HASH(remote, local);
	}
	return &ip6n->tnls[prio][h];
}

/**
 * ip6_tnl_link - add tunnel to hash table
 * @ip6n: the private data for ip6_tnl in the netns
 * @t: tunnel to be added
 **/

static void
ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
{
	struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);

	if (t->parms.collect_md)
		rcu_assign_pointer(ip6n->collect_md_tun, t);
	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}

/**
 * ip6_tnl_unlink - remove tunnel from hash table
 * @ip6n: the private data for ip6_tnl in the netns
 * @t: tunnel to be removed
 **/

static void
ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
{
	struct ip6_tnl __rcu **tp;
	struct ip6_tnl *iter;

	if (t->parms.collect_md)
		rcu_assign_pointer(ip6n->collect_md_tun, NULL);

	for (tp = ip6_tnl_bucket(ip6n, &t->parms);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}

static void ip6_dev_free(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);

	gro_cells_destroy(&t->gro_cells);
	dst_cache_destroy(&t->dst_cache);
	free_percpu(dev->tstats);
}

static int ip6_tnl_create2(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	int err;

	dev->rtnl_link_ops = &ip6_link_ops;
	err = register_netdevice(dev);
	if (err < 0)
		goto out;

	strcpy(t->parms.name, dev->name);

	ip6_tnl_link(ip6n, t);
	return 0;

out:
	return err;
}

/**
 * ip6_tnl_create - create a new tunnel
 * @net: network namespace
 * @p: tunnel parameters
 *
 * Description:
 *   Create tunnel matching given parameters.
 *
 * Return:
 *   created tunnel or error pointer
 **/

static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
{
	struct net_device *dev;
	struct ip6_tnl *t;
	char name[IFNAMSIZ];
	int err = -E2BIG;

	if (p->name[0]) {
		if (!dev_valid_name(p->name))
			goto failed;
		strlcpy(name, p->name, IFNAMSIZ);
	} else {
		sprintf(name, "ip6tnl%%d");
	}
	err = -ENOMEM;
	dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
			   ip6_tnl_dev_setup);
	if (!dev)
		goto failed;

	dev_net_set(dev, net);

	t = netdev_priv(dev);
	t->parms = *p;
	t->net = dev_net(dev);
	err = ip6_tnl_create2(dev);
	if (err < 0)
		goto failed_free;

	return t;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

/**
 * ip6_tnl_locate - find or create tunnel matching given parameters
 * @net: network namespace
 * @p: tunnel parameters
 * @create: != 0 if allowed to create new tunnel if no match found
 *
 * Description:
 *   ip6_tnl_locate() first tries to locate an existing tunnel
 *   based on @p. If this is unsuccessful, but @create is set, a new
 *   tunnel device is created and registered for use.
 *
 * Return:
 *   matching tunnel or error pointer
 **/

static struct ip6_tnl *ip6_tnl_locate(struct net *net,
				      struct __ip6_tnl_parm *p, int create)
{
	const struct in6_addr *remote = &p->raddr;
	const struct in6_addr *local = &p->laddr;
	struct ip6_tnl __rcu **tp;
	struct ip6_tnl *t;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	for (tp = ip6_tnl_bucket(ip6n, p);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next) {
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_equal(remote, &t->parms.raddr) &&
		    p->link == t->parms.link) {
			if (create)
				return ERR_PTR(-EEXIST);

			return t;
		}
	}
	if (!create)
		return ERR_PTR(-ENODEV);
	return ip6_tnl_create(net, p);
}

/**
 * ip6_tnl_dev_uninit - tunnel device uninitializer
 * @dev: the device to be destroyed
 *
 * Description:
 *   ip6_tnl_dev_uninit() removes tunnel from its list
 **/

static void
ip6_tnl_dev_uninit(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	if (dev == ip6n->fb_tnl_dev)
		RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
	else
		ip6_tnl_unlink(ip6n, t);
	dst_cache_reset(&t->dst_cache);
	dev_put(dev);
}

/**
 * ip6_tnl_parse_tlv_enc_lim - handle encapsulation limit option
 * @skb: received socket buffer
 * @raw: pointer to the outer IPv6 header within @skb->data
 *
 * Return:
 *   0 if none was found,
 *   else index to encapsulation limit
 **/

__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
{
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
	unsigned int nhoff = raw - skb->data;
	unsigned int off = nhoff + sizeof(*ipv6h);
	u8 nexthdr = ipv6h->nexthdr;

	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
		struct ipv6_opt_hdr *hdr;
		u16 optlen;

		if (!pskb_may_pull(skb, off + sizeof(*hdr)))
			break;

		hdr = (struct ipv6_opt_hdr *)(skb->data + off);
		if (nexthdr == NEXTHDR_FRAGMENT) {
			optlen = 8;
		} else if (nexthdr == NEXTHDR_AUTH) {
			optlen = ipv6_authlen(hdr);
		} else {
			optlen = ipv6_optlen(hdr);
		}

		if (!pskb_may_pull(skb, off + optlen))
			break;

		hdr = (struct ipv6_opt_hdr *)(skb->data + off);
		if (nexthdr == NEXTHDR_FRAGMENT) {
			struct frag_hdr *frag_hdr = (struct frag_hdr *)hdr;

			if (frag_hdr->frag_off)
				break;
		}
		if (nexthdr == NEXTHDR_DEST) {
			u16 i = 2;

			while (1) {
				struct ipv6_tlv_tnl_enc_lim *tel;

				/* No more room for encapsulation limit */
				if (i + sizeof(*tel) > optlen)
					break;

				tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i);
				/* return index of option if found and valid */
				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
				    tel->length == 1)
					return i + off - nhoff;
				/* else jump to next option */
				if (tel->type)
					i += tel->length + 2;
				else
					i++;
			}
		}
		nexthdr = hdr->nexthdr;
		off += optlen;
	}
	return 0;
}
EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);

/**
 * ip6_tnl_err - tunnel error handler
 * @skb: received socket buffer
 * @ipproto: protocol carried inside the tunnel
 * @opt: option data parsed from the packet
 * @type: ICMPv6 type of the received error, rewritten to the type to relay
 * @code: ICMPv6 code of the received error, rewritten to the code to relay
 * @msg: set to non-zero if an error should be relayed to the sender
 * @info: additional ICMPv6 information, rewritten on return
 * @offset: offset of the inner packet within @skb
 *
 * Description:
 *   ip6_tnl_err() should handle errors in the tunnel according
 *   to the specifications in RFC 2473.
 **/

static int
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
	    u8 *type, u8 *code, int *msg, __u32 *info, int offset)
{
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
	struct net *net = dev_net(skb->dev);
	u8 rel_type = ICMPV6_DEST_UNREACH;
	u8 rel_code = ICMPV6_ADDR_UNREACH;
	__u32 rel_info = 0;
	struct ip6_tnl *t;
	int err = -ENOENT;
	int rel_msg = 0;
	u8 tproto;
	__u16 len;

	/* If the packet doesn't contain the original IPv6 header we are
	 * in trouble since we might need the source address for further
	 * processing of the error.
	 */

	rcu_read_lock();
	t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->daddr, &ipv6h->saddr);
	if (!t)
		goto out;

	tproto = READ_ONCE(t->parms.proto);
	if (tproto != ipproto && tproto != 0)
		goto out;

	err = 0;

	switch (*type) {
	case ICMPV6_DEST_UNREACH:
		net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
				    t->parms.name);
		rel_msg = 1;
		break;
	case ICMPV6_TIME_EXCEED:
		if ((*code) == ICMPV6_EXC_HOPLIMIT) {
			net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
					    t->parms.name);
			rel_msg = 1;
		}
		break;
	case ICMPV6_PARAMPROB: {
		struct ipv6_tlv_tnl_enc_lim *tel;
		__u32 teli;

		teli = 0;
		if ((*code) == ICMPV6_HDR_FIELD)
			teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);

		if (teli && teli == *info - 2) {
			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
			if (tel->encap_limit == 0) {
				net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
						    t->parms.name);
				rel_msg = 1;
			}
		} else {
			net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
					    t->parms.name);
		}
		break;
	}
	case ICMPV6_PKT_TOOBIG: {
		__u32 mtu;

		ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
				sock_net_uid(net, NULL));
		mtu = *info - offset;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
		len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
		if (len > mtu) {
			rel_type = ICMPV6_PKT_TOOBIG;
			rel_code = 0;
			rel_info = mtu;
			rel_msg = 1;
		}
		break;
	}
	case NDISC_REDIRECT:
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));
		break;
	}

	*type = rel_type;
	*code = rel_code;
	*info = rel_info;
	*msg = rel_msg;

out:
	rcu_read_unlock();
	return err;
}
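
/* Translate an ICMPv6 error received on the outer IPv6 header into an
 * ICMPv4 error for the tunneled IPv4 packet: clone the inner packet,
 * re-route it to guess the incoming interface, then emit icmp_send().
 */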
static int
ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   u8 type, u8 code, int offset, __be32 info)
{
	__u32 rel_info = ntohl(info);
	const struct iphdr *eiph;
	struct sk_buff *skb2;
	int err, rel_msg = 0;
	u8 rel_type = type;
	u8 rel_code = code;
	struct rtable *rt;
	struct flowi4 fl4;

	err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	if (rel_msg == 0)
		return 0;

	switch (rel_type) {
	case ICMPV6_DEST_UNREACH:
		if (rel_code != ICMPV6_ADDR_UNREACH)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_HOST_UNREACH;
		break;
	case ICMPV6_PKT_TOOBIG:
		if (rel_code != 0)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_FRAG_NEEDED;
		break;
	default:
		return 0;
	}

	if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
		return 0;

	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (!skb2)
		return 0;

	skb_dst_drop(skb2);

	skb_pull(skb2, offset);
	skb_reset_network_header(skb2);
	eiph = ip_hdr(skb2);

	/* Try to guess incoming interface */
	rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
				   0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
	if (IS_ERR(rt))
		goto out;

	skb2->dev = rt->dst.dev;
	ip_rt_put(rt);

	/* route "incoming" packet */
	if (rt->rt_flags & RTCF_LOCAL) {
		rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
					   eiph->daddr, eiph->saddr, 0, 0,
					   IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
		if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) {
			if (!IS_ERR(rt))
				ip_rt_put(rt);
			goto out;
		}
		skb_dst_set(skb2, &rt->dst);
	} else {
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
				   skb2->dev) ||
		    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6)
			goto out;
	}

	/* change mtu on this route */
	if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
		if (rel_info > dst_mtu(skb_dst(skb2)))
			goto out;

		skb_dst_update_pmtu_no_confirm(skb2, rel_info);
	}

	icmp_send(skb2, rel_type, rel_code, htonl(rel_info));

out:
	kfree_skb(skb2);
	return 0;
}

static int
ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   u8 type, u8 code, int offset, __be32 info)
{
	__u32 rel_info = ntohl(info);
	int err, rel_msg = 0;
	u8 rel_type = type;
	u8 rel_code = code;

	err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
		struct rt6_info *rt;
		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

		if (!skb2)
			return 0;

		skb_dst_drop(skb2);
		skb_pull(skb2, offset);
		skb_reset_network_header(skb2);

		/* Try to guess incoming interface */
		rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
				NULL, 0, skb2, 0);

		if (rt && rt->dst.dev)
			skb2->dev = rt->dst.dev;

		icmpv6_send(skb2, rel_type, rel_code, rel_info);

		ip6_rt_put(rt);

		kfree_skb(skb2);
	}

	return 0;
}

static int
mplsip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	    u8 type, u8 code, int offset, __be32 info)
{
	__u32 rel_info = ntohl(info);
	int err, rel_msg = 0;
	u8 rel_type = type;
	u8 rel_code = code;

	err = ip6_tnl_err(skb, IPPROTO_MPLS, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	return err;
}

static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
				       const struct ipv6hdr *ipv6h,
				       struct sk_buff *skb)
{
	__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;

	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);

	return IP6_ECN_decapsulate(ipv6h, skb);
}

static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
				       const struct ipv6hdr *ipv6h,
				       struct sk_buff *skb)
{
	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
		ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));

	return IP6_ECN_decapsulate(ipv6h, skb);
}

static inline int mplsip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
					       const struct ipv6hdr *ipv6h,
					       struct sk_buff *skb)
{
	/* ECN is not supported in AF_MPLS */
	return 0;
}
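
/* Compute the capability flags (IP6_TNL_F_CAP_XMIT/RCV/PER_PACKET) a
 * tunnel has for the given local/remote address pair.  Wildcard
 * addresses force per-packet checks; loopback addresses, and link-local
 * ones unless bound to an underlying link, yield no capabilities.
 */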
__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
		      const struct in6_addr *laddr,
		      const struct in6_addr *raddr)
{
	struct __ip6_tnl_parm *p = &t->parms;
	int ltype = ipv6_addr_type(laddr);
	int rtype = ipv6_addr_type(raddr);
	__u32 flags = 0;

	if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
		flags = IP6_TNL_F_CAP_PER_PACKET;
	} else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
		   rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
		   !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
		   (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
		if (ltype&IPV6_ADDR_UNICAST)
			flags |= IP6_TNL_F_CAP_XMIT;
		if (rtype&IPV6_ADDR_UNICAST)
			flags |= IP6_TNL_F_CAP_RCV;
	}
	return flags;
}
EXPORT_SYMBOL(ip6_tnl_get_cap);

/* called with rcu_read_lock() */
int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
		    const struct in6_addr *laddr,
		    const struct in6_addr *raddr)
{
	struct __ip6_tnl_parm *p = &t->parms;
	int ret = 0;
	struct net *net = t->net;

	if ((p->flags & IP6_TNL_F_CAP_RCV) ||
	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
		struct net_device *ldev = NULL;

		if (p->link)
			ldev = dev_get_by_index_rcu(net, p->link);

		if ((ipv6_addr_is_multicast(laddr) ||
		     likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
						    0, IFA_F_TENTATIVE))) &&
		    ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
		     likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
						     0, IFA_F_TENTATIVE))))
			ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
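
/* Common receive path: validate the checksum/sequence flags against the
 * tunnel configuration, reset the skb for the inner protocol,
 * decapsulate DSCP/ECN, update stats and hand the packet to GRO.
 */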
static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
			 const struct tnl_ptk_info *tpi,
			 struct metadata_dst *tun_dst,
			 int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						     const struct ipv6hdr *ipv6h,
						     struct sk_buff *skb),
			 bool log_ecn_err)
{
	const struct ipv6hdr *ipv6h;
	int nh, err;

	if ((!(tpi->flags & TUNNEL_CSUM) &&
	     (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
	    ((tpi->flags & TUNNEL_CSUM) &&
	     !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags & TUNNEL_SEQ) {
		if (!(tpi->flags & TUNNEL_SEQ) ||
		    (tunnel->i_seqno &&
		     (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb->protocol = tpi->proto;

	/* Warning: All skb pointers will be invalidated! */
	if (tunnel->dev->type == ARPHRD_ETHER) {
		if (!pskb_may_pull(skb, ETH_HLEN)) {
			tunnel->dev->stats.rx_length_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}

		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	/* Save offset of outer header relative to skb->head,
	 * because we are going to reset the network header to the inner header
	 * and might change skb->head.
	 */
	nh = skb_network_header(skb) - skb->head;

	skb_reset_network_header(skb);

	if (!pskb_inet_may_pull(skb)) {
		DEV_STATS_INC(tunnel->dev, rx_length_errors);
		DEV_STATS_INC(tunnel->dev, rx_errors);
		goto drop;
	}

	/* Get the outer header. */
	ipv6h = (struct ipv6hdr *)(skb->head + nh);

	memset(skb->cb, 0, sizeof(struct inet6_skb_parm));

	__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);

	err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
	if (unlikely(err)) {
		if (log_ecn_err)
			net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n",
					     &ipv6h->saddr,
					     ipv6_get_dsfield(ipv6h));
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	dev_sw_netstats_rx_add(tunnel->dev, skb->len);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}

int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
		const struct tnl_ptk_info *tpi,
		struct metadata_dst *tun_dst,
		bool log_ecn_err)
{
	int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
				    const struct ipv6hdr *ipv6h,
				    struct sk_buff *skb);

	dscp_ecn_decapsulate = ip6ip6_dscp_ecn_decapsulate;
	if (tpi->proto == htons(ETH_P_IP))
		dscp_ecn_decapsulate = ip4ip6_dscp_ecn_decapsulate;

	return __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
			     log_ecn_err);
}
EXPORT_SYMBOL(ip6_tnl_rcv);
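
/* Static packet-type templates for the bare ipxip6/mplsip6 receive
 * paths; only the inner protocol field matters here.
 */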
static const struct tnl_ptk_info tpi_v6 = {
	/* no tunnel info required for ipxip6. */
	.proto = htons(ETH_P_IPV6),
};

static const struct tnl_ptk_info tpi_v4 = {
	/* no tunnel info required for ipxip6. */
	.proto = htons(ETH_P_IP),
};

static const struct tnl_ptk_info tpi_mpls = {
	/* no tunnel info required for mplsip6. */
	.proto = htons(ETH_P_MPLS_UC),
};

static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
		      const struct tnl_ptk_info *tpi,
		      int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						  const struct ipv6hdr *ipv6h,
						  struct sk_buff *skb))
{
	struct ip6_tnl *t;
	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	struct metadata_dst *tun_dst = NULL;
	int ret = -1;

	rcu_read_lock();
	t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->saddr, &ipv6h->daddr);

	if (t) {
		u8 tproto = READ_ONCE(t->parms.proto);

		if (tproto != ipproto && tproto != 0)
			goto drop;
		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
			goto drop;
		ipv6h = ipv6_hdr(skb);
		if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
			goto drop;
		if (iptunnel_pull_header(skb, 0, tpi->proto, false))
			goto drop;
		if (t->parms.collect_md) {
			tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
			if (!tun_dst)
				goto drop;
		}
		ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
				    log_ecn_error);
	}

	rcu_read_unlock();

	return ret;

drop:
	rcu_read_unlock();
	kfree_skb(skb);
	return 0;
}

static int ip4ip6_rcv(struct sk_buff *skb)
{
	return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4,
			  ip4ip6_dscp_ecn_decapsulate);
}

static int ip6ip6_rcv(struct sk_buff *skb)
{
	return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6,
			  ip6ip6_dscp_ecn_decapsulate);
}

static int mplsip6_rcv(struct sk_buff *skb)
{
	return ipxip6_rcv(skb, IPPROTO_MPLS, &tpi_mpls,
			  mplsip6_dscp_ecn_decapsulate);
}

struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;
	__u8 dst_opt[8];
};
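
/* Build an 8-byte destination options header carrying the tunnel
 * encapsulation limit TLV (RFC 2473): bytes 0-1 are the option header
 * filled in when the option is pushed, bytes 2-4 the limit TLV, and
 * bytes 5-7 a PadN option padding the header to 8 bytes.
 */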
static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
{
	memset(opt, 0, sizeof(struct ipv6_tel_txoption));

	opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
	opt->dst_opt[3] = 1;
	opt->dst_opt[4] = encap_limit;
	opt->dst_opt[5] = IPV6_TLV_PADN;
	opt->dst_opt[6] = 1;

	opt->ops.dst1opt = (struct ipv6_opt_hdr *) opt->dst_opt;
	opt->ops.opt_nflen = 8;
}

/**
 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
 * @t: the outgoing tunnel device
 * @hdr: IPv6 header from the incoming packet
 *
 * Description:
 *   Avoid trivial tunneling loop by checking that tunnel exit-point
 *   doesn't match source of incoming packet.
 *
 * Return:
 *   1 if conflict,
 *   0 else
 **/

static inline bool
ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
{
	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}

int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
		     const struct in6_addr *laddr,
		     const struct in6_addr *raddr)
{
	struct __ip6_tnl_parm *p = &t->parms;
	int ret = 0;
	struct net *net = t->net;

	if (t->parms.collect_md)
		return 1;

	if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
		struct net_device *ldev = NULL;

		rcu_read_lock();
		if (p->link)
			ldev = dev_get_by_index_rcu(net, p->link);

		if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
						      0, IFA_F_TENTATIVE)))
			pr_warn_ratelimited("%s xmit: Local address not yet configured!\n",
					    p->name);
		else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
			 !ipv6_addr_is_multicast(raddr) &&
			 unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
							  true, 0, IFA_F_TENTATIVE)))
			pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n",
					    p->name);
		else
			ret = 1;
		rcu_read_unlock();
	}
	return ret;
}
EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);

/**
 * ip6_tnl_xmit - encapsulate packet and send
 * @skb: the outgoing socket buffer
 * @dev: the outgoing tunnel device
 * @dsfield: dscp code for outer header
 * @fl6: flow of tunneled packet
 * @encap_limit: encapsulation limit
 * @pmtu: Path MTU is stored if packet is too big
 * @proto: next header value
 *
 * Description:
 *   Build new header and do some sanity checks on the packet before sending
 *   it.
 *
 * Return:
 *   0 on success
 *   -1 fail
 *   %-EMSGSIZE message too big. return mtu in this case.
 **/

int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
		 struct flowi6 *fl6, int encap_limit, __u32 *pmtu,
		 __u8 proto)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct net_device_stats *stats = &t->dev->stats;
	struct ipv6hdr *ipv6h;
	struct ipv6_tel_txoption opt;
	struct dst_entry *dst = NULL, *ndst = NULL;
	struct net_device *tdev;
	int mtu;
	unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0;
	unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
	unsigned int max_headroom = psh_hlen;
	bool use_cache = false;
	u8 hop_limit;
	int err = -1;

	if (t->parms.collect_md) {
		hop_limit = skb_tunnel_info(skb)->key.ttl;
		goto route_lookup;
	} else {
		hop_limit = t->parms.hop_limit;
	}

	/* NBMA tunnel */
	if (ipv6_addr_any(&t->parms.raddr)) {
		if (skb->protocol == htons(ETH_P_IPV6)) {
			struct in6_addr *addr6;
			struct neighbour *neigh;
			int addr_type;

			if (!skb_dst(skb))
				goto tx_err_link_failure;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_err_link_failure;

			addr6 = (struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY)
				addr6 = &ipv6_hdr(skb)->daddr;

			memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
			neigh_release(neigh);
		}
	} else if (t->parms.proto != 0 && !(t->parms.flags &
					    (IP6_TNL_F_USE_ORIG_TCLASS |
					     IP6_TNL_F_USE_ORIG_FWMARK))) {
		/* enable the cache only if neither the outer protocol nor the
		 * routing decision depends on the current inner header value
		 */
		use_cache = true;
	}

	if (use_cache)
		dst = dst_cache_get(&t->dst_cache);

	if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
		goto tx_err_link_failure;

	if (!dst) {
route_lookup:
		/* add dsfield to flowlabel for route lookup */
		fl6->flowlabel = ip6_make_flowinfo(dsfield, fl6->flowlabel);

		dst = ip6_route_output(net, NULL, fl6);

		if (dst->error)
			goto tx_err_link_failure;
		dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			dst = NULL;
			goto tx_err_link_failure;
		}
		if (t->parms.collect_md && ipv6_addr_any(&fl6->saddr) &&
		    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
				       &fl6->daddr, 0, &fl6->saddr))
			goto tx_err_link_failure;
		ndst = dst;
	}

	tdev = dst->dev;

	if (tdev == dev) {
		stats->collisions++;
		net_warn_ratelimited("%s: Local routing loop detected!\n",
				     t->parms.name);
		goto tx_err_dst_release;
	}
	mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen;
	if (encap_limit >= 0) {
		max_headroom += 8;
		mtu -= 8;
	}
	mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
		       IPV6_MIN_MTU : IPV4_MIN_MTU);

	skb_dst_update_pmtu_no_confirm(skb, mtu);
	if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
		*pmtu = mtu;
		err = -EMSGSIZE;
		goto tx_err_dst_release;
	}

	if (t->err_count > 0) {
		if (time_before(jiffies,
				t->err_time + IP6TUNNEL_ERR_TIMEO)) {
			t->err_count--;

			dst_link_failure(skb);
		} else {
			t->err_count = 0;
		}
	}

	skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom += LL_RESERVED_SPACE(tdev);

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb;

		new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb)
			goto tx_err_dst_release;

		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		consume_skb(skb);
		skb = new_skb;
	}

	if (t->parms.collect_md) {
		if (t->encap.type != TUNNEL_ENCAP_NONE)
			goto tx_err_dst_release;
	} else {
		if (use_cache && ndst)
			dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
	}
	skb_dst_set(skb, dst);

	if (hop_limit == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			hop_limit = ip_hdr(skb)->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			hop_limit = ipv6_hdr(skb)->hop_limit;
		else
			hop_limit = ip6_dst_hoplimit(dst);
	}

	/* Calculate max headroom for all the headers and adjust
	 * needed_headroom if necessary.
	 */
	max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
			+ dst->header_len + t->hlen;
	if (max_headroom > READ_ONCE(dev->needed_headroom))
		WRITE_ONCE(dev->needed_headroom, max_headroom);

	err = ip6_tnl_encap(skb, t, &proto, fl6);
	if (err)
		return err;

	if (encap_limit >= 0) {
		init_tel_txopt(&opt, encap_limit);
		ipv6_push_frag_opts(skb, &opt.ops, &proto);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	ipv6h = ipv6_hdr(skb);
	ip6_flow_hdr(ipv6h, dsfield,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
	ipv6h->hop_limit = hop_limit;
	ipv6h->nexthdr = proto;
	ipv6h->saddr = fl6->saddr;
	ipv6h->daddr = fl6->daddr;
	ip6tunnel_xmit(NULL, skb, dev);
	return 0;
tx_err_link_failure:
	stats->tx_carrier_errors++;
	dst_link_failure(skb);
tx_err_dst_release:
	dst_release(dst);
	return err;
}
EXPORT_SYMBOL(ip6_tnl_xmit);
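
/* Protocol-specific front end to ip6_tnl_xmit(): derive the outer
 * traffic class, flow label and encapsulation limit either from the
 * collect_md tunnel metadata or from the tunnel parameters, then relay
 * an -EMSGSIZE result as an ICMP(v6) "packet too big" error.
 */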
static inline int
ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
		u8 protocol)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct ipv6hdr *ipv6h;
	const struct iphdr *iph;
	int encap_limit = -1;
	__u16 offset;
	struct flowi6 fl6;
	__u8 dsfield, orig_dsfield;
	__u32 mtu;
	u8 tproto;
	int err;

	tproto = READ_ONCE(t->parms.proto);
	if (tproto != protocol && tproto != 0)
		return -1;

	if (t->parms.collect_md) {
		struct ip_tunnel_info *tun_info;
		const struct ip_tunnel_key *key;

		tun_info = skb_tunnel_info(skb);
		if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
			     ip_tunnel_info_af(tun_info) != AF_INET6))
			return -1;
		key = &tun_info->key;
		memset(&fl6, 0, sizeof(fl6));
		fl6.flowi6_proto = protocol;
		fl6.saddr = key->u.ipv6.src;
		fl6.daddr = key->u.ipv6.dst;
		fl6.flowlabel = key->label;
		dsfield = key->tos;
		switch (protocol) {
		case IPPROTO_IPIP:
			iph = ip_hdr(skb);
			orig_dsfield = ipv4_get_dsfield(iph);
			break;
		case IPPROTO_IPV6:
			ipv6h = ipv6_hdr(skb);
			orig_dsfield = ipv6_get_dsfield(ipv6h);
			break;
		default:
			orig_dsfield = dsfield;
			break;
		}
	} else {
		if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
			encap_limit = t->parms.encap_limit;
		if (protocol == IPPROTO_IPV6) {
			offset = ip6_tnl_parse_tlv_enc_lim(skb,
						skb_network_header(skb));
			/* ip6_tnl_parse_tlv_enc_lim() might have
			 * reallocated skb->head
			 */
			if (offset > 0) {
				struct ipv6_tlv_tnl_enc_lim *tel;

				tel = (void *)&skb_network_header(skb)[offset];
				if (tel->encap_limit == 0) {
					icmpv6_ndo_send(skb, ICMPV6_PARAMPROB,
							ICMPV6_HDR_FIELD, offset + 2);
					return -1;
				}
				encap_limit = tel->encap_limit - 1;
			}
		}

		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
		fl6.flowi6_proto = protocol;

		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
			fl6.flowi6_mark = skb->mark;
		else
			fl6.flowi6_mark = t->parms.fwmark;
		switch (protocol) {
		case IPPROTO_IPIP:
			iph = ip_hdr(skb);
			orig_dsfield = ipv4_get_dsfield(iph);
			if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
				dsfield = orig_dsfield;
			else
				dsfield = ip6_tclass(t->parms.flowinfo);
			break;
		case IPPROTO_IPV6:
			ipv6h = ipv6_hdr(skb);
			orig_dsfield = ipv6_get_dsfield(ipv6h);
			if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
				dsfield = orig_dsfield;
			else
				dsfield = ip6_tclass(t->parms.flowinfo);
			if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
				fl6.flowlabel |= ip6_flowlabel(ipv6h);
			break;
		default:
			orig_dsfield = dsfield = ip6_tclass(t->parms.flowinfo);
			break;
		}
	}

	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
	dsfield = INET_ECN_encapsulate(dsfield, orig_dsfield);

	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
		return -1;

	skb_set_inner_ipproto(skb, protocol);

	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
			   protocol);
	if (err != 0) {
		/* XXX: send ICMP error even if DF is not set. */
		if (err == -EMSGSIZE)
			switch (protocol) {
			case IPPROTO_IPIP:
				icmp_ndo_send(skb, ICMP_DEST_UNREACH,
					      ICMP_FRAG_NEEDED, htonl(mtu));
				break;
			case IPPROTO_IPV6:
				icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
				break;
			default:
				break;
			}
		return -1;
	}

	return 0;
}

static netdev_tx_t
ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net_device_stats *stats = &t->dev->stats;
	u8 ipproto;
	int ret;

	if (!pskb_inet_may_pull(skb))
		goto tx_err;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		ipproto = IPPROTO_IPIP;
		break;
	case htons(ETH_P_IPV6):
		if (ip6_tnl_addr_conflict(t, ipv6_hdr(skb)))
			goto tx_err;
		ipproto = IPPROTO_IPV6;
		break;
	case htons(ETH_P_MPLS_UC):
		ipproto = IPPROTO_MPLS;
		break;
	default:
		goto tx_err;
	}

	ret = ipxip6_tnl_xmit(skb, dev, ipproto);
	if (ret < 0)
		goto tx_err;

	return NETDEV_TX_OK;

tx_err:
	stats->tx_errors++;
	stats->tx_dropped++;
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
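
/* Recompute the device state that depends on the tunnel parameters:
 * device addresses, the cached flowi6 template, capability flags,
 * the point-to-point flag and, when the route to the remote end-point
 * is known, hard_header_len and MTU.
 */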
static void ip6_tnl_link_config(struct ip6_tnl *t)
{
	struct net_device *dev = t->dev;
	struct net_device *tdev = NULL;
	struct __ip6_tnl_parm *p = &t->parms;
	struct flowi6 *fl6 = &t->fl.u.ip6;
	int t_hlen;
	int mtu;

	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));

	/* Set up flowi template */
	fl6->saddr = p->laddr;
	fl6->daddr = p->raddr;
	fl6->flowi6_oif = p->link;
	fl6->flowlabel = 0;

	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
		fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
		fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;

	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
	p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);

	if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
		dev->flags |= IFF_POINTOPOINT;
	else
		dev->flags &= ~IFF_POINTOPOINT;

	t->tun_hlen = 0;
	t->hlen = t->encap_hlen + t->tun_hlen;
	t_hlen = t->hlen + sizeof(struct ipv6hdr);

	if (p->flags & IP6_TNL_F_CAP_XMIT) {
		int strict = (ipv6_addr_type(&p->raddr) &
			      (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));

		struct rt6_info *rt = rt6_lookup(t->net,
						 &p->raddr, &p->laddr,
						 p->link, NULL, strict);
		if (rt) {
			tdev = rt->dst.dev;
			ip6_rt_put(rt);
		}

		if (!tdev && p->link)
			tdev = __dev_get_by_index(t->net, p->link);

		if (tdev) {
			dev->hard_header_len = tdev->hard_header_len + t_hlen;
			mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU);

			mtu = mtu - t_hlen;
			if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
				mtu -= 8;

			if (mtu < IPV6_MIN_MTU)
				mtu = IPV6_MIN_MTU;
			WRITE_ONCE(dev->mtu, mtu);
		}
	}
}

/**
 * ip6_tnl_change - update the tunnel parameters
 * @t: tunnel to be changed
 * @p: tunnel configuration parameters
 *
 * Description:
 *   ip6_tnl_change() updates the tunnel parameters
 **/

static int
ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
{
	t->parms.laddr = p->laddr;
	t->parms.raddr = p->raddr;
	t->parms.flags = p->flags;
	t->parms.hop_limit = p->hop_limit;
	t->parms.encap_limit = p->encap_limit;
	t->parms.flowinfo = p->flowinfo;
	t->parms.link = p->link;
	t->parms.proto = p->proto;
	t->parms.fwmark = p->fwmark;
	dst_cache_reset(&t->dst_cache);
	ip6_tnl_link_config(t);
	return 0;
}

static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	int err;

	ip6_tnl_unlink(ip6n, t);
	synchronize_net();
	err = ip6_tnl_change(t, p);
	ip6_tnl_link(ip6n, t);
	netdev_state_change(t->dev);
	return err;
}

static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
	/* for the default tnl0 device, only the proto may be changed */
	t->parms.proto = p->proto;
	netdev_state_change(t->dev);
	return 0;
}

static void
ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
{
	p->laddr = u->laddr;
	p->raddr = u->raddr;
	p->flags = u->flags;
	p->hop_limit = u->hop_limit;
	p->encap_limit = u->encap_limit;
	p->flowinfo = u->flowinfo;
	p->link = u->link;
	p->proto = u->proto;
	memcpy(p->name, u->name, sizeof(u->name));
}

static void
ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
{
	u->laddr = p->laddr;
	u->raddr = p->raddr;
	u->flags = p->flags;
	u->hop_limit = p->hop_limit;
	u->encap_limit = p->encap_limit;
	u->flowinfo = p->flowinfo;
	u->link = p->link;
	u->proto = p->proto;
	memcpy(u->name, p->name, sizeof(u->name));
}

/**
 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
 * @dev: virtual device associated with tunnel
 * @ifr: parameters passed from userspace
 * @cmd: command to be performed
 *
 * Description:
 *   ip6_tnl_ioctl() is used for managing IPv6 tunnels
 *   from userspace.
 *
 *   The possible commands are the following:
 *     %SIOCGETTUNNEL: get tunnel parameters for device
 *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
 *     %SIOCCHGTUNNEL: change tunnel parameters to those given
 *     %SIOCDELTUNNEL: delete tunnel
 *
 *   The fallback device "ip6tnl0", created during module
 *   initialization, can be used for creating other tunnel devices.
 *
 * Return:
 *   0 on success,
 *   %-EFAULT if unable to copy data to or from userspace,
 *   %-EPERM if current process doesn't have the %CAP_NET_ADMIN capability,
 *   %-EINVAL if passed tunnel parameters are invalid,
 *   %-EEXIST if changing a tunnel's parameters would cause a conflict,
 *   %-ENODEV if attempting to change or delete a nonexistent device
 **/

static int
ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip6_tnl_parm p;
	struct __ip6_tnl_parm p1;
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	memset(&p1, 0, sizeof(p1));

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == ip6n->fb_tnl_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
			if (IS_ERR(t))
				t = netdev_priv(dev);
		} else {
			memset(&p, 0, sizeof(p));
		}
		ip6_tnl_parm_to_user(&p, &t->parms);
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;
	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			break;
		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			break;
		err = -EINVAL;
		if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
		    p.proto != 0)
			break;
		ip6_tnl_parm_from_user(&p1, &p);
		t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
		if (cmd == SIOCCHGTUNNEL) {
			if (!IS_ERR(t)) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else
				t = netdev_priv(dev);
			if (dev == ip6n->fb_tnl_dev)
				err = ip6_tnl0_update(t, &p1);
			else
				err = ip6_tnl_update(t, &p1);
		}
		if (!IS_ERR(t)) {
			err = 0;
			ip6_tnl_parm_to_user(&p, &t->parms);
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
				err = -EFAULT;
		} else {
			err = PTR_ERR(t);
		}
		break;
	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			break;

		if (dev == ip6n->fb_tnl_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				break;
			err = -ENOENT;
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
			if (IS_ERR(t))
				break;
			err = -EPERM;
			if (t->dev == ip6n->fb_tnl_dev)
				break;
			dev = t->dev;
		}
		err = 0;
		unregister_netdevice(dev);
		break;
	default:
		err = -EINVAL;
	}
	return err;
}

/**
 * ip6_tnl_change_mtu - change mtu manually for tunnel device
 * @dev: virtual device associated with tunnel
 * @new_mtu: the new mtu
 *
 * Return:
 *   0 on success,
 *   %-EINVAL if mtu too small
 **/

int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip6_tnl *tnl = netdev_priv(dev);

	if (tnl->parms.proto == IPPROTO_IPV6) {
		if (new_mtu < IPV6_MIN_MTU)
			return -EINVAL;
	} else {
		if (new_mtu < ETH_MIN_MTU)
			return -EINVAL;
	}
	if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) {
		if (new_mtu > IP6_MAX_MTU - dev->hard_header_len)
			return -EINVAL;
	} else {
		if (new_mtu > IP_MAX_MTU - dev->hard_header_len)
			return -EINVAL;
	}
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL(ip6_tnl_change_mtu);

int ip6_tnl_get_iflink(const struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);

	return t->parms.link;
}
EXPORT_SYMBOL(ip6_tnl_get_iflink);

int ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops,
			  unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip6_tnl_encap_ops **)
			&ip6tun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip6_tnl_encap_add_ops);

int ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops,
			  unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip6_tnl_encap_ops **)
		       &ip6tun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip6_tnl_encap_del_ops);

int ip6_tnl_encap_setup(struct ip6_tnl *t,
			struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip6_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup);

static const struct net_device_ops ip6_tnl_netdev_ops = {
	.ndo_init	= ip6_tnl_dev_init,
	.ndo_uninit	= ip6_tnl_dev_uninit,
	.ndo_start_xmit = ip6_tnl_start_xmit,
	.ndo_do_ioctl	= ip6_tnl_ioctl,
	.ndo_change_mtu = ip6_tnl_change_mtu,
	.ndo_get_stats	= ip6_get_stats,
	.ndo_get_iflink = ip6_tnl_get_iflink,
};

#define IPXIPX_FEATURES (NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_GSO_SOFTWARE |	\
			 NETIF_F_HW_CSUM)

/**
 * ip6_tnl_dev_setup - setup virtual tunnel device
 * @dev: virtual device associated with tunnel
 *
 * Description:
 *   Initialize function pointers and device parameters
 **/

static void ip6_tnl_dev_setup(struct net_device *dev)
{
	dev->netdev_ops = &ip6_tnl_netdev_ops;
	dev->header_ops = &ip_tunnel_header_ops;
	dev->needs_free_netdev = true;
	dev->priv_destructor = ip6_dev_free;

	dev->type = ARPHRD_TUNNEL6;
	dev->flags |= IFF_NOARP;
	dev->addr_len = sizeof(struct in6_addr);
	dev->features |= NETIF_F_LLTX;
	netif_keep_dst(dev);

	dev->features		|= IPXIPX_FEATURES;
	dev->hw_features	|= IPXIPX_FEATURES;

	/* This perm addr will be used as interface identifier by IPv6 */
	dev->addr_assign_type = NET_ADDR_RANDOM;
	eth_random_addr(dev->perm_addr);
}
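
/* ip6_tnl_dev_init_gen() takes a reference on the device via dev_hold();
 * it is dropped by the dev_put() in ip6_tnl_dev_uninit().
 */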

/**
 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
 * @dev: virtual device associated with tunnel
 **/

static inline int
ip6_tnl_dev_init_gen(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	int ret;
	int t_hlen;

	t->dev = dev;
	t->net = dev_net(dev);
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
	if (ret)
		goto free_stats;

	ret = gro_cells_init(&t->gro_cells, dev);
	if (ret)
		goto destroy_dst;

	t->tun_hlen = 0;
	t->hlen = t->encap_hlen + t->tun_hlen;
	t_hlen = t->hlen + sizeof(struct ipv6hdr);

	dev->type = ARPHRD_TUNNEL6;
	dev->hard_header_len = LL_MAX_HEADER + t_hlen;
	dev->mtu = ETH_DATA_LEN - t_hlen;
	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		dev->mtu -= 8;
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len;

	dev_hold(dev);
	return 0;

destroy_dst:
	dst_cache_destroy(&t->dst_cache);
free_stats:
	free_percpu(dev->tstats);
	dev->tstats = NULL;

	return ret;
}

/**
 * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
 * @dev: virtual device associated with tunnel
 **/

static int ip6_tnl_dev_init(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	int err = ip6_tnl_dev_init_gen(dev);

	if (err)
		return err;
	ip6_tnl_link_config(t);
	if (t->parms.collect_md)
		netif_keep_dst(dev);
	return 0;
}

/**
 * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
 * @dev: fallback device
 *
 * Return: 0
 **/

static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	t->parms.proto = IPPROTO_IPV6;

	rcu_assign_pointer(ip6n->tnls_wc[0], t);
	return 0;
}

static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	u8 proto;

	if (!data || !data[IFLA_IPTUN_PROTO])
		return 0;

	proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
	if (proto != IPPROTO_IPV6 &&
	    proto != IPPROTO_IPIP &&
	    proto != 0)
		return -EINVAL;

	return 0;
}
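
/* Fill @parms from the IFLA_IPTUN_* netlink attributes, leaving any
 * attribute that is absent at its zeroed default.
 */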
static void ip6_tnl_netlink_parms(struct nlattr *data[],
				  struct __ip6_tnl_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	if (!data)
		return;

	if (data[IFLA_IPTUN_LINK])
		parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);

	if (data[IFLA_IPTUN_LOCAL])
		parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]);

	if (data[IFLA_IPTUN_REMOTE])
		parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]);

	if (data[IFLA_IPTUN_TTL])
		parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);

	if (data[IFLA_IPTUN_ENCAP_LIMIT])
		parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);

	if (data[IFLA_IPTUN_FLOWINFO])
		parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);

	if (data[IFLA_IPTUN_FLAGS])
		parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);

	if (data[IFLA_IPTUN_PROTO])
		parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);

	if (data[IFLA_IPTUN_COLLECT_METADATA])
		parms->collect_md = true;

	if (data[IFLA_IPTUN_FWMARK])
		parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
}

static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
					struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_IPTUN_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
	}

	if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
	}

	if (data[IFLA_IPTUN_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
	}

	if (data[IFLA_IPTUN_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
	}

	return ret;
}

static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
			   struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct ip_tunnel_encap ipencap;
	struct ip6_tnl *nt, *t;
	int err;

	nt = netdev_priv(dev);

	if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
		err = ip6_tnl_encap_setup(nt, &ipencap);
		if (err < 0)
			return err;
	}

	ip6_tnl_netlink_parms(data, &nt->parms);

	if (nt->parms.collect_md) {
		if (rtnl_dereference(ip6n->collect_md_tun))
			return -EEXIST;
	} else {
		t = ip6_tnl_locate(net, &nt->parms, 0);
		if (!IS_ERR(t))
			return -EEXIST;
	}

	err = ip6_tnl_create2(dev);
	if (!err && tb[IFLA_MTU])
		ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));

	return err;
}

static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
			      struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct __ip6_tnl_parm p;
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct ip_tunnel_encap ipencap;

	if (dev == ip6n->fb_tnl_dev)
		return -EINVAL;

	if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
		int err = ip6_tnl_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}
	ip6_tnl_netlink_parms(data, &p);
	if (p.collect_md)
		return -EINVAL;

	t = ip6_tnl_locate(net, &p, 0);
	if (!IS_ERR(t)) {
		if (t->dev != dev)
			return -EEXIST;
	} else
		t = netdev_priv(dev);

	return ip6_tnl_update(t, &p);
}

static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head)
{
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	if (dev != ip6n->fb_tnl_dev)
		unregister_netdevice_queue(dev, head);
}
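
/* Upper bound for the netlink attributes emitted by
 * ip6_tnl_fill_info(); the two must be kept in sync.
 */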
static size_t ip6_tnl_get_size(const struct net_device *dev)
{
	return
		/* IFLA_IPTUN_LINK */
		nla_total_size(4) +
		/* IFLA_IPTUN_LOCAL */
		nla_total_size(sizeof(struct in6_addr)) +
		/* IFLA_IPTUN_REMOTE */
		nla_total_size(sizeof(struct in6_addr)) +
		/* IFLA_IPTUN_TTL */
		nla_total_size(1) +
		/* IFLA_IPTUN_ENCAP_LIMIT */
		nla_total_size(1) +
		/* IFLA_IPTUN_FLOWINFO */
		nla_total_size(4) +
		/* IFLA_IPTUN_FLAGS */
		nla_total_size(4) +
		/* IFLA_IPTUN_PROTO */
		nla_total_size(1) +
		/* IFLA_IPTUN_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_IPTUN_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_IPTUN_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_IPTUN_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_IPTUN_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_IPTUN_FWMARK */
		nla_total_size(4) +
		0;
}

static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip6_tnl *tunnel = netdev_priv(dev);
	struct __ip6_tnl_parm *parm = &tunnel->parms;

	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
	    nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) ||
	    nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) ||
	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
	    nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
	    nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
	    nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
	    nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) ||
	    nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) ||
	    nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) ||
	    nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) ||
	    nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags))
		goto nla_put_failure;

	if (parm->collect_md)
		if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
			goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

struct net *ip6_tnl_get_link_net(const struct net_device *dev)
{
	struct ip6_tnl *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip6_tnl_get_link_net);

static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
	[IFLA_IPTUN_LINK]		= { .type = NLA_U32 },
	[IFLA_IPTUN_LOCAL]		= { .len = sizeof(struct in6_addr) },
	[IFLA_IPTUN_REMOTE]		= { .len = sizeof(struct in6_addr) },
	[IFLA_IPTUN_TTL]		= { .type = NLA_U8 },
	[IFLA_IPTUN_ENCAP_LIMIT]	= { .type = NLA_U8 },
	[IFLA_IPTUN_FLOWINFO]		= { .type = NLA_U32 },
	[IFLA_IPTUN_FLAGS]		= { .type = NLA_U32 },
	[IFLA_IPTUN_PROTO]		= { .type = NLA_U8 },
	[IFLA_IPTUN_ENCAP_TYPE]		= { .type = NLA_U16 },
	[IFLA_IPTUN_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_IPTUN_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_IPTUN_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_IPTUN_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_IPTUN_FWMARK]		= { .type = NLA_U32 },
};

static struct rtnl_link_ops ip6_link_ops __read_mostly = {
	.kind		= "ip6tnl",
	.maxtype	= IFLA_IPTUN_MAX,
	.policy		= ip6_tnl_policy,
	.priv_size	= sizeof(struct ip6_tnl),
	.setup		= ip6_tnl_dev_setup,
	.validate	= ip6_tnl_validate,
	.newlink	= ip6_tnl_newlink,
	.changelink	= ip6_tnl_changelink,
	.dellink	= ip6_tnl_dellink,
	.get_size	= ip6_tnl_get_size,
	.fill_info	= ip6_tnl_fill_info,
	.get_link_net	= ip6_tnl_get_link_net,
};
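/* Illustrative note (not from the original source): the xfrm6_tunnel
 * handlers below claim the IPv6 payload protocols for IPv4-in-IPv6,
 * IPv6-in-IPv6 and MPLS-in-IPv6; the address family passed to
 * xfrm6_tunnel_register() selects which per-protocol handler chain the
 * entry joins. As far as the tunnel6 dispatch code goes, registered
 * handlers appear to be tried in ascending .priority order: a handler
 * that returns 0 has consumed the packet, while a non-zero return
 * passes it on to the next handler (or to the default ICMP
 * "protocol unreachable" path if none accepts it).
 */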
static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
	.handler	= ip4ip6_rcv,
	.err_handler	= ip4ip6_err,
	.priority	= 1,
};

static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
	.handler	= ip6ip6_rcv,
	.err_handler	= ip6ip6_err,
	.priority	= 1,
};

static struct xfrm6_tunnel mplsip6_handler __read_mostly = {
	.handler	= mplsip6_rcv,
	.err_handler	= mplsip6_err,
	.priority	= 1,
};

static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
{
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct net_device *dev, *aux;
	int h;
	struct ip6_tnl *t;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == &ip6_link_ops)
			unregister_netdevice_queue(dev, list);

	for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
		t = rtnl_dereference(ip6n->tnls_r_l[h]);
		while (t) {
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, list);
			t = rtnl_dereference(t->next);
		}
	}

	t = rtnl_dereference(ip6n->tnls_wc[0]);
	while (t) {
		/* If dev is in the same netns, it has already
		 * been added to the list by the previous loop.
		 */
		if (!net_eq(dev_net(t->dev), net))
			unregister_netdevice_queue(t->dev, list);
		t = rtnl_dereference(t->next);
	}
}
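/* Illustrative scenario (not from the original source) for the
 * cross-netns walk above: a tunnel stays linked into the hash lists of
 * the netns it was created in even if its device is later moved, e.g.:
 *
 *   ip netns add blue
 *   ip link add ip6tnl1 type ip6tnl local 2001:db8::1 remote 2001:db8::2
 *   ip link set ip6tnl1 netns blue
 *
 * When the original netns exits, ip6tnl1 still hangs off that netns's
 * ip6_tnl_net lists but dev_net(t->dev) is now "blue", so the
 * for_each_netdev_safe() pass over the exiting netns never sees it;
 * the explicit hash-list walk queues such devices for unregistration
 * as well.
 */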
static int __net_init ip6_tnl_init_net(struct net *net)
{
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct ip6_tnl *t = NULL;
	int err;

	ip6n->tnls[0] = ip6n->tnls_wc;
	ip6n->tnls[1] = ip6n->tnls_r_l;

	if (!net_has_fallback_tunnels(net))
		return 0;
	err = -ENOMEM;
	ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
					NET_NAME_UNKNOWN, ip6_tnl_dev_setup);

	if (!ip6n->fb_tnl_dev)
		goto err_alloc_dev;
	dev_net_set(ip6n->fb_tnl_dev, net);
	ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;

	err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
	if (err < 0)
		goto err_register;

	err = register_netdev(ip6n->fb_tnl_dev);
	if (err < 0)
		goto err_register;

	t = netdev_priv(ip6n->fb_tnl_dev);

	strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
	return 0;

err_register:
	free_netdev(ip6n->fb_tnl_dev);
err_alloc_dev:
	return err;
}

static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
{
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list)
		ip6_tnl_destroy_tunnels(net, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}

static struct pernet_operations ip6_tnl_net_ops = {
	.init = ip6_tnl_init_net,
	.exit_batch = ip6_tnl_exit_batch_net,
	.id   = &ip6_tnl_net_id,
	.size = sizeof(struct ip6_tnl_net),
};

/**
 * ip6_tunnel_init - register protocol and reserve needed resources
 *
 * Return: 0 on success, a negative errno on failure
 **/

static int __init ip6_tunnel_init(void)
{
	int err;

	if (!ipv6_mod_enabled())
		return -EOPNOTSUPP;

	err = register_pernet_device(&ip6_tnl_net_ops);
	if (err < 0)
		goto out_pernet;

	err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
	if (err < 0) {
		pr_err("%s: can't register ip4ip6\n", __func__);
		goto out_ip4ip6;
	}

	err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
	if (err < 0) {
		pr_err("%s: can't register ip6ip6\n", __func__);
		goto out_ip6ip6;
	}

	if (ip6_tnl_mpls_supported()) {
		err = xfrm6_tunnel_register(&mplsip6_handler, AF_MPLS);
		if (err < 0) {
			pr_err("%s: can't register mplsip6\n", __func__);
			goto out_mplsip6;
		}
	}

	err = rtnl_link_register(&ip6_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	return 0;

rtnl_link_failed:
	if (ip6_tnl_mpls_supported())
		xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS);
out_mplsip6:
	xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
out_ip6ip6:
	xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
out_ip4ip6:
	unregister_pernet_device(&ip6_tnl_net_ops);
out_pernet:
	return err;
}

/**
 * ip6_tunnel_cleanup - free resources and unregister protocol
 **/

static void __exit ip6_tunnel_cleanup(void)
{
	rtnl_link_unregister(&ip6_link_ops);
	if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
		pr_info("%s: can't deregister ip4ip6\n", __func__);

	if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
		pr_info("%s: can't deregister ip6ip6\n", __func__);

	if (ip6_tnl_mpls_supported() &&
	    xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS))
		pr_info("%s: can't deregister mplsip6\n", __func__);
	unregister_pernet_device(&ip6_tnl_net_ops);
}

module_init(ip6_tunnel_init);
module_exit(ip6_tunnel_cleanup);
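/* Userspace sketch (illustrative only, not part of this module): a
 * minimal libmnl program that creates an ip6tnl link via RTM_NEWLINK,
 * exercising ip6_tnl_validate()/ip6_tnl_newlink() against the
 * ip6_tnl_policy table above. Error handling is abbreviated and the
 * interface name and addresses are placeholders.
 *
 *	#include <arpa/inet.h>
 *	#include <time.h>
 *	#include <libmnl/libmnl.h>
 *	#include <linux/if_link.h>
 *	#include <linux/if_tunnel.h>
 *	#include <linux/rtnetlink.h>
 *
 *	int main(void)
 *	{
 *		char buf[MNL_SOCKET_BUFFER_SIZE];
 *		struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
 *		struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
 *		struct nlattr *linkinfo, *data;
 *		struct in6_addr local, remote;
 *		struct ifinfomsg *ifi;
 *
 *		nlh->nlmsg_type = RTM_NEWLINK;
 *		nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
 *				   NLM_F_EXCL | NLM_F_ACK;
 *		nlh->nlmsg_seq = time(NULL);
 *		ifi = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifi));
 *		ifi->ifi_family = AF_UNSPEC;
 *
 *		mnl_attr_put_str(nlh, IFLA_IFNAME, "ip6tnl1");
 *		linkinfo = mnl_attr_nest_start(nlh, IFLA_LINKINFO);
 *		mnl_attr_put_str(nlh, IFLA_INFO_KIND, "ip6tnl");
 *		data = mnl_attr_nest_start(nlh, IFLA_INFO_DATA);
 *		inet_pton(AF_INET6, "2001:db8::1", &local);
 *		inet_pton(AF_INET6, "2001:db8::2", &remote);
 *		mnl_attr_put(nlh, IFLA_IPTUN_LOCAL, sizeof(local), &local);
 *		mnl_attr_put(nlh, IFLA_IPTUN_REMOTE, sizeof(remote), &remote);
 *		mnl_attr_nest_end(nlh, data);
 *		mnl_attr_nest_end(nlh, linkinfo);
 *
 *		mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID);
 *		mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
 *		mnl_socket_recvfrom(nl, buf, sizeof(buf));
 *		mnl_socket_close(nl);
 *		return 0;
 *	}
 *
 * Run as root; a successful request shows up in "ip -d link show
 * ip6tnl1", whose detail line is produced by ip6_tnl_fill_info().
 */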