// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	int delta = hh_len - skb_headroom(skb);
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	/* Be paranoid, rather than too clever. */
	if (unlikely(delta > 0) && dev->header_ops) {
		/* pskb_expand_head() might crash, if skb is shared */
		if (skb_shared(skb)) {
			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

			if (likely(nskb)) {
				if (skb->sk)
					skb_set_owner_w(nskb, skb->sk);
				consume_skb(skb);
			} else {
				kfree_skb(skb);
			}
			skb = nskb;
		}
		if (skb &&
		    pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
			kfree_skb(skb);
			skb = NULL;
		}
		if (!skb) {
			IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
			return -ENOMEM;
		}
	}

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res != LWTUNNEL_XMIT_CONTINUE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Please see corresponding comment in ip_finish_output_gso
	 * describing the cases where GSO segment length exceeds the
	 * egress MTU.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		/* Last GSO segment can be smaller than gso_size (and MTU).
		 * Adding a fragment header would produce an "atomic fragment",
		 * which is considered harmful (RFC-8021). Avoid that.
		 */
		err = segs->len > mtu ?
			ip6_fragment(net, sk, segs, ip6_finish_output2) :
			ip6_finish_output2(net, sk, segs);
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}

static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IP6CB(skb)->flags |= IP6SKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	mtu = ip6_skb_dst_mtu(skb);
	if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

	if ((skb->len > mtu && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
		return __ip6_finish_output(net, sk, skb);
	case NET_XMIT_CN:
		return __ip6_finish_output(net, sk, skb) ? : ret;
	default:
		kfree_skb(skb);
		return ret;
	}
}

int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(!idev || (idev->cnf.disable_ipv6))) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	struct inet6_dev *idev;
	u32 mtu;

	idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!net->ipv6.devconf_all->disable_policy &&
	    (!idev || !idev->cnf.disable_policy) &&
	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}

int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	state->hlen = hlen;
	state->mtu = mtu;

	state->left = skb->len - hlen;	/* Space per frame */
	state->ptr = hlen;		/* Where to start from */

	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		/* We prevent @rt from being freed. */
		rcu_read_lock();

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down.
			 */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb->tstamp = tstamp;
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			rcu_read_unlock();
			return 0;
		}

		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		rcu_read_unlock();
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		frag->tstamp = tstamp;
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct fib6_info *from;
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ?
		-EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@connected: whether @sk is connected or not
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	In addition, for a connected socket, cache the dst in the socket
 *	if the current cache is not valid.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

/**
 *	ip6_dst_lookup_tunnel - perform route lookup on tunnel
 *	@skb: Packet for which lookup is done
 *	@dev: Tunnel device
 *	@net: Network namespace of tunnel device
 *	@sock: Socket which provides route info
 *	@saddr: Memory to store the src ip address
 *	@info: Tunnel information
 *	@protocol: IP protocol
 *	@use_cache: Flag to enable cache usage
 *
 *	This function performs a route lookup on a tunnel.
 *
 *	It returns a valid dst pointer and stores src address to be used in
 *	tunnel in param saddr on success, else a pointer encoded error code.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label);

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	if (dst->dev == dev) { /* is this necessary? */
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	*saddr = fl6.saddr;
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}

static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ?
			    opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	if (mtu <= fragheaderlen ||
	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
		goto emsgsize;

	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ?
			mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen, alloc_extra;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			alloc_extra = hh_len;
			alloc_extra += dst_exthdrlen;
			alloc_extra += rt->dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloc_extra += sizeof(struct frag_hdr);

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged &&
				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
				  !(rt->dst.dev->features & NETIF_F_SG)))
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}
			alloclen += alloc_extra;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			fraglen = datalen + fragheaderlen;

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk, alloclen,
							  (flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	if (uarg)
		sock_zerocopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}

int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}

struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation.
	 */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
		u8 icmp6_type;

		if (sk->sk_socket->type == SOCK_RAW &&
		    !(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH))
			icmp6_type = fl6->fl6_icmp_type;
		else
			icmp6_type = icmp6_hdr(skb)->icmp6_type;
		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	rcu_read_lock();
	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	rcu_read_unlock();
	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ?
			 ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	cork->base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}