162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * IPv6 output functions 462306a36Sopenharmony_ci * Linux INET6 implementation 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * Authors: 762306a36Sopenharmony_ci * Pedro Roque <roque@di.fc.ul.pt> 862306a36Sopenharmony_ci * 962306a36Sopenharmony_ci * Based on linux/net/ipv4/ip_output.c 1062306a36Sopenharmony_ci * 1162306a36Sopenharmony_ci * Changes: 1262306a36Sopenharmony_ci * A.N.Kuznetsov : airthmetics in fragmentation. 1362306a36Sopenharmony_ci * extension headers are implemented. 1462306a36Sopenharmony_ci * route changes now work. 1562306a36Sopenharmony_ci * ip6_forward does not confuse sniffers. 1662306a36Sopenharmony_ci * etc. 1762306a36Sopenharmony_ci * 1862306a36Sopenharmony_ci * H. von Brand : Added missing #include <linux/string.h> 1962306a36Sopenharmony_ci * Imran Patel : frag id should be in NBO 2062306a36Sopenharmony_ci * Kazunori MIYAZAWA @USAGI 2162306a36Sopenharmony_ci * : add ip6_append_data and related functions 2262306a36Sopenharmony_ci * for datagram xmit 2362306a36Sopenharmony_ci */ 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci#include <linux/errno.h> 2662306a36Sopenharmony_ci#include <linux/kernel.h> 2762306a36Sopenharmony_ci#include <linux/string.h> 2862306a36Sopenharmony_ci#include <linux/socket.h> 2962306a36Sopenharmony_ci#include <linux/net.h> 3062306a36Sopenharmony_ci#include <linux/netdevice.h> 3162306a36Sopenharmony_ci#include <linux/if_arp.h> 3262306a36Sopenharmony_ci#include <linux/in6.h> 3362306a36Sopenharmony_ci#include <linux/tcp.h> 3462306a36Sopenharmony_ci#include <linux/route.h> 3562306a36Sopenharmony_ci#include <linux/module.h> 3662306a36Sopenharmony_ci#include <linux/slab.h> 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci#include <linux/bpf-cgroup.h> 3962306a36Sopenharmony_ci#include <linux/netfilter.h> 4062306a36Sopenharmony_ci#include <linux/netfilter_ipv6.h> 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci#include <net/sock.h> 4362306a36Sopenharmony_ci#include <net/snmp.h> 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci#include <net/gso.h> 4662306a36Sopenharmony_ci#include <net/ipv6.h> 4762306a36Sopenharmony_ci#include <net/ndisc.h> 4862306a36Sopenharmony_ci#include <net/protocol.h> 4962306a36Sopenharmony_ci#include <net/ip6_route.h> 5062306a36Sopenharmony_ci#include <net/addrconf.h> 5162306a36Sopenharmony_ci#include <net/rawv6.h> 5262306a36Sopenharmony_ci#include <net/icmp.h> 5362306a36Sopenharmony_ci#include <net/xfrm.h> 5462306a36Sopenharmony_ci#include <net/checksum.h> 5562306a36Sopenharmony_ci#include <linux/mroute6.h> 5662306a36Sopenharmony_ci#include <net/l3mdev.h> 5762306a36Sopenharmony_ci#include <net/lwtunnel.h> 5862306a36Sopenharmony_ci#include <net/ip_tunnels.h> 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_cistatic int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) 6162306a36Sopenharmony_ci{ 6262306a36Sopenharmony_ci struct dst_entry *dst = skb_dst(skb); 6362306a36Sopenharmony_ci struct net_device *dev = dst->dev; 6462306a36Sopenharmony_ci struct inet6_dev *idev = ip6_dst_idev(dst); 6562306a36Sopenharmony_ci unsigned int hh_len = LL_RESERVED_SPACE(dev); 6662306a36Sopenharmony_ci const struct in6_addr *daddr, *nexthop; 6762306a36Sopenharmony_ci struct ipv6hdr *hdr; 6862306a36Sopenharmony_ci struct neighbour *neigh; 6962306a36Sopenharmony_ci int ret; 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci /* Be paranoid, rather than too clever. */ 7262306a36Sopenharmony_ci if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) { 7362306a36Sopenharmony_ci skb = skb_expand_head(skb, hh_len); 7462306a36Sopenharmony_ci if (!skb) { 7562306a36Sopenharmony_ci IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 7662306a36Sopenharmony_ci return -ENOMEM; 7762306a36Sopenharmony_ci } 7862306a36Sopenharmony_ci } 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci hdr = ipv6_hdr(skb); 8162306a36Sopenharmony_ci daddr = &hdr->daddr; 8262306a36Sopenharmony_ci if (ipv6_addr_is_multicast(daddr)) { 8362306a36Sopenharmony_ci if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && 8462306a36Sopenharmony_ci ((mroute6_is_socket(net, skb) && 8562306a36Sopenharmony_ci !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || 8662306a36Sopenharmony_ci ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) { 8762306a36Sopenharmony_ci struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci /* Do not check for IFF_ALLMULTI; multicast routing 9062306a36Sopenharmony_ci is not supported in any case. 9162306a36Sopenharmony_ci */ 9262306a36Sopenharmony_ci if (newskb) 9362306a36Sopenharmony_ci NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, 9462306a36Sopenharmony_ci net, sk, newskb, NULL, newskb->dev, 9562306a36Sopenharmony_ci dev_loopback_xmit); 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci if (hdr->hop_limit == 0) { 9862306a36Sopenharmony_ci IP6_INC_STATS(net, idev, 9962306a36Sopenharmony_ci IPSTATS_MIB_OUTDISCARDS); 10062306a36Sopenharmony_ci kfree_skb(skb); 10162306a36Sopenharmony_ci return 0; 10262306a36Sopenharmony_ci } 10362306a36Sopenharmony_ci } 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len); 10662306a36Sopenharmony_ci if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL && 10762306a36Sopenharmony_ci !(dev->flags & IFF_LOOPBACK)) { 10862306a36Sopenharmony_ci kfree_skb(skb); 10962306a36Sopenharmony_ci return 0; 11062306a36Sopenharmony_ci } 11162306a36Sopenharmony_ci } 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci if (lwtunnel_xmit_redirect(dst->lwtstate)) { 11462306a36Sopenharmony_ci int res = lwtunnel_xmit(skb); 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci if (res != LWTUNNEL_XMIT_CONTINUE) 11762306a36Sopenharmony_ci return res; 11862306a36Sopenharmony_ci } 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci rcu_read_lock(); 12162306a36Sopenharmony_ci nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); 12262306a36Sopenharmony_ci neigh = __ipv6_neigh_lookup_noref(dev, nexthop); 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci if (unlikely(IS_ERR_OR_NULL(neigh))) { 12562306a36Sopenharmony_ci if (unlikely(!neigh)) 12662306a36Sopenharmony_ci neigh = __neigh_create(&nd_tbl, nexthop, dev, false); 12762306a36Sopenharmony_ci if (IS_ERR(neigh)) { 12862306a36Sopenharmony_ci rcu_read_unlock(); 12962306a36Sopenharmony_ci IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); 13062306a36Sopenharmony_ci kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); 13162306a36Sopenharmony_ci return -EINVAL; 13262306a36Sopenharmony_ci } 13362306a36Sopenharmony_ci } 13462306a36Sopenharmony_ci sock_confirm_neigh(skb, neigh); 13562306a36Sopenharmony_ci ret = neigh_output(neigh, skb, false); 13662306a36Sopenharmony_ci rcu_read_unlock(); 13762306a36Sopenharmony_ci return ret; 13862306a36Sopenharmony_ci} 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_cistatic int 14162306a36Sopenharmony_ciip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, 14262306a36Sopenharmony_ci struct sk_buff *skb, unsigned int mtu) 14362306a36Sopenharmony_ci{ 14462306a36Sopenharmony_ci struct sk_buff *segs, *nskb; 14562306a36Sopenharmony_ci netdev_features_t features; 14662306a36Sopenharmony_ci int ret = 0; 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci /* Please see corresponding comment in ip_finish_output_gso 14962306a36Sopenharmony_ci * describing the cases where GSO segment length exceeds the 15062306a36Sopenharmony_ci * egress MTU. 15162306a36Sopenharmony_ci */ 15262306a36Sopenharmony_ci features = netif_skb_features(skb); 15362306a36Sopenharmony_ci segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); 15462306a36Sopenharmony_ci if (IS_ERR_OR_NULL(segs)) { 15562306a36Sopenharmony_ci kfree_skb(skb); 15662306a36Sopenharmony_ci return -ENOMEM; 15762306a36Sopenharmony_ci } 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci consume_skb(skb); 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci skb_list_walk_safe(segs, segs, nskb) { 16262306a36Sopenharmony_ci int err; 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_ci skb_mark_not_on_list(segs); 16562306a36Sopenharmony_ci /* Last GSO segment can be smaller than gso_size (and MTU). 16662306a36Sopenharmony_ci * Adding a fragment header would produce an "atomic fragment", 16762306a36Sopenharmony_ci * which is considered harmful (RFC-8021). Avoid that. 16862306a36Sopenharmony_ci */ 16962306a36Sopenharmony_ci err = segs->len > mtu ? 17062306a36Sopenharmony_ci ip6_fragment(net, sk, segs, ip6_finish_output2) : 17162306a36Sopenharmony_ci ip6_finish_output2(net, sk, segs); 17262306a36Sopenharmony_ci if (err && ret == 0) 17362306a36Sopenharmony_ci ret = err; 17462306a36Sopenharmony_ci } 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci return ret; 17762306a36Sopenharmony_ci} 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_cistatic int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 18062306a36Sopenharmony_ci{ 18162306a36Sopenharmony_ci unsigned int mtu; 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 18462306a36Sopenharmony_ci /* Policy lookup after SNAT yielded a new policy */ 18562306a36Sopenharmony_ci if (skb_dst(skb)->xfrm) { 18662306a36Sopenharmony_ci IP6CB(skb)->flags |= IP6SKB_REROUTED; 18762306a36Sopenharmony_ci return dst_output(net, sk, skb); 18862306a36Sopenharmony_ci } 18962306a36Sopenharmony_ci#endif 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci mtu = ip6_skb_dst_mtu(skb); 19262306a36Sopenharmony_ci if (skb_is_gso(skb) && 19362306a36Sopenharmony_ci !(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) && 19462306a36Sopenharmony_ci !skb_gso_validate_network_len(skb, mtu)) 19562306a36Sopenharmony_ci return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci if ((skb->len > mtu && !skb_is_gso(skb)) || 19862306a36Sopenharmony_ci dst_allfrag(skb_dst(skb)) || 19962306a36Sopenharmony_ci (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) 20062306a36Sopenharmony_ci return ip6_fragment(net, sk, skb, ip6_finish_output2); 20162306a36Sopenharmony_ci else 20262306a36Sopenharmony_ci return ip6_finish_output2(net, sk, skb); 20362306a36Sopenharmony_ci} 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_cistatic int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 20662306a36Sopenharmony_ci{ 20762306a36Sopenharmony_ci int ret; 20862306a36Sopenharmony_ci 20962306a36Sopenharmony_ci ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); 21062306a36Sopenharmony_ci switch (ret) { 21162306a36Sopenharmony_ci case NET_XMIT_SUCCESS: 21262306a36Sopenharmony_ci case NET_XMIT_CN: 21362306a36Sopenharmony_ci return __ip6_finish_output(net, sk, skb) ? : ret; 21462306a36Sopenharmony_ci default: 21562306a36Sopenharmony_ci kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS); 21662306a36Sopenharmony_ci return ret; 21762306a36Sopenharmony_ci } 21862306a36Sopenharmony_ci} 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ciint ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 22162306a36Sopenharmony_ci{ 22262306a36Sopenharmony_ci struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; 22362306a36Sopenharmony_ci struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci skb->protocol = htons(ETH_P_IPV6); 22662306a36Sopenharmony_ci skb->dev = dev; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci if (unlikely(idev->cnf.disable_ipv6)) { 22962306a36Sopenharmony_ci IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 23062306a36Sopenharmony_ci kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); 23162306a36Sopenharmony_ci return 0; 23262306a36Sopenharmony_ci } 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ci return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, 23562306a36Sopenharmony_ci net, sk, skb, indev, dev, 23662306a36Sopenharmony_ci ip6_finish_output, 23762306a36Sopenharmony_ci !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 23862306a36Sopenharmony_ci} 23962306a36Sopenharmony_ciEXPORT_SYMBOL(ip6_output); 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_cibool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) 24262306a36Sopenharmony_ci{ 24362306a36Sopenharmony_ci if (!np->autoflowlabel_set) 24462306a36Sopenharmony_ci return ip6_default_np_autolabel(net); 24562306a36Sopenharmony_ci else 24662306a36Sopenharmony_ci return np->autoflowlabel; 24762306a36Sopenharmony_ci} 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci/* 25062306a36Sopenharmony_ci * xmit an sk_buff (used by TCP, SCTP and DCCP) 25162306a36Sopenharmony_ci * Note : socket lock is not held for SYNACK packets, but might be modified 25262306a36Sopenharmony_ci * by calls to skb_set_owner_w() and ipv6_local_error(), 25362306a36Sopenharmony_ci * which are using proper atomic operations or spinlocks. 25462306a36Sopenharmony_ci */ 25562306a36Sopenharmony_ciint ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, 25662306a36Sopenharmony_ci __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority) 25762306a36Sopenharmony_ci{ 25862306a36Sopenharmony_ci struct net *net = sock_net(sk); 25962306a36Sopenharmony_ci const struct ipv6_pinfo *np = inet6_sk(sk); 26062306a36Sopenharmony_ci struct in6_addr *first_hop = &fl6->daddr; 26162306a36Sopenharmony_ci struct dst_entry *dst = skb_dst(skb); 26262306a36Sopenharmony_ci struct net_device *dev = dst->dev; 26362306a36Sopenharmony_ci struct inet6_dev *idev = ip6_dst_idev(dst); 26462306a36Sopenharmony_ci struct hop_jumbo_hdr *hop_jumbo; 26562306a36Sopenharmony_ci int hoplen = sizeof(*hop_jumbo); 26662306a36Sopenharmony_ci unsigned int head_room; 26762306a36Sopenharmony_ci struct ipv6hdr *hdr; 26862306a36Sopenharmony_ci u8 proto = fl6->flowi6_proto; 26962306a36Sopenharmony_ci int seg_len = skb->len; 27062306a36Sopenharmony_ci int hlimit = -1; 27162306a36Sopenharmony_ci u32 mtu; 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev); 27462306a36Sopenharmony_ci if (opt) 27562306a36Sopenharmony_ci head_room += opt->opt_nflen + opt->opt_flen; 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci if (unlikely(head_room > skb_headroom(skb))) { 27862306a36Sopenharmony_ci skb = skb_expand_head(skb, head_room); 27962306a36Sopenharmony_ci if (!skb) { 28062306a36Sopenharmony_ci IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 28162306a36Sopenharmony_ci return -ENOBUFS; 28262306a36Sopenharmony_ci } 28362306a36Sopenharmony_ci } 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci if (opt) { 28662306a36Sopenharmony_ci seg_len += opt->opt_nflen + opt->opt_flen; 28762306a36Sopenharmony_ci 28862306a36Sopenharmony_ci if (opt->opt_flen) 28962306a36Sopenharmony_ci ipv6_push_frag_opts(skb, opt, &proto); 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci if (opt->opt_nflen) 29262306a36Sopenharmony_ci ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, 29362306a36Sopenharmony_ci &fl6->saddr); 29462306a36Sopenharmony_ci } 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci if (unlikely(seg_len > IPV6_MAXPLEN)) { 29762306a36Sopenharmony_ci hop_jumbo = skb_push(skb, hoplen); 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci hop_jumbo->nexthdr = proto; 30062306a36Sopenharmony_ci hop_jumbo->hdrlen = 0; 30162306a36Sopenharmony_ci hop_jumbo->tlv_type = IPV6_TLV_JUMBO; 30262306a36Sopenharmony_ci hop_jumbo->tlv_len = 4; 30362306a36Sopenharmony_ci hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen); 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci proto = IPPROTO_HOPOPTS; 30662306a36Sopenharmony_ci seg_len = 0; 30762306a36Sopenharmony_ci IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO; 30862306a36Sopenharmony_ci } 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci skb_push(skb, sizeof(struct ipv6hdr)); 31162306a36Sopenharmony_ci skb_reset_network_header(skb); 31262306a36Sopenharmony_ci hdr = ipv6_hdr(skb); 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci /* 31562306a36Sopenharmony_ci * Fill in the IPv6 header 31662306a36Sopenharmony_ci */ 31762306a36Sopenharmony_ci if (np) 31862306a36Sopenharmony_ci hlimit = np->hop_limit; 31962306a36Sopenharmony_ci if (hlimit < 0) 32062306a36Sopenharmony_ci hlimit = ip6_dst_hoplimit(dst); 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, 32362306a36Sopenharmony_ci ip6_autoflowlabel(net, np), fl6)); 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci hdr->payload_len = htons(seg_len); 32662306a36Sopenharmony_ci hdr->nexthdr = proto; 32762306a36Sopenharmony_ci hdr->hop_limit = hlimit; 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci hdr->saddr = fl6->saddr; 33062306a36Sopenharmony_ci hdr->daddr = *first_hop; 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci skb->protocol = htons(ETH_P_IPV6); 33362306a36Sopenharmony_ci skb->priority = priority; 33462306a36Sopenharmony_ci skb->mark = mark; 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci mtu = dst_mtu(dst); 33762306a36Sopenharmony_ci if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { 33862306a36Sopenharmony_ci IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci /* if egress device is enslaved to an L3 master device pass the 34162306a36Sopenharmony_ci * skb to its handler for processing 34262306a36Sopenharmony_ci */ 34362306a36Sopenharmony_ci skb = l3mdev_ip6_out((struct sock *)sk, skb); 34462306a36Sopenharmony_ci if (unlikely(!skb)) 34562306a36Sopenharmony_ci return 0; 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci /* hooks should never assume socket lock is held. 34862306a36Sopenharmony_ci * we promote our socket to non const 34962306a36Sopenharmony_ci */ 35062306a36Sopenharmony_ci return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, 35162306a36Sopenharmony_ci net, (struct sock *)sk, skb, NULL, dev, 35262306a36Sopenharmony_ci dst_output); 35362306a36Sopenharmony_ci } 35462306a36Sopenharmony_ci 35562306a36Sopenharmony_ci skb->dev = dev; 35662306a36Sopenharmony_ci /* ipv6_local_error() does not require socket lock, 35762306a36Sopenharmony_ci * we promote our socket to non const 35862306a36Sopenharmony_ci */ 35962306a36Sopenharmony_ci ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_ci IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS); 36262306a36Sopenharmony_ci kfree_skb(skb); 36362306a36Sopenharmony_ci return -EMSGSIZE; 36462306a36Sopenharmony_ci} 36562306a36Sopenharmony_ciEXPORT_SYMBOL(ip6_xmit); 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_cistatic int ip6_call_ra_chain(struct sk_buff *skb, int sel) 36862306a36Sopenharmony_ci{ 36962306a36Sopenharmony_ci struct ip6_ra_chain *ra; 37062306a36Sopenharmony_ci struct sock *last = NULL; 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_ci read_lock(&ip6_ra_lock); 37362306a36Sopenharmony_ci for (ra = ip6_ra_chain; ra; ra = ra->next) { 37462306a36Sopenharmony_ci struct sock *sk = ra->sk; 37562306a36Sopenharmony_ci if (sk && ra->sel == sel && 37662306a36Sopenharmony_ci (!sk->sk_bound_dev_if || 37762306a36Sopenharmony_ci sk->sk_bound_dev_if == skb->dev->ifindex)) { 37862306a36Sopenharmony_ci struct ipv6_pinfo *np = inet6_sk(sk); 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci if (np && np->rtalert_isolate && 38162306a36Sopenharmony_ci !net_eq(sock_net(sk), dev_net(skb->dev))) { 38262306a36Sopenharmony_ci continue; 38362306a36Sopenharmony_ci } 38462306a36Sopenharmony_ci if (last) { 38562306a36Sopenharmony_ci struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 38662306a36Sopenharmony_ci if (skb2) 38762306a36Sopenharmony_ci rawv6_rcv(last, skb2); 38862306a36Sopenharmony_ci } 38962306a36Sopenharmony_ci last = sk; 39062306a36Sopenharmony_ci } 39162306a36Sopenharmony_ci } 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci if (last) { 39462306a36Sopenharmony_ci rawv6_rcv(last, skb); 39562306a36Sopenharmony_ci read_unlock(&ip6_ra_lock); 39662306a36Sopenharmony_ci return 1; 39762306a36Sopenharmony_ci } 39862306a36Sopenharmony_ci read_unlock(&ip6_ra_lock); 39962306a36Sopenharmony_ci return 0; 40062306a36Sopenharmony_ci} 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_cistatic int ip6_forward_proxy_check(struct sk_buff *skb) 40362306a36Sopenharmony_ci{ 40462306a36Sopenharmony_ci struct ipv6hdr *hdr = ipv6_hdr(skb); 40562306a36Sopenharmony_ci u8 nexthdr = hdr->nexthdr; 40662306a36Sopenharmony_ci __be16 frag_off; 40762306a36Sopenharmony_ci int offset; 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci if (ipv6_ext_hdr(nexthdr)) { 41062306a36Sopenharmony_ci offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); 41162306a36Sopenharmony_ci if (offset < 0) 41262306a36Sopenharmony_ci return 0; 41362306a36Sopenharmony_ci } else 41462306a36Sopenharmony_ci offset = sizeof(struct ipv6hdr); 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci if (nexthdr == IPPROTO_ICMPV6) { 41762306a36Sopenharmony_ci struct icmp6hdr *icmp6; 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci if (!pskb_may_pull(skb, (skb_network_header(skb) + 42062306a36Sopenharmony_ci offset + 1 - skb->data))) 42162306a36Sopenharmony_ci return 0; 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci switch (icmp6->icmp6_type) { 42662306a36Sopenharmony_ci case NDISC_ROUTER_SOLICITATION: 42762306a36Sopenharmony_ci case NDISC_ROUTER_ADVERTISEMENT: 42862306a36Sopenharmony_ci case NDISC_NEIGHBOUR_SOLICITATION: 42962306a36Sopenharmony_ci case NDISC_NEIGHBOUR_ADVERTISEMENT: 43062306a36Sopenharmony_ci case NDISC_REDIRECT: 43162306a36Sopenharmony_ci /* For reaction involving unicast neighbor discovery 43262306a36Sopenharmony_ci * message destined to the proxied address, pass it to 43362306a36Sopenharmony_ci * input function. 43462306a36Sopenharmony_ci */ 43562306a36Sopenharmony_ci return 1; 43662306a36Sopenharmony_ci default: 43762306a36Sopenharmony_ci break; 43862306a36Sopenharmony_ci } 43962306a36Sopenharmony_ci } 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci /* 44262306a36Sopenharmony_ci * The proxying router can't forward traffic sent to a link-local 44362306a36Sopenharmony_ci * address, so signal the sender and discard the packet. This 44462306a36Sopenharmony_ci * behavior is clarified by the MIPv6 specification. 44562306a36Sopenharmony_ci */ 44662306a36Sopenharmony_ci if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { 44762306a36Sopenharmony_ci dst_link_failure(skb); 44862306a36Sopenharmony_ci return -1; 44962306a36Sopenharmony_ci } 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ci return 0; 45262306a36Sopenharmony_ci} 45362306a36Sopenharmony_ci 45462306a36Sopenharmony_cistatic inline int ip6_forward_finish(struct net *net, struct sock *sk, 45562306a36Sopenharmony_ci struct sk_buff *skb) 45662306a36Sopenharmony_ci{ 45762306a36Sopenharmony_ci struct dst_entry *dst = skb_dst(skb); 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_ci __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 46062306a36Sopenharmony_ci 46162306a36Sopenharmony_ci#ifdef CONFIG_NET_SWITCHDEV 46262306a36Sopenharmony_ci if (skb->offload_l3_fwd_mark) { 46362306a36Sopenharmony_ci consume_skb(skb); 46462306a36Sopenharmony_ci return 0; 46562306a36Sopenharmony_ci } 46662306a36Sopenharmony_ci#endif 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci skb_clear_tstamp(skb); 46962306a36Sopenharmony_ci return dst_output(net, sk, skb); 47062306a36Sopenharmony_ci} 47162306a36Sopenharmony_ci 47262306a36Sopenharmony_cistatic bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) 47362306a36Sopenharmony_ci{ 47462306a36Sopenharmony_ci if (skb->len <= mtu) 47562306a36Sopenharmony_ci return false; 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci /* ipv6 conntrack defrag sets max_frag_size + ignore_df */ 47862306a36Sopenharmony_ci if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) 47962306a36Sopenharmony_ci return true; 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci if (skb->ignore_df) 48262306a36Sopenharmony_ci return false; 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ci if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) 48562306a36Sopenharmony_ci return false; 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci return true; 48862306a36Sopenharmony_ci} 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ciint ip6_forward(struct sk_buff *skb) 49162306a36Sopenharmony_ci{ 49262306a36Sopenharmony_ci struct dst_entry *dst = skb_dst(skb); 49362306a36Sopenharmony_ci struct ipv6hdr *hdr = ipv6_hdr(skb); 49462306a36Sopenharmony_ci struct inet6_skb_parm *opt = IP6CB(skb); 49562306a36Sopenharmony_ci struct net *net = dev_net(dst->dev); 49662306a36Sopenharmony_ci struct inet6_dev *idev; 49762306a36Sopenharmony_ci SKB_DR(reason); 49862306a36Sopenharmony_ci u32 mtu; 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); 50162306a36Sopenharmony_ci if (net->ipv6.devconf_all->forwarding == 0) 50262306a36Sopenharmony_ci goto error; 50362306a36Sopenharmony_ci 50462306a36Sopenharmony_ci if (skb->pkt_type != PACKET_HOST) 50562306a36Sopenharmony_ci goto drop; 50662306a36Sopenharmony_ci 50762306a36Sopenharmony_ci if (unlikely(skb->sk)) 50862306a36Sopenharmony_ci goto drop; 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_ci if (skb_warn_if_lro(skb)) 51162306a36Sopenharmony_ci goto drop; 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_ci if (!net->ipv6.devconf_all->disable_policy && 51462306a36Sopenharmony_ci (!idev || !idev->cnf.disable_policy) && 51562306a36Sopenharmony_ci !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { 51662306a36Sopenharmony_ci __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); 51762306a36Sopenharmony_ci goto drop; 51862306a36Sopenharmony_ci } 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci skb_forward_csum(skb); 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci /* 52362306a36Sopenharmony_ci * We DO NOT make any processing on 52462306a36Sopenharmony_ci * RA packets, pushing them to user level AS IS 52562306a36Sopenharmony_ci * without ane WARRANTY that application will be able 52662306a36Sopenharmony_ci * to interpret them. The reason is that we 52762306a36Sopenharmony_ci * cannot make anything clever here. 52862306a36Sopenharmony_ci * 52962306a36Sopenharmony_ci * We are not end-node, so that if packet contains 53062306a36Sopenharmony_ci * AH/ESP, we cannot make anything. 53162306a36Sopenharmony_ci * Defragmentation also would be mistake, RA packets 53262306a36Sopenharmony_ci * cannot be fragmented, because there is no warranty 53362306a36Sopenharmony_ci * that different fragments will go along one path. --ANK 53462306a36Sopenharmony_ci */ 53562306a36Sopenharmony_ci if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { 53662306a36Sopenharmony_ci if (ip6_call_ra_chain(skb, ntohs(opt->ra))) 53762306a36Sopenharmony_ci return 0; 53862306a36Sopenharmony_ci } 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_ci /* 54162306a36Sopenharmony_ci * check and decrement ttl 54262306a36Sopenharmony_ci */ 54362306a36Sopenharmony_ci if (hdr->hop_limit <= 1) { 54462306a36Sopenharmony_ci icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); 54562306a36Sopenharmony_ci __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); 54862306a36Sopenharmony_ci return -ETIMEDOUT; 54962306a36Sopenharmony_ci } 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci /* XXX: idev->cnf.proxy_ndp? */ 55262306a36Sopenharmony_ci if (net->ipv6.devconf_all->proxy_ndp && 55362306a36Sopenharmony_ci pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { 55462306a36Sopenharmony_ci int proxied = ip6_forward_proxy_check(skb); 55562306a36Sopenharmony_ci if (proxied > 0) { 55662306a36Sopenharmony_ci /* It's tempting to decrease the hop limit 55762306a36Sopenharmony_ci * here by 1, as we do at the end of the 55862306a36Sopenharmony_ci * function too. 55962306a36Sopenharmony_ci * 56062306a36Sopenharmony_ci * But that would be incorrect, as proxying is 56162306a36Sopenharmony_ci * not forwarding. The ip6_input function 56262306a36Sopenharmony_ci * will handle this packet locally, and it 56362306a36Sopenharmony_ci * depends on the hop limit being unchanged. 56462306a36Sopenharmony_ci * 56562306a36Sopenharmony_ci * One example is the NDP hop limit, that 56662306a36Sopenharmony_ci * always has to stay 255, but other would be 56762306a36Sopenharmony_ci * similar checks around RA packets, where the 56862306a36Sopenharmony_ci * user can even change the desired limit. 56962306a36Sopenharmony_ci */ 57062306a36Sopenharmony_ci return ip6_input(skb); 57162306a36Sopenharmony_ci } else if (proxied < 0) { 57262306a36Sopenharmony_ci __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); 57362306a36Sopenharmony_ci goto drop; 57462306a36Sopenharmony_ci } 57562306a36Sopenharmony_ci } 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci if (!xfrm6_route_forward(skb)) { 57862306a36Sopenharmony_ci __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); 57962306a36Sopenharmony_ci SKB_DR_SET(reason, XFRM_POLICY); 58062306a36Sopenharmony_ci goto drop; 58162306a36Sopenharmony_ci } 58262306a36Sopenharmony_ci dst = skb_dst(skb); 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci /* IPv6 specs say nothing about it, but it is clear that we cannot 58562306a36Sopenharmony_ci send redirects to source routed frames. 58662306a36Sopenharmony_ci We don't send redirects to frames decapsulated from IPsec. 58762306a36Sopenharmony_ci */ 58862306a36Sopenharmony_ci if (IP6CB(skb)->iif == dst->dev->ifindex && 58962306a36Sopenharmony_ci opt->srcrt == 0 && !skb_sec_path(skb)) { 59062306a36Sopenharmony_ci struct in6_addr *target = NULL; 59162306a36Sopenharmony_ci struct inet_peer *peer; 59262306a36Sopenharmony_ci struct rt6_info *rt; 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_ci /* 59562306a36Sopenharmony_ci * incoming and outgoing devices are the same 59662306a36Sopenharmony_ci * send a redirect. 59762306a36Sopenharmony_ci */ 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_ci rt = (struct rt6_info *) dst; 60062306a36Sopenharmony_ci if (rt->rt6i_flags & RTF_GATEWAY) 60162306a36Sopenharmony_ci target = &rt->rt6i_gateway; 60262306a36Sopenharmony_ci else 60362306a36Sopenharmony_ci target = &hdr->daddr; 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1); 60662306a36Sopenharmony_ci 60762306a36Sopenharmony_ci /* Limit redirects both by destination (here) 60862306a36Sopenharmony_ci and by source (inside ndisc_send_redirect) 60962306a36Sopenharmony_ci */ 61062306a36Sopenharmony_ci if (inet_peer_xrlim_allow(peer, 1*HZ)) 61162306a36Sopenharmony_ci ndisc_send_redirect(skb, target); 61262306a36Sopenharmony_ci if (peer) 61362306a36Sopenharmony_ci inet_putpeer(peer); 61462306a36Sopenharmony_ci } else { 61562306a36Sopenharmony_ci int addrtype = ipv6_addr_type(&hdr->saddr); 61662306a36Sopenharmony_ci 61762306a36Sopenharmony_ci /* This check is security critical. */ 61862306a36Sopenharmony_ci if (addrtype == IPV6_ADDR_ANY || 61962306a36Sopenharmony_ci addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) 62062306a36Sopenharmony_ci goto error; 62162306a36Sopenharmony_ci if (addrtype & IPV6_ADDR_LINKLOCAL) { 62262306a36Sopenharmony_ci icmpv6_send(skb, ICMPV6_DEST_UNREACH, 62362306a36Sopenharmony_ci ICMPV6_NOT_NEIGHBOUR, 0); 62462306a36Sopenharmony_ci goto error; 62562306a36Sopenharmony_ci } 62662306a36Sopenharmony_ci } 62762306a36Sopenharmony_ci 62862306a36Sopenharmony_ci mtu = ip6_dst_mtu_maybe_forward(dst, true); 62962306a36Sopenharmony_ci if (mtu < IPV6_MIN_MTU) 63062306a36Sopenharmony_ci mtu = IPV6_MIN_MTU; 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_ci if (ip6_pkt_too_big(skb, mtu)) { 63362306a36Sopenharmony_ci /* Again, force OUTPUT device used as source address */ 63462306a36Sopenharmony_ci skb->dev = dst->dev; 63562306a36Sopenharmony_ci icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 63662306a36Sopenharmony_ci __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS); 63762306a36Sopenharmony_ci __IP6_INC_STATS(net, ip6_dst_idev(dst), 63862306a36Sopenharmony_ci IPSTATS_MIB_FRAGFAILS); 63962306a36Sopenharmony_ci kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); 64062306a36Sopenharmony_ci return -EMSGSIZE; 64162306a36Sopenharmony_ci } 64262306a36Sopenharmony_ci 64362306a36Sopenharmony_ci if (skb_cow(skb, dst->dev->hard_header_len)) { 64462306a36Sopenharmony_ci __IP6_INC_STATS(net, ip6_dst_idev(dst), 64562306a36Sopenharmony_ci IPSTATS_MIB_OUTDISCARDS); 64662306a36Sopenharmony_ci goto drop; 64762306a36Sopenharmony_ci } 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci hdr = ipv6_hdr(skb); 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_ci /* Mangling hops number delayed to point after skb COW */ 65262306a36Sopenharmony_ci 65362306a36Sopenharmony_ci hdr->hop_limit--; 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, 65662306a36Sopenharmony_ci net, NULL, skb, skb->dev, dst->dev, 65762306a36Sopenharmony_ci ip6_forward_finish); 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_cierror: 66062306a36Sopenharmony_ci __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); 66162306a36Sopenharmony_ci SKB_DR_SET(reason, IP_INADDRERRORS); 66262306a36Sopenharmony_cidrop: 66362306a36Sopenharmony_ci kfree_skb_reason(skb, reason); 66462306a36Sopenharmony_ci return -EINVAL; 66562306a36Sopenharmony_ci} 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_cistatic void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 66862306a36Sopenharmony_ci{ 66962306a36Sopenharmony_ci to->pkt_type = from->pkt_type; 67062306a36Sopenharmony_ci to->priority = from->priority; 67162306a36Sopenharmony_ci to->protocol = from->protocol; 67262306a36Sopenharmony_ci skb_dst_drop(to); 67362306a36Sopenharmony_ci skb_dst_set(to, dst_clone(skb_dst(from))); 67462306a36Sopenharmony_ci to->dev = from->dev; 67562306a36Sopenharmony_ci to->mark = from->mark; 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_ci skb_copy_hash(to, from); 67862306a36Sopenharmony_ci 67962306a36Sopenharmony_ci#ifdef CONFIG_NET_SCHED 68062306a36Sopenharmony_ci to->tc_index = from->tc_index; 68162306a36Sopenharmony_ci#endif 68262306a36Sopenharmony_ci nf_copy(to, from); 68362306a36Sopenharmony_ci skb_ext_copy(to, from); 68462306a36Sopenharmony_ci skb_copy_secmark(to, from); 68562306a36Sopenharmony_ci} 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_ciint ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr, 68862306a36Sopenharmony_ci u8 nexthdr, __be32 frag_id, 68962306a36Sopenharmony_ci struct ip6_fraglist_iter *iter) 69062306a36Sopenharmony_ci{ 69162306a36Sopenharmony_ci unsigned int first_len; 69262306a36Sopenharmony_ci struct frag_hdr *fh; 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_ci /* BUILD HEADER */ 69562306a36Sopenharmony_ci *prevhdr = NEXTHDR_FRAGMENT; 69662306a36Sopenharmony_ci iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 69762306a36Sopenharmony_ci if (!iter->tmp_hdr) 69862306a36Sopenharmony_ci return -ENOMEM; 69962306a36Sopenharmony_ci 70062306a36Sopenharmony_ci iter->frag = skb_shinfo(skb)->frag_list; 70162306a36Sopenharmony_ci skb_frag_list_init(skb); 70262306a36Sopenharmony_ci 70362306a36Sopenharmony_ci iter->offset = 0; 70462306a36Sopenharmony_ci iter->hlen = hlen; 70562306a36Sopenharmony_ci iter->frag_id = frag_id; 70662306a36Sopenharmony_ci iter->nexthdr = nexthdr; 70762306a36Sopenharmony_ci 70862306a36Sopenharmony_ci __skb_pull(skb, hlen); 70962306a36Sopenharmony_ci fh = __skb_push(skb, sizeof(struct frag_hdr)); 71062306a36Sopenharmony_ci __skb_push(skb, hlen); 71162306a36Sopenharmony_ci skb_reset_network_header(skb); 71262306a36Sopenharmony_ci memcpy(skb_network_header(skb), iter->tmp_hdr, hlen); 71362306a36Sopenharmony_ci 71462306a36Sopenharmony_ci fh->nexthdr = nexthdr; 71562306a36Sopenharmony_ci fh->reserved = 0; 71662306a36Sopenharmony_ci fh->frag_off = htons(IP6_MF); 71762306a36Sopenharmony_ci fh->identification = frag_id; 71862306a36Sopenharmony_ci 71962306a36Sopenharmony_ci first_len = skb_pagelen(skb); 72062306a36Sopenharmony_ci skb->data_len = first_len - skb_headlen(skb); 72162306a36Sopenharmony_ci skb->len = first_len; 72262306a36Sopenharmony_ci ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci return 0; 72562306a36Sopenharmony_ci} 72662306a36Sopenharmony_ciEXPORT_SYMBOL(ip6_fraglist_init); 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_civoid ip6_fraglist_prepare(struct sk_buff *skb, 72962306a36Sopenharmony_ci struct ip6_fraglist_iter *iter) 73062306a36Sopenharmony_ci{ 73162306a36Sopenharmony_ci struct sk_buff *frag = iter->frag; 73262306a36Sopenharmony_ci unsigned int hlen = iter->hlen; 73362306a36Sopenharmony_ci struct frag_hdr *fh; 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci frag->ip_summed = CHECKSUM_NONE; 73662306a36Sopenharmony_ci skb_reset_transport_header(frag); 73762306a36Sopenharmony_ci fh = __skb_push(frag, sizeof(struct frag_hdr)); 73862306a36Sopenharmony_ci __skb_push(frag, hlen); 73962306a36Sopenharmony_ci skb_reset_network_header(frag); 74062306a36Sopenharmony_ci memcpy(skb_network_header(frag), iter->tmp_hdr, hlen); 74162306a36Sopenharmony_ci iter->offset += skb->len - hlen - sizeof(struct frag_hdr); 74262306a36Sopenharmony_ci fh->nexthdr = iter->nexthdr; 74362306a36Sopenharmony_ci fh->reserved = 0; 74462306a36Sopenharmony_ci fh->frag_off = htons(iter->offset); 74562306a36Sopenharmony_ci if (frag->next) 74662306a36Sopenharmony_ci fh->frag_off |= htons(IP6_MF); 74762306a36Sopenharmony_ci fh->identification = iter->frag_id; 74862306a36Sopenharmony_ci ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); 74962306a36Sopenharmony_ci ip6_copy_metadata(frag, skb); 75062306a36Sopenharmony_ci} 75162306a36Sopenharmony_ciEXPORT_SYMBOL(ip6_fraglist_prepare); 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_civoid ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu, 75462306a36Sopenharmony_ci unsigned short needed_tailroom, int hdr_room, u8 *prevhdr, 75562306a36Sopenharmony_ci u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state) 75662306a36Sopenharmony_ci{ 75762306a36Sopenharmony_ci state->prevhdr = prevhdr; 75862306a36Sopenharmony_ci state->nexthdr = nexthdr; 75962306a36Sopenharmony_ci state->frag_id = frag_id; 76062306a36Sopenharmony_ci 76162306a36Sopenharmony_ci state->hlen = hlen; 76262306a36Sopenharmony_ci state->mtu = mtu; 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ci state->left = skb->len - hlen; /* Space per frame */ 76562306a36Sopenharmony_ci state->ptr = hlen; /* Where to start from */ 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_ci state->hroom = hdr_room; 76862306a36Sopenharmony_ci state->troom = needed_tailroom; 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_ci state->offset = 0; 77162306a36Sopenharmony_ci} 77262306a36Sopenharmony_ciEXPORT_SYMBOL(ip6_frag_init); 77362306a36Sopenharmony_ci 77462306a36Sopenharmony_cistruct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state) 77562306a36Sopenharmony_ci{ 77662306a36Sopenharmony_ci u8 *prevhdr = state->prevhdr, *fragnexthdr_offset; 77762306a36Sopenharmony_ci struct sk_buff *frag; 77862306a36Sopenharmony_ci struct frag_hdr *fh; 77962306a36Sopenharmony_ci unsigned int len; 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci len = state->left; 78262306a36Sopenharmony_ci /* IF: it doesn't fit, use 'mtu' - the data space left */ 78362306a36Sopenharmony_ci if (len > state->mtu) 78462306a36Sopenharmony_ci len = state->mtu; 78562306a36Sopenharmony_ci /* IF: we are not sending up to and including the packet end 78662306a36Sopenharmony_ci then align the next start on an eight byte boundary */ 78762306a36Sopenharmony_ci if (len < state->left) 78862306a36Sopenharmony_ci len &= ~7; 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_ci /* Allocate buffer */ 79162306a36Sopenharmony_ci frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) + 79262306a36Sopenharmony_ci state->hroom + state->troom, GFP_ATOMIC); 79362306a36Sopenharmony_ci if (!frag) 79462306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci /* 79762306a36Sopenharmony_ci * Set up data on packet 79862306a36Sopenharmony_ci */ 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_ci ip6_copy_metadata(frag, skb); 80162306a36Sopenharmony_ci skb_reserve(frag, state->hroom); 80262306a36Sopenharmony_ci skb_put(frag, len + state->hlen + sizeof(struct frag_hdr)); 80362306a36Sopenharmony_ci skb_reset_network_header(frag); 80462306a36Sopenharmony_ci fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen); 80562306a36Sopenharmony_ci frag->transport_header = (frag->network_header + state->hlen + 80662306a36Sopenharmony_ci sizeof(struct frag_hdr)); 80762306a36Sopenharmony_ci 80862306a36Sopenharmony_ci /* 80962306a36Sopenharmony_ci * Charge the memory for the fragment to any owner 81062306a36Sopenharmony_ci * it might possess 81162306a36Sopenharmony_ci */ 81262306a36Sopenharmony_ci if (skb->sk) 81362306a36Sopenharmony_ci skb_set_owner_w(frag, skb->sk); 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci /* 81662306a36Sopenharmony_ci * Copy the packet header into the new buffer. 81762306a36Sopenharmony_ci */ 81862306a36Sopenharmony_ci skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen); 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci fragnexthdr_offset = skb_network_header(frag); 82162306a36Sopenharmony_ci fragnexthdr_offset += prevhdr - skb_network_header(skb); 82262306a36Sopenharmony_ci *fragnexthdr_offset = NEXTHDR_FRAGMENT; 82362306a36Sopenharmony_ci 82462306a36Sopenharmony_ci /* 82562306a36Sopenharmony_ci * Build fragment header. 82662306a36Sopenharmony_ci */ 82762306a36Sopenharmony_ci fh->nexthdr = state->nexthdr; 82862306a36Sopenharmony_ci fh->reserved = 0; 82962306a36Sopenharmony_ci fh->identification = state->frag_id; 83062306a36Sopenharmony_ci 83162306a36Sopenharmony_ci /* 83262306a36Sopenharmony_ci * Copy a block of the IP datagram. 83362306a36Sopenharmony_ci */ 83462306a36Sopenharmony_ci BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag), 83562306a36Sopenharmony_ci len)); 83662306a36Sopenharmony_ci state->left -= len; 83762306a36Sopenharmony_ci 83862306a36Sopenharmony_ci fh->frag_off = htons(state->offset); 83962306a36Sopenharmony_ci if (state->left > 0) 84062306a36Sopenharmony_ci fh->frag_off |= htons(IP6_MF); 84162306a36Sopenharmony_ci ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); 84262306a36Sopenharmony_ci 84362306a36Sopenharmony_ci state->ptr += len; 84462306a36Sopenharmony_ci state->offset += len; 84562306a36Sopenharmony_ci 84662306a36Sopenharmony_ci return frag; 84762306a36Sopenharmony_ci} 84862306a36Sopenharmony_ciEXPORT_SYMBOL(ip6_frag_next); 84962306a36Sopenharmony_ci 85062306a36Sopenharmony_ciint ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, 85162306a36Sopenharmony_ci int (*output)(struct net *, struct sock *, struct sk_buff *)) 85262306a36Sopenharmony_ci{ 85362306a36Sopenharmony_ci struct sk_buff *frag; 85462306a36Sopenharmony_ci struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 85562306a36Sopenharmony_ci struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? 85662306a36Sopenharmony_ci inet6_sk(skb->sk) : NULL; 85762306a36Sopenharmony_ci bool mono_delivery_time = skb->mono_delivery_time; 85862306a36Sopenharmony_ci struct ip6_frag_state state; 85962306a36Sopenharmony_ci unsigned int mtu, hlen, nexthdr_offset; 86062306a36Sopenharmony_ci ktime_t tstamp = skb->tstamp; 86162306a36Sopenharmony_ci int hroom, err = 0; 86262306a36Sopenharmony_ci __be32 frag_id; 86362306a36Sopenharmony_ci u8 *prevhdr, nexthdr = 0; 86462306a36Sopenharmony_ci 86562306a36Sopenharmony_ci err = ip6_find_1stfragopt(skb, &prevhdr); 86662306a36Sopenharmony_ci if (err < 0) 86762306a36Sopenharmony_ci goto fail; 86862306a36Sopenharmony_ci hlen = err; 86962306a36Sopenharmony_ci nexthdr = *prevhdr; 87062306a36Sopenharmony_ci nexthdr_offset = prevhdr - skb_network_header(skb); 87162306a36Sopenharmony_ci 87262306a36Sopenharmony_ci mtu = ip6_skb_dst_mtu(skb); 87362306a36Sopenharmony_ci 87462306a36Sopenharmony_ci /* We must not fragment if the socket is set to force MTU discovery 87562306a36Sopenharmony_ci * or if the skb it not generated by a local socket. 87662306a36Sopenharmony_ci */ 87762306a36Sopenharmony_ci if (unlikely(!skb->ignore_df && skb->len > mtu)) 87862306a36Sopenharmony_ci goto fail_toobig; 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci if (IP6CB(skb)->frag_max_size) { 88162306a36Sopenharmony_ci if (IP6CB(skb)->frag_max_size > mtu) 88262306a36Sopenharmony_ci goto fail_toobig; 88362306a36Sopenharmony_ci 88462306a36Sopenharmony_ci /* don't send fragments larger than what we received */ 88562306a36Sopenharmony_ci mtu = IP6CB(skb)->frag_max_size; 88662306a36Sopenharmony_ci if (mtu < IPV6_MIN_MTU) 88762306a36Sopenharmony_ci mtu = IPV6_MIN_MTU; 88862306a36Sopenharmony_ci } 88962306a36Sopenharmony_ci 89062306a36Sopenharmony_ci if (np && np->frag_size < mtu) { 89162306a36Sopenharmony_ci if (np->frag_size) 89262306a36Sopenharmony_ci mtu = np->frag_size; 89362306a36Sopenharmony_ci } 89462306a36Sopenharmony_ci if (mtu < hlen + sizeof(struct frag_hdr) + 8) 89562306a36Sopenharmony_ci goto fail_toobig; 89662306a36Sopenharmony_ci mtu -= hlen + sizeof(struct frag_hdr); 89762306a36Sopenharmony_ci 89862306a36Sopenharmony_ci frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, 89962306a36Sopenharmony_ci &ipv6_hdr(skb)->saddr); 90062306a36Sopenharmony_ci 90162306a36Sopenharmony_ci if (skb->ip_summed == CHECKSUM_PARTIAL && 90262306a36Sopenharmony_ci (err = skb_checksum_help(skb))) 90362306a36Sopenharmony_ci goto fail; 90462306a36Sopenharmony_ci 90562306a36Sopenharmony_ci prevhdr = skb_network_header(skb) + nexthdr_offset; 90662306a36Sopenharmony_ci hroom = LL_RESERVED_SPACE(rt->dst.dev); 90762306a36Sopenharmony_ci if (skb_has_frag_list(skb)) { 90862306a36Sopenharmony_ci unsigned int first_len = skb_pagelen(skb); 90962306a36Sopenharmony_ci struct ip6_fraglist_iter iter; 91062306a36Sopenharmony_ci struct sk_buff *frag2; 91162306a36Sopenharmony_ci 91262306a36Sopenharmony_ci if (first_len - hlen > mtu || 91362306a36Sopenharmony_ci ((first_len - hlen) & 7) || 91462306a36Sopenharmony_ci skb_cloned(skb) || 91562306a36Sopenharmony_ci skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) 91662306a36Sopenharmony_ci goto slow_path; 91762306a36Sopenharmony_ci 91862306a36Sopenharmony_ci skb_walk_frags(skb, frag) { 91962306a36Sopenharmony_ci /* Correct geometry. */ 92062306a36Sopenharmony_ci if (frag->len > mtu || 92162306a36Sopenharmony_ci ((frag->len & 7) && frag->next) || 92262306a36Sopenharmony_ci skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr))) 92362306a36Sopenharmony_ci goto slow_path_clean; 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_ci /* Partially cloned skb? */ 92662306a36Sopenharmony_ci if (skb_shared(frag)) 92762306a36Sopenharmony_ci goto slow_path_clean; 92862306a36Sopenharmony_ci 92962306a36Sopenharmony_ci BUG_ON(frag->sk); 93062306a36Sopenharmony_ci if (skb->sk) { 93162306a36Sopenharmony_ci frag->sk = skb->sk; 93262306a36Sopenharmony_ci frag->destructor = sock_wfree; 93362306a36Sopenharmony_ci } 93462306a36Sopenharmony_ci skb->truesize -= frag->truesize; 93562306a36Sopenharmony_ci } 93662306a36Sopenharmony_ci 93762306a36Sopenharmony_ci err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id, 93862306a36Sopenharmony_ci &iter); 93962306a36Sopenharmony_ci if (err < 0) 94062306a36Sopenharmony_ci goto fail; 94162306a36Sopenharmony_ci 94262306a36Sopenharmony_ci /* We prevent @rt from being freed. */ 94362306a36Sopenharmony_ci rcu_read_lock(); 94462306a36Sopenharmony_ci 94562306a36Sopenharmony_ci for (;;) { 94662306a36Sopenharmony_ci /* Prepare header of the next frame, 94762306a36Sopenharmony_ci * before previous one went down. */ 94862306a36Sopenharmony_ci if (iter.frag) 94962306a36Sopenharmony_ci ip6_fraglist_prepare(skb, &iter); 95062306a36Sopenharmony_ci 95162306a36Sopenharmony_ci skb_set_delivery_time(skb, tstamp, mono_delivery_time); 95262306a36Sopenharmony_ci err = output(net, sk, skb); 95362306a36Sopenharmony_ci if (!err) 95462306a36Sopenharmony_ci IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 95562306a36Sopenharmony_ci IPSTATS_MIB_FRAGCREATES); 95662306a36Sopenharmony_ci 95762306a36Sopenharmony_ci if (err || !iter.frag) 95862306a36Sopenharmony_ci break; 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci skb = ip6_fraglist_next(&iter); 96162306a36Sopenharmony_ci } 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_ci kfree(iter.tmp_hdr); 96462306a36Sopenharmony_ci 96562306a36Sopenharmony_ci if (err == 0) { 96662306a36Sopenharmony_ci IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 96762306a36Sopenharmony_ci IPSTATS_MIB_FRAGOKS); 96862306a36Sopenharmony_ci rcu_read_unlock(); 96962306a36Sopenharmony_ci return 0; 97062306a36Sopenharmony_ci } 97162306a36Sopenharmony_ci 97262306a36Sopenharmony_ci kfree_skb_list(iter.frag); 97362306a36Sopenharmony_ci 97462306a36Sopenharmony_ci IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 97562306a36Sopenharmony_ci IPSTATS_MIB_FRAGFAILS); 97662306a36Sopenharmony_ci rcu_read_unlock(); 97762306a36Sopenharmony_ci return err; 97862306a36Sopenharmony_ci 97962306a36Sopenharmony_cislow_path_clean: 98062306a36Sopenharmony_ci skb_walk_frags(skb, frag2) { 98162306a36Sopenharmony_ci if (frag2 == frag) 98262306a36Sopenharmony_ci break; 98362306a36Sopenharmony_ci frag2->sk = NULL; 98462306a36Sopenharmony_ci frag2->destructor = NULL; 98562306a36Sopenharmony_ci skb->truesize += frag2->truesize; 98662306a36Sopenharmony_ci } 98762306a36Sopenharmony_ci } 98862306a36Sopenharmony_ci 98962306a36Sopenharmony_cislow_path: 99062306a36Sopenharmony_ci /* 99162306a36Sopenharmony_ci * Fragment the datagram. 99262306a36Sopenharmony_ci */ 99362306a36Sopenharmony_ci 99462306a36Sopenharmony_ci ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom, 99562306a36Sopenharmony_ci LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id, 99662306a36Sopenharmony_ci &state); 99762306a36Sopenharmony_ci 99862306a36Sopenharmony_ci /* 99962306a36Sopenharmony_ci * Keep copying data until we run out. 100062306a36Sopenharmony_ci */ 100162306a36Sopenharmony_ci 100262306a36Sopenharmony_ci while (state.left > 0) { 100362306a36Sopenharmony_ci frag = ip6_frag_next(skb, &state); 100462306a36Sopenharmony_ci if (IS_ERR(frag)) { 100562306a36Sopenharmony_ci err = PTR_ERR(frag); 100662306a36Sopenharmony_ci goto fail; 100762306a36Sopenharmony_ci } 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci /* 101062306a36Sopenharmony_ci * Put this fragment into the sending queue. 101162306a36Sopenharmony_ci */ 101262306a36Sopenharmony_ci skb_set_delivery_time(frag, tstamp, mono_delivery_time); 101362306a36Sopenharmony_ci err = output(net, sk, frag); 101462306a36Sopenharmony_ci if (err) 101562306a36Sopenharmony_ci goto fail; 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ci IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 101862306a36Sopenharmony_ci IPSTATS_MIB_FRAGCREATES); 101962306a36Sopenharmony_ci } 102062306a36Sopenharmony_ci IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 102162306a36Sopenharmony_ci IPSTATS_MIB_FRAGOKS); 102262306a36Sopenharmony_ci consume_skb(skb); 102362306a36Sopenharmony_ci return err; 102462306a36Sopenharmony_ci 102562306a36Sopenharmony_cifail_toobig: 102662306a36Sopenharmony_ci if (skb->sk && dst_allfrag(skb_dst(skb))) 102762306a36Sopenharmony_ci sk_gso_disable(skb->sk); 102862306a36Sopenharmony_ci 102962306a36Sopenharmony_ci icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 103062306a36Sopenharmony_ci err = -EMSGSIZE; 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_cifail: 103362306a36Sopenharmony_ci IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 103462306a36Sopenharmony_ci IPSTATS_MIB_FRAGFAILS); 103562306a36Sopenharmony_ci kfree_skb(skb); 103662306a36Sopenharmony_ci return err; 103762306a36Sopenharmony_ci} 103862306a36Sopenharmony_ci 103962306a36Sopenharmony_cistatic inline int ip6_rt_check(const struct rt6key *rt_key, 104062306a36Sopenharmony_ci const struct in6_addr *fl_addr, 104162306a36Sopenharmony_ci const struct in6_addr *addr_cache) 104262306a36Sopenharmony_ci{ 104362306a36Sopenharmony_ci return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 104462306a36Sopenharmony_ci (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache)); 104562306a36Sopenharmony_ci} 104662306a36Sopenharmony_ci 104762306a36Sopenharmony_cistatic struct dst_entry *ip6_sk_dst_check(struct sock *sk, 104862306a36Sopenharmony_ci struct dst_entry *dst, 104962306a36Sopenharmony_ci const struct flowi6 *fl6) 105062306a36Sopenharmony_ci{ 105162306a36Sopenharmony_ci struct ipv6_pinfo *np = inet6_sk(sk); 105262306a36Sopenharmony_ci struct rt6_info *rt; 105362306a36Sopenharmony_ci 105462306a36Sopenharmony_ci if (!dst) 105562306a36Sopenharmony_ci goto out; 105662306a36Sopenharmony_ci 105762306a36Sopenharmony_ci if (dst->ops->family != AF_INET6) { 105862306a36Sopenharmony_ci dst_release(dst); 105962306a36Sopenharmony_ci return NULL; 106062306a36Sopenharmony_ci } 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_ci rt = (struct rt6_info *)dst; 106362306a36Sopenharmony_ci /* Yes, checking route validity in not connected 106462306a36Sopenharmony_ci * case is not very simple. Take into account, 106562306a36Sopenharmony_ci * that we do not support routing by source, TOS, 106662306a36Sopenharmony_ci * and MSG_DONTROUTE --ANK (980726) 106762306a36Sopenharmony_ci * 106862306a36Sopenharmony_ci * 1. ip6_rt_check(): If route was host route, 106962306a36Sopenharmony_ci * check that cached destination is current. 107062306a36Sopenharmony_ci * If it is network route, we still may 107162306a36Sopenharmony_ci * check its validity using saved pointer 107262306a36Sopenharmony_ci * to the last used address: daddr_cache. 107362306a36Sopenharmony_ci * We do not want to save whole address now, 107462306a36Sopenharmony_ci * (because main consumer of this service 107562306a36Sopenharmony_ci * is tcp, which has not this problem), 107662306a36Sopenharmony_ci * so that the last trick works only on connected 107762306a36Sopenharmony_ci * sockets. 107862306a36Sopenharmony_ci * 2. oif also should be the same. 107962306a36Sopenharmony_ci */ 108062306a36Sopenharmony_ci if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || 108162306a36Sopenharmony_ci#ifdef CONFIG_IPV6_SUBTREES 108262306a36Sopenharmony_ci ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || 108362306a36Sopenharmony_ci#endif 108462306a36Sopenharmony_ci (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { 108562306a36Sopenharmony_ci dst_release(dst); 108662306a36Sopenharmony_ci dst = NULL; 108762306a36Sopenharmony_ci } 108862306a36Sopenharmony_ci 108962306a36Sopenharmony_ciout: 109062306a36Sopenharmony_ci return dst; 109162306a36Sopenharmony_ci} 109262306a36Sopenharmony_ci 109362306a36Sopenharmony_cistatic int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, 109462306a36Sopenharmony_ci struct dst_entry **dst, struct flowi6 *fl6) 109562306a36Sopenharmony_ci{ 109662306a36Sopenharmony_ci#ifdef CONFIG_IPV6_OPTIMISTIC_DAD 109762306a36Sopenharmony_ci struct neighbour *n; 109862306a36Sopenharmony_ci struct rt6_info *rt; 109962306a36Sopenharmony_ci#endif 110062306a36Sopenharmony_ci int err; 110162306a36Sopenharmony_ci int flags = 0; 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci /* The correct way to handle this would be to do 110462306a36Sopenharmony_ci * ip6_route_get_saddr, and then ip6_route_output; however, 110562306a36Sopenharmony_ci * the route-specific preferred source forces the 110662306a36Sopenharmony_ci * ip6_route_output call _before_ ip6_route_get_saddr. 110762306a36Sopenharmony_ci * 110862306a36Sopenharmony_ci * In source specific routing (no src=any default route), 110962306a36Sopenharmony_ci * ip6_route_output will fail given src=any saddr, though, so 111062306a36Sopenharmony_ci * that's why we try it again later. 111162306a36Sopenharmony_ci */ 111262306a36Sopenharmony_ci if (ipv6_addr_any(&fl6->saddr)) { 111362306a36Sopenharmony_ci struct fib6_info *from; 111462306a36Sopenharmony_ci struct rt6_info *rt; 111562306a36Sopenharmony_ci 111662306a36Sopenharmony_ci *dst = ip6_route_output(net, sk, fl6); 111762306a36Sopenharmony_ci rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; 111862306a36Sopenharmony_ci 111962306a36Sopenharmony_ci rcu_read_lock(); 112062306a36Sopenharmony_ci from = rt ? rcu_dereference(rt->from) : NULL; 112162306a36Sopenharmony_ci err = ip6_route_get_saddr(net, from, &fl6->daddr, 112262306a36Sopenharmony_ci sk ? inet6_sk(sk)->srcprefs : 0, 112362306a36Sopenharmony_ci &fl6->saddr); 112462306a36Sopenharmony_ci rcu_read_unlock(); 112562306a36Sopenharmony_ci 112662306a36Sopenharmony_ci if (err) 112762306a36Sopenharmony_ci goto out_err_release; 112862306a36Sopenharmony_ci 112962306a36Sopenharmony_ci /* If we had an erroneous initial result, pretend it 113062306a36Sopenharmony_ci * never existed and let the SA-enabled version take 113162306a36Sopenharmony_ci * over. 113262306a36Sopenharmony_ci */ 113362306a36Sopenharmony_ci if ((*dst)->error) { 113462306a36Sopenharmony_ci dst_release(*dst); 113562306a36Sopenharmony_ci *dst = NULL; 113662306a36Sopenharmony_ci } 113762306a36Sopenharmony_ci 113862306a36Sopenharmony_ci if (fl6->flowi6_oif) 113962306a36Sopenharmony_ci flags |= RT6_LOOKUP_F_IFACE; 114062306a36Sopenharmony_ci } 114162306a36Sopenharmony_ci 114262306a36Sopenharmony_ci if (!*dst) 114362306a36Sopenharmony_ci *dst = ip6_route_output_flags(net, sk, fl6, flags); 114462306a36Sopenharmony_ci 114562306a36Sopenharmony_ci err = (*dst)->error; 114662306a36Sopenharmony_ci if (err) 114762306a36Sopenharmony_ci goto out_err_release; 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci#ifdef CONFIG_IPV6_OPTIMISTIC_DAD 115062306a36Sopenharmony_ci /* 115162306a36Sopenharmony_ci * Here if the dst entry we've looked up 115262306a36Sopenharmony_ci * has a neighbour entry that is in the INCOMPLETE 115362306a36Sopenharmony_ci * state and the src address from the flow is 115462306a36Sopenharmony_ci * marked as OPTIMISTIC, we release the found 115562306a36Sopenharmony_ci * dst entry and replace it instead with the 115662306a36Sopenharmony_ci * dst entry of the nexthop router 115762306a36Sopenharmony_ci */ 115862306a36Sopenharmony_ci rt = (struct rt6_info *) *dst; 115962306a36Sopenharmony_ci rcu_read_lock(); 116062306a36Sopenharmony_ci n = __ipv6_neigh_lookup_noref(rt->dst.dev, 116162306a36Sopenharmony_ci rt6_nexthop(rt, &fl6->daddr)); 116262306a36Sopenharmony_ci err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? -EINVAL : 0; 116362306a36Sopenharmony_ci rcu_read_unlock(); 116462306a36Sopenharmony_ci 116562306a36Sopenharmony_ci if (err) { 116662306a36Sopenharmony_ci struct inet6_ifaddr *ifp; 116762306a36Sopenharmony_ci struct flowi6 fl_gw6; 116862306a36Sopenharmony_ci int redirect; 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci ifp = ipv6_get_ifaddr(net, &fl6->saddr, 117162306a36Sopenharmony_ci (*dst)->dev, 1); 117262306a36Sopenharmony_ci 117362306a36Sopenharmony_ci redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 117462306a36Sopenharmony_ci if (ifp) 117562306a36Sopenharmony_ci in6_ifa_put(ifp); 117662306a36Sopenharmony_ci 117762306a36Sopenharmony_ci if (redirect) { 117862306a36Sopenharmony_ci /* 117962306a36Sopenharmony_ci * We need to get the dst entry for the 118062306a36Sopenharmony_ci * default router instead 118162306a36Sopenharmony_ci */ 118262306a36Sopenharmony_ci dst_release(*dst); 118362306a36Sopenharmony_ci memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); 118462306a36Sopenharmony_ci memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); 118562306a36Sopenharmony_ci *dst = ip6_route_output(net, sk, &fl_gw6); 118662306a36Sopenharmony_ci err = (*dst)->error; 118762306a36Sopenharmony_ci if (err) 118862306a36Sopenharmony_ci goto out_err_release; 118962306a36Sopenharmony_ci } 119062306a36Sopenharmony_ci } 119162306a36Sopenharmony_ci#endif 119262306a36Sopenharmony_ci if (ipv6_addr_v4mapped(&fl6->saddr) && 119362306a36Sopenharmony_ci !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) { 119462306a36Sopenharmony_ci err = -EAFNOSUPPORT; 119562306a36Sopenharmony_ci goto out_err_release; 119662306a36Sopenharmony_ci } 119762306a36Sopenharmony_ci 119862306a36Sopenharmony_ci return 0; 119962306a36Sopenharmony_ci 120062306a36Sopenharmony_ciout_err_release: 120162306a36Sopenharmony_ci dst_release(*dst); 120262306a36Sopenharmony_ci *dst = NULL; 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_ci if (err == -ENETUNREACH) 120562306a36Sopenharmony_ci IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); 120662306a36Sopenharmony_ci return err; 120762306a36Sopenharmony_ci} 120862306a36Sopenharmony_ci 120962306a36Sopenharmony_ci/** 121062306a36Sopenharmony_ci * ip6_dst_lookup - perform route lookup on flow 121162306a36Sopenharmony_ci * @net: Network namespace to perform lookup in 121262306a36Sopenharmony_ci * @sk: socket which provides route info 121362306a36Sopenharmony_ci * @dst: pointer to dst_entry * for result 121462306a36Sopenharmony_ci * @fl6: flow to lookup 121562306a36Sopenharmony_ci * 121662306a36Sopenharmony_ci * This function performs a route lookup on the given flow. 121762306a36Sopenharmony_ci * 121862306a36Sopenharmony_ci * It returns zero on success, or a standard errno code on error. 121962306a36Sopenharmony_ci */ 122062306a36Sopenharmony_ciint ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, 122162306a36Sopenharmony_ci struct flowi6 *fl6) 122262306a36Sopenharmony_ci{ 122362306a36Sopenharmony_ci *dst = NULL; 122462306a36Sopenharmony_ci return ip6_dst_lookup_tail(net, sk, dst, fl6); 122562306a36Sopenharmony_ci} 122662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(ip6_dst_lookup); 122762306a36Sopenharmony_ci 122862306a36Sopenharmony_ci/** 122962306a36Sopenharmony_ci * ip6_dst_lookup_flow - perform route lookup on flow with ipsec 123062306a36Sopenharmony_ci * @net: Network namespace to perform lookup in 123162306a36Sopenharmony_ci * @sk: socket which provides route info 123262306a36Sopenharmony_ci * @fl6: flow to lookup 123362306a36Sopenharmony_ci * @final_dst: final destination address for ipsec lookup 123462306a36Sopenharmony_ci * 123562306a36Sopenharmony_ci * This function performs a route lookup on the given flow. 123662306a36Sopenharmony_ci * 123762306a36Sopenharmony_ci * It returns a valid dst pointer on success, or a pointer encoded 123862306a36Sopenharmony_ci * error code. 123962306a36Sopenharmony_ci */ 124062306a36Sopenharmony_cistruct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, 124162306a36Sopenharmony_ci const struct in6_addr *final_dst) 124262306a36Sopenharmony_ci{ 124362306a36Sopenharmony_ci struct dst_entry *dst = NULL; 124462306a36Sopenharmony_ci int err; 124562306a36Sopenharmony_ci 124662306a36Sopenharmony_ci err = ip6_dst_lookup_tail(net, sk, &dst, fl6); 124762306a36Sopenharmony_ci if (err) 124862306a36Sopenharmony_ci return ERR_PTR(err); 124962306a36Sopenharmony_ci if (final_dst) 125062306a36Sopenharmony_ci fl6->daddr = *final_dst; 125162306a36Sopenharmony_ci 125262306a36Sopenharmony_ci return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0); 125362306a36Sopenharmony_ci} 125462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); 125562306a36Sopenharmony_ci 125662306a36Sopenharmony_ci/** 125762306a36Sopenharmony_ci * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow 125862306a36Sopenharmony_ci * @sk: socket which provides the dst cache and route info 125962306a36Sopenharmony_ci * @fl6: flow to lookup 126062306a36Sopenharmony_ci * @final_dst: final destination address for ipsec lookup 126162306a36Sopenharmony_ci * @connected: whether @sk is connected or not 126262306a36Sopenharmony_ci * 126362306a36Sopenharmony_ci * This function performs a route lookup on the given flow with the 126462306a36Sopenharmony_ci * possibility of using the cached route in the socket if it is valid. 126562306a36Sopenharmony_ci * It will take the socket dst lock when operating on the dst cache. 126662306a36Sopenharmony_ci * As a result, this function can only be used in process context. 126762306a36Sopenharmony_ci * 126862306a36Sopenharmony_ci * In addition, for a connected socket, cache the dst in the socket 126962306a36Sopenharmony_ci * if the current cache is not valid. 127062306a36Sopenharmony_ci * 127162306a36Sopenharmony_ci * It returns a valid dst pointer on success, or a pointer encoded 127262306a36Sopenharmony_ci * error code. 127362306a36Sopenharmony_ci */ 127462306a36Sopenharmony_cistruct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 127562306a36Sopenharmony_ci const struct in6_addr *final_dst, 127662306a36Sopenharmony_ci bool connected) 127762306a36Sopenharmony_ci{ 127862306a36Sopenharmony_ci struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 127962306a36Sopenharmony_ci 128062306a36Sopenharmony_ci dst = ip6_sk_dst_check(sk, dst, fl6); 128162306a36Sopenharmony_ci if (dst) 128262306a36Sopenharmony_ci return dst; 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst); 128562306a36Sopenharmony_ci if (connected && !IS_ERR(dst)) 128662306a36Sopenharmony_ci ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6); 128762306a36Sopenharmony_ci 128862306a36Sopenharmony_ci return dst; 128962306a36Sopenharmony_ci} 129062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); 129162306a36Sopenharmony_ci 129262306a36Sopenharmony_ci/** 129362306a36Sopenharmony_ci * ip6_dst_lookup_tunnel - perform route lookup on tunnel 129462306a36Sopenharmony_ci * @skb: Packet for which lookup is done 129562306a36Sopenharmony_ci * @dev: Tunnel device 129662306a36Sopenharmony_ci * @net: Network namespace of tunnel device 129762306a36Sopenharmony_ci * @sock: Socket which provides route info 129862306a36Sopenharmony_ci * @saddr: Memory to store the src ip address 129962306a36Sopenharmony_ci * @info: Tunnel information 130062306a36Sopenharmony_ci * @protocol: IP protocol 130162306a36Sopenharmony_ci * @use_cache: Flag to enable cache usage 130262306a36Sopenharmony_ci * This function performs a route lookup on a tunnel 130362306a36Sopenharmony_ci * 130462306a36Sopenharmony_ci * It returns a valid dst pointer and stores src address to be used in 130562306a36Sopenharmony_ci * tunnel in param saddr on success, else a pointer encoded error code. 130662306a36Sopenharmony_ci */ 130762306a36Sopenharmony_ci 130862306a36Sopenharmony_cistruct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, 130962306a36Sopenharmony_ci struct net_device *dev, 131062306a36Sopenharmony_ci struct net *net, 131162306a36Sopenharmony_ci struct socket *sock, 131262306a36Sopenharmony_ci struct in6_addr *saddr, 131362306a36Sopenharmony_ci const struct ip_tunnel_info *info, 131462306a36Sopenharmony_ci u8 protocol, 131562306a36Sopenharmony_ci bool use_cache) 131662306a36Sopenharmony_ci{ 131762306a36Sopenharmony_ci struct dst_entry *dst = NULL; 131862306a36Sopenharmony_ci#ifdef CONFIG_DST_CACHE 131962306a36Sopenharmony_ci struct dst_cache *dst_cache; 132062306a36Sopenharmony_ci#endif 132162306a36Sopenharmony_ci struct flowi6 fl6; 132262306a36Sopenharmony_ci __u8 prio; 132362306a36Sopenharmony_ci 132462306a36Sopenharmony_ci#ifdef CONFIG_DST_CACHE 132562306a36Sopenharmony_ci dst_cache = (struct dst_cache *)&info->dst_cache; 132662306a36Sopenharmony_ci if (use_cache) { 132762306a36Sopenharmony_ci dst = dst_cache_get_ip6(dst_cache, saddr); 132862306a36Sopenharmony_ci if (dst) 132962306a36Sopenharmony_ci return dst; 133062306a36Sopenharmony_ci } 133162306a36Sopenharmony_ci#endif 133262306a36Sopenharmony_ci memset(&fl6, 0, sizeof(fl6)); 133362306a36Sopenharmony_ci fl6.flowi6_mark = skb->mark; 133462306a36Sopenharmony_ci fl6.flowi6_proto = protocol; 133562306a36Sopenharmony_ci fl6.daddr = info->key.u.ipv6.dst; 133662306a36Sopenharmony_ci fl6.saddr = info->key.u.ipv6.src; 133762306a36Sopenharmony_ci prio = info->key.tos; 133862306a36Sopenharmony_ci fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label); 133962306a36Sopenharmony_ci 134062306a36Sopenharmony_ci dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, 134162306a36Sopenharmony_ci NULL); 134262306a36Sopenharmony_ci if (IS_ERR(dst)) { 134362306a36Sopenharmony_ci netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); 134462306a36Sopenharmony_ci return ERR_PTR(-ENETUNREACH); 134562306a36Sopenharmony_ci } 134662306a36Sopenharmony_ci if (dst->dev == dev) { /* is this necessary? */ 134762306a36Sopenharmony_ci netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); 134862306a36Sopenharmony_ci dst_release(dst); 134962306a36Sopenharmony_ci return ERR_PTR(-ELOOP); 135062306a36Sopenharmony_ci } 135162306a36Sopenharmony_ci#ifdef CONFIG_DST_CACHE 135262306a36Sopenharmony_ci if (use_cache) 135362306a36Sopenharmony_ci dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); 135462306a36Sopenharmony_ci#endif 135562306a36Sopenharmony_ci *saddr = fl6.saddr; 135662306a36Sopenharmony_ci return dst; 135762306a36Sopenharmony_ci} 135862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel); 135962306a36Sopenharmony_ci 136062306a36Sopenharmony_cistatic inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 136162306a36Sopenharmony_ci gfp_t gfp) 136262306a36Sopenharmony_ci{ 136362306a36Sopenharmony_ci return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 136462306a36Sopenharmony_ci} 136562306a36Sopenharmony_ci 136662306a36Sopenharmony_cistatic inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, 136762306a36Sopenharmony_ci gfp_t gfp) 136862306a36Sopenharmony_ci{ 136962306a36Sopenharmony_ci return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 137062306a36Sopenharmony_ci} 137162306a36Sopenharmony_ci 137262306a36Sopenharmony_cistatic void ip6_append_data_mtu(unsigned int *mtu, 137362306a36Sopenharmony_ci int *maxfraglen, 137462306a36Sopenharmony_ci unsigned int fragheaderlen, 137562306a36Sopenharmony_ci struct sk_buff *skb, 137662306a36Sopenharmony_ci struct rt6_info *rt, 137762306a36Sopenharmony_ci unsigned int orig_mtu) 137862306a36Sopenharmony_ci{ 137962306a36Sopenharmony_ci if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { 138062306a36Sopenharmony_ci if (!skb) { 138162306a36Sopenharmony_ci /* first fragment, reserve header_len */ 138262306a36Sopenharmony_ci *mtu = orig_mtu - rt->dst.header_len; 138362306a36Sopenharmony_ci 138462306a36Sopenharmony_ci } else { 138562306a36Sopenharmony_ci /* 138662306a36Sopenharmony_ci * this fragment is not first, the headers 138762306a36Sopenharmony_ci * space is regarded as data space. 138862306a36Sopenharmony_ci */ 138962306a36Sopenharmony_ci *mtu = orig_mtu; 139062306a36Sopenharmony_ci } 139162306a36Sopenharmony_ci *maxfraglen = ((*mtu - fragheaderlen) & ~7) 139262306a36Sopenharmony_ci + fragheaderlen - sizeof(struct frag_hdr); 139362306a36Sopenharmony_ci } 139462306a36Sopenharmony_ci} 139562306a36Sopenharmony_ci 139662306a36Sopenharmony_cistatic int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, 139762306a36Sopenharmony_ci struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6, 139862306a36Sopenharmony_ci struct rt6_info *rt) 139962306a36Sopenharmony_ci{ 140062306a36Sopenharmony_ci struct ipv6_pinfo *np = inet6_sk(sk); 140162306a36Sopenharmony_ci unsigned int mtu; 140262306a36Sopenharmony_ci struct ipv6_txoptions *nopt, *opt = ipc6->opt; 140362306a36Sopenharmony_ci 140462306a36Sopenharmony_ci /* callers pass dst together with a reference, set it first so 140562306a36Sopenharmony_ci * ip6_cork_release() can put it down even in case of an error. 140662306a36Sopenharmony_ci */ 140762306a36Sopenharmony_ci cork->base.dst = &rt->dst; 140862306a36Sopenharmony_ci 140962306a36Sopenharmony_ci /* 141062306a36Sopenharmony_ci * setup for corking 141162306a36Sopenharmony_ci */ 141262306a36Sopenharmony_ci if (opt) { 141362306a36Sopenharmony_ci if (WARN_ON(v6_cork->opt)) 141462306a36Sopenharmony_ci return -EINVAL; 141562306a36Sopenharmony_ci 141662306a36Sopenharmony_ci nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); 141762306a36Sopenharmony_ci if (unlikely(!nopt)) 141862306a36Sopenharmony_ci return -ENOBUFS; 141962306a36Sopenharmony_ci 142062306a36Sopenharmony_ci nopt->tot_len = sizeof(*opt); 142162306a36Sopenharmony_ci nopt->opt_flen = opt->opt_flen; 142262306a36Sopenharmony_ci nopt->opt_nflen = opt->opt_nflen; 142362306a36Sopenharmony_ci 142462306a36Sopenharmony_ci nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation); 142562306a36Sopenharmony_ci if (opt->dst0opt && !nopt->dst0opt) 142662306a36Sopenharmony_ci return -ENOBUFS; 142762306a36Sopenharmony_ci 142862306a36Sopenharmony_ci nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation); 142962306a36Sopenharmony_ci if (opt->dst1opt && !nopt->dst1opt) 143062306a36Sopenharmony_ci return -ENOBUFS; 143162306a36Sopenharmony_ci 143262306a36Sopenharmony_ci nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation); 143362306a36Sopenharmony_ci if (opt->hopopt && !nopt->hopopt) 143462306a36Sopenharmony_ci return -ENOBUFS; 143562306a36Sopenharmony_ci 143662306a36Sopenharmony_ci nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation); 143762306a36Sopenharmony_ci if (opt->srcrt && !nopt->srcrt) 143862306a36Sopenharmony_ci return -ENOBUFS; 143962306a36Sopenharmony_ci 144062306a36Sopenharmony_ci /* need source address above miyazawa*/ 144162306a36Sopenharmony_ci } 144262306a36Sopenharmony_ci v6_cork->hop_limit = ipc6->hlimit; 144362306a36Sopenharmony_ci v6_cork->tclass = ipc6->tclass; 144462306a36Sopenharmony_ci if (rt->dst.flags & DST_XFRM_TUNNEL) 144562306a36Sopenharmony_ci mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 144662306a36Sopenharmony_ci READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); 144762306a36Sopenharmony_ci else 144862306a36Sopenharmony_ci mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 144962306a36Sopenharmony_ci READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); 145062306a36Sopenharmony_ci if (np->frag_size < mtu) { 145162306a36Sopenharmony_ci if (np->frag_size) 145262306a36Sopenharmony_ci mtu = np->frag_size; 145362306a36Sopenharmony_ci } 145462306a36Sopenharmony_ci cork->base.fragsize = mtu; 145562306a36Sopenharmony_ci cork->base.gso_size = ipc6->gso_size; 145662306a36Sopenharmony_ci cork->base.tx_flags = 0; 145762306a36Sopenharmony_ci cork->base.mark = ipc6->sockc.mark; 145862306a36Sopenharmony_ci sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); 145962306a36Sopenharmony_ci 146062306a36Sopenharmony_ci if (dst_allfrag(xfrm_dst_path(&rt->dst))) 146162306a36Sopenharmony_ci cork->base.flags |= IPCORK_ALLFRAG; 146262306a36Sopenharmony_ci cork->base.length = 0; 146362306a36Sopenharmony_ci 146462306a36Sopenharmony_ci cork->base.transmit_time = ipc6->sockc.transmit_time; 146562306a36Sopenharmony_ci 146662306a36Sopenharmony_ci return 0; 146762306a36Sopenharmony_ci} 146862306a36Sopenharmony_ci 146962306a36Sopenharmony_cistatic int __ip6_append_data(struct sock *sk, 147062306a36Sopenharmony_ci struct sk_buff_head *queue, 147162306a36Sopenharmony_ci struct inet_cork_full *cork_full, 147262306a36Sopenharmony_ci struct inet6_cork *v6_cork, 147362306a36Sopenharmony_ci struct page_frag *pfrag, 147462306a36Sopenharmony_ci int getfrag(void *from, char *to, int offset, 147562306a36Sopenharmony_ci int len, int odd, struct sk_buff *skb), 147662306a36Sopenharmony_ci void *from, size_t length, int transhdrlen, 147762306a36Sopenharmony_ci unsigned int flags, struct ipcm6_cookie *ipc6) 147862306a36Sopenharmony_ci{ 147962306a36Sopenharmony_ci struct sk_buff *skb, *skb_prev = NULL; 148062306a36Sopenharmony_ci struct inet_cork *cork = &cork_full->base; 148162306a36Sopenharmony_ci struct flowi6 *fl6 = &cork_full->fl.u.ip6; 148262306a36Sopenharmony_ci unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; 148362306a36Sopenharmony_ci struct ubuf_info *uarg = NULL; 148462306a36Sopenharmony_ci int exthdrlen = 0; 148562306a36Sopenharmony_ci int dst_exthdrlen = 0; 148662306a36Sopenharmony_ci int hh_len; 148762306a36Sopenharmony_ci int copy; 148862306a36Sopenharmony_ci int err; 148962306a36Sopenharmony_ci int offset = 0; 149062306a36Sopenharmony_ci bool zc = false; 149162306a36Sopenharmony_ci u32 tskey = 0; 149262306a36Sopenharmony_ci struct rt6_info *rt = (struct rt6_info *)cork->dst; 149362306a36Sopenharmony_ci struct ipv6_txoptions *opt = v6_cork->opt; 149462306a36Sopenharmony_ci int csummode = CHECKSUM_NONE; 149562306a36Sopenharmony_ci unsigned int maxnonfragsize, headersize; 149662306a36Sopenharmony_ci unsigned int wmem_alloc_delta = 0; 149762306a36Sopenharmony_ci bool paged, extra_uref = false; 149862306a36Sopenharmony_ci 149962306a36Sopenharmony_ci skb = skb_peek_tail(queue); 150062306a36Sopenharmony_ci if (!skb) { 150162306a36Sopenharmony_ci exthdrlen = opt ? opt->opt_flen : 0; 150262306a36Sopenharmony_ci dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; 150362306a36Sopenharmony_ci } 150462306a36Sopenharmony_ci 150562306a36Sopenharmony_ci paged = !!cork->gso_size; 150662306a36Sopenharmony_ci mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize; 150762306a36Sopenharmony_ci orig_mtu = mtu; 150862306a36Sopenharmony_ci 150962306a36Sopenharmony_ci if (cork->tx_flags & SKBTX_ANY_TSTAMP && 151062306a36Sopenharmony_ci READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) 151162306a36Sopenharmony_ci tskey = atomic_inc_return(&sk->sk_tskey) - 1; 151262306a36Sopenharmony_ci 151362306a36Sopenharmony_ci hh_len = LL_RESERVED_SPACE(rt->dst.dev); 151462306a36Sopenharmony_ci 151562306a36Sopenharmony_ci fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 151662306a36Sopenharmony_ci (opt ? opt->opt_nflen : 0); 151762306a36Sopenharmony_ci 151862306a36Sopenharmony_ci headersize = sizeof(struct ipv6hdr) + 151962306a36Sopenharmony_ci (opt ? opt->opt_flen + opt->opt_nflen : 0) + 152062306a36Sopenharmony_ci (dst_allfrag(&rt->dst) ? 152162306a36Sopenharmony_ci sizeof(struct frag_hdr) : 0) + 152262306a36Sopenharmony_ci rt->rt6i_nfheader_len; 152362306a36Sopenharmony_ci 152462306a36Sopenharmony_ci if (mtu <= fragheaderlen || 152562306a36Sopenharmony_ci ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr)) 152662306a36Sopenharmony_ci goto emsgsize; 152762306a36Sopenharmony_ci 152862306a36Sopenharmony_ci maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - 152962306a36Sopenharmony_ci sizeof(struct frag_hdr); 153062306a36Sopenharmony_ci 153162306a36Sopenharmony_ci /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit 153262306a36Sopenharmony_ci * the first fragment 153362306a36Sopenharmony_ci */ 153462306a36Sopenharmony_ci if (headersize + transhdrlen > mtu) 153562306a36Sopenharmony_ci goto emsgsize; 153662306a36Sopenharmony_ci 153762306a36Sopenharmony_ci if (cork->length + length > mtu - headersize && ipc6->dontfrag && 153862306a36Sopenharmony_ci (sk->sk_protocol == IPPROTO_UDP || 153962306a36Sopenharmony_ci sk->sk_protocol == IPPROTO_ICMPV6 || 154062306a36Sopenharmony_ci sk->sk_protocol == IPPROTO_RAW)) { 154162306a36Sopenharmony_ci ipv6_local_rxpmtu(sk, fl6, mtu - headersize + 154262306a36Sopenharmony_ci sizeof(struct ipv6hdr)); 154362306a36Sopenharmony_ci goto emsgsize; 154462306a36Sopenharmony_ci } 154562306a36Sopenharmony_ci 154662306a36Sopenharmony_ci if (ip6_sk_ignore_df(sk)) 154762306a36Sopenharmony_ci maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; 154862306a36Sopenharmony_ci else 154962306a36Sopenharmony_ci maxnonfragsize = mtu; 155062306a36Sopenharmony_ci 155162306a36Sopenharmony_ci if (cork->length + length > maxnonfragsize - headersize) { 155262306a36Sopenharmony_ciemsgsize: 155362306a36Sopenharmony_ci pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0); 155462306a36Sopenharmony_ci ipv6_local_error(sk, EMSGSIZE, fl6, pmtu); 155562306a36Sopenharmony_ci return -EMSGSIZE; 155662306a36Sopenharmony_ci } 155762306a36Sopenharmony_ci 155862306a36Sopenharmony_ci /* CHECKSUM_PARTIAL only with no extension headers and when 155962306a36Sopenharmony_ci * we are not going to fragment 156062306a36Sopenharmony_ci */ 156162306a36Sopenharmony_ci if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && 156262306a36Sopenharmony_ci headersize == sizeof(struct ipv6hdr) && 156362306a36Sopenharmony_ci length <= mtu - headersize && 156462306a36Sopenharmony_ci (!(flags & MSG_MORE) || cork->gso_size) && 156562306a36Sopenharmony_ci rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) 156662306a36Sopenharmony_ci csummode = CHECKSUM_PARTIAL; 156762306a36Sopenharmony_ci 156862306a36Sopenharmony_ci if ((flags & MSG_ZEROCOPY) && length) { 156962306a36Sopenharmony_ci struct msghdr *msg = from; 157062306a36Sopenharmony_ci 157162306a36Sopenharmony_ci if (getfrag == ip_generic_getfrag && msg->msg_ubuf) { 157262306a36Sopenharmony_ci if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb)) 157362306a36Sopenharmony_ci return -EINVAL; 157462306a36Sopenharmony_ci 157562306a36Sopenharmony_ci /* Leave uarg NULL if can't zerocopy, callers should 157662306a36Sopenharmony_ci * be able to handle it. 157762306a36Sopenharmony_ci */ 157862306a36Sopenharmony_ci if ((rt->dst.dev->features & NETIF_F_SG) && 157962306a36Sopenharmony_ci csummode == CHECKSUM_PARTIAL) { 158062306a36Sopenharmony_ci paged = true; 158162306a36Sopenharmony_ci zc = true; 158262306a36Sopenharmony_ci uarg = msg->msg_ubuf; 158362306a36Sopenharmony_ci } 158462306a36Sopenharmony_ci } else if (sock_flag(sk, SOCK_ZEROCOPY)) { 158562306a36Sopenharmony_ci uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb)); 158662306a36Sopenharmony_ci if (!uarg) 158762306a36Sopenharmony_ci return -ENOBUFS; 158862306a36Sopenharmony_ci extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ 158962306a36Sopenharmony_ci if (rt->dst.dev->features & NETIF_F_SG && 159062306a36Sopenharmony_ci csummode == CHECKSUM_PARTIAL) { 159162306a36Sopenharmony_ci paged = true; 159262306a36Sopenharmony_ci zc = true; 159362306a36Sopenharmony_ci } else { 159462306a36Sopenharmony_ci uarg_to_msgzc(uarg)->zerocopy = 0; 159562306a36Sopenharmony_ci skb_zcopy_set(skb, uarg, &extra_uref); 159662306a36Sopenharmony_ci } 159762306a36Sopenharmony_ci } 159862306a36Sopenharmony_ci } else if ((flags & MSG_SPLICE_PAGES) && length) { 159962306a36Sopenharmony_ci if (inet_test_bit(HDRINCL, sk)) 160062306a36Sopenharmony_ci return -EPERM; 160162306a36Sopenharmony_ci if (rt->dst.dev->features & NETIF_F_SG && 160262306a36Sopenharmony_ci getfrag == ip_generic_getfrag) 160362306a36Sopenharmony_ci /* We need an empty buffer to attach stuff to */ 160462306a36Sopenharmony_ci paged = true; 160562306a36Sopenharmony_ci else 160662306a36Sopenharmony_ci flags &= ~MSG_SPLICE_PAGES; 160762306a36Sopenharmony_ci } 160862306a36Sopenharmony_ci 160962306a36Sopenharmony_ci /* 161062306a36Sopenharmony_ci * Let's try using as much space as possible. 161162306a36Sopenharmony_ci * Use MTU if total length of the message fits into the MTU. 161262306a36Sopenharmony_ci * Otherwise, we need to reserve fragment header and 161362306a36Sopenharmony_ci * fragment alignment (= 8-15 octects, in total). 161462306a36Sopenharmony_ci * 161562306a36Sopenharmony_ci * Note that we may need to "move" the data from the tail 161662306a36Sopenharmony_ci * of the buffer to the new fragment when we split 161762306a36Sopenharmony_ci * the message. 161862306a36Sopenharmony_ci * 161962306a36Sopenharmony_ci * FIXME: It may be fragmented into multiple chunks 162062306a36Sopenharmony_ci * at once if non-fragmentable extension headers 162162306a36Sopenharmony_ci * are too large. 162262306a36Sopenharmony_ci * --yoshfuji 162362306a36Sopenharmony_ci */ 162462306a36Sopenharmony_ci 162562306a36Sopenharmony_ci cork->length += length; 162662306a36Sopenharmony_ci if (!skb) 162762306a36Sopenharmony_ci goto alloc_new_skb; 162862306a36Sopenharmony_ci 162962306a36Sopenharmony_ci while (length > 0) { 163062306a36Sopenharmony_ci /* Check if the remaining data fits into current packet. */ 163162306a36Sopenharmony_ci copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; 163262306a36Sopenharmony_ci if (copy < length) 163362306a36Sopenharmony_ci copy = maxfraglen - skb->len; 163462306a36Sopenharmony_ci 163562306a36Sopenharmony_ci if (copy <= 0) { 163662306a36Sopenharmony_ci char *data; 163762306a36Sopenharmony_ci unsigned int datalen; 163862306a36Sopenharmony_ci unsigned int fraglen; 163962306a36Sopenharmony_ci unsigned int fraggap; 164062306a36Sopenharmony_ci unsigned int alloclen, alloc_extra; 164162306a36Sopenharmony_ci unsigned int pagedlen; 164262306a36Sopenharmony_cialloc_new_skb: 164362306a36Sopenharmony_ci /* There's no room in the current skb */ 164462306a36Sopenharmony_ci if (skb) 164562306a36Sopenharmony_ci fraggap = skb->len - maxfraglen; 164662306a36Sopenharmony_ci else 164762306a36Sopenharmony_ci fraggap = 0; 164862306a36Sopenharmony_ci /* update mtu and maxfraglen if necessary */ 164962306a36Sopenharmony_ci if (!skb || !skb_prev) 165062306a36Sopenharmony_ci ip6_append_data_mtu(&mtu, &maxfraglen, 165162306a36Sopenharmony_ci fragheaderlen, skb, rt, 165262306a36Sopenharmony_ci orig_mtu); 165362306a36Sopenharmony_ci 165462306a36Sopenharmony_ci skb_prev = skb; 165562306a36Sopenharmony_ci 165662306a36Sopenharmony_ci /* 165762306a36Sopenharmony_ci * If remaining data exceeds the mtu, 165862306a36Sopenharmony_ci * we know we need more fragment(s). 165962306a36Sopenharmony_ci */ 166062306a36Sopenharmony_ci datalen = length + fraggap; 166162306a36Sopenharmony_ci 166262306a36Sopenharmony_ci if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 166362306a36Sopenharmony_ci datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; 166462306a36Sopenharmony_ci fraglen = datalen + fragheaderlen; 166562306a36Sopenharmony_ci pagedlen = 0; 166662306a36Sopenharmony_ci 166762306a36Sopenharmony_ci alloc_extra = hh_len; 166862306a36Sopenharmony_ci alloc_extra += dst_exthdrlen; 166962306a36Sopenharmony_ci alloc_extra += rt->dst.trailer_len; 167062306a36Sopenharmony_ci 167162306a36Sopenharmony_ci /* We just reserve space for fragment header. 167262306a36Sopenharmony_ci * Note: this may be overallocation if the message 167362306a36Sopenharmony_ci * (without MSG_MORE) fits into the MTU. 167462306a36Sopenharmony_ci */ 167562306a36Sopenharmony_ci alloc_extra += sizeof(struct frag_hdr); 167662306a36Sopenharmony_ci 167762306a36Sopenharmony_ci if ((flags & MSG_MORE) && 167862306a36Sopenharmony_ci !(rt->dst.dev->features&NETIF_F_SG)) 167962306a36Sopenharmony_ci alloclen = mtu; 168062306a36Sopenharmony_ci else if (!paged && 168162306a36Sopenharmony_ci (fraglen + alloc_extra < SKB_MAX_ALLOC || 168262306a36Sopenharmony_ci !(rt->dst.dev->features & NETIF_F_SG))) 168362306a36Sopenharmony_ci alloclen = fraglen; 168462306a36Sopenharmony_ci else { 168562306a36Sopenharmony_ci alloclen = fragheaderlen + transhdrlen; 168662306a36Sopenharmony_ci pagedlen = datalen - transhdrlen; 168762306a36Sopenharmony_ci } 168862306a36Sopenharmony_ci alloclen += alloc_extra; 168962306a36Sopenharmony_ci 169062306a36Sopenharmony_ci if (datalen != length + fraggap) { 169162306a36Sopenharmony_ci /* 169262306a36Sopenharmony_ci * this is not the last fragment, the trailer 169362306a36Sopenharmony_ci * space is regarded as data space. 169462306a36Sopenharmony_ci */ 169562306a36Sopenharmony_ci datalen += rt->dst.trailer_len; 169662306a36Sopenharmony_ci } 169762306a36Sopenharmony_ci 169862306a36Sopenharmony_ci fraglen = datalen + fragheaderlen; 169962306a36Sopenharmony_ci 170062306a36Sopenharmony_ci copy = datalen - transhdrlen - fraggap - pagedlen; 170162306a36Sopenharmony_ci /* [!] NOTE: copy may be negative if pagedlen>0 170262306a36Sopenharmony_ci * because then the equation may reduces to -fraggap. 170362306a36Sopenharmony_ci */ 170462306a36Sopenharmony_ci if (copy < 0 && !(flags & MSG_SPLICE_PAGES)) { 170562306a36Sopenharmony_ci err = -EINVAL; 170662306a36Sopenharmony_ci goto error; 170762306a36Sopenharmony_ci } 170862306a36Sopenharmony_ci if (transhdrlen) { 170962306a36Sopenharmony_ci skb = sock_alloc_send_skb(sk, alloclen, 171062306a36Sopenharmony_ci (flags & MSG_DONTWAIT), &err); 171162306a36Sopenharmony_ci } else { 171262306a36Sopenharmony_ci skb = NULL; 171362306a36Sopenharmony_ci if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <= 171462306a36Sopenharmony_ci 2 * sk->sk_sndbuf) 171562306a36Sopenharmony_ci skb = alloc_skb(alloclen, 171662306a36Sopenharmony_ci sk->sk_allocation); 171762306a36Sopenharmony_ci if (unlikely(!skb)) 171862306a36Sopenharmony_ci err = -ENOBUFS; 171962306a36Sopenharmony_ci } 172062306a36Sopenharmony_ci if (!skb) 172162306a36Sopenharmony_ci goto error; 172262306a36Sopenharmony_ci /* 172362306a36Sopenharmony_ci * Fill in the control structures 172462306a36Sopenharmony_ci */ 172562306a36Sopenharmony_ci skb->protocol = htons(ETH_P_IPV6); 172662306a36Sopenharmony_ci skb->ip_summed = csummode; 172762306a36Sopenharmony_ci skb->csum = 0; 172862306a36Sopenharmony_ci /* reserve for fragmentation and ipsec header */ 172962306a36Sopenharmony_ci skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + 173062306a36Sopenharmony_ci dst_exthdrlen); 173162306a36Sopenharmony_ci 173262306a36Sopenharmony_ci /* 173362306a36Sopenharmony_ci * Find where to start putting bytes 173462306a36Sopenharmony_ci */ 173562306a36Sopenharmony_ci data = skb_put(skb, fraglen - pagedlen); 173662306a36Sopenharmony_ci skb_set_network_header(skb, exthdrlen); 173762306a36Sopenharmony_ci data += fragheaderlen; 173862306a36Sopenharmony_ci skb->transport_header = (skb->network_header + 173962306a36Sopenharmony_ci fragheaderlen); 174062306a36Sopenharmony_ci if (fraggap) { 174162306a36Sopenharmony_ci skb->csum = skb_copy_and_csum_bits( 174262306a36Sopenharmony_ci skb_prev, maxfraglen, 174362306a36Sopenharmony_ci data + transhdrlen, fraggap); 174462306a36Sopenharmony_ci skb_prev->csum = csum_sub(skb_prev->csum, 174562306a36Sopenharmony_ci skb->csum); 174662306a36Sopenharmony_ci data += fraggap; 174762306a36Sopenharmony_ci pskb_trim_unique(skb_prev, maxfraglen); 174862306a36Sopenharmony_ci } 174962306a36Sopenharmony_ci if (copy > 0 && 175062306a36Sopenharmony_ci getfrag(from, data + transhdrlen, offset, 175162306a36Sopenharmony_ci copy, fraggap, skb) < 0) { 175262306a36Sopenharmony_ci err = -EFAULT; 175362306a36Sopenharmony_ci kfree_skb(skb); 175462306a36Sopenharmony_ci goto error; 175562306a36Sopenharmony_ci } else if (flags & MSG_SPLICE_PAGES) { 175662306a36Sopenharmony_ci copy = 0; 175762306a36Sopenharmony_ci } 175862306a36Sopenharmony_ci 175962306a36Sopenharmony_ci offset += copy; 176062306a36Sopenharmony_ci length -= copy + transhdrlen; 176162306a36Sopenharmony_ci transhdrlen = 0; 176262306a36Sopenharmony_ci exthdrlen = 0; 176362306a36Sopenharmony_ci dst_exthdrlen = 0; 176462306a36Sopenharmony_ci 176562306a36Sopenharmony_ci /* Only the initial fragment is time stamped */ 176662306a36Sopenharmony_ci skb_shinfo(skb)->tx_flags = cork->tx_flags; 176762306a36Sopenharmony_ci cork->tx_flags = 0; 176862306a36Sopenharmony_ci skb_shinfo(skb)->tskey = tskey; 176962306a36Sopenharmony_ci tskey = 0; 177062306a36Sopenharmony_ci skb_zcopy_set(skb, uarg, &extra_uref); 177162306a36Sopenharmony_ci 177262306a36Sopenharmony_ci if ((flags & MSG_CONFIRM) && !skb_prev) 177362306a36Sopenharmony_ci skb_set_dst_pending_confirm(skb, 1); 177462306a36Sopenharmony_ci 177562306a36Sopenharmony_ci /* 177662306a36Sopenharmony_ci * Put the packet on the pending queue 177762306a36Sopenharmony_ci */ 177862306a36Sopenharmony_ci if (!skb->destructor) { 177962306a36Sopenharmony_ci skb->destructor = sock_wfree; 178062306a36Sopenharmony_ci skb->sk = sk; 178162306a36Sopenharmony_ci wmem_alloc_delta += skb->truesize; 178262306a36Sopenharmony_ci } 178362306a36Sopenharmony_ci __skb_queue_tail(queue, skb); 178462306a36Sopenharmony_ci continue; 178562306a36Sopenharmony_ci } 178662306a36Sopenharmony_ci 178762306a36Sopenharmony_ci if (copy > length) 178862306a36Sopenharmony_ci copy = length; 178962306a36Sopenharmony_ci 179062306a36Sopenharmony_ci if (!(rt->dst.dev->features&NETIF_F_SG) && 179162306a36Sopenharmony_ci skb_tailroom(skb) >= copy) { 179262306a36Sopenharmony_ci unsigned int off; 179362306a36Sopenharmony_ci 179462306a36Sopenharmony_ci off = skb->len; 179562306a36Sopenharmony_ci if (getfrag(from, skb_put(skb, copy), 179662306a36Sopenharmony_ci offset, copy, off, skb) < 0) { 179762306a36Sopenharmony_ci __skb_trim(skb, off); 179862306a36Sopenharmony_ci err = -EFAULT; 179962306a36Sopenharmony_ci goto error; 180062306a36Sopenharmony_ci } 180162306a36Sopenharmony_ci } else if (flags & MSG_SPLICE_PAGES) { 180262306a36Sopenharmony_ci struct msghdr *msg = from; 180362306a36Sopenharmony_ci 180462306a36Sopenharmony_ci err = -EIO; 180562306a36Sopenharmony_ci if (WARN_ON_ONCE(copy > msg->msg_iter.count)) 180662306a36Sopenharmony_ci goto error; 180762306a36Sopenharmony_ci 180862306a36Sopenharmony_ci err = skb_splice_from_iter(skb, &msg->msg_iter, copy, 180962306a36Sopenharmony_ci sk->sk_allocation); 181062306a36Sopenharmony_ci if (err < 0) 181162306a36Sopenharmony_ci goto error; 181262306a36Sopenharmony_ci copy = err; 181362306a36Sopenharmony_ci wmem_alloc_delta += copy; 181462306a36Sopenharmony_ci } else if (!zc) { 181562306a36Sopenharmony_ci int i = skb_shinfo(skb)->nr_frags; 181662306a36Sopenharmony_ci 181762306a36Sopenharmony_ci err = -ENOMEM; 181862306a36Sopenharmony_ci if (!sk_page_frag_refill(sk, pfrag)) 181962306a36Sopenharmony_ci goto error; 182062306a36Sopenharmony_ci 182162306a36Sopenharmony_ci skb_zcopy_downgrade_managed(skb); 182262306a36Sopenharmony_ci if (!skb_can_coalesce(skb, i, pfrag->page, 182362306a36Sopenharmony_ci pfrag->offset)) { 182462306a36Sopenharmony_ci err = -EMSGSIZE; 182562306a36Sopenharmony_ci if (i == MAX_SKB_FRAGS) 182662306a36Sopenharmony_ci goto error; 182762306a36Sopenharmony_ci 182862306a36Sopenharmony_ci __skb_fill_page_desc(skb, i, pfrag->page, 182962306a36Sopenharmony_ci pfrag->offset, 0); 183062306a36Sopenharmony_ci skb_shinfo(skb)->nr_frags = ++i; 183162306a36Sopenharmony_ci get_page(pfrag->page); 183262306a36Sopenharmony_ci } 183362306a36Sopenharmony_ci copy = min_t(int, copy, pfrag->size - pfrag->offset); 183462306a36Sopenharmony_ci if (getfrag(from, 183562306a36Sopenharmony_ci page_address(pfrag->page) + pfrag->offset, 183662306a36Sopenharmony_ci offset, copy, skb->len, skb) < 0) 183762306a36Sopenharmony_ci goto error_efault; 183862306a36Sopenharmony_ci 183962306a36Sopenharmony_ci pfrag->offset += copy; 184062306a36Sopenharmony_ci skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); 184162306a36Sopenharmony_ci skb->len += copy; 184262306a36Sopenharmony_ci skb->data_len += copy; 184362306a36Sopenharmony_ci skb->truesize += copy; 184462306a36Sopenharmony_ci wmem_alloc_delta += copy; 184562306a36Sopenharmony_ci } else { 184662306a36Sopenharmony_ci err = skb_zerocopy_iter_dgram(skb, from, copy); 184762306a36Sopenharmony_ci if (err < 0) 184862306a36Sopenharmony_ci goto error; 184962306a36Sopenharmony_ci } 185062306a36Sopenharmony_ci offset += copy; 185162306a36Sopenharmony_ci length -= copy; 185262306a36Sopenharmony_ci } 185362306a36Sopenharmony_ci 185462306a36Sopenharmony_ci if (wmem_alloc_delta) 185562306a36Sopenharmony_ci refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 185662306a36Sopenharmony_ci return 0; 185762306a36Sopenharmony_ci 185862306a36Sopenharmony_cierror_efault: 185962306a36Sopenharmony_ci err = -EFAULT; 186062306a36Sopenharmony_cierror: 186162306a36Sopenharmony_ci net_zcopy_put_abort(uarg, extra_uref); 186262306a36Sopenharmony_ci cork->length -= length; 186362306a36Sopenharmony_ci IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 186462306a36Sopenharmony_ci refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 186562306a36Sopenharmony_ci return err; 186662306a36Sopenharmony_ci} 186762306a36Sopenharmony_ci 186862306a36Sopenharmony_ciint ip6_append_data(struct sock *sk, 186962306a36Sopenharmony_ci int getfrag(void *from, char *to, int offset, int len, 187062306a36Sopenharmony_ci int odd, struct sk_buff *skb), 187162306a36Sopenharmony_ci void *from, size_t length, int transhdrlen, 187262306a36Sopenharmony_ci struct ipcm6_cookie *ipc6, struct flowi6 *fl6, 187362306a36Sopenharmony_ci struct rt6_info *rt, unsigned int flags) 187462306a36Sopenharmony_ci{ 187562306a36Sopenharmony_ci struct inet_sock *inet = inet_sk(sk); 187662306a36Sopenharmony_ci struct ipv6_pinfo *np = inet6_sk(sk); 187762306a36Sopenharmony_ci int exthdrlen; 187862306a36Sopenharmony_ci int err; 187962306a36Sopenharmony_ci 188062306a36Sopenharmony_ci if (flags&MSG_PROBE) 188162306a36Sopenharmony_ci return 0; 188262306a36Sopenharmony_ci if (skb_queue_empty(&sk->sk_write_queue)) { 188362306a36Sopenharmony_ci /* 188462306a36Sopenharmony_ci * setup for corking 188562306a36Sopenharmony_ci */ 188662306a36Sopenharmony_ci dst_hold(&rt->dst); 188762306a36Sopenharmony_ci err = ip6_setup_cork(sk, &inet->cork, &np->cork, 188862306a36Sopenharmony_ci ipc6, rt); 188962306a36Sopenharmony_ci if (err) 189062306a36Sopenharmony_ci return err; 189162306a36Sopenharmony_ci 189262306a36Sopenharmony_ci inet->cork.fl.u.ip6 = *fl6; 189362306a36Sopenharmony_ci exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); 189462306a36Sopenharmony_ci length += exthdrlen; 189562306a36Sopenharmony_ci transhdrlen += exthdrlen; 189662306a36Sopenharmony_ci } else { 189762306a36Sopenharmony_ci transhdrlen = 0; 189862306a36Sopenharmony_ci } 189962306a36Sopenharmony_ci 190062306a36Sopenharmony_ci return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork, 190162306a36Sopenharmony_ci &np->cork, sk_page_frag(sk), getfrag, 190262306a36Sopenharmony_ci from, length, transhdrlen, flags, ipc6); 190362306a36Sopenharmony_ci} 190462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(ip6_append_data); 190562306a36Sopenharmony_ci 190662306a36Sopenharmony_cistatic void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork) 190762306a36Sopenharmony_ci{ 190862306a36Sopenharmony_ci struct dst_entry *dst = cork->base.dst; 190962306a36Sopenharmony_ci 191062306a36Sopenharmony_ci cork->base.dst = NULL; 191162306a36Sopenharmony_ci cork->base.flags &= ~IPCORK_ALLFRAG; 191262306a36Sopenharmony_ci skb_dst_set(skb, dst); 191362306a36Sopenharmony_ci} 191462306a36Sopenharmony_ci 191562306a36Sopenharmony_cistatic void ip6_cork_release(struct inet_cork_full *cork, 191662306a36Sopenharmony_ci struct inet6_cork *v6_cork) 191762306a36Sopenharmony_ci{ 191862306a36Sopenharmony_ci if (v6_cork->opt) { 191962306a36Sopenharmony_ci struct ipv6_txoptions *opt = v6_cork->opt; 192062306a36Sopenharmony_ci 192162306a36Sopenharmony_ci kfree(opt->dst0opt); 192262306a36Sopenharmony_ci kfree(opt->dst1opt); 192362306a36Sopenharmony_ci kfree(opt->hopopt); 192462306a36Sopenharmony_ci kfree(opt->srcrt); 192562306a36Sopenharmony_ci kfree(opt); 192662306a36Sopenharmony_ci v6_cork->opt = NULL; 192762306a36Sopenharmony_ci } 192862306a36Sopenharmony_ci 192962306a36Sopenharmony_ci if (cork->base.dst) { 193062306a36Sopenharmony_ci dst_release(cork->base.dst); 193162306a36Sopenharmony_ci cork->base.dst = NULL; 193262306a36Sopenharmony_ci cork->base.flags &= ~IPCORK_ALLFRAG; 193362306a36Sopenharmony_ci } 193462306a36Sopenharmony_ci} 193562306a36Sopenharmony_ci 193662306a36Sopenharmony_cistruct sk_buff *__ip6_make_skb(struct sock *sk, 193762306a36Sopenharmony_ci struct sk_buff_head *queue, 193862306a36Sopenharmony_ci struct inet_cork_full *cork, 193962306a36Sopenharmony_ci struct inet6_cork *v6_cork) 194062306a36Sopenharmony_ci{ 194162306a36Sopenharmony_ci struct sk_buff *skb, *tmp_skb; 194262306a36Sopenharmony_ci struct sk_buff **tail_skb; 194362306a36Sopenharmony_ci struct in6_addr *final_dst; 194462306a36Sopenharmony_ci struct ipv6_pinfo *np = inet6_sk(sk); 194562306a36Sopenharmony_ci struct net *net = sock_net(sk); 194662306a36Sopenharmony_ci struct ipv6hdr *hdr; 194762306a36Sopenharmony_ci struct ipv6_txoptions *opt = v6_cork->opt; 194862306a36Sopenharmony_ci struct rt6_info *rt = (struct rt6_info *)cork->base.dst; 194962306a36Sopenharmony_ci struct flowi6 *fl6 = &cork->fl.u.ip6; 195062306a36Sopenharmony_ci unsigned char proto = fl6->flowi6_proto; 195162306a36Sopenharmony_ci 195262306a36Sopenharmony_ci skb = __skb_dequeue(queue); 195362306a36Sopenharmony_ci if (!skb) 195462306a36Sopenharmony_ci goto out; 195562306a36Sopenharmony_ci tail_skb = &(skb_shinfo(skb)->frag_list); 195662306a36Sopenharmony_ci 195762306a36Sopenharmony_ci /* move skb->data to ip header from ext header */ 195862306a36Sopenharmony_ci if (skb->data < skb_network_header(skb)) 195962306a36Sopenharmony_ci __skb_pull(skb, skb_network_offset(skb)); 196062306a36Sopenharmony_ci while ((tmp_skb = __skb_dequeue(queue)) != NULL) { 196162306a36Sopenharmony_ci __skb_pull(tmp_skb, skb_network_header_len(skb)); 196262306a36Sopenharmony_ci *tail_skb = tmp_skb; 196362306a36Sopenharmony_ci tail_skb = &(tmp_skb->next); 196462306a36Sopenharmony_ci skb->len += tmp_skb->len; 196562306a36Sopenharmony_ci skb->data_len += tmp_skb->len; 196662306a36Sopenharmony_ci skb->truesize += tmp_skb->truesize; 196762306a36Sopenharmony_ci tmp_skb->destructor = NULL; 196862306a36Sopenharmony_ci tmp_skb->sk = NULL; 196962306a36Sopenharmony_ci } 197062306a36Sopenharmony_ci 197162306a36Sopenharmony_ci /* Allow local fragmentation. */ 197262306a36Sopenharmony_ci skb->ignore_df = ip6_sk_ignore_df(sk); 197362306a36Sopenharmony_ci __skb_pull(skb, skb_network_header_len(skb)); 197462306a36Sopenharmony_ci 197562306a36Sopenharmony_ci final_dst = &fl6->daddr; 197662306a36Sopenharmony_ci if (opt && opt->opt_flen) 197762306a36Sopenharmony_ci ipv6_push_frag_opts(skb, opt, &proto); 197862306a36Sopenharmony_ci if (opt && opt->opt_nflen) 197962306a36Sopenharmony_ci ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr); 198062306a36Sopenharmony_ci 198162306a36Sopenharmony_ci skb_push(skb, sizeof(struct ipv6hdr)); 198262306a36Sopenharmony_ci skb_reset_network_header(skb); 198362306a36Sopenharmony_ci hdr = ipv6_hdr(skb); 198462306a36Sopenharmony_ci 198562306a36Sopenharmony_ci ip6_flow_hdr(hdr, v6_cork->tclass, 198662306a36Sopenharmony_ci ip6_make_flowlabel(net, skb, fl6->flowlabel, 198762306a36Sopenharmony_ci ip6_autoflowlabel(net, np), fl6)); 198862306a36Sopenharmony_ci hdr->hop_limit = v6_cork->hop_limit; 198962306a36Sopenharmony_ci hdr->nexthdr = proto; 199062306a36Sopenharmony_ci hdr->saddr = fl6->saddr; 199162306a36Sopenharmony_ci hdr->daddr = *final_dst; 199262306a36Sopenharmony_ci 199362306a36Sopenharmony_ci skb->priority = sk->sk_priority; 199462306a36Sopenharmony_ci skb->mark = cork->base.mark; 199562306a36Sopenharmony_ci skb->tstamp = cork->base.transmit_time; 199662306a36Sopenharmony_ci 199762306a36Sopenharmony_ci ip6_cork_steal_dst(skb, cork); 199862306a36Sopenharmony_ci IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 199962306a36Sopenharmony_ci if (proto == IPPROTO_ICMPV6) { 200062306a36Sopenharmony_ci struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 200162306a36Sopenharmony_ci u8 icmp6_type; 200262306a36Sopenharmony_ci 200362306a36Sopenharmony_ci if (sk->sk_socket->type == SOCK_RAW && 200462306a36Sopenharmony_ci !inet_test_bit(HDRINCL, sk)) 200562306a36Sopenharmony_ci icmp6_type = fl6->fl6_icmp_type; 200662306a36Sopenharmony_ci else 200762306a36Sopenharmony_ci icmp6_type = icmp6_hdr(skb)->icmp6_type; 200862306a36Sopenharmony_ci ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type); 200962306a36Sopenharmony_ci ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 201062306a36Sopenharmony_ci } 201162306a36Sopenharmony_ci 201262306a36Sopenharmony_ci ip6_cork_release(cork, v6_cork); 201362306a36Sopenharmony_ciout: 201462306a36Sopenharmony_ci return skb; 201562306a36Sopenharmony_ci} 201662306a36Sopenharmony_ci 201762306a36Sopenharmony_ciint ip6_send_skb(struct sk_buff *skb) 201862306a36Sopenharmony_ci{ 201962306a36Sopenharmony_ci struct net *net = sock_net(skb->sk); 202062306a36Sopenharmony_ci struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 202162306a36Sopenharmony_ci int err; 202262306a36Sopenharmony_ci 202362306a36Sopenharmony_ci err = ip6_local_out(net, skb->sk, skb); 202462306a36Sopenharmony_ci if (err) { 202562306a36Sopenharmony_ci if (err > 0) 202662306a36Sopenharmony_ci err = net_xmit_errno(err); 202762306a36Sopenharmony_ci if (err) 202862306a36Sopenharmony_ci IP6_INC_STATS(net, rt->rt6i_idev, 202962306a36Sopenharmony_ci IPSTATS_MIB_OUTDISCARDS); 203062306a36Sopenharmony_ci } 203162306a36Sopenharmony_ci 203262306a36Sopenharmony_ci return err; 203362306a36Sopenharmony_ci} 203462306a36Sopenharmony_ci 203562306a36Sopenharmony_ciint ip6_push_pending_frames(struct sock *sk) 203662306a36Sopenharmony_ci{ 203762306a36Sopenharmony_ci struct sk_buff *skb; 203862306a36Sopenharmony_ci 203962306a36Sopenharmony_ci skb = ip6_finish_skb(sk); 204062306a36Sopenharmony_ci if (!skb) 204162306a36Sopenharmony_ci return 0; 204262306a36Sopenharmony_ci 204362306a36Sopenharmony_ci return ip6_send_skb(skb); 204462306a36Sopenharmony_ci} 204562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(ip6_push_pending_frames); 204662306a36Sopenharmony_ci 204762306a36Sopenharmony_cistatic void __ip6_flush_pending_frames(struct sock *sk, 204862306a36Sopenharmony_ci struct sk_buff_head *queue, 204962306a36Sopenharmony_ci struct inet_cork_full *cork, 205062306a36Sopenharmony_ci struct inet6_cork *v6_cork) 205162306a36Sopenharmony_ci{ 205262306a36Sopenharmony_ci struct sk_buff *skb; 205362306a36Sopenharmony_ci 205462306a36Sopenharmony_ci while ((skb = __skb_dequeue_tail(queue)) != NULL) { 205562306a36Sopenharmony_ci if (skb_dst(skb)) 205662306a36Sopenharmony_ci IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), 205762306a36Sopenharmony_ci IPSTATS_MIB_OUTDISCARDS); 205862306a36Sopenharmony_ci kfree_skb(skb); 205962306a36Sopenharmony_ci } 206062306a36Sopenharmony_ci 206162306a36Sopenharmony_ci ip6_cork_release(cork, v6_cork); 206262306a36Sopenharmony_ci} 206362306a36Sopenharmony_ci 206462306a36Sopenharmony_civoid ip6_flush_pending_frames(struct sock *sk) 206562306a36Sopenharmony_ci{ 206662306a36Sopenharmony_ci __ip6_flush_pending_frames(sk, &sk->sk_write_queue, 206762306a36Sopenharmony_ci &inet_sk(sk)->cork, &inet6_sk(sk)->cork); 206862306a36Sopenharmony_ci} 206962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(ip6_flush_pending_frames); 207062306a36Sopenharmony_ci 207162306a36Sopenharmony_cistruct sk_buff *ip6_make_skb(struct sock *sk, 207262306a36Sopenharmony_ci int getfrag(void *from, char *to, int offset, 207362306a36Sopenharmony_ci int len, int odd, struct sk_buff *skb), 207462306a36Sopenharmony_ci void *from, size_t length, int transhdrlen, 207562306a36Sopenharmony_ci struct ipcm6_cookie *ipc6, struct rt6_info *rt, 207662306a36Sopenharmony_ci unsigned int flags, struct inet_cork_full *cork) 207762306a36Sopenharmony_ci{ 207862306a36Sopenharmony_ci struct inet6_cork v6_cork; 207962306a36Sopenharmony_ci struct sk_buff_head queue; 208062306a36Sopenharmony_ci int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); 208162306a36Sopenharmony_ci int err; 208262306a36Sopenharmony_ci 208362306a36Sopenharmony_ci if (flags & MSG_PROBE) { 208462306a36Sopenharmony_ci dst_release(&rt->dst); 208562306a36Sopenharmony_ci return NULL; 208662306a36Sopenharmony_ci } 208762306a36Sopenharmony_ci 208862306a36Sopenharmony_ci __skb_queue_head_init(&queue); 208962306a36Sopenharmony_ci 209062306a36Sopenharmony_ci cork->base.flags = 0; 209162306a36Sopenharmony_ci cork->base.addr = 0; 209262306a36Sopenharmony_ci cork->base.opt = NULL; 209362306a36Sopenharmony_ci v6_cork.opt = NULL; 209462306a36Sopenharmony_ci err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt); 209562306a36Sopenharmony_ci if (err) { 209662306a36Sopenharmony_ci ip6_cork_release(cork, &v6_cork); 209762306a36Sopenharmony_ci return ERR_PTR(err); 209862306a36Sopenharmony_ci } 209962306a36Sopenharmony_ci if (ipc6->dontfrag < 0) 210062306a36Sopenharmony_ci ipc6->dontfrag = inet6_sk(sk)->dontfrag; 210162306a36Sopenharmony_ci 210262306a36Sopenharmony_ci err = __ip6_append_data(sk, &queue, cork, &v6_cork, 210362306a36Sopenharmony_ci ¤t->task_frag, getfrag, from, 210462306a36Sopenharmony_ci length + exthdrlen, transhdrlen + exthdrlen, 210562306a36Sopenharmony_ci flags, ipc6); 210662306a36Sopenharmony_ci if (err) { 210762306a36Sopenharmony_ci __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork); 210862306a36Sopenharmony_ci return ERR_PTR(err); 210962306a36Sopenharmony_ci } 211062306a36Sopenharmony_ci 211162306a36Sopenharmony_ci return __ip6_make_skb(sk, &queue, cork, &v6_cork); 211262306a36Sopenharmony_ci} 2113