162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * INET An implementation of the TCP/IP protocol suite for the LINUX 462306a36Sopenharmony_ci * operating system. INET is implemented using the BSD Socket 562306a36Sopenharmony_ci * interface as the means of communication with the user level. 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * The Internet Protocol (IP) output module. 862306a36Sopenharmony_ci * 962306a36Sopenharmony_ci * Authors: Ross Biro 1062306a36Sopenharmony_ci * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 1162306a36Sopenharmony_ci * Donald Becker, <becker@super.org> 1262306a36Sopenharmony_ci * Alan Cox, <Alan.Cox@linux.org> 1362306a36Sopenharmony_ci * Richard Underwood 1462306a36Sopenharmony_ci * Stefan Becker, <stefanb@yello.ping.de> 1562306a36Sopenharmony_ci * Jorge Cwik, <jorge@laser.satlink.net> 1662306a36Sopenharmony_ci * Arnt Gulbrandsen, <agulbra@nvg.unit.no> 1762306a36Sopenharmony_ci * Hirokazu Takahashi, <taka@valinux.co.jp> 1862306a36Sopenharmony_ci * 1962306a36Sopenharmony_ci * See ip_input.c for original log 2062306a36Sopenharmony_ci * 2162306a36Sopenharmony_ci * Fixes: 2262306a36Sopenharmony_ci * Alan Cox : Missing nonblock feature in ip_build_xmit. 2362306a36Sopenharmony_ci * Mike Kilburn : htons() missing in ip_build_xmit. 2462306a36Sopenharmony_ci * Bradford Johnson: Fix faulty handling of some frames when 2562306a36Sopenharmony_ci * no route is found. 2662306a36Sopenharmony_ci * Alexander Demenshin: Missing sk/skb free in ip_queue_xmit 2762306a36Sopenharmony_ci * (in case if packet not accepted by 2862306a36Sopenharmony_ci * output firewall rules) 2962306a36Sopenharmony_ci * Mike McLagan : Routing by source 3062306a36Sopenharmony_ci * Alexey Kuznetsov: use new route cache 3162306a36Sopenharmony_ci * Andi Kleen: Fix broken PMTU recovery and remove 3262306a36Sopenharmony_ci * some redundant tests. 3362306a36Sopenharmony_ci * Vitaly E. Lavrov : Transparent proxy revived after year coma. 3462306a36Sopenharmony_ci * Andi Kleen : Replace ip_reply with ip_send_reply. 3562306a36Sopenharmony_ci * Andi Kleen : Split fast and slow ip_build_xmit path 3662306a36Sopenharmony_ci * for decreased register pressure on x86 3762306a36Sopenharmony_ci * and more readability. 3862306a36Sopenharmony_ci * Marc Boucher : When call_out_firewall returns FW_QUEUE, 3962306a36Sopenharmony_ci * silently drop skb instead of failing with -EPERM. 4062306a36Sopenharmony_ci * Detlev Wengorz : Copy protocol for fragments. 4162306a36Sopenharmony_ci * Hirokazu Takahashi: HW checksumming for outgoing UDP 4262306a36Sopenharmony_ci * datagrams. 4362306a36Sopenharmony_ci * Hirokazu Takahashi: sendfile() on UDP works now. 4462306a36Sopenharmony_ci */ 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci#include <linux/uaccess.h> 4762306a36Sopenharmony_ci#include <linux/module.h> 4862306a36Sopenharmony_ci#include <linux/types.h> 4962306a36Sopenharmony_ci#include <linux/kernel.h> 5062306a36Sopenharmony_ci#include <linux/mm.h> 5162306a36Sopenharmony_ci#include <linux/string.h> 5262306a36Sopenharmony_ci#include <linux/errno.h> 5362306a36Sopenharmony_ci#include <linux/highmem.h> 5462306a36Sopenharmony_ci#include <linux/slab.h> 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci#include <linux/socket.h> 5762306a36Sopenharmony_ci#include <linux/sockios.h> 5862306a36Sopenharmony_ci#include <linux/in.h> 5962306a36Sopenharmony_ci#include <linux/inet.h> 6062306a36Sopenharmony_ci#include <linux/netdevice.h> 6162306a36Sopenharmony_ci#include <linux/etherdevice.h> 6262306a36Sopenharmony_ci#include <linux/proc_fs.h> 6362306a36Sopenharmony_ci#include <linux/stat.h> 6462306a36Sopenharmony_ci#include <linux/init.h> 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci#include <net/snmp.h> 6762306a36Sopenharmony_ci#include <net/ip.h> 6862306a36Sopenharmony_ci#include <net/protocol.h> 6962306a36Sopenharmony_ci#include <net/route.h> 7062306a36Sopenharmony_ci#include <net/xfrm.h> 7162306a36Sopenharmony_ci#include <linux/skbuff.h> 7262306a36Sopenharmony_ci#include <net/sock.h> 7362306a36Sopenharmony_ci#include <net/arp.h> 7462306a36Sopenharmony_ci#include <net/icmp.h> 7562306a36Sopenharmony_ci#include <net/checksum.h> 7662306a36Sopenharmony_ci#include <net/gso.h> 7762306a36Sopenharmony_ci#include <net/inetpeer.h> 7862306a36Sopenharmony_ci#include <net/inet_ecn.h> 7962306a36Sopenharmony_ci#include <net/lwtunnel.h> 8062306a36Sopenharmony_ci#include <linux/bpf-cgroup.h> 8162306a36Sopenharmony_ci#include <linux/igmp.h> 8262306a36Sopenharmony_ci#include <linux/netfilter_ipv4.h> 8362306a36Sopenharmony_ci#include <linux/netfilter_bridge.h> 8462306a36Sopenharmony_ci#include <linux/netlink.h> 8562306a36Sopenharmony_ci#include <linux/tcp.h> 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_cistatic int 8862306a36Sopenharmony_ciip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, 8962306a36Sopenharmony_ci unsigned int mtu, 9062306a36Sopenharmony_ci int (*output)(struct net *, struct sock *, struct sk_buff *)); 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci/* Generate a checksum for an outgoing IP datagram. */ 9362306a36Sopenharmony_civoid ip_send_check(struct iphdr *iph) 9462306a36Sopenharmony_ci{ 9562306a36Sopenharmony_ci iph->check = 0; 9662306a36Sopenharmony_ci iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 9762306a36Sopenharmony_ci} 9862306a36Sopenharmony_ciEXPORT_SYMBOL(ip_send_check); 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ciint __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) 10162306a36Sopenharmony_ci{ 10262306a36Sopenharmony_ci struct iphdr *iph = ip_hdr(skb); 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci iph_set_totlen(iph, skb->len); 10562306a36Sopenharmony_ci ip_send_check(iph); 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci /* if egress device is enslaved to an L3 master device pass the 10862306a36Sopenharmony_ci * skb to its handler for processing 10962306a36Sopenharmony_ci */ 11062306a36Sopenharmony_ci skb = l3mdev_ip_out(sk, skb); 11162306a36Sopenharmony_ci if (unlikely(!skb)) 11262306a36Sopenharmony_ci return 0; 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci skb->protocol = htons(ETH_P_IP); 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, 11762306a36Sopenharmony_ci net, sk, skb, NULL, skb_dst(skb)->dev, 11862306a36Sopenharmony_ci dst_output); 11962306a36Sopenharmony_ci} 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_ciint ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) 12262306a36Sopenharmony_ci{ 12362306a36Sopenharmony_ci int err; 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci err = __ip_local_out(net, sk, skb); 12662306a36Sopenharmony_ci if (likely(err == 1)) 12762306a36Sopenharmony_ci err = dst_output(net, sk, skb); 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci return err; 13062306a36Sopenharmony_ci} 13162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(ip_local_out); 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_cistatic inline int ip_select_ttl(const struct inet_sock *inet, 13462306a36Sopenharmony_ci const struct dst_entry *dst) 13562306a36Sopenharmony_ci{ 13662306a36Sopenharmony_ci int ttl = READ_ONCE(inet->uc_ttl); 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci if (ttl < 0) 13962306a36Sopenharmony_ci ttl = ip4_dst_hoplimit(dst); 14062306a36Sopenharmony_ci return ttl; 14162306a36Sopenharmony_ci} 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci/* 14462306a36Sopenharmony_ci * Add an ip header to a skbuff and send it out. 14562306a36Sopenharmony_ci * 14662306a36Sopenharmony_ci */ 14762306a36Sopenharmony_ciint ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, 14862306a36Sopenharmony_ci __be32 saddr, __be32 daddr, struct ip_options_rcu *opt, 14962306a36Sopenharmony_ci u8 tos) 15062306a36Sopenharmony_ci{ 15162306a36Sopenharmony_ci const struct inet_sock *inet = inet_sk(sk); 15262306a36Sopenharmony_ci struct rtable *rt = skb_rtable(skb); 15362306a36Sopenharmony_ci struct net *net = sock_net(sk); 15462306a36Sopenharmony_ci struct iphdr *iph; 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci /* Build the IP header. */ 15762306a36Sopenharmony_ci skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0)); 15862306a36Sopenharmony_ci skb_reset_network_header(skb); 15962306a36Sopenharmony_ci iph = ip_hdr(skb); 16062306a36Sopenharmony_ci iph->version = 4; 16162306a36Sopenharmony_ci iph->ihl = 5; 16262306a36Sopenharmony_ci iph->tos = tos; 16362306a36Sopenharmony_ci iph->ttl = ip_select_ttl(inet, &rt->dst); 16462306a36Sopenharmony_ci iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); 16562306a36Sopenharmony_ci iph->saddr = saddr; 16662306a36Sopenharmony_ci iph->protocol = sk->sk_protocol; 16762306a36Sopenharmony_ci /* Do not bother generating IPID for small packets (eg SYNACK) */ 16862306a36Sopenharmony_ci if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) { 16962306a36Sopenharmony_ci iph->frag_off = htons(IP_DF); 17062306a36Sopenharmony_ci iph->id = 0; 17162306a36Sopenharmony_ci } else { 17262306a36Sopenharmony_ci iph->frag_off = 0; 17362306a36Sopenharmony_ci /* TCP packets here are SYNACK with fat IPv4/TCP options. 17462306a36Sopenharmony_ci * Avoid using the hashed IP ident generator. 17562306a36Sopenharmony_ci */ 17662306a36Sopenharmony_ci if (sk->sk_protocol == IPPROTO_TCP) 17762306a36Sopenharmony_ci iph->id = (__force __be16)get_random_u16(); 17862306a36Sopenharmony_ci else 17962306a36Sopenharmony_ci __ip_select_ident(net, iph, 1); 18062306a36Sopenharmony_ci } 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci if (opt && opt->opt.optlen) { 18362306a36Sopenharmony_ci iph->ihl += opt->opt.optlen>>2; 18462306a36Sopenharmony_ci ip_options_build(skb, &opt->opt, daddr, rt); 18562306a36Sopenharmony_ci } 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci skb->priority = READ_ONCE(sk->sk_priority); 18862306a36Sopenharmony_ci if (!skb->mark) 18962306a36Sopenharmony_ci skb->mark = READ_ONCE(sk->sk_mark); 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci /* Send it out. */ 19262306a36Sopenharmony_ci return ip_local_out(net, skb->sk, skb); 19362306a36Sopenharmony_ci} 19462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(ip_build_and_send_pkt); 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_cistatic int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) 19762306a36Sopenharmony_ci{ 19862306a36Sopenharmony_ci struct dst_entry *dst = skb_dst(skb); 19962306a36Sopenharmony_ci struct rtable *rt = (struct rtable *)dst; 20062306a36Sopenharmony_ci struct net_device *dev = dst->dev; 20162306a36Sopenharmony_ci unsigned int hh_len = LL_RESERVED_SPACE(dev); 20262306a36Sopenharmony_ci struct neighbour *neigh; 20362306a36Sopenharmony_ci bool is_v6gw = false; 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci if (rt->rt_type == RTN_MULTICAST) { 20662306a36Sopenharmony_ci IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len); 20762306a36Sopenharmony_ci } else if (rt->rt_type == RTN_BROADCAST) 20862306a36Sopenharmony_ci IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len); 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci /* OUTOCTETS should be counted after fragment */ 21162306a36Sopenharmony_ci IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { 21462306a36Sopenharmony_ci skb = skb_expand_head(skb, hh_len); 21562306a36Sopenharmony_ci if (!skb) 21662306a36Sopenharmony_ci return -ENOMEM; 21762306a36Sopenharmony_ci } 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci if (lwtunnel_xmit_redirect(dst->lwtstate)) { 22062306a36Sopenharmony_ci int res = lwtunnel_xmit(skb); 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci if (res != LWTUNNEL_XMIT_CONTINUE) 22362306a36Sopenharmony_ci return res; 22462306a36Sopenharmony_ci } 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci rcu_read_lock(); 22762306a36Sopenharmony_ci neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); 22862306a36Sopenharmony_ci if (!IS_ERR(neigh)) { 22962306a36Sopenharmony_ci int res; 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci sock_confirm_neigh(skb, neigh); 23262306a36Sopenharmony_ci /* if crossing protocols, can not use the cached header */ 23362306a36Sopenharmony_ci res = neigh_output(neigh, skb, is_v6gw); 23462306a36Sopenharmony_ci rcu_read_unlock(); 23562306a36Sopenharmony_ci return res; 23662306a36Sopenharmony_ci } 23762306a36Sopenharmony_ci rcu_read_unlock(); 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci net_dbg_ratelimited("%s: No header cache and no neighbour!\n", 24062306a36Sopenharmony_ci __func__); 24162306a36Sopenharmony_ci kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); 24262306a36Sopenharmony_ci return PTR_ERR(neigh); 24362306a36Sopenharmony_ci} 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_cistatic int ip_finish_output_gso(struct net *net, struct sock *sk, 24662306a36Sopenharmony_ci struct sk_buff *skb, unsigned int mtu) 24762306a36Sopenharmony_ci{ 24862306a36Sopenharmony_ci struct sk_buff *segs, *nskb; 24962306a36Sopenharmony_ci netdev_features_t features; 25062306a36Sopenharmony_ci int ret = 0; 25162306a36Sopenharmony_ci 25262306a36Sopenharmony_ci /* common case: seglen is <= mtu 25362306a36Sopenharmony_ci */ 25462306a36Sopenharmony_ci if (skb_gso_validate_network_len(skb, mtu)) 25562306a36Sopenharmony_ci return ip_finish_output2(net, sk, skb); 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci /* Slowpath - GSO segment length exceeds the egress MTU. 25862306a36Sopenharmony_ci * 25962306a36Sopenharmony_ci * This can happen in several cases: 26062306a36Sopenharmony_ci * - Forwarding of a TCP GRO skb, when DF flag is not set. 26162306a36Sopenharmony_ci * - Forwarding of an skb that arrived on a virtualization interface 26262306a36Sopenharmony_ci * (virtio-net/vhost/tap) with TSO/GSO size set by other network 26362306a36Sopenharmony_ci * stack. 26462306a36Sopenharmony_ci * - Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an 26562306a36Sopenharmony_ci * interface with a smaller MTU. 26662306a36Sopenharmony_ci * - Arriving GRO skb (or GSO skb in a virtualized environment) that is 26762306a36Sopenharmony_ci * bridged to a NETIF_F_TSO tunnel stacked over an interface with an 26862306a36Sopenharmony_ci * insufficient MTU. 26962306a36Sopenharmony_ci */ 27062306a36Sopenharmony_ci features = netif_skb_features(skb); 27162306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET); 27262306a36Sopenharmony_ci segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); 27362306a36Sopenharmony_ci if (IS_ERR_OR_NULL(segs)) { 27462306a36Sopenharmony_ci kfree_skb(skb); 27562306a36Sopenharmony_ci return -ENOMEM; 27662306a36Sopenharmony_ci } 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci consume_skb(skb); 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci skb_list_walk_safe(segs, segs, nskb) { 28162306a36Sopenharmony_ci int err; 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci skb_mark_not_on_list(segs); 28462306a36Sopenharmony_ci err = ip_fragment(net, sk, segs, mtu, ip_finish_output2); 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci if (err && ret == 0) 28762306a36Sopenharmony_ci ret = err; 28862306a36Sopenharmony_ci } 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci return ret; 29162306a36Sopenharmony_ci} 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_cistatic int __ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 29462306a36Sopenharmony_ci{ 29562306a36Sopenharmony_ci unsigned int mtu; 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 29862306a36Sopenharmony_ci /* Policy lookup after SNAT yielded a new policy */ 29962306a36Sopenharmony_ci if (skb_dst(skb)->xfrm) { 30062306a36Sopenharmony_ci IPCB(skb)->flags |= IPSKB_REROUTED; 30162306a36Sopenharmony_ci return dst_output(net, sk, skb); 30262306a36Sopenharmony_ci } 30362306a36Sopenharmony_ci#endif 30462306a36Sopenharmony_ci mtu = ip_skb_dst_mtu(sk, skb); 30562306a36Sopenharmony_ci if (skb_is_gso(skb)) 30662306a36Sopenharmony_ci return ip_finish_output_gso(net, sk, skb, mtu); 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci if (skb->len > mtu || IPCB(skb)->frag_max_size) 30962306a36Sopenharmony_ci return ip_fragment(net, sk, skb, mtu, ip_finish_output2); 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci return ip_finish_output2(net, sk, skb); 31262306a36Sopenharmony_ci} 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_cistatic int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 31562306a36Sopenharmony_ci{ 31662306a36Sopenharmony_ci int ret; 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); 31962306a36Sopenharmony_ci switch (ret) { 32062306a36Sopenharmony_ci case NET_XMIT_SUCCESS: 32162306a36Sopenharmony_ci return __ip_finish_output(net, sk, skb); 32262306a36Sopenharmony_ci case NET_XMIT_CN: 32362306a36Sopenharmony_ci return __ip_finish_output(net, sk, skb) ? : ret; 32462306a36Sopenharmony_ci default: 32562306a36Sopenharmony_ci kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS); 32662306a36Sopenharmony_ci return ret; 32762306a36Sopenharmony_ci } 32862306a36Sopenharmony_ci} 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_cistatic int ip_mc_finish_output(struct net *net, struct sock *sk, 33162306a36Sopenharmony_ci struct sk_buff *skb) 33262306a36Sopenharmony_ci{ 33362306a36Sopenharmony_ci struct rtable *new_rt; 33462306a36Sopenharmony_ci bool do_cn = false; 33562306a36Sopenharmony_ci int ret, err; 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); 33862306a36Sopenharmony_ci switch (ret) { 33962306a36Sopenharmony_ci case NET_XMIT_CN: 34062306a36Sopenharmony_ci do_cn = true; 34162306a36Sopenharmony_ci fallthrough; 34262306a36Sopenharmony_ci case NET_XMIT_SUCCESS: 34362306a36Sopenharmony_ci break; 34462306a36Sopenharmony_ci default: 34562306a36Sopenharmony_ci kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS); 34662306a36Sopenharmony_ci return ret; 34762306a36Sopenharmony_ci } 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ci /* Reset rt_iif so that inet_iif() will return skb->skb_iif. Setting 35062306a36Sopenharmony_ci * this to non-zero causes ipi_ifindex in in_pktinfo to be overwritten, 35162306a36Sopenharmony_ci * see ipv4_pktinfo_prepare(). 35262306a36Sopenharmony_ci */ 35362306a36Sopenharmony_ci new_rt = rt_dst_clone(net->loopback_dev, skb_rtable(skb)); 35462306a36Sopenharmony_ci if (new_rt) { 35562306a36Sopenharmony_ci new_rt->rt_iif = 0; 35662306a36Sopenharmony_ci skb_dst_drop(skb); 35762306a36Sopenharmony_ci skb_dst_set(skb, &new_rt->dst); 35862306a36Sopenharmony_ci } 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci err = dev_loopback_xmit(net, sk, skb); 36162306a36Sopenharmony_ci return (do_cn && err) ? ret : err; 36262306a36Sopenharmony_ci} 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ciint ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) 36562306a36Sopenharmony_ci{ 36662306a36Sopenharmony_ci struct rtable *rt = skb_rtable(skb); 36762306a36Sopenharmony_ci struct net_device *dev = rt->dst.dev; 36862306a36Sopenharmony_ci 36962306a36Sopenharmony_ci /* 37062306a36Sopenharmony_ci * If the indicated interface is up and running, send the packet. 37162306a36Sopenharmony_ci */ 37262306a36Sopenharmony_ci skb->dev = dev; 37362306a36Sopenharmony_ci skb->protocol = htons(ETH_P_IP); 37462306a36Sopenharmony_ci 37562306a36Sopenharmony_ci /* 37662306a36Sopenharmony_ci * Multicasts are looped back for other local users 37762306a36Sopenharmony_ci */ 37862306a36Sopenharmony_ci 37962306a36Sopenharmony_ci if (rt->rt_flags&RTCF_MULTICAST) { 38062306a36Sopenharmony_ci if (sk_mc_loop(sk) 38162306a36Sopenharmony_ci#ifdef CONFIG_IP_MROUTE 38262306a36Sopenharmony_ci /* Small optimization: do not loopback not local frames, 38362306a36Sopenharmony_ci which returned after forwarding; they will be dropped 38462306a36Sopenharmony_ci by ip_mr_input in any case. 38562306a36Sopenharmony_ci Note, that local frames are looped back to be delivered 38662306a36Sopenharmony_ci to local recipients. 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ci This check is duplicated in ip_mr_input at the moment. 38962306a36Sopenharmony_ci */ 39062306a36Sopenharmony_ci && 39162306a36Sopenharmony_ci ((rt->rt_flags & RTCF_LOCAL) || 39262306a36Sopenharmony_ci !(IPCB(skb)->flags & IPSKB_FORWARDED)) 39362306a36Sopenharmony_ci#endif 39462306a36Sopenharmony_ci ) { 39562306a36Sopenharmony_ci struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 39662306a36Sopenharmony_ci if (newskb) 39762306a36Sopenharmony_ci NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, 39862306a36Sopenharmony_ci net, sk, newskb, NULL, newskb->dev, 39962306a36Sopenharmony_ci ip_mc_finish_output); 40062306a36Sopenharmony_ci } 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_ci /* Multicasts with ttl 0 must not go beyond the host */ 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ci if (ip_hdr(skb)->ttl == 0) { 40562306a36Sopenharmony_ci kfree_skb(skb); 40662306a36Sopenharmony_ci return 0; 40762306a36Sopenharmony_ci } 40862306a36Sopenharmony_ci } 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_ci if (rt->rt_flags&RTCF_BROADCAST) { 41162306a36Sopenharmony_ci struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 41262306a36Sopenharmony_ci if (newskb) 41362306a36Sopenharmony_ci NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, 41462306a36Sopenharmony_ci net, sk, newskb, NULL, newskb->dev, 41562306a36Sopenharmony_ci ip_mc_finish_output); 41662306a36Sopenharmony_ci } 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_ci return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, 41962306a36Sopenharmony_ci net, sk, skb, NULL, skb->dev, 42062306a36Sopenharmony_ci ip_finish_output, 42162306a36Sopenharmony_ci !(IPCB(skb)->flags & IPSKB_REROUTED)); 42262306a36Sopenharmony_ci} 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ciint ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) 42562306a36Sopenharmony_ci{ 42662306a36Sopenharmony_ci struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_ci skb->dev = dev; 42962306a36Sopenharmony_ci skb->protocol = htons(ETH_P_IP); 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, 43262306a36Sopenharmony_ci net, sk, skb, indev, dev, 43362306a36Sopenharmony_ci ip_finish_output, 43462306a36Sopenharmony_ci !(IPCB(skb)->flags & IPSKB_REROUTED)); 43562306a36Sopenharmony_ci} 43662306a36Sopenharmony_ciEXPORT_SYMBOL(ip_output); 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci/* 43962306a36Sopenharmony_ci * copy saddr and daddr, possibly using 64bit load/stores 44062306a36Sopenharmony_ci * Equivalent to : 44162306a36Sopenharmony_ci * iph->saddr = fl4->saddr; 44262306a36Sopenharmony_ci * iph->daddr = fl4->daddr; 44362306a36Sopenharmony_ci */ 44462306a36Sopenharmony_cistatic void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) 44562306a36Sopenharmony_ci{ 44662306a36Sopenharmony_ci BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) != 44762306a36Sopenharmony_ci offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr)); 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci iph->saddr = fl4->saddr; 45062306a36Sopenharmony_ci iph->daddr = fl4->daddr; 45162306a36Sopenharmony_ci} 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_ci/* Note: skb->sk can be different from sk, in case of tunnels */ 45462306a36Sopenharmony_ciint __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, 45562306a36Sopenharmony_ci __u8 tos) 45662306a36Sopenharmony_ci{ 45762306a36Sopenharmony_ci struct inet_sock *inet = inet_sk(sk); 45862306a36Sopenharmony_ci struct net *net = sock_net(sk); 45962306a36Sopenharmony_ci struct ip_options_rcu *inet_opt; 46062306a36Sopenharmony_ci struct flowi4 *fl4; 46162306a36Sopenharmony_ci struct rtable *rt; 46262306a36Sopenharmony_ci struct iphdr *iph; 46362306a36Sopenharmony_ci int res; 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci /* Skip all of this if the packet is already routed, 46662306a36Sopenharmony_ci * f.e. by something like SCTP. 46762306a36Sopenharmony_ci */ 46862306a36Sopenharmony_ci rcu_read_lock(); 46962306a36Sopenharmony_ci inet_opt = rcu_dereference(inet->inet_opt); 47062306a36Sopenharmony_ci fl4 = &fl->u.ip4; 47162306a36Sopenharmony_ci rt = skb_rtable(skb); 47262306a36Sopenharmony_ci if (rt) 47362306a36Sopenharmony_ci goto packet_routed; 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci /* Make sure we can route this packet. */ 47662306a36Sopenharmony_ci rt = (struct rtable *)__sk_dst_check(sk, 0); 47762306a36Sopenharmony_ci if (!rt) { 47862306a36Sopenharmony_ci __be32 daddr; 47962306a36Sopenharmony_ci 48062306a36Sopenharmony_ci /* Use correct destination address if we have options. */ 48162306a36Sopenharmony_ci daddr = inet->inet_daddr; 48262306a36Sopenharmony_ci if (inet_opt && inet_opt->opt.srr) 48362306a36Sopenharmony_ci daddr = inet_opt->opt.faddr; 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci /* If this fails, retransmit mechanism of transport layer will 48662306a36Sopenharmony_ci * keep trying until route appears or the connection times 48762306a36Sopenharmony_ci * itself out. 48862306a36Sopenharmony_ci */ 48962306a36Sopenharmony_ci rt = ip_route_output_ports(net, fl4, sk, 49062306a36Sopenharmony_ci daddr, inet->inet_saddr, 49162306a36Sopenharmony_ci inet->inet_dport, 49262306a36Sopenharmony_ci inet->inet_sport, 49362306a36Sopenharmony_ci sk->sk_protocol, 49462306a36Sopenharmony_ci RT_CONN_FLAGS_TOS(sk, tos), 49562306a36Sopenharmony_ci sk->sk_bound_dev_if); 49662306a36Sopenharmony_ci if (IS_ERR(rt)) 49762306a36Sopenharmony_ci goto no_route; 49862306a36Sopenharmony_ci sk_setup_caps(sk, &rt->dst); 49962306a36Sopenharmony_ci } 50062306a36Sopenharmony_ci skb_dst_set_noref(skb, &rt->dst); 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_cipacket_routed: 50362306a36Sopenharmony_ci if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway) 50462306a36Sopenharmony_ci goto no_route; 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ci /* OK, we know where to send it, allocate and build IP header. */ 50762306a36Sopenharmony_ci skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0)); 50862306a36Sopenharmony_ci skb_reset_network_header(skb); 50962306a36Sopenharmony_ci iph = ip_hdr(skb); 51062306a36Sopenharmony_ci *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (tos & 0xff)); 51162306a36Sopenharmony_ci if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df) 51262306a36Sopenharmony_ci iph->frag_off = htons(IP_DF); 51362306a36Sopenharmony_ci else 51462306a36Sopenharmony_ci iph->frag_off = 0; 51562306a36Sopenharmony_ci iph->ttl = ip_select_ttl(inet, &rt->dst); 51662306a36Sopenharmony_ci iph->protocol = sk->sk_protocol; 51762306a36Sopenharmony_ci ip_copy_addrs(iph, fl4); 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_ci /* Transport layer set skb->h.foo itself. */ 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_ci if (inet_opt && inet_opt->opt.optlen) { 52262306a36Sopenharmony_ci iph->ihl += inet_opt->opt.optlen >> 2; 52362306a36Sopenharmony_ci ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt); 52462306a36Sopenharmony_ci } 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ci ip_select_ident_segs(net, skb, sk, 52762306a36Sopenharmony_ci skb_shinfo(skb)->gso_segs ?: 1); 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci /* TODO : should we use skb->sk here instead of sk ? */ 53062306a36Sopenharmony_ci skb->priority = READ_ONCE(sk->sk_priority); 53162306a36Sopenharmony_ci skb->mark = READ_ONCE(sk->sk_mark); 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci res = ip_local_out(net, sk, skb); 53462306a36Sopenharmony_ci rcu_read_unlock(); 53562306a36Sopenharmony_ci return res; 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_cino_route: 53862306a36Sopenharmony_ci rcu_read_unlock(); 53962306a36Sopenharmony_ci IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); 54062306a36Sopenharmony_ci kfree_skb_reason(skb, SKB_DROP_REASON_IP_OUTNOROUTES); 54162306a36Sopenharmony_ci return -EHOSTUNREACH; 54262306a36Sopenharmony_ci} 54362306a36Sopenharmony_ciEXPORT_SYMBOL(__ip_queue_xmit); 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_ciint ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) 54662306a36Sopenharmony_ci{ 54762306a36Sopenharmony_ci return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos); 54862306a36Sopenharmony_ci} 54962306a36Sopenharmony_ciEXPORT_SYMBOL(ip_queue_xmit); 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_cistatic void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) 55262306a36Sopenharmony_ci{ 55362306a36Sopenharmony_ci to->pkt_type = from->pkt_type; 55462306a36Sopenharmony_ci to->priority = from->priority; 55562306a36Sopenharmony_ci to->protocol = from->protocol; 55662306a36Sopenharmony_ci to->skb_iif = from->skb_iif; 55762306a36Sopenharmony_ci skb_dst_drop(to); 55862306a36Sopenharmony_ci skb_dst_copy(to, from); 55962306a36Sopenharmony_ci to->dev = from->dev; 56062306a36Sopenharmony_ci to->mark = from->mark; 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_ci skb_copy_hash(to, from); 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci#ifdef CONFIG_NET_SCHED 56562306a36Sopenharmony_ci to->tc_index = from->tc_index; 56662306a36Sopenharmony_ci#endif 56762306a36Sopenharmony_ci nf_copy(to, from); 56862306a36Sopenharmony_ci skb_ext_copy(to, from); 56962306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IP_VS) 57062306a36Sopenharmony_ci to->ipvs_property = from->ipvs_property; 57162306a36Sopenharmony_ci#endif 57262306a36Sopenharmony_ci skb_copy_secmark(to, from); 57362306a36Sopenharmony_ci} 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_cistatic int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, 57662306a36Sopenharmony_ci unsigned int mtu, 57762306a36Sopenharmony_ci int (*output)(struct net *, struct sock *, struct sk_buff *)) 57862306a36Sopenharmony_ci{ 57962306a36Sopenharmony_ci struct iphdr *iph = ip_hdr(skb); 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_ci if ((iph->frag_off & htons(IP_DF)) == 0) 58262306a36Sopenharmony_ci return ip_do_fragment(net, sk, skb, output); 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci if (unlikely(!skb->ignore_df || 58562306a36Sopenharmony_ci (IPCB(skb)->frag_max_size && 58662306a36Sopenharmony_ci IPCB(skb)->frag_max_size > mtu))) { 58762306a36Sopenharmony_ci IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); 58862306a36Sopenharmony_ci icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 58962306a36Sopenharmony_ci htonl(mtu)); 59062306a36Sopenharmony_ci kfree_skb(skb); 59162306a36Sopenharmony_ci return -EMSGSIZE; 59262306a36Sopenharmony_ci } 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_ci return ip_do_fragment(net, sk, skb, output); 59562306a36Sopenharmony_ci} 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_civoid ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph, 59862306a36Sopenharmony_ci unsigned int hlen, struct ip_fraglist_iter *iter) 59962306a36Sopenharmony_ci{ 60062306a36Sopenharmony_ci unsigned int first_len = skb_pagelen(skb); 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci iter->frag = skb_shinfo(skb)->frag_list; 60362306a36Sopenharmony_ci skb_frag_list_init(skb); 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci iter->offset = 0; 60662306a36Sopenharmony_ci iter->iph = iph; 60762306a36Sopenharmony_ci iter->hlen = hlen; 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ci skb->data_len = first_len - skb_headlen(skb); 61062306a36Sopenharmony_ci skb->len = first_len; 61162306a36Sopenharmony_ci iph->tot_len = htons(first_len); 61262306a36Sopenharmony_ci iph->frag_off = htons(IP_MF); 61362306a36Sopenharmony_ci ip_send_check(iph); 61462306a36Sopenharmony_ci} 61562306a36Sopenharmony_ciEXPORT_SYMBOL(ip_fraglist_init); 61662306a36Sopenharmony_ci 61762306a36Sopenharmony_civoid ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter) 61862306a36Sopenharmony_ci{ 61962306a36Sopenharmony_ci unsigned int hlen = iter->hlen; 62062306a36Sopenharmony_ci struct iphdr *iph = iter->iph; 62162306a36Sopenharmony_ci struct sk_buff *frag; 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci frag = iter->frag; 62462306a36Sopenharmony_ci frag->ip_summed = CHECKSUM_NONE; 62562306a36Sopenharmony_ci skb_reset_transport_header(frag); 62662306a36Sopenharmony_ci __skb_push(frag, hlen); 62762306a36Sopenharmony_ci skb_reset_network_header(frag); 62862306a36Sopenharmony_ci memcpy(skb_network_header(frag), iph, hlen); 62962306a36Sopenharmony_ci iter->iph = ip_hdr(frag); 63062306a36Sopenharmony_ci iph = iter->iph; 63162306a36Sopenharmony_ci iph->tot_len = htons(frag->len); 63262306a36Sopenharmony_ci ip_copy_metadata(frag, skb); 63362306a36Sopenharmony_ci iter->offset += skb->len - hlen; 63462306a36Sopenharmony_ci iph->frag_off = htons(iter->offset >> 3); 63562306a36Sopenharmony_ci if (frag->next) 63662306a36Sopenharmony_ci iph->frag_off |= htons(IP_MF); 63762306a36Sopenharmony_ci /* Ready, complete checksum */ 63862306a36Sopenharmony_ci ip_send_check(iph); 63962306a36Sopenharmony_ci} 64062306a36Sopenharmony_ciEXPORT_SYMBOL(ip_fraglist_prepare); 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_civoid ip_frag_init(struct sk_buff *skb, unsigned int hlen, 64362306a36Sopenharmony_ci unsigned int ll_rs, unsigned int mtu, bool DF, 64462306a36Sopenharmony_ci struct ip_frag_state *state) 64562306a36Sopenharmony_ci{ 64662306a36Sopenharmony_ci struct iphdr *iph = ip_hdr(skb); 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_ci state->DF = DF; 64962306a36Sopenharmony_ci state->hlen = hlen; 65062306a36Sopenharmony_ci state->ll_rs = ll_rs; 65162306a36Sopenharmony_ci state->mtu = mtu; 65262306a36Sopenharmony_ci 65362306a36Sopenharmony_ci state->left = skb->len - hlen; /* Space per frame */ 65462306a36Sopenharmony_ci state->ptr = hlen; /* Where to start from */ 65562306a36Sopenharmony_ci 65662306a36Sopenharmony_ci state->offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3; 65762306a36Sopenharmony_ci state->not_last_frag = iph->frag_off & htons(IP_MF); 65862306a36Sopenharmony_ci} 65962306a36Sopenharmony_ciEXPORT_SYMBOL(ip_frag_init); 66062306a36Sopenharmony_ci 66162306a36Sopenharmony_cistatic void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to, 66262306a36Sopenharmony_ci bool first_frag) 66362306a36Sopenharmony_ci{ 66462306a36Sopenharmony_ci /* Copy the flags to each fragment. */ 66562306a36Sopenharmony_ci IPCB(to)->flags = IPCB(from)->flags; 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_ci /* ANK: dirty, but effective trick. Upgrade options only if 66862306a36Sopenharmony_ci * the segment to be fragmented was THE FIRST (otherwise, 66962306a36Sopenharmony_ci * options are already fixed) and make it ONCE 67062306a36Sopenharmony_ci * on the initial skb, so that all the following fragments 67162306a36Sopenharmony_ci * will inherit fixed options. 67262306a36Sopenharmony_ci */ 67362306a36Sopenharmony_ci if (first_frag) 67462306a36Sopenharmony_ci ip_options_fragment(from); 67562306a36Sopenharmony_ci} 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_cistruct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state) 67862306a36Sopenharmony_ci{ 67962306a36Sopenharmony_ci unsigned int len = state->left; 68062306a36Sopenharmony_ci struct sk_buff *skb2; 68162306a36Sopenharmony_ci struct iphdr *iph; 68262306a36Sopenharmony_ci 68362306a36Sopenharmony_ci /* IF: it doesn't fit, use 'mtu' - the data space left */ 68462306a36Sopenharmony_ci if (len > state->mtu) 68562306a36Sopenharmony_ci len = state->mtu; 68662306a36Sopenharmony_ci /* IF: we are not sending up to and including the packet end 68762306a36Sopenharmony_ci then align the next start on an eight byte boundary */ 68862306a36Sopenharmony_ci if (len < state->left) { 68962306a36Sopenharmony_ci len &= ~7; 69062306a36Sopenharmony_ci } 69162306a36Sopenharmony_ci 69262306a36Sopenharmony_ci /* Allocate buffer */ 69362306a36Sopenharmony_ci skb2 = alloc_skb(len + state->hlen + state->ll_rs, GFP_ATOMIC); 69462306a36Sopenharmony_ci if (!skb2) 69562306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_ci /* 69862306a36Sopenharmony_ci * Set up data on packet 69962306a36Sopenharmony_ci */ 70062306a36Sopenharmony_ci 70162306a36Sopenharmony_ci ip_copy_metadata(skb2, skb); 70262306a36Sopenharmony_ci skb_reserve(skb2, state->ll_rs); 70362306a36Sopenharmony_ci skb_put(skb2, len + state->hlen); 70462306a36Sopenharmony_ci skb_reset_network_header(skb2); 70562306a36Sopenharmony_ci skb2->transport_header = skb2->network_header + state->hlen; 70662306a36Sopenharmony_ci 70762306a36Sopenharmony_ci /* 70862306a36Sopenharmony_ci * Charge the memory for the fragment to any owner 70962306a36Sopenharmony_ci * it might possess 71062306a36Sopenharmony_ci */ 71162306a36Sopenharmony_ci 71262306a36Sopenharmony_ci if (skb->sk) 71362306a36Sopenharmony_ci skb_set_owner_w(skb2, skb->sk); 71462306a36Sopenharmony_ci 71562306a36Sopenharmony_ci /* 71662306a36Sopenharmony_ci * Copy the packet header into the new buffer. 71762306a36Sopenharmony_ci */ 71862306a36Sopenharmony_ci 71962306a36Sopenharmony_ci skb_copy_from_linear_data(skb, skb_network_header(skb2), state->hlen); 72062306a36Sopenharmony_ci 72162306a36Sopenharmony_ci /* 72262306a36Sopenharmony_ci * Copy a block of the IP datagram. 72362306a36Sopenharmony_ci */ 72462306a36Sopenharmony_ci if (skb_copy_bits(skb, state->ptr, skb_transport_header(skb2), len)) 72562306a36Sopenharmony_ci BUG(); 72662306a36Sopenharmony_ci state->left -= len; 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_ci /* 72962306a36Sopenharmony_ci * Fill in the new header fields. 73062306a36Sopenharmony_ci */ 73162306a36Sopenharmony_ci iph = ip_hdr(skb2); 73262306a36Sopenharmony_ci iph->frag_off = htons((state->offset >> 3)); 73362306a36Sopenharmony_ci if (state->DF) 73462306a36Sopenharmony_ci iph->frag_off |= htons(IP_DF); 73562306a36Sopenharmony_ci 73662306a36Sopenharmony_ci /* 73762306a36Sopenharmony_ci * Added AC : If we are fragmenting a fragment that's not the 73862306a36Sopenharmony_ci * last fragment then keep MF on each bit 73962306a36Sopenharmony_ci */ 74062306a36Sopenharmony_ci if (state->left > 0 || state->not_last_frag) 74162306a36Sopenharmony_ci iph->frag_off |= htons(IP_MF); 74262306a36Sopenharmony_ci state->ptr += len; 74362306a36Sopenharmony_ci state->offset += len; 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ci iph->tot_len = htons(len + state->hlen); 74662306a36Sopenharmony_ci 74762306a36Sopenharmony_ci ip_send_check(iph); 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_ci return skb2; 75062306a36Sopenharmony_ci} 75162306a36Sopenharmony_ciEXPORT_SYMBOL(ip_frag_next); 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci/* 75462306a36Sopenharmony_ci * This IP datagram is too large to be sent in one piece. Break it up into 75562306a36Sopenharmony_ci * smaller pieces (each of size equal to IP header plus 75662306a36Sopenharmony_ci * a block of the data of the original IP data part) that will yet fit in a 75762306a36Sopenharmony_ci * single device frame, and queue such a frame for sending. 75862306a36Sopenharmony_ci */ 75962306a36Sopenharmony_ci 76062306a36Sopenharmony_ciint ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, 76162306a36Sopenharmony_ci int (*output)(struct net *, struct sock *, struct sk_buff *)) 76262306a36Sopenharmony_ci{ 76362306a36Sopenharmony_ci struct iphdr *iph; 76462306a36Sopenharmony_ci struct sk_buff *skb2; 76562306a36Sopenharmony_ci bool mono_delivery_time = skb->mono_delivery_time; 76662306a36Sopenharmony_ci struct rtable *rt = skb_rtable(skb); 76762306a36Sopenharmony_ci unsigned int mtu, hlen, ll_rs; 76862306a36Sopenharmony_ci struct ip_fraglist_iter iter; 76962306a36Sopenharmony_ci ktime_t tstamp = skb->tstamp; 77062306a36Sopenharmony_ci struct ip_frag_state state; 77162306a36Sopenharmony_ci int err = 0; 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci /* for offloaded checksums cleanup checksum before fragmentation */ 77462306a36Sopenharmony_ci if (skb->ip_summed == CHECKSUM_PARTIAL && 77562306a36Sopenharmony_ci (err = skb_checksum_help(skb))) 77662306a36Sopenharmony_ci goto fail; 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_ci /* 77962306a36Sopenharmony_ci * Point into the IP datagram header. 78062306a36Sopenharmony_ci */ 78162306a36Sopenharmony_ci 78262306a36Sopenharmony_ci iph = ip_hdr(skb); 78362306a36Sopenharmony_ci 78462306a36Sopenharmony_ci mtu = ip_skb_dst_mtu(sk, skb); 78562306a36Sopenharmony_ci if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu) 78662306a36Sopenharmony_ci mtu = IPCB(skb)->frag_max_size; 78762306a36Sopenharmony_ci 78862306a36Sopenharmony_ci /* 78962306a36Sopenharmony_ci * Setup starting values. 79062306a36Sopenharmony_ci */ 79162306a36Sopenharmony_ci 79262306a36Sopenharmony_ci hlen = iph->ihl * 4; 79362306a36Sopenharmony_ci mtu = mtu - hlen; /* Size of data space */ 79462306a36Sopenharmony_ci IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; 79562306a36Sopenharmony_ci ll_rs = LL_RESERVED_SPACE(rt->dst.dev); 79662306a36Sopenharmony_ci 79762306a36Sopenharmony_ci /* When frag_list is given, use it. First, check its validity: 79862306a36Sopenharmony_ci * some transformers could create wrong frag_list or break existing 79962306a36Sopenharmony_ci * one, it is not prohibited. In this case fall back to copying. 80062306a36Sopenharmony_ci * 80162306a36Sopenharmony_ci * LATER: this step can be merged to real generation of fragments, 80262306a36Sopenharmony_ci * we can switch to copy when see the first bad fragment. 80362306a36Sopenharmony_ci */ 80462306a36Sopenharmony_ci if (skb_has_frag_list(skb)) { 80562306a36Sopenharmony_ci struct sk_buff *frag, *frag2; 80662306a36Sopenharmony_ci unsigned int first_len = skb_pagelen(skb); 80762306a36Sopenharmony_ci 80862306a36Sopenharmony_ci if (first_len - hlen > mtu || 80962306a36Sopenharmony_ci ((first_len - hlen) & 7) || 81062306a36Sopenharmony_ci ip_is_fragment(iph) || 81162306a36Sopenharmony_ci skb_cloned(skb) || 81262306a36Sopenharmony_ci skb_headroom(skb) < ll_rs) 81362306a36Sopenharmony_ci goto slow_path; 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci skb_walk_frags(skb, frag) { 81662306a36Sopenharmony_ci /* Correct geometry. */ 81762306a36Sopenharmony_ci if (frag->len > mtu || 81862306a36Sopenharmony_ci ((frag->len & 7) && frag->next) || 81962306a36Sopenharmony_ci skb_headroom(frag) < hlen + ll_rs) 82062306a36Sopenharmony_ci goto slow_path_clean; 82162306a36Sopenharmony_ci 82262306a36Sopenharmony_ci /* Partially cloned skb? */ 82362306a36Sopenharmony_ci if (skb_shared(frag)) 82462306a36Sopenharmony_ci goto slow_path_clean; 82562306a36Sopenharmony_ci 82662306a36Sopenharmony_ci BUG_ON(frag->sk); 82762306a36Sopenharmony_ci if (skb->sk) { 82862306a36Sopenharmony_ci frag->sk = skb->sk; 82962306a36Sopenharmony_ci frag->destructor = sock_wfree; 83062306a36Sopenharmony_ci } 83162306a36Sopenharmony_ci skb->truesize -= frag->truesize; 83262306a36Sopenharmony_ci } 83362306a36Sopenharmony_ci 83462306a36Sopenharmony_ci /* Everything is OK. Generate! */ 83562306a36Sopenharmony_ci ip_fraglist_init(skb, iph, hlen, &iter); 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_ci for (;;) { 83862306a36Sopenharmony_ci /* Prepare header of the next frame, 83962306a36Sopenharmony_ci * before previous one went down. */ 84062306a36Sopenharmony_ci if (iter.frag) { 84162306a36Sopenharmony_ci bool first_frag = (iter.offset == 0); 84262306a36Sopenharmony_ci 84362306a36Sopenharmony_ci IPCB(iter.frag)->flags = IPCB(skb)->flags; 84462306a36Sopenharmony_ci ip_fraglist_prepare(skb, &iter); 84562306a36Sopenharmony_ci if (first_frag && IPCB(skb)->opt.optlen) { 84662306a36Sopenharmony_ci /* ipcb->opt is not populated for frags 84762306a36Sopenharmony_ci * coming from __ip_make_skb(), 84862306a36Sopenharmony_ci * ip_options_fragment() needs optlen 84962306a36Sopenharmony_ci */ 85062306a36Sopenharmony_ci IPCB(iter.frag)->opt.optlen = 85162306a36Sopenharmony_ci IPCB(skb)->opt.optlen; 85262306a36Sopenharmony_ci ip_options_fragment(iter.frag); 85362306a36Sopenharmony_ci ip_send_check(iter.iph); 85462306a36Sopenharmony_ci } 85562306a36Sopenharmony_ci } 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_ci skb_set_delivery_time(skb, tstamp, mono_delivery_time); 85862306a36Sopenharmony_ci err = output(net, sk, skb); 85962306a36Sopenharmony_ci 86062306a36Sopenharmony_ci if (!err) 86162306a36Sopenharmony_ci IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES); 86262306a36Sopenharmony_ci if (err || !iter.frag) 86362306a36Sopenharmony_ci break; 86462306a36Sopenharmony_ci 86562306a36Sopenharmony_ci skb = ip_fraglist_next(&iter); 86662306a36Sopenharmony_ci } 86762306a36Sopenharmony_ci 86862306a36Sopenharmony_ci if (err == 0) { 86962306a36Sopenharmony_ci IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS); 87062306a36Sopenharmony_ci return 0; 87162306a36Sopenharmony_ci } 87262306a36Sopenharmony_ci 87362306a36Sopenharmony_ci kfree_skb_list(iter.frag); 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_ci IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); 87662306a36Sopenharmony_ci return err; 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_cislow_path_clean: 87962306a36Sopenharmony_ci skb_walk_frags(skb, frag2) { 88062306a36Sopenharmony_ci if (frag2 == frag) 88162306a36Sopenharmony_ci break; 88262306a36Sopenharmony_ci frag2->sk = NULL; 88362306a36Sopenharmony_ci frag2->destructor = NULL; 88462306a36Sopenharmony_ci skb->truesize += frag2->truesize; 88562306a36Sopenharmony_ci } 88662306a36Sopenharmony_ci } 88762306a36Sopenharmony_ci 88862306a36Sopenharmony_cislow_path: 88962306a36Sopenharmony_ci /* 89062306a36Sopenharmony_ci * Fragment the datagram. 89162306a36Sopenharmony_ci */ 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_ci ip_frag_init(skb, hlen, ll_rs, mtu, IPCB(skb)->flags & IPSKB_FRAG_PMTU, 89462306a36Sopenharmony_ci &state); 89562306a36Sopenharmony_ci 89662306a36Sopenharmony_ci /* 89762306a36Sopenharmony_ci * Keep copying data until we run out. 89862306a36Sopenharmony_ci */ 89962306a36Sopenharmony_ci 90062306a36Sopenharmony_ci while (state.left > 0) { 90162306a36Sopenharmony_ci bool first_frag = (state.offset == 0); 90262306a36Sopenharmony_ci 90362306a36Sopenharmony_ci skb2 = ip_frag_next(skb, &state); 90462306a36Sopenharmony_ci if (IS_ERR(skb2)) { 90562306a36Sopenharmony_ci err = PTR_ERR(skb2); 90662306a36Sopenharmony_ci goto fail; 90762306a36Sopenharmony_ci } 90862306a36Sopenharmony_ci ip_frag_ipcb(skb, skb2, first_frag); 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_ci /* 91162306a36Sopenharmony_ci * Put this fragment into the sending queue. 91262306a36Sopenharmony_ci */ 91362306a36Sopenharmony_ci skb_set_delivery_time(skb2, tstamp, mono_delivery_time); 91462306a36Sopenharmony_ci err = output(net, sk, skb2); 91562306a36Sopenharmony_ci if (err) 91662306a36Sopenharmony_ci goto fail; 91762306a36Sopenharmony_ci 91862306a36Sopenharmony_ci IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES); 91962306a36Sopenharmony_ci } 92062306a36Sopenharmony_ci consume_skb(skb); 92162306a36Sopenharmony_ci IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS); 92262306a36Sopenharmony_ci return err; 92362306a36Sopenharmony_ci 92462306a36Sopenharmony_cifail: 92562306a36Sopenharmony_ci kfree_skb(skb); 92662306a36Sopenharmony_ci IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); 92762306a36Sopenharmony_ci return err; 92862306a36Sopenharmony_ci} 92962306a36Sopenharmony_ciEXPORT_SYMBOL(ip_do_fragment); 93062306a36Sopenharmony_ci 93162306a36Sopenharmony_ciint 93262306a36Sopenharmony_ciip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) 93362306a36Sopenharmony_ci{ 93462306a36Sopenharmony_ci struct msghdr *msg = from; 93562306a36Sopenharmony_ci 93662306a36Sopenharmony_ci if (skb->ip_summed == CHECKSUM_PARTIAL) { 93762306a36Sopenharmony_ci if (!copy_from_iter_full(to, len, &msg->msg_iter)) 93862306a36Sopenharmony_ci return -EFAULT; 93962306a36Sopenharmony_ci } else { 94062306a36Sopenharmony_ci __wsum csum = 0; 94162306a36Sopenharmony_ci if (!csum_and_copy_from_iter_full(to, len, &csum, &msg->msg_iter)) 94262306a36Sopenharmony_ci return -EFAULT; 94362306a36Sopenharmony_ci skb->csum = csum_block_add(skb->csum, csum, odd); 94462306a36Sopenharmony_ci } 94562306a36Sopenharmony_ci return 0; 94662306a36Sopenharmony_ci} 94762306a36Sopenharmony_ciEXPORT_SYMBOL(ip_generic_getfrag); 94862306a36Sopenharmony_ci 94962306a36Sopenharmony_cistatic int __ip_append_data(struct sock *sk, 95062306a36Sopenharmony_ci struct flowi4 *fl4, 95162306a36Sopenharmony_ci struct sk_buff_head *queue, 95262306a36Sopenharmony_ci struct inet_cork *cork, 95362306a36Sopenharmony_ci struct page_frag *pfrag, 95462306a36Sopenharmony_ci int getfrag(void *from, char *to, int offset, 95562306a36Sopenharmony_ci int len, int odd, struct sk_buff *skb), 95662306a36Sopenharmony_ci void *from, int length, int transhdrlen, 95762306a36Sopenharmony_ci unsigned int flags) 95862306a36Sopenharmony_ci{ 95962306a36Sopenharmony_ci struct inet_sock *inet = inet_sk(sk); 96062306a36Sopenharmony_ci struct ubuf_info *uarg = NULL; 96162306a36Sopenharmony_ci struct sk_buff *skb; 96262306a36Sopenharmony_ci struct ip_options *opt = cork->opt; 96362306a36Sopenharmony_ci int hh_len; 96462306a36Sopenharmony_ci int exthdrlen; 96562306a36Sopenharmony_ci int mtu; 96662306a36Sopenharmony_ci int copy; 96762306a36Sopenharmony_ci int err; 96862306a36Sopenharmony_ci int offset = 0; 96962306a36Sopenharmony_ci bool zc = false; 97062306a36Sopenharmony_ci unsigned int maxfraglen, fragheaderlen, maxnonfragsize; 97162306a36Sopenharmony_ci int csummode = CHECKSUM_NONE; 97262306a36Sopenharmony_ci struct rtable *rt = (struct rtable *)cork->dst; 97362306a36Sopenharmony_ci unsigned int wmem_alloc_delta = 0; 97462306a36Sopenharmony_ci bool paged, extra_uref = false; 97562306a36Sopenharmony_ci u32 tskey = 0; 97662306a36Sopenharmony_ci 97762306a36Sopenharmony_ci skb = skb_peek_tail(queue); 97862306a36Sopenharmony_ci 97962306a36Sopenharmony_ci exthdrlen = !skb ? rt->dst.header_len : 0; 98062306a36Sopenharmony_ci mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; 98162306a36Sopenharmony_ci paged = !!cork->gso_size; 98262306a36Sopenharmony_ci 98362306a36Sopenharmony_ci if (cork->tx_flags & SKBTX_ANY_TSTAMP && 98462306a36Sopenharmony_ci READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) 98562306a36Sopenharmony_ci tskey = atomic_inc_return(&sk->sk_tskey) - 1; 98662306a36Sopenharmony_ci 98762306a36Sopenharmony_ci hh_len = LL_RESERVED_SPACE(rt->dst.dev); 98862306a36Sopenharmony_ci 98962306a36Sopenharmony_ci fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 99062306a36Sopenharmony_ci maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 99162306a36Sopenharmony_ci maxnonfragsize = ip_sk_ignore_df(sk) ? IP_MAX_MTU : mtu; 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_ci if (cork->length + length > maxnonfragsize - fragheaderlen) { 99462306a36Sopenharmony_ci ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, 99562306a36Sopenharmony_ci mtu - (opt ? opt->optlen : 0)); 99662306a36Sopenharmony_ci return -EMSGSIZE; 99762306a36Sopenharmony_ci } 99862306a36Sopenharmony_ci 99962306a36Sopenharmony_ci /* 100062306a36Sopenharmony_ci * transhdrlen > 0 means that this is the first fragment and we wish 100162306a36Sopenharmony_ci * it won't be fragmented in the future. 100262306a36Sopenharmony_ci */ 100362306a36Sopenharmony_ci if (transhdrlen && 100462306a36Sopenharmony_ci length + fragheaderlen <= mtu && 100562306a36Sopenharmony_ci rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) && 100662306a36Sopenharmony_ci (!(flags & MSG_MORE) || cork->gso_size) && 100762306a36Sopenharmony_ci (!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM))) 100862306a36Sopenharmony_ci csummode = CHECKSUM_PARTIAL; 100962306a36Sopenharmony_ci 101062306a36Sopenharmony_ci if ((flags & MSG_ZEROCOPY) && length) { 101162306a36Sopenharmony_ci struct msghdr *msg = from; 101262306a36Sopenharmony_ci 101362306a36Sopenharmony_ci if (getfrag == ip_generic_getfrag && msg->msg_ubuf) { 101462306a36Sopenharmony_ci if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb)) 101562306a36Sopenharmony_ci return -EINVAL; 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ci /* Leave uarg NULL if can't zerocopy, callers should 101862306a36Sopenharmony_ci * be able to handle it. 101962306a36Sopenharmony_ci */ 102062306a36Sopenharmony_ci if ((rt->dst.dev->features & NETIF_F_SG) && 102162306a36Sopenharmony_ci csummode == CHECKSUM_PARTIAL) { 102262306a36Sopenharmony_ci paged = true; 102362306a36Sopenharmony_ci zc = true; 102462306a36Sopenharmony_ci uarg = msg->msg_ubuf; 102562306a36Sopenharmony_ci } 102662306a36Sopenharmony_ci } else if (sock_flag(sk, SOCK_ZEROCOPY)) { 102762306a36Sopenharmony_ci uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb)); 102862306a36Sopenharmony_ci if (!uarg) 102962306a36Sopenharmony_ci return -ENOBUFS; 103062306a36Sopenharmony_ci extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ 103162306a36Sopenharmony_ci if (rt->dst.dev->features & NETIF_F_SG && 103262306a36Sopenharmony_ci csummode == CHECKSUM_PARTIAL) { 103362306a36Sopenharmony_ci paged = true; 103462306a36Sopenharmony_ci zc = true; 103562306a36Sopenharmony_ci } else { 103662306a36Sopenharmony_ci uarg_to_msgzc(uarg)->zerocopy = 0; 103762306a36Sopenharmony_ci skb_zcopy_set(skb, uarg, &extra_uref); 103862306a36Sopenharmony_ci } 103962306a36Sopenharmony_ci } 104062306a36Sopenharmony_ci } else if ((flags & MSG_SPLICE_PAGES) && length) { 104162306a36Sopenharmony_ci if (inet_test_bit(HDRINCL, sk)) 104262306a36Sopenharmony_ci return -EPERM; 104362306a36Sopenharmony_ci if (rt->dst.dev->features & NETIF_F_SG && 104462306a36Sopenharmony_ci getfrag == ip_generic_getfrag) 104562306a36Sopenharmony_ci /* We need an empty buffer to attach stuff to */ 104662306a36Sopenharmony_ci paged = true; 104762306a36Sopenharmony_ci else 104862306a36Sopenharmony_ci flags &= ~MSG_SPLICE_PAGES; 104962306a36Sopenharmony_ci } 105062306a36Sopenharmony_ci 105162306a36Sopenharmony_ci cork->length += length; 105262306a36Sopenharmony_ci 105362306a36Sopenharmony_ci /* So, what's going on in the loop below? 105462306a36Sopenharmony_ci * 105562306a36Sopenharmony_ci * We use calculated fragment length to generate chained skb, 105662306a36Sopenharmony_ci * each of segments is IP fragment ready for sending to network after 105762306a36Sopenharmony_ci * adding appropriate IP header. 105862306a36Sopenharmony_ci */ 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ci if (!skb) 106162306a36Sopenharmony_ci goto alloc_new_skb; 106262306a36Sopenharmony_ci 106362306a36Sopenharmony_ci while (length > 0) { 106462306a36Sopenharmony_ci /* Check if the remaining data fits into current packet. */ 106562306a36Sopenharmony_ci copy = mtu - skb->len; 106662306a36Sopenharmony_ci if (copy < length) 106762306a36Sopenharmony_ci copy = maxfraglen - skb->len; 106862306a36Sopenharmony_ci if (copy <= 0) { 106962306a36Sopenharmony_ci char *data; 107062306a36Sopenharmony_ci unsigned int datalen; 107162306a36Sopenharmony_ci unsigned int fraglen; 107262306a36Sopenharmony_ci unsigned int fraggap; 107362306a36Sopenharmony_ci unsigned int alloclen, alloc_extra; 107462306a36Sopenharmony_ci unsigned int pagedlen; 107562306a36Sopenharmony_ci struct sk_buff *skb_prev; 107662306a36Sopenharmony_cialloc_new_skb: 107762306a36Sopenharmony_ci skb_prev = skb; 107862306a36Sopenharmony_ci if (skb_prev) 107962306a36Sopenharmony_ci fraggap = skb_prev->len - maxfraglen; 108062306a36Sopenharmony_ci else 108162306a36Sopenharmony_ci fraggap = 0; 108262306a36Sopenharmony_ci 108362306a36Sopenharmony_ci /* 108462306a36Sopenharmony_ci * If remaining data exceeds the mtu, 108562306a36Sopenharmony_ci * we know we need more fragment(s). 108662306a36Sopenharmony_ci */ 108762306a36Sopenharmony_ci datalen = length + fraggap; 108862306a36Sopenharmony_ci if (datalen > mtu - fragheaderlen) 108962306a36Sopenharmony_ci datalen = maxfraglen - fragheaderlen; 109062306a36Sopenharmony_ci fraglen = datalen + fragheaderlen; 109162306a36Sopenharmony_ci pagedlen = 0; 109262306a36Sopenharmony_ci 109362306a36Sopenharmony_ci alloc_extra = hh_len + 15; 109462306a36Sopenharmony_ci alloc_extra += exthdrlen; 109562306a36Sopenharmony_ci 109662306a36Sopenharmony_ci /* The last fragment gets additional space at tail. 109762306a36Sopenharmony_ci * Note, with MSG_MORE we overallocate on fragments, 109862306a36Sopenharmony_ci * because we have no idea what fragment will be 109962306a36Sopenharmony_ci * the last. 110062306a36Sopenharmony_ci */ 110162306a36Sopenharmony_ci if (datalen == length + fraggap) 110262306a36Sopenharmony_ci alloc_extra += rt->dst.trailer_len; 110362306a36Sopenharmony_ci 110462306a36Sopenharmony_ci if ((flags & MSG_MORE) && 110562306a36Sopenharmony_ci !(rt->dst.dev->features&NETIF_F_SG)) 110662306a36Sopenharmony_ci alloclen = mtu; 110762306a36Sopenharmony_ci else if (!paged && 110862306a36Sopenharmony_ci (fraglen + alloc_extra < SKB_MAX_ALLOC || 110962306a36Sopenharmony_ci !(rt->dst.dev->features & NETIF_F_SG))) 111062306a36Sopenharmony_ci alloclen = fraglen; 111162306a36Sopenharmony_ci else { 111262306a36Sopenharmony_ci alloclen = fragheaderlen + transhdrlen; 111362306a36Sopenharmony_ci pagedlen = datalen - transhdrlen; 111462306a36Sopenharmony_ci } 111562306a36Sopenharmony_ci 111662306a36Sopenharmony_ci alloclen += alloc_extra; 111762306a36Sopenharmony_ci 111862306a36Sopenharmony_ci if (transhdrlen) { 111962306a36Sopenharmony_ci skb = sock_alloc_send_skb(sk, alloclen, 112062306a36Sopenharmony_ci (flags & MSG_DONTWAIT), &err); 112162306a36Sopenharmony_ci } else { 112262306a36Sopenharmony_ci skb = NULL; 112362306a36Sopenharmony_ci if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <= 112462306a36Sopenharmony_ci 2 * sk->sk_sndbuf) 112562306a36Sopenharmony_ci skb = alloc_skb(alloclen, 112662306a36Sopenharmony_ci sk->sk_allocation); 112762306a36Sopenharmony_ci if (unlikely(!skb)) 112862306a36Sopenharmony_ci err = -ENOBUFS; 112962306a36Sopenharmony_ci } 113062306a36Sopenharmony_ci if (!skb) 113162306a36Sopenharmony_ci goto error; 113262306a36Sopenharmony_ci 113362306a36Sopenharmony_ci /* 113462306a36Sopenharmony_ci * Fill in the control structures 113562306a36Sopenharmony_ci */ 113662306a36Sopenharmony_ci skb->ip_summed = csummode; 113762306a36Sopenharmony_ci skb->csum = 0; 113862306a36Sopenharmony_ci skb_reserve(skb, hh_len); 113962306a36Sopenharmony_ci 114062306a36Sopenharmony_ci /* 114162306a36Sopenharmony_ci * Find where to start putting bytes. 114262306a36Sopenharmony_ci */ 114362306a36Sopenharmony_ci data = skb_put(skb, fraglen + exthdrlen - pagedlen); 114462306a36Sopenharmony_ci skb_set_network_header(skb, exthdrlen); 114562306a36Sopenharmony_ci skb->transport_header = (skb->network_header + 114662306a36Sopenharmony_ci fragheaderlen); 114762306a36Sopenharmony_ci data += fragheaderlen + exthdrlen; 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci if (fraggap) { 115062306a36Sopenharmony_ci skb->csum = skb_copy_and_csum_bits( 115162306a36Sopenharmony_ci skb_prev, maxfraglen, 115262306a36Sopenharmony_ci data + transhdrlen, fraggap); 115362306a36Sopenharmony_ci skb_prev->csum = csum_sub(skb_prev->csum, 115462306a36Sopenharmony_ci skb->csum); 115562306a36Sopenharmony_ci data += fraggap; 115662306a36Sopenharmony_ci pskb_trim_unique(skb_prev, maxfraglen); 115762306a36Sopenharmony_ci } 115862306a36Sopenharmony_ci 115962306a36Sopenharmony_ci copy = datalen - transhdrlen - fraggap - pagedlen; 116062306a36Sopenharmony_ci /* [!] NOTE: copy will be negative if pagedlen>0 116162306a36Sopenharmony_ci * because then the equation reduces to -fraggap. 116262306a36Sopenharmony_ci */ 116362306a36Sopenharmony_ci if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { 116462306a36Sopenharmony_ci err = -EFAULT; 116562306a36Sopenharmony_ci kfree_skb(skb); 116662306a36Sopenharmony_ci goto error; 116762306a36Sopenharmony_ci } else if (flags & MSG_SPLICE_PAGES) { 116862306a36Sopenharmony_ci copy = 0; 116962306a36Sopenharmony_ci } 117062306a36Sopenharmony_ci 117162306a36Sopenharmony_ci offset += copy; 117262306a36Sopenharmony_ci length -= copy + transhdrlen; 117362306a36Sopenharmony_ci transhdrlen = 0; 117462306a36Sopenharmony_ci exthdrlen = 0; 117562306a36Sopenharmony_ci csummode = CHECKSUM_NONE; 117662306a36Sopenharmony_ci 117762306a36Sopenharmony_ci /* only the initial fragment is time stamped */ 117862306a36Sopenharmony_ci skb_shinfo(skb)->tx_flags = cork->tx_flags; 117962306a36Sopenharmony_ci cork->tx_flags = 0; 118062306a36Sopenharmony_ci skb_shinfo(skb)->tskey = tskey; 118162306a36Sopenharmony_ci tskey = 0; 118262306a36Sopenharmony_ci skb_zcopy_set(skb, uarg, &extra_uref); 118362306a36Sopenharmony_ci 118462306a36Sopenharmony_ci if ((flags & MSG_CONFIRM) && !skb_prev) 118562306a36Sopenharmony_ci skb_set_dst_pending_confirm(skb, 1); 118662306a36Sopenharmony_ci 118762306a36Sopenharmony_ci /* 118862306a36Sopenharmony_ci * Put the packet on the pending queue. 118962306a36Sopenharmony_ci */ 119062306a36Sopenharmony_ci if (!skb->destructor) { 119162306a36Sopenharmony_ci skb->destructor = sock_wfree; 119262306a36Sopenharmony_ci skb->sk = sk; 119362306a36Sopenharmony_ci wmem_alloc_delta += skb->truesize; 119462306a36Sopenharmony_ci } 119562306a36Sopenharmony_ci __skb_queue_tail(queue, skb); 119662306a36Sopenharmony_ci continue; 119762306a36Sopenharmony_ci } 119862306a36Sopenharmony_ci 119962306a36Sopenharmony_ci if (copy > length) 120062306a36Sopenharmony_ci copy = length; 120162306a36Sopenharmony_ci 120262306a36Sopenharmony_ci if (!(rt->dst.dev->features&NETIF_F_SG) && 120362306a36Sopenharmony_ci skb_tailroom(skb) >= copy) { 120462306a36Sopenharmony_ci unsigned int off; 120562306a36Sopenharmony_ci 120662306a36Sopenharmony_ci off = skb->len; 120762306a36Sopenharmony_ci if (getfrag(from, skb_put(skb, copy), 120862306a36Sopenharmony_ci offset, copy, off, skb) < 0) { 120962306a36Sopenharmony_ci __skb_trim(skb, off); 121062306a36Sopenharmony_ci err = -EFAULT; 121162306a36Sopenharmony_ci goto error; 121262306a36Sopenharmony_ci } 121362306a36Sopenharmony_ci } else if (flags & MSG_SPLICE_PAGES) { 121462306a36Sopenharmony_ci struct msghdr *msg = from; 121562306a36Sopenharmony_ci 121662306a36Sopenharmony_ci err = -EIO; 121762306a36Sopenharmony_ci if (WARN_ON_ONCE(copy > msg->msg_iter.count)) 121862306a36Sopenharmony_ci goto error; 121962306a36Sopenharmony_ci 122062306a36Sopenharmony_ci err = skb_splice_from_iter(skb, &msg->msg_iter, copy, 122162306a36Sopenharmony_ci sk->sk_allocation); 122262306a36Sopenharmony_ci if (err < 0) 122362306a36Sopenharmony_ci goto error; 122462306a36Sopenharmony_ci copy = err; 122562306a36Sopenharmony_ci wmem_alloc_delta += copy; 122662306a36Sopenharmony_ci } else if (!zc) { 122762306a36Sopenharmony_ci int i = skb_shinfo(skb)->nr_frags; 122862306a36Sopenharmony_ci 122962306a36Sopenharmony_ci err = -ENOMEM; 123062306a36Sopenharmony_ci if (!sk_page_frag_refill(sk, pfrag)) 123162306a36Sopenharmony_ci goto error; 123262306a36Sopenharmony_ci 123362306a36Sopenharmony_ci skb_zcopy_downgrade_managed(skb); 123462306a36Sopenharmony_ci if (!skb_can_coalesce(skb, i, pfrag->page, 123562306a36Sopenharmony_ci pfrag->offset)) { 123662306a36Sopenharmony_ci err = -EMSGSIZE; 123762306a36Sopenharmony_ci if (i == MAX_SKB_FRAGS) 123862306a36Sopenharmony_ci goto error; 123962306a36Sopenharmony_ci 124062306a36Sopenharmony_ci __skb_fill_page_desc(skb, i, pfrag->page, 124162306a36Sopenharmony_ci pfrag->offset, 0); 124262306a36Sopenharmony_ci skb_shinfo(skb)->nr_frags = ++i; 124362306a36Sopenharmony_ci get_page(pfrag->page); 124462306a36Sopenharmony_ci } 124562306a36Sopenharmony_ci copy = min_t(int, copy, pfrag->size - pfrag->offset); 124662306a36Sopenharmony_ci if (getfrag(from, 124762306a36Sopenharmony_ci page_address(pfrag->page) + pfrag->offset, 124862306a36Sopenharmony_ci offset, copy, skb->len, skb) < 0) 124962306a36Sopenharmony_ci goto error_efault; 125062306a36Sopenharmony_ci 125162306a36Sopenharmony_ci pfrag->offset += copy; 125262306a36Sopenharmony_ci skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); 125362306a36Sopenharmony_ci skb_len_add(skb, copy); 125462306a36Sopenharmony_ci wmem_alloc_delta += copy; 125562306a36Sopenharmony_ci } else { 125662306a36Sopenharmony_ci err = skb_zerocopy_iter_dgram(skb, from, copy); 125762306a36Sopenharmony_ci if (err < 0) 125862306a36Sopenharmony_ci goto error; 125962306a36Sopenharmony_ci } 126062306a36Sopenharmony_ci offset += copy; 126162306a36Sopenharmony_ci length -= copy; 126262306a36Sopenharmony_ci } 126362306a36Sopenharmony_ci 126462306a36Sopenharmony_ci if (wmem_alloc_delta) 126562306a36Sopenharmony_ci refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 126662306a36Sopenharmony_ci return 0; 126762306a36Sopenharmony_ci 126862306a36Sopenharmony_cierror_efault: 126962306a36Sopenharmony_ci err = -EFAULT; 127062306a36Sopenharmony_cierror: 127162306a36Sopenharmony_ci net_zcopy_put_abort(uarg, extra_uref); 127262306a36Sopenharmony_ci cork->length -= length; 127362306a36Sopenharmony_ci IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); 127462306a36Sopenharmony_ci refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 127562306a36Sopenharmony_ci return err; 127662306a36Sopenharmony_ci} 127762306a36Sopenharmony_ci 127862306a36Sopenharmony_cistatic int ip_setup_cork(struct sock *sk, struct inet_cork *cork, 127962306a36Sopenharmony_ci struct ipcm_cookie *ipc, struct rtable **rtp) 128062306a36Sopenharmony_ci{ 128162306a36Sopenharmony_ci struct ip_options_rcu *opt; 128262306a36Sopenharmony_ci struct rtable *rt; 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci rt = *rtp; 128562306a36Sopenharmony_ci if (unlikely(!rt)) 128662306a36Sopenharmony_ci return -EFAULT; 128762306a36Sopenharmony_ci 128862306a36Sopenharmony_ci cork->fragsize = ip_sk_use_pmtu(sk) ? 128962306a36Sopenharmony_ci dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); 129062306a36Sopenharmony_ci 129162306a36Sopenharmony_ci if (!inetdev_valid_mtu(cork->fragsize)) 129262306a36Sopenharmony_ci return -ENETUNREACH; 129362306a36Sopenharmony_ci 129462306a36Sopenharmony_ci /* 129562306a36Sopenharmony_ci * setup for corking. 129662306a36Sopenharmony_ci */ 129762306a36Sopenharmony_ci opt = ipc->opt; 129862306a36Sopenharmony_ci if (opt) { 129962306a36Sopenharmony_ci if (!cork->opt) { 130062306a36Sopenharmony_ci cork->opt = kmalloc(sizeof(struct ip_options) + 40, 130162306a36Sopenharmony_ci sk->sk_allocation); 130262306a36Sopenharmony_ci if (unlikely(!cork->opt)) 130362306a36Sopenharmony_ci return -ENOBUFS; 130462306a36Sopenharmony_ci } 130562306a36Sopenharmony_ci memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen); 130662306a36Sopenharmony_ci cork->flags |= IPCORK_OPT; 130762306a36Sopenharmony_ci cork->addr = ipc->addr; 130862306a36Sopenharmony_ci } 130962306a36Sopenharmony_ci 131062306a36Sopenharmony_ci cork->gso_size = ipc->gso_size; 131162306a36Sopenharmony_ci 131262306a36Sopenharmony_ci cork->dst = &rt->dst; 131362306a36Sopenharmony_ci /* We stole this route, caller should not release it. */ 131462306a36Sopenharmony_ci *rtp = NULL; 131562306a36Sopenharmony_ci 131662306a36Sopenharmony_ci cork->length = 0; 131762306a36Sopenharmony_ci cork->ttl = ipc->ttl; 131862306a36Sopenharmony_ci cork->tos = ipc->tos; 131962306a36Sopenharmony_ci cork->mark = ipc->sockc.mark; 132062306a36Sopenharmony_ci cork->priority = ipc->priority; 132162306a36Sopenharmony_ci cork->transmit_time = ipc->sockc.transmit_time; 132262306a36Sopenharmony_ci cork->tx_flags = 0; 132362306a36Sopenharmony_ci sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags); 132462306a36Sopenharmony_ci 132562306a36Sopenharmony_ci return 0; 132662306a36Sopenharmony_ci} 132762306a36Sopenharmony_ci 132862306a36Sopenharmony_ci/* 132962306a36Sopenharmony_ci * ip_append_data() can make one large IP datagram from many pieces of 133062306a36Sopenharmony_ci * data. Each piece will be held on the socket until 133162306a36Sopenharmony_ci * ip_push_pending_frames() is called. Each piece can be a page or 133262306a36Sopenharmony_ci * non-page data. 133362306a36Sopenharmony_ci * 133462306a36Sopenharmony_ci * Not only UDP, other transport protocols - e.g. raw sockets - can use 133562306a36Sopenharmony_ci * this interface potentially. 133662306a36Sopenharmony_ci * 133762306a36Sopenharmony_ci * LATER: length must be adjusted by pad at tail, when it is required. 133862306a36Sopenharmony_ci */ 133962306a36Sopenharmony_ciint ip_append_data(struct sock *sk, struct flowi4 *fl4, 134062306a36Sopenharmony_ci int getfrag(void *from, char *to, int offset, int len, 134162306a36Sopenharmony_ci int odd, struct sk_buff *skb), 134262306a36Sopenharmony_ci void *from, int length, int transhdrlen, 134362306a36Sopenharmony_ci struct ipcm_cookie *ipc, struct rtable **rtp, 134462306a36Sopenharmony_ci unsigned int flags) 134562306a36Sopenharmony_ci{ 134662306a36Sopenharmony_ci struct inet_sock *inet = inet_sk(sk); 134762306a36Sopenharmony_ci int err; 134862306a36Sopenharmony_ci 134962306a36Sopenharmony_ci if (flags&MSG_PROBE) 135062306a36Sopenharmony_ci return 0; 135162306a36Sopenharmony_ci 135262306a36Sopenharmony_ci if (skb_queue_empty(&sk->sk_write_queue)) { 135362306a36Sopenharmony_ci err = ip_setup_cork(sk, &inet->cork.base, ipc, rtp); 135462306a36Sopenharmony_ci if (err) 135562306a36Sopenharmony_ci return err; 135662306a36Sopenharmony_ci } else { 135762306a36Sopenharmony_ci transhdrlen = 0; 135862306a36Sopenharmony_ci } 135962306a36Sopenharmony_ci 136062306a36Sopenharmony_ci return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, 136162306a36Sopenharmony_ci sk_page_frag(sk), getfrag, 136262306a36Sopenharmony_ci from, length, transhdrlen, flags); 136362306a36Sopenharmony_ci} 136462306a36Sopenharmony_ci 136562306a36Sopenharmony_cistatic void ip_cork_release(struct inet_cork *cork) 136662306a36Sopenharmony_ci{ 136762306a36Sopenharmony_ci cork->flags &= ~IPCORK_OPT; 136862306a36Sopenharmony_ci kfree(cork->opt); 136962306a36Sopenharmony_ci cork->opt = NULL; 137062306a36Sopenharmony_ci dst_release(cork->dst); 137162306a36Sopenharmony_ci cork->dst = NULL; 137262306a36Sopenharmony_ci} 137362306a36Sopenharmony_ci 137462306a36Sopenharmony_ci/* 137562306a36Sopenharmony_ci * Combined all pending IP fragments on the socket as one IP datagram 137662306a36Sopenharmony_ci * and push them out. 137762306a36Sopenharmony_ci */ 137862306a36Sopenharmony_cistruct sk_buff *__ip_make_skb(struct sock *sk, 137962306a36Sopenharmony_ci struct flowi4 *fl4, 138062306a36Sopenharmony_ci struct sk_buff_head *queue, 138162306a36Sopenharmony_ci struct inet_cork *cork) 138262306a36Sopenharmony_ci{ 138362306a36Sopenharmony_ci struct sk_buff *skb, *tmp_skb; 138462306a36Sopenharmony_ci struct sk_buff **tail_skb; 138562306a36Sopenharmony_ci struct inet_sock *inet = inet_sk(sk); 138662306a36Sopenharmony_ci struct net *net = sock_net(sk); 138762306a36Sopenharmony_ci struct ip_options *opt = NULL; 138862306a36Sopenharmony_ci struct rtable *rt = (struct rtable *)cork->dst; 138962306a36Sopenharmony_ci struct iphdr *iph; 139062306a36Sopenharmony_ci __be16 df = 0; 139162306a36Sopenharmony_ci __u8 ttl; 139262306a36Sopenharmony_ci 139362306a36Sopenharmony_ci skb = __skb_dequeue(queue); 139462306a36Sopenharmony_ci if (!skb) 139562306a36Sopenharmony_ci goto out; 139662306a36Sopenharmony_ci tail_skb = &(skb_shinfo(skb)->frag_list); 139762306a36Sopenharmony_ci 139862306a36Sopenharmony_ci /* move skb->data to ip header from ext header */ 139962306a36Sopenharmony_ci if (skb->data < skb_network_header(skb)) 140062306a36Sopenharmony_ci __skb_pull(skb, skb_network_offset(skb)); 140162306a36Sopenharmony_ci while ((tmp_skb = __skb_dequeue(queue)) != NULL) { 140262306a36Sopenharmony_ci __skb_pull(tmp_skb, skb_network_header_len(skb)); 140362306a36Sopenharmony_ci *tail_skb = tmp_skb; 140462306a36Sopenharmony_ci tail_skb = &(tmp_skb->next); 140562306a36Sopenharmony_ci skb->len += tmp_skb->len; 140662306a36Sopenharmony_ci skb->data_len += tmp_skb->len; 140762306a36Sopenharmony_ci skb->truesize += tmp_skb->truesize; 140862306a36Sopenharmony_ci tmp_skb->destructor = NULL; 140962306a36Sopenharmony_ci tmp_skb->sk = NULL; 141062306a36Sopenharmony_ci } 141162306a36Sopenharmony_ci 141262306a36Sopenharmony_ci /* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow 141362306a36Sopenharmony_ci * to fragment the frame generated here. No matter, what transforms 141462306a36Sopenharmony_ci * how transforms change size of the packet, it will come out. 141562306a36Sopenharmony_ci */ 141662306a36Sopenharmony_ci skb->ignore_df = ip_sk_ignore_df(sk); 141762306a36Sopenharmony_ci 141862306a36Sopenharmony_ci /* DF bit is set when we want to see DF on outgoing frames. 141962306a36Sopenharmony_ci * If ignore_df is set too, we still allow to fragment this frame 142062306a36Sopenharmony_ci * locally. */ 142162306a36Sopenharmony_ci if (inet->pmtudisc == IP_PMTUDISC_DO || 142262306a36Sopenharmony_ci inet->pmtudisc == IP_PMTUDISC_PROBE || 142362306a36Sopenharmony_ci (skb->len <= dst_mtu(&rt->dst) && 142462306a36Sopenharmony_ci ip_dont_fragment(sk, &rt->dst))) 142562306a36Sopenharmony_ci df = htons(IP_DF); 142662306a36Sopenharmony_ci 142762306a36Sopenharmony_ci if (cork->flags & IPCORK_OPT) 142862306a36Sopenharmony_ci opt = cork->opt; 142962306a36Sopenharmony_ci 143062306a36Sopenharmony_ci if (cork->ttl != 0) 143162306a36Sopenharmony_ci ttl = cork->ttl; 143262306a36Sopenharmony_ci else if (rt->rt_type == RTN_MULTICAST) 143362306a36Sopenharmony_ci ttl = inet->mc_ttl; 143462306a36Sopenharmony_ci else 143562306a36Sopenharmony_ci ttl = ip_select_ttl(inet, &rt->dst); 143662306a36Sopenharmony_ci 143762306a36Sopenharmony_ci iph = ip_hdr(skb); 143862306a36Sopenharmony_ci iph->version = 4; 143962306a36Sopenharmony_ci iph->ihl = 5; 144062306a36Sopenharmony_ci iph->tos = (cork->tos != -1) ? cork->tos : inet->tos; 144162306a36Sopenharmony_ci iph->frag_off = df; 144262306a36Sopenharmony_ci iph->ttl = ttl; 144362306a36Sopenharmony_ci iph->protocol = sk->sk_protocol; 144462306a36Sopenharmony_ci ip_copy_addrs(iph, fl4); 144562306a36Sopenharmony_ci ip_select_ident(net, skb, sk); 144662306a36Sopenharmony_ci 144762306a36Sopenharmony_ci if (opt) { 144862306a36Sopenharmony_ci iph->ihl += opt->optlen >> 2; 144962306a36Sopenharmony_ci ip_options_build(skb, opt, cork->addr, rt); 145062306a36Sopenharmony_ci } 145162306a36Sopenharmony_ci 145262306a36Sopenharmony_ci skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority; 145362306a36Sopenharmony_ci skb->mark = cork->mark; 145462306a36Sopenharmony_ci skb->tstamp = cork->transmit_time; 145562306a36Sopenharmony_ci /* 145662306a36Sopenharmony_ci * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec 145762306a36Sopenharmony_ci * on dst refcount 145862306a36Sopenharmony_ci */ 145962306a36Sopenharmony_ci cork->dst = NULL; 146062306a36Sopenharmony_ci skb_dst_set(skb, &rt->dst); 146162306a36Sopenharmony_ci 146262306a36Sopenharmony_ci if (iph->protocol == IPPROTO_ICMP) { 146362306a36Sopenharmony_ci u8 icmp_type; 146462306a36Sopenharmony_ci 146562306a36Sopenharmony_ci /* For such sockets, transhdrlen is zero when do ip_append_data(), 146662306a36Sopenharmony_ci * so icmphdr does not in skb linear region and can not get icmp_type 146762306a36Sopenharmony_ci * by icmp_hdr(skb)->type. 146862306a36Sopenharmony_ci */ 146962306a36Sopenharmony_ci if (sk->sk_type == SOCK_RAW && 147062306a36Sopenharmony_ci !inet_test_bit(HDRINCL, sk)) 147162306a36Sopenharmony_ci icmp_type = fl4->fl4_icmp_type; 147262306a36Sopenharmony_ci else 147362306a36Sopenharmony_ci icmp_type = icmp_hdr(skb)->type; 147462306a36Sopenharmony_ci icmp_out_count(net, icmp_type); 147562306a36Sopenharmony_ci } 147662306a36Sopenharmony_ci 147762306a36Sopenharmony_ci ip_cork_release(cork); 147862306a36Sopenharmony_ciout: 147962306a36Sopenharmony_ci return skb; 148062306a36Sopenharmony_ci} 148162306a36Sopenharmony_ci 148262306a36Sopenharmony_ciint ip_send_skb(struct net *net, struct sk_buff *skb) 148362306a36Sopenharmony_ci{ 148462306a36Sopenharmony_ci int err; 148562306a36Sopenharmony_ci 148662306a36Sopenharmony_ci err = ip_local_out(net, skb->sk, skb); 148762306a36Sopenharmony_ci if (err) { 148862306a36Sopenharmony_ci if (err > 0) 148962306a36Sopenharmony_ci err = net_xmit_errno(err); 149062306a36Sopenharmony_ci if (err) 149162306a36Sopenharmony_ci IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); 149262306a36Sopenharmony_ci } 149362306a36Sopenharmony_ci 149462306a36Sopenharmony_ci return err; 149562306a36Sopenharmony_ci} 149662306a36Sopenharmony_ci 149762306a36Sopenharmony_ciint ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4) 149862306a36Sopenharmony_ci{ 149962306a36Sopenharmony_ci struct sk_buff *skb; 150062306a36Sopenharmony_ci 150162306a36Sopenharmony_ci skb = ip_finish_skb(sk, fl4); 150262306a36Sopenharmony_ci if (!skb) 150362306a36Sopenharmony_ci return 0; 150462306a36Sopenharmony_ci 150562306a36Sopenharmony_ci /* Netfilter gets whole the not fragmented skb. */ 150662306a36Sopenharmony_ci return ip_send_skb(sock_net(sk), skb); 150762306a36Sopenharmony_ci} 150862306a36Sopenharmony_ci 150962306a36Sopenharmony_ci/* 151062306a36Sopenharmony_ci * Throw away all pending data on the socket. 151162306a36Sopenharmony_ci */ 151262306a36Sopenharmony_cistatic void __ip_flush_pending_frames(struct sock *sk, 151362306a36Sopenharmony_ci struct sk_buff_head *queue, 151462306a36Sopenharmony_ci struct inet_cork *cork) 151562306a36Sopenharmony_ci{ 151662306a36Sopenharmony_ci struct sk_buff *skb; 151762306a36Sopenharmony_ci 151862306a36Sopenharmony_ci while ((skb = __skb_dequeue_tail(queue)) != NULL) 151962306a36Sopenharmony_ci kfree_skb(skb); 152062306a36Sopenharmony_ci 152162306a36Sopenharmony_ci ip_cork_release(cork); 152262306a36Sopenharmony_ci} 152362306a36Sopenharmony_ci 152462306a36Sopenharmony_civoid ip_flush_pending_frames(struct sock *sk) 152562306a36Sopenharmony_ci{ 152662306a36Sopenharmony_ci __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork.base); 152762306a36Sopenharmony_ci} 152862306a36Sopenharmony_ci 152962306a36Sopenharmony_cistruct sk_buff *ip_make_skb(struct sock *sk, 153062306a36Sopenharmony_ci struct flowi4 *fl4, 153162306a36Sopenharmony_ci int getfrag(void *from, char *to, int offset, 153262306a36Sopenharmony_ci int len, int odd, struct sk_buff *skb), 153362306a36Sopenharmony_ci void *from, int length, int transhdrlen, 153462306a36Sopenharmony_ci struct ipcm_cookie *ipc, struct rtable **rtp, 153562306a36Sopenharmony_ci struct inet_cork *cork, unsigned int flags) 153662306a36Sopenharmony_ci{ 153762306a36Sopenharmony_ci struct sk_buff_head queue; 153862306a36Sopenharmony_ci int err; 153962306a36Sopenharmony_ci 154062306a36Sopenharmony_ci if (flags & MSG_PROBE) 154162306a36Sopenharmony_ci return NULL; 154262306a36Sopenharmony_ci 154362306a36Sopenharmony_ci __skb_queue_head_init(&queue); 154462306a36Sopenharmony_ci 154562306a36Sopenharmony_ci cork->flags = 0; 154662306a36Sopenharmony_ci cork->addr = 0; 154762306a36Sopenharmony_ci cork->opt = NULL; 154862306a36Sopenharmony_ci err = ip_setup_cork(sk, cork, ipc, rtp); 154962306a36Sopenharmony_ci if (err) 155062306a36Sopenharmony_ci return ERR_PTR(err); 155162306a36Sopenharmony_ci 155262306a36Sopenharmony_ci err = __ip_append_data(sk, fl4, &queue, cork, 155362306a36Sopenharmony_ci ¤t->task_frag, getfrag, 155462306a36Sopenharmony_ci from, length, transhdrlen, flags); 155562306a36Sopenharmony_ci if (err) { 155662306a36Sopenharmony_ci __ip_flush_pending_frames(sk, &queue, cork); 155762306a36Sopenharmony_ci return ERR_PTR(err); 155862306a36Sopenharmony_ci } 155962306a36Sopenharmony_ci 156062306a36Sopenharmony_ci return __ip_make_skb(sk, fl4, &queue, cork); 156162306a36Sopenharmony_ci} 156262306a36Sopenharmony_ci 156362306a36Sopenharmony_ci/* 156462306a36Sopenharmony_ci * Fetch data from kernel space and fill in checksum if needed. 156562306a36Sopenharmony_ci */ 156662306a36Sopenharmony_cistatic int ip_reply_glue_bits(void *dptr, char *to, int offset, 156762306a36Sopenharmony_ci int len, int odd, struct sk_buff *skb) 156862306a36Sopenharmony_ci{ 156962306a36Sopenharmony_ci __wsum csum; 157062306a36Sopenharmony_ci 157162306a36Sopenharmony_ci csum = csum_partial_copy_nocheck(dptr+offset, to, len); 157262306a36Sopenharmony_ci skb->csum = csum_block_add(skb->csum, csum, odd); 157362306a36Sopenharmony_ci return 0; 157462306a36Sopenharmony_ci} 157562306a36Sopenharmony_ci 157662306a36Sopenharmony_ci/* 157762306a36Sopenharmony_ci * Generic function to send a packet as reply to another packet. 157862306a36Sopenharmony_ci * Used to send some TCP resets/acks so far. 157962306a36Sopenharmony_ci */ 158062306a36Sopenharmony_civoid ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, 158162306a36Sopenharmony_ci const struct ip_options *sopt, 158262306a36Sopenharmony_ci __be32 daddr, __be32 saddr, 158362306a36Sopenharmony_ci const struct ip_reply_arg *arg, 158462306a36Sopenharmony_ci unsigned int len, u64 transmit_time, u32 txhash) 158562306a36Sopenharmony_ci{ 158662306a36Sopenharmony_ci struct ip_options_data replyopts; 158762306a36Sopenharmony_ci struct ipcm_cookie ipc; 158862306a36Sopenharmony_ci struct flowi4 fl4; 158962306a36Sopenharmony_ci struct rtable *rt = skb_rtable(skb); 159062306a36Sopenharmony_ci struct net *net = sock_net(sk); 159162306a36Sopenharmony_ci struct sk_buff *nskb; 159262306a36Sopenharmony_ci int err; 159362306a36Sopenharmony_ci int oif; 159462306a36Sopenharmony_ci 159562306a36Sopenharmony_ci if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt)) 159662306a36Sopenharmony_ci return; 159762306a36Sopenharmony_ci 159862306a36Sopenharmony_ci ipcm_init(&ipc); 159962306a36Sopenharmony_ci ipc.addr = daddr; 160062306a36Sopenharmony_ci ipc.sockc.transmit_time = transmit_time; 160162306a36Sopenharmony_ci 160262306a36Sopenharmony_ci if (replyopts.opt.opt.optlen) { 160362306a36Sopenharmony_ci ipc.opt = &replyopts.opt; 160462306a36Sopenharmony_ci 160562306a36Sopenharmony_ci if (replyopts.opt.opt.srr) 160662306a36Sopenharmony_ci daddr = replyopts.opt.opt.faddr; 160762306a36Sopenharmony_ci } 160862306a36Sopenharmony_ci 160962306a36Sopenharmony_ci oif = arg->bound_dev_if; 161062306a36Sopenharmony_ci if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 161162306a36Sopenharmony_ci oif = skb->skb_iif; 161262306a36Sopenharmony_ci 161362306a36Sopenharmony_ci flowi4_init_output(&fl4, oif, 161462306a36Sopenharmony_ci IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark, 161562306a36Sopenharmony_ci RT_TOS(arg->tos), 161662306a36Sopenharmony_ci RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, 161762306a36Sopenharmony_ci ip_reply_arg_flowi_flags(arg), 161862306a36Sopenharmony_ci daddr, saddr, 161962306a36Sopenharmony_ci tcp_hdr(skb)->source, tcp_hdr(skb)->dest, 162062306a36Sopenharmony_ci arg->uid); 162162306a36Sopenharmony_ci security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4)); 162262306a36Sopenharmony_ci rt = ip_route_output_flow(net, &fl4, sk); 162362306a36Sopenharmony_ci if (IS_ERR(rt)) 162462306a36Sopenharmony_ci return; 162562306a36Sopenharmony_ci 162662306a36Sopenharmony_ci inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK; 162762306a36Sopenharmony_ci 162862306a36Sopenharmony_ci sk->sk_protocol = ip_hdr(skb)->protocol; 162962306a36Sopenharmony_ci sk->sk_bound_dev_if = arg->bound_dev_if; 163062306a36Sopenharmony_ci sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default); 163162306a36Sopenharmony_ci ipc.sockc.mark = fl4.flowi4_mark; 163262306a36Sopenharmony_ci err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, 163362306a36Sopenharmony_ci len, 0, &ipc, &rt, MSG_DONTWAIT); 163462306a36Sopenharmony_ci if (unlikely(err)) { 163562306a36Sopenharmony_ci ip_flush_pending_frames(sk); 163662306a36Sopenharmony_ci goto out; 163762306a36Sopenharmony_ci } 163862306a36Sopenharmony_ci 163962306a36Sopenharmony_ci nskb = skb_peek(&sk->sk_write_queue); 164062306a36Sopenharmony_ci if (nskb) { 164162306a36Sopenharmony_ci if (arg->csumoffset >= 0) 164262306a36Sopenharmony_ci *((__sum16 *)skb_transport_header(nskb) + 164362306a36Sopenharmony_ci arg->csumoffset) = csum_fold(csum_add(nskb->csum, 164462306a36Sopenharmony_ci arg->csum)); 164562306a36Sopenharmony_ci nskb->ip_summed = CHECKSUM_NONE; 164662306a36Sopenharmony_ci nskb->mono_delivery_time = !!transmit_time; 164762306a36Sopenharmony_ci if (txhash) 164862306a36Sopenharmony_ci skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4); 164962306a36Sopenharmony_ci ip_push_pending_frames(sk, &fl4); 165062306a36Sopenharmony_ci } 165162306a36Sopenharmony_ciout: 165262306a36Sopenharmony_ci ip_rt_put(rt); 165362306a36Sopenharmony_ci} 165462306a36Sopenharmony_ci 165562306a36Sopenharmony_civoid __init ip_init(void) 165662306a36Sopenharmony_ci{ 165762306a36Sopenharmony_ci ip_rt_init(); 165862306a36Sopenharmony_ci inet_initpeers(); 165962306a36Sopenharmony_ci 166062306a36Sopenharmony_ci#if defined(CONFIG_IP_MULTICAST) 166162306a36Sopenharmony_ci igmp_mc_init(); 166262306a36Sopenharmony_ci#endif 166362306a36Sopenharmony_ci} 1664