162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * INET An implementation of the TCP/IP protocol suite for the LINUX 462306a36Sopenharmony_ci * operating system. INET is implemented using the BSD Socket 562306a36Sopenharmony_ci * interface as the means of communication with the user level. 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * The Internet Protocol (IP) module. 862306a36Sopenharmony_ci * 962306a36Sopenharmony_ci * Authors: Ross Biro 1062306a36Sopenharmony_ci * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 1162306a36Sopenharmony_ci * Donald Becker, <becker@super.org> 1262306a36Sopenharmony_ci * Alan Cox, <alan@lxorguk.ukuu.org.uk> 1362306a36Sopenharmony_ci * Richard Underwood 1462306a36Sopenharmony_ci * Stefan Becker, <stefanb@yello.ping.de> 1562306a36Sopenharmony_ci * Jorge Cwik, <jorge@laser.satlink.net> 1662306a36Sopenharmony_ci * Arnt Gulbrandsen, <agulbra@nvg.unit.no> 1762306a36Sopenharmony_ci * 1862306a36Sopenharmony_ci * Fixes: 1962306a36Sopenharmony_ci * Alan Cox : Commented a couple of minor bits of surplus code 2062306a36Sopenharmony_ci * Alan Cox : Undefining IP_FORWARD doesn't include the code 2162306a36Sopenharmony_ci * (just stops a compiler warning). 2262306a36Sopenharmony_ci * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes 2362306a36Sopenharmony_ci * are junked rather than corrupting things. 2462306a36Sopenharmony_ci * Alan Cox : Frames to bad broadcast subnets are dumped 2562306a36Sopenharmony_ci * We used to process them non broadcast and 2662306a36Sopenharmony_ci * boy could that cause havoc. 2762306a36Sopenharmony_ci * Alan Cox : ip_forward sets the free flag on the 2862306a36Sopenharmony_ci * new frame it queues. Still crap because 2962306a36Sopenharmony_ci * it copies the frame but at least it 3062306a36Sopenharmony_ci * doesn't eat memory too. 3162306a36Sopenharmony_ci * Alan Cox : Generic queue code and memory fixes. 3262306a36Sopenharmony_ci * Fred Van Kempen : IP fragment support (borrowed from NET2E) 3362306a36Sopenharmony_ci * Gerhard Koerting: Forward fragmented frames correctly. 3462306a36Sopenharmony_ci * Gerhard Koerting: Fixes to my fix of the above 8-). 3562306a36Sopenharmony_ci * Gerhard Koerting: IP interface addressing fix. 3662306a36Sopenharmony_ci * Linus Torvalds : More robustness checks 3762306a36Sopenharmony_ci * Alan Cox : Even more checks: Still not as robust as it ought to be 3862306a36Sopenharmony_ci * Alan Cox : Save IP header pointer for later 3962306a36Sopenharmony_ci * Alan Cox : ip option setting 4062306a36Sopenharmony_ci * Alan Cox : Use ip_tos/ip_ttl settings 4162306a36Sopenharmony_ci * Alan Cox : Fragmentation bogosity removed 4262306a36Sopenharmony_ci * (Thanks to Mark.Bush@prg.ox.ac.uk) 4362306a36Sopenharmony_ci * Dmitry Gorodchanin : Send of a raw packet crash fix. 4462306a36Sopenharmony_ci * Alan Cox : Silly ip bug when an overlength 4562306a36Sopenharmony_ci * fragment turns up. Now frees the 4662306a36Sopenharmony_ci * queue. 4762306a36Sopenharmony_ci * Linus Torvalds/ : Memory leakage on fragmentation 4862306a36Sopenharmony_ci * Alan Cox : handling. 4962306a36Sopenharmony_ci * Gerhard Koerting: Forwarding uses IP priority hints 5062306a36Sopenharmony_ci * Teemu Rantanen : Fragment problems. 5162306a36Sopenharmony_ci * Alan Cox : General cleanup, comments and reformat 5262306a36Sopenharmony_ci * Alan Cox : SNMP statistics 5362306a36Sopenharmony_ci * Alan Cox : BSD address rule semantics. Also see 5462306a36Sopenharmony_ci * UDP as there is a nasty checksum issue 5562306a36Sopenharmony_ci * if you do things the wrong way. 5662306a36Sopenharmony_ci * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file 5762306a36Sopenharmony_ci * Alan Cox : IP options adjust sk->priority. 5862306a36Sopenharmony_ci * Pedro Roque : Fix mtu/length error in ip_forward. 5962306a36Sopenharmony_ci * Alan Cox : Avoid ip_chk_addr when possible. 6062306a36Sopenharmony_ci * Richard Underwood : IP multicasting. 6162306a36Sopenharmony_ci * Alan Cox : Cleaned up multicast handlers. 6262306a36Sopenharmony_ci * Alan Cox : RAW sockets demultiplex in the BSD style. 6362306a36Sopenharmony_ci * Gunther Mayer : Fix the SNMP reporting typo 6462306a36Sopenharmony_ci * Alan Cox : Always in group 224.0.0.1 6562306a36Sopenharmony_ci * Pauline Middelink : Fast ip_checksum update when forwarding 6662306a36Sopenharmony_ci * Masquerading support. 6762306a36Sopenharmony_ci * Alan Cox : Multicast loopback error for 224.0.0.1 6862306a36Sopenharmony_ci * Alan Cox : IP_MULTICAST_LOOP option. 6962306a36Sopenharmony_ci * Alan Cox : Use notifiers. 7062306a36Sopenharmony_ci * Bjorn Ekwall : Removed ip_csum (from slhc.c too) 7162306a36Sopenharmony_ci * Bjorn Ekwall : Moved ip_fast_csum to ip.h (inline!) 7262306a36Sopenharmony_ci * Stefan Becker : Send out ICMP HOST REDIRECT 7362306a36Sopenharmony_ci * Arnt Gulbrandsen : ip_build_xmit 7462306a36Sopenharmony_ci * Alan Cox : Per socket routing cache 7562306a36Sopenharmony_ci * Alan Cox : Fixed routing cache, added header cache. 7662306a36Sopenharmony_ci * Alan Cox : Loopback didn't work right in original ip_build_xmit - fixed it. 7762306a36Sopenharmony_ci * Alan Cox : Only send ICMP_REDIRECT if src/dest are the same net. 7862306a36Sopenharmony_ci * Alan Cox : Incoming IP option handling. 7962306a36Sopenharmony_ci * Alan Cox : Set saddr on raw output frames as per BSD. 8062306a36Sopenharmony_ci * Alan Cox : Stopped broadcast source route explosions. 8162306a36Sopenharmony_ci * Alan Cox : Can disable source routing 8262306a36Sopenharmony_ci * Takeshi Sone : Masquerading didn't work. 8362306a36Sopenharmony_ci * Dave Bonn,Alan Cox : Faster IP forwarding whenever possible. 8462306a36Sopenharmony_ci * Alan Cox : Memory leaks, tramples, misc debugging. 8562306a36Sopenharmony_ci * Alan Cox : Fixed multicast (by popular demand 8)) 8662306a36Sopenharmony_ci * Alan Cox : Fixed forwarding (by even more popular demand 8)) 8762306a36Sopenharmony_ci * Alan Cox : Fixed SNMP statistics [I think] 8862306a36Sopenharmony_ci * Gerhard Koerting : IP fragmentation forwarding fix 8962306a36Sopenharmony_ci * Alan Cox : Device lock against page fault. 9062306a36Sopenharmony_ci * Alan Cox : IP_HDRINCL facility. 9162306a36Sopenharmony_ci * Werner Almesberger : Zero fragment bug 9262306a36Sopenharmony_ci * Alan Cox : RAW IP frame length bug 9362306a36Sopenharmony_ci * Alan Cox : Outgoing firewall on build_xmit 9462306a36Sopenharmony_ci * A.N.Kuznetsov : IP_OPTIONS support throughout the kernel 9562306a36Sopenharmony_ci * Alan Cox : Multicast routing hooks 9662306a36Sopenharmony_ci * Jos Vos : Do accounting *before* call_in_firewall 9762306a36Sopenharmony_ci * Willy Konynenberg : Transparent proxying support 9862306a36Sopenharmony_ci * 9962306a36Sopenharmony_ci * To Fix: 10062306a36Sopenharmony_ci * IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient 10162306a36Sopenharmony_ci * and could be made very efficient with the addition of some virtual memory hacks to permit 10262306a36Sopenharmony_ci * the allocation of a buffer that can then be 'grown' by twiddling page tables. 10362306a36Sopenharmony_ci * Output fragmentation wants updating along with the buffer management to use a single 10462306a36Sopenharmony_ci * interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet 10562306a36Sopenharmony_ci * output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause 10662306a36Sopenharmony_ci * fragmentation anyway. 10762306a36Sopenharmony_ci */ 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci#define pr_fmt(fmt) "IPv4: " fmt 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_ci#include <linux/module.h> 11262306a36Sopenharmony_ci#include <linux/types.h> 11362306a36Sopenharmony_ci#include <linux/kernel.h> 11462306a36Sopenharmony_ci#include <linux/string.h> 11562306a36Sopenharmony_ci#include <linux/errno.h> 11662306a36Sopenharmony_ci#include <linux/slab.h> 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci#include <linux/net.h> 11962306a36Sopenharmony_ci#include <linux/socket.h> 12062306a36Sopenharmony_ci#include <linux/sockios.h> 12162306a36Sopenharmony_ci#include <linux/in.h> 12262306a36Sopenharmony_ci#include <linux/inet.h> 12362306a36Sopenharmony_ci#include <linux/inetdevice.h> 12462306a36Sopenharmony_ci#include <linux/netdevice.h> 12562306a36Sopenharmony_ci#include <linux/etherdevice.h> 12662306a36Sopenharmony_ci#include <linux/indirect_call_wrapper.h> 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci#include <net/snmp.h> 12962306a36Sopenharmony_ci#include <net/ip.h> 13062306a36Sopenharmony_ci#include <net/protocol.h> 13162306a36Sopenharmony_ci#include <net/route.h> 13262306a36Sopenharmony_ci#include <linux/skbuff.h> 13362306a36Sopenharmony_ci#include <net/sock.h> 13462306a36Sopenharmony_ci#include <net/arp.h> 13562306a36Sopenharmony_ci#include <net/icmp.h> 13662306a36Sopenharmony_ci#include <net/raw.h> 13762306a36Sopenharmony_ci#include <net/checksum.h> 13862306a36Sopenharmony_ci#include <net/inet_ecn.h> 13962306a36Sopenharmony_ci#include <linux/netfilter_ipv4.h> 14062306a36Sopenharmony_ci#include <net/xfrm.h> 14162306a36Sopenharmony_ci#include <linux/mroute.h> 14262306a36Sopenharmony_ci#include <linux/netlink.h> 14362306a36Sopenharmony_ci#include <net/dst_metadata.h> 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci/* 14662306a36Sopenharmony_ci * Process Router Attention IP option (RFC 2113) 14762306a36Sopenharmony_ci */ 14862306a36Sopenharmony_cibool ip_call_ra_chain(struct sk_buff *skb) 14962306a36Sopenharmony_ci{ 15062306a36Sopenharmony_ci struct ip_ra_chain *ra; 15162306a36Sopenharmony_ci u8 protocol = ip_hdr(skb)->protocol; 15262306a36Sopenharmony_ci struct sock *last = NULL; 15362306a36Sopenharmony_ci struct net_device *dev = skb->dev; 15462306a36Sopenharmony_ci struct net *net = dev_net(dev); 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) { 15762306a36Sopenharmony_ci struct sock *sk = ra->sk; 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci /* If socket is bound to an interface, only report 16062306a36Sopenharmony_ci * the packet if it came from that interface. 16162306a36Sopenharmony_ci */ 16262306a36Sopenharmony_ci if (sk && inet_sk(sk)->inet_num == protocol && 16362306a36Sopenharmony_ci (!sk->sk_bound_dev_if || 16462306a36Sopenharmony_ci sk->sk_bound_dev_if == dev->ifindex)) { 16562306a36Sopenharmony_ci if (ip_is_fragment(ip_hdr(skb))) { 16662306a36Sopenharmony_ci if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN)) 16762306a36Sopenharmony_ci return true; 16862306a36Sopenharmony_ci } 16962306a36Sopenharmony_ci if (last) { 17062306a36Sopenharmony_ci struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 17162306a36Sopenharmony_ci if (skb2) 17262306a36Sopenharmony_ci raw_rcv(last, skb2); 17362306a36Sopenharmony_ci } 17462306a36Sopenharmony_ci last = sk; 17562306a36Sopenharmony_ci } 17662306a36Sopenharmony_ci } 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci if (last) { 17962306a36Sopenharmony_ci raw_rcv(last, skb); 18062306a36Sopenharmony_ci return true; 18162306a36Sopenharmony_ci } 18262306a36Sopenharmony_ci return false; 18362306a36Sopenharmony_ci} 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ciINDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *)); 18662306a36Sopenharmony_ciINDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *)); 18762306a36Sopenharmony_civoid ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol) 18862306a36Sopenharmony_ci{ 18962306a36Sopenharmony_ci const struct net_protocol *ipprot; 19062306a36Sopenharmony_ci int raw, ret; 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ciresubmit: 19362306a36Sopenharmony_ci raw = raw_local_deliver(skb, protocol); 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci ipprot = rcu_dereference(inet_protos[protocol]); 19662306a36Sopenharmony_ci if (ipprot) { 19762306a36Sopenharmony_ci if (!ipprot->no_policy) { 19862306a36Sopenharmony_ci if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 19962306a36Sopenharmony_ci kfree_skb_reason(skb, 20062306a36Sopenharmony_ci SKB_DROP_REASON_XFRM_POLICY); 20162306a36Sopenharmony_ci return; 20262306a36Sopenharmony_ci } 20362306a36Sopenharmony_ci nf_reset_ct(skb); 20462306a36Sopenharmony_ci } 20562306a36Sopenharmony_ci ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv, 20662306a36Sopenharmony_ci skb); 20762306a36Sopenharmony_ci if (ret < 0) { 20862306a36Sopenharmony_ci protocol = -ret; 20962306a36Sopenharmony_ci goto resubmit; 21062306a36Sopenharmony_ci } 21162306a36Sopenharmony_ci __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); 21262306a36Sopenharmony_ci } else { 21362306a36Sopenharmony_ci if (!raw) { 21462306a36Sopenharmony_ci if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 21562306a36Sopenharmony_ci __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS); 21662306a36Sopenharmony_ci icmp_send(skb, ICMP_DEST_UNREACH, 21762306a36Sopenharmony_ci ICMP_PROT_UNREACH, 0); 21862306a36Sopenharmony_ci } 21962306a36Sopenharmony_ci kfree_skb_reason(skb, SKB_DROP_REASON_IP_NOPROTO); 22062306a36Sopenharmony_ci } else { 22162306a36Sopenharmony_ci __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); 22262306a36Sopenharmony_ci consume_skb(skb); 22362306a36Sopenharmony_ci } 22462306a36Sopenharmony_ci } 22562306a36Sopenharmony_ci} 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_cistatic int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 22862306a36Sopenharmony_ci{ 22962306a36Sopenharmony_ci skb_clear_delivery_time(skb); 23062306a36Sopenharmony_ci __skb_pull(skb, skb_network_header_len(skb)); 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci rcu_read_lock(); 23362306a36Sopenharmony_ci ip_protocol_deliver_rcu(net, skb, ip_hdr(skb)->protocol); 23462306a36Sopenharmony_ci rcu_read_unlock(); 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci return 0; 23762306a36Sopenharmony_ci} 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci/* 24062306a36Sopenharmony_ci * Deliver IP Packets to the higher protocol layers. 24162306a36Sopenharmony_ci */ 24262306a36Sopenharmony_ciint ip_local_deliver(struct sk_buff *skb) 24362306a36Sopenharmony_ci{ 24462306a36Sopenharmony_ci /* 24562306a36Sopenharmony_ci * Reassemble IP fragments. 24662306a36Sopenharmony_ci */ 24762306a36Sopenharmony_ci struct net *net = dev_net(skb->dev); 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci if (ip_is_fragment(ip_hdr(skb))) { 25062306a36Sopenharmony_ci if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER)) 25162306a36Sopenharmony_ci return 0; 25262306a36Sopenharmony_ci } 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, 25562306a36Sopenharmony_ci net, NULL, skb, skb->dev, NULL, 25662306a36Sopenharmony_ci ip_local_deliver_finish); 25762306a36Sopenharmony_ci} 25862306a36Sopenharmony_ciEXPORT_SYMBOL(ip_local_deliver); 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_cistatic inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev) 26162306a36Sopenharmony_ci{ 26262306a36Sopenharmony_ci struct ip_options *opt; 26362306a36Sopenharmony_ci const struct iphdr *iph; 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci /* It looks as overkill, because not all 26662306a36Sopenharmony_ci IP options require packet mangling. 26762306a36Sopenharmony_ci But it is the easiest for now, especially taking 26862306a36Sopenharmony_ci into account that combination of IP options 26962306a36Sopenharmony_ci and running sniffer is extremely rare condition. 27062306a36Sopenharmony_ci --ANK (980813) 27162306a36Sopenharmony_ci */ 27262306a36Sopenharmony_ci if (skb_cow(skb, skb_headroom(skb))) { 27362306a36Sopenharmony_ci __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS); 27462306a36Sopenharmony_ci goto drop; 27562306a36Sopenharmony_ci } 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci iph = ip_hdr(skb); 27862306a36Sopenharmony_ci opt = &(IPCB(skb)->opt); 27962306a36Sopenharmony_ci opt->optlen = iph->ihl*4 - sizeof(struct iphdr); 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci if (ip_options_compile(dev_net(dev), opt, skb)) { 28262306a36Sopenharmony_ci __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS); 28362306a36Sopenharmony_ci goto drop; 28462306a36Sopenharmony_ci } 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci if (unlikely(opt->srr)) { 28762306a36Sopenharmony_ci struct in_device *in_dev = __in_dev_get_rcu(dev); 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci if (in_dev) { 29062306a36Sopenharmony_ci if (!IN_DEV_SOURCE_ROUTE(in_dev)) { 29162306a36Sopenharmony_ci if (IN_DEV_LOG_MARTIANS(in_dev)) 29262306a36Sopenharmony_ci net_info_ratelimited("source route option %pI4 -> %pI4\n", 29362306a36Sopenharmony_ci &iph->saddr, 29462306a36Sopenharmony_ci &iph->daddr); 29562306a36Sopenharmony_ci goto drop; 29662306a36Sopenharmony_ci } 29762306a36Sopenharmony_ci } 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci if (ip_options_rcv_srr(skb, dev)) 30062306a36Sopenharmony_ci goto drop; 30162306a36Sopenharmony_ci } 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci return false; 30462306a36Sopenharmony_cidrop: 30562306a36Sopenharmony_ci return true; 30662306a36Sopenharmony_ci} 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_cistatic bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph, 30962306a36Sopenharmony_ci const struct sk_buff *hint) 31062306a36Sopenharmony_ci{ 31162306a36Sopenharmony_ci return hint && !skb_dst(skb) && ip_hdr(hint)->daddr == iph->daddr && 31262306a36Sopenharmony_ci ip_hdr(hint)->tos == iph->tos; 31362306a36Sopenharmony_ci} 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_ciint tcp_v4_early_demux(struct sk_buff *skb); 31662306a36Sopenharmony_ciint udp_v4_early_demux(struct sk_buff *skb); 31762306a36Sopenharmony_cistatic int ip_rcv_finish_core(struct net *net, struct sock *sk, 31862306a36Sopenharmony_ci struct sk_buff *skb, struct net_device *dev, 31962306a36Sopenharmony_ci const struct sk_buff *hint) 32062306a36Sopenharmony_ci{ 32162306a36Sopenharmony_ci const struct iphdr *iph = ip_hdr(skb); 32262306a36Sopenharmony_ci int err, drop_reason; 32362306a36Sopenharmony_ci struct rtable *rt; 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci if (ip_can_use_hint(skb, iph, hint)) { 32862306a36Sopenharmony_ci err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos, 32962306a36Sopenharmony_ci dev, hint); 33062306a36Sopenharmony_ci if (unlikely(err)) 33162306a36Sopenharmony_ci goto drop_error; 33262306a36Sopenharmony_ci } 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && 33562306a36Sopenharmony_ci !skb_dst(skb) && 33662306a36Sopenharmony_ci !skb->sk && 33762306a36Sopenharmony_ci !ip_is_fragment(iph)) { 33862306a36Sopenharmony_ci switch (iph->protocol) { 33962306a36Sopenharmony_ci case IPPROTO_TCP: 34062306a36Sopenharmony_ci if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) { 34162306a36Sopenharmony_ci tcp_v4_early_demux(skb); 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_ci /* must reload iph, skb->head might have changed */ 34462306a36Sopenharmony_ci iph = ip_hdr(skb); 34562306a36Sopenharmony_ci } 34662306a36Sopenharmony_ci break; 34762306a36Sopenharmony_ci case IPPROTO_UDP: 34862306a36Sopenharmony_ci if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) { 34962306a36Sopenharmony_ci err = udp_v4_early_demux(skb); 35062306a36Sopenharmony_ci if (unlikely(err)) 35162306a36Sopenharmony_ci goto drop_error; 35262306a36Sopenharmony_ci 35362306a36Sopenharmony_ci /* must reload iph, skb->head might have changed */ 35462306a36Sopenharmony_ci iph = ip_hdr(skb); 35562306a36Sopenharmony_ci } 35662306a36Sopenharmony_ci break; 35762306a36Sopenharmony_ci } 35862306a36Sopenharmony_ci } 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci /* 36162306a36Sopenharmony_ci * Initialise the virtual path cache for the packet. It describes 36262306a36Sopenharmony_ci * how the packet travels inside Linux networking. 36362306a36Sopenharmony_ci */ 36462306a36Sopenharmony_ci if (!skb_valid_dst(skb)) { 36562306a36Sopenharmony_ci err = ip_route_input_noref(skb, iph->daddr, iph->saddr, 36662306a36Sopenharmony_ci iph->tos, dev); 36762306a36Sopenharmony_ci if (unlikely(err)) 36862306a36Sopenharmony_ci goto drop_error; 36962306a36Sopenharmony_ci } else { 37062306a36Sopenharmony_ci struct in_device *in_dev = __in_dev_get_rcu(dev); 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_ci if (in_dev && IN_DEV_ORCONF(in_dev, NOPOLICY)) 37362306a36Sopenharmony_ci IPCB(skb)->flags |= IPSKB_NOPOLICY; 37462306a36Sopenharmony_ci } 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_CLASSID 37762306a36Sopenharmony_ci if (unlikely(skb_dst(skb)->tclassid)) { 37862306a36Sopenharmony_ci struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); 37962306a36Sopenharmony_ci u32 idx = skb_dst(skb)->tclassid; 38062306a36Sopenharmony_ci st[idx&0xFF].o_packets++; 38162306a36Sopenharmony_ci st[idx&0xFF].o_bytes += skb->len; 38262306a36Sopenharmony_ci st[(idx>>16)&0xFF].i_packets++; 38362306a36Sopenharmony_ci st[(idx>>16)&0xFF].i_bytes += skb->len; 38462306a36Sopenharmony_ci } 38562306a36Sopenharmony_ci#endif 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_ci if (iph->ihl > 5 && ip_rcv_options(skb, dev)) 38862306a36Sopenharmony_ci goto drop; 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ci rt = skb_rtable(skb); 39162306a36Sopenharmony_ci if (rt->rt_type == RTN_MULTICAST) { 39262306a36Sopenharmony_ci __IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len); 39362306a36Sopenharmony_ci } else if (rt->rt_type == RTN_BROADCAST) { 39462306a36Sopenharmony_ci __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len); 39562306a36Sopenharmony_ci } else if (skb->pkt_type == PACKET_BROADCAST || 39662306a36Sopenharmony_ci skb->pkt_type == PACKET_MULTICAST) { 39762306a36Sopenharmony_ci struct in_device *in_dev = __in_dev_get_rcu(dev); 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci /* RFC 1122 3.3.6: 40062306a36Sopenharmony_ci * 40162306a36Sopenharmony_ci * When a host sends a datagram to a link-layer broadcast 40262306a36Sopenharmony_ci * address, the IP destination address MUST be a legal IP 40362306a36Sopenharmony_ci * broadcast or IP multicast address. 40462306a36Sopenharmony_ci * 40562306a36Sopenharmony_ci * A host SHOULD silently discard a datagram that is received 40662306a36Sopenharmony_ci * via a link-layer broadcast (see Section 2.4) but does not 40762306a36Sopenharmony_ci * specify an IP multicast or broadcast destination address. 40862306a36Sopenharmony_ci * 40962306a36Sopenharmony_ci * This doesn't explicitly say L2 *broadcast*, but broadcast is 41062306a36Sopenharmony_ci * in a way a form of multicast and the most common use case for 41162306a36Sopenharmony_ci * this is 802.11 protecting against cross-station spoofing (the 41262306a36Sopenharmony_ci * so-called "hole-196" attack) so do it for both. 41362306a36Sopenharmony_ci */ 41462306a36Sopenharmony_ci if (in_dev && 41562306a36Sopenharmony_ci IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) { 41662306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST; 41762306a36Sopenharmony_ci goto drop; 41862306a36Sopenharmony_ci } 41962306a36Sopenharmony_ci } 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci return NET_RX_SUCCESS; 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_cidrop: 42462306a36Sopenharmony_ci kfree_skb_reason(skb, drop_reason); 42562306a36Sopenharmony_ci return NET_RX_DROP; 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_cidrop_error: 42862306a36Sopenharmony_ci if (err == -EXDEV) { 42962306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_IP_RPFILTER; 43062306a36Sopenharmony_ci __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); 43162306a36Sopenharmony_ci } 43262306a36Sopenharmony_ci goto drop; 43362306a36Sopenharmony_ci} 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_cistatic int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 43662306a36Sopenharmony_ci{ 43762306a36Sopenharmony_ci struct net_device *dev = skb->dev; 43862306a36Sopenharmony_ci int ret; 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci /* if ingress device is enslaved to an L3 master device pass the 44162306a36Sopenharmony_ci * skb to its handler for processing 44262306a36Sopenharmony_ci */ 44362306a36Sopenharmony_ci skb = l3mdev_ip_rcv(skb); 44462306a36Sopenharmony_ci if (!skb) 44562306a36Sopenharmony_ci return NET_RX_SUCCESS; 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci ret = ip_rcv_finish_core(net, sk, skb, dev, NULL); 44862306a36Sopenharmony_ci if (ret != NET_RX_DROP) 44962306a36Sopenharmony_ci ret = dst_input(skb); 45062306a36Sopenharmony_ci return ret; 45162306a36Sopenharmony_ci} 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_ci/* 45462306a36Sopenharmony_ci * Main IP Receive routine. 45562306a36Sopenharmony_ci */ 45662306a36Sopenharmony_cistatic struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) 45762306a36Sopenharmony_ci{ 45862306a36Sopenharmony_ci const struct iphdr *iph; 45962306a36Sopenharmony_ci int drop_reason; 46062306a36Sopenharmony_ci u32 len; 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_ci /* When the interface is in promisc. mode, drop all the crap 46362306a36Sopenharmony_ci * that it receives, do not try to analyse it. 46462306a36Sopenharmony_ci */ 46562306a36Sopenharmony_ci if (skb->pkt_type == PACKET_OTHERHOST) { 46662306a36Sopenharmony_ci dev_core_stats_rx_otherhost_dropped_inc(skb->dev); 46762306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_OTHERHOST; 46862306a36Sopenharmony_ci goto drop; 46962306a36Sopenharmony_ci } 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_ci __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len); 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci skb = skb_share_check(skb, GFP_ATOMIC); 47462306a36Sopenharmony_ci if (!skb) { 47562306a36Sopenharmony_ci __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); 47662306a36Sopenharmony_ci goto out; 47762306a36Sopenharmony_ci } 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 48062306a36Sopenharmony_ci if (!pskb_may_pull(skb, sizeof(struct iphdr))) 48162306a36Sopenharmony_ci goto inhdr_error; 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci iph = ip_hdr(skb); 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci /* 48662306a36Sopenharmony_ci * RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum. 48762306a36Sopenharmony_ci * 48862306a36Sopenharmony_ci * Is the datagram acceptable? 48962306a36Sopenharmony_ci * 49062306a36Sopenharmony_ci * 1. Length at least the size of an ip header 49162306a36Sopenharmony_ci * 2. Version of 4 49262306a36Sopenharmony_ci * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] 49362306a36Sopenharmony_ci * 4. Doesn't have a bogus length 49462306a36Sopenharmony_ci */ 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci if (iph->ihl < 5 || iph->version != 4) 49762306a36Sopenharmony_ci goto inhdr_error; 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1); 50062306a36Sopenharmony_ci BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0); 50162306a36Sopenharmony_ci BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE); 50262306a36Sopenharmony_ci __IP_ADD_STATS(net, 50362306a36Sopenharmony_ci IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK), 50462306a36Sopenharmony_ci max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ci if (!pskb_may_pull(skb, iph->ihl*4)) 50762306a36Sopenharmony_ci goto inhdr_error; 50862306a36Sopenharmony_ci 50962306a36Sopenharmony_ci iph = ip_hdr(skb); 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_ci if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 51262306a36Sopenharmony_ci goto csum_error; 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci len = iph_totlen(skb, iph); 51562306a36Sopenharmony_ci if (skb->len < len) { 51662306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 51762306a36Sopenharmony_ci __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); 51862306a36Sopenharmony_ci goto drop; 51962306a36Sopenharmony_ci } else if (len < (iph->ihl*4)) 52062306a36Sopenharmony_ci goto inhdr_error; 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci /* Our transport medium may have padded the buffer out. Now we know it 52362306a36Sopenharmony_ci * is IP we can trim to the true length of the frame. 52462306a36Sopenharmony_ci * Note this now means skb->len holds ntohs(iph->tot_len). 52562306a36Sopenharmony_ci */ 52662306a36Sopenharmony_ci if (pskb_trim_rcsum(skb, len)) { 52762306a36Sopenharmony_ci __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); 52862306a36Sopenharmony_ci goto drop; 52962306a36Sopenharmony_ci } 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci iph = ip_hdr(skb); 53262306a36Sopenharmony_ci skb->transport_header = skb->network_header + iph->ihl*4; 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ci /* Remove any debris in the socket control block */ 53562306a36Sopenharmony_ci memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 53662306a36Sopenharmony_ci IPCB(skb)->iif = skb->skb_iif; 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci /* Must drop socket now because of tproxy. */ 53962306a36Sopenharmony_ci if (!skb_sk_is_prefetched(skb)) 54062306a36Sopenharmony_ci skb_orphan(skb); 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci return skb; 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_cicsum_error: 54562306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_IP_CSUM; 54662306a36Sopenharmony_ci __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); 54762306a36Sopenharmony_ciinhdr_error: 54862306a36Sopenharmony_ci if (drop_reason == SKB_DROP_REASON_NOT_SPECIFIED) 54962306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_IP_INHDR; 55062306a36Sopenharmony_ci __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); 55162306a36Sopenharmony_cidrop: 55262306a36Sopenharmony_ci kfree_skb_reason(skb, drop_reason); 55362306a36Sopenharmony_ciout: 55462306a36Sopenharmony_ci return NULL; 55562306a36Sopenharmony_ci} 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci/* 55862306a36Sopenharmony_ci * IP receive entry point 55962306a36Sopenharmony_ci */ 56062306a36Sopenharmony_ciint ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, 56162306a36Sopenharmony_ci struct net_device *orig_dev) 56262306a36Sopenharmony_ci{ 56362306a36Sopenharmony_ci struct net *net = dev_net(dev); 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci skb = ip_rcv_core(skb, net); 56662306a36Sopenharmony_ci if (skb == NULL) 56762306a36Sopenharmony_ci return NET_RX_DROP; 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, 57062306a36Sopenharmony_ci net, NULL, skb, dev, NULL, 57162306a36Sopenharmony_ci ip_rcv_finish); 57262306a36Sopenharmony_ci} 57362306a36Sopenharmony_ci 57462306a36Sopenharmony_cistatic void ip_sublist_rcv_finish(struct list_head *head) 57562306a36Sopenharmony_ci{ 57662306a36Sopenharmony_ci struct sk_buff *skb, *next; 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_ci list_for_each_entry_safe(skb, next, head, list) { 57962306a36Sopenharmony_ci skb_list_del_init(skb); 58062306a36Sopenharmony_ci dst_input(skb); 58162306a36Sopenharmony_ci } 58262306a36Sopenharmony_ci} 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_cistatic struct sk_buff *ip_extract_route_hint(const struct net *net, 58562306a36Sopenharmony_ci struct sk_buff *skb, int rt_type) 58662306a36Sopenharmony_ci{ 58762306a36Sopenharmony_ci if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST || 58862306a36Sopenharmony_ci IPCB(skb)->flags & IPSKB_MULTIPATH) 58962306a36Sopenharmony_ci return NULL; 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci return skb; 59262306a36Sopenharmony_ci} 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_cistatic void ip_list_rcv_finish(struct net *net, struct sock *sk, 59562306a36Sopenharmony_ci struct list_head *head) 59662306a36Sopenharmony_ci{ 59762306a36Sopenharmony_ci struct sk_buff *skb, *next, *hint = NULL; 59862306a36Sopenharmony_ci struct dst_entry *curr_dst = NULL; 59962306a36Sopenharmony_ci struct list_head sublist; 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci INIT_LIST_HEAD(&sublist); 60262306a36Sopenharmony_ci list_for_each_entry_safe(skb, next, head, list) { 60362306a36Sopenharmony_ci struct net_device *dev = skb->dev; 60462306a36Sopenharmony_ci struct dst_entry *dst; 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci skb_list_del_init(skb); 60762306a36Sopenharmony_ci /* if ingress device is enslaved to an L3 master device pass the 60862306a36Sopenharmony_ci * skb to its handler for processing 60962306a36Sopenharmony_ci */ 61062306a36Sopenharmony_ci skb = l3mdev_ip_rcv(skb); 61162306a36Sopenharmony_ci if (!skb) 61262306a36Sopenharmony_ci continue; 61362306a36Sopenharmony_ci if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP) 61462306a36Sopenharmony_ci continue; 61562306a36Sopenharmony_ci 61662306a36Sopenharmony_ci dst = skb_dst(skb); 61762306a36Sopenharmony_ci if (curr_dst != dst) { 61862306a36Sopenharmony_ci hint = ip_extract_route_hint(net, skb, 61962306a36Sopenharmony_ci ((struct rtable *)dst)->rt_type); 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci /* dispatch old sublist */ 62262306a36Sopenharmony_ci if (!list_empty(&sublist)) 62362306a36Sopenharmony_ci ip_sublist_rcv_finish(&sublist); 62462306a36Sopenharmony_ci /* start new sublist */ 62562306a36Sopenharmony_ci INIT_LIST_HEAD(&sublist); 62662306a36Sopenharmony_ci curr_dst = dst; 62762306a36Sopenharmony_ci } 62862306a36Sopenharmony_ci list_add_tail(&skb->list, &sublist); 62962306a36Sopenharmony_ci } 63062306a36Sopenharmony_ci /* dispatch final sublist */ 63162306a36Sopenharmony_ci ip_sublist_rcv_finish(&sublist); 63262306a36Sopenharmony_ci} 63362306a36Sopenharmony_ci 63462306a36Sopenharmony_cistatic void ip_sublist_rcv(struct list_head *head, struct net_device *dev, 63562306a36Sopenharmony_ci struct net *net) 63662306a36Sopenharmony_ci{ 63762306a36Sopenharmony_ci NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL, 63862306a36Sopenharmony_ci head, dev, NULL, ip_rcv_finish); 63962306a36Sopenharmony_ci ip_list_rcv_finish(net, NULL, head); 64062306a36Sopenharmony_ci} 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_ci/* Receive a list of IP packets */ 64362306a36Sopenharmony_civoid ip_list_rcv(struct list_head *head, struct packet_type *pt, 64462306a36Sopenharmony_ci struct net_device *orig_dev) 64562306a36Sopenharmony_ci{ 64662306a36Sopenharmony_ci struct net_device *curr_dev = NULL; 64762306a36Sopenharmony_ci struct net *curr_net = NULL; 64862306a36Sopenharmony_ci struct sk_buff *skb, *next; 64962306a36Sopenharmony_ci struct list_head sublist; 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_ci INIT_LIST_HEAD(&sublist); 65262306a36Sopenharmony_ci list_for_each_entry_safe(skb, next, head, list) { 65362306a36Sopenharmony_ci struct net_device *dev = skb->dev; 65462306a36Sopenharmony_ci struct net *net = dev_net(dev); 65562306a36Sopenharmony_ci 65662306a36Sopenharmony_ci skb_list_del_init(skb); 65762306a36Sopenharmony_ci skb = ip_rcv_core(skb, net); 65862306a36Sopenharmony_ci if (skb == NULL) 65962306a36Sopenharmony_ci continue; 66062306a36Sopenharmony_ci 66162306a36Sopenharmony_ci if (curr_dev != dev || curr_net != net) { 66262306a36Sopenharmony_ci /* dispatch old sublist */ 66362306a36Sopenharmony_ci if (!list_empty(&sublist)) 66462306a36Sopenharmony_ci ip_sublist_rcv(&sublist, curr_dev, curr_net); 66562306a36Sopenharmony_ci /* start new sublist */ 66662306a36Sopenharmony_ci INIT_LIST_HEAD(&sublist); 66762306a36Sopenharmony_ci curr_dev = dev; 66862306a36Sopenharmony_ci curr_net = net; 66962306a36Sopenharmony_ci } 67062306a36Sopenharmony_ci list_add_tail(&skb->list, &sublist); 67162306a36Sopenharmony_ci } 67262306a36Sopenharmony_ci /* dispatch final sublist */ 67362306a36Sopenharmony_ci if (!list_empty(&sublist)) 67462306a36Sopenharmony_ci ip_sublist_rcv(&sublist, curr_dev, curr_net); 67562306a36Sopenharmony_ci} 676