// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *		Alan Cox	:	Verify area fixes.
 *		Alan Cox	:	cli() protects routing changes
 *		Rui Oliveira	:	ICMP routing table updates
 *		(rco@di.uminho.pt)	Routing table insertion and update
 *		Linus Torvalds	:	Rewrote bits to be sensible
 *		Alan Cox	:	Added BSD route gw semantics
 *		Alan Cox	:	Super /proc >4K
 *		Alan Cox	:	MTU in route table
 *		Alan Cox	:	MSS actually. Also added the window
 *					clamper.
 *		Sam Lantinga	:	Fixed route matching in rt_del()
 *		Alan Cox	:	Routing cache support.
 *		Alan Cox	:	Removed compatibility cruft.
 *		Alan Cox	:	RTF_REJECT support.
 *		Alan Cox	:	TCP irtt support.
 *		Jonathan Naylor	:	Added Metric support.
 *	Miquel van Smoorenburg	:	BSD API fixes.
 *	Miquel van Smoorenburg	:	Metrics.
 *		Alan Cox	:	Use __u32 properly
 *		Alan Cox	:	Aligned routing errors more closely with BSD
 *					our system is still very different.
 *		Alan Cox	:	Faster /proc handling
 *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
 *					routing caches and better behaviour.
 *
 *		Olaf Erb	:	irtt wasn't being copied right.
 *		Bjorn Ekwall	:	Kerneld route support.
 *		Alan Cox	:	Multicast fixed (I hope)
 *		Pavel Krauz	:	Limited broadcast fixed
 *		Mike McLagan	:	Routing by source
 *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
 *					route.c and rewritten from scratch.
 *		Andi Kleen	:	Load-limit warning messages.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
 *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
 *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
 *		Marc Boucher	:	routing by fwmark
 *	Robert Olsson		:	Added rt_cache statistics
 *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
 *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
 *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
 *	Ilia Sotnikov		:	Removed TOS from hash calculations
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/socket.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/inet_dscp.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/nexthop.h>
#include <net/tcp.h>
9662306a36Sopenharmony_ci#include <net/icmp.h> 9762306a36Sopenharmony_ci#include <net/xfrm.h> 9862306a36Sopenharmony_ci#include <net/lwtunnel.h> 9962306a36Sopenharmony_ci#include <net/netevent.h> 10062306a36Sopenharmony_ci#include <net/rtnetlink.h> 10162306a36Sopenharmony_ci#ifdef CONFIG_SYSCTL 10262306a36Sopenharmony_ci#include <linux/sysctl.h> 10362306a36Sopenharmony_ci#endif 10462306a36Sopenharmony_ci#include <net/secure_seq.h> 10562306a36Sopenharmony_ci#include <net/ip_tunnels.h> 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci#include "fib_lookup.h" 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci#define RT_FL_TOS(oldflp4) \ 11062306a36Sopenharmony_ci ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci#define RT_GC_TIMEOUT (300*HZ) 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci#define DEFAULT_MIN_PMTU (512 + 20 + 20) 11562306a36Sopenharmony_ci#define DEFAULT_MTU_EXPIRES (10 * 60 * HZ) 11662306a36Sopenharmony_ci#define DEFAULT_MIN_ADVMSS 256 11762306a36Sopenharmony_cistatic int ip_rt_max_size; 11862306a36Sopenharmony_cistatic int ip_rt_redirect_number __read_mostly = 9; 11962306a36Sopenharmony_cistatic int ip_rt_redirect_load __read_mostly = HZ / 50; 12062306a36Sopenharmony_cistatic int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); 12162306a36Sopenharmony_cistatic int ip_rt_error_cost __read_mostly = HZ; 12262306a36Sopenharmony_cistatic int ip_rt_error_burst __read_mostly = 5 * HZ; 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_cistatic int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ci/* 12762306a36Sopenharmony_ci * Interface to generic destination cache. 
12862306a36Sopenharmony_ci */ 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ciINDIRECT_CALLABLE_SCOPE 13162306a36Sopenharmony_cistruct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); 13262306a36Sopenharmony_cistatic unsigned int ipv4_default_advmss(const struct dst_entry *dst); 13362306a36Sopenharmony_ciINDIRECT_CALLABLE_SCOPE 13462306a36Sopenharmony_ciunsigned int ipv4_mtu(const struct dst_entry *dst); 13562306a36Sopenharmony_cistatic struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); 13662306a36Sopenharmony_cistatic void ipv4_link_failure(struct sk_buff *skb); 13762306a36Sopenharmony_cistatic void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 13862306a36Sopenharmony_ci struct sk_buff *skb, u32 mtu, 13962306a36Sopenharmony_ci bool confirm_neigh); 14062306a36Sopenharmony_cistatic void ip_do_redirect(struct dst_entry *dst, struct sock *sk, 14162306a36Sopenharmony_ci struct sk_buff *skb); 14262306a36Sopenharmony_cistatic void ipv4_dst_destroy(struct dst_entry *dst); 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_cistatic u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) 14562306a36Sopenharmony_ci{ 14662306a36Sopenharmony_ci WARN_ON(1); 14762306a36Sopenharmony_ci return NULL; 14862306a36Sopenharmony_ci} 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_cistatic struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, 15162306a36Sopenharmony_ci struct sk_buff *skb, 15262306a36Sopenharmony_ci const void *daddr); 15362306a36Sopenharmony_cistatic void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr); 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_cistatic struct dst_ops ipv4_dst_ops = { 15662306a36Sopenharmony_ci .family = AF_INET, 15762306a36Sopenharmony_ci .check = ipv4_dst_check, 15862306a36Sopenharmony_ci .default_advmss = ipv4_default_advmss, 15962306a36Sopenharmony_ci .mtu = ipv4_mtu, 16062306a36Sopenharmony_ci .cow_metrics = ipv4_cow_metrics, 16162306a36Sopenharmony_ci 
.destroy = ipv4_dst_destroy, 16262306a36Sopenharmony_ci .negative_advice = ipv4_negative_advice, 16362306a36Sopenharmony_ci .link_failure = ipv4_link_failure, 16462306a36Sopenharmony_ci .update_pmtu = ip_rt_update_pmtu, 16562306a36Sopenharmony_ci .redirect = ip_do_redirect, 16662306a36Sopenharmony_ci .local_out = __ip_local_out, 16762306a36Sopenharmony_ci .neigh_lookup = ipv4_neigh_lookup, 16862306a36Sopenharmony_ci .confirm_neigh = ipv4_confirm_neigh, 16962306a36Sopenharmony_ci}; 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci#define ECN_OR_COST(class) TC_PRIO_##class 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ciconst __u8 ip_tos2prio[16] = { 17462306a36Sopenharmony_ci TC_PRIO_BESTEFFORT, 17562306a36Sopenharmony_ci ECN_OR_COST(BESTEFFORT), 17662306a36Sopenharmony_ci TC_PRIO_BESTEFFORT, 17762306a36Sopenharmony_ci ECN_OR_COST(BESTEFFORT), 17862306a36Sopenharmony_ci TC_PRIO_BULK, 17962306a36Sopenharmony_ci ECN_OR_COST(BULK), 18062306a36Sopenharmony_ci TC_PRIO_BULK, 18162306a36Sopenharmony_ci ECN_OR_COST(BULK), 18262306a36Sopenharmony_ci TC_PRIO_INTERACTIVE, 18362306a36Sopenharmony_ci ECN_OR_COST(INTERACTIVE), 18462306a36Sopenharmony_ci TC_PRIO_INTERACTIVE, 18562306a36Sopenharmony_ci ECN_OR_COST(INTERACTIVE), 18662306a36Sopenharmony_ci TC_PRIO_INTERACTIVE_BULK, 18762306a36Sopenharmony_ci ECN_OR_COST(INTERACTIVE_BULK), 18862306a36Sopenharmony_ci TC_PRIO_INTERACTIVE_BULK, 18962306a36Sopenharmony_ci ECN_OR_COST(INTERACTIVE_BULK) 19062306a36Sopenharmony_ci}; 19162306a36Sopenharmony_ciEXPORT_SYMBOL(ip_tos2prio); 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 19462306a36Sopenharmony_ci#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field) 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci#ifdef CONFIG_PROC_FS 19762306a36Sopenharmony_cistatic void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) 19862306a36Sopenharmony_ci{ 19962306a36Sopenharmony_ci if (*pos) 
20062306a36Sopenharmony_ci return NULL; 20162306a36Sopenharmony_ci return SEQ_START_TOKEN; 20262306a36Sopenharmony_ci} 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_cistatic void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) 20562306a36Sopenharmony_ci{ 20662306a36Sopenharmony_ci ++*pos; 20762306a36Sopenharmony_ci return NULL; 20862306a36Sopenharmony_ci} 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_cistatic void rt_cache_seq_stop(struct seq_file *seq, void *v) 21162306a36Sopenharmony_ci{ 21262306a36Sopenharmony_ci} 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_cistatic int rt_cache_seq_show(struct seq_file *seq, void *v) 21562306a36Sopenharmony_ci{ 21662306a36Sopenharmony_ci if (v == SEQ_START_TOKEN) 21762306a36Sopenharmony_ci seq_printf(seq, "%-127s\n", 21862306a36Sopenharmony_ci "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t" 21962306a36Sopenharmony_ci "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t" 22062306a36Sopenharmony_ci "HHUptod\tSpecDst"); 22162306a36Sopenharmony_ci return 0; 22262306a36Sopenharmony_ci} 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_cistatic const struct seq_operations rt_cache_seq_ops = { 22562306a36Sopenharmony_ci .start = rt_cache_seq_start, 22662306a36Sopenharmony_ci .next = rt_cache_seq_next, 22762306a36Sopenharmony_ci .stop = rt_cache_seq_stop, 22862306a36Sopenharmony_ci .show = rt_cache_seq_show, 22962306a36Sopenharmony_ci}; 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_cistatic void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos) 23262306a36Sopenharmony_ci{ 23362306a36Sopenharmony_ci int cpu; 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci if (*pos == 0) 23662306a36Sopenharmony_ci return SEQ_START_TOKEN; 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { 23962306a36Sopenharmony_ci if (!cpu_possible(cpu)) 24062306a36Sopenharmony_ci continue; 24162306a36Sopenharmony_ci *pos = cpu+1; 24262306a36Sopenharmony_ci return 
&per_cpu(rt_cache_stat, cpu); 24362306a36Sopenharmony_ci } 24462306a36Sopenharmony_ci return NULL; 24562306a36Sopenharmony_ci} 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_cistatic void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) 24862306a36Sopenharmony_ci{ 24962306a36Sopenharmony_ci int cpu; 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { 25262306a36Sopenharmony_ci if (!cpu_possible(cpu)) 25362306a36Sopenharmony_ci continue; 25462306a36Sopenharmony_ci *pos = cpu+1; 25562306a36Sopenharmony_ci return &per_cpu(rt_cache_stat, cpu); 25662306a36Sopenharmony_ci } 25762306a36Sopenharmony_ci (*pos)++; 25862306a36Sopenharmony_ci return NULL; 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci} 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_cistatic void rt_cpu_seq_stop(struct seq_file *seq, void *v) 26362306a36Sopenharmony_ci{ 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci} 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_cistatic int rt_cpu_seq_show(struct seq_file *seq, void *v) 26862306a36Sopenharmony_ci{ 26962306a36Sopenharmony_ci struct rt_cache_stat *st = v; 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci if (v == SEQ_START_TOKEN) { 27262306a36Sopenharmony_ci seq_puts(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n"); 27362306a36Sopenharmony_ci return 0; 27462306a36Sopenharmony_ci } 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x " 27762306a36Sopenharmony_ci "%08x %08x %08x %08x %08x %08x " 27862306a36Sopenharmony_ci "%08x %08x %08x %08x\n", 27962306a36Sopenharmony_ci dst_entries_get_slow(&ipv4_dst_ops), 28062306a36Sopenharmony_ci 0, /* st->in_hit */ 28162306a36Sopenharmony_ci st->in_slow_tot, 28262306a36Sopenharmony_ci st->in_slow_mc, 
28362306a36Sopenharmony_ci st->in_no_route, 28462306a36Sopenharmony_ci st->in_brd, 28562306a36Sopenharmony_ci st->in_martian_dst, 28662306a36Sopenharmony_ci st->in_martian_src, 28762306a36Sopenharmony_ci 28862306a36Sopenharmony_ci 0, /* st->out_hit */ 28962306a36Sopenharmony_ci st->out_slow_tot, 29062306a36Sopenharmony_ci st->out_slow_mc, 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci 0, /* st->gc_total */ 29362306a36Sopenharmony_ci 0, /* st->gc_ignored */ 29462306a36Sopenharmony_ci 0, /* st->gc_goal_miss */ 29562306a36Sopenharmony_ci 0, /* st->gc_dst_overflow */ 29662306a36Sopenharmony_ci 0, /* st->in_hlist_search */ 29762306a36Sopenharmony_ci 0 /* st->out_hlist_search */ 29862306a36Sopenharmony_ci ); 29962306a36Sopenharmony_ci return 0; 30062306a36Sopenharmony_ci} 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_cistatic const struct seq_operations rt_cpu_seq_ops = { 30362306a36Sopenharmony_ci .start = rt_cpu_seq_start, 30462306a36Sopenharmony_ci .next = rt_cpu_seq_next, 30562306a36Sopenharmony_ci .stop = rt_cpu_seq_stop, 30662306a36Sopenharmony_ci .show = rt_cpu_seq_show, 30762306a36Sopenharmony_ci}; 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_CLASSID 31062306a36Sopenharmony_cistatic int rt_acct_proc_show(struct seq_file *m, void *v) 31162306a36Sopenharmony_ci{ 31262306a36Sopenharmony_ci struct ip_rt_acct *dst, *src; 31362306a36Sopenharmony_ci unsigned int i, j; 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_ci dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL); 31662306a36Sopenharmony_ci if (!dst) 31762306a36Sopenharmony_ci return -ENOMEM; 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci for_each_possible_cpu(i) { 32062306a36Sopenharmony_ci src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i); 32162306a36Sopenharmony_ci for (j = 0; j < 256; j++) { 32262306a36Sopenharmony_ci dst[j].o_bytes += src[j].o_bytes; 32362306a36Sopenharmony_ci dst[j].o_packets += src[j].o_packets; 32462306a36Sopenharmony_ci 
dst[j].i_bytes += src[j].i_bytes; 32562306a36Sopenharmony_ci dst[j].i_packets += src[j].i_packets; 32662306a36Sopenharmony_ci } 32762306a36Sopenharmony_ci } 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci seq_write(m, dst, 256 * sizeof(struct ip_rt_acct)); 33062306a36Sopenharmony_ci kfree(dst); 33162306a36Sopenharmony_ci return 0; 33262306a36Sopenharmony_ci} 33362306a36Sopenharmony_ci#endif 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_cistatic int __net_init ip_rt_do_proc_init(struct net *net) 33662306a36Sopenharmony_ci{ 33762306a36Sopenharmony_ci struct proc_dir_entry *pde; 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_ci pde = proc_create_seq("rt_cache", 0444, net->proc_net, 34062306a36Sopenharmony_ci &rt_cache_seq_ops); 34162306a36Sopenharmony_ci if (!pde) 34262306a36Sopenharmony_ci goto err1; 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci pde = proc_create_seq("rt_cache", 0444, net->proc_net_stat, 34562306a36Sopenharmony_ci &rt_cpu_seq_ops); 34662306a36Sopenharmony_ci if (!pde) 34762306a36Sopenharmony_ci goto err2; 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_CLASSID 35062306a36Sopenharmony_ci pde = proc_create_single("rt_acct", 0, net->proc_net, 35162306a36Sopenharmony_ci rt_acct_proc_show); 35262306a36Sopenharmony_ci if (!pde) 35362306a36Sopenharmony_ci goto err3; 35462306a36Sopenharmony_ci#endif 35562306a36Sopenharmony_ci return 0; 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_CLASSID 35862306a36Sopenharmony_cierr3: 35962306a36Sopenharmony_ci remove_proc_entry("rt_cache", net->proc_net_stat); 36062306a36Sopenharmony_ci#endif 36162306a36Sopenharmony_cierr2: 36262306a36Sopenharmony_ci remove_proc_entry("rt_cache", net->proc_net); 36362306a36Sopenharmony_cierr1: 36462306a36Sopenharmony_ci return -ENOMEM; 36562306a36Sopenharmony_ci} 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_cistatic void __net_exit ip_rt_do_proc_exit(struct net *net) 36862306a36Sopenharmony_ci{ 
36962306a36Sopenharmony_ci remove_proc_entry("rt_cache", net->proc_net_stat); 37062306a36Sopenharmony_ci remove_proc_entry("rt_cache", net->proc_net); 37162306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_CLASSID 37262306a36Sopenharmony_ci remove_proc_entry("rt_acct", net->proc_net); 37362306a36Sopenharmony_ci#endif 37462306a36Sopenharmony_ci} 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_cistatic struct pernet_operations ip_rt_proc_ops __net_initdata = { 37762306a36Sopenharmony_ci .init = ip_rt_do_proc_init, 37862306a36Sopenharmony_ci .exit = ip_rt_do_proc_exit, 37962306a36Sopenharmony_ci}; 38062306a36Sopenharmony_ci 38162306a36Sopenharmony_cistatic int __init ip_rt_proc_init(void) 38262306a36Sopenharmony_ci{ 38362306a36Sopenharmony_ci return register_pernet_subsys(&ip_rt_proc_ops); 38462306a36Sopenharmony_ci} 38562306a36Sopenharmony_ci 38662306a36Sopenharmony_ci#else 38762306a36Sopenharmony_cistatic inline int ip_rt_proc_init(void) 38862306a36Sopenharmony_ci{ 38962306a36Sopenharmony_ci return 0; 39062306a36Sopenharmony_ci} 39162306a36Sopenharmony_ci#endif /* CONFIG_PROC_FS */ 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_cistatic inline bool rt_is_expired(const struct rtable *rth) 39462306a36Sopenharmony_ci{ 39562306a36Sopenharmony_ci return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev)); 39662306a36Sopenharmony_ci} 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_civoid rt_cache_flush(struct net *net) 39962306a36Sopenharmony_ci{ 40062306a36Sopenharmony_ci rt_genid_bump_ipv4(net); 40162306a36Sopenharmony_ci} 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_cistatic struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, 40462306a36Sopenharmony_ci struct sk_buff *skb, 40562306a36Sopenharmony_ci const void *daddr) 40662306a36Sopenharmony_ci{ 40762306a36Sopenharmony_ci const struct rtable *rt = container_of(dst, struct rtable, dst); 40862306a36Sopenharmony_ci struct net_device *dev = dst->dev; 40962306a36Sopenharmony_ci struct neighbour *n; 
41062306a36Sopenharmony_ci 41162306a36Sopenharmony_ci rcu_read_lock(); 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci if (likely(rt->rt_gw_family == AF_INET)) { 41462306a36Sopenharmony_ci n = ip_neigh_gw4(dev, rt->rt_gw4); 41562306a36Sopenharmony_ci } else if (rt->rt_gw_family == AF_INET6) { 41662306a36Sopenharmony_ci n = ip_neigh_gw6(dev, &rt->rt_gw6); 41762306a36Sopenharmony_ci } else { 41862306a36Sopenharmony_ci __be32 pkey; 41962306a36Sopenharmony_ci 42062306a36Sopenharmony_ci pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr); 42162306a36Sopenharmony_ci n = ip_neigh_gw4(dev, pkey); 42262306a36Sopenharmony_ci } 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci if (!IS_ERR(n) && !refcount_inc_not_zero(&n->refcnt)) 42562306a36Sopenharmony_ci n = NULL; 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci rcu_read_unlock(); 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci return n; 43062306a36Sopenharmony_ci} 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_cistatic void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) 43362306a36Sopenharmony_ci{ 43462306a36Sopenharmony_ci const struct rtable *rt = container_of(dst, struct rtable, dst); 43562306a36Sopenharmony_ci struct net_device *dev = dst->dev; 43662306a36Sopenharmony_ci const __be32 *pkey = daddr; 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci if (rt->rt_gw_family == AF_INET) { 43962306a36Sopenharmony_ci pkey = (const __be32 *)&rt->rt_gw4; 44062306a36Sopenharmony_ci } else if (rt->rt_gw_family == AF_INET6) { 44162306a36Sopenharmony_ci return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6); 44262306a36Sopenharmony_ci } else if (!daddr || 44362306a36Sopenharmony_ci (rt->rt_flags & 44462306a36Sopenharmony_ci (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) { 44562306a36Sopenharmony_ci return; 44662306a36Sopenharmony_ci } 44762306a36Sopenharmony_ci __ipv4_confirm_neigh(dev, *(__force u32 *)pkey); 44862306a36Sopenharmony_ci} 44962306a36Sopenharmony_ci 
45062306a36Sopenharmony_ci/* Hash tables of size 2048..262144 depending on RAM size. 45162306a36Sopenharmony_ci * Each bucket uses 8 bytes. 45262306a36Sopenharmony_ci */ 45362306a36Sopenharmony_cistatic u32 ip_idents_mask __read_mostly; 45462306a36Sopenharmony_cistatic atomic_t *ip_idents __read_mostly; 45562306a36Sopenharmony_cistatic u32 *ip_tstamps __read_mostly; 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_ci/* In order to protect privacy, we add a perturbation to identifiers 45862306a36Sopenharmony_ci * if one generator is seldom used. This makes hard for an attacker 45962306a36Sopenharmony_ci * to infer how many packets were sent between two points in time. 46062306a36Sopenharmony_ci */ 46162306a36Sopenharmony_cistatic u32 ip_idents_reserve(u32 hash, int segs) 46262306a36Sopenharmony_ci{ 46362306a36Sopenharmony_ci u32 bucket, old, now = (u32)jiffies; 46462306a36Sopenharmony_ci atomic_t *p_id; 46562306a36Sopenharmony_ci u32 *p_tstamp; 46662306a36Sopenharmony_ci u32 delta = 0; 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci bucket = hash & ip_idents_mask; 46962306a36Sopenharmony_ci p_tstamp = ip_tstamps + bucket; 47062306a36Sopenharmony_ci p_id = ip_idents + bucket; 47162306a36Sopenharmony_ci old = READ_ONCE(*p_tstamp); 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci if (old != now && cmpxchg(p_tstamp, old, now) == old) 47462306a36Sopenharmony_ci delta = get_random_u32_below(now - old); 47562306a36Sopenharmony_ci 47662306a36Sopenharmony_ci /* If UBSAN reports an error there, please make sure your compiler 47762306a36Sopenharmony_ci * supports -fno-strict-overflow before reporting it that was a bug 47862306a36Sopenharmony_ci * in UBSAN, and it has been fixed in GCC-8. 
47962306a36Sopenharmony_ci */ 48062306a36Sopenharmony_ci return atomic_add_return(segs + delta, p_id) - segs; 48162306a36Sopenharmony_ci} 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_civoid __ip_select_ident(struct net *net, struct iphdr *iph, int segs) 48462306a36Sopenharmony_ci{ 48562306a36Sopenharmony_ci u32 hash, id; 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci /* Note the following code is not safe, but this is okay. */ 48862306a36Sopenharmony_ci if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) 48962306a36Sopenharmony_ci get_random_bytes(&net->ipv4.ip_id_key, 49062306a36Sopenharmony_ci sizeof(net->ipv4.ip_id_key)); 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci hash = siphash_3u32((__force u32)iph->daddr, 49362306a36Sopenharmony_ci (__force u32)iph->saddr, 49462306a36Sopenharmony_ci iph->protocol, 49562306a36Sopenharmony_ci &net->ipv4.ip_id_key); 49662306a36Sopenharmony_ci id = ip_idents_reserve(hash, segs); 49762306a36Sopenharmony_ci iph->id = htons(id); 49862306a36Sopenharmony_ci} 49962306a36Sopenharmony_ciEXPORT_SYMBOL(__ip_select_ident); 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_cistatic void ip_rt_fix_tos(struct flowi4 *fl4) 50262306a36Sopenharmony_ci{ 50362306a36Sopenharmony_ci __u8 tos = RT_FL_TOS(fl4); 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci fl4->flowi4_tos = tos & IPTOS_RT_MASK; 50662306a36Sopenharmony_ci if (tos & RTO_ONLINK) 50762306a36Sopenharmony_ci fl4->flowi4_scope = RT_SCOPE_LINK; 50862306a36Sopenharmony_ci} 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_cistatic void __build_flow_key(const struct net *net, struct flowi4 *fl4, 51162306a36Sopenharmony_ci const struct sock *sk, const struct iphdr *iph, 51262306a36Sopenharmony_ci int oif, __u8 tos, u8 prot, u32 mark, 51362306a36Sopenharmony_ci int flow_flags) 51462306a36Sopenharmony_ci{ 51562306a36Sopenharmony_ci __u8 scope = RT_SCOPE_UNIVERSE; 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci if (sk) { 51862306a36Sopenharmony_ci oif = 
sk->sk_bound_dev_if; 51962306a36Sopenharmony_ci mark = READ_ONCE(sk->sk_mark); 52062306a36Sopenharmony_ci tos = ip_sock_rt_tos(sk); 52162306a36Sopenharmony_ci scope = ip_sock_rt_scope(sk); 52262306a36Sopenharmony_ci prot = inet_test_bit(HDRINCL, sk) ? IPPROTO_RAW : 52362306a36Sopenharmony_ci sk->sk_protocol; 52462306a36Sopenharmony_ci } 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ci flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope, 52762306a36Sopenharmony_ci prot, flow_flags, iph->daddr, iph->saddr, 0, 0, 52862306a36Sopenharmony_ci sock_net_uid(net, sk)); 52962306a36Sopenharmony_ci} 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_cistatic void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, 53262306a36Sopenharmony_ci const struct sock *sk) 53362306a36Sopenharmony_ci{ 53462306a36Sopenharmony_ci const struct net *net = dev_net(skb->dev); 53562306a36Sopenharmony_ci const struct iphdr *iph = ip_hdr(skb); 53662306a36Sopenharmony_ci int oif = skb->dev->ifindex; 53762306a36Sopenharmony_ci u8 prot = iph->protocol; 53862306a36Sopenharmony_ci u32 mark = skb->mark; 53962306a36Sopenharmony_ci __u8 tos = iph->tos; 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_ci __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0); 54262306a36Sopenharmony_ci} 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_cistatic void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) 54562306a36Sopenharmony_ci{ 54662306a36Sopenharmony_ci const struct inet_sock *inet = inet_sk(sk); 54762306a36Sopenharmony_ci const struct ip_options_rcu *inet_opt; 54862306a36Sopenharmony_ci __be32 daddr = inet->inet_daddr; 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_ci rcu_read_lock(); 55162306a36Sopenharmony_ci inet_opt = rcu_dereference(inet->inet_opt); 55262306a36Sopenharmony_ci if (inet_opt && inet_opt->opt.srr) 55362306a36Sopenharmony_ci daddr = inet_opt->opt.faddr; 55462306a36Sopenharmony_ci flowi4_init_output(fl4, sk->sk_bound_dev_if, 
READ_ONCE(sk->sk_mark), 55562306a36Sopenharmony_ci ip_sock_rt_tos(sk) & IPTOS_RT_MASK, 55662306a36Sopenharmony_ci ip_sock_rt_scope(sk), 55762306a36Sopenharmony_ci inet_test_bit(HDRINCL, sk) ? 55862306a36Sopenharmony_ci IPPROTO_RAW : sk->sk_protocol, 55962306a36Sopenharmony_ci inet_sk_flowi_flags(sk), 56062306a36Sopenharmony_ci daddr, inet->inet_saddr, 0, 0, sk->sk_uid); 56162306a36Sopenharmony_ci rcu_read_unlock(); 56262306a36Sopenharmony_ci} 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_cistatic void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, 56562306a36Sopenharmony_ci const struct sk_buff *skb) 56662306a36Sopenharmony_ci{ 56762306a36Sopenharmony_ci if (skb) 56862306a36Sopenharmony_ci build_skb_flow_key(fl4, skb, sk); 56962306a36Sopenharmony_ci else 57062306a36Sopenharmony_ci build_sk_flow_key(fl4, sk); 57162306a36Sopenharmony_ci} 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_cistatic DEFINE_SPINLOCK(fnhe_lock); 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_cistatic void fnhe_flush_routes(struct fib_nh_exception *fnhe) 57662306a36Sopenharmony_ci{ 57762306a36Sopenharmony_ci struct rtable *rt; 57862306a36Sopenharmony_ci 57962306a36Sopenharmony_ci rt = rcu_dereference(fnhe->fnhe_rth_input); 58062306a36Sopenharmony_ci if (rt) { 58162306a36Sopenharmony_ci RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL); 58262306a36Sopenharmony_ci dst_dev_put(&rt->dst); 58362306a36Sopenharmony_ci dst_release(&rt->dst); 58462306a36Sopenharmony_ci } 58562306a36Sopenharmony_ci rt = rcu_dereference(fnhe->fnhe_rth_output); 58662306a36Sopenharmony_ci if (rt) { 58762306a36Sopenharmony_ci RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL); 58862306a36Sopenharmony_ci dst_dev_put(&rt->dst); 58962306a36Sopenharmony_ci dst_release(&rt->dst); 59062306a36Sopenharmony_ci } 59162306a36Sopenharmony_ci} 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_cistatic void fnhe_remove_oldest(struct fnhe_hash_bucket *hash) 59462306a36Sopenharmony_ci{ 59562306a36Sopenharmony_ci struct 
fib_nh_exception __rcu **fnhe_p, **oldest_p; 59662306a36Sopenharmony_ci struct fib_nh_exception *fnhe, *oldest = NULL; 59762306a36Sopenharmony_ci 59862306a36Sopenharmony_ci for (fnhe_p = &hash->chain; ; fnhe_p = &fnhe->fnhe_next) { 59962306a36Sopenharmony_ci fnhe = rcu_dereference_protected(*fnhe_p, 60062306a36Sopenharmony_ci lockdep_is_held(&fnhe_lock)); 60162306a36Sopenharmony_ci if (!fnhe) 60262306a36Sopenharmony_ci break; 60362306a36Sopenharmony_ci if (!oldest || 60462306a36Sopenharmony_ci time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) { 60562306a36Sopenharmony_ci oldest = fnhe; 60662306a36Sopenharmony_ci oldest_p = fnhe_p; 60762306a36Sopenharmony_ci } 60862306a36Sopenharmony_ci } 60962306a36Sopenharmony_ci fnhe_flush_routes(oldest); 61062306a36Sopenharmony_ci *oldest_p = oldest->fnhe_next; 61162306a36Sopenharmony_ci kfree_rcu(oldest, rcu); 61262306a36Sopenharmony_ci} 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_cistatic u32 fnhe_hashfun(__be32 daddr) 61562306a36Sopenharmony_ci{ 61662306a36Sopenharmony_ci static siphash_aligned_key_t fnhe_hash_key; 61762306a36Sopenharmony_ci u64 hval; 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci net_get_random_once(&fnhe_hash_key, sizeof(fnhe_hash_key)); 62062306a36Sopenharmony_ci hval = siphash_1u32((__force u32)daddr, &fnhe_hash_key); 62162306a36Sopenharmony_ci return hash_64(hval, FNHE_HASH_SHIFT); 62262306a36Sopenharmony_ci} 62362306a36Sopenharmony_ci 62462306a36Sopenharmony_cistatic void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) 62562306a36Sopenharmony_ci{ 62662306a36Sopenharmony_ci rt->rt_pmtu = fnhe->fnhe_pmtu; 62762306a36Sopenharmony_ci rt->rt_mtu_locked = fnhe->fnhe_mtu_locked; 62862306a36Sopenharmony_ci rt->dst.expires = fnhe->fnhe_expires; 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci if (fnhe->fnhe_gw) { 63162306a36Sopenharmony_ci rt->rt_flags |= RTCF_REDIRECTED; 63262306a36Sopenharmony_ci rt->rt_uses_gateway = 1; 63362306a36Sopenharmony_ci rt->rt_gw_family = 
AF_INET; 63462306a36Sopenharmony_ci rt->rt_gw4 = fnhe->fnhe_gw; 63562306a36Sopenharmony_ci } 63662306a36Sopenharmony_ci} 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_cistatic void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, 63962306a36Sopenharmony_ci __be32 gw, u32 pmtu, bool lock, 64062306a36Sopenharmony_ci unsigned long expires) 64162306a36Sopenharmony_ci{ 64262306a36Sopenharmony_ci struct fnhe_hash_bucket *hash; 64362306a36Sopenharmony_ci struct fib_nh_exception *fnhe; 64462306a36Sopenharmony_ci struct rtable *rt; 64562306a36Sopenharmony_ci u32 genid, hval; 64662306a36Sopenharmony_ci unsigned int i; 64762306a36Sopenharmony_ci int depth; 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci genid = fnhe_genid(dev_net(nhc->nhc_dev)); 65062306a36Sopenharmony_ci hval = fnhe_hashfun(daddr); 65162306a36Sopenharmony_ci 65262306a36Sopenharmony_ci spin_lock_bh(&fnhe_lock); 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ci hash = rcu_dereference(nhc->nhc_exceptions); 65562306a36Sopenharmony_ci if (!hash) { 65662306a36Sopenharmony_ci hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC); 65762306a36Sopenharmony_ci if (!hash) 65862306a36Sopenharmony_ci goto out_unlock; 65962306a36Sopenharmony_ci rcu_assign_pointer(nhc->nhc_exceptions, hash); 66062306a36Sopenharmony_ci } 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci hash += hval; 66362306a36Sopenharmony_ci 66462306a36Sopenharmony_ci depth = 0; 66562306a36Sopenharmony_ci for (fnhe = rcu_dereference(hash->chain); fnhe; 66662306a36Sopenharmony_ci fnhe = rcu_dereference(fnhe->fnhe_next)) { 66762306a36Sopenharmony_ci if (fnhe->fnhe_daddr == daddr) 66862306a36Sopenharmony_ci break; 66962306a36Sopenharmony_ci depth++; 67062306a36Sopenharmony_ci } 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci if (fnhe) { 67362306a36Sopenharmony_ci if (fnhe->fnhe_genid != genid) 67462306a36Sopenharmony_ci fnhe->fnhe_genid = genid; 67562306a36Sopenharmony_ci if (gw) 67662306a36Sopenharmony_ci 
fnhe->fnhe_gw = gw; 67762306a36Sopenharmony_ci if (pmtu) { 67862306a36Sopenharmony_ci fnhe->fnhe_pmtu = pmtu; 67962306a36Sopenharmony_ci fnhe->fnhe_mtu_locked = lock; 68062306a36Sopenharmony_ci } 68162306a36Sopenharmony_ci fnhe->fnhe_expires = max(1UL, expires); 68262306a36Sopenharmony_ci /* Update all cached dsts too */ 68362306a36Sopenharmony_ci rt = rcu_dereference(fnhe->fnhe_rth_input); 68462306a36Sopenharmony_ci if (rt) 68562306a36Sopenharmony_ci fill_route_from_fnhe(rt, fnhe); 68662306a36Sopenharmony_ci rt = rcu_dereference(fnhe->fnhe_rth_output); 68762306a36Sopenharmony_ci if (rt) 68862306a36Sopenharmony_ci fill_route_from_fnhe(rt, fnhe); 68962306a36Sopenharmony_ci } else { 69062306a36Sopenharmony_ci /* Randomize max depth to avoid some side channels attacks. */ 69162306a36Sopenharmony_ci int max_depth = FNHE_RECLAIM_DEPTH + 69262306a36Sopenharmony_ci get_random_u32_below(FNHE_RECLAIM_DEPTH); 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_ci while (depth > max_depth) { 69562306a36Sopenharmony_ci fnhe_remove_oldest(hash); 69662306a36Sopenharmony_ci depth--; 69762306a36Sopenharmony_ci } 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); 70062306a36Sopenharmony_ci if (!fnhe) 70162306a36Sopenharmony_ci goto out_unlock; 70262306a36Sopenharmony_ci 70362306a36Sopenharmony_ci fnhe->fnhe_next = hash->chain; 70462306a36Sopenharmony_ci 70562306a36Sopenharmony_ci fnhe->fnhe_genid = genid; 70662306a36Sopenharmony_ci fnhe->fnhe_daddr = daddr; 70762306a36Sopenharmony_ci fnhe->fnhe_gw = gw; 70862306a36Sopenharmony_ci fnhe->fnhe_pmtu = pmtu; 70962306a36Sopenharmony_ci fnhe->fnhe_mtu_locked = lock; 71062306a36Sopenharmony_ci fnhe->fnhe_expires = max(1UL, expires); 71162306a36Sopenharmony_ci 71262306a36Sopenharmony_ci rcu_assign_pointer(hash->chain, fnhe); 71362306a36Sopenharmony_ci 71462306a36Sopenharmony_ci /* Exception created; mark the cached routes for the nexthop 71562306a36Sopenharmony_ci * stale, so anyone caching it 
rechecks if this exception 71662306a36Sopenharmony_ci * applies to them. 71762306a36Sopenharmony_ci */ 71862306a36Sopenharmony_ci rt = rcu_dereference(nhc->nhc_rth_input); 71962306a36Sopenharmony_ci if (rt) 72062306a36Sopenharmony_ci rt->dst.obsolete = DST_OBSOLETE_KILL; 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci for_each_possible_cpu(i) { 72362306a36Sopenharmony_ci struct rtable __rcu **prt; 72462306a36Sopenharmony_ci 72562306a36Sopenharmony_ci prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i); 72662306a36Sopenharmony_ci rt = rcu_dereference(*prt); 72762306a36Sopenharmony_ci if (rt) 72862306a36Sopenharmony_ci rt->dst.obsolete = DST_OBSOLETE_KILL; 72962306a36Sopenharmony_ci } 73062306a36Sopenharmony_ci } 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_ci fnhe->fnhe_stamp = jiffies; 73362306a36Sopenharmony_ci 73462306a36Sopenharmony_ciout_unlock: 73562306a36Sopenharmony_ci spin_unlock_bh(&fnhe_lock); 73662306a36Sopenharmony_ci} 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_cistatic void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4, 73962306a36Sopenharmony_ci bool kill_route) 74062306a36Sopenharmony_ci{ 74162306a36Sopenharmony_ci __be32 new_gw = icmp_hdr(skb)->un.gateway; 74262306a36Sopenharmony_ci __be32 old_gw = ip_hdr(skb)->saddr; 74362306a36Sopenharmony_ci struct net_device *dev = skb->dev; 74462306a36Sopenharmony_ci struct in_device *in_dev; 74562306a36Sopenharmony_ci struct fib_result res; 74662306a36Sopenharmony_ci struct neighbour *n; 74762306a36Sopenharmony_ci struct net *net; 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_ci switch (icmp_hdr(skb)->code & 7) { 75062306a36Sopenharmony_ci case ICMP_REDIR_NET: 75162306a36Sopenharmony_ci case ICMP_REDIR_NETTOS: 75262306a36Sopenharmony_ci case ICMP_REDIR_HOST: 75362306a36Sopenharmony_ci case ICMP_REDIR_HOSTTOS: 75462306a36Sopenharmony_ci break; 75562306a36Sopenharmony_ci 75662306a36Sopenharmony_ci default: 75762306a36Sopenharmony_ci return; 
75862306a36Sopenharmony_ci } 75962306a36Sopenharmony_ci 76062306a36Sopenharmony_ci if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw) 76162306a36Sopenharmony_ci return; 76262306a36Sopenharmony_ci 76362306a36Sopenharmony_ci in_dev = __in_dev_get_rcu(dev); 76462306a36Sopenharmony_ci if (!in_dev) 76562306a36Sopenharmony_ci return; 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_ci net = dev_net(dev); 76862306a36Sopenharmony_ci if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || 76962306a36Sopenharmony_ci ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) || 77062306a36Sopenharmony_ci ipv4_is_zeronet(new_gw)) 77162306a36Sopenharmony_ci goto reject_redirect; 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci if (!IN_DEV_SHARED_MEDIA(in_dev)) { 77462306a36Sopenharmony_ci if (!inet_addr_onlink(in_dev, new_gw, old_gw)) 77562306a36Sopenharmony_ci goto reject_redirect; 77662306a36Sopenharmony_ci if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) 77762306a36Sopenharmony_ci goto reject_redirect; 77862306a36Sopenharmony_ci } else { 77962306a36Sopenharmony_ci if (inet_addr_type(net, new_gw) != RTN_UNICAST) 78062306a36Sopenharmony_ci goto reject_redirect; 78162306a36Sopenharmony_ci } 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_ci n = __ipv4_neigh_lookup(rt->dst.dev, (__force u32)new_gw); 78462306a36Sopenharmony_ci if (!n) 78562306a36Sopenharmony_ci n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); 78662306a36Sopenharmony_ci if (!IS_ERR(n)) { 78762306a36Sopenharmony_ci if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { 78862306a36Sopenharmony_ci neigh_event_send(n, NULL); 78962306a36Sopenharmony_ci } else { 79062306a36Sopenharmony_ci if (fib_lookup(net, fl4, &res, 0) == 0) { 79162306a36Sopenharmony_ci struct fib_nh_common *nhc; 79262306a36Sopenharmony_ci 79362306a36Sopenharmony_ci fib_select_path(net, &res, fl4, skb); 79462306a36Sopenharmony_ci nhc = FIB_RES_NHC(res); 79562306a36Sopenharmony_ci update_or_create_fnhe(nhc, fl4->daddr, new_gw, 
79662306a36Sopenharmony_ci 0, false, 79762306a36Sopenharmony_ci jiffies + ip_rt_gc_timeout); 79862306a36Sopenharmony_ci } 79962306a36Sopenharmony_ci if (kill_route) 80062306a36Sopenharmony_ci rt->dst.obsolete = DST_OBSOLETE_KILL; 80162306a36Sopenharmony_ci call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); 80262306a36Sopenharmony_ci } 80362306a36Sopenharmony_ci neigh_release(n); 80462306a36Sopenharmony_ci } 80562306a36Sopenharmony_ci return; 80662306a36Sopenharmony_ci 80762306a36Sopenharmony_cireject_redirect: 80862306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_VERBOSE 80962306a36Sopenharmony_ci if (IN_DEV_LOG_MARTIANS(in_dev)) { 81062306a36Sopenharmony_ci const struct iphdr *iph = (const struct iphdr *) skb->data; 81162306a36Sopenharmony_ci __be32 daddr = iph->daddr; 81262306a36Sopenharmony_ci __be32 saddr = iph->saddr; 81362306a36Sopenharmony_ci 81462306a36Sopenharmony_ci net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n" 81562306a36Sopenharmony_ci " Advised path = %pI4 -> %pI4\n", 81662306a36Sopenharmony_ci &old_gw, dev->name, &new_gw, 81762306a36Sopenharmony_ci &saddr, &daddr); 81862306a36Sopenharmony_ci } 81962306a36Sopenharmony_ci#endif 82062306a36Sopenharmony_ci ; 82162306a36Sopenharmony_ci} 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_cistatic void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) 82462306a36Sopenharmony_ci{ 82562306a36Sopenharmony_ci struct rtable *rt; 82662306a36Sopenharmony_ci struct flowi4 fl4; 82762306a36Sopenharmony_ci const struct iphdr *iph = (const struct iphdr *) skb->data; 82862306a36Sopenharmony_ci struct net *net = dev_net(skb->dev); 82962306a36Sopenharmony_ci int oif = skb->dev->ifindex; 83062306a36Sopenharmony_ci u8 prot = iph->protocol; 83162306a36Sopenharmony_ci u32 mark = skb->mark; 83262306a36Sopenharmony_ci __u8 tos = iph->tos; 83362306a36Sopenharmony_ci 83462306a36Sopenharmony_ci rt = (struct rtable *) dst; 83562306a36Sopenharmony_ci 83662306a36Sopenharmony_ci 
__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); 83762306a36Sopenharmony_ci __ip_do_redirect(rt, skb, &fl4, true); 83862306a36Sopenharmony_ci} 83962306a36Sopenharmony_ci 84062306a36Sopenharmony_cistatic struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) 84162306a36Sopenharmony_ci{ 84262306a36Sopenharmony_ci struct rtable *rt = (struct rtable *)dst; 84362306a36Sopenharmony_ci struct dst_entry *ret = dst; 84462306a36Sopenharmony_ci 84562306a36Sopenharmony_ci if (rt) { 84662306a36Sopenharmony_ci if (dst->obsolete > 0) { 84762306a36Sopenharmony_ci ip_rt_put(rt); 84862306a36Sopenharmony_ci ret = NULL; 84962306a36Sopenharmony_ci } else if ((rt->rt_flags & RTCF_REDIRECTED) || 85062306a36Sopenharmony_ci rt->dst.expires) { 85162306a36Sopenharmony_ci ip_rt_put(rt); 85262306a36Sopenharmony_ci ret = NULL; 85362306a36Sopenharmony_ci } 85462306a36Sopenharmony_ci } 85562306a36Sopenharmony_ci return ret; 85662306a36Sopenharmony_ci} 85762306a36Sopenharmony_ci 85862306a36Sopenharmony_ci/* 85962306a36Sopenharmony_ci * Algorithm: 86062306a36Sopenharmony_ci * 1. The first ip_rt_redirect_number redirects are sent 86162306a36Sopenharmony_ci * with exponential backoff, then we stop sending them at all, 86262306a36Sopenharmony_ci * assuming that the host ignores our redirects. 86362306a36Sopenharmony_ci * 2. If we did not see packets requiring redirects 86462306a36Sopenharmony_ci * during ip_rt_redirect_silence, we assume that the host 86562306a36Sopenharmony_ci * forgot redirected route and start to send redirects again. 86662306a36Sopenharmony_ci * 86762306a36Sopenharmony_ci * This algorithm is much cheaper and more intelligent than dumb load limiting 86862306a36Sopenharmony_ci * in icmp.c. 86962306a36Sopenharmony_ci * 87062306a36Sopenharmony_ci * NOTE. Do not forget to inhibit load limiting for redirects (redundant) 87162306a36Sopenharmony_ci * and "frag. need" (breaks PMTU discovery) in icmp.c. 
87262306a36Sopenharmony_ci */ 87362306a36Sopenharmony_ci 87462306a36Sopenharmony_civoid ip_rt_send_redirect(struct sk_buff *skb) 87562306a36Sopenharmony_ci{ 87662306a36Sopenharmony_ci struct rtable *rt = skb_rtable(skb); 87762306a36Sopenharmony_ci struct in_device *in_dev; 87862306a36Sopenharmony_ci struct inet_peer *peer; 87962306a36Sopenharmony_ci struct net *net; 88062306a36Sopenharmony_ci int log_martians; 88162306a36Sopenharmony_ci int vif; 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_ci rcu_read_lock(); 88462306a36Sopenharmony_ci in_dev = __in_dev_get_rcu(rt->dst.dev); 88562306a36Sopenharmony_ci if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { 88662306a36Sopenharmony_ci rcu_read_unlock(); 88762306a36Sopenharmony_ci return; 88862306a36Sopenharmony_ci } 88962306a36Sopenharmony_ci log_martians = IN_DEV_LOG_MARTIANS(in_dev); 89062306a36Sopenharmony_ci vif = l3mdev_master_ifindex_rcu(rt->dst.dev); 89162306a36Sopenharmony_ci rcu_read_unlock(); 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_ci net = dev_net(rt->dst.dev); 89462306a36Sopenharmony_ci peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1); 89562306a36Sopenharmony_ci if (!peer) { 89662306a36Sopenharmony_ci icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, 89762306a36Sopenharmony_ci rt_nexthop(rt, ip_hdr(skb)->daddr)); 89862306a36Sopenharmony_ci return; 89962306a36Sopenharmony_ci } 90062306a36Sopenharmony_ci 90162306a36Sopenharmony_ci /* No redirected packets during ip_rt_redirect_silence; 90262306a36Sopenharmony_ci * reset the algorithm. 90362306a36Sopenharmony_ci */ 90462306a36Sopenharmony_ci if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) { 90562306a36Sopenharmony_ci peer->rate_tokens = 0; 90662306a36Sopenharmony_ci peer->n_redirects = 0; 90762306a36Sopenharmony_ci } 90862306a36Sopenharmony_ci 90962306a36Sopenharmony_ci /* Too many ignored redirects; do not send anything 91062306a36Sopenharmony_ci * set dst.rate_last to the last seen redirected packet. 
91162306a36Sopenharmony_ci */ 91262306a36Sopenharmony_ci if (peer->n_redirects >= ip_rt_redirect_number) { 91362306a36Sopenharmony_ci peer->rate_last = jiffies; 91462306a36Sopenharmony_ci goto out_put_peer; 91562306a36Sopenharmony_ci } 91662306a36Sopenharmony_ci 91762306a36Sopenharmony_ci /* Check for load limit; set rate_last to the latest sent 91862306a36Sopenharmony_ci * redirect. 91962306a36Sopenharmony_ci */ 92062306a36Sopenharmony_ci if (peer->n_redirects == 0 || 92162306a36Sopenharmony_ci time_after(jiffies, 92262306a36Sopenharmony_ci (peer->rate_last + 92362306a36Sopenharmony_ci (ip_rt_redirect_load << peer->n_redirects)))) { 92462306a36Sopenharmony_ci __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr); 92562306a36Sopenharmony_ci 92662306a36Sopenharmony_ci icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); 92762306a36Sopenharmony_ci peer->rate_last = jiffies; 92862306a36Sopenharmony_ci ++peer->n_redirects; 92962306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_VERBOSE 93062306a36Sopenharmony_ci if (log_martians && 93162306a36Sopenharmony_ci peer->n_redirects == ip_rt_redirect_number) 93262306a36Sopenharmony_ci net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", 93362306a36Sopenharmony_ci &ip_hdr(skb)->saddr, inet_iif(skb), 93462306a36Sopenharmony_ci &ip_hdr(skb)->daddr, &gw); 93562306a36Sopenharmony_ci#endif 93662306a36Sopenharmony_ci } 93762306a36Sopenharmony_ciout_put_peer: 93862306a36Sopenharmony_ci inet_putpeer(peer); 93962306a36Sopenharmony_ci} 94062306a36Sopenharmony_ci 94162306a36Sopenharmony_cistatic int ip_error(struct sk_buff *skb) 94262306a36Sopenharmony_ci{ 94362306a36Sopenharmony_ci struct rtable *rt = skb_rtable(skb); 94462306a36Sopenharmony_ci struct net_device *dev = skb->dev; 94562306a36Sopenharmony_ci struct in_device *in_dev; 94662306a36Sopenharmony_ci struct inet_peer *peer; 94762306a36Sopenharmony_ci unsigned long now; 94862306a36Sopenharmony_ci struct net *net; 94962306a36Sopenharmony_ci SKB_DR(reason); 
95062306a36Sopenharmony_ci bool send; 95162306a36Sopenharmony_ci int code; 95262306a36Sopenharmony_ci 95362306a36Sopenharmony_ci if (netif_is_l3_master(skb->dev)) { 95462306a36Sopenharmony_ci dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif); 95562306a36Sopenharmony_ci if (!dev) 95662306a36Sopenharmony_ci goto out; 95762306a36Sopenharmony_ci } 95862306a36Sopenharmony_ci 95962306a36Sopenharmony_ci in_dev = __in_dev_get_rcu(dev); 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ci /* IP on this device is disabled. */ 96262306a36Sopenharmony_ci if (!in_dev) 96362306a36Sopenharmony_ci goto out; 96462306a36Sopenharmony_ci 96562306a36Sopenharmony_ci net = dev_net(rt->dst.dev); 96662306a36Sopenharmony_ci if (!IN_DEV_FORWARD(in_dev)) { 96762306a36Sopenharmony_ci switch (rt->dst.error) { 96862306a36Sopenharmony_ci case EHOSTUNREACH: 96962306a36Sopenharmony_ci SKB_DR_SET(reason, IP_INADDRERRORS); 97062306a36Sopenharmony_ci __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS); 97162306a36Sopenharmony_ci break; 97262306a36Sopenharmony_ci 97362306a36Sopenharmony_ci case ENETUNREACH: 97462306a36Sopenharmony_ci SKB_DR_SET(reason, IP_INNOROUTES); 97562306a36Sopenharmony_ci __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); 97662306a36Sopenharmony_ci break; 97762306a36Sopenharmony_ci } 97862306a36Sopenharmony_ci goto out; 97962306a36Sopenharmony_ci } 98062306a36Sopenharmony_ci 98162306a36Sopenharmony_ci switch (rt->dst.error) { 98262306a36Sopenharmony_ci case EINVAL: 98362306a36Sopenharmony_ci default: 98462306a36Sopenharmony_ci goto out; 98562306a36Sopenharmony_ci case EHOSTUNREACH: 98662306a36Sopenharmony_ci code = ICMP_HOST_UNREACH; 98762306a36Sopenharmony_ci break; 98862306a36Sopenharmony_ci case ENETUNREACH: 98962306a36Sopenharmony_ci code = ICMP_NET_UNREACH; 99062306a36Sopenharmony_ci SKB_DR_SET(reason, IP_INNOROUTES); 99162306a36Sopenharmony_ci __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); 99262306a36Sopenharmony_ci break; 99362306a36Sopenharmony_ci case EACCES: 
99462306a36Sopenharmony_ci code = ICMP_PKT_FILTERED; 99562306a36Sopenharmony_ci break; 99662306a36Sopenharmony_ci } 99762306a36Sopenharmony_ci 99862306a36Sopenharmony_ci peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 99962306a36Sopenharmony_ci l3mdev_master_ifindex(skb->dev), 1); 100062306a36Sopenharmony_ci 100162306a36Sopenharmony_ci send = true; 100262306a36Sopenharmony_ci if (peer) { 100362306a36Sopenharmony_ci now = jiffies; 100462306a36Sopenharmony_ci peer->rate_tokens += now - peer->rate_last; 100562306a36Sopenharmony_ci if (peer->rate_tokens > ip_rt_error_burst) 100662306a36Sopenharmony_ci peer->rate_tokens = ip_rt_error_burst; 100762306a36Sopenharmony_ci peer->rate_last = now; 100862306a36Sopenharmony_ci if (peer->rate_tokens >= ip_rt_error_cost) 100962306a36Sopenharmony_ci peer->rate_tokens -= ip_rt_error_cost; 101062306a36Sopenharmony_ci else 101162306a36Sopenharmony_ci send = false; 101262306a36Sopenharmony_ci inet_putpeer(peer); 101362306a36Sopenharmony_ci } 101462306a36Sopenharmony_ci if (send) 101562306a36Sopenharmony_ci icmp_send(skb, ICMP_DEST_UNREACH, code, 0); 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ciout: kfree_skb_reason(skb, reason); 101862306a36Sopenharmony_ci return 0; 101962306a36Sopenharmony_ci} 102062306a36Sopenharmony_ci 102162306a36Sopenharmony_cistatic void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) 102262306a36Sopenharmony_ci{ 102362306a36Sopenharmony_ci struct dst_entry *dst = &rt->dst; 102462306a36Sopenharmony_ci struct net *net = dev_net(dst->dev); 102562306a36Sopenharmony_ci struct fib_result res; 102662306a36Sopenharmony_ci bool lock = false; 102762306a36Sopenharmony_ci u32 old_mtu; 102862306a36Sopenharmony_ci 102962306a36Sopenharmony_ci if (ip_mtu_locked(dst)) 103062306a36Sopenharmony_ci return; 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci old_mtu = ipv4_mtu(dst); 103362306a36Sopenharmony_ci if (old_mtu < mtu) 103462306a36Sopenharmony_ci return; 
103562306a36Sopenharmony_ci 103662306a36Sopenharmony_ci if (mtu < net->ipv4.ip_rt_min_pmtu) { 103762306a36Sopenharmony_ci lock = true; 103862306a36Sopenharmony_ci mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); 103962306a36Sopenharmony_ci } 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ci if (rt->rt_pmtu == mtu && !lock && 104262306a36Sopenharmony_ci time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2)) 104362306a36Sopenharmony_ci return; 104462306a36Sopenharmony_ci 104562306a36Sopenharmony_ci rcu_read_lock(); 104662306a36Sopenharmony_ci if (fib_lookup(net, fl4, &res, 0) == 0) { 104762306a36Sopenharmony_ci struct fib_nh_common *nhc; 104862306a36Sopenharmony_ci 104962306a36Sopenharmony_ci fib_select_path(net, &res, fl4, NULL); 105062306a36Sopenharmony_ci nhc = FIB_RES_NHC(res); 105162306a36Sopenharmony_ci update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, 105262306a36Sopenharmony_ci jiffies + net->ipv4.ip_rt_mtu_expires); 105362306a36Sopenharmony_ci } 105462306a36Sopenharmony_ci rcu_read_unlock(); 105562306a36Sopenharmony_ci} 105662306a36Sopenharmony_ci 105762306a36Sopenharmony_cistatic void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 105862306a36Sopenharmony_ci struct sk_buff *skb, u32 mtu, 105962306a36Sopenharmony_ci bool confirm_neigh) 106062306a36Sopenharmony_ci{ 106162306a36Sopenharmony_ci struct rtable *rt = (struct rtable *) dst; 106262306a36Sopenharmony_ci struct flowi4 fl4; 106362306a36Sopenharmony_ci 106462306a36Sopenharmony_ci ip_rt_build_flow_key(&fl4, sk, skb); 106562306a36Sopenharmony_ci 106662306a36Sopenharmony_ci /* Don't make lookup fail for bridged encapsulations */ 106762306a36Sopenharmony_ci if (skb && netif_is_any_bridge_port(skb->dev)) 106862306a36Sopenharmony_ci fl4.flowi4_oif = 0; 106962306a36Sopenharmony_ci 107062306a36Sopenharmony_ci __ip_rt_update_pmtu(rt, &fl4, mtu); 107162306a36Sopenharmony_ci} 107262306a36Sopenharmony_ci 107362306a36Sopenharmony_civoid ipv4_update_pmtu(struct sk_buff *skb, struct net 
*net, u32 mtu, 107462306a36Sopenharmony_ci int oif, u8 protocol) 107562306a36Sopenharmony_ci{ 107662306a36Sopenharmony_ci const struct iphdr *iph = (const struct iphdr *)skb->data; 107762306a36Sopenharmony_ci struct flowi4 fl4; 107862306a36Sopenharmony_ci struct rtable *rt; 107962306a36Sopenharmony_ci u32 mark = IP4_REPLY_MARK(net, skb->mark); 108062306a36Sopenharmony_ci 108162306a36Sopenharmony_ci __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, mark, 108262306a36Sopenharmony_ci 0); 108362306a36Sopenharmony_ci rt = __ip_route_output_key(net, &fl4); 108462306a36Sopenharmony_ci if (!IS_ERR(rt)) { 108562306a36Sopenharmony_ci __ip_rt_update_pmtu(rt, &fl4, mtu); 108662306a36Sopenharmony_ci ip_rt_put(rt); 108762306a36Sopenharmony_ci } 108862306a36Sopenharmony_ci} 108962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(ipv4_update_pmtu); 109062306a36Sopenharmony_ci 109162306a36Sopenharmony_cistatic void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) 109262306a36Sopenharmony_ci{ 109362306a36Sopenharmony_ci const struct iphdr *iph = (const struct iphdr *)skb->data; 109462306a36Sopenharmony_ci struct flowi4 fl4; 109562306a36Sopenharmony_ci struct rtable *rt; 109662306a36Sopenharmony_ci 109762306a36Sopenharmony_ci __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0); 109862306a36Sopenharmony_ci 109962306a36Sopenharmony_ci if (!fl4.flowi4_mark) 110062306a36Sopenharmony_ci fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); 110162306a36Sopenharmony_ci 110262306a36Sopenharmony_ci rt = __ip_route_output_key(sock_net(sk), &fl4); 110362306a36Sopenharmony_ci if (!IS_ERR(rt)) { 110462306a36Sopenharmony_ci __ip_rt_update_pmtu(rt, &fl4, mtu); 110562306a36Sopenharmony_ci ip_rt_put(rt); 110662306a36Sopenharmony_ci } 110762306a36Sopenharmony_ci} 110862306a36Sopenharmony_ci 110962306a36Sopenharmony_civoid ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) 111062306a36Sopenharmony_ci{ 111162306a36Sopenharmony_ci const struct iphdr 
*iph = (const struct iphdr *)skb->data; 111262306a36Sopenharmony_ci struct flowi4 fl4; 111362306a36Sopenharmony_ci struct rtable *rt; 111462306a36Sopenharmony_ci struct dst_entry *odst = NULL; 111562306a36Sopenharmony_ci bool new = false; 111662306a36Sopenharmony_ci struct net *net = sock_net(sk); 111762306a36Sopenharmony_ci 111862306a36Sopenharmony_ci bh_lock_sock(sk); 111962306a36Sopenharmony_ci 112062306a36Sopenharmony_ci if (!ip_sk_accept_pmtu(sk)) 112162306a36Sopenharmony_ci goto out; 112262306a36Sopenharmony_ci 112362306a36Sopenharmony_ci odst = sk_dst_get(sk); 112462306a36Sopenharmony_ci 112562306a36Sopenharmony_ci if (sock_owned_by_user(sk) || !odst) { 112662306a36Sopenharmony_ci __ipv4_sk_update_pmtu(skb, sk, mtu); 112762306a36Sopenharmony_ci goto out; 112862306a36Sopenharmony_ci } 112962306a36Sopenharmony_ci 113062306a36Sopenharmony_ci __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); 113162306a36Sopenharmony_ci 113262306a36Sopenharmony_ci rt = (struct rtable *)odst; 113362306a36Sopenharmony_ci if (odst->obsolete && !odst->ops->check(odst, 0)) { 113462306a36Sopenharmony_ci rt = ip_route_output_flow(sock_net(sk), &fl4, sk); 113562306a36Sopenharmony_ci if (IS_ERR(rt)) 113662306a36Sopenharmony_ci goto out; 113762306a36Sopenharmony_ci 113862306a36Sopenharmony_ci new = true; 113962306a36Sopenharmony_ci } 114062306a36Sopenharmony_ci 114162306a36Sopenharmony_ci __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu); 114262306a36Sopenharmony_ci 114362306a36Sopenharmony_ci if (!dst_check(&rt->dst, 0)) { 114462306a36Sopenharmony_ci if (new) 114562306a36Sopenharmony_ci dst_release(&rt->dst); 114662306a36Sopenharmony_ci 114762306a36Sopenharmony_ci rt = ip_route_output_flow(sock_net(sk), &fl4, sk); 114862306a36Sopenharmony_ci if (IS_ERR(rt)) 114962306a36Sopenharmony_ci goto out; 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_ci new = true; 115262306a36Sopenharmony_ci } 115362306a36Sopenharmony_ci 115462306a36Sopenharmony_ci if (new) 
115562306a36Sopenharmony_ci sk_dst_set(sk, &rt->dst); 115662306a36Sopenharmony_ci 115762306a36Sopenharmony_ciout: 115862306a36Sopenharmony_ci bh_unlock_sock(sk); 115962306a36Sopenharmony_ci dst_release(odst); 116062306a36Sopenharmony_ci} 116162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); 116262306a36Sopenharmony_ci 116362306a36Sopenharmony_civoid ipv4_redirect(struct sk_buff *skb, struct net *net, 116462306a36Sopenharmony_ci int oif, u8 protocol) 116562306a36Sopenharmony_ci{ 116662306a36Sopenharmony_ci const struct iphdr *iph = (const struct iphdr *)skb->data; 116762306a36Sopenharmony_ci struct flowi4 fl4; 116862306a36Sopenharmony_ci struct rtable *rt; 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, 0, 0); 117162306a36Sopenharmony_ci rt = __ip_route_output_key(net, &fl4); 117262306a36Sopenharmony_ci if (!IS_ERR(rt)) { 117362306a36Sopenharmony_ci __ip_do_redirect(rt, skb, &fl4, false); 117462306a36Sopenharmony_ci ip_rt_put(rt); 117562306a36Sopenharmony_ci } 117662306a36Sopenharmony_ci} 117762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(ipv4_redirect); 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_civoid ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) 118062306a36Sopenharmony_ci{ 118162306a36Sopenharmony_ci const struct iphdr *iph = (const struct iphdr *)skb->data; 118262306a36Sopenharmony_ci struct flowi4 fl4; 118362306a36Sopenharmony_ci struct rtable *rt; 118462306a36Sopenharmony_ci struct net *net = sock_net(sk); 118562306a36Sopenharmony_ci 118662306a36Sopenharmony_ci __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); 118762306a36Sopenharmony_ci rt = __ip_route_output_key(net, &fl4); 118862306a36Sopenharmony_ci if (!IS_ERR(rt)) { 118962306a36Sopenharmony_ci __ip_do_redirect(rt, skb, &fl4, false); 119062306a36Sopenharmony_ci ip_rt_put(rt); 119162306a36Sopenharmony_ci } 119262306a36Sopenharmony_ci} 119362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(ipv4_sk_redirect); 
119462306a36Sopenharmony_ci 119562306a36Sopenharmony_ciINDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst, 119662306a36Sopenharmony_ci u32 cookie) 119762306a36Sopenharmony_ci{ 119862306a36Sopenharmony_ci struct rtable *rt = (struct rtable *) dst; 119962306a36Sopenharmony_ci 120062306a36Sopenharmony_ci /* All IPV4 dsts are created with ->obsolete set to the value 120162306a36Sopenharmony_ci * DST_OBSOLETE_FORCE_CHK which forces validation calls down 120262306a36Sopenharmony_ci * into this function always. 120362306a36Sopenharmony_ci * 120462306a36Sopenharmony_ci * When a PMTU/redirect information update invalidates a route, 120562306a36Sopenharmony_ci * this is indicated by setting obsolete to DST_OBSOLETE_KILL or 120662306a36Sopenharmony_ci * DST_OBSOLETE_DEAD. 120762306a36Sopenharmony_ci */ 120862306a36Sopenharmony_ci if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) 120962306a36Sopenharmony_ci return NULL; 121062306a36Sopenharmony_ci return dst; 121162306a36Sopenharmony_ci} 121262306a36Sopenharmony_ciEXPORT_INDIRECT_CALLABLE(ipv4_dst_check); 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_cistatic void ipv4_send_dest_unreach(struct sk_buff *skb) 121562306a36Sopenharmony_ci{ 121662306a36Sopenharmony_ci struct net_device *dev; 121762306a36Sopenharmony_ci struct ip_options opt; 121862306a36Sopenharmony_ci int res; 121962306a36Sopenharmony_ci 122062306a36Sopenharmony_ci /* Recompile ip options since IPCB may not be valid anymore. 122162306a36Sopenharmony_ci * Also check we have a reasonable ipv4 header. 
122262306a36Sopenharmony_ci */ 122362306a36Sopenharmony_ci if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) || 122462306a36Sopenharmony_ci ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5) 122562306a36Sopenharmony_ci return; 122662306a36Sopenharmony_ci 122762306a36Sopenharmony_ci memset(&opt, 0, sizeof(opt)); 122862306a36Sopenharmony_ci if (ip_hdr(skb)->ihl > 5) { 122962306a36Sopenharmony_ci if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4)) 123062306a36Sopenharmony_ci return; 123162306a36Sopenharmony_ci opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); 123262306a36Sopenharmony_ci 123362306a36Sopenharmony_ci rcu_read_lock(); 123462306a36Sopenharmony_ci dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev; 123562306a36Sopenharmony_ci res = __ip_options_compile(dev_net(dev), &opt, skb, NULL); 123662306a36Sopenharmony_ci rcu_read_unlock(); 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ci if (res) 123962306a36Sopenharmony_ci return; 124062306a36Sopenharmony_ci } 124162306a36Sopenharmony_ci __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); 124262306a36Sopenharmony_ci} 124362306a36Sopenharmony_ci 124462306a36Sopenharmony_cistatic void ipv4_link_failure(struct sk_buff *skb) 124562306a36Sopenharmony_ci{ 124662306a36Sopenharmony_ci struct rtable *rt; 124762306a36Sopenharmony_ci 124862306a36Sopenharmony_ci ipv4_send_dest_unreach(skb); 124962306a36Sopenharmony_ci 125062306a36Sopenharmony_ci rt = skb_rtable(skb); 125162306a36Sopenharmony_ci if (rt) 125262306a36Sopenharmony_ci dst_set_expires(&rt->dst, 0); 125362306a36Sopenharmony_ci} 125462306a36Sopenharmony_ci 125562306a36Sopenharmony_cistatic int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb) 125662306a36Sopenharmony_ci{ 125762306a36Sopenharmony_ci pr_debug("%s: %pI4 -> %pI4, %s\n", 125862306a36Sopenharmony_ci __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, 125962306a36Sopenharmony_ci skb->dev ? 
skb->dev->name : "?"); 126062306a36Sopenharmony_ci kfree_skb(skb); 126162306a36Sopenharmony_ci WARN_ON(1); 126262306a36Sopenharmony_ci return 0; 126362306a36Sopenharmony_ci} 126462306a36Sopenharmony_ci 126562306a36Sopenharmony_ci/* 126662306a36Sopenharmony_ci * We do not cache source address of outgoing interface, 126762306a36Sopenharmony_ci * because it is used only by IP RR, TS and SRR options, 126862306a36Sopenharmony_ci * so that it out of fast path. 126962306a36Sopenharmony_ci * 127062306a36Sopenharmony_ci * BTW remember: "addr" is allowed to be not aligned 127162306a36Sopenharmony_ci * in IP options! 127262306a36Sopenharmony_ci */ 127362306a36Sopenharmony_ci 127462306a36Sopenharmony_civoid ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) 127562306a36Sopenharmony_ci{ 127662306a36Sopenharmony_ci __be32 src; 127762306a36Sopenharmony_ci 127862306a36Sopenharmony_ci if (rt_is_output_route(rt)) 127962306a36Sopenharmony_ci src = ip_hdr(skb)->saddr; 128062306a36Sopenharmony_ci else { 128162306a36Sopenharmony_ci struct fib_result res; 128262306a36Sopenharmony_ci struct iphdr *iph = ip_hdr(skb); 128362306a36Sopenharmony_ci struct flowi4 fl4 = { 128462306a36Sopenharmony_ci .daddr = iph->daddr, 128562306a36Sopenharmony_ci .saddr = iph->saddr, 128662306a36Sopenharmony_ci .flowi4_tos = RT_TOS(iph->tos), 128762306a36Sopenharmony_ci .flowi4_oif = rt->dst.dev->ifindex, 128862306a36Sopenharmony_ci .flowi4_iif = skb->dev->ifindex, 128962306a36Sopenharmony_ci .flowi4_mark = skb->mark, 129062306a36Sopenharmony_ci }; 129162306a36Sopenharmony_ci 129262306a36Sopenharmony_ci rcu_read_lock(); 129362306a36Sopenharmony_ci if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0) 129462306a36Sopenharmony_ci src = fib_result_prefsrc(dev_net(rt->dst.dev), &res); 129562306a36Sopenharmony_ci else 129662306a36Sopenharmony_ci src = inet_select_addr(rt->dst.dev, 129762306a36Sopenharmony_ci rt_nexthop(rt, iph->daddr), 129862306a36Sopenharmony_ci RT_SCOPE_UNIVERSE); 
129962306a36Sopenharmony_ci rcu_read_unlock(); 130062306a36Sopenharmony_ci } 130162306a36Sopenharmony_ci memcpy(addr, &src, 4); 130262306a36Sopenharmony_ci} 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_CLASSID 130562306a36Sopenharmony_cistatic void set_class_tag(struct rtable *rt, u32 tag) 130662306a36Sopenharmony_ci{ 130762306a36Sopenharmony_ci if (!(rt->dst.tclassid & 0xFFFF)) 130862306a36Sopenharmony_ci rt->dst.tclassid |= tag & 0xFFFF; 130962306a36Sopenharmony_ci if (!(rt->dst.tclassid & 0xFFFF0000)) 131062306a36Sopenharmony_ci rt->dst.tclassid |= tag & 0xFFFF0000; 131162306a36Sopenharmony_ci} 131262306a36Sopenharmony_ci#endif 131362306a36Sopenharmony_ci 131462306a36Sopenharmony_cistatic unsigned int ipv4_default_advmss(const struct dst_entry *dst) 131562306a36Sopenharmony_ci{ 131662306a36Sopenharmony_ci struct net *net = dev_net(dst->dev); 131762306a36Sopenharmony_ci unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); 131862306a36Sopenharmony_ci unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, 131962306a36Sopenharmony_ci net->ipv4.ip_rt_min_advmss); 132062306a36Sopenharmony_ci 132162306a36Sopenharmony_ci return min(advmss, IPV4_MAX_PMTU - header_size); 132262306a36Sopenharmony_ci} 132362306a36Sopenharmony_ci 132462306a36Sopenharmony_ciINDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst) 132562306a36Sopenharmony_ci{ 132662306a36Sopenharmony_ci return ip_dst_mtu_maybe_forward(dst, false); 132762306a36Sopenharmony_ci} 132862306a36Sopenharmony_ciEXPORT_INDIRECT_CALLABLE(ipv4_mtu); 132962306a36Sopenharmony_ci 133062306a36Sopenharmony_cistatic void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr) 133162306a36Sopenharmony_ci{ 133262306a36Sopenharmony_ci struct fnhe_hash_bucket *hash; 133362306a36Sopenharmony_ci struct fib_nh_exception *fnhe, __rcu **fnhe_p; 133462306a36Sopenharmony_ci u32 hval = fnhe_hashfun(daddr); 133562306a36Sopenharmony_ci 
133662306a36Sopenharmony_ci spin_lock_bh(&fnhe_lock); 133762306a36Sopenharmony_ci 133862306a36Sopenharmony_ci hash = rcu_dereference_protected(nhc->nhc_exceptions, 133962306a36Sopenharmony_ci lockdep_is_held(&fnhe_lock)); 134062306a36Sopenharmony_ci hash += hval; 134162306a36Sopenharmony_ci 134262306a36Sopenharmony_ci fnhe_p = &hash->chain; 134362306a36Sopenharmony_ci fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); 134462306a36Sopenharmony_ci while (fnhe) { 134562306a36Sopenharmony_ci if (fnhe->fnhe_daddr == daddr) { 134662306a36Sopenharmony_ci rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( 134762306a36Sopenharmony_ci fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); 134862306a36Sopenharmony_ci /* set fnhe_daddr to 0 to ensure it won't bind with 134962306a36Sopenharmony_ci * new dsts in rt_bind_exception(). 135062306a36Sopenharmony_ci */ 135162306a36Sopenharmony_ci fnhe->fnhe_daddr = 0; 135262306a36Sopenharmony_ci fnhe_flush_routes(fnhe); 135362306a36Sopenharmony_ci kfree_rcu(fnhe, rcu); 135462306a36Sopenharmony_ci break; 135562306a36Sopenharmony_ci } 135662306a36Sopenharmony_ci fnhe_p = &fnhe->fnhe_next; 135762306a36Sopenharmony_ci fnhe = rcu_dereference_protected(fnhe->fnhe_next, 135862306a36Sopenharmony_ci lockdep_is_held(&fnhe_lock)); 135962306a36Sopenharmony_ci } 136062306a36Sopenharmony_ci 136162306a36Sopenharmony_ci spin_unlock_bh(&fnhe_lock); 136262306a36Sopenharmony_ci} 136362306a36Sopenharmony_ci 136462306a36Sopenharmony_cistatic struct fib_nh_exception *find_exception(struct fib_nh_common *nhc, 136562306a36Sopenharmony_ci __be32 daddr) 136662306a36Sopenharmony_ci{ 136762306a36Sopenharmony_ci struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions); 136862306a36Sopenharmony_ci struct fib_nh_exception *fnhe; 136962306a36Sopenharmony_ci u32 hval; 137062306a36Sopenharmony_ci 137162306a36Sopenharmony_ci if (!hash) 137262306a36Sopenharmony_ci return NULL; 137362306a36Sopenharmony_ci 137462306a36Sopenharmony_ci hval = 
fnhe_hashfun(daddr); 137562306a36Sopenharmony_ci 137662306a36Sopenharmony_ci for (fnhe = rcu_dereference(hash[hval].chain); fnhe; 137762306a36Sopenharmony_ci fnhe = rcu_dereference(fnhe->fnhe_next)) { 137862306a36Sopenharmony_ci if (fnhe->fnhe_daddr == daddr) { 137962306a36Sopenharmony_ci if (fnhe->fnhe_expires && 138062306a36Sopenharmony_ci time_after(jiffies, fnhe->fnhe_expires)) { 138162306a36Sopenharmony_ci ip_del_fnhe(nhc, daddr); 138262306a36Sopenharmony_ci break; 138362306a36Sopenharmony_ci } 138462306a36Sopenharmony_ci return fnhe; 138562306a36Sopenharmony_ci } 138662306a36Sopenharmony_ci } 138762306a36Sopenharmony_ci return NULL; 138862306a36Sopenharmony_ci} 138962306a36Sopenharmony_ci 139062306a36Sopenharmony_ci/* MTU selection: 139162306a36Sopenharmony_ci * 1. mtu on route is locked - use it 139262306a36Sopenharmony_ci * 2. mtu from nexthop exception 139362306a36Sopenharmony_ci * 3. mtu from egress device 139462306a36Sopenharmony_ci */ 139562306a36Sopenharmony_ci 139662306a36Sopenharmony_ciu32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) 139762306a36Sopenharmony_ci{ 139862306a36Sopenharmony_ci struct fib_nh_common *nhc = res->nhc; 139962306a36Sopenharmony_ci struct net_device *dev = nhc->nhc_dev; 140062306a36Sopenharmony_ci struct fib_info *fi = res->fi; 140162306a36Sopenharmony_ci u32 mtu = 0; 140262306a36Sopenharmony_ci 140362306a36Sopenharmony_ci if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) || 140462306a36Sopenharmony_ci fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU)) 140562306a36Sopenharmony_ci mtu = fi->fib_mtu; 140662306a36Sopenharmony_ci 140762306a36Sopenharmony_ci if (likely(!mtu)) { 140862306a36Sopenharmony_ci struct fib_nh_exception *fnhe; 140962306a36Sopenharmony_ci 141062306a36Sopenharmony_ci fnhe = find_exception(nhc, daddr); 141162306a36Sopenharmony_ci if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires)) 141262306a36Sopenharmony_ci mtu = fnhe->fnhe_pmtu; 141362306a36Sopenharmony_ci } 
141462306a36Sopenharmony_ci 141562306a36Sopenharmony_ci if (likely(!mtu)) 141662306a36Sopenharmony_ci mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU); 141762306a36Sopenharmony_ci 141862306a36Sopenharmony_ci return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu); 141962306a36Sopenharmony_ci} 142062306a36Sopenharmony_ci 142162306a36Sopenharmony_cistatic bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, 142262306a36Sopenharmony_ci __be32 daddr, const bool do_cache) 142362306a36Sopenharmony_ci{ 142462306a36Sopenharmony_ci bool ret = false; 142562306a36Sopenharmony_ci 142662306a36Sopenharmony_ci spin_lock_bh(&fnhe_lock); 142762306a36Sopenharmony_ci 142862306a36Sopenharmony_ci if (daddr == fnhe->fnhe_daddr) { 142962306a36Sopenharmony_ci struct rtable __rcu **porig; 143062306a36Sopenharmony_ci struct rtable *orig; 143162306a36Sopenharmony_ci int genid = fnhe_genid(dev_net(rt->dst.dev)); 143262306a36Sopenharmony_ci 143362306a36Sopenharmony_ci if (rt_is_input_route(rt)) 143462306a36Sopenharmony_ci porig = &fnhe->fnhe_rth_input; 143562306a36Sopenharmony_ci else 143662306a36Sopenharmony_ci porig = &fnhe->fnhe_rth_output; 143762306a36Sopenharmony_ci orig = rcu_dereference(*porig); 143862306a36Sopenharmony_ci 143962306a36Sopenharmony_ci if (fnhe->fnhe_genid != genid) { 144062306a36Sopenharmony_ci fnhe->fnhe_genid = genid; 144162306a36Sopenharmony_ci fnhe->fnhe_gw = 0; 144262306a36Sopenharmony_ci fnhe->fnhe_pmtu = 0; 144362306a36Sopenharmony_ci fnhe->fnhe_expires = 0; 144462306a36Sopenharmony_ci fnhe->fnhe_mtu_locked = false; 144562306a36Sopenharmony_ci fnhe_flush_routes(fnhe); 144662306a36Sopenharmony_ci orig = NULL; 144762306a36Sopenharmony_ci } 144862306a36Sopenharmony_ci fill_route_from_fnhe(rt, fnhe); 144962306a36Sopenharmony_ci if (!rt->rt_gw4) { 145062306a36Sopenharmony_ci rt->rt_gw4 = daddr; 145162306a36Sopenharmony_ci rt->rt_gw_family = AF_INET; 145262306a36Sopenharmony_ci } 145362306a36Sopenharmony_ci 145462306a36Sopenharmony_ci if (do_cache) { 
145562306a36Sopenharmony_ci dst_hold(&rt->dst); 145662306a36Sopenharmony_ci rcu_assign_pointer(*porig, rt); 145762306a36Sopenharmony_ci if (orig) { 145862306a36Sopenharmony_ci dst_dev_put(&orig->dst); 145962306a36Sopenharmony_ci dst_release(&orig->dst); 146062306a36Sopenharmony_ci } 146162306a36Sopenharmony_ci ret = true; 146262306a36Sopenharmony_ci } 146362306a36Sopenharmony_ci 146462306a36Sopenharmony_ci fnhe->fnhe_stamp = jiffies; 146562306a36Sopenharmony_ci } 146662306a36Sopenharmony_ci spin_unlock_bh(&fnhe_lock); 146762306a36Sopenharmony_ci 146862306a36Sopenharmony_ci return ret; 146962306a36Sopenharmony_ci} 147062306a36Sopenharmony_ci 147162306a36Sopenharmony_cistatic bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) 147262306a36Sopenharmony_ci{ 147362306a36Sopenharmony_ci struct rtable *orig, *prev, **p; 147462306a36Sopenharmony_ci bool ret = true; 147562306a36Sopenharmony_ci 147662306a36Sopenharmony_ci if (rt_is_input_route(rt)) { 147762306a36Sopenharmony_ci p = (struct rtable **)&nhc->nhc_rth_input; 147862306a36Sopenharmony_ci } else { 147962306a36Sopenharmony_ci p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output); 148062306a36Sopenharmony_ci } 148162306a36Sopenharmony_ci orig = *p; 148262306a36Sopenharmony_ci 148362306a36Sopenharmony_ci /* hold dst before doing cmpxchg() to avoid race condition 148462306a36Sopenharmony_ci * on this dst 148562306a36Sopenharmony_ci */ 148662306a36Sopenharmony_ci dst_hold(&rt->dst); 148762306a36Sopenharmony_ci prev = cmpxchg(p, orig, rt); 148862306a36Sopenharmony_ci if (prev == orig) { 148962306a36Sopenharmony_ci if (orig) { 149062306a36Sopenharmony_ci rt_add_uncached_list(orig); 149162306a36Sopenharmony_ci dst_release(&orig->dst); 149262306a36Sopenharmony_ci } 149362306a36Sopenharmony_ci } else { 149462306a36Sopenharmony_ci dst_release(&rt->dst); 149562306a36Sopenharmony_ci ret = false; 149662306a36Sopenharmony_ci } 149762306a36Sopenharmony_ci 149862306a36Sopenharmony_ci return ret; 
149962306a36Sopenharmony_ci} 150062306a36Sopenharmony_ci 150162306a36Sopenharmony_cistruct uncached_list { 150262306a36Sopenharmony_ci spinlock_t lock; 150362306a36Sopenharmony_ci struct list_head head; 150462306a36Sopenharmony_ci struct list_head quarantine; 150562306a36Sopenharmony_ci}; 150662306a36Sopenharmony_ci 150762306a36Sopenharmony_cistatic DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); 150862306a36Sopenharmony_ci 150962306a36Sopenharmony_civoid rt_add_uncached_list(struct rtable *rt) 151062306a36Sopenharmony_ci{ 151162306a36Sopenharmony_ci struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); 151262306a36Sopenharmony_ci 151362306a36Sopenharmony_ci rt->dst.rt_uncached_list = ul; 151462306a36Sopenharmony_ci 151562306a36Sopenharmony_ci spin_lock_bh(&ul->lock); 151662306a36Sopenharmony_ci list_add_tail(&rt->dst.rt_uncached, &ul->head); 151762306a36Sopenharmony_ci spin_unlock_bh(&ul->lock); 151862306a36Sopenharmony_ci} 151962306a36Sopenharmony_ci 152062306a36Sopenharmony_civoid rt_del_uncached_list(struct rtable *rt) 152162306a36Sopenharmony_ci{ 152262306a36Sopenharmony_ci if (!list_empty(&rt->dst.rt_uncached)) { 152362306a36Sopenharmony_ci struct uncached_list *ul = rt->dst.rt_uncached_list; 152462306a36Sopenharmony_ci 152562306a36Sopenharmony_ci spin_lock_bh(&ul->lock); 152662306a36Sopenharmony_ci list_del_init(&rt->dst.rt_uncached); 152762306a36Sopenharmony_ci spin_unlock_bh(&ul->lock); 152862306a36Sopenharmony_ci } 152962306a36Sopenharmony_ci} 153062306a36Sopenharmony_ci 153162306a36Sopenharmony_cistatic void ipv4_dst_destroy(struct dst_entry *dst) 153262306a36Sopenharmony_ci{ 153362306a36Sopenharmony_ci struct rtable *rt = (struct rtable *)dst; 153462306a36Sopenharmony_ci 153562306a36Sopenharmony_ci ip_dst_metrics_put(dst); 153662306a36Sopenharmony_ci rt_del_uncached_list(rt); 153762306a36Sopenharmony_ci} 153862306a36Sopenharmony_ci 153962306a36Sopenharmony_civoid rt_flush_dev(struct net_device *dev) 154062306a36Sopenharmony_ci{ 
154162306a36Sopenharmony_ci struct rtable *rt, *safe; 154262306a36Sopenharmony_ci int cpu; 154362306a36Sopenharmony_ci 154462306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 154562306a36Sopenharmony_ci struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); 154662306a36Sopenharmony_ci 154762306a36Sopenharmony_ci if (list_empty(&ul->head)) 154862306a36Sopenharmony_ci continue; 154962306a36Sopenharmony_ci 155062306a36Sopenharmony_ci spin_lock_bh(&ul->lock); 155162306a36Sopenharmony_ci list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) { 155262306a36Sopenharmony_ci if (rt->dst.dev != dev) 155362306a36Sopenharmony_ci continue; 155462306a36Sopenharmony_ci rt->dst.dev = blackhole_netdev; 155562306a36Sopenharmony_ci netdev_ref_replace(dev, blackhole_netdev, 155662306a36Sopenharmony_ci &rt->dst.dev_tracker, GFP_ATOMIC); 155762306a36Sopenharmony_ci list_move(&rt->dst.rt_uncached, &ul->quarantine); 155862306a36Sopenharmony_ci } 155962306a36Sopenharmony_ci spin_unlock_bh(&ul->lock); 156062306a36Sopenharmony_ci } 156162306a36Sopenharmony_ci} 156262306a36Sopenharmony_ci 156362306a36Sopenharmony_cistatic bool rt_cache_valid(const struct rtable *rt) 156462306a36Sopenharmony_ci{ 156562306a36Sopenharmony_ci return rt && 156662306a36Sopenharmony_ci rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && 156762306a36Sopenharmony_ci !rt_is_expired(rt); 156862306a36Sopenharmony_ci} 156962306a36Sopenharmony_ci 157062306a36Sopenharmony_cistatic void rt_set_nexthop(struct rtable *rt, __be32 daddr, 157162306a36Sopenharmony_ci const struct fib_result *res, 157262306a36Sopenharmony_ci struct fib_nh_exception *fnhe, 157362306a36Sopenharmony_ci struct fib_info *fi, u16 type, u32 itag, 157462306a36Sopenharmony_ci const bool do_cache) 157562306a36Sopenharmony_ci{ 157662306a36Sopenharmony_ci bool cached = false; 157762306a36Sopenharmony_ci 157862306a36Sopenharmony_ci if (fi) { 157962306a36Sopenharmony_ci struct fib_nh_common *nhc = FIB_RES_NHC(*res); 158062306a36Sopenharmony_ci 
158162306a36Sopenharmony_ci if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) { 158262306a36Sopenharmony_ci rt->rt_uses_gateway = 1; 158362306a36Sopenharmony_ci rt->rt_gw_family = nhc->nhc_gw_family; 158462306a36Sopenharmony_ci /* only INET and INET6 are supported */ 158562306a36Sopenharmony_ci if (likely(nhc->nhc_gw_family == AF_INET)) 158662306a36Sopenharmony_ci rt->rt_gw4 = nhc->nhc_gw.ipv4; 158762306a36Sopenharmony_ci else 158862306a36Sopenharmony_ci rt->rt_gw6 = nhc->nhc_gw.ipv6; 158962306a36Sopenharmony_ci } 159062306a36Sopenharmony_ci 159162306a36Sopenharmony_ci ip_dst_init_metrics(&rt->dst, fi->fib_metrics); 159262306a36Sopenharmony_ci 159362306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_CLASSID 159462306a36Sopenharmony_ci if (nhc->nhc_family == AF_INET) { 159562306a36Sopenharmony_ci struct fib_nh *nh; 159662306a36Sopenharmony_ci 159762306a36Sopenharmony_ci nh = container_of(nhc, struct fib_nh, nh_common); 159862306a36Sopenharmony_ci rt->dst.tclassid = nh->nh_tclassid; 159962306a36Sopenharmony_ci } 160062306a36Sopenharmony_ci#endif 160162306a36Sopenharmony_ci rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); 160262306a36Sopenharmony_ci if (unlikely(fnhe)) 160362306a36Sopenharmony_ci cached = rt_bind_exception(rt, fnhe, daddr, do_cache); 160462306a36Sopenharmony_ci else if (do_cache) 160562306a36Sopenharmony_ci cached = rt_cache_route(nhc, rt); 160662306a36Sopenharmony_ci if (unlikely(!cached)) { 160762306a36Sopenharmony_ci /* Routes we intend to cache in nexthop exception or 160862306a36Sopenharmony_ci * FIB nexthop have the DST_NOCACHE bit clear. 160962306a36Sopenharmony_ci * However, if we are unsuccessful at storing this 161062306a36Sopenharmony_ci * route into the cache we really need to set it. 
161162306a36Sopenharmony_ci */ 161262306a36Sopenharmony_ci if (!rt->rt_gw4) { 161362306a36Sopenharmony_ci rt->rt_gw_family = AF_INET; 161462306a36Sopenharmony_ci rt->rt_gw4 = daddr; 161562306a36Sopenharmony_ci } 161662306a36Sopenharmony_ci rt_add_uncached_list(rt); 161762306a36Sopenharmony_ci } 161862306a36Sopenharmony_ci } else 161962306a36Sopenharmony_ci rt_add_uncached_list(rt); 162062306a36Sopenharmony_ci 162162306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_CLASSID 162262306a36Sopenharmony_ci#ifdef CONFIG_IP_MULTIPLE_TABLES 162362306a36Sopenharmony_ci set_class_tag(rt, res->tclassid); 162462306a36Sopenharmony_ci#endif 162562306a36Sopenharmony_ci set_class_tag(rt, itag); 162662306a36Sopenharmony_ci#endif 162762306a36Sopenharmony_ci} 162862306a36Sopenharmony_ci 162962306a36Sopenharmony_cistruct rtable *rt_dst_alloc(struct net_device *dev, 163062306a36Sopenharmony_ci unsigned int flags, u16 type, 163162306a36Sopenharmony_ci bool noxfrm) 163262306a36Sopenharmony_ci{ 163362306a36Sopenharmony_ci struct rtable *rt; 163462306a36Sopenharmony_ci 163562306a36Sopenharmony_ci rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, 163662306a36Sopenharmony_ci (noxfrm ? 
DST_NOXFRM : 0)); 163762306a36Sopenharmony_ci 163862306a36Sopenharmony_ci if (rt) { 163962306a36Sopenharmony_ci rt->rt_genid = rt_genid_ipv4(dev_net(dev)); 164062306a36Sopenharmony_ci rt->rt_flags = flags; 164162306a36Sopenharmony_ci rt->rt_type = type; 164262306a36Sopenharmony_ci rt->rt_is_input = 0; 164362306a36Sopenharmony_ci rt->rt_iif = 0; 164462306a36Sopenharmony_ci rt->rt_pmtu = 0; 164562306a36Sopenharmony_ci rt->rt_mtu_locked = 0; 164662306a36Sopenharmony_ci rt->rt_uses_gateway = 0; 164762306a36Sopenharmony_ci rt->rt_gw_family = 0; 164862306a36Sopenharmony_ci rt->rt_gw4 = 0; 164962306a36Sopenharmony_ci 165062306a36Sopenharmony_ci rt->dst.output = ip_output; 165162306a36Sopenharmony_ci if (flags & RTCF_LOCAL) 165262306a36Sopenharmony_ci rt->dst.input = ip_local_deliver; 165362306a36Sopenharmony_ci } 165462306a36Sopenharmony_ci 165562306a36Sopenharmony_ci return rt; 165662306a36Sopenharmony_ci} 165762306a36Sopenharmony_ciEXPORT_SYMBOL(rt_dst_alloc); 165862306a36Sopenharmony_ci 165962306a36Sopenharmony_cistruct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) 166062306a36Sopenharmony_ci{ 166162306a36Sopenharmony_ci struct rtable *new_rt; 166262306a36Sopenharmony_ci 166362306a36Sopenharmony_ci new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, 166462306a36Sopenharmony_ci rt->dst.flags); 166562306a36Sopenharmony_ci 166662306a36Sopenharmony_ci if (new_rt) { 166762306a36Sopenharmony_ci new_rt->rt_genid = rt_genid_ipv4(dev_net(dev)); 166862306a36Sopenharmony_ci new_rt->rt_flags = rt->rt_flags; 166962306a36Sopenharmony_ci new_rt->rt_type = rt->rt_type; 167062306a36Sopenharmony_ci new_rt->rt_is_input = rt->rt_is_input; 167162306a36Sopenharmony_ci new_rt->rt_iif = rt->rt_iif; 167262306a36Sopenharmony_ci new_rt->rt_pmtu = rt->rt_pmtu; 167362306a36Sopenharmony_ci new_rt->rt_mtu_locked = rt->rt_mtu_locked; 167462306a36Sopenharmony_ci new_rt->rt_gw_family = rt->rt_gw_family; 167562306a36Sopenharmony_ci if (rt->rt_gw_family == AF_INET) 
167662306a36Sopenharmony_ci new_rt->rt_gw4 = rt->rt_gw4; 167762306a36Sopenharmony_ci else if (rt->rt_gw_family == AF_INET6) 167862306a36Sopenharmony_ci new_rt->rt_gw6 = rt->rt_gw6; 167962306a36Sopenharmony_ci 168062306a36Sopenharmony_ci new_rt->dst.input = rt->dst.input; 168162306a36Sopenharmony_ci new_rt->dst.output = rt->dst.output; 168262306a36Sopenharmony_ci new_rt->dst.error = rt->dst.error; 168362306a36Sopenharmony_ci new_rt->dst.lastuse = jiffies; 168462306a36Sopenharmony_ci new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate); 168562306a36Sopenharmony_ci } 168662306a36Sopenharmony_ci return new_rt; 168762306a36Sopenharmony_ci} 168862306a36Sopenharmony_ciEXPORT_SYMBOL(rt_dst_clone); 168962306a36Sopenharmony_ci 169062306a36Sopenharmony_ci/* called in rcu_read_lock() section */ 169162306a36Sopenharmony_ciint ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, 169262306a36Sopenharmony_ci u8 tos, struct net_device *dev, 169362306a36Sopenharmony_ci struct in_device *in_dev, u32 *itag) 169462306a36Sopenharmony_ci{ 169562306a36Sopenharmony_ci int err; 169662306a36Sopenharmony_ci 169762306a36Sopenharmony_ci /* Primary sanity checks. 
*/ 169862306a36Sopenharmony_ci if (!in_dev) 169962306a36Sopenharmony_ci return -EINVAL; 170062306a36Sopenharmony_ci 170162306a36Sopenharmony_ci if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || 170262306a36Sopenharmony_ci skb->protocol != htons(ETH_P_IP)) 170362306a36Sopenharmony_ci return -EINVAL; 170462306a36Sopenharmony_ci 170562306a36Sopenharmony_ci if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) 170662306a36Sopenharmony_ci return -EINVAL; 170762306a36Sopenharmony_ci 170862306a36Sopenharmony_ci if (ipv4_is_zeronet(saddr)) { 170962306a36Sopenharmony_ci if (!ipv4_is_local_multicast(daddr) && 171062306a36Sopenharmony_ci ip_hdr(skb)->protocol != IPPROTO_IGMP) 171162306a36Sopenharmony_ci return -EINVAL; 171262306a36Sopenharmony_ci } else { 171362306a36Sopenharmony_ci err = fib_validate_source(skb, saddr, 0, tos, 0, dev, 171462306a36Sopenharmony_ci in_dev, itag); 171562306a36Sopenharmony_ci if (err < 0) 171662306a36Sopenharmony_ci return err; 171762306a36Sopenharmony_ci } 171862306a36Sopenharmony_ci return 0; 171962306a36Sopenharmony_ci} 172062306a36Sopenharmony_ci 172162306a36Sopenharmony_ci/* called in rcu_read_lock() section */ 172262306a36Sopenharmony_cistatic int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, 172362306a36Sopenharmony_ci u8 tos, struct net_device *dev, int our) 172462306a36Sopenharmony_ci{ 172562306a36Sopenharmony_ci struct in_device *in_dev = __in_dev_get_rcu(dev); 172662306a36Sopenharmony_ci unsigned int flags = RTCF_MULTICAST; 172762306a36Sopenharmony_ci struct rtable *rth; 172862306a36Sopenharmony_ci u32 itag = 0; 172962306a36Sopenharmony_ci int err; 173062306a36Sopenharmony_ci 173162306a36Sopenharmony_ci err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag); 173262306a36Sopenharmony_ci if (err) 173362306a36Sopenharmony_ci return err; 173462306a36Sopenharmony_ci 173562306a36Sopenharmony_ci if (our) 173662306a36Sopenharmony_ci flags |= RTCF_LOCAL; 173762306a36Sopenharmony_ci 
173862306a36Sopenharmony_ci if (IN_DEV_ORCONF(in_dev, NOPOLICY)) 173962306a36Sopenharmony_ci IPCB(skb)->flags |= IPSKB_NOPOLICY; 174062306a36Sopenharmony_ci 174162306a36Sopenharmony_ci rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, 174262306a36Sopenharmony_ci false); 174362306a36Sopenharmony_ci if (!rth) 174462306a36Sopenharmony_ci return -ENOBUFS; 174562306a36Sopenharmony_ci 174662306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_CLASSID 174762306a36Sopenharmony_ci rth->dst.tclassid = itag; 174862306a36Sopenharmony_ci#endif 174962306a36Sopenharmony_ci rth->dst.output = ip_rt_bug; 175062306a36Sopenharmony_ci rth->rt_is_input= 1; 175162306a36Sopenharmony_ci 175262306a36Sopenharmony_ci#ifdef CONFIG_IP_MROUTE 175362306a36Sopenharmony_ci if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) 175462306a36Sopenharmony_ci rth->dst.input = ip_mr_input; 175562306a36Sopenharmony_ci#endif 175662306a36Sopenharmony_ci RT_CACHE_STAT_INC(in_slow_mc); 175762306a36Sopenharmony_ci 175862306a36Sopenharmony_ci skb_dst_drop(skb); 175962306a36Sopenharmony_ci skb_dst_set(skb, &rth->dst); 176062306a36Sopenharmony_ci return 0; 176162306a36Sopenharmony_ci} 176262306a36Sopenharmony_ci 176362306a36Sopenharmony_ci 176462306a36Sopenharmony_cistatic void ip_handle_martian_source(struct net_device *dev, 176562306a36Sopenharmony_ci struct in_device *in_dev, 176662306a36Sopenharmony_ci struct sk_buff *skb, 176762306a36Sopenharmony_ci __be32 daddr, 176862306a36Sopenharmony_ci __be32 saddr) 176962306a36Sopenharmony_ci{ 177062306a36Sopenharmony_ci RT_CACHE_STAT_INC(in_martian_src); 177162306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_VERBOSE 177262306a36Sopenharmony_ci if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) { 177362306a36Sopenharmony_ci /* 177462306a36Sopenharmony_ci * RFC1812 recommendation, if source is martian, 177562306a36Sopenharmony_ci * the only hint is MAC header. 
177662306a36Sopenharmony_ci */ 177762306a36Sopenharmony_ci pr_warn("martian source %pI4 from %pI4, on dev %s\n", 177862306a36Sopenharmony_ci &daddr, &saddr, dev->name); 177962306a36Sopenharmony_ci if (dev->hard_header_len && skb_mac_header_was_set(skb)) { 178062306a36Sopenharmony_ci print_hex_dump(KERN_WARNING, "ll header: ", 178162306a36Sopenharmony_ci DUMP_PREFIX_OFFSET, 16, 1, 178262306a36Sopenharmony_ci skb_mac_header(skb), 178362306a36Sopenharmony_ci dev->hard_header_len, false); 178462306a36Sopenharmony_ci } 178562306a36Sopenharmony_ci } 178662306a36Sopenharmony_ci#endif 178762306a36Sopenharmony_ci} 178862306a36Sopenharmony_ci 178962306a36Sopenharmony_ci/* called in rcu_read_lock() section */ 179062306a36Sopenharmony_cistatic int __mkroute_input(struct sk_buff *skb, 179162306a36Sopenharmony_ci const struct fib_result *res, 179262306a36Sopenharmony_ci struct in_device *in_dev, 179362306a36Sopenharmony_ci __be32 daddr, __be32 saddr, u32 tos) 179462306a36Sopenharmony_ci{ 179562306a36Sopenharmony_ci struct fib_nh_common *nhc = FIB_RES_NHC(*res); 179662306a36Sopenharmony_ci struct net_device *dev = nhc->nhc_dev; 179762306a36Sopenharmony_ci struct fib_nh_exception *fnhe; 179862306a36Sopenharmony_ci struct rtable *rth; 179962306a36Sopenharmony_ci int err; 180062306a36Sopenharmony_ci struct in_device *out_dev; 180162306a36Sopenharmony_ci bool do_cache; 180262306a36Sopenharmony_ci u32 itag = 0; 180362306a36Sopenharmony_ci 180462306a36Sopenharmony_ci /* get a working reference to the output device */ 180562306a36Sopenharmony_ci out_dev = __in_dev_get_rcu(dev); 180662306a36Sopenharmony_ci if (!out_dev) { 180762306a36Sopenharmony_ci net_crit_ratelimited("Bug in ip_route_input_slow(). 
Please report.\n"); 180862306a36Sopenharmony_ci return -EINVAL; 180962306a36Sopenharmony_ci } 181062306a36Sopenharmony_ci 181162306a36Sopenharmony_ci err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), 181262306a36Sopenharmony_ci in_dev->dev, in_dev, &itag); 181362306a36Sopenharmony_ci if (err < 0) { 181462306a36Sopenharmony_ci ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, 181562306a36Sopenharmony_ci saddr); 181662306a36Sopenharmony_ci 181762306a36Sopenharmony_ci goto cleanup; 181862306a36Sopenharmony_ci } 181962306a36Sopenharmony_ci 182062306a36Sopenharmony_ci do_cache = res->fi && !itag; 182162306a36Sopenharmony_ci if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && 182262306a36Sopenharmony_ci skb->protocol == htons(ETH_P_IP)) { 182362306a36Sopenharmony_ci __be32 gw; 182462306a36Sopenharmony_ci 182562306a36Sopenharmony_ci gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0; 182662306a36Sopenharmony_ci if (IN_DEV_SHARED_MEDIA(out_dev) || 182762306a36Sopenharmony_ci inet_addr_onlink(out_dev, saddr, gw)) 182862306a36Sopenharmony_ci IPCB(skb)->flags |= IPSKB_DOREDIRECT; 182962306a36Sopenharmony_ci } 183062306a36Sopenharmony_ci 183162306a36Sopenharmony_ci if (skb->protocol != htons(ETH_P_IP)) { 183262306a36Sopenharmony_ci /* Not IP (i.e. ARP). Do not create route, if it is 183362306a36Sopenharmony_ci * invalid for proxy arp. DNAT routes are always valid. 183462306a36Sopenharmony_ci * 183562306a36Sopenharmony_ci * Proxy arp feature have been extended to allow, ARP 183662306a36Sopenharmony_ci * replies back to the same interface, to support 183762306a36Sopenharmony_ci * Private VLAN switch technologies. See arp.c. 
183862306a36Sopenharmony_ci */ 183962306a36Sopenharmony_ci if (out_dev == in_dev && 184062306a36Sopenharmony_ci IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { 184162306a36Sopenharmony_ci err = -EINVAL; 184262306a36Sopenharmony_ci goto cleanup; 184362306a36Sopenharmony_ci } 184462306a36Sopenharmony_ci } 184562306a36Sopenharmony_ci 184662306a36Sopenharmony_ci if (IN_DEV_ORCONF(in_dev, NOPOLICY)) 184762306a36Sopenharmony_ci IPCB(skb)->flags |= IPSKB_NOPOLICY; 184862306a36Sopenharmony_ci 184962306a36Sopenharmony_ci fnhe = find_exception(nhc, daddr); 185062306a36Sopenharmony_ci if (do_cache) { 185162306a36Sopenharmony_ci if (fnhe) 185262306a36Sopenharmony_ci rth = rcu_dereference(fnhe->fnhe_rth_input); 185362306a36Sopenharmony_ci else 185462306a36Sopenharmony_ci rth = rcu_dereference(nhc->nhc_rth_input); 185562306a36Sopenharmony_ci if (rt_cache_valid(rth)) { 185662306a36Sopenharmony_ci skb_dst_set_noref(skb, &rth->dst); 185762306a36Sopenharmony_ci goto out; 185862306a36Sopenharmony_ci } 185962306a36Sopenharmony_ci } 186062306a36Sopenharmony_ci 186162306a36Sopenharmony_ci rth = rt_dst_alloc(out_dev->dev, 0, res->type, 186262306a36Sopenharmony_ci IN_DEV_ORCONF(out_dev, NOXFRM)); 186362306a36Sopenharmony_ci if (!rth) { 186462306a36Sopenharmony_ci err = -ENOBUFS; 186562306a36Sopenharmony_ci goto cleanup; 186662306a36Sopenharmony_ci } 186762306a36Sopenharmony_ci 186862306a36Sopenharmony_ci rth->rt_is_input = 1; 186962306a36Sopenharmony_ci RT_CACHE_STAT_INC(in_slow_tot); 187062306a36Sopenharmony_ci 187162306a36Sopenharmony_ci rth->dst.input = ip_forward; 187262306a36Sopenharmony_ci 187362306a36Sopenharmony_ci rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag, 187462306a36Sopenharmony_ci do_cache); 187562306a36Sopenharmony_ci lwtunnel_set_redirect(&rth->dst); 187662306a36Sopenharmony_ci skb_dst_set(skb, &rth->dst); 187762306a36Sopenharmony_ciout: 187862306a36Sopenharmony_ci err = 0; 187962306a36Sopenharmony_ci cleanup: 188062306a36Sopenharmony_ci return err; 
188162306a36Sopenharmony_ci} 188262306a36Sopenharmony_ci 188362306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_MULTIPATH 188462306a36Sopenharmony_ci/* To make ICMP packets follow the right flow, the multipath hash is 188562306a36Sopenharmony_ci * calculated from the inner IP addresses. 188662306a36Sopenharmony_ci */ 188762306a36Sopenharmony_cistatic void ip_multipath_l3_keys(const struct sk_buff *skb, 188862306a36Sopenharmony_ci struct flow_keys *hash_keys) 188962306a36Sopenharmony_ci{ 189062306a36Sopenharmony_ci const struct iphdr *outer_iph = ip_hdr(skb); 189162306a36Sopenharmony_ci const struct iphdr *key_iph = outer_iph; 189262306a36Sopenharmony_ci const struct iphdr *inner_iph; 189362306a36Sopenharmony_ci const struct icmphdr *icmph; 189462306a36Sopenharmony_ci struct iphdr _inner_iph; 189562306a36Sopenharmony_ci struct icmphdr _icmph; 189662306a36Sopenharmony_ci 189762306a36Sopenharmony_ci if (likely(outer_iph->protocol != IPPROTO_ICMP)) 189862306a36Sopenharmony_ci goto out; 189962306a36Sopenharmony_ci 190062306a36Sopenharmony_ci if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) 190162306a36Sopenharmony_ci goto out; 190262306a36Sopenharmony_ci 190362306a36Sopenharmony_ci icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), 190462306a36Sopenharmony_ci &_icmph); 190562306a36Sopenharmony_ci if (!icmph) 190662306a36Sopenharmony_ci goto out; 190762306a36Sopenharmony_ci 190862306a36Sopenharmony_ci if (!icmp_is_err(icmph->type)) 190962306a36Sopenharmony_ci goto out; 191062306a36Sopenharmony_ci 191162306a36Sopenharmony_ci inner_iph = skb_header_pointer(skb, 191262306a36Sopenharmony_ci outer_iph->ihl * 4 + sizeof(_icmph), 191362306a36Sopenharmony_ci sizeof(_inner_iph), &_inner_iph); 191462306a36Sopenharmony_ci if (!inner_iph) 191562306a36Sopenharmony_ci goto out; 191662306a36Sopenharmony_ci 191762306a36Sopenharmony_ci key_iph = inner_iph; 191862306a36Sopenharmony_ciout: 191962306a36Sopenharmony_ci hash_keys->addrs.v4addrs.src = key_iph->saddr; 
192062306a36Sopenharmony_ci hash_keys->addrs.v4addrs.dst = key_iph->daddr; 192162306a36Sopenharmony_ci} 192262306a36Sopenharmony_ci 192362306a36Sopenharmony_cistatic u32 fib_multipath_custom_hash_outer(const struct net *net, 192462306a36Sopenharmony_ci const struct sk_buff *skb, 192562306a36Sopenharmony_ci bool *p_has_inner) 192662306a36Sopenharmony_ci{ 192762306a36Sopenharmony_ci u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); 192862306a36Sopenharmony_ci struct flow_keys keys, hash_keys; 192962306a36Sopenharmony_ci 193062306a36Sopenharmony_ci if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) 193162306a36Sopenharmony_ci return 0; 193262306a36Sopenharmony_ci 193362306a36Sopenharmony_ci memset(&hash_keys, 0, sizeof(hash_keys)); 193462306a36Sopenharmony_ci skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); 193562306a36Sopenharmony_ci 193662306a36Sopenharmony_ci hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 193762306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) 193862306a36Sopenharmony_ci hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; 193962306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) 194062306a36Sopenharmony_ci hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; 194162306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) 194262306a36Sopenharmony_ci hash_keys.basic.ip_proto = keys.basic.ip_proto; 194362306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) 194462306a36Sopenharmony_ci hash_keys.ports.src = keys.ports.src; 194562306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) 194662306a36Sopenharmony_ci hash_keys.ports.dst = keys.ports.dst; 194762306a36Sopenharmony_ci 194862306a36Sopenharmony_ci *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); 194962306a36Sopenharmony_ci return flow_hash_from_keys(&hash_keys); 195062306a36Sopenharmony_ci} 
195162306a36Sopenharmony_ci 195262306a36Sopenharmony_cistatic u32 fib_multipath_custom_hash_inner(const struct net *net, 195362306a36Sopenharmony_ci const struct sk_buff *skb, 195462306a36Sopenharmony_ci bool has_inner) 195562306a36Sopenharmony_ci{ 195662306a36Sopenharmony_ci u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); 195762306a36Sopenharmony_ci struct flow_keys keys, hash_keys; 195862306a36Sopenharmony_ci 195962306a36Sopenharmony_ci /* We assume the packet carries an encapsulation, but if none was 196062306a36Sopenharmony_ci * encountered during dissection of the outer flow, then there is no 196162306a36Sopenharmony_ci * point in calling the flow dissector again. 196262306a36Sopenharmony_ci */ 196362306a36Sopenharmony_ci if (!has_inner) 196462306a36Sopenharmony_ci return 0; 196562306a36Sopenharmony_ci 196662306a36Sopenharmony_ci if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) 196762306a36Sopenharmony_ci return 0; 196862306a36Sopenharmony_ci 196962306a36Sopenharmony_ci memset(&hash_keys, 0, sizeof(hash_keys)); 197062306a36Sopenharmony_ci skb_flow_dissect_flow_keys(skb, &keys, 0); 197162306a36Sopenharmony_ci 197262306a36Sopenharmony_ci if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) 197362306a36Sopenharmony_ci return 0; 197462306a36Sopenharmony_ci 197562306a36Sopenharmony_ci if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 197662306a36Sopenharmony_ci hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 197762306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) 197862306a36Sopenharmony_ci hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; 197962306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) 198062306a36Sopenharmony_ci hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; 198162306a36Sopenharmony_ci } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 198262306a36Sopenharmony_ci hash_keys.control.addr_type = 
FLOW_DISSECTOR_KEY_IPV6_ADDRS; 198362306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) 198462306a36Sopenharmony_ci hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; 198562306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) 198662306a36Sopenharmony_ci hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; 198762306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) 198862306a36Sopenharmony_ci hash_keys.tags.flow_label = keys.tags.flow_label; 198962306a36Sopenharmony_ci } 199062306a36Sopenharmony_ci 199162306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) 199262306a36Sopenharmony_ci hash_keys.basic.ip_proto = keys.basic.ip_proto; 199362306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) 199462306a36Sopenharmony_ci hash_keys.ports.src = keys.ports.src; 199562306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) 199662306a36Sopenharmony_ci hash_keys.ports.dst = keys.ports.dst; 199762306a36Sopenharmony_ci 199862306a36Sopenharmony_ci return flow_hash_from_keys(&hash_keys); 199962306a36Sopenharmony_ci} 200062306a36Sopenharmony_ci 200162306a36Sopenharmony_cistatic u32 fib_multipath_custom_hash_skb(const struct net *net, 200262306a36Sopenharmony_ci const struct sk_buff *skb) 200362306a36Sopenharmony_ci{ 200462306a36Sopenharmony_ci u32 mhash, mhash_inner; 200562306a36Sopenharmony_ci bool has_inner = true; 200662306a36Sopenharmony_ci 200762306a36Sopenharmony_ci mhash = fib_multipath_custom_hash_outer(net, skb, &has_inner); 200862306a36Sopenharmony_ci mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner); 200962306a36Sopenharmony_ci 201062306a36Sopenharmony_ci return jhash_2words(mhash, mhash_inner, 0); 201162306a36Sopenharmony_ci} 201262306a36Sopenharmony_ci 201362306a36Sopenharmony_cistatic u32 fib_multipath_custom_hash_fl4(const struct net *net, 201462306a36Sopenharmony_ci const struct flowi4 
*fl4) 201562306a36Sopenharmony_ci{ 201662306a36Sopenharmony_ci u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); 201762306a36Sopenharmony_ci struct flow_keys hash_keys; 201862306a36Sopenharmony_ci 201962306a36Sopenharmony_ci if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) 202062306a36Sopenharmony_ci return 0; 202162306a36Sopenharmony_ci 202262306a36Sopenharmony_ci memset(&hash_keys, 0, sizeof(hash_keys)); 202362306a36Sopenharmony_ci hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 202462306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) 202562306a36Sopenharmony_ci hash_keys.addrs.v4addrs.src = fl4->saddr; 202662306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) 202762306a36Sopenharmony_ci hash_keys.addrs.v4addrs.dst = fl4->daddr; 202862306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) 202962306a36Sopenharmony_ci hash_keys.basic.ip_proto = fl4->flowi4_proto; 203062306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) 203162306a36Sopenharmony_ci hash_keys.ports.src = fl4->fl4_sport; 203262306a36Sopenharmony_ci if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) 203362306a36Sopenharmony_ci hash_keys.ports.dst = fl4->fl4_dport; 203462306a36Sopenharmony_ci 203562306a36Sopenharmony_ci return flow_hash_from_keys(&hash_keys); 203662306a36Sopenharmony_ci} 203762306a36Sopenharmony_ci 203862306a36Sopenharmony_ci/* if skb is set it will be used and fl4 can be NULL */ 203962306a36Sopenharmony_ciint fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, 204062306a36Sopenharmony_ci const struct sk_buff *skb, struct flow_keys *flkeys) 204162306a36Sopenharmony_ci{ 204262306a36Sopenharmony_ci u32 multipath_hash = fl4 ? 
fl4->flowi4_multipath_hash : 0; 204362306a36Sopenharmony_ci struct flow_keys hash_keys; 204462306a36Sopenharmony_ci u32 mhash = 0; 204562306a36Sopenharmony_ci 204662306a36Sopenharmony_ci switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) { 204762306a36Sopenharmony_ci case 0: 204862306a36Sopenharmony_ci memset(&hash_keys, 0, sizeof(hash_keys)); 204962306a36Sopenharmony_ci hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 205062306a36Sopenharmony_ci if (skb) { 205162306a36Sopenharmony_ci ip_multipath_l3_keys(skb, &hash_keys); 205262306a36Sopenharmony_ci } else { 205362306a36Sopenharmony_ci hash_keys.addrs.v4addrs.src = fl4->saddr; 205462306a36Sopenharmony_ci hash_keys.addrs.v4addrs.dst = fl4->daddr; 205562306a36Sopenharmony_ci } 205662306a36Sopenharmony_ci mhash = flow_hash_from_keys(&hash_keys); 205762306a36Sopenharmony_ci break; 205862306a36Sopenharmony_ci case 1: 205962306a36Sopenharmony_ci /* skb is currently provided only when forwarding */ 206062306a36Sopenharmony_ci if (skb) { 206162306a36Sopenharmony_ci unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; 206262306a36Sopenharmony_ci struct flow_keys keys; 206362306a36Sopenharmony_ci 206462306a36Sopenharmony_ci /* short-circuit if we already have L4 hash present */ 206562306a36Sopenharmony_ci if (skb->l4_hash) 206662306a36Sopenharmony_ci return skb_get_hash_raw(skb) >> 1; 206762306a36Sopenharmony_ci 206862306a36Sopenharmony_ci memset(&hash_keys, 0, sizeof(hash_keys)); 206962306a36Sopenharmony_ci 207062306a36Sopenharmony_ci if (!flkeys) { 207162306a36Sopenharmony_ci skb_flow_dissect_flow_keys(skb, &keys, flag); 207262306a36Sopenharmony_ci flkeys = &keys; 207362306a36Sopenharmony_ci } 207462306a36Sopenharmony_ci 207562306a36Sopenharmony_ci hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 207662306a36Sopenharmony_ci hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src; 207762306a36Sopenharmony_ci hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst; 
207862306a36Sopenharmony_ci hash_keys.ports.src = flkeys->ports.src; 207962306a36Sopenharmony_ci hash_keys.ports.dst = flkeys->ports.dst; 208062306a36Sopenharmony_ci hash_keys.basic.ip_proto = flkeys->basic.ip_proto; 208162306a36Sopenharmony_ci } else { 208262306a36Sopenharmony_ci memset(&hash_keys, 0, sizeof(hash_keys)); 208362306a36Sopenharmony_ci hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 208462306a36Sopenharmony_ci hash_keys.addrs.v4addrs.src = fl4->saddr; 208562306a36Sopenharmony_ci hash_keys.addrs.v4addrs.dst = fl4->daddr; 208662306a36Sopenharmony_ci hash_keys.ports.src = fl4->fl4_sport; 208762306a36Sopenharmony_ci hash_keys.ports.dst = fl4->fl4_dport; 208862306a36Sopenharmony_ci hash_keys.basic.ip_proto = fl4->flowi4_proto; 208962306a36Sopenharmony_ci } 209062306a36Sopenharmony_ci mhash = flow_hash_from_keys(&hash_keys); 209162306a36Sopenharmony_ci break; 209262306a36Sopenharmony_ci case 2: 209362306a36Sopenharmony_ci memset(&hash_keys, 0, sizeof(hash_keys)); 209462306a36Sopenharmony_ci /* skb is currently provided only when forwarding */ 209562306a36Sopenharmony_ci if (skb) { 209662306a36Sopenharmony_ci struct flow_keys keys; 209762306a36Sopenharmony_ci 209862306a36Sopenharmony_ci skb_flow_dissect_flow_keys(skb, &keys, 0); 209962306a36Sopenharmony_ci /* Inner can be v4 or v6 */ 210062306a36Sopenharmony_ci if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 210162306a36Sopenharmony_ci hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 210262306a36Sopenharmony_ci hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; 210362306a36Sopenharmony_ci hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; 210462306a36Sopenharmony_ci } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 210562306a36Sopenharmony_ci hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 210662306a36Sopenharmony_ci hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; 210762306a36Sopenharmony_ci hash_keys.addrs.v6addrs.dst = 
keys.addrs.v6addrs.dst; 210862306a36Sopenharmony_ci hash_keys.tags.flow_label = keys.tags.flow_label; 210962306a36Sopenharmony_ci hash_keys.basic.ip_proto = keys.basic.ip_proto; 211062306a36Sopenharmony_ci } else { 211162306a36Sopenharmony_ci /* Same as case 0 */ 211262306a36Sopenharmony_ci hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 211362306a36Sopenharmony_ci ip_multipath_l3_keys(skb, &hash_keys); 211462306a36Sopenharmony_ci } 211562306a36Sopenharmony_ci } else { 211662306a36Sopenharmony_ci /* Same as case 0 */ 211762306a36Sopenharmony_ci hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 211862306a36Sopenharmony_ci hash_keys.addrs.v4addrs.src = fl4->saddr; 211962306a36Sopenharmony_ci hash_keys.addrs.v4addrs.dst = fl4->daddr; 212062306a36Sopenharmony_ci } 212162306a36Sopenharmony_ci mhash = flow_hash_from_keys(&hash_keys); 212262306a36Sopenharmony_ci break; 212362306a36Sopenharmony_ci case 3: 212462306a36Sopenharmony_ci if (skb) 212562306a36Sopenharmony_ci mhash = fib_multipath_custom_hash_skb(net, skb); 212662306a36Sopenharmony_ci else 212762306a36Sopenharmony_ci mhash = fib_multipath_custom_hash_fl4(net, fl4); 212862306a36Sopenharmony_ci break; 212962306a36Sopenharmony_ci } 213062306a36Sopenharmony_ci 213162306a36Sopenharmony_ci if (multipath_hash) 213262306a36Sopenharmony_ci mhash = jhash_2words(mhash, multipath_hash, 0); 213362306a36Sopenharmony_ci 213462306a36Sopenharmony_ci return mhash >> 1; 213562306a36Sopenharmony_ci} 213662306a36Sopenharmony_ci#endif /* CONFIG_IP_ROUTE_MULTIPATH */ 213762306a36Sopenharmony_ci 213862306a36Sopenharmony_cistatic int ip_mkroute_input(struct sk_buff *skb, 213962306a36Sopenharmony_ci struct fib_result *res, 214062306a36Sopenharmony_ci struct in_device *in_dev, 214162306a36Sopenharmony_ci __be32 daddr, __be32 saddr, u32 tos, 214262306a36Sopenharmony_ci struct flow_keys *hkeys) 214362306a36Sopenharmony_ci{ 214462306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_MULTIPATH 214562306a36Sopenharmony_ci if 
(res->fi && fib_info_num_path(res->fi) > 1) { 214662306a36Sopenharmony_ci int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys); 214762306a36Sopenharmony_ci 214862306a36Sopenharmony_ci fib_select_multipath(res, h); 214962306a36Sopenharmony_ci IPCB(skb)->flags |= IPSKB_MULTIPATH; 215062306a36Sopenharmony_ci } 215162306a36Sopenharmony_ci#endif 215262306a36Sopenharmony_ci 215362306a36Sopenharmony_ci /* create a routing cache entry */ 215462306a36Sopenharmony_ci return __mkroute_input(skb, res, in_dev, daddr, saddr, tos); 215562306a36Sopenharmony_ci} 215662306a36Sopenharmony_ci 215762306a36Sopenharmony_ci/* Implements all the saddr-related checks as ip_route_input_slow(), 215862306a36Sopenharmony_ci * assuming daddr is valid and the destination is not a local broadcast one. 215962306a36Sopenharmony_ci * Uses the provided hint instead of performing a route lookup. 216062306a36Sopenharmony_ci */ 216162306a36Sopenharmony_ciint ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, 216262306a36Sopenharmony_ci u8 tos, struct net_device *dev, 216362306a36Sopenharmony_ci const struct sk_buff *hint) 216462306a36Sopenharmony_ci{ 216562306a36Sopenharmony_ci struct in_device *in_dev = __in_dev_get_rcu(dev); 216662306a36Sopenharmony_ci struct rtable *rt = skb_rtable(hint); 216762306a36Sopenharmony_ci struct net *net = dev_net(dev); 216862306a36Sopenharmony_ci int err = -EINVAL; 216962306a36Sopenharmony_ci u32 tag = 0; 217062306a36Sopenharmony_ci 217162306a36Sopenharmony_ci if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) 217262306a36Sopenharmony_ci goto martian_source; 217362306a36Sopenharmony_ci 217462306a36Sopenharmony_ci if (ipv4_is_zeronet(saddr)) 217562306a36Sopenharmony_ci goto martian_source; 217662306a36Sopenharmony_ci 217762306a36Sopenharmony_ci if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) 217862306a36Sopenharmony_ci goto martian_source; 217962306a36Sopenharmony_ci 218062306a36Sopenharmony_ci if (rt->rt_type != 
RTN_LOCAL) 218162306a36Sopenharmony_ci goto skip_validate_source; 218262306a36Sopenharmony_ci 218362306a36Sopenharmony_ci tos &= IPTOS_RT_MASK; 218462306a36Sopenharmony_ci err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag); 218562306a36Sopenharmony_ci if (err < 0) 218662306a36Sopenharmony_ci goto martian_source; 218762306a36Sopenharmony_ci 218862306a36Sopenharmony_ciskip_validate_source: 218962306a36Sopenharmony_ci skb_dst_copy(skb, hint); 219062306a36Sopenharmony_ci return 0; 219162306a36Sopenharmony_ci 219262306a36Sopenharmony_cimartian_source: 219362306a36Sopenharmony_ci ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); 219462306a36Sopenharmony_ci return err; 219562306a36Sopenharmony_ci} 219662306a36Sopenharmony_ci 219762306a36Sopenharmony_ci/* get device for dst_alloc with local routes */ 219862306a36Sopenharmony_cistatic struct net_device *ip_rt_get_dev(struct net *net, 219962306a36Sopenharmony_ci const struct fib_result *res) 220062306a36Sopenharmony_ci{ 220162306a36Sopenharmony_ci struct fib_nh_common *nhc = res->fi ? res->nhc : NULL; 220262306a36Sopenharmony_ci struct net_device *dev = NULL; 220362306a36Sopenharmony_ci 220462306a36Sopenharmony_ci if (nhc) 220562306a36Sopenharmony_ci dev = l3mdev_master_dev_rcu(nhc->nhc_dev); 220662306a36Sopenharmony_ci 220762306a36Sopenharmony_ci return dev ? : net->loopback_dev; 220862306a36Sopenharmony_ci} 220962306a36Sopenharmony_ci 221062306a36Sopenharmony_ci/* 221162306a36Sopenharmony_ci * NOTE. We drop all the packets that has local source 221262306a36Sopenharmony_ci * addresses, because every properly looped back packet 221362306a36Sopenharmony_ci * must have correct destination already attached by output routine. 221462306a36Sopenharmony_ci * Changes in the enforced policies must be applied also to 221562306a36Sopenharmony_ci * ip_route_use_hint(). 221662306a36Sopenharmony_ci * 221762306a36Sopenharmony_ci * Such approach solves two big problems: 221862306a36Sopenharmony_ci * 1. 
Not simplex devices are handled properly. 221962306a36Sopenharmony_ci * 2. IP spoofing attempts are filtered with 100% of guarantee. 222062306a36Sopenharmony_ci * called with rcu_read_lock() 222162306a36Sopenharmony_ci */ 222262306a36Sopenharmony_ci 222362306a36Sopenharmony_cistatic int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, 222462306a36Sopenharmony_ci u8 tos, struct net_device *dev, 222562306a36Sopenharmony_ci struct fib_result *res) 222662306a36Sopenharmony_ci{ 222762306a36Sopenharmony_ci struct in_device *in_dev = __in_dev_get_rcu(dev); 222862306a36Sopenharmony_ci struct flow_keys *flkeys = NULL, _flkeys; 222962306a36Sopenharmony_ci struct net *net = dev_net(dev); 223062306a36Sopenharmony_ci struct ip_tunnel_info *tun_info; 223162306a36Sopenharmony_ci int err = -EINVAL; 223262306a36Sopenharmony_ci unsigned int flags = 0; 223362306a36Sopenharmony_ci u32 itag = 0; 223462306a36Sopenharmony_ci struct rtable *rth; 223562306a36Sopenharmony_ci struct flowi4 fl4; 223662306a36Sopenharmony_ci bool do_cache = true; 223762306a36Sopenharmony_ci 223862306a36Sopenharmony_ci /* IP on this device is disabled. */ 223962306a36Sopenharmony_ci 224062306a36Sopenharmony_ci if (!in_dev) 224162306a36Sopenharmony_ci goto out; 224262306a36Sopenharmony_ci 224362306a36Sopenharmony_ci /* Check for the most weird martians, which can be not detected 224462306a36Sopenharmony_ci * by fib_lookup. 
224562306a36Sopenharmony_ci */ 224662306a36Sopenharmony_ci 224762306a36Sopenharmony_ci tun_info = skb_tunnel_info(skb); 224862306a36Sopenharmony_ci if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) 224962306a36Sopenharmony_ci fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id; 225062306a36Sopenharmony_ci else 225162306a36Sopenharmony_ci fl4.flowi4_tun_key.tun_id = 0; 225262306a36Sopenharmony_ci skb_dst_drop(skb); 225362306a36Sopenharmony_ci 225462306a36Sopenharmony_ci if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) 225562306a36Sopenharmony_ci goto martian_source; 225662306a36Sopenharmony_ci 225762306a36Sopenharmony_ci res->fi = NULL; 225862306a36Sopenharmony_ci res->table = NULL; 225962306a36Sopenharmony_ci if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) 226062306a36Sopenharmony_ci goto brd_input; 226162306a36Sopenharmony_ci 226262306a36Sopenharmony_ci /* Accept zero addresses only to limited broadcast; 226362306a36Sopenharmony_ci * I even do not know to fix it or not. 
Waiting for complains :-) 226462306a36Sopenharmony_ci */ 226562306a36Sopenharmony_ci if (ipv4_is_zeronet(saddr)) 226662306a36Sopenharmony_ci goto martian_source; 226762306a36Sopenharmony_ci 226862306a36Sopenharmony_ci if (ipv4_is_zeronet(daddr)) 226962306a36Sopenharmony_ci goto martian_destination; 227062306a36Sopenharmony_ci 227162306a36Sopenharmony_ci /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(), 227262306a36Sopenharmony_ci * and call it once if daddr or/and saddr are loopback addresses 227362306a36Sopenharmony_ci */ 227462306a36Sopenharmony_ci if (ipv4_is_loopback(daddr)) { 227562306a36Sopenharmony_ci if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) 227662306a36Sopenharmony_ci goto martian_destination; 227762306a36Sopenharmony_ci } else if (ipv4_is_loopback(saddr)) { 227862306a36Sopenharmony_ci if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) 227962306a36Sopenharmony_ci goto martian_source; 228062306a36Sopenharmony_ci } 228162306a36Sopenharmony_ci 228262306a36Sopenharmony_ci /* 228362306a36Sopenharmony_ci * Now we are ready to route packet. 
228462306a36Sopenharmony_ci */ 228562306a36Sopenharmony_ci fl4.flowi4_l3mdev = 0; 228662306a36Sopenharmony_ci fl4.flowi4_oif = 0; 228762306a36Sopenharmony_ci fl4.flowi4_iif = dev->ifindex; 228862306a36Sopenharmony_ci fl4.flowi4_mark = skb->mark; 228962306a36Sopenharmony_ci fl4.flowi4_tos = tos; 229062306a36Sopenharmony_ci fl4.flowi4_scope = RT_SCOPE_UNIVERSE; 229162306a36Sopenharmony_ci fl4.flowi4_flags = 0; 229262306a36Sopenharmony_ci fl4.daddr = daddr; 229362306a36Sopenharmony_ci fl4.saddr = saddr; 229462306a36Sopenharmony_ci fl4.flowi4_uid = sock_net_uid(net, NULL); 229562306a36Sopenharmony_ci fl4.flowi4_multipath_hash = 0; 229662306a36Sopenharmony_ci 229762306a36Sopenharmony_ci if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) { 229862306a36Sopenharmony_ci flkeys = &_flkeys; 229962306a36Sopenharmony_ci } else { 230062306a36Sopenharmony_ci fl4.flowi4_proto = 0; 230162306a36Sopenharmony_ci fl4.fl4_sport = 0; 230262306a36Sopenharmony_ci fl4.fl4_dport = 0; 230362306a36Sopenharmony_ci } 230462306a36Sopenharmony_ci 230562306a36Sopenharmony_ci err = fib_lookup(net, &fl4, res, 0); 230662306a36Sopenharmony_ci if (err != 0) { 230762306a36Sopenharmony_ci if (!IN_DEV_FORWARD(in_dev)) 230862306a36Sopenharmony_ci err = -EHOSTUNREACH; 230962306a36Sopenharmony_ci goto no_route; 231062306a36Sopenharmony_ci } 231162306a36Sopenharmony_ci 231262306a36Sopenharmony_ci if (res->type == RTN_BROADCAST) { 231362306a36Sopenharmony_ci if (IN_DEV_BFORWARD(in_dev)) 231462306a36Sopenharmony_ci goto make_route; 231562306a36Sopenharmony_ci /* not do cache if bc_forwarding is enabled */ 231662306a36Sopenharmony_ci if (IPV4_DEVCONF_ALL(net, BC_FORWARDING)) 231762306a36Sopenharmony_ci do_cache = false; 231862306a36Sopenharmony_ci goto brd_input; 231962306a36Sopenharmony_ci } 232062306a36Sopenharmony_ci 232162306a36Sopenharmony_ci if (res->type == RTN_LOCAL) { 232262306a36Sopenharmony_ci err = fib_validate_source(skb, saddr, daddr, tos, 232362306a36Sopenharmony_ci 0, dev, in_dev, 
&itag); 232462306a36Sopenharmony_ci if (err < 0) 232562306a36Sopenharmony_ci goto martian_source; 232662306a36Sopenharmony_ci goto local_input; 232762306a36Sopenharmony_ci } 232862306a36Sopenharmony_ci 232962306a36Sopenharmony_ci if (!IN_DEV_FORWARD(in_dev)) { 233062306a36Sopenharmony_ci err = -EHOSTUNREACH; 233162306a36Sopenharmony_ci goto no_route; 233262306a36Sopenharmony_ci } 233362306a36Sopenharmony_ci if (res->type != RTN_UNICAST) 233462306a36Sopenharmony_ci goto martian_destination; 233562306a36Sopenharmony_ci 233662306a36Sopenharmony_cimake_route: 233762306a36Sopenharmony_ci err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys); 233862306a36Sopenharmony_ciout: return err; 233962306a36Sopenharmony_ci 234062306a36Sopenharmony_cibrd_input: 234162306a36Sopenharmony_ci if (skb->protocol != htons(ETH_P_IP)) 234262306a36Sopenharmony_ci goto e_inval; 234362306a36Sopenharmony_ci 234462306a36Sopenharmony_ci if (!ipv4_is_zeronet(saddr)) { 234562306a36Sopenharmony_ci err = fib_validate_source(skb, saddr, 0, tos, 0, dev, 234662306a36Sopenharmony_ci in_dev, &itag); 234762306a36Sopenharmony_ci if (err < 0) 234862306a36Sopenharmony_ci goto martian_source; 234962306a36Sopenharmony_ci } 235062306a36Sopenharmony_ci flags |= RTCF_BROADCAST; 235162306a36Sopenharmony_ci res->type = RTN_BROADCAST; 235262306a36Sopenharmony_ci RT_CACHE_STAT_INC(in_brd); 235362306a36Sopenharmony_ci 235462306a36Sopenharmony_cilocal_input: 235562306a36Sopenharmony_ci if (IN_DEV_ORCONF(in_dev, NOPOLICY)) 235662306a36Sopenharmony_ci IPCB(skb)->flags |= IPSKB_NOPOLICY; 235762306a36Sopenharmony_ci 235862306a36Sopenharmony_ci do_cache &= res->fi && !itag; 235962306a36Sopenharmony_ci if (do_cache) { 236062306a36Sopenharmony_ci struct fib_nh_common *nhc = FIB_RES_NHC(*res); 236162306a36Sopenharmony_ci 236262306a36Sopenharmony_ci rth = rcu_dereference(nhc->nhc_rth_input); 236362306a36Sopenharmony_ci if (rt_cache_valid(rth)) { 236462306a36Sopenharmony_ci skb_dst_set_noref(skb, &rth->dst); 
236562306a36Sopenharmony_ci err = 0; 236662306a36Sopenharmony_ci goto out; 236762306a36Sopenharmony_ci } 236862306a36Sopenharmony_ci } 236962306a36Sopenharmony_ci 237062306a36Sopenharmony_ci rth = rt_dst_alloc(ip_rt_get_dev(net, res), 237162306a36Sopenharmony_ci flags | RTCF_LOCAL, res->type, false); 237262306a36Sopenharmony_ci if (!rth) 237362306a36Sopenharmony_ci goto e_nobufs; 237462306a36Sopenharmony_ci 237562306a36Sopenharmony_ci rth->dst.output= ip_rt_bug; 237662306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_CLASSID 237762306a36Sopenharmony_ci rth->dst.tclassid = itag; 237862306a36Sopenharmony_ci#endif 237962306a36Sopenharmony_ci rth->rt_is_input = 1; 238062306a36Sopenharmony_ci 238162306a36Sopenharmony_ci RT_CACHE_STAT_INC(in_slow_tot); 238262306a36Sopenharmony_ci if (res->type == RTN_UNREACHABLE) { 238362306a36Sopenharmony_ci rth->dst.input= ip_error; 238462306a36Sopenharmony_ci rth->dst.error= -err; 238562306a36Sopenharmony_ci rth->rt_flags &= ~RTCF_LOCAL; 238662306a36Sopenharmony_ci } 238762306a36Sopenharmony_ci 238862306a36Sopenharmony_ci if (do_cache) { 238962306a36Sopenharmony_ci struct fib_nh_common *nhc = FIB_RES_NHC(*res); 239062306a36Sopenharmony_ci 239162306a36Sopenharmony_ci rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); 239262306a36Sopenharmony_ci if (lwtunnel_input_redirect(rth->dst.lwtstate)) { 239362306a36Sopenharmony_ci WARN_ON(rth->dst.input == lwtunnel_input); 239462306a36Sopenharmony_ci rth->dst.lwtstate->orig_input = rth->dst.input; 239562306a36Sopenharmony_ci rth->dst.input = lwtunnel_input; 239662306a36Sopenharmony_ci } 239762306a36Sopenharmony_ci 239862306a36Sopenharmony_ci if (unlikely(!rt_cache_route(nhc, rth))) 239962306a36Sopenharmony_ci rt_add_uncached_list(rth); 240062306a36Sopenharmony_ci } 240162306a36Sopenharmony_ci skb_dst_set(skb, &rth->dst); 240262306a36Sopenharmony_ci err = 0; 240362306a36Sopenharmony_ci goto out; 240462306a36Sopenharmony_ci 240562306a36Sopenharmony_cino_route: 240662306a36Sopenharmony_ci 
RT_CACHE_STAT_INC(in_no_route); 240762306a36Sopenharmony_ci res->type = RTN_UNREACHABLE; 240862306a36Sopenharmony_ci res->fi = NULL; 240962306a36Sopenharmony_ci res->table = NULL; 241062306a36Sopenharmony_ci goto local_input; 241162306a36Sopenharmony_ci 241262306a36Sopenharmony_ci /* 241362306a36Sopenharmony_ci * Do not cache martian addresses: they should be logged (RFC1812) 241462306a36Sopenharmony_ci */ 241562306a36Sopenharmony_cimartian_destination: 241662306a36Sopenharmony_ci RT_CACHE_STAT_INC(in_martian_dst); 241762306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_VERBOSE 241862306a36Sopenharmony_ci if (IN_DEV_LOG_MARTIANS(in_dev)) 241962306a36Sopenharmony_ci net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n", 242062306a36Sopenharmony_ci &daddr, &saddr, dev->name); 242162306a36Sopenharmony_ci#endif 242262306a36Sopenharmony_ci 242362306a36Sopenharmony_cie_inval: 242462306a36Sopenharmony_ci err = -EINVAL; 242562306a36Sopenharmony_ci goto out; 242662306a36Sopenharmony_ci 242762306a36Sopenharmony_cie_nobufs: 242862306a36Sopenharmony_ci err = -ENOBUFS; 242962306a36Sopenharmony_ci goto out; 243062306a36Sopenharmony_ci 243162306a36Sopenharmony_cimartian_source: 243262306a36Sopenharmony_ci ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); 243362306a36Sopenharmony_ci goto out; 243462306a36Sopenharmony_ci} 243562306a36Sopenharmony_ci 243662306a36Sopenharmony_ci/* called with rcu_read_lock held */ 243762306a36Sopenharmony_cistatic int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, 243862306a36Sopenharmony_ci u8 tos, struct net_device *dev, struct fib_result *res) 243962306a36Sopenharmony_ci{ 244062306a36Sopenharmony_ci /* Multicast recognition logic is moved from route cache to here. 
244162306a36Sopenharmony_ci * The problem was that too many Ethernet cards have broken/missing 244262306a36Sopenharmony_ci * hardware multicast filters :-( As result the host on multicasting 244362306a36Sopenharmony_ci * network acquires a lot of useless route cache entries, sort of 244462306a36Sopenharmony_ci * SDR messages from all the world. Now we try to get rid of them. 244562306a36Sopenharmony_ci * Really, provided software IP multicast filter is organized 244662306a36Sopenharmony_ci * reasonably (at least, hashed), it does not result in a slowdown 244762306a36Sopenharmony_ci * comparing with route cache reject entries. 244862306a36Sopenharmony_ci * Note, that multicast routers are not affected, because 244962306a36Sopenharmony_ci * route cache entry is created eventually. 245062306a36Sopenharmony_ci */ 245162306a36Sopenharmony_ci if (ipv4_is_multicast(daddr)) { 245262306a36Sopenharmony_ci struct in_device *in_dev = __in_dev_get_rcu(dev); 245362306a36Sopenharmony_ci int our = 0; 245462306a36Sopenharmony_ci int err = -EINVAL; 245562306a36Sopenharmony_ci 245662306a36Sopenharmony_ci if (!in_dev) 245762306a36Sopenharmony_ci return err; 245862306a36Sopenharmony_ci our = ip_check_mc_rcu(in_dev, daddr, saddr, 245962306a36Sopenharmony_ci ip_hdr(skb)->protocol); 246062306a36Sopenharmony_ci 246162306a36Sopenharmony_ci /* check l3 master if no match yet */ 246262306a36Sopenharmony_ci if (!our && netif_is_l3_slave(dev)) { 246362306a36Sopenharmony_ci struct in_device *l3_in_dev; 246462306a36Sopenharmony_ci 246562306a36Sopenharmony_ci l3_in_dev = __in_dev_get_rcu(skb->dev); 246662306a36Sopenharmony_ci if (l3_in_dev) 246762306a36Sopenharmony_ci our = ip_check_mc_rcu(l3_in_dev, daddr, saddr, 246862306a36Sopenharmony_ci ip_hdr(skb)->protocol); 246962306a36Sopenharmony_ci } 247062306a36Sopenharmony_ci 247162306a36Sopenharmony_ci if (our 247262306a36Sopenharmony_ci#ifdef CONFIG_IP_MROUTE 247362306a36Sopenharmony_ci || 247462306a36Sopenharmony_ci (!ipv4_is_local_multicast(daddr) 
&&
		     IN_DEV_MFORWARD(in_dev))
#endif
		   ) {
			err = ip_route_input_mc(skb, daddr, saddr,
						tos, dev, our);
		}
		return err;
	}

	return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
}

/* Input route lookup wrapper.
 *
 * Masks @tos with IPTOS_RT_MASK and runs ip_route_input_rcu() inside an
 * RCU read-side critical section.  The fib_result filled in by the lookup
 * is a local and is discarded; only the integer status of
 * ip_route_input_rcu() is returned to the caller.
 */
int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			 u8 tos, struct net_device *dev)
{
	struct fib_result res;
	int err;

	tos &= IPTOS_RT_MASK;
	rcu_read_lock();
	err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(ip_route_input_noref);

/* Build (or fetch from cache) the output rtable for an already resolved
 * lookup.  Called with rcu_read_lock().
 *
 * @res:      FIB lookup result; res->fi may be NULL.
 * @fl4:      flow being routed (only read here).
 * @orig_oif: output interface index stored into rth->rt_iif.
 * @dev_out:  device the route will use.
 * @flags:    initial RTCF_* flags; more are OR-ed in below.
 *
 * Returns a held rtable on success, or an ERR_PTR():
 *   -EINVAL  - @dev_out has no in_device, the source is a loopback address
 *              on a device that is neither loopback nor an L3 master
 *              (unless IN_DEV_ROUTE_LOCALNET() is set), or the destination
 *              is in the zero network;
 *   -ENOBUFS - rt_dst_alloc() failed.
 */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;
	bool do_cache;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) &&
		    !(dev_out->flags & IFF_LOOPBACK) &&
		    !netif_is_l3_master(dev_out))
			return ERR_PTR(-EINVAL);

	/* The destination address class overrides the type from the FIB. */
	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	do_cache = true;
	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		/* RTCF_LOCAL is kept only when ip_check_mc_rcu() reports a
		 * match for this group/source/protocol on the device; such
		 * routes are not cached.
		 */
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		else
			do_cache = false;
		/* If a multicast route does not exist, use the
		 * default one, but do not gateway in this case.
		 * Yes, it is a hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	} else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
		   (orig_oif != dev_out->ifindex)) {
		/* For local routes that require a particular output interface
		 * we do not want to cache the result.  Caching the result
		 * causes incorrect behaviour when there are multiple source
		 * addresses on the interface, the end result being that if the
		 * intended recipient is waiting on that interface for the
		 * packet he won't receive it because it will be delivered on
		 * the loopback interface and the IP_PKTINFO ipi_ifindex will
		 * be set to the loopback interface as well.
		 */
		do_cache = false;
	}

	fnhe = NULL;
	/* Without a fib_info there is no nexthop to hang a cached route on. */
	do_cache &= fi != NULL;
	if (fi) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
		struct rtable __rcu **prth;

		/* The exception is looked up even when caching is off: it is
		 * still handed to rt_set_nexthop() below.
		 */
		fnhe = find_exception(nhc, fl4->daddr);
		if (!do_cache)
			goto add;
		if (fnhe) {
			prth = &fnhe->fnhe_rth_output;
		} else {
			/* FLOWI_FLAG_KNOWN_NH flows skip the per-cpu cache
			 * unless the nexthop has a gateway with link scope.
			 */
			if (unlikely(fl4->flowi4_flags &
				     FLOWI_FLAG_KNOWN_NH &&
				     !(nhc->nhc_gw_family &&
				       nhc->nhc_scope == RT_SCOPE_LINK))) {
				do_cache = false;
				goto add;
			}
			prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
		}
		/* Reuse the cached route only if it is still valid and a
		 * reference could be taken on it.
		 */
		rth = rcu_dereference(*prth);
		if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
			return rth;
	}

add:
	rth = rt_dst_alloc(dev_out, flags, type,
			   IN_DEV_ORCONF(in_dev, NOXFRM));
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->rt_iif = orig_oif;

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
	lwtunnel_set_redirect(&rth->dst);

	return rth;
}

/*
 * Major route resolver routine.
 */

/* Resolve an output route for @fl4.
 *
 * Forces fl4->flowi4_iif to LOOPBACK_IFINDEX, normalises the TOS via
 * ip_rt_fix_tos() and then runs ip_route_output_key_hash_rcu() under
 * rcu_read_lock() with a zeroed, RTN_UNSPEC fib_result.  @fl4 may be
 * modified by the lookup.  Returns the rtable or an ERR_PTR().
 */
struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
					const struct sk_buff *skb)
{
	struct fib_result res = {
		.type		= RTN_UNSPEC,
		.fi		= NULL,
		.table		= NULL,
		.tclassid	= 0,
	};
	struct rtable *rth;

	fl4->flowi4_iif = LOOPBACK_IFINDEX;
	ip_rt_fix_tos(fl4);

	rcu_read_lock();
	rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
	rcu_read_unlock();

	return rth;
}
EXPORT_SYMBOL_GPL(ip_route_output_key_hash);

/* Core of the output route lookup.
 *
 * Uses RCU-only accessors (dev_get_by_index_rcu(), __in_dev_get_rcu()), so
 * it is expected to run under rcu_read_lock(), as the wrapper above does.
 *
 * @fl4 is an in/out argument: saddr, daddr and flowi4_oif may be filled in
 * or rewritten.  @res is written by fib_lookup() and on some shortcut paths.
 *
 * Order of processing:
 *  1. validate a caller-supplied source address;
 *  2. honour a caller-supplied output interface;
 *  3. an empty destination becomes a local (loopback) route;
 *  4. FIB lookup, with an on-link fallback when an oif was given;
 *  5. local destinations are redirected to loopback / the L3 master;
 *  6. otherwise pick a path and build the route.
 *
 * Returns the rtable from __mkroute_output() or an ERR_PTR()
 * (-EINVAL, -ENETUNREACH, -ENODEV or the fib_lookup() error).
 */
struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
					    struct fib_result *res,
					    const struct sk_buff *skb)
{
	struct net_device *dev_out = NULL;
	int orig_oif = fl4->flowi4_oif;
	unsigned int flags = 0;
	struct rtable *rth;
	int err;

	if (fl4->saddr) {
		/* Multicast, limited-broadcast and zero-net sources are
		 * never acceptable.
		 */
		if (ipv4_is_multicast(fl4->saddr) ||
		    ipv4_is_lbcast(fl4->saddr) ||
		    ipv4_is_zeronet(fl4->saddr)) {
			rth = ERR_PTR(-EINVAL);
			goto out;
		}

		/* Error returned by the "goto out" exits below. */
		rth = ERR_PTR(-ENETUNREACH);

		/* I removed check for oif == dev_out->oif here.
		 * It was wrong for two reasons:
		 * 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
		 *    is assigned to multiple interfaces.
		 * 2. Moreover, we are allowed to send packets with saddr
		 *    of another iface. --ANK
		 */

		if (fl4->flowi4_oif == 0 &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     ipv4_is_lbcast(fl4->daddr))) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			dev_out = __ip_dev_find(net, fl4->saddr, false);
			if (!dev_out)
				goto out;

			/* Special hack: user can direct multicasts
			 * and limited broadcast via necessary interface
			 * without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
			 * This hack is not just for fun, it allows
			 * vic,vat and friends to work.
			 * They bind socket to loopback, set ttl to zero
			 * and expect that it will work.
			 * From the viewpoint of routing cache they are broken,
			 * because we are not allowed to build multicast path
			 * with loopback source addr (look, routing cache
			 * cannot know, that ttl is zero, so that packet
			 * will not leave this host and route is valid).
			 * Luckily, this hack is good workaround.
			 */

			fl4->flowi4_oif = dev_out->ifindex;
			goto make_route;
		}

		if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			if (!__ip_dev_find(net, fl4->saddr, false))
				goto out;
		}
	}


	if (fl4->flowi4_oif) {
		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
		rth = ERR_PTR(-ENODEV);
		if (!dev_out)
			goto out;

		/* RACE: Check return value of inet_select_addr instead. */
		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
			rth = ERR_PTR(-ENETUNREACH);
			goto out;
		}
		/* Link-local multicast, limited broadcast and IGMP bypass
		 * the FIB entirely and go straight out of the given device.
		 */
		if (ipv4_is_local_multicast(fl4->daddr) ||
		    ipv4_is_lbcast(fl4->daddr) ||
		    fl4->flowi4_proto == IPPROTO_IGMP) {
			if (!fl4->saddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			goto make_route;
		}
		if (!fl4->saddr) {
			if (ipv4_is_multicast(fl4->daddr))
				fl4->saddr = inet_select_addr(dev_out, 0,
							      fl4->flowi4_scope);
			else if (!fl4->daddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_HOST);
		}
	}

	/* No destination: route to ourselves.  daddr takes saddr, or
	 * INADDR_LOOPBACK for both when no source is known either.
	 */
	if (!fl4->daddr) {
		fl4->daddr = fl4->saddr;
		if (!fl4->daddr)
			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = LOOPBACK_IFINDEX;
		res->type = RTN_LOCAL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	err = fib_lookup(net, fl4, res, 0);
	if (err) {
		res->fi = NULL;
		res->table = NULL;
		if (fl4->flowi4_oif &&
		    (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) {
			/* Apparently, routing tables are wrong. Assume,
			 * that the destination is on link.
			 *
			 * WHY? DW.
			 * Because we are allowed to send to iface
			 * even if it has NO routes and NO assigned
			 * addresses. When oif is specified, routing
			 * tables are looked up with only one purpose:
			 * to catch if destination is gatewayed, rather than
			 * direct. Moreover, if MSG_DONTROUTE is set,
			 * we send packet, ignoring both routing tables
			 * and ifaddr state. --ANK
			 *
			 *
			 * We could make it even if oif is unknown,
			 * likely IPv6, but we do not.
			 */

			if (fl4->saddr == 0)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			res->type = RTN_UNICAST;
			goto make_route;
		}
		rth = ERR_PTR(err);
		goto out;
	}

	if (res->type == RTN_LOCAL) {
		/* Source defaults to the route's preferred source, else to
		 * the (local) destination itself.
		 */
		if (!fl4->saddr) {
			if (res->fi->fib_prefsrc)
				fl4->saddr = res->fi->fib_prefsrc;
			else
				fl4->saddr = fl4->daddr;
		}

		/* L3 master device is the loopback for that domain */
		dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
			net->loopback_dev;

		/* make sure orig_oif points to fib result device even
		 * though packet rx/tx happens over loopback or l3mdev
		 */
		orig_oif = FIB_RES_OIF(*res);

		fl4->flowi4_oif = dev_out->ifindex;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	fib_select_path(net, res, fl4, skb);

	dev_out = FIB_RES_DEV(*res);

make_route:
	rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);

out:
	return rth;
}

/* dst_ops used by routes created in ipv4_blackhole_route().  Apart from
 * the advmss default and the neighbour lookup, every hook is one of the
 * generic dst_blackhole_*() helpers.
 */
static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			= AF_INET,
	.default_advmss		= ipv4_default_advmss,
	.neigh_lookup		= ipv4_neigh_lookup,
	.check			= dst_blackhole_check,
	.cow_metrics		= dst_blackhole_cow_metrics,
	.update_pmtu		= dst_blackhole_update_pmtu,
	.redirect		= dst_blackhole_redirect,
	.mtu			= dst_blackhole_mtu,
};

/* Create a packet-discarding copy of @dst_orig.
 *
 * The new route uses ipv4_dst_blackhole_ops, discards on both input and
 * output, is bound to @net's loopback device and inherits the routing
 * attributes of the original.  The caller's reference on @dst_orig is
 * always dropped.  Returns the new dst, or ERR_PTR(-ENOMEM) if the
 * allocation failed.
 */
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		/* Anything sent through this route is dropped. */
		new->input = dst_discard;
		new->output = dst_discard_out;

		/* Bind to loopback and take a tracked reference on it. */
		new->dev = net->loopback_dev;
		netdev_hold(new->dev, &new->dev_tracker, GFP_ATOMIC);

		/* Inherit the routing attributes of the original route;
		 * only the generation id is taken fresh from @net.
		 */
		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;
		rt->rt_mtu_locked = ort->rt_mtu_locked;

		rt->rt_genid = rt_genid_ipv4(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_uses_gateway = ort->rt_uses_gateway;
		rt->rt_gw_family = ort->rt_gw_family;
		if (rt->rt_gw_family == AF_INET)
			rt->rt_gw4 = ort->rt_gw4;
		else if (rt->rt_gw_family == AF_INET6)
			rt->rt_gw6 = ort->rt_gw6;
	}

	/* The original is released whether or not the copy was made. */
	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}

/* Output route lookup followed by an xfrm policy lookup.
 *
 * Resolves @flp4 with __ip_route_output_key() and returns any ERR_PTR()
 * from it unchanged.  When flp4->flowi4_proto is non-zero, flowi4_oif is
 * overwritten with the ifindex of the resolved route's device and the
 * route is passed through xfrm_lookup_route(); whatever that returns
 * (cast back to struct rtable *) is the result.
 */
struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
				    const struct sock *sk)
{
	struct rtable *rt = __ip_route_output_key(net, flp4);

	if (IS_ERR(rt))
		return rt;

	if (flp4->flowi4_proto) {
		flp4->flowi4_oif = rt->dst.dev->ifindex;
		rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
							flowi4_to_flowi(flp4),
							sk, 0);
	}

	return rt;
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);

/* Resolve the underlay route for a tunnel transmit.
 *
 * @skb:       packet being sent; only skb->mark is used for the lookup.
 * @dev:       tunnel device, used for debug logging and loop detection.
 * @net:       namespace in which the lookup is done.
 * @saddr:     out: source address selected for the flow.
 * @info:      tunnel metadata supplying dst/src address, TOS and the
 *             dst cache.
 * @protocol:  value stored in fl4.flowi4_proto.
 * @use_cache: consult and update info->dst_cache (CONFIG_DST_CACHE only).
 *
 * Returns the route, ERR_PTR(-ENETUNREACH) if the lookup fails (the
 * original error code is not propagated), or ERR_PTR(-ELOOP) if the
 * route points back at @dev.
 */
struct rtable *ip_route_output_tunnel(struct sk_buff *skb,
				      struct net_device *dev,
				      struct net *net, __be32 *saddr,
				      const struct ip_tunnel_info *info,
				      u8 protocol, bool use_cache)
{
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__u8 tos;

#ifdef CONFIG_DST_CACHE
	/* @info is const; the cast drops the qualifier so the cache can be
	 * read and later updated through it.
	 */
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		rt = dst_cache_get_ip4(dst_cache, saddr);
		if (rt)
			return rt;
	}
#endif
	/* Cache miss (or caching disabled): do a full lookup. */
	memset(&fl4, 0, sizeof(fl4));
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_proto = protocol;
	fl4.daddr = info->key.u.ipv4.dst;
	fl4.saddr = info->key.u.ipv4.src;
	tos = info->key.tos;
	fl4.flowi4_tos = RT_TOS(tos);

	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt)) {
		netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	if (rt->dst.dev == dev) { /* is this necessary? */
		netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr);
		ip_rt_put(rt);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
#endif
	*saddr = fl4.saddr;
	return rt;
}
EXPORT_SYMBOL_GPL(ip_route_output_tunnel);

/* Append one RTM_NEWROUTE message describing @rt to @skb.
 * Called with rcu_read_lock held.
 *
 * @dst, @src: addresses reported as RTA_DST / RTA_SRC (@src may be 0).
 * @table_id:  reported in RTA_TABLE; rtm_table carries it only if < 256,
 *             otherwise RT_TABLE_COMPAT.
 * @fl4:       optional flow; may be NULL (fnhe_dump_bucket() passes NULL),
 *             in which case the flow-derived attributes are omitted.
 * @portid, @seq, @flags: netlink header values for nlmsg_put().
 *
 * Returns 0 on success or -EMSGSIZE when the message does not fit; on
 * failure the partially built message is cancelled.
 */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
			struct rtable *rt, u32 table_id, struct flowi4 *fl4,
			struct sk_buff *skb, u32 portid, u32 seq,
			unsigned int flags)
{
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	r->rtm_family	= AF_INET;
	r->rtm_dst_len	= 32;
	r->rtm_src_len	= 0;
	r->rtm_tos	= fl4 ? fl4->flowi4_tos : 0;
	r->rtm_table	= table_id < 256 ? table_id : RT_TABLE_COMPAT;
	if (nla_put_u32(skb, RTA_TABLE, table_id))
		goto nla_put_failure;
	r->rtm_type	= rt->rt_type;
	r->rtm_scope	= RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	/* Only the bits of rt_flags above the low 16 are exported. */
	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;
	if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
		r->rtm_flags |= RTCF_DOREDIRECT;

	if (nla_put_in_addr(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		r->rtm_src_len = 32;
		if (nla_put_in_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (rt->dst.lwtstate &&
	    lwtunnel_fill_encap(skb, rt->dst.lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	/* For output routes, report the flow's source as the preferred
	 * source when it differs from @src.
	 */
	if (fl4 && !rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	if (rt->rt_uses_gateway) {
		if (rt->rt_gw_family == AF_INET &&
		    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
			goto nla_put_failure;
		} else if (rt->rt_gw_family == AF_INET6) {
			/* An IPv6 gateway is reported as RTA_VIA: a struct
			 * rtvia holding the family followed by the address
			 * (hence the "+ 2" on the payload length).
			 */
			int alen = sizeof(struct in6_addr);
			struct nlattr *nla;
			struct rtvia *via;

			nla = nla_reserve(skb, RTA_VIA, alen + 2);
			if (!nla)
				goto nla_put_failure;

			via = nla_data(nla);
			via->rtvia_family = AF_INET6;
			memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
		}
	}

	/* Convert the absolute expiry (jiffies) into time remaining;
	 * 0 means "no expiry" or "already expired".
	 */
	expires = rt->dst.expires;
	if (expires) {
		unsigned long now = jiffies;

		if (time_before(now, expires))
			expires -= now;
		else
			expires = 0;
	}

	/* Report the route's metrics, overlaid with the learned PMTU and
	 * its lock bit while the route has not yet expired.
	 */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt_pmtu && expires)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rt->rt_mtu_locked && expires)
		metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4) {
		if (fl4->flowi4_mark &&
		    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
			goto nla_put_failure;

		if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
		    nla_put_u32(skb, RTA_UID,
				from_kuid_munged(current_user_ns(),
						 fl4->flowi4_uid)))
			goto nla_put_failure;

		if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE
			if (ipv4_is_multicast(dst) &&
			    !ipv4_is_local_multicast(dst) &&
			    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
				int err = ipmr_get_route(net, skb,
							 fl4->saddr, fl4->daddr,
							 r, portid);

				/* NOTE(review): err == 0 returns success
				 * right here, without reaching nlmsg_end()
				 * below - presumably ipmr_get_route() has
				 * taken care of the message; confirm there.
				 */
				if (err <= 0) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				}
			} else
#endif
				/* With CONFIG_IP_MROUTE this is the body of
				 * the "else" above; without it, it runs for
				 * every input route.
				 */
				if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
					goto nla_put_failure;
		}
	}

	error = rt->dst.error;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* Dump the cached routes of every exception in one nexthop's hash table.
 *
 * Walks all FNHE_HASH_SIZE chains of @bucket under RCU.  *@fa_index is
 * advanced for every entry visited, whether it was dumped or skipped.
 * An entry is skipped when its index is below @fa_start, its generation
 * id differs from @genid, it has expired, or it holds neither an input
 * nor an output route.
 *
 * Returns 0, or the first non-zero rt_fill_info() result; in that case
 * *@fa_index is left pointing at the entry that failed.
 */
static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
			    struct netlink_callback *cb, u32 table_id,
			    struct fnhe_hash_bucket *bucket, int genid,
			    int *fa_index, int fa_start, unsigned int flags)
{
	int i;

	for (i = 0; i < FNHE_HASH_SIZE; i++) {
		struct fib_nh_exception *fnhe;

		for (fnhe = rcu_dereference(bucket[i].chain); fnhe;
		     fnhe = rcu_dereference(fnhe->fnhe_next)) {
			struct rtable *rt;
			int err;

			if (*fa_index < fa_start)
				goto next;

			if (fnhe->fnhe_genid != genid)
				goto next;

			if (fnhe->fnhe_expires &&
			    time_after(jiffies, fnhe->fnhe_expires))
				goto next;

			/* Prefer the input route, fall back to the output one. */
			rt = rcu_dereference(fnhe->fnhe_rth_input);
			if (!rt)
				rt = rcu_dereference(fnhe->fnhe_rth_output);
			if (!rt)
				goto next;

			err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt,
					   table_id, NULL, skb,
					   NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq, flags);
			if (err)
				return err;
next:
			(*fa_index)++;
		}
	}

	return 0;
}

/* Dump the exception routes of every live nexthop of @fi.
 *
 * Nexthops flagged RTNH_F_DEAD are skipped.  For the others the exception
 * table is dereferenced and walked by fnhe_dump_bucket() inside an RCU
 * read-side critical section.  @fa_index / @fa_start are passed through
 * unchanged.  Returns 0 or the first error reported by
 * fnhe_dump_bucket().
 */
int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
		       u32 table_id, struct fib_info *fi,
		       int *fa_index, int fa_start, unsigned int flags)
{
	struct net *net = sock_net(cb->skb->sk);
	int nhsel, genid = fnhe_genid(net);

	for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
		struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
		struct fnhe_hash_bucket *bucket;
		int err;

		if (nhc->nhc_flags & RTNH_F_DEAD)
			continue;

		rcu_read_lock();
		bucket = rcu_dereference(nhc->nhc_exceptions);
		err = 0;
		if (bucket)
			err = fnhe_dump_bucket(net, skb, cb, table_id, bucket,
					       genid, fa_index, fa_start,
					       flags);
314162306a36Sopenharmony_ci rcu_read_unlock(); 314262306a36Sopenharmony_ci if (err) 314362306a36Sopenharmony_ci return err; 314462306a36Sopenharmony_ci } 314562306a36Sopenharmony_ci 314662306a36Sopenharmony_ci return 0; 314762306a36Sopenharmony_ci} 314862306a36Sopenharmony_ci 314962306a36Sopenharmony_cistatic struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, 315062306a36Sopenharmony_ci u8 ip_proto, __be16 sport, 315162306a36Sopenharmony_ci __be16 dport) 315262306a36Sopenharmony_ci{ 315362306a36Sopenharmony_ci struct sk_buff *skb; 315462306a36Sopenharmony_ci struct iphdr *iph; 315562306a36Sopenharmony_ci 315662306a36Sopenharmony_ci skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 315762306a36Sopenharmony_ci if (!skb) 315862306a36Sopenharmony_ci return NULL; 315962306a36Sopenharmony_ci 316062306a36Sopenharmony_ci /* Reserve room for dummy headers, this skb can pass 316162306a36Sopenharmony_ci * through good chunk of routing engine. 316262306a36Sopenharmony_ci */ 316362306a36Sopenharmony_ci skb_reset_mac_header(skb); 316462306a36Sopenharmony_ci skb_reset_network_header(skb); 316562306a36Sopenharmony_ci skb->protocol = htons(ETH_P_IP); 316662306a36Sopenharmony_ci iph = skb_put(skb, sizeof(struct iphdr)); 316762306a36Sopenharmony_ci iph->protocol = ip_proto; 316862306a36Sopenharmony_ci iph->saddr = src; 316962306a36Sopenharmony_ci iph->daddr = dst; 317062306a36Sopenharmony_ci iph->version = 0x4; 317162306a36Sopenharmony_ci iph->frag_off = 0; 317262306a36Sopenharmony_ci iph->ihl = 0x5; 317362306a36Sopenharmony_ci skb_set_transport_header(skb, skb->len); 317462306a36Sopenharmony_ci 317562306a36Sopenharmony_ci switch (iph->protocol) { 317662306a36Sopenharmony_ci case IPPROTO_UDP: { 317762306a36Sopenharmony_ci struct udphdr *udph; 317862306a36Sopenharmony_ci 317962306a36Sopenharmony_ci udph = skb_put_zero(skb, sizeof(struct udphdr)); 318062306a36Sopenharmony_ci udph->source = sport; 318162306a36Sopenharmony_ci udph->dest = dport; 318262306a36Sopenharmony_ci 
udph->len = htons(sizeof(struct udphdr)); 318362306a36Sopenharmony_ci udph->check = 0; 318462306a36Sopenharmony_ci break; 318562306a36Sopenharmony_ci } 318662306a36Sopenharmony_ci case IPPROTO_TCP: { 318762306a36Sopenharmony_ci struct tcphdr *tcph; 318862306a36Sopenharmony_ci 318962306a36Sopenharmony_ci tcph = skb_put_zero(skb, sizeof(struct tcphdr)); 319062306a36Sopenharmony_ci tcph->source = sport; 319162306a36Sopenharmony_ci tcph->dest = dport; 319262306a36Sopenharmony_ci tcph->doff = sizeof(struct tcphdr) / 4; 319362306a36Sopenharmony_ci tcph->rst = 1; 319462306a36Sopenharmony_ci tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), 319562306a36Sopenharmony_ci src, dst, 0); 319662306a36Sopenharmony_ci break; 319762306a36Sopenharmony_ci } 319862306a36Sopenharmony_ci case IPPROTO_ICMP: { 319962306a36Sopenharmony_ci struct icmphdr *icmph; 320062306a36Sopenharmony_ci 320162306a36Sopenharmony_ci icmph = skb_put_zero(skb, sizeof(struct icmphdr)); 320262306a36Sopenharmony_ci icmph->type = ICMP_ECHO; 320362306a36Sopenharmony_ci icmph->code = 0; 320462306a36Sopenharmony_ci } 320562306a36Sopenharmony_ci } 320662306a36Sopenharmony_ci 320762306a36Sopenharmony_ci return skb; 320862306a36Sopenharmony_ci} 320962306a36Sopenharmony_ci 321062306a36Sopenharmony_cistatic int inet_rtm_valid_getroute_req(struct sk_buff *skb, 321162306a36Sopenharmony_ci const struct nlmsghdr *nlh, 321262306a36Sopenharmony_ci struct nlattr **tb, 321362306a36Sopenharmony_ci struct netlink_ext_ack *extack) 321462306a36Sopenharmony_ci{ 321562306a36Sopenharmony_ci struct rtmsg *rtm; 321662306a36Sopenharmony_ci int i, err; 321762306a36Sopenharmony_ci 321862306a36Sopenharmony_ci if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { 321962306a36Sopenharmony_ci NL_SET_ERR_MSG(extack, 322062306a36Sopenharmony_ci "ipv4: Invalid header for route get request"); 322162306a36Sopenharmony_ci return -EINVAL; 322262306a36Sopenharmony_ci } 322362306a36Sopenharmony_ci 322462306a36Sopenharmony_ci if 
(!netlink_strict_get_check(skb)) 322562306a36Sopenharmony_ci return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, 322662306a36Sopenharmony_ci rtm_ipv4_policy, extack); 322762306a36Sopenharmony_ci 322862306a36Sopenharmony_ci rtm = nlmsg_data(nlh); 322962306a36Sopenharmony_ci if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || 323062306a36Sopenharmony_ci (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || 323162306a36Sopenharmony_ci rtm->rtm_table || rtm->rtm_protocol || 323262306a36Sopenharmony_ci rtm->rtm_scope || rtm->rtm_type) { 323362306a36Sopenharmony_ci NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request"); 323462306a36Sopenharmony_ci return -EINVAL; 323562306a36Sopenharmony_ci } 323662306a36Sopenharmony_ci 323762306a36Sopenharmony_ci if (rtm->rtm_flags & ~(RTM_F_NOTIFY | 323862306a36Sopenharmony_ci RTM_F_LOOKUP_TABLE | 323962306a36Sopenharmony_ci RTM_F_FIB_MATCH)) { 324062306a36Sopenharmony_ci NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request"); 324162306a36Sopenharmony_ci return -EINVAL; 324262306a36Sopenharmony_ci } 324362306a36Sopenharmony_ci 324462306a36Sopenharmony_ci err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, 324562306a36Sopenharmony_ci rtm_ipv4_policy, extack); 324662306a36Sopenharmony_ci if (err) 324762306a36Sopenharmony_ci return err; 324862306a36Sopenharmony_ci 324962306a36Sopenharmony_ci if ((tb[RTA_SRC] && !rtm->rtm_src_len) || 325062306a36Sopenharmony_ci (tb[RTA_DST] && !rtm->rtm_dst_len)) { 325162306a36Sopenharmony_ci NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); 325262306a36Sopenharmony_ci return -EINVAL; 325362306a36Sopenharmony_ci } 325462306a36Sopenharmony_ci 325562306a36Sopenharmony_ci for (i = 0; i <= RTA_MAX; i++) { 325662306a36Sopenharmony_ci if (!tb[i]) 325762306a36Sopenharmony_ci continue; 325862306a36Sopenharmony_ci 325962306a36Sopenharmony_ci switch (i) { 326062306a36Sopenharmony_ci case RTA_IIF: 
326162306a36Sopenharmony_ci case RTA_OIF: 326262306a36Sopenharmony_ci case RTA_SRC: 326362306a36Sopenharmony_ci case RTA_DST: 326462306a36Sopenharmony_ci case RTA_IP_PROTO: 326562306a36Sopenharmony_ci case RTA_SPORT: 326662306a36Sopenharmony_ci case RTA_DPORT: 326762306a36Sopenharmony_ci case RTA_MARK: 326862306a36Sopenharmony_ci case RTA_UID: 326962306a36Sopenharmony_ci break; 327062306a36Sopenharmony_ci default: 327162306a36Sopenharmony_ci NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request"); 327262306a36Sopenharmony_ci return -EINVAL; 327362306a36Sopenharmony_ci } 327462306a36Sopenharmony_ci } 327562306a36Sopenharmony_ci 327662306a36Sopenharmony_ci return 0; 327762306a36Sopenharmony_ci} 327862306a36Sopenharmony_ci 327962306a36Sopenharmony_cistatic int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 328062306a36Sopenharmony_ci struct netlink_ext_ack *extack) 328162306a36Sopenharmony_ci{ 328262306a36Sopenharmony_ci struct net *net = sock_net(in_skb->sk); 328362306a36Sopenharmony_ci struct nlattr *tb[RTA_MAX+1]; 328462306a36Sopenharmony_ci u32 table_id = RT_TABLE_MAIN; 328562306a36Sopenharmony_ci __be16 sport = 0, dport = 0; 328662306a36Sopenharmony_ci struct fib_result res = {}; 328762306a36Sopenharmony_ci u8 ip_proto = IPPROTO_UDP; 328862306a36Sopenharmony_ci struct rtable *rt = NULL; 328962306a36Sopenharmony_ci struct sk_buff *skb; 329062306a36Sopenharmony_ci struct rtmsg *rtm; 329162306a36Sopenharmony_ci struct flowi4 fl4 = {}; 329262306a36Sopenharmony_ci __be32 dst = 0; 329362306a36Sopenharmony_ci __be32 src = 0; 329462306a36Sopenharmony_ci kuid_t uid; 329562306a36Sopenharmony_ci u32 iif; 329662306a36Sopenharmony_ci int err; 329762306a36Sopenharmony_ci int mark; 329862306a36Sopenharmony_ci 329962306a36Sopenharmony_ci err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack); 330062306a36Sopenharmony_ci if (err < 0) 330162306a36Sopenharmony_ci return err; 330262306a36Sopenharmony_ci 330362306a36Sopenharmony_ci rtm = 
nlmsg_data(nlh); 330462306a36Sopenharmony_ci src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; 330562306a36Sopenharmony_ci dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; 330662306a36Sopenharmony_ci iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; 330762306a36Sopenharmony_ci mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; 330862306a36Sopenharmony_ci if (tb[RTA_UID]) 330962306a36Sopenharmony_ci uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); 331062306a36Sopenharmony_ci else 331162306a36Sopenharmony_ci uid = (iif ? INVALID_UID : current_uid()); 331262306a36Sopenharmony_ci 331362306a36Sopenharmony_ci if (tb[RTA_IP_PROTO]) { 331462306a36Sopenharmony_ci err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], 331562306a36Sopenharmony_ci &ip_proto, AF_INET, extack); 331662306a36Sopenharmony_ci if (err) 331762306a36Sopenharmony_ci return err; 331862306a36Sopenharmony_ci } 331962306a36Sopenharmony_ci 332062306a36Sopenharmony_ci if (tb[RTA_SPORT]) 332162306a36Sopenharmony_ci sport = nla_get_be16(tb[RTA_SPORT]); 332262306a36Sopenharmony_ci 332362306a36Sopenharmony_ci if (tb[RTA_DPORT]) 332462306a36Sopenharmony_ci dport = nla_get_be16(tb[RTA_DPORT]); 332562306a36Sopenharmony_ci 332662306a36Sopenharmony_ci skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport); 332762306a36Sopenharmony_ci if (!skb) 332862306a36Sopenharmony_ci return -ENOBUFS; 332962306a36Sopenharmony_ci 333062306a36Sopenharmony_ci fl4.daddr = dst; 333162306a36Sopenharmony_ci fl4.saddr = src; 333262306a36Sopenharmony_ci fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK; 333362306a36Sopenharmony_ci fl4.flowi4_oif = tb[RTA_OIF] ? 
nla_get_u32(tb[RTA_OIF]) : 0; 333462306a36Sopenharmony_ci fl4.flowi4_mark = mark; 333562306a36Sopenharmony_ci fl4.flowi4_uid = uid; 333662306a36Sopenharmony_ci if (sport) 333762306a36Sopenharmony_ci fl4.fl4_sport = sport; 333862306a36Sopenharmony_ci if (dport) 333962306a36Sopenharmony_ci fl4.fl4_dport = dport; 334062306a36Sopenharmony_ci fl4.flowi4_proto = ip_proto; 334162306a36Sopenharmony_ci 334262306a36Sopenharmony_ci rcu_read_lock(); 334362306a36Sopenharmony_ci 334462306a36Sopenharmony_ci if (iif) { 334562306a36Sopenharmony_ci struct net_device *dev; 334662306a36Sopenharmony_ci 334762306a36Sopenharmony_ci dev = dev_get_by_index_rcu(net, iif); 334862306a36Sopenharmony_ci if (!dev) { 334962306a36Sopenharmony_ci err = -ENODEV; 335062306a36Sopenharmony_ci goto errout_rcu; 335162306a36Sopenharmony_ci } 335262306a36Sopenharmony_ci 335362306a36Sopenharmony_ci fl4.flowi4_iif = iif; /* for rt_fill_info */ 335462306a36Sopenharmony_ci skb->dev = dev; 335562306a36Sopenharmony_ci skb->mark = mark; 335662306a36Sopenharmony_ci err = ip_route_input_rcu(skb, dst, src, 335762306a36Sopenharmony_ci rtm->rtm_tos & IPTOS_RT_MASK, dev, 335862306a36Sopenharmony_ci &res); 335962306a36Sopenharmony_ci 336062306a36Sopenharmony_ci rt = skb_rtable(skb); 336162306a36Sopenharmony_ci if (err == 0 && rt->dst.error) 336262306a36Sopenharmony_ci err = -rt->dst.error; 336362306a36Sopenharmony_ci } else { 336462306a36Sopenharmony_ci fl4.flowi4_iif = LOOPBACK_IFINDEX; 336562306a36Sopenharmony_ci skb->dev = net->loopback_dev; 336662306a36Sopenharmony_ci rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb); 336762306a36Sopenharmony_ci err = 0; 336862306a36Sopenharmony_ci if (IS_ERR(rt)) 336962306a36Sopenharmony_ci err = PTR_ERR(rt); 337062306a36Sopenharmony_ci else 337162306a36Sopenharmony_ci skb_dst_set(skb, &rt->dst); 337262306a36Sopenharmony_ci } 337362306a36Sopenharmony_ci 337462306a36Sopenharmony_ci if (err) 337562306a36Sopenharmony_ci goto errout_rcu; 337662306a36Sopenharmony_ci 
337762306a36Sopenharmony_ci if (rtm->rtm_flags & RTM_F_NOTIFY) 337862306a36Sopenharmony_ci rt->rt_flags |= RTCF_NOTIFY; 337962306a36Sopenharmony_ci 338062306a36Sopenharmony_ci if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) 338162306a36Sopenharmony_ci table_id = res.table ? res.table->tb_id : 0; 338262306a36Sopenharmony_ci 338362306a36Sopenharmony_ci /* reset skb for netlink reply msg */ 338462306a36Sopenharmony_ci skb_trim(skb, 0); 338562306a36Sopenharmony_ci skb_reset_network_header(skb); 338662306a36Sopenharmony_ci skb_reset_transport_header(skb); 338762306a36Sopenharmony_ci skb_reset_mac_header(skb); 338862306a36Sopenharmony_ci 338962306a36Sopenharmony_ci if (rtm->rtm_flags & RTM_F_FIB_MATCH) { 339062306a36Sopenharmony_ci struct fib_rt_info fri; 339162306a36Sopenharmony_ci 339262306a36Sopenharmony_ci if (!res.fi) { 339362306a36Sopenharmony_ci err = fib_props[res.type].error; 339462306a36Sopenharmony_ci if (!err) 339562306a36Sopenharmony_ci err = -EHOSTUNREACH; 339662306a36Sopenharmony_ci goto errout_rcu; 339762306a36Sopenharmony_ci } 339862306a36Sopenharmony_ci fri.fi = res.fi; 339962306a36Sopenharmony_ci fri.tb_id = table_id; 340062306a36Sopenharmony_ci fri.dst = res.prefix; 340162306a36Sopenharmony_ci fri.dst_len = res.prefixlen; 340262306a36Sopenharmony_ci fri.dscp = inet_dsfield_to_dscp(fl4.flowi4_tos); 340362306a36Sopenharmony_ci fri.type = rt->rt_type; 340462306a36Sopenharmony_ci fri.offload = 0; 340562306a36Sopenharmony_ci fri.trap = 0; 340662306a36Sopenharmony_ci fri.offload_failed = 0; 340762306a36Sopenharmony_ci if (res.fa_head) { 340862306a36Sopenharmony_ci struct fib_alias *fa; 340962306a36Sopenharmony_ci 341062306a36Sopenharmony_ci hlist_for_each_entry_rcu(fa, res.fa_head, fa_list) { 341162306a36Sopenharmony_ci u8 slen = 32 - fri.dst_len; 341262306a36Sopenharmony_ci 341362306a36Sopenharmony_ci if (fa->fa_slen == slen && 341462306a36Sopenharmony_ci fa->tb_id == fri.tb_id && 341562306a36Sopenharmony_ci fa->fa_dscp == fri.dscp && 341662306a36Sopenharmony_ci 
fa->fa_info == res.fi && 341762306a36Sopenharmony_ci fa->fa_type == fri.type) { 341862306a36Sopenharmony_ci fri.offload = READ_ONCE(fa->offload); 341962306a36Sopenharmony_ci fri.trap = READ_ONCE(fa->trap); 342062306a36Sopenharmony_ci fri.offload_failed = 342162306a36Sopenharmony_ci READ_ONCE(fa->offload_failed); 342262306a36Sopenharmony_ci break; 342362306a36Sopenharmony_ci } 342462306a36Sopenharmony_ci } 342562306a36Sopenharmony_ci } 342662306a36Sopenharmony_ci err = fib_dump_info(skb, NETLINK_CB(in_skb).portid, 342762306a36Sopenharmony_ci nlh->nlmsg_seq, RTM_NEWROUTE, &fri, 0); 342862306a36Sopenharmony_ci } else { 342962306a36Sopenharmony_ci err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb, 343062306a36Sopenharmony_ci NETLINK_CB(in_skb).portid, 343162306a36Sopenharmony_ci nlh->nlmsg_seq, 0); 343262306a36Sopenharmony_ci } 343362306a36Sopenharmony_ci if (err < 0) 343462306a36Sopenharmony_ci goto errout_rcu; 343562306a36Sopenharmony_ci 343662306a36Sopenharmony_ci rcu_read_unlock(); 343762306a36Sopenharmony_ci 343862306a36Sopenharmony_ci err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 343962306a36Sopenharmony_ci 344062306a36Sopenharmony_cierrout_free: 344162306a36Sopenharmony_ci return err; 344262306a36Sopenharmony_cierrout_rcu: 344362306a36Sopenharmony_ci rcu_read_unlock(); 344462306a36Sopenharmony_ci kfree_skb(skb); 344562306a36Sopenharmony_ci goto errout_free; 344662306a36Sopenharmony_ci} 344762306a36Sopenharmony_ci 344862306a36Sopenharmony_civoid ip_rt_multicast_event(struct in_device *in_dev) 344962306a36Sopenharmony_ci{ 345062306a36Sopenharmony_ci rt_cache_flush(dev_net(in_dev->dev)); 345162306a36Sopenharmony_ci} 345262306a36Sopenharmony_ci 345362306a36Sopenharmony_ci#ifdef CONFIG_SYSCTL 345462306a36Sopenharmony_cistatic int ip_rt_gc_interval __read_mostly = 60 * HZ; 345562306a36Sopenharmony_cistatic int ip_rt_gc_min_interval __read_mostly = HZ / 2; 345662306a36Sopenharmony_cistatic int ip_rt_gc_elasticity __read_mostly = 8; 
345762306a36Sopenharmony_cistatic int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU; 345862306a36Sopenharmony_ci 345962306a36Sopenharmony_cistatic int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write, 346062306a36Sopenharmony_ci void *buffer, size_t *lenp, loff_t *ppos) 346162306a36Sopenharmony_ci{ 346262306a36Sopenharmony_ci struct net *net = (struct net *)__ctl->extra1; 346362306a36Sopenharmony_ci 346462306a36Sopenharmony_ci if (write) { 346562306a36Sopenharmony_ci rt_cache_flush(net); 346662306a36Sopenharmony_ci fnhe_genid_bump(net); 346762306a36Sopenharmony_ci return 0; 346862306a36Sopenharmony_ci } 346962306a36Sopenharmony_ci 347062306a36Sopenharmony_ci return -EINVAL; 347162306a36Sopenharmony_ci} 347262306a36Sopenharmony_ci 347362306a36Sopenharmony_cistatic struct ctl_table ipv4_route_table[] = { 347462306a36Sopenharmony_ci { 347562306a36Sopenharmony_ci .procname = "gc_thresh", 347662306a36Sopenharmony_ci .data = &ipv4_dst_ops.gc_thresh, 347762306a36Sopenharmony_ci .maxlen = sizeof(int), 347862306a36Sopenharmony_ci .mode = 0644, 347962306a36Sopenharmony_ci .proc_handler = proc_dointvec, 348062306a36Sopenharmony_ci }, 348162306a36Sopenharmony_ci { 348262306a36Sopenharmony_ci .procname = "max_size", 348362306a36Sopenharmony_ci .data = &ip_rt_max_size, 348462306a36Sopenharmony_ci .maxlen = sizeof(int), 348562306a36Sopenharmony_ci .mode = 0644, 348662306a36Sopenharmony_ci .proc_handler = proc_dointvec, 348762306a36Sopenharmony_ci }, 348862306a36Sopenharmony_ci { 348962306a36Sopenharmony_ci /* Deprecated. 
Use gc_min_interval_ms */ 349062306a36Sopenharmony_ci 349162306a36Sopenharmony_ci .procname = "gc_min_interval", 349262306a36Sopenharmony_ci .data = &ip_rt_gc_min_interval, 349362306a36Sopenharmony_ci .maxlen = sizeof(int), 349462306a36Sopenharmony_ci .mode = 0644, 349562306a36Sopenharmony_ci .proc_handler = proc_dointvec_jiffies, 349662306a36Sopenharmony_ci }, 349762306a36Sopenharmony_ci { 349862306a36Sopenharmony_ci .procname = "gc_min_interval_ms", 349962306a36Sopenharmony_ci .data = &ip_rt_gc_min_interval, 350062306a36Sopenharmony_ci .maxlen = sizeof(int), 350162306a36Sopenharmony_ci .mode = 0644, 350262306a36Sopenharmony_ci .proc_handler = proc_dointvec_ms_jiffies, 350362306a36Sopenharmony_ci }, 350462306a36Sopenharmony_ci { 350562306a36Sopenharmony_ci .procname = "gc_timeout", 350662306a36Sopenharmony_ci .data = &ip_rt_gc_timeout, 350762306a36Sopenharmony_ci .maxlen = sizeof(int), 350862306a36Sopenharmony_ci .mode = 0644, 350962306a36Sopenharmony_ci .proc_handler = proc_dointvec_jiffies, 351062306a36Sopenharmony_ci }, 351162306a36Sopenharmony_ci { 351262306a36Sopenharmony_ci .procname = "gc_interval", 351362306a36Sopenharmony_ci .data = &ip_rt_gc_interval, 351462306a36Sopenharmony_ci .maxlen = sizeof(int), 351562306a36Sopenharmony_ci .mode = 0644, 351662306a36Sopenharmony_ci .proc_handler = proc_dointvec_jiffies, 351762306a36Sopenharmony_ci }, 351862306a36Sopenharmony_ci { 351962306a36Sopenharmony_ci .procname = "redirect_load", 352062306a36Sopenharmony_ci .data = &ip_rt_redirect_load, 352162306a36Sopenharmony_ci .maxlen = sizeof(int), 352262306a36Sopenharmony_ci .mode = 0644, 352362306a36Sopenharmony_ci .proc_handler = proc_dointvec, 352462306a36Sopenharmony_ci }, 352562306a36Sopenharmony_ci { 352662306a36Sopenharmony_ci .procname = "redirect_number", 352762306a36Sopenharmony_ci .data = &ip_rt_redirect_number, 352862306a36Sopenharmony_ci .maxlen = sizeof(int), 352962306a36Sopenharmony_ci .mode = 0644, 353062306a36Sopenharmony_ci .proc_handler = 
proc_dointvec, 353162306a36Sopenharmony_ci }, 353262306a36Sopenharmony_ci { 353362306a36Sopenharmony_ci .procname = "redirect_silence", 353462306a36Sopenharmony_ci .data = &ip_rt_redirect_silence, 353562306a36Sopenharmony_ci .maxlen = sizeof(int), 353662306a36Sopenharmony_ci .mode = 0644, 353762306a36Sopenharmony_ci .proc_handler = proc_dointvec, 353862306a36Sopenharmony_ci }, 353962306a36Sopenharmony_ci { 354062306a36Sopenharmony_ci .procname = "error_cost", 354162306a36Sopenharmony_ci .data = &ip_rt_error_cost, 354262306a36Sopenharmony_ci .maxlen = sizeof(int), 354362306a36Sopenharmony_ci .mode = 0644, 354462306a36Sopenharmony_ci .proc_handler = proc_dointvec, 354562306a36Sopenharmony_ci }, 354662306a36Sopenharmony_ci { 354762306a36Sopenharmony_ci .procname = "error_burst", 354862306a36Sopenharmony_ci .data = &ip_rt_error_burst, 354962306a36Sopenharmony_ci .maxlen = sizeof(int), 355062306a36Sopenharmony_ci .mode = 0644, 355162306a36Sopenharmony_ci .proc_handler = proc_dointvec, 355262306a36Sopenharmony_ci }, 355362306a36Sopenharmony_ci { 355462306a36Sopenharmony_ci .procname = "gc_elasticity", 355562306a36Sopenharmony_ci .data = &ip_rt_gc_elasticity, 355662306a36Sopenharmony_ci .maxlen = sizeof(int), 355762306a36Sopenharmony_ci .mode = 0644, 355862306a36Sopenharmony_ci .proc_handler = proc_dointvec, 355962306a36Sopenharmony_ci }, 356062306a36Sopenharmony_ci { } 356162306a36Sopenharmony_ci}; 356262306a36Sopenharmony_ci 356362306a36Sopenharmony_cistatic const char ipv4_route_flush_procname[] = "flush"; 356462306a36Sopenharmony_ci 356562306a36Sopenharmony_cistatic struct ctl_table ipv4_route_netns_table[] = { 356662306a36Sopenharmony_ci { 356762306a36Sopenharmony_ci .procname = ipv4_route_flush_procname, 356862306a36Sopenharmony_ci .maxlen = sizeof(int), 356962306a36Sopenharmony_ci .mode = 0200, 357062306a36Sopenharmony_ci .proc_handler = ipv4_sysctl_rtcache_flush, 357162306a36Sopenharmony_ci }, 357262306a36Sopenharmony_ci { 357362306a36Sopenharmony_ci .procname = 
"min_pmtu", 357462306a36Sopenharmony_ci .data = &init_net.ipv4.ip_rt_min_pmtu, 357562306a36Sopenharmony_ci .maxlen = sizeof(int), 357662306a36Sopenharmony_ci .mode = 0644, 357762306a36Sopenharmony_ci .proc_handler = proc_dointvec_minmax, 357862306a36Sopenharmony_ci .extra1 = &ip_min_valid_pmtu, 357962306a36Sopenharmony_ci }, 358062306a36Sopenharmony_ci { 358162306a36Sopenharmony_ci .procname = "mtu_expires", 358262306a36Sopenharmony_ci .data = &init_net.ipv4.ip_rt_mtu_expires, 358362306a36Sopenharmony_ci .maxlen = sizeof(int), 358462306a36Sopenharmony_ci .mode = 0644, 358562306a36Sopenharmony_ci .proc_handler = proc_dointvec_jiffies, 358662306a36Sopenharmony_ci }, 358762306a36Sopenharmony_ci { 358862306a36Sopenharmony_ci .procname = "min_adv_mss", 358962306a36Sopenharmony_ci .data = &init_net.ipv4.ip_rt_min_advmss, 359062306a36Sopenharmony_ci .maxlen = sizeof(int), 359162306a36Sopenharmony_ci .mode = 0644, 359262306a36Sopenharmony_ci .proc_handler = proc_dointvec, 359362306a36Sopenharmony_ci }, 359462306a36Sopenharmony_ci { }, 359562306a36Sopenharmony_ci}; 359662306a36Sopenharmony_ci 359762306a36Sopenharmony_cistatic __net_init int sysctl_route_net_init(struct net *net) 359862306a36Sopenharmony_ci{ 359962306a36Sopenharmony_ci struct ctl_table *tbl; 360062306a36Sopenharmony_ci size_t table_size = ARRAY_SIZE(ipv4_route_netns_table); 360162306a36Sopenharmony_ci 360262306a36Sopenharmony_ci tbl = ipv4_route_netns_table; 360362306a36Sopenharmony_ci if (!net_eq(net, &init_net)) { 360462306a36Sopenharmony_ci int i; 360562306a36Sopenharmony_ci 360662306a36Sopenharmony_ci tbl = kmemdup(tbl, sizeof(ipv4_route_netns_table), GFP_KERNEL); 360762306a36Sopenharmony_ci if (!tbl) 360862306a36Sopenharmony_ci goto err_dup; 360962306a36Sopenharmony_ci 361062306a36Sopenharmony_ci /* Don't export non-whitelisted sysctls to unprivileged users */ 361162306a36Sopenharmony_ci if (net->user_ns != &init_user_ns) { 361262306a36Sopenharmony_ci if (tbl[0].procname != ipv4_route_flush_procname) { 
361362306a36Sopenharmony_ci tbl[0].procname = NULL; 361462306a36Sopenharmony_ci table_size = 0; 361562306a36Sopenharmony_ci } 361662306a36Sopenharmony_ci } 361762306a36Sopenharmony_ci 361862306a36Sopenharmony_ci /* Update the variables to point into the current struct net 361962306a36Sopenharmony_ci * except for the first element flush 362062306a36Sopenharmony_ci */ 362162306a36Sopenharmony_ci for (i = 1; i < ARRAY_SIZE(ipv4_route_netns_table) - 1; i++) 362262306a36Sopenharmony_ci tbl[i].data += (void *)net - (void *)&init_net; 362362306a36Sopenharmony_ci } 362462306a36Sopenharmony_ci tbl[0].extra1 = net; 362562306a36Sopenharmony_ci 362662306a36Sopenharmony_ci net->ipv4.route_hdr = register_net_sysctl_sz(net, "net/ipv4/route", 362762306a36Sopenharmony_ci tbl, table_size); 362862306a36Sopenharmony_ci if (!net->ipv4.route_hdr) 362962306a36Sopenharmony_ci goto err_reg; 363062306a36Sopenharmony_ci return 0; 363162306a36Sopenharmony_ci 363262306a36Sopenharmony_cierr_reg: 363362306a36Sopenharmony_ci if (tbl != ipv4_route_netns_table) 363462306a36Sopenharmony_ci kfree(tbl); 363562306a36Sopenharmony_cierr_dup: 363662306a36Sopenharmony_ci return -ENOMEM; 363762306a36Sopenharmony_ci} 363862306a36Sopenharmony_ci 363962306a36Sopenharmony_cistatic __net_exit void sysctl_route_net_exit(struct net *net) 364062306a36Sopenharmony_ci{ 364162306a36Sopenharmony_ci struct ctl_table *tbl; 364262306a36Sopenharmony_ci 364362306a36Sopenharmony_ci tbl = net->ipv4.route_hdr->ctl_table_arg; 364462306a36Sopenharmony_ci unregister_net_sysctl_table(net->ipv4.route_hdr); 364562306a36Sopenharmony_ci BUG_ON(tbl == ipv4_route_netns_table); 364662306a36Sopenharmony_ci kfree(tbl); 364762306a36Sopenharmony_ci} 364862306a36Sopenharmony_ci 364962306a36Sopenharmony_cistatic __net_initdata struct pernet_operations sysctl_route_ops = { 365062306a36Sopenharmony_ci .init = sysctl_route_net_init, 365162306a36Sopenharmony_ci .exit = sysctl_route_net_exit, 365262306a36Sopenharmony_ci}; 
365362306a36Sopenharmony_ci#endif 365462306a36Sopenharmony_ci 365562306a36Sopenharmony_cistatic __net_init int netns_ip_rt_init(struct net *net) 365662306a36Sopenharmony_ci{ 365762306a36Sopenharmony_ci /* Set default value for namespaceified sysctls */ 365862306a36Sopenharmony_ci net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU; 365962306a36Sopenharmony_ci net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES; 366062306a36Sopenharmony_ci net->ipv4.ip_rt_min_advmss = DEFAULT_MIN_ADVMSS; 366162306a36Sopenharmony_ci return 0; 366262306a36Sopenharmony_ci} 366362306a36Sopenharmony_ci 366462306a36Sopenharmony_cistatic struct pernet_operations __net_initdata ip_rt_ops = { 366562306a36Sopenharmony_ci .init = netns_ip_rt_init, 366662306a36Sopenharmony_ci}; 366762306a36Sopenharmony_ci 366862306a36Sopenharmony_cistatic __net_init int rt_genid_init(struct net *net) 366962306a36Sopenharmony_ci{ 367062306a36Sopenharmony_ci atomic_set(&net->ipv4.rt_genid, 0); 367162306a36Sopenharmony_ci atomic_set(&net->fnhe_genid, 0); 367262306a36Sopenharmony_ci atomic_set(&net->ipv4.dev_addr_genid, get_random_u32()); 367362306a36Sopenharmony_ci return 0; 367462306a36Sopenharmony_ci} 367562306a36Sopenharmony_ci 367662306a36Sopenharmony_cistatic __net_initdata struct pernet_operations rt_genid_ops = { 367762306a36Sopenharmony_ci .init = rt_genid_init, 367862306a36Sopenharmony_ci}; 367962306a36Sopenharmony_ci 368062306a36Sopenharmony_cistatic int __net_init ipv4_inetpeer_init(struct net *net) 368162306a36Sopenharmony_ci{ 368262306a36Sopenharmony_ci struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); 368362306a36Sopenharmony_ci 368462306a36Sopenharmony_ci if (!bp) 368562306a36Sopenharmony_ci return -ENOMEM; 368662306a36Sopenharmony_ci inet_peer_base_init(bp); 368762306a36Sopenharmony_ci net->ipv4.peers = bp; 368862306a36Sopenharmony_ci return 0; 368962306a36Sopenharmony_ci} 369062306a36Sopenharmony_ci 369162306a36Sopenharmony_cistatic void __net_exit ipv4_inetpeer_exit(struct net *net) 
369262306a36Sopenharmony_ci{ 369362306a36Sopenharmony_ci struct inet_peer_base *bp = net->ipv4.peers; 369462306a36Sopenharmony_ci 369562306a36Sopenharmony_ci net->ipv4.peers = NULL; 369662306a36Sopenharmony_ci inetpeer_invalidate_tree(bp); 369762306a36Sopenharmony_ci kfree(bp); 369862306a36Sopenharmony_ci} 369962306a36Sopenharmony_ci 370062306a36Sopenharmony_cistatic __net_initdata struct pernet_operations ipv4_inetpeer_ops = { 370162306a36Sopenharmony_ci .init = ipv4_inetpeer_init, 370262306a36Sopenharmony_ci .exit = ipv4_inetpeer_exit, 370362306a36Sopenharmony_ci}; 370462306a36Sopenharmony_ci 370562306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_CLASSID 370662306a36Sopenharmony_cistruct ip_rt_acct __percpu *ip_rt_acct __read_mostly; 370762306a36Sopenharmony_ci#endif /* CONFIG_IP_ROUTE_CLASSID */ 370862306a36Sopenharmony_ci 370962306a36Sopenharmony_ciint __init ip_rt_init(void) 371062306a36Sopenharmony_ci{ 371162306a36Sopenharmony_ci void *idents_hash; 371262306a36Sopenharmony_ci int cpu; 371362306a36Sopenharmony_ci 371462306a36Sopenharmony_ci /* For modern hosts, this will use 2 MB of memory */ 371562306a36Sopenharmony_ci idents_hash = alloc_large_system_hash("IP idents", 371662306a36Sopenharmony_ci sizeof(*ip_idents) + sizeof(*ip_tstamps), 371762306a36Sopenharmony_ci 0, 371862306a36Sopenharmony_ci 16, /* one bucket per 64 KB */ 371962306a36Sopenharmony_ci HASH_ZERO, 372062306a36Sopenharmony_ci NULL, 372162306a36Sopenharmony_ci &ip_idents_mask, 372262306a36Sopenharmony_ci 2048, 372362306a36Sopenharmony_ci 256*1024); 372462306a36Sopenharmony_ci 372562306a36Sopenharmony_ci ip_idents = idents_hash; 372662306a36Sopenharmony_ci 372762306a36Sopenharmony_ci get_random_bytes(ip_idents, (ip_idents_mask + 1) * sizeof(*ip_idents)); 372862306a36Sopenharmony_ci 372962306a36Sopenharmony_ci ip_tstamps = idents_hash + (ip_idents_mask + 1) * sizeof(*ip_idents); 373062306a36Sopenharmony_ci 373162306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 373262306a36Sopenharmony_ci struct 
uncached_list *ul = &per_cpu(rt_uncached_list, cpu); 373362306a36Sopenharmony_ci 373462306a36Sopenharmony_ci INIT_LIST_HEAD(&ul->head); 373562306a36Sopenharmony_ci INIT_LIST_HEAD(&ul->quarantine); 373662306a36Sopenharmony_ci spin_lock_init(&ul->lock); 373762306a36Sopenharmony_ci } 373862306a36Sopenharmony_ci#ifdef CONFIG_IP_ROUTE_CLASSID 373962306a36Sopenharmony_ci ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); 374062306a36Sopenharmony_ci if (!ip_rt_acct) 374162306a36Sopenharmony_ci panic("IP: failed to allocate ip_rt_acct\n"); 374262306a36Sopenharmony_ci#endif 374362306a36Sopenharmony_ci 374462306a36Sopenharmony_ci ipv4_dst_ops.kmem_cachep = 374562306a36Sopenharmony_ci kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, 374662306a36Sopenharmony_ci SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 374762306a36Sopenharmony_ci 374862306a36Sopenharmony_ci ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; 374962306a36Sopenharmony_ci 375062306a36Sopenharmony_ci if (dst_entries_init(&ipv4_dst_ops) < 0) 375162306a36Sopenharmony_ci panic("IP: failed to allocate ipv4_dst_ops counter\n"); 375262306a36Sopenharmony_ci 375362306a36Sopenharmony_ci if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) 375462306a36Sopenharmony_ci panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); 375562306a36Sopenharmony_ci 375662306a36Sopenharmony_ci ipv4_dst_ops.gc_thresh = ~0; 375762306a36Sopenharmony_ci ip_rt_max_size = INT_MAX; 375862306a36Sopenharmony_ci 375962306a36Sopenharmony_ci devinet_init(); 376062306a36Sopenharmony_ci ip_fib_init(); 376162306a36Sopenharmony_ci 376262306a36Sopenharmony_ci if (ip_rt_proc_init()) 376362306a36Sopenharmony_ci pr_err("Unable to create route proc files\n"); 376462306a36Sopenharmony_ci#ifdef CONFIG_XFRM 376562306a36Sopenharmony_ci xfrm_init(); 376662306a36Sopenharmony_ci xfrm4_init(); 376762306a36Sopenharmony_ci#endif 376862306a36Sopenharmony_ci rtnl_register(PF_INET, RTM_GETROUTE, 
inet_rtm_getroute, NULL, 376962306a36Sopenharmony_ci RTNL_FLAG_DOIT_UNLOCKED); 377062306a36Sopenharmony_ci 377162306a36Sopenharmony_ci#ifdef CONFIG_SYSCTL 377262306a36Sopenharmony_ci register_pernet_subsys(&sysctl_route_ops); 377362306a36Sopenharmony_ci#endif 377462306a36Sopenharmony_ci register_pernet_subsys(&ip_rt_ops); 377562306a36Sopenharmony_ci register_pernet_subsys(&rt_genid_ops); 377662306a36Sopenharmony_ci register_pernet_subsys(&ipv4_inetpeer_ops); 377762306a36Sopenharmony_ci return 0; 377862306a36Sopenharmony_ci} 377962306a36Sopenharmony_ci 378062306a36Sopenharmony_ci#ifdef CONFIG_SYSCTL 378162306a36Sopenharmony_ci/* 378262306a36Sopenharmony_ci * We really need to sanitize the damn ipv4 init order, then all 378362306a36Sopenharmony_ci * this nonsense will go away. 378462306a36Sopenharmony_ci */ 378562306a36Sopenharmony_civoid __init ip_static_sysctl_init(void) 378662306a36Sopenharmony_ci{ 378762306a36Sopenharmony_ci register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table); 378862306a36Sopenharmony_ci} 378962306a36Sopenharmony_ci#endif 3790