xref: /kernel/linux/linux-5.10/net/ipv6/ip6_output.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 *	IPv6 output functions
4 *	Linux INET6 implementation
5 *
6 *	Authors:
7 *	Pedro Roque		<roque@di.fc.ul.pt>
8 *
9 *	Based on linux/net/ipv4/ip_output.c
10 *
11 *	Changes:
12 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
13 *				extension headers are implemented.
14 *				route changes now work.
15 *				ip6_forward does not confuse sniffers.
16 *				etc.
17 *
18 *      H. von Brand    :       Added missing #include <linux/string.h>
19 *	Imran Patel	:	frag id should be in NBO
20 *      Kazunori MIYAZAWA @USAGI
21 *			:       add ip6_append_data and related functions
22 *				for datagram xmit
23 */
24
25#include <linux/errno.h>
26#include <linux/kernel.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/net.h>
30#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/in6.h>
33#include <linux/tcp.h>
34#include <linux/route.h>
35#include <linux/module.h>
36#include <linux/slab.h>
37
38#include <linux/bpf-cgroup.h>
39#include <linux/netfilter.h>
40#include <linux/netfilter_ipv6.h>
41
42#include <net/sock.h>
43#include <net/snmp.h>
44
45#include <net/ipv6.h>
46#include <net/ndisc.h>
47#include <net/protocol.h>
48#include <net/ip6_route.h>
49#include <net/addrconf.h>
50#include <net/rawv6.h>
51#include <net/icmp.h>
52#include <net/xfrm.h>
53#include <net/checksum.h>
54#include <linux/mroute6.h>
55#include <net/l3mdev.h>
56#include <net/lwtunnel.h>
57#include <net/ip_tunnels.h>
58
59static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
60{
61	struct dst_entry *dst = skb_dst(skb);
62	struct net_device *dev = dst->dev;
63	unsigned int hh_len = LL_RESERVED_SPACE(dev);
64	int delta = hh_len - skb_headroom(skb);
65	const struct in6_addr *nexthop;
66	struct neighbour *neigh;
67	int ret;
68
69	/* Be paranoid, rather than too clever. */
70	if (unlikely(delta > 0) && dev->header_ops) {
71		/* pskb_expand_head() might crash if skb is shared */
72		if (skb_shared(skb)) {
73			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
74
75			if (likely(nskb)) {
76				if (skb->sk)
77					skb_set_owner_w(nskb, skb->sk);
78				consume_skb(skb);
79			} else {
80				kfree_skb(skb);
81			}
82			skb = nskb;
83		}
84		if (skb &&
85		    pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
86			kfree_skb(skb);
87			skb = NULL;
88		}
89		if (!skb) {
90			IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
91			return -ENOMEM;
92		}
93	}
94
95	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
96		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
97
98		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
99		    ((mroute6_is_socket(net, skb) &&
100		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
101		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
102					 &ipv6_hdr(skb)->saddr))) {
103			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
104
105			/* Do not check for IFF_ALLMULTI; multicast routing
106			   is not supported in any case.
107			 */
108			if (newskb)
109				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
110					net, sk, newskb, NULL, newskb->dev,
111					dev_loopback_xmit);
112
113			if (ipv6_hdr(skb)->hop_limit == 0) {
114				IP6_INC_STATS(net, idev,
115					      IPSTATS_MIB_OUTDISCARDS);
116				kfree_skb(skb);
117				return 0;
118			}
119		}
120
121		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
122
123		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
124		    IPV6_ADDR_SCOPE_NODELOCAL &&
125		    !(dev->flags & IFF_LOOPBACK)) {
126			kfree_skb(skb);
127			return 0;
128		}
129	}
130
131	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
132		int res = lwtunnel_xmit(skb);
133
134		if (res != LWTUNNEL_XMIT_CONTINUE)
135			return res;
136	}
137
138	rcu_read_lock_bh();
139	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
140	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
141	if (unlikely(!neigh))
142		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
143	if (!IS_ERR(neigh)) {
144		sock_confirm_neigh(skb, neigh);
145		ret = neigh_output(neigh, skb, false);
146		rcu_read_unlock_bh();
147		return ret;
148	}
149	rcu_read_unlock_bh();
150
151	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
152	kfree_skb(skb);
153	return -EINVAL;
154}
155
156static int
157ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
158				    struct sk_buff *skb, unsigned int mtu)
159{
160	struct sk_buff *segs, *nskb;
161	netdev_features_t features;
162	int ret = 0;
163
164	/* Please see corresponding comment in ip_finish_output_gso
165	 * describing the cases where GSO segment length exceeds the
166	 * egress MTU.
167	 */
168	features = netif_skb_features(skb);
169	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
170	if (IS_ERR_OR_NULL(segs)) {
171		kfree_skb(skb);
172		return -ENOMEM;
173	}
174
175	consume_skb(skb);
176
177	skb_list_walk_safe(segs, segs, nskb) {
178		int err;
179
180		skb_mark_not_on_list(segs);
181		/* Last GSO segment can be smaller than gso_size (and MTU).
182		 * Adding a fragment header would produce an "atomic fragment",
183		 * which is considered harmful (RFC-8021). Avoid that.
184		 */
185		err = segs->len > mtu ?
186			ip6_fragment(net, sk, segs, ip6_finish_output2) :
187			ip6_finish_output2(net, sk, segs);
188		if (err && ret == 0)
189			ret = err;
190	}
191
192	return ret;
193}
194
195static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
196{
197	unsigned int mtu;
198
199#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
200	/* Policy lookup after SNAT yielded a new policy */
201	if (skb_dst(skb)->xfrm) {
202		IP6CB(skb)->flags |= IP6SKB_REROUTED;
203		return dst_output(net, sk, skb);
204	}
205#endif
206
207	mtu = ip6_skb_dst_mtu(skb);
208	if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
209		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
210
211	if ((skb->len > mtu && !skb_is_gso(skb)) ||
212	    dst_allfrag(skb_dst(skb)) ||
213	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
214		return ip6_fragment(net, sk, skb, ip6_finish_output2);
215	else
216		return ip6_finish_output2(net, sk, skb);
217}
218
219static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
220{
221	int ret;
222
223	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
224	switch (ret) {
225	case NET_XMIT_SUCCESS:
226		return __ip6_finish_output(net, sk, skb);
227	case NET_XMIT_CN:
228		return __ip6_finish_output(net, sk, skb) ? : ret;
229	default:
230		kfree_skb(skb);
231		return ret;
232	}
233}
234
235int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
236{
237	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
238	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
239
240	skb->protocol = htons(ETH_P_IPV6);
241	skb->dev = dev;
242
243	if (unlikely(!idev || (idev->cnf.disable_ipv6))) {
244		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
245		kfree_skb(skb);
246		return 0;
247	}
248
249	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
250			    net, sk, skb, indev, dev,
251			    ip6_finish_output,
252			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
253}
254
255bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
256{
257	if (!np->autoflowlabel_set)
258		return ip6_default_np_autolabel(net);
259	else
260		return np->autoflowlabel;
261}
262
263/*
264 * xmit an sk_buff (used by TCP, SCTP and DCCP)
265 * Note : socket lock is not held for SYNACK packets, but the socket may be
266 * modified by calls to skb_set_owner_w() and ipv6_local_error(),
267 * which use proper atomic operations or spinlocks.
268 */
269int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
270	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
271{
272	struct net *net = sock_net(sk);
273	const struct ipv6_pinfo *np = inet6_sk(sk);
274	struct in6_addr *first_hop = &fl6->daddr;
275	struct dst_entry *dst = skb_dst(skb);
276	unsigned int head_room;
277	struct ipv6hdr *hdr;
278	u8  proto = fl6->flowi6_proto;
279	int seg_len = skb->len;
280	int hlimit = -1;
281	u32 mtu;
282
283	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
284	if (opt)
285		head_room += opt->opt_nflen + opt->opt_flen;
286
287	if (unlikely(skb_headroom(skb) < head_room)) {
288		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
289		if (!skb2) {
290			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
291				      IPSTATS_MIB_OUTDISCARDS);
292			kfree_skb(skb);
293			return -ENOBUFS;
294		}
295		if (skb->sk)
296			skb_set_owner_w(skb2, skb->sk);
297		consume_skb(skb);
298		skb = skb2;
299	}
300
301	if (opt) {
302		seg_len += opt->opt_nflen + opt->opt_flen;
303
304		if (opt->opt_flen)
305			ipv6_push_frag_opts(skb, opt, &proto);
306
307		if (opt->opt_nflen)
308			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
309					     &fl6->saddr);
310	}
311
312	skb_push(skb, sizeof(struct ipv6hdr));
313	skb_reset_network_header(skb);
314	hdr = ipv6_hdr(skb);
315
316	/*
317	 *	Fill in the IPv6 header
318	 */
319	if (np)
320		hlimit = np->hop_limit;
321	if (hlimit < 0)
322		hlimit = ip6_dst_hoplimit(dst);
323
324	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
325				ip6_autoflowlabel(net, np), fl6));
326
327	hdr->payload_len = htons(seg_len);
328	hdr->nexthdr = proto;
329	hdr->hop_limit = hlimit;
330
331	hdr->saddr = fl6->saddr;
332	hdr->daddr = *first_hop;
333
334	skb->protocol = htons(ETH_P_IPV6);
335	skb->priority = priority;
336	skb->mark = mark;
337
338	mtu = dst_mtu(dst);
339	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
340		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
341			      IPSTATS_MIB_OUT, skb->len);
342
343		/* if egress device is enslaved to an L3 master device pass the
344		 * skb to its handler for processing
345		 */
346		skb = l3mdev_ip6_out((struct sock *)sk, skb);
347		if (unlikely(!skb))
348			return 0;
349
350		/* hooks should never assume socket lock is held.
351		 * we promote our socket to non const
352		 */
353		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
354			       net, (struct sock *)sk, skb, NULL, dst->dev,
355			       dst_output);
356	}
357
358	skb->dev = dst->dev;
359	/* ipv6_local_error() does not require socket lock,
360	 * we promote our socket to non const
361	 */
362	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
363
364	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
365	kfree_skb(skb);
366	return -EMSGSIZE;
367}
368EXPORT_SYMBOL(ip6_xmit);
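369
/*
 * Illustrative sketch (not part of the original file): a minimal ip6_xmit()
 * caller, roughly how a connection-oriented protocol transmits one routed
 * skb.  The helper name and the bare-bones error handling are assumptions;
 * real users such as TCP derive the flow from socket/request state and pass
 * their own traffic class, priority and extension headers.
 */
static inline int example_ip6_xmit_once(struct sock *sk, struct sk_buff *skb,
					const struct in6_addr *daddr,
					const struct in6_addr *saddr)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = sk->sk_protocol;	/* e.g. IPPROTO_TCP */
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.daddr = *daddr;
	fl6.saddr = *saddr;

	/* Route (and, with XFRM, transform) lookup for this flow. */
	dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL);
	if (IS_ERR(dst))
		return PTR_ERR(dst);
	skb_dst_set(skb, dst);

	/* No extension headers, default traffic class, socket priority. */
	return ip6_xmit(sk, skb, &fl6, sk->sk_mark, NULL, 0, sk->sk_priority);
}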
369
370static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
371{
372	struct ip6_ra_chain *ra;
373	struct sock *last = NULL;
374
375	read_lock(&ip6_ra_lock);
376	for (ra = ip6_ra_chain; ra; ra = ra->next) {
377		struct sock *sk = ra->sk;
378		if (sk && ra->sel == sel &&
379		    (!sk->sk_bound_dev_if ||
380		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
381			struct ipv6_pinfo *np = inet6_sk(sk);
382
383			if (np && np->rtalert_isolate &&
384			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
385				continue;
386			}
387			if (last) {
388				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
389				if (skb2)
390					rawv6_rcv(last, skb2);
391			}
392			last = sk;
393		}
394	}
395
396	if (last) {
397		rawv6_rcv(last, skb);
398		read_unlock(&ip6_ra_lock);
399		return 1;
400	}
401	read_unlock(&ip6_ra_lock);
402	return 0;
403}
404
405static int ip6_forward_proxy_check(struct sk_buff *skb)
406{
407	struct ipv6hdr *hdr = ipv6_hdr(skb);
408	u8 nexthdr = hdr->nexthdr;
409	__be16 frag_off;
410	int offset;
411
412	if (ipv6_ext_hdr(nexthdr)) {
413		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
414		if (offset < 0)
415			return 0;
416	} else
417		offset = sizeof(struct ipv6hdr);
418
419	if (nexthdr == IPPROTO_ICMPV6) {
420		struct icmp6hdr *icmp6;
421
422		if (!pskb_may_pull(skb, (skb_network_header(skb) +
423					 offset + 1 - skb->data)))
424			return 0;
425
426		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
427
428		switch (icmp6->icmp6_type) {
429		case NDISC_ROUTER_SOLICITATION:
430		case NDISC_ROUTER_ADVERTISEMENT:
431		case NDISC_NEIGHBOUR_SOLICITATION:
432		case NDISC_NEIGHBOUR_ADVERTISEMENT:
433		case NDISC_REDIRECT:
434			/* For reaction involving unicast neighbor discovery
435			 * message destined to the proxied address, pass it to
436			 * input function.
437			 */
438			return 1;
439		default:
440			break;
441		}
442	}
443
444	/*
445	 * The proxying router can't forward traffic sent to a link-local
446	 * address, so signal the sender and discard the packet. This
447	 * behavior is clarified by the MIPv6 specification.
448	 */
449	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
450		dst_link_failure(skb);
451		return -1;
452	}
453
454	return 0;
455}
456
457static inline int ip6_forward_finish(struct net *net, struct sock *sk,
458				     struct sk_buff *skb)
459{
460	struct dst_entry *dst = skb_dst(skb);
461
462	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
463	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
464
465#ifdef CONFIG_NET_SWITCHDEV
466	if (skb->offload_l3_fwd_mark) {
467		consume_skb(skb);
468		return 0;
469	}
470#endif
471
472	skb->tstamp = 0;
473	return dst_output(net, sk, skb);
474}
475
476static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
477{
478	if (skb->len <= mtu)
479		return false;
480
481	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
482	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
483		return true;
484
485	if (skb->ignore_df)
486		return false;
487
488	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
489		return false;
490
491	return true;
492}
493
494int ip6_forward(struct sk_buff *skb)
495{
496	struct dst_entry *dst = skb_dst(skb);
497	struct ipv6hdr *hdr = ipv6_hdr(skb);
498	struct inet6_skb_parm *opt = IP6CB(skb);
499	struct net *net = dev_net(dst->dev);
500	struct inet6_dev *idev;
501	u32 mtu;
502
503	idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
504	if (net->ipv6.devconf_all->forwarding == 0)
505		goto error;
506
507	if (skb->pkt_type != PACKET_HOST)
508		goto drop;
509
510	if (unlikely(skb->sk))
511		goto drop;
512
513	if (skb_warn_if_lro(skb))
514		goto drop;
515
516	if (!net->ipv6.devconf_all->disable_policy &&
517	    (!idev || !idev->cnf.disable_policy) &&
518	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
519		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
520		goto drop;
521	}
522
523	skb_forward_csum(skb);
524
525	/*
526	 *	We do NOT do any processing on
527	 *	RA packets; we push them to user level AS IS,
528	 *	without any warranty that the application will be
529	 *	able to interpret them. The reason is that we
530	 *	cannot do anything clever here.
531	 *
532	 *	We are not the end node, so if the packet contains
533	 *	AH/ESP we cannot do anything with it.
534	 *	Defragmentation would also be a mistake; RA packets
535	 *	cannot be fragmented, because there is no guarantee
536	 *	that different fragments will follow the same path. --ANK
537	 */
538	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
539		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
540			return 0;
541	}
542
543	/*
544	 *	check and decrement ttl
545	 */
546	if (hdr->hop_limit <= 1) {
547		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
548		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
549
550		kfree_skb(skb);
551		return -ETIMEDOUT;
552	}
553
554	/* XXX: idev->cnf.proxy_ndp? */
555	if (net->ipv6.devconf_all->proxy_ndp &&
556	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
557		int proxied = ip6_forward_proxy_check(skb);
558		if (proxied > 0)
559			return ip6_input(skb);
560		else if (proxied < 0) {
561			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
562			goto drop;
563		}
564	}
565
566	if (!xfrm6_route_forward(skb)) {
567		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
568		goto drop;
569	}
570	dst = skb_dst(skb);
571
572	/* IPv6 specs say nothing about it, but it is clear that we cannot
573	   send redirects to source routed frames.
574	   We don't send redirects to frames decapsulated from IPsec.
575	 */
576	if (IP6CB(skb)->iif == dst->dev->ifindex &&
577	    opt->srcrt == 0 && !skb_sec_path(skb)) {
578		struct in6_addr *target = NULL;
579		struct inet_peer *peer;
580		struct rt6_info *rt;
581
582		/*
583		 *	incoming and outgoing devices are the same;
584		 *	send a redirect.
585		 */
586
587		rt = (struct rt6_info *) dst;
588		if (rt->rt6i_flags & RTF_GATEWAY)
589			target = &rt->rt6i_gateway;
590		else
591			target = &hdr->daddr;
592
593		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
594
595		/* Limit redirects both by destination (here)
596		   and by source (inside ndisc_send_redirect)
597		 */
598		if (inet_peer_xrlim_allow(peer, 1*HZ))
599			ndisc_send_redirect(skb, target);
600		if (peer)
601			inet_putpeer(peer);
602	} else {
603		int addrtype = ipv6_addr_type(&hdr->saddr);
604
605		/* This check is security critical. */
606		if (addrtype == IPV6_ADDR_ANY ||
607		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
608			goto error;
609		if (addrtype & IPV6_ADDR_LINKLOCAL) {
610			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
611				    ICMPV6_NOT_NEIGHBOUR, 0);
612			goto error;
613		}
614	}
615
616	mtu = ip6_dst_mtu_forward(dst);
617	if (mtu < IPV6_MIN_MTU)
618		mtu = IPV6_MIN_MTU;
619
620	if (ip6_pkt_too_big(skb, mtu)) {
621		/* Again, force OUTPUT device used as source address */
622		skb->dev = dst->dev;
623		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
624		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
625		__IP6_INC_STATS(net, ip6_dst_idev(dst),
626				IPSTATS_MIB_FRAGFAILS);
627		kfree_skb(skb);
628		return -EMSGSIZE;
629	}
630
631	if (skb_cow(skb, dst->dev->hard_header_len)) {
632		__IP6_INC_STATS(net, ip6_dst_idev(dst),
633				IPSTATS_MIB_OUTDISCARDS);
634		goto drop;
635	}
636
637	hdr = ipv6_hdr(skb);
638
639	/* Mangling hops number delayed to point after skb COW */
640
641	hdr->hop_limit--;
642
643	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
644		       net, NULL, skb, skb->dev, dst->dev,
645		       ip6_forward_finish);
646
647error:
648	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
649drop:
650	kfree_skb(skb);
651	return -EINVAL;
652}
653
654static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
655{
656	to->pkt_type = from->pkt_type;
657	to->priority = from->priority;
658	to->protocol = from->protocol;
659	skb_dst_drop(to);
660	skb_dst_set(to, dst_clone(skb_dst(from)));
661	to->dev = from->dev;
662	to->mark = from->mark;
663
664	skb_copy_hash(to, from);
665
666#ifdef CONFIG_NET_SCHED
667	to->tc_index = from->tc_index;
668#endif
669	nf_copy(to, from);
670	skb_ext_copy(to, from);
671	skb_copy_secmark(to, from);
672}
673
674int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
675		      u8 nexthdr, __be32 frag_id,
676		      struct ip6_fraglist_iter *iter)
677{
678	unsigned int first_len;
679	struct frag_hdr *fh;
680
681	/* BUILD HEADER */
682	*prevhdr = NEXTHDR_FRAGMENT;
683	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
684	if (!iter->tmp_hdr)
685		return -ENOMEM;
686
687	iter->frag = skb_shinfo(skb)->frag_list;
688	skb_frag_list_init(skb);
689
690	iter->offset = 0;
691	iter->hlen = hlen;
692	iter->frag_id = frag_id;
693	iter->nexthdr = nexthdr;
694
695	__skb_pull(skb, hlen);
696	fh = __skb_push(skb, sizeof(struct frag_hdr));
697	__skb_push(skb, hlen);
698	skb_reset_network_header(skb);
699	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
700
701	fh->nexthdr = nexthdr;
702	fh->reserved = 0;
703	fh->frag_off = htons(IP6_MF);
704	fh->identification = frag_id;
705
706	first_len = skb_pagelen(skb);
707	skb->data_len = first_len - skb_headlen(skb);
708	skb->len = first_len;
709	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
710
711	return 0;
712}
713EXPORT_SYMBOL(ip6_fraglist_init);
714
715void ip6_fraglist_prepare(struct sk_buff *skb,
716			  struct ip6_fraglist_iter *iter)
717{
718	struct sk_buff *frag = iter->frag;
719	unsigned int hlen = iter->hlen;
720	struct frag_hdr *fh;
721
722	frag->ip_summed = CHECKSUM_NONE;
723	skb_reset_transport_header(frag);
724	fh = __skb_push(frag, sizeof(struct frag_hdr));
725	__skb_push(frag, hlen);
726	skb_reset_network_header(frag);
727	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
728	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
729	fh->nexthdr = iter->nexthdr;
730	fh->reserved = 0;
731	fh->frag_off = htons(iter->offset);
732	if (frag->next)
733		fh->frag_off |= htons(IP6_MF);
734	fh->identification = iter->frag_id;
735	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
736	ip6_copy_metadata(frag, skb);
737}
738EXPORT_SYMBOL(ip6_fraglist_prepare);
739
740void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
741		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
742		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
743{
744	state->prevhdr = prevhdr;
745	state->nexthdr = nexthdr;
746	state->frag_id = frag_id;
747
748	state->hlen = hlen;
749	state->mtu = mtu;
750
751	state->left = skb->len - hlen;	/* Space per frame */
752	state->ptr = hlen;		/* Where to start from */
753
754	state->hroom = hdr_room;
755	state->troom = needed_tailroom;
756
757	state->offset = 0;
758}
759EXPORT_SYMBOL(ip6_frag_init);
760
761struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
762{
763	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
764	struct sk_buff *frag;
765	struct frag_hdr *fh;
766	unsigned int len;
767
768	len = state->left;
769	/* IF: it doesn't fit, use 'mtu' - the data space left */
770	if (len > state->mtu)
771		len = state->mtu;
772	/* IF: we are not sending up to and including the packet end
773	   then align the next start on an eight byte boundary */
774	if (len < state->left)
775		len &= ~7;
776
777	/* Allocate buffer */
778	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
779			 state->hroom + state->troom, GFP_ATOMIC);
780	if (!frag)
781		return ERR_PTR(-ENOMEM);
782
783	/*
784	 *	Set up data on packet
785	 */
786
787	ip6_copy_metadata(frag, skb);
788	skb_reserve(frag, state->hroom);
789	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
790	skb_reset_network_header(frag);
791	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
792	frag->transport_header = (frag->network_header + state->hlen +
793				  sizeof(struct frag_hdr));
794
795	/*
796	 *	Charge the memory for the fragment to any owner
797	 *	it might possess
798	 */
799	if (skb->sk)
800		skb_set_owner_w(frag, skb->sk);
801
802	/*
803	 *	Copy the packet header into the new buffer.
804	 */
805	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
806
807	fragnexthdr_offset = skb_network_header(frag);
808	fragnexthdr_offset += prevhdr - skb_network_header(skb);
809	*fragnexthdr_offset = NEXTHDR_FRAGMENT;
810
811	/*
812	 *	Build fragment header.
813	 */
814	fh->nexthdr = state->nexthdr;
815	fh->reserved = 0;
816	fh->identification = state->frag_id;
817
818	/*
819	 *	Copy a block of the IP datagram.
820	 */
821	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
822			     len));
823	state->left -= len;
824
825	fh->frag_off = htons(state->offset);
826	if (state->left > 0)
827		fh->frag_off |= htons(IP6_MF);
828	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
829
830	state->ptr += len;
831	state->offset += len;
832
833	return frag;
834}
835EXPORT_SYMBOL(ip6_frag_next);
836
837int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
838		 int (*output)(struct net *, struct sock *, struct sk_buff *))
839{
840	struct sk_buff *frag;
841	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
842	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
843				inet6_sk(skb->sk) : NULL;
844	struct ip6_frag_state state;
845	unsigned int mtu, hlen, nexthdr_offset;
846	ktime_t tstamp = skb->tstamp;
847	int hroom, err = 0;
848	__be32 frag_id;
849	u8 *prevhdr, nexthdr = 0;
850
851	err = ip6_find_1stfragopt(skb, &prevhdr);
852	if (err < 0)
853		goto fail;
854	hlen = err;
855	nexthdr = *prevhdr;
856	nexthdr_offset = prevhdr - skb_network_header(skb);
857
858	mtu = ip6_skb_dst_mtu(skb);
859
860	/* We must not fragment if the socket is set to force MTU discovery
861	 * or if the skb is not generated by a local socket.
862	 */
863	if (unlikely(!skb->ignore_df && skb->len > mtu))
864		goto fail_toobig;
865
866	if (IP6CB(skb)->frag_max_size) {
867		if (IP6CB(skb)->frag_max_size > mtu)
868			goto fail_toobig;
869
870		/* don't send fragments larger than what we received */
871		mtu = IP6CB(skb)->frag_max_size;
872		if (mtu < IPV6_MIN_MTU)
873			mtu = IPV6_MIN_MTU;
874	}
875
876	if (np && np->frag_size < mtu) {
877		if (np->frag_size)
878			mtu = np->frag_size;
879	}
880	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
881		goto fail_toobig;
882	mtu -= hlen + sizeof(struct frag_hdr);
883
884	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
885				    &ipv6_hdr(skb)->saddr);
886
887	if (skb->ip_summed == CHECKSUM_PARTIAL &&
888	    (err = skb_checksum_help(skb)))
889		goto fail;
890
891	prevhdr = skb_network_header(skb) + nexthdr_offset;
892	hroom = LL_RESERVED_SPACE(rt->dst.dev);
893	if (skb_has_frag_list(skb)) {
894		unsigned int first_len = skb_pagelen(skb);
895		struct ip6_fraglist_iter iter;
896		struct sk_buff *frag2;
897
898		if (first_len - hlen > mtu ||
899		    ((first_len - hlen) & 7) ||
900		    skb_cloned(skb) ||
901		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
902			goto slow_path;
903
904		skb_walk_frags(skb, frag) {
905			/* Correct geometry. */
906			if (frag->len > mtu ||
907			    ((frag->len & 7) && frag->next) ||
908			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
909				goto slow_path_clean;
910
911			/* Partially cloned skb? */
912			if (skb_shared(frag))
913				goto slow_path_clean;
914
915			BUG_ON(frag->sk);
916			if (skb->sk) {
917				frag->sk = skb->sk;
918				frag->destructor = sock_wfree;
919			}
920			skb->truesize -= frag->truesize;
921		}
922
923		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
924					&iter);
925		if (err < 0)
926			goto fail;
927
928		/* We prevent @rt from being freed. */
929		rcu_read_lock();
930
931		for (;;) {
932			/* Prepare header of the next frame
933			 * before the previous one goes down. */
934			if (iter.frag)
935				ip6_fraglist_prepare(skb, &iter);
936
937			skb->tstamp = tstamp;
938			err = output(net, sk, skb);
939			if (!err)
940				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
941					      IPSTATS_MIB_FRAGCREATES);
942
943			if (err || !iter.frag)
944				break;
945
946			skb = ip6_fraglist_next(&iter);
947		}
948
949		kfree(iter.tmp_hdr);
950
951		if (err == 0) {
952			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
953				      IPSTATS_MIB_FRAGOKS);
954			rcu_read_unlock();
955			return 0;
956		}
957
958		kfree_skb_list(iter.frag);
959
960		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
961			      IPSTATS_MIB_FRAGFAILS);
962		rcu_read_unlock();
963		return err;
964
965slow_path_clean:
966		skb_walk_frags(skb, frag2) {
967			if (frag2 == frag)
968				break;
969			frag2->sk = NULL;
970			frag2->destructor = NULL;
971			skb->truesize += frag2->truesize;
972		}
973	}
974
975slow_path:
976	/*
977	 *	Fragment the datagram.
978	 */
979
980	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
981		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
982		      &state);
983
984	/*
985	 *	Keep copying data until we run out.
986	 */
987
988	while (state.left > 0) {
989		frag = ip6_frag_next(skb, &state);
990		if (IS_ERR(frag)) {
991			err = PTR_ERR(frag);
992			goto fail;
993		}
994
995		/*
996		 *	Put this fragment into the sending queue.
997		 */
998		frag->tstamp = tstamp;
999		err = output(net, sk, frag);
1000		if (err)
1001			goto fail;
1002
1003		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1004			      IPSTATS_MIB_FRAGCREATES);
1005	}
1006	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1007		      IPSTATS_MIB_FRAGOKS);
1008	consume_skb(skb);
1009	return err;
1010
1011fail_toobig:
1012	if (skb->sk && dst_allfrag(skb_dst(skb)))
1013		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
1014
1015	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1016	err = -EMSGSIZE;
1017
1018fail:
1019	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1020		      IPSTATS_MIB_FRAGFAILS);
1021	kfree_skb(skb);
1022	return err;
1023}
1024
1025static inline int ip6_rt_check(const struct rt6key *rt_key,
1026			       const struct in6_addr *fl_addr,
1027			       const struct in6_addr *addr_cache)
1028{
1029	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
1030		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
1031}
1032
1033static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
1034					  struct dst_entry *dst,
1035					  const struct flowi6 *fl6)
1036{
1037	struct ipv6_pinfo *np = inet6_sk(sk);
1038	struct rt6_info *rt;
1039
1040	if (!dst)
1041		goto out;
1042
1043	if (dst->ops->family != AF_INET6) {
1044		dst_release(dst);
1045		return NULL;
1046	}
1047
1048	rt = (struct rt6_info *)dst;
1049	/* Yes, checking route validity in the not connected
1050	 * case is not very simple. Take into account
1051	 * that we do not support routing by source, TOS,
1052	 * or MSG_DONTROUTE		--ANK (980726)
1053	 *
1054	 * 1. ip6_rt_check(): If the route was a host route,
1055	 *    check that the cached destination is current.
1056	 *    If it is a network route, we may still
1057	 *    check its validity using the saved pointer
1058	 *    to the last used address: daddr_cache.
1059	 *    We do not want to save the whole address now
1060	 *    (because the main consumer of this service
1061	 *    is TCP, which does not have this problem),
1062	 *    so this last trick works only on connected
1063	 *    sockets.
1064	 * 2. The oif should also be the same.
1065	 */
1066	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
1067#ifdef CONFIG_IPV6_SUBTREES
1068	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
1069#endif
1070	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
1071	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
1072		dst_release(dst);
1073		dst = NULL;
1074	}
1075
1076out:
1077	return dst;
1078}
1079
1080static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
1081			       struct dst_entry **dst, struct flowi6 *fl6)
1082{
1083#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1084	struct neighbour *n;
1085	struct rt6_info *rt;
1086#endif
1087	int err;
1088	int flags = 0;
1089
1090	/* The correct way to handle this would be to do
1091	 * ip6_route_get_saddr, and then ip6_route_output; however,
1092	 * the route-specific preferred source forces the
1093	 * ip6_route_output call _before_ ip6_route_get_saddr.
1094	 *
1095	 * In source specific routing (no src=any default route),
1096	 * ip6_route_output will fail given src=any saddr, though, so
1097	 * that's why we try it again later.
1098	 */
1099	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1100		struct fib6_info *from;
1101		struct rt6_info *rt;
1102		bool had_dst = *dst != NULL;
1103
1104		if (!had_dst)
1105			*dst = ip6_route_output(net, sk, fl6);
1106		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1107
1108		rcu_read_lock();
1109		from = rt ? rcu_dereference(rt->from) : NULL;
1110		err = ip6_route_get_saddr(net, from, &fl6->daddr,
1111					  sk ? inet6_sk(sk)->srcprefs : 0,
1112					  &fl6->saddr);
1113		rcu_read_unlock();
1114
1115		if (err)
1116			goto out_err_release;
1117
1118		/* If we had an erroneous initial result, pretend it
1119		 * never existed and let the SA-enabled version take
1120		 * over.
1121		 */
1122		if (!had_dst && (*dst)->error) {
1123			dst_release(*dst);
1124			*dst = NULL;
1125		}
1126
1127		if (fl6->flowi6_oif)
1128			flags |= RT6_LOOKUP_F_IFACE;
1129	}
1130
1131	if (!*dst)
1132		*dst = ip6_route_output_flags(net, sk, fl6, flags);
1133
1134	err = (*dst)->error;
1135	if (err)
1136		goto out_err_release;
1137
1138#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1139	/*
1140	 * Here if the dst entry we've looked up
1141	 * has a neighbour entry that is in the INCOMPLETE
1142	 * state and the src address from the flow is
1143	 * marked as OPTIMISTIC, we release the found
1144	 * dst entry and replace it instead with the
1145	 * dst entry of the nexthop router
1146	 */
1147	rt = (struct rt6_info *) *dst;
1148	rcu_read_lock_bh();
1149	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1150				      rt6_nexthop(rt, &fl6->daddr));
1151	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1152	rcu_read_unlock_bh();
1153
1154	if (err) {
1155		struct inet6_ifaddr *ifp;
1156		struct flowi6 fl_gw6;
1157		int redirect;
1158
1159		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1160				      (*dst)->dev, 1);
1161
1162		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1163		if (ifp)
1164			in6_ifa_put(ifp);
1165
1166		if (redirect) {
1167			/*
1168			 * We need to get the dst entry for the
1169			 * default router instead
1170			 */
1171			dst_release(*dst);
1172			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1173			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1174			*dst = ip6_route_output(net, sk, &fl_gw6);
1175			err = (*dst)->error;
1176			if (err)
1177				goto out_err_release;
1178		}
1179	}
1180#endif
1181	if (ipv6_addr_v4mapped(&fl6->saddr) &&
1182	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1183		err = -EAFNOSUPPORT;
1184		goto out_err_release;
1185	}
1186
1187	return 0;
1188
1189out_err_release:
1190	dst_release(*dst);
1191	*dst = NULL;
1192
1193	if (err == -ENETUNREACH)
1194		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1195	return err;
1196}
1197
1198/**
1199 *	ip6_dst_lookup - perform route lookup on flow
1200 *	@net: Network namespace to perform lookup in
1201 *	@sk: socket which provides route info
1202 *	@dst: pointer to dst_entry * for result
1203 *	@fl6: flow to lookup
1204 *
1205 *	This function performs a route lookup on the given flow.
1206 *
1207 *	It returns zero on success, or a standard errno code on error.
1208 */
1209int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1210		   struct flowi6 *fl6)
1211{
1212	*dst = NULL;
1213	return ip6_dst_lookup_tail(net, sk, dst, fl6);
1214}
1215EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1216
1217/**
1218 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1219 *	@net: Network namespace to perform lookup in
1220 *	@sk: socket which provides route info
1221 *	@fl6: flow to lookup
1222 *	@final_dst: final destination address for ipsec lookup
1223 *
1224 *	This function performs a route lookup on the given flow.
1225 *
1226 *	It returns a valid dst pointer on success, or a pointer encoded
1227 *	error code.
1228 */
1229struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1230				      const struct in6_addr *final_dst)
1231{
1232	struct dst_entry *dst = NULL;
1233	int err;
1234
1235	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1236	if (err)
1237		return ERR_PTR(err);
1238	if (final_dst)
1239		fl6->daddr = *final_dst;
1240
1241	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1242}
1243EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1244
1245/**
1246 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1247 *	@sk: socket which provides the dst cache and route info
1248 *	@fl6: flow to lookup
1249 *	@final_dst: final destination address for ipsec lookup
1250 *	@connected: whether @sk is connected or not
1251 *
1252 *	This function performs a route lookup on the given flow with the
1253 *	possibility of using the cached route in the socket if it is valid.
1254 *	It will take the socket dst lock when operating on the dst cache.
1255 *	As a result, this function can only be used in process context.
1256 *
1257 *	In addition, for a connected socket, cache the dst in the socket
1258 *	if the current cache is not valid.
1259 *
1260 *	It returns a valid dst pointer on success, or a pointer encoded
1261 *	error code.
1262 */
1263struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1264					 const struct in6_addr *final_dst,
1265					 bool connected)
1266{
1267	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1268
1269	dst = ip6_sk_dst_check(sk, dst, fl6);
1270	if (dst)
1271		return dst;
1272
1273	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1274	if (connected && !IS_ERR(dst))
1275		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1276
1277	return dst;
1278}
1279EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
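1280
/*
 * Illustrative sketch (not part of the original file): resolving the route
 * for a connected datagram socket through the per-socket dst cache, roughly
 * the step UDPv6 performs before appending data.  The helper name and the
 * choice of flow fields are assumptions; real callers also fold in cmsg
 * data and socket options, and propagate the error code instead of
 * returning NULL.
 */
static struct rt6_info *example_connected_sk_route(struct sock *sk,
						   struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct dst_entry *dst;

	memset(fl6, 0, sizeof(*fl6));
	fl6->flowi6_proto = sk->sk_protocol;
	fl6->flowi6_oif = sk->sk_bound_dev_if;
	fl6->flowi6_mark = sk->sk_mark;
	fl6->daddr = sk->sk_v6_daddr;
	fl6->saddr = np->saddr;

	/* Reuse the cached dst if it is still valid, else look it up again. */
	dst = ip6_sk_dst_lookup_flow(sk, fl6, &fl6->daddr, true);
	return IS_ERR(dst) ? NULL : (struct rt6_info *)dst;
}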
1280
1281/**
1282 *      ip6_dst_lookup_tunnel - perform route lookup on tunnel
1283 *      @skb: Packet for which lookup is done
1284 *      @dev: Tunnel device
1285 *      @net: Network namespace of tunnel device
1286 *      @sock: Socket which provides route info
1287 *      @saddr: Memory to store the src ip address
1288 *      @info: Tunnel information
1289 *      @protocol: IP protocol
1290 *      @use_cache: Flag to enable cache usage
1291 *      This function performs a route lookup on a tunnel
1292 *
1293 *      It returns a valid dst pointer and stores src address to be used in
1294 *      tunnel in param saddr on success, else a pointer encoded error code.
1295 */
1296
1297struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
1298					struct net_device *dev,
1299					struct net *net,
1300					struct socket *sock,
1301					struct in6_addr *saddr,
1302					const struct ip_tunnel_info *info,
1303					u8 protocol,
1304					bool use_cache)
1305{
1306	struct dst_entry *dst = NULL;
1307#ifdef CONFIG_DST_CACHE
1308	struct dst_cache *dst_cache;
1309#endif
1310	struct flowi6 fl6;
1311	__u8 prio;
1312
1313#ifdef CONFIG_DST_CACHE
1314	dst_cache = (struct dst_cache *)&info->dst_cache;
1315	if (use_cache) {
1316		dst = dst_cache_get_ip6(dst_cache, saddr);
1317		if (dst)
1318			return dst;
1319	}
1320#endif
1321	memset(&fl6, 0, sizeof(fl6));
1322	fl6.flowi6_mark = skb->mark;
1323	fl6.flowi6_proto = protocol;
1324	fl6.daddr = info->key.u.ipv6.dst;
1325	fl6.saddr = info->key.u.ipv6.src;
1326	prio = info->key.tos;
1327	fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label);
1328
1329	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
1330					      NULL);
1331	if (IS_ERR(dst)) {
1332		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
1333		return ERR_PTR(-ENETUNREACH);
1334	}
1335	if (dst->dev == dev) { /* is this necessary? */
1336		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
1337		dst_release(dst);
1338		return ERR_PTR(-ELOOP);
1339	}
1340#ifdef CONFIG_DST_CACHE
1341	if (use_cache)
1342		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
1343#endif
1344	*saddr = fl6.saddr;
1345	return dst;
1346}
1347EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1348
1349static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1350					       gfp_t gfp)
1351{
1352	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1353}
1354
1355static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1356						gfp_t gfp)
1357{
1358	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1359}
1360
1361static void ip6_append_data_mtu(unsigned int *mtu,
1362				int *maxfraglen,
1363				unsigned int fragheaderlen,
1364				struct sk_buff *skb,
1365				struct rt6_info *rt,
1366				unsigned int orig_mtu)
1367{
1368	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1369		if (!skb) {
1370			/* first fragment, reserve header_len */
1371			*mtu = orig_mtu - rt->dst.header_len;
1372
1373		} else {
1374			/*
1375			 * this fragment is not the first; the header
1376			 * space is regarded as data space.
1377			 */
1378			*mtu = orig_mtu;
1379		}
1380		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1381			      + fragheaderlen - sizeof(struct frag_hdr);
1382	}
1383}
1384
1385static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1386			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1387			  struct rt6_info *rt, struct flowi6 *fl6)
1388{
1389	struct ipv6_pinfo *np = inet6_sk(sk);
1390	unsigned int mtu;
1391	struct ipv6_txoptions *opt = ipc6->opt;
1392
1393	/*
1394	 * setup for corking
1395	 */
1396	if (opt) {
1397		if (WARN_ON(v6_cork->opt))
1398			return -EINVAL;
1399
1400		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1401		if (unlikely(!v6_cork->opt))
1402			return -ENOBUFS;
1403
1404		v6_cork->opt->tot_len = sizeof(*opt);
1405		v6_cork->opt->opt_flen = opt->opt_flen;
1406		v6_cork->opt->opt_nflen = opt->opt_nflen;
1407
1408		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1409						    sk->sk_allocation);
1410		if (opt->dst0opt && !v6_cork->opt->dst0opt)
1411			return -ENOBUFS;
1412
1413		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1414						    sk->sk_allocation);
1415		if (opt->dst1opt && !v6_cork->opt->dst1opt)
1416			return -ENOBUFS;
1417
1418		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1419						   sk->sk_allocation);
1420		if (opt->hopopt && !v6_cork->opt->hopopt)
1421			return -ENOBUFS;
1422
1423		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1424						    sk->sk_allocation);
1425		if (opt->srcrt && !v6_cork->opt->srcrt)
1426			return -ENOBUFS;
1427
1428		/* need source address above miyazawa */
1429	}
1430	dst_hold(&rt->dst);
1431	cork->base.dst = &rt->dst;
1432	cork->fl.u.ip6 = *fl6;
1433	v6_cork->hop_limit = ipc6->hlimit;
1434	v6_cork->tclass = ipc6->tclass;
1435	if (rt->dst.flags & DST_XFRM_TUNNEL)
1436		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1437		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1438	else
1439		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1440			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1441	if (np->frag_size < mtu) {
1442		if (np->frag_size)
1443			mtu = np->frag_size;
1444	}
1445	cork->base.fragsize = mtu;
1446	cork->base.gso_size = ipc6->gso_size;
1447	cork->base.tx_flags = 0;
1448	cork->base.mark = ipc6->sockc.mark;
1449	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1450
1451	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1452		cork->base.flags |= IPCORK_ALLFRAG;
1453	cork->base.length = 0;
1454
1455	cork->base.transmit_time = ipc6->sockc.transmit_time;
1456
1457	return 0;
1458}
1459
1460static int __ip6_append_data(struct sock *sk,
1461			     struct flowi6 *fl6,
1462			     struct sk_buff_head *queue,
1463			     struct inet_cork *cork,
1464			     struct inet6_cork *v6_cork,
1465			     struct page_frag *pfrag,
1466			     int getfrag(void *from, char *to, int offset,
1467					 int len, int odd, struct sk_buff *skb),
1468			     void *from, int length, int transhdrlen,
1469			     unsigned int flags, struct ipcm6_cookie *ipc6)
1470{
1471	struct sk_buff *skb, *skb_prev = NULL;
1472	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1473	struct ubuf_info *uarg = NULL;
1474	int exthdrlen = 0;
1475	int dst_exthdrlen = 0;
1476	int hh_len;
1477	int copy;
1478	int err;
1479	int offset = 0;
1480	u32 tskey = 0;
1481	struct rt6_info *rt = (struct rt6_info *)cork->dst;
1482	struct ipv6_txoptions *opt = v6_cork->opt;
1483	int csummode = CHECKSUM_NONE;
1484	unsigned int maxnonfragsize, headersize;
1485	unsigned int wmem_alloc_delta = 0;
1486	bool paged, extra_uref = false;
1487
1488	skb = skb_peek_tail(queue);
1489	if (!skb) {
1490		exthdrlen = opt ? opt->opt_flen : 0;
1491		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1492	}
1493
1494	paged = !!cork->gso_size;
1495	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1496	orig_mtu = mtu;
1497
1498	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1499	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1500		tskey = sk->sk_tskey++;
1501
1502	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1503
1504	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1505			(opt ? opt->opt_nflen : 0);
1506
1507	headersize = sizeof(struct ipv6hdr) +
1508		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1509		     (dst_allfrag(&rt->dst) ?
1510		      sizeof(struct frag_hdr) : 0) +
1511		     rt->rt6i_nfheader_len;
1512
1513	if (mtu <= fragheaderlen ||
1514	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
1515		goto emsgsize;
1516
1517	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1518		     sizeof(struct frag_hdr);
1519
1520	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1521	 * in the first fragment
1522	 */
1523	if (headersize + transhdrlen > mtu)
1524		goto emsgsize;
1525
1526	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1527	    (sk->sk_protocol == IPPROTO_UDP ||
1528	     sk->sk_protocol == IPPROTO_RAW)) {
1529		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1530				sizeof(struct ipv6hdr));
1531		goto emsgsize;
1532	}
1533
1534	if (ip6_sk_ignore_df(sk))
1535		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1536	else
1537		maxnonfragsize = mtu;
1538
1539	if (cork->length + length > maxnonfragsize - headersize) {
1540emsgsize:
1541		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1542		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1543		return -EMSGSIZE;
1544	}
1545
1546	/* CHECKSUM_PARTIAL only with no extension headers and when
1547	 * we are not going to fragment
1548	 */
1549	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1550	    headersize == sizeof(struct ipv6hdr) &&
1551	    length <= mtu - headersize &&
1552	    (!(flags & MSG_MORE) || cork->gso_size) &&
1553	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1554		csummode = CHECKSUM_PARTIAL;
1555
1556	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1557		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
1558		if (!uarg)
1559			return -ENOBUFS;
1560		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
1561		if (rt->dst.dev->features & NETIF_F_SG &&
1562		    csummode == CHECKSUM_PARTIAL) {
1563			paged = true;
1564		} else {
1565			uarg->zerocopy = 0;
1566			skb_zcopy_set(skb, uarg, &extra_uref);
1567		}
1568	}
1569
1570	/*
1571	 * Let's try using as much space as possible.
1572	 * Use MTU if total length of the message fits into the MTU.
1573	 * Otherwise, we need to reserve fragment header and
1574	 * fragment alignment (= 8-15 octets, in total).
1575	 *
1576	 * Note that we may need to "move" the data from the tail
1577	 * of the buffer to the new fragment when we split
1578	 * the message.
1579	 *
1580	 * FIXME: It may be fragmented into multiple chunks
1581	 *        at once if non-fragmentable extension headers
1582	 *        are too large.
1583	 * --yoshfuji
1584	 */
1585
1586	cork->length += length;
1587	if (!skb)
1588		goto alloc_new_skb;
1589
1590	while (length > 0) {
1591		/* Check if the remaining data fits into current packet. */
1592		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1593		if (copy < length)
1594			copy = maxfraglen - skb->len;
1595
1596		if (copy <= 0) {
1597			char *data;
1598			unsigned int datalen;
1599			unsigned int fraglen;
1600			unsigned int fraggap;
1601			unsigned int alloclen, alloc_extra;
1602			unsigned int pagedlen;
1603alloc_new_skb:
1604			/* There's no room in the current skb */
1605			if (skb)
1606				fraggap = skb->len - maxfraglen;
1607			else
1608				fraggap = 0;
1609			/* update mtu and maxfraglen if necessary */
1610			if (!skb || !skb_prev)
1611				ip6_append_data_mtu(&mtu, &maxfraglen,
1612						    fragheaderlen, skb, rt,
1613						    orig_mtu);
1614
1615			skb_prev = skb;
1616
1617			/*
1618			 * If remaining data exceeds the mtu,
1619			 * we know we need more fragment(s).
1620			 */
1621			datalen = length + fraggap;
1622
1623			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1624				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1625			fraglen = datalen + fragheaderlen;
1626			pagedlen = 0;
1627
1628			alloc_extra = hh_len;
1629			alloc_extra += dst_exthdrlen;
1630			alloc_extra += rt->dst.trailer_len;
1631
1632			/* We just reserve space for fragment header.
1633			 * Note: this may be overallocation if the message
1634			 * (without MSG_MORE) fits into the MTU.
1635			 */
1636			alloc_extra += sizeof(struct frag_hdr);
1637
1638			if ((flags & MSG_MORE) &&
1639			    !(rt->dst.dev->features&NETIF_F_SG))
1640				alloclen = mtu;
1641			else if (!paged &&
1642				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
1643				  !(rt->dst.dev->features & NETIF_F_SG)))
1644				alloclen = fraglen;
1645			else {
1646				alloclen = min_t(int, fraglen, MAX_HEADER);
1647				pagedlen = fraglen - alloclen;
1648			}
1649			alloclen += alloc_extra;
1650
1651			if (datalen != length + fraggap) {
1652				/*
1653				 * this is not the last fragment; the trailer
1654				 * space is regarded as data space.
1655				 */
1656				datalen += rt->dst.trailer_len;
1657			}
1658
1659			fraglen = datalen + fragheaderlen;
1660
1661			copy = datalen - transhdrlen - fraggap - pagedlen;
1662			if (copy < 0) {
1663				err = -EINVAL;
1664				goto error;
1665			}
1666			if (transhdrlen) {
1667				skb = sock_alloc_send_skb(sk, alloclen,
1668						(flags & MSG_DONTWAIT), &err);
1669			} else {
1670				skb = NULL;
1671				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1672				    2 * sk->sk_sndbuf)
1673					skb = alloc_skb(alloclen,
1674							sk->sk_allocation);
1675				if (unlikely(!skb))
1676					err = -ENOBUFS;
1677			}
1678			if (!skb)
1679				goto error;
1680			/*
1681			 *	Fill in the control structures
1682			 */
1683			skb->protocol = htons(ETH_P_IPV6);
1684			skb->ip_summed = csummode;
1685			skb->csum = 0;
1686			/* reserve for fragmentation and ipsec header */
1687			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1688				    dst_exthdrlen);
1689
1690			/*
1691			 *	Find where to start putting bytes
1692			 */
1693			data = skb_put(skb, fraglen - pagedlen);
1694			skb_set_network_header(skb, exthdrlen);
1695			data += fragheaderlen;
1696			skb->transport_header = (skb->network_header +
1697						 fragheaderlen);
1698			if (fraggap) {
1699				skb->csum = skb_copy_and_csum_bits(
1700					skb_prev, maxfraglen,
1701					data + transhdrlen, fraggap);
1702				skb_prev->csum = csum_sub(skb_prev->csum,
1703							  skb->csum);
1704				data += fraggap;
1705				pskb_trim_unique(skb_prev, maxfraglen);
1706			}
1707			if (copy > 0 &&
1708			    getfrag(from, data + transhdrlen, offset,
1709				    copy, fraggap, skb) < 0) {
1710				err = -EFAULT;
1711				kfree_skb(skb);
1712				goto error;
1713			}
1714
1715			offset += copy;
1716			length -= copy + transhdrlen;
1717			transhdrlen = 0;
1718			exthdrlen = 0;
1719			dst_exthdrlen = 0;
1720
1721			/* Only the initial fragment is time stamped */
1722			skb_shinfo(skb)->tx_flags = cork->tx_flags;
1723			cork->tx_flags = 0;
1724			skb_shinfo(skb)->tskey = tskey;
1725			tskey = 0;
1726			skb_zcopy_set(skb, uarg, &extra_uref);
1727
1728			if ((flags & MSG_CONFIRM) && !skb_prev)
1729				skb_set_dst_pending_confirm(skb, 1);
1730
1731			/*
1732			 * Put the packet on the pending queue
1733			 */
1734			if (!skb->destructor) {
1735				skb->destructor = sock_wfree;
1736				skb->sk = sk;
1737				wmem_alloc_delta += skb->truesize;
1738			}
1739			__skb_queue_tail(queue, skb);
1740			continue;
1741		}
1742
1743		if (copy > length)
1744			copy = length;
1745
1746		if (!(rt->dst.dev->features&NETIF_F_SG) &&
1747		    skb_tailroom(skb) >= copy) {
1748			unsigned int off;
1749
1750			off = skb->len;
1751			if (getfrag(from, skb_put(skb, copy),
1752						offset, copy, off, skb) < 0) {
1753				__skb_trim(skb, off);
1754				err = -EFAULT;
1755				goto error;
1756			}
1757		} else if (!uarg || !uarg->zerocopy) {
1758			int i = skb_shinfo(skb)->nr_frags;
1759
1760			err = -ENOMEM;
1761			if (!sk_page_frag_refill(sk, pfrag))
1762				goto error;
1763
1764			if (!skb_can_coalesce(skb, i, pfrag->page,
1765					      pfrag->offset)) {
1766				err = -EMSGSIZE;
1767				if (i == MAX_SKB_FRAGS)
1768					goto error;
1769
1770				__skb_fill_page_desc(skb, i, pfrag->page,
1771						     pfrag->offset, 0);
1772				skb_shinfo(skb)->nr_frags = ++i;
1773				get_page(pfrag->page);
1774			}
1775			copy = min_t(int, copy, pfrag->size - pfrag->offset);
1776			if (getfrag(from,
1777				    page_address(pfrag->page) + pfrag->offset,
1778				    offset, copy, skb->len, skb) < 0)
1779				goto error_efault;
1780
1781			pfrag->offset += copy;
1782			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1783			skb->len += copy;
1784			skb->data_len += copy;
1785			skb->truesize += copy;
1786			wmem_alloc_delta += copy;
1787		} else {
1788			err = skb_zerocopy_iter_dgram(skb, from, copy);
1789			if (err < 0)
1790				goto error;
1791		}
1792		offset += copy;
1793		length -= copy;
1794	}
1795
1796	if (wmem_alloc_delta)
1797		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1798	return 0;
1799
1800error_efault:
1801	err = -EFAULT;
1802error:
1803	if (uarg)
1804		sock_zerocopy_put_abort(uarg, extra_uref);
1805	cork->length -= length;
1806	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1807	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1808	return err;
1809}
1810
1811int ip6_append_data(struct sock *sk,
1812		    int getfrag(void *from, char *to, int offset, int len,
1813				int odd, struct sk_buff *skb),
1814		    void *from, int length, int transhdrlen,
1815		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1816		    struct rt6_info *rt, unsigned int flags)
1817{
1818	struct inet_sock *inet = inet_sk(sk);
1819	struct ipv6_pinfo *np = inet6_sk(sk);
1820	int exthdrlen;
1821	int err;
1822
1823	if (flags&MSG_PROBE)
1824		return 0;
1825	if (skb_queue_empty(&sk->sk_write_queue)) {
1826		/*
1827		 * setup for corking
1828		 */
1829		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1830				     ipc6, rt, fl6);
1831		if (err)
1832			return err;
1833
1834		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1835		length += exthdrlen;
1836		transhdrlen += exthdrlen;
1837	} else {
1838		fl6 = &inet->cork.fl.u.ip6;
1839		transhdrlen = 0;
1840	}
1841
1842	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1843				 &np->cork, sk_page_frag(sk), getfrag,
1844				 from, length, transhdrlen, flags, ipc6);
1845}
1846EXPORT_SYMBOL_GPL(ip6_append_data);
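1847
/*
 * Illustrative sketch (not part of the original file): the corked transmit
 * pattern built on ip6_append_data() and ip6_push_pending_frames(), roughly
 * as UDPv6 and raw sockets use it.  It assumes the socket is already locked,
 * the flow and route were resolved beforehand (e.g. with
 * example_connected_sk_route() above), and ip_generic_getfrag() from
 * <net/ip.h> copies the payload; transhdrlen is 0 as for a raw socket with
 * no transport header.  The helper name is hypothetical.
 */
static int example_ip6_corked_send(struct sock *sk, struct msghdr *msg,
				   size_t len, struct flowi6 *fl6,
				   struct rt6_info *rt)
{
	struct ipcm6_cookie ipc6;
	int err;

	ipcm6_init_sk(&ipc6, inet6_sk(sk));

	/* Queue the payload; fragments are built lazily on the cork queue. */
	err = ip6_append_data(sk, ip_generic_getfrag, msg, len, 0,
			      &ipc6, fl6, rt, msg->msg_flags);
	if (err) {
		ip6_flush_pending_frames(sk);
		return err;
	}

	/* Nothing more is coming: build the final skb and send it. */
	return ip6_push_pending_frames(sk);
}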
1847
1848static void ip6_cork_release(struct inet_cork_full *cork,
1849			     struct inet6_cork *v6_cork)
1850{
1851	if (v6_cork->opt) {
1852		kfree(v6_cork->opt->dst0opt);
1853		kfree(v6_cork->opt->dst1opt);
1854		kfree(v6_cork->opt->hopopt);
1855		kfree(v6_cork->opt->srcrt);
1856		kfree(v6_cork->opt);
1857		v6_cork->opt = NULL;
1858	}
1859
1860	if (cork->base.dst) {
1861		dst_release(cork->base.dst);
1862		cork->base.dst = NULL;
1863		cork->base.flags &= ~IPCORK_ALLFRAG;
1864	}
1865	memset(&cork->fl, 0, sizeof(cork->fl));
1866}
1867
1868struct sk_buff *__ip6_make_skb(struct sock *sk,
1869			       struct sk_buff_head *queue,
1870			       struct inet_cork_full *cork,
1871			       struct inet6_cork *v6_cork)
1872{
1873	struct sk_buff *skb, *tmp_skb;
1874	struct sk_buff **tail_skb;
1875	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1876	struct ipv6_pinfo *np = inet6_sk(sk);
1877	struct net *net = sock_net(sk);
1878	struct ipv6hdr *hdr;
1879	struct ipv6_txoptions *opt = v6_cork->opt;
1880	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1881	struct flowi6 *fl6 = &cork->fl.u.ip6;
1882	unsigned char proto = fl6->flowi6_proto;
1883
1884	skb = __skb_dequeue(queue);
1885	if (!skb)
1886		goto out;
1887	tail_skb = &(skb_shinfo(skb)->frag_list);
1888
1889	/* move skb->data to ip header from ext header */
1890	if (skb->data < skb_network_header(skb))
1891		__skb_pull(skb, skb_network_offset(skb));
1892	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1893		__skb_pull(tmp_skb, skb_network_header_len(skb));
1894		*tail_skb = tmp_skb;
1895		tail_skb = &(tmp_skb->next);
1896		skb->len += tmp_skb->len;
1897		skb->data_len += tmp_skb->len;
1898		skb->truesize += tmp_skb->truesize;
1899		tmp_skb->destructor = NULL;
1900		tmp_skb->sk = NULL;
1901	}
1902
1903	/* Allow local fragmentation. */
1904	skb->ignore_df = ip6_sk_ignore_df(sk);
1905
1906	*final_dst = fl6->daddr;
1907	__skb_pull(skb, skb_network_header_len(skb));
1908	if (opt && opt->opt_flen)
1909		ipv6_push_frag_opts(skb, opt, &proto);
1910	if (opt && opt->opt_nflen)
1911		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1912
1913	skb_push(skb, sizeof(struct ipv6hdr));
1914	skb_reset_network_header(skb);
1915	hdr = ipv6_hdr(skb);
1916
1917	ip6_flow_hdr(hdr, v6_cork->tclass,
1918		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1919					ip6_autoflowlabel(net, np), fl6));
1920	hdr->hop_limit = v6_cork->hop_limit;
1921	hdr->nexthdr = proto;
1922	hdr->saddr = fl6->saddr;
1923	hdr->daddr = *final_dst;
1924
1925	skb->priority = sk->sk_priority;
1926	skb->mark = cork->base.mark;
1927
1928	skb->tstamp = cork->base.transmit_time;
1929
1930	skb_dst_set(skb, dst_clone(&rt->dst));
1931	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1932	if (proto == IPPROTO_ICMPV6) {
1933		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1934		u8 icmp6_type;
1935
1936		if (sk->sk_socket->type == SOCK_RAW &&
1937			!(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH))
1938			icmp6_type = fl6->fl6_icmp_type;
1939		else
1940			icmp6_type = icmp6_hdr(skb)->icmp6_type;
1941		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
1942		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1943	}
1944
1945	ip6_cork_release(cork, v6_cork);
1946out:
1947	return skb;
1948}
1949
1950int ip6_send_skb(struct sk_buff *skb)
1951{
1952	struct net *net = sock_net(skb->sk);
1953	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1954	int err;
1955
1956	rcu_read_lock();
1957	err = ip6_local_out(net, skb->sk, skb);
1958	if (err) {
1959		if (err > 0)
1960			err = net_xmit_errno(err);
1961		if (err)
1962			IP6_INC_STATS(net, rt->rt6i_idev,
1963				      IPSTATS_MIB_OUTDISCARDS);
1964	}
1965
1966	rcu_read_unlock();
1967	return err;
1968}
1969
1970int ip6_push_pending_frames(struct sock *sk)
1971{
1972	struct sk_buff *skb;
1973
1974	skb = ip6_finish_skb(sk);
1975	if (!skb)
1976		return 0;
1977
1978	return ip6_send_skb(skb);
1979}
1980EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1981
1982static void __ip6_flush_pending_frames(struct sock *sk,
1983				       struct sk_buff_head *queue,
1984				       struct inet_cork_full *cork,
1985				       struct inet6_cork *v6_cork)
1986{
1987	struct sk_buff *skb;
1988
1989	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1990		if (skb_dst(skb))
1991			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1992				      IPSTATS_MIB_OUTDISCARDS);
1993		kfree_skb(skb);
1994	}
1995
1996	ip6_cork_release(cork, v6_cork);
1997}
1998
1999void ip6_flush_pending_frames(struct sock *sk)
2000{
2001	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
2002				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
2003}
2004EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
2005
2006struct sk_buff *ip6_make_skb(struct sock *sk,
2007			     int getfrag(void *from, char *to, int offset,
2008					 int len, int odd, struct sk_buff *skb),
2009			     void *from, int length, int transhdrlen,
2010			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
2011			     struct rt6_info *rt, unsigned int flags,
2012			     struct inet_cork_full *cork)
2013{
2014	struct inet6_cork v6_cork;
2015	struct sk_buff_head queue;
2016	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
2017	int err;
2018
2019	if (flags & MSG_PROBE)
2020		return NULL;
2021
2022	__skb_queue_head_init(&queue);
2023
2024	cork->base.flags = 0;
2025	cork->base.addr = 0;
2026	cork->base.opt = NULL;
2027	cork->base.dst = NULL;
2028	v6_cork.opt = NULL;
2029	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
2030	if (err) {
2031		ip6_cork_release(cork, &v6_cork);
2032		return ERR_PTR(err);
2033	}
2034	if (ipc6->dontfrag < 0)
2035		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
2036
2037	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
2038				&current->task_frag, getfrag, from,
2039				length + exthdrlen, transhdrlen + exthdrlen,
2040				flags, ipc6);
2041	if (err) {
2042		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
2043		return ERR_PTR(err);
2044	}
2045
2046	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
2047}
2048