// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		rcu_assign_pointer(sk->sk_rx_dst, dst);
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}


static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
			       tclass, sk->sk_priority);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}


static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	kfree_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	int l3index = 0;
	u8 prefixlen;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, l3index,
				      cmd.tcpm_key, cmd.tcpm_keylen,
				      GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, l3index,
			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

#endif

static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb,
				    int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash, l3index;
	u8 newhash[16];

	/* sdif set, means packet ingressed via a device
	 * in an L3 domain and dif is set to the l3mdev
	 */
	l3index = sdif ? dif : 0;

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest), l3index);
		return true;
	}
#endif
	return false;
}

static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
	.init_req	=	tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;
	__u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT) {
			mark = inet_twsk(sk)->tw_mark;
			/* autoflowlabel relies on buff->hash */
			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
				     PKT_HASH_TYPE_L4);
		} else {
			mark = sk->sk_mark;
		}
		buff->tstamp = tcp_transmit_time(sk);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup either it is for RST
	 * Underlying function will use this to retrieve the network
	 * namespace
	 */
	if (sk && sk->sk_state != TCP_TIME_WAIT)
		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
	else
		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass & ~INET_ECN_MASK, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
	} else if (hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not losing security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net,
					    &tcp_hashinfo, NULL, 0,
					    &ipv6h->saddr,
					    th->source, &ipv6h->daddr,
					    ntohs(th->source), dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
		}
	} else {
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
			     ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	int l3index;

	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}


static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newinet = inet_sk(newsk);
		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we do one more thing there: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions)
				tcp_v6_restore_cb(newnp->pktoptions);
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone_and_charge_r(skb, sk);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	struct sk_buff *skb_to_free;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		sock_hold(sk);
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
		goto discard_and_relse;

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		skb_to_free = sk->sk_rx_skb_cache;
		sk->sk_rx_skb_cache = NULL;
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb))
			goto discard_and_relse;
		skb_to_free = NULL;
	}
	bh_unlock_sock(sk);
	if (skb_to_free)
		__kfree_skb(skb_to_free);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
}

const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl "
			 "local_address "
			 "remote_address "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 " uid timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show = tcp6_seq_show,
	.start = tcp_seq_start,
	.next = tcp_seq_next,
	.stop = tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family = AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif
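
/* tcpv6_prot below is the AF_INET6 stream protocol descriptor hooked up to
 * the socket layer through tcpv6_protosw.  Two details worth noting:
 * .destroy reuses tcp_v4_destroy_sock() because socket teardown is
 * address-family independent, and .obj_size is sizeof(struct tcp6_sock) so
 * that the ipv6_pinfo area sits at a fixed offset from the sock, which is
 * what makes the tcp_inet6_sk() shortcut near the top of this file valid.
 */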

struct proto tcpv6_prot = {
	.name = "TCPv6",
	.owner = THIS_MODULE,
	.close = tcp_close,
	.pre_connect = tcp_v6_pre_connect,
	.connect = tcp_v6_connect,
	.disconnect = tcp_disconnect,
	.accept = inet_csk_accept,
	.ioctl = tcp_ioctl,
	.init = tcp_v6_init_sock,
	.destroy = tcp_v4_destroy_sock,
	.shutdown = tcp_shutdown,
	.setsockopt = tcp_setsockopt,
	.getsockopt = tcp_getsockopt,
	.bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
	.keepalive = tcp_set_keepalive,
	.recvmsg = tcp_recvmsg,
	.sendmsg = tcp_sendmsg,
	.sendpage = tcp_sendpage,
	.backlog_rcv = tcp_v6_do_rcv,
	.release_cb = tcp_release_cb,
	.hash = inet6_hash,
	.unhash = inet_unhash,
	.get_port = inet_csk_get_port,
	.enter_memory_pressure = tcp_enter_memory_pressure,
	.leave_memory_pressure = tcp_leave_memory_pressure,
	.stream_memory_free = tcp_stream_memory_free,
	.sockets_allocated = &tcp_sockets_allocated,
	.memory_allocated = &tcp_memory_allocated,
	.memory_pressure = &tcp_memory_pressure,
	.orphan_count = &tcp_orphan_count,
	.sysctl_mem = sysctl_tcp_mem,
	.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header = MAX_TCP_HEADER,
	.obj_size = sizeof(struct tcp6_sock),
	.slab_flags = SLAB_TYPESAFE_BY_RCU,
	.twsk_prot = &tcp6_timewait_sock_ops,
	.rsk_prot = &tcp6_request_sock_ops,
	.h.hashinfo = &tcp_hashinfo,
	.no_autobind = true,
	.diag_destroy = tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

static const struct inet6_protocol tcpv6_protocol = {
	.handler = tcp_v6_rcv,
	.err_handler = tcp_v6_err,
	.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type = SOCK_STREAM,
	.protocol = IPPROTO_TCP,
	.prot = &tcpv6_prot,
	.ops = &inet6_stream_ops,
	.flags = INET_PROTOSW_PERMANENT |
		 INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init = tcpv6_net_init,
	.exit = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}
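
/* tcpv6_init() runs once during IPv6 bring-up (called from inet6_init())
 * and registers, in order: the IPPROTO_TCP input handler, the SOCK_STREAM
 * protosw entry, the per-netns control socket, and finally MPTCP over IPv6.
 * Failures unwind in reverse order via the goto chain above.  A minimal
 * caller sketch, with hypothetical label names:
 *
 *	err = tcpv6_init();
 *	if (err)
 *		goto ipv6_tcp_fail;
 *	...
 *	tcpv6_exit();	(mirrors the registration order on teardown)
 */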