18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 48c2ecf20Sopenharmony_ci * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 58c2ecf20Sopenharmony_ci */ 68c2ecf20Sopenharmony_ci 78c2ecf20Sopenharmony_ci#include <linux/skbuff.h> 88c2ecf20Sopenharmony_ci#include <linux/if_arp.h> 98c2ecf20Sopenharmony_ci#include <linux/netdevice.h> 108c2ecf20Sopenharmony_ci#include <linux/if.h> 118c2ecf20Sopenharmony_ci#include <linux/if_vlan.h> 128c2ecf20Sopenharmony_ci#include <net/udp_tunnel.h> 138c2ecf20Sopenharmony_ci#include <net/sch_generic.h> 148c2ecf20Sopenharmony_ci#include <linux/netfilter.h> 158c2ecf20Sopenharmony_ci#include <rdma/ib_addr.h> 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_ci#include "rxe.h" 188c2ecf20Sopenharmony_ci#include "rxe_net.h" 198c2ecf20Sopenharmony_ci#include "rxe_loc.h" 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_cistatic struct rxe_recv_sockets recv_sockets; 228c2ecf20Sopenharmony_ci 238c2ecf20Sopenharmony_ciint rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) 248c2ecf20Sopenharmony_ci{ 258c2ecf20Sopenharmony_ci int err; 268c2ecf20Sopenharmony_ci unsigned char ll_addr[ETH_ALEN]; 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_ci ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); 298c2ecf20Sopenharmony_ci err = dev_mc_add(rxe->ndev, ll_addr); 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_ci return err; 328c2ecf20Sopenharmony_ci} 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_ciint rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid) 358c2ecf20Sopenharmony_ci{ 368c2ecf20Sopenharmony_ci int err; 378c2ecf20Sopenharmony_ci unsigned char ll_addr[ETH_ALEN]; 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_ci ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); 408c2ecf20Sopenharmony_ci err = dev_mc_del(rxe->ndev, ll_addr); 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci return err; 438c2ecf20Sopenharmony_ci} 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_cistatic struct dst_entry *rxe_find_route4(struct net_device *ndev, 468c2ecf20Sopenharmony_ci struct in_addr *saddr, 478c2ecf20Sopenharmony_ci struct in_addr *daddr) 488c2ecf20Sopenharmony_ci{ 498c2ecf20Sopenharmony_ci struct rtable *rt; 508c2ecf20Sopenharmony_ci struct flowi4 fl = { { 0 } }; 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci memset(&fl, 0, sizeof(fl)); 538c2ecf20Sopenharmony_ci fl.flowi4_oif = ndev->ifindex; 548c2ecf20Sopenharmony_ci memcpy(&fl.saddr, saddr, sizeof(*saddr)); 558c2ecf20Sopenharmony_ci memcpy(&fl.daddr, daddr, sizeof(*daddr)); 568c2ecf20Sopenharmony_ci fl.flowi4_proto = IPPROTO_UDP; 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_ci rt = ip_route_output_key(&init_net, &fl); 598c2ecf20Sopenharmony_ci if (IS_ERR(rt)) { 608c2ecf20Sopenharmony_ci pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr); 618c2ecf20Sopenharmony_ci return NULL; 628c2ecf20Sopenharmony_ci } 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci return &rt->dst; 658c2ecf20Sopenharmony_ci} 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 688c2ecf20Sopenharmony_cistatic struct dst_entry *rxe_find_route6(struct net_device *ndev, 698c2ecf20Sopenharmony_ci struct in6_addr *saddr, 708c2ecf20Sopenharmony_ci struct in6_addr *daddr) 718c2ecf20Sopenharmony_ci{ 728c2ecf20Sopenharmony_ci struct dst_entry *ndst; 738c2ecf20Sopenharmony_ci struct flowi6 fl6 = { { 0 } }; 748c2ecf20Sopenharmony_ci 758c2ecf20Sopenharmony_ci memset(&fl6, 0, sizeof(fl6)); 768c2ecf20Sopenharmony_ci fl6.flowi6_oif = ndev->ifindex; 778c2ecf20Sopenharmony_ci memcpy(&fl6.saddr, saddr, sizeof(*saddr)); 788c2ecf20Sopenharmony_ci memcpy(&fl6.daddr, daddr, sizeof(*daddr)); 798c2ecf20Sopenharmony_ci fl6.flowi6_proto = IPPROTO_UDP; 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_ci ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk), 828c2ecf20Sopenharmony_ci recv_sockets.sk6->sk, &fl6, 838c2ecf20Sopenharmony_ci NULL); 848c2ecf20Sopenharmony_ci if (IS_ERR(ndst)) { 858c2ecf20Sopenharmony_ci pr_err_ratelimited("no route to %pI6\n", daddr); 868c2ecf20Sopenharmony_ci return NULL; 878c2ecf20Sopenharmony_ci } 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_ci if (unlikely(ndst->error)) { 908c2ecf20Sopenharmony_ci pr_err("no route to %pI6\n", daddr); 918c2ecf20Sopenharmony_ci goto put; 928c2ecf20Sopenharmony_ci } 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci return ndst; 958c2ecf20Sopenharmony_ciput: 968c2ecf20Sopenharmony_ci dst_release(ndst); 978c2ecf20Sopenharmony_ci return NULL; 988c2ecf20Sopenharmony_ci} 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci#else 1018c2ecf20Sopenharmony_ci 1028c2ecf20Sopenharmony_cistatic struct dst_entry *rxe_find_route6(struct net_device *ndev, 1038c2ecf20Sopenharmony_ci struct in6_addr *saddr, 1048c2ecf20Sopenharmony_ci struct in6_addr *daddr) 1058c2ecf20Sopenharmony_ci{ 1068c2ecf20Sopenharmony_ci return NULL; 1078c2ecf20Sopenharmony_ci} 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci#endif 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_cistatic struct dst_entry *rxe_find_route(struct net_device *ndev, 1128c2ecf20Sopenharmony_ci struct rxe_qp *qp, 1138c2ecf20Sopenharmony_ci struct rxe_av *av) 1148c2ecf20Sopenharmony_ci{ 1158c2ecf20Sopenharmony_ci struct dst_entry *dst = NULL; 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_ci if (qp_type(qp) == IB_QPT_RC) 1188c2ecf20Sopenharmony_ci dst = sk_dst_get(qp->sk->sk); 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci if (!dst || !dst_check(dst, qp->dst_cookie)) { 1218c2ecf20Sopenharmony_ci if (dst) 1228c2ecf20Sopenharmony_ci dst_release(dst); 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci if (av->network_type == RXE_NETWORK_TYPE_IPV4) { 1258c2ecf20Sopenharmony_ci struct in_addr *saddr; 1268c2ecf20Sopenharmony_ci struct in_addr *daddr; 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci saddr = &av->sgid_addr._sockaddr_in.sin_addr; 1298c2ecf20Sopenharmony_ci daddr = &av->dgid_addr._sockaddr_in.sin_addr; 1308c2ecf20Sopenharmony_ci dst = rxe_find_route4(ndev, saddr, daddr); 1318c2ecf20Sopenharmony_ci } else if (av->network_type == RXE_NETWORK_TYPE_IPV6) { 1328c2ecf20Sopenharmony_ci struct in6_addr *saddr6; 1338c2ecf20Sopenharmony_ci struct in6_addr *daddr6; 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr; 1368c2ecf20Sopenharmony_ci daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr; 1378c2ecf20Sopenharmony_ci dst = rxe_find_route6(ndev, saddr6, daddr6); 1388c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 1398c2ecf20Sopenharmony_ci if (dst) 1408c2ecf20Sopenharmony_ci qp->dst_cookie = 1418c2ecf20Sopenharmony_ci rt6_get_cookie((struct rt6_info *)dst); 1428c2ecf20Sopenharmony_ci#endif 1438c2ecf20Sopenharmony_ci } 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_ci if (dst && (qp_type(qp) == IB_QPT_RC)) { 1468c2ecf20Sopenharmony_ci dst_hold(dst); 1478c2ecf20Sopenharmony_ci sk_dst_set(qp->sk->sk, dst); 1488c2ecf20Sopenharmony_ci } 1498c2ecf20Sopenharmony_ci } 1508c2ecf20Sopenharmony_ci return dst; 1518c2ecf20Sopenharmony_ci} 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_cistatic int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) 1548c2ecf20Sopenharmony_ci{ 1558c2ecf20Sopenharmony_ci struct udphdr *udph; 1568c2ecf20Sopenharmony_ci struct net_device *ndev = skb->dev; 1578c2ecf20Sopenharmony_ci struct net_device *rdev = ndev; 1588c2ecf20Sopenharmony_ci struct rxe_dev *rxe = rxe_get_dev_from_net(ndev); 1598c2ecf20Sopenharmony_ci struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); 1608c2ecf20Sopenharmony_ci 1618c2ecf20Sopenharmony_ci if (!rxe && is_vlan_dev(rdev)) { 1628c2ecf20Sopenharmony_ci rdev = vlan_dev_real_dev(ndev); 1638c2ecf20Sopenharmony_ci rxe = rxe_get_dev_from_net(rdev); 1648c2ecf20Sopenharmony_ci } 1658c2ecf20Sopenharmony_ci if (!rxe) 1668c2ecf20Sopenharmony_ci goto drop; 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci if (skb_linearize(skb)) { 1698c2ecf20Sopenharmony_ci pr_err("skb_linearize failed\n"); 1708c2ecf20Sopenharmony_ci ib_device_put(&rxe->ib_dev); 1718c2ecf20Sopenharmony_ci goto drop; 1728c2ecf20Sopenharmony_ci } 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci udph = udp_hdr(skb); 1758c2ecf20Sopenharmony_ci pkt->rxe = rxe; 1768c2ecf20Sopenharmony_ci pkt->port_num = 1; 1778c2ecf20Sopenharmony_ci pkt->hdr = (u8 *)(udph + 1); 1788c2ecf20Sopenharmony_ci pkt->mask = RXE_GRH_MASK; 1798c2ecf20Sopenharmony_ci pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph); 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_ci rxe_rcv(skb); 1828c2ecf20Sopenharmony_ci 1838c2ecf20Sopenharmony_ci /* 1848c2ecf20Sopenharmony_ci * FIXME: this is in the wrong place, it needs to be done when pkt is 1858c2ecf20Sopenharmony_ci * destroyed 1868c2ecf20Sopenharmony_ci */ 1878c2ecf20Sopenharmony_ci ib_device_put(&rxe->ib_dev); 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci return 0; 1908c2ecf20Sopenharmony_cidrop: 1918c2ecf20Sopenharmony_ci kfree_skb(skb); 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci return 0; 1948c2ecf20Sopenharmony_ci} 1958c2ecf20Sopenharmony_ci 1968c2ecf20Sopenharmony_cistatic struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, 1978c2ecf20Sopenharmony_ci bool ipv6) 1988c2ecf20Sopenharmony_ci{ 1998c2ecf20Sopenharmony_ci int err; 2008c2ecf20Sopenharmony_ci struct socket *sock; 2018c2ecf20Sopenharmony_ci struct udp_port_cfg udp_cfg = { }; 2028c2ecf20Sopenharmony_ci struct udp_tunnel_sock_cfg tnl_cfg = { }; 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_ci if (ipv6) { 2058c2ecf20Sopenharmony_ci udp_cfg.family = AF_INET6; 2068c2ecf20Sopenharmony_ci udp_cfg.ipv6_v6only = 1; 2078c2ecf20Sopenharmony_ci } else { 2088c2ecf20Sopenharmony_ci udp_cfg.family = AF_INET; 2098c2ecf20Sopenharmony_ci } 2108c2ecf20Sopenharmony_ci 2118c2ecf20Sopenharmony_ci udp_cfg.local_udp_port = port; 2128c2ecf20Sopenharmony_ci 2138c2ecf20Sopenharmony_ci /* Create UDP socket */ 2148c2ecf20Sopenharmony_ci err = udp_sock_create(net, &udp_cfg, &sock); 2158c2ecf20Sopenharmony_ci if (err < 0) 2168c2ecf20Sopenharmony_ci return ERR_PTR(err); 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci tnl_cfg.encap_type = 1; 2198c2ecf20Sopenharmony_ci tnl_cfg.encap_rcv = rxe_udp_encap_recv; 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ci /* Setup UDP tunnel */ 2228c2ecf20Sopenharmony_ci setup_udp_tunnel_sock(net, sock, &tnl_cfg); 2238c2ecf20Sopenharmony_ci 2248c2ecf20Sopenharmony_ci return sock; 2258c2ecf20Sopenharmony_ci} 2268c2ecf20Sopenharmony_ci 2278c2ecf20Sopenharmony_cistatic void rxe_release_udp_tunnel(struct socket *sk) 2288c2ecf20Sopenharmony_ci{ 2298c2ecf20Sopenharmony_ci if (sk) 2308c2ecf20Sopenharmony_ci udp_tunnel_sock_release(sk); 2318c2ecf20Sopenharmony_ci} 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_cistatic void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port, 2348c2ecf20Sopenharmony_ci __be16 dst_port) 2358c2ecf20Sopenharmony_ci{ 2368c2ecf20Sopenharmony_ci struct udphdr *udph; 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_ci __skb_push(skb, sizeof(*udph)); 2398c2ecf20Sopenharmony_ci skb_reset_transport_header(skb); 2408c2ecf20Sopenharmony_ci udph = udp_hdr(skb); 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci udph->dest = dst_port; 2438c2ecf20Sopenharmony_ci udph->source = src_port; 2448c2ecf20Sopenharmony_ci udph->len = htons(skb->len); 2458c2ecf20Sopenharmony_ci udph->check = 0; 2468c2ecf20Sopenharmony_ci} 2478c2ecf20Sopenharmony_ci 2488c2ecf20Sopenharmony_cistatic void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb, 2498c2ecf20Sopenharmony_ci __be32 saddr, __be32 daddr, __u8 proto, 2508c2ecf20Sopenharmony_ci __u8 tos, __u8 ttl, __be16 df, bool xnet) 2518c2ecf20Sopenharmony_ci{ 2528c2ecf20Sopenharmony_ci struct iphdr *iph; 2538c2ecf20Sopenharmony_ci 2548c2ecf20Sopenharmony_ci skb_scrub_packet(skb, xnet); 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_ci skb_clear_hash(skb); 2578c2ecf20Sopenharmony_ci skb_dst_set(skb, dst_clone(dst)); 2588c2ecf20Sopenharmony_ci memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 2598c2ecf20Sopenharmony_ci 2608c2ecf20Sopenharmony_ci skb_push(skb, sizeof(struct iphdr)); 2618c2ecf20Sopenharmony_ci skb_reset_network_header(skb); 2628c2ecf20Sopenharmony_ci 2638c2ecf20Sopenharmony_ci iph = ip_hdr(skb); 2648c2ecf20Sopenharmony_ci 2658c2ecf20Sopenharmony_ci iph->version = IPVERSION; 2668c2ecf20Sopenharmony_ci iph->ihl = sizeof(struct iphdr) >> 2; 2678c2ecf20Sopenharmony_ci iph->frag_off = df; 2688c2ecf20Sopenharmony_ci iph->protocol = proto; 2698c2ecf20Sopenharmony_ci iph->tos = tos; 2708c2ecf20Sopenharmony_ci iph->daddr = daddr; 2718c2ecf20Sopenharmony_ci iph->saddr = saddr; 2728c2ecf20Sopenharmony_ci iph->ttl = ttl; 2738c2ecf20Sopenharmony_ci __ip_select_ident(dev_net(dst->dev), iph, 2748c2ecf20Sopenharmony_ci skb_shinfo(skb)->gso_segs ?: 1); 2758c2ecf20Sopenharmony_ci iph->tot_len = htons(skb->len); 2768c2ecf20Sopenharmony_ci ip_send_check(iph); 2778c2ecf20Sopenharmony_ci} 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_cistatic void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, 2808c2ecf20Sopenharmony_ci struct in6_addr *saddr, struct in6_addr *daddr, 2818c2ecf20Sopenharmony_ci __u8 proto, __u8 prio, __u8 ttl) 2828c2ecf20Sopenharmony_ci{ 2838c2ecf20Sopenharmony_ci struct ipv6hdr *ip6h; 2848c2ecf20Sopenharmony_ci 2858c2ecf20Sopenharmony_ci memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 2868c2ecf20Sopenharmony_ci IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED 2878c2ecf20Sopenharmony_ci | IPSKB_REROUTED); 2888c2ecf20Sopenharmony_ci skb_dst_set(skb, dst_clone(dst)); 2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_ci __skb_push(skb, sizeof(*ip6h)); 2918c2ecf20Sopenharmony_ci skb_reset_network_header(skb); 2928c2ecf20Sopenharmony_ci ip6h = ipv6_hdr(skb); 2938c2ecf20Sopenharmony_ci ip6_flow_hdr(ip6h, prio, htonl(0)); 2948c2ecf20Sopenharmony_ci ip6h->payload_len = htons(skb->len); 2958c2ecf20Sopenharmony_ci ip6h->nexthdr = proto; 2968c2ecf20Sopenharmony_ci ip6h->hop_limit = ttl; 2978c2ecf20Sopenharmony_ci ip6h->daddr = *daddr; 2988c2ecf20Sopenharmony_ci ip6h->saddr = *saddr; 2998c2ecf20Sopenharmony_ci ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); 3008c2ecf20Sopenharmony_ci} 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_cistatic int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb) 3038c2ecf20Sopenharmony_ci{ 3048c2ecf20Sopenharmony_ci struct rxe_qp *qp = pkt->qp; 3058c2ecf20Sopenharmony_ci struct dst_entry *dst; 3068c2ecf20Sopenharmony_ci bool xnet = false; 3078c2ecf20Sopenharmony_ci __be16 df = htons(IP_DF); 3088c2ecf20Sopenharmony_ci struct rxe_av *av = rxe_get_av(pkt); 3098c2ecf20Sopenharmony_ci struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; 3108c2ecf20Sopenharmony_ci struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci dst = rxe_find_route(skb->dev, qp, av); 3138c2ecf20Sopenharmony_ci if (!dst) { 3148c2ecf20Sopenharmony_ci pr_err("Host not reachable\n"); 3158c2ecf20Sopenharmony_ci return -EHOSTUNREACH; 3168c2ecf20Sopenharmony_ci } 3178c2ecf20Sopenharmony_ci 3188c2ecf20Sopenharmony_ci prepare_udp_hdr(skb, cpu_to_be16(qp->src_port), 3198c2ecf20Sopenharmony_ci cpu_to_be16(ROCE_V2_UDP_DPORT)); 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP, 3228c2ecf20Sopenharmony_ci av->grh.traffic_class, av->grh.hop_limit, df, xnet); 3238c2ecf20Sopenharmony_ci 3248c2ecf20Sopenharmony_ci dst_release(dst); 3258c2ecf20Sopenharmony_ci return 0; 3268c2ecf20Sopenharmony_ci} 3278c2ecf20Sopenharmony_ci 3288c2ecf20Sopenharmony_cistatic int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb) 3298c2ecf20Sopenharmony_ci{ 3308c2ecf20Sopenharmony_ci struct rxe_qp *qp = pkt->qp; 3318c2ecf20Sopenharmony_ci struct dst_entry *dst; 3328c2ecf20Sopenharmony_ci struct rxe_av *av = rxe_get_av(pkt); 3338c2ecf20Sopenharmony_ci struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; 3348c2ecf20Sopenharmony_ci struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; 3358c2ecf20Sopenharmony_ci 3368c2ecf20Sopenharmony_ci dst = rxe_find_route(skb->dev, qp, av); 3378c2ecf20Sopenharmony_ci if (!dst) { 3388c2ecf20Sopenharmony_ci pr_err("Host not reachable\n"); 3398c2ecf20Sopenharmony_ci return -EHOSTUNREACH; 3408c2ecf20Sopenharmony_ci } 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci prepare_udp_hdr(skb, cpu_to_be16(qp->src_port), 3438c2ecf20Sopenharmony_ci cpu_to_be16(ROCE_V2_UDP_DPORT)); 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ci prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP, 3468c2ecf20Sopenharmony_ci av->grh.traffic_class, 3478c2ecf20Sopenharmony_ci av->grh.hop_limit); 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_ci dst_release(dst); 3508c2ecf20Sopenharmony_ci return 0; 3518c2ecf20Sopenharmony_ci} 3528c2ecf20Sopenharmony_ci 3538c2ecf20Sopenharmony_ciint rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc) 3548c2ecf20Sopenharmony_ci{ 3558c2ecf20Sopenharmony_ci int err = 0; 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_ci if (skb->protocol == htons(ETH_P_IP)) 3588c2ecf20Sopenharmony_ci err = prepare4(pkt, skb); 3598c2ecf20Sopenharmony_ci else if (skb->protocol == htons(ETH_P_IPV6)) 3608c2ecf20Sopenharmony_ci err = prepare6(pkt, skb); 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_ci *crc = rxe_icrc_hdr(pkt, skb); 3638c2ecf20Sopenharmony_ci 3648c2ecf20Sopenharmony_ci if (ether_addr_equal(skb->dev->dev_addr, rxe_get_av(pkt)->dmac)) 3658c2ecf20Sopenharmony_ci pkt->mask |= RXE_LOOPBACK_MASK; 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_ci return err; 3688c2ecf20Sopenharmony_ci} 3698c2ecf20Sopenharmony_ci 3708c2ecf20Sopenharmony_cistatic void rxe_skb_tx_dtor(struct sk_buff *skb) 3718c2ecf20Sopenharmony_ci{ 3728c2ecf20Sopenharmony_ci struct sock *sk = skb->sk; 3738c2ecf20Sopenharmony_ci struct rxe_qp *qp = sk->sk_user_data; 3748c2ecf20Sopenharmony_ci int skb_out = atomic_dec_return(&qp->skb_out); 3758c2ecf20Sopenharmony_ci 3768c2ecf20Sopenharmony_ci if (unlikely(qp->need_req_skb && 3778c2ecf20Sopenharmony_ci skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) 3788c2ecf20Sopenharmony_ci rxe_run_task(&qp->req.task, 1); 3798c2ecf20Sopenharmony_ci 3808c2ecf20Sopenharmony_ci rxe_drop_ref(qp); 3818c2ecf20Sopenharmony_ci} 3828c2ecf20Sopenharmony_ci 3838c2ecf20Sopenharmony_ciint rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb) 3848c2ecf20Sopenharmony_ci{ 3858c2ecf20Sopenharmony_ci int err; 3868c2ecf20Sopenharmony_ci 3878c2ecf20Sopenharmony_ci skb->destructor = rxe_skb_tx_dtor; 3888c2ecf20Sopenharmony_ci skb->sk = pkt->qp->sk->sk; 3898c2ecf20Sopenharmony_ci 3908c2ecf20Sopenharmony_ci rxe_add_ref(pkt->qp); 3918c2ecf20Sopenharmony_ci atomic_inc(&pkt->qp->skb_out); 3928c2ecf20Sopenharmony_ci 3938c2ecf20Sopenharmony_ci if (skb->protocol == htons(ETH_P_IP)) { 3948c2ecf20Sopenharmony_ci err = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); 3958c2ecf20Sopenharmony_ci } else if (skb->protocol == htons(ETH_P_IPV6)) { 3968c2ecf20Sopenharmony_ci err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); 3978c2ecf20Sopenharmony_ci } else { 3988c2ecf20Sopenharmony_ci pr_err("Unknown layer 3 protocol: %d\n", skb->protocol); 3998c2ecf20Sopenharmony_ci atomic_dec(&pkt->qp->skb_out); 4008c2ecf20Sopenharmony_ci rxe_drop_ref(pkt->qp); 4018c2ecf20Sopenharmony_ci kfree_skb(skb); 4028c2ecf20Sopenharmony_ci return -EINVAL; 4038c2ecf20Sopenharmony_ci } 4048c2ecf20Sopenharmony_ci 4058c2ecf20Sopenharmony_ci if (unlikely(net_xmit_eval(err))) { 4068c2ecf20Sopenharmony_ci pr_debug("error sending packet: %d\n", err); 4078c2ecf20Sopenharmony_ci return -EAGAIN; 4088c2ecf20Sopenharmony_ci } 4098c2ecf20Sopenharmony_ci 4108c2ecf20Sopenharmony_ci return 0; 4118c2ecf20Sopenharmony_ci} 4128c2ecf20Sopenharmony_ci 4138c2ecf20Sopenharmony_civoid rxe_loopback(struct sk_buff *skb) 4148c2ecf20Sopenharmony_ci{ 4158c2ecf20Sopenharmony_ci if (skb->protocol == htons(ETH_P_IP)) 4168c2ecf20Sopenharmony_ci skb_pull(skb, sizeof(struct iphdr)); 4178c2ecf20Sopenharmony_ci else 4188c2ecf20Sopenharmony_ci skb_pull(skb, sizeof(struct ipv6hdr)); 4198c2ecf20Sopenharmony_ci 4208c2ecf20Sopenharmony_ci rxe_rcv(skb); 4218c2ecf20Sopenharmony_ci} 4228c2ecf20Sopenharmony_ci 4238c2ecf20Sopenharmony_cistruct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, 4248c2ecf20Sopenharmony_ci int paylen, struct rxe_pkt_info *pkt) 4258c2ecf20Sopenharmony_ci{ 4268c2ecf20Sopenharmony_ci unsigned int hdr_len; 4278c2ecf20Sopenharmony_ci struct sk_buff *skb = NULL; 4288c2ecf20Sopenharmony_ci struct net_device *ndev; 4298c2ecf20Sopenharmony_ci const struct ib_gid_attr *attr; 4308c2ecf20Sopenharmony_ci const int port_num = 1; 4318c2ecf20Sopenharmony_ci 4328c2ecf20Sopenharmony_ci attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index); 4338c2ecf20Sopenharmony_ci if (IS_ERR(attr)) 4348c2ecf20Sopenharmony_ci return NULL; 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci if (av->network_type == RXE_NETWORK_TYPE_IPV4) 4378c2ecf20Sopenharmony_ci hdr_len = ETH_HLEN + sizeof(struct udphdr) + 4388c2ecf20Sopenharmony_ci sizeof(struct iphdr); 4398c2ecf20Sopenharmony_ci else 4408c2ecf20Sopenharmony_ci hdr_len = ETH_HLEN + sizeof(struct udphdr) + 4418c2ecf20Sopenharmony_ci sizeof(struct ipv6hdr); 4428c2ecf20Sopenharmony_ci 4438c2ecf20Sopenharmony_ci rcu_read_lock(); 4448c2ecf20Sopenharmony_ci ndev = rdma_read_gid_attr_ndev_rcu(attr); 4458c2ecf20Sopenharmony_ci if (IS_ERR(ndev)) { 4468c2ecf20Sopenharmony_ci rcu_read_unlock(); 4478c2ecf20Sopenharmony_ci goto out; 4488c2ecf20Sopenharmony_ci } 4498c2ecf20Sopenharmony_ci skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev), 4508c2ecf20Sopenharmony_ci GFP_ATOMIC); 4518c2ecf20Sopenharmony_ci 4528c2ecf20Sopenharmony_ci if (unlikely(!skb)) { 4538c2ecf20Sopenharmony_ci rcu_read_unlock(); 4548c2ecf20Sopenharmony_ci goto out; 4558c2ecf20Sopenharmony_ci } 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_ci skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(ndev)); 4588c2ecf20Sopenharmony_ci 4598c2ecf20Sopenharmony_ci /* FIXME: hold reference to this netdev until life of this skb. */ 4608c2ecf20Sopenharmony_ci skb->dev = ndev; 4618c2ecf20Sopenharmony_ci rcu_read_unlock(); 4628c2ecf20Sopenharmony_ci 4638c2ecf20Sopenharmony_ci if (av->network_type == RXE_NETWORK_TYPE_IPV4) 4648c2ecf20Sopenharmony_ci skb->protocol = htons(ETH_P_IP); 4658c2ecf20Sopenharmony_ci else 4668c2ecf20Sopenharmony_ci skb->protocol = htons(ETH_P_IPV6); 4678c2ecf20Sopenharmony_ci 4688c2ecf20Sopenharmony_ci pkt->rxe = rxe; 4698c2ecf20Sopenharmony_ci pkt->port_num = port_num; 4708c2ecf20Sopenharmony_ci pkt->hdr = skb_put_zero(skb, paylen); 4718c2ecf20Sopenharmony_ci pkt->mask |= RXE_GRH_MASK; 4728c2ecf20Sopenharmony_ci 4738c2ecf20Sopenharmony_ciout: 4748c2ecf20Sopenharmony_ci rdma_put_gid_attr(attr); 4758c2ecf20Sopenharmony_ci return skb; 4768c2ecf20Sopenharmony_ci} 4778c2ecf20Sopenharmony_ci 4788c2ecf20Sopenharmony_ci/* 4798c2ecf20Sopenharmony_ci * this is required by rxe_cfg to match rxe devices in 4808c2ecf20Sopenharmony_ci * /sys/class/infiniband up with their underlying ethernet devices 4818c2ecf20Sopenharmony_ci */ 4828c2ecf20Sopenharmony_ciconst char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num) 4838c2ecf20Sopenharmony_ci{ 4848c2ecf20Sopenharmony_ci return rxe->ndev->name; 4858c2ecf20Sopenharmony_ci} 4868c2ecf20Sopenharmony_ci 4878c2ecf20Sopenharmony_ciint rxe_net_add(const char *ibdev_name, struct net_device *ndev) 4888c2ecf20Sopenharmony_ci{ 4898c2ecf20Sopenharmony_ci int err; 4908c2ecf20Sopenharmony_ci struct rxe_dev *rxe = NULL; 4918c2ecf20Sopenharmony_ci 4928c2ecf20Sopenharmony_ci rxe = ib_alloc_device(rxe_dev, ib_dev); 4938c2ecf20Sopenharmony_ci if (!rxe) 4948c2ecf20Sopenharmony_ci return -ENOMEM; 4958c2ecf20Sopenharmony_ci 4968c2ecf20Sopenharmony_ci rxe->ndev = ndev; 4978c2ecf20Sopenharmony_ci 4988c2ecf20Sopenharmony_ci err = rxe_add(rxe, ndev->mtu, ibdev_name); 4998c2ecf20Sopenharmony_ci if (err) { 5008c2ecf20Sopenharmony_ci ib_dealloc_device(&rxe->ib_dev); 5018c2ecf20Sopenharmony_ci return err; 5028c2ecf20Sopenharmony_ci } 5038c2ecf20Sopenharmony_ci 5048c2ecf20Sopenharmony_ci return 0; 5058c2ecf20Sopenharmony_ci} 5068c2ecf20Sopenharmony_ci 5078c2ecf20Sopenharmony_cistatic void rxe_port_event(struct rxe_dev *rxe, 5088c2ecf20Sopenharmony_ci enum ib_event_type event) 5098c2ecf20Sopenharmony_ci{ 5108c2ecf20Sopenharmony_ci struct ib_event ev; 5118c2ecf20Sopenharmony_ci 5128c2ecf20Sopenharmony_ci ev.device = &rxe->ib_dev; 5138c2ecf20Sopenharmony_ci ev.element.port_num = 1; 5148c2ecf20Sopenharmony_ci ev.event = event; 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_ci ib_dispatch_event(&ev); 5178c2ecf20Sopenharmony_ci} 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_ci/* Caller must hold net_info_lock */ 5208c2ecf20Sopenharmony_civoid rxe_port_up(struct rxe_dev *rxe) 5218c2ecf20Sopenharmony_ci{ 5228c2ecf20Sopenharmony_ci struct rxe_port *port; 5238c2ecf20Sopenharmony_ci 5248c2ecf20Sopenharmony_ci port = &rxe->port; 5258c2ecf20Sopenharmony_ci port->attr.state = IB_PORT_ACTIVE; 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_ci rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE); 5288c2ecf20Sopenharmony_ci dev_info(&rxe->ib_dev.dev, "set active\n"); 5298c2ecf20Sopenharmony_ci} 5308c2ecf20Sopenharmony_ci 5318c2ecf20Sopenharmony_ci/* Caller must hold net_info_lock */ 5328c2ecf20Sopenharmony_civoid rxe_port_down(struct rxe_dev *rxe) 5338c2ecf20Sopenharmony_ci{ 5348c2ecf20Sopenharmony_ci struct rxe_port *port; 5358c2ecf20Sopenharmony_ci 5368c2ecf20Sopenharmony_ci port = &rxe->port; 5378c2ecf20Sopenharmony_ci port->attr.state = IB_PORT_DOWN; 5388c2ecf20Sopenharmony_ci 5398c2ecf20Sopenharmony_ci rxe_port_event(rxe, IB_EVENT_PORT_ERR); 5408c2ecf20Sopenharmony_ci rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED); 5418c2ecf20Sopenharmony_ci dev_info(&rxe->ib_dev.dev, "set down\n"); 5428c2ecf20Sopenharmony_ci} 5438c2ecf20Sopenharmony_ci 5448c2ecf20Sopenharmony_civoid rxe_set_port_state(struct rxe_dev *rxe) 5458c2ecf20Sopenharmony_ci{ 5468c2ecf20Sopenharmony_ci if (netif_running(rxe->ndev) && netif_carrier_ok(rxe->ndev)) 5478c2ecf20Sopenharmony_ci rxe_port_up(rxe); 5488c2ecf20Sopenharmony_ci else 5498c2ecf20Sopenharmony_ci rxe_port_down(rxe); 5508c2ecf20Sopenharmony_ci} 5518c2ecf20Sopenharmony_ci 5528c2ecf20Sopenharmony_cistatic int rxe_notify(struct notifier_block *not_blk, 5538c2ecf20Sopenharmony_ci unsigned long event, 5548c2ecf20Sopenharmony_ci void *arg) 5558c2ecf20Sopenharmony_ci{ 5568c2ecf20Sopenharmony_ci struct net_device *ndev = netdev_notifier_info_to_dev(arg); 5578c2ecf20Sopenharmony_ci struct rxe_dev *rxe = rxe_get_dev_from_net(ndev); 5588c2ecf20Sopenharmony_ci 5598c2ecf20Sopenharmony_ci if (!rxe) 5608c2ecf20Sopenharmony_ci return NOTIFY_OK; 5618c2ecf20Sopenharmony_ci 5628c2ecf20Sopenharmony_ci switch (event) { 5638c2ecf20Sopenharmony_ci case NETDEV_UNREGISTER: 5648c2ecf20Sopenharmony_ci ib_unregister_device_queued(&rxe->ib_dev); 5658c2ecf20Sopenharmony_ci break; 5668c2ecf20Sopenharmony_ci case NETDEV_UP: 5678c2ecf20Sopenharmony_ci rxe_port_up(rxe); 5688c2ecf20Sopenharmony_ci break; 5698c2ecf20Sopenharmony_ci case NETDEV_DOWN: 5708c2ecf20Sopenharmony_ci rxe_port_down(rxe); 5718c2ecf20Sopenharmony_ci break; 5728c2ecf20Sopenharmony_ci case NETDEV_CHANGEMTU: 5738c2ecf20Sopenharmony_ci pr_info("%s changed mtu to %d\n", ndev->name, ndev->mtu); 5748c2ecf20Sopenharmony_ci rxe_set_mtu(rxe, ndev->mtu); 5758c2ecf20Sopenharmony_ci break; 5768c2ecf20Sopenharmony_ci case NETDEV_CHANGE: 5778c2ecf20Sopenharmony_ci rxe_set_port_state(rxe); 5788c2ecf20Sopenharmony_ci break; 5798c2ecf20Sopenharmony_ci case NETDEV_REBOOT: 5808c2ecf20Sopenharmony_ci case NETDEV_GOING_DOWN: 5818c2ecf20Sopenharmony_ci case NETDEV_CHANGEADDR: 5828c2ecf20Sopenharmony_ci case NETDEV_CHANGENAME: 5838c2ecf20Sopenharmony_ci case NETDEV_FEAT_CHANGE: 5848c2ecf20Sopenharmony_ci default: 5858c2ecf20Sopenharmony_ci pr_info("ignoring netdev event = %ld for %s\n", 5868c2ecf20Sopenharmony_ci event, ndev->name); 5878c2ecf20Sopenharmony_ci break; 5888c2ecf20Sopenharmony_ci } 5898c2ecf20Sopenharmony_ci 5908c2ecf20Sopenharmony_ci ib_device_put(&rxe->ib_dev); 5918c2ecf20Sopenharmony_ci return NOTIFY_OK; 5928c2ecf20Sopenharmony_ci} 5938c2ecf20Sopenharmony_ci 5948c2ecf20Sopenharmony_cistatic struct notifier_block rxe_net_notifier = { 5958c2ecf20Sopenharmony_ci .notifier_call = rxe_notify, 5968c2ecf20Sopenharmony_ci}; 5978c2ecf20Sopenharmony_ci 5988c2ecf20Sopenharmony_cistatic int rxe_net_ipv4_init(void) 5998c2ecf20Sopenharmony_ci{ 6008c2ecf20Sopenharmony_ci recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, 6018c2ecf20Sopenharmony_ci htons(ROCE_V2_UDP_DPORT), false); 6028c2ecf20Sopenharmony_ci if (IS_ERR(recv_sockets.sk4)) { 6038c2ecf20Sopenharmony_ci recv_sockets.sk4 = NULL; 6048c2ecf20Sopenharmony_ci pr_err("Failed to create IPv4 UDP tunnel\n"); 6058c2ecf20Sopenharmony_ci return -1; 6068c2ecf20Sopenharmony_ci } 6078c2ecf20Sopenharmony_ci 6088c2ecf20Sopenharmony_ci return 0; 6098c2ecf20Sopenharmony_ci} 6108c2ecf20Sopenharmony_ci 6118c2ecf20Sopenharmony_cistatic int rxe_net_ipv6_init(void) 6128c2ecf20Sopenharmony_ci{ 6138c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 6148c2ecf20Sopenharmony_ci 6158c2ecf20Sopenharmony_ci recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, 6168c2ecf20Sopenharmony_ci htons(ROCE_V2_UDP_DPORT), true); 6178c2ecf20Sopenharmony_ci if (PTR_ERR(recv_sockets.sk6) == -EAFNOSUPPORT) { 6188c2ecf20Sopenharmony_ci recv_sockets.sk6 = NULL; 6198c2ecf20Sopenharmony_ci pr_warn("IPv6 is not supported, can not create a UDPv6 socket\n"); 6208c2ecf20Sopenharmony_ci return 0; 6218c2ecf20Sopenharmony_ci } 6228c2ecf20Sopenharmony_ci 6238c2ecf20Sopenharmony_ci if (IS_ERR(recv_sockets.sk6)) { 6248c2ecf20Sopenharmony_ci recv_sockets.sk6 = NULL; 6258c2ecf20Sopenharmony_ci pr_err("Failed to create IPv6 UDP tunnel\n"); 6268c2ecf20Sopenharmony_ci return -1; 6278c2ecf20Sopenharmony_ci } 6288c2ecf20Sopenharmony_ci#endif 6298c2ecf20Sopenharmony_ci return 0; 6308c2ecf20Sopenharmony_ci} 6318c2ecf20Sopenharmony_ci 6328c2ecf20Sopenharmony_civoid rxe_net_exit(void) 6338c2ecf20Sopenharmony_ci{ 6348c2ecf20Sopenharmony_ci rxe_release_udp_tunnel(recv_sockets.sk6); 6358c2ecf20Sopenharmony_ci rxe_release_udp_tunnel(recv_sockets.sk4); 6368c2ecf20Sopenharmony_ci unregister_netdevice_notifier(&rxe_net_notifier); 6378c2ecf20Sopenharmony_ci} 6388c2ecf20Sopenharmony_ci 6398c2ecf20Sopenharmony_ciint rxe_net_init(void) 6408c2ecf20Sopenharmony_ci{ 6418c2ecf20Sopenharmony_ci int err; 6428c2ecf20Sopenharmony_ci 6438c2ecf20Sopenharmony_ci recv_sockets.sk6 = NULL; 6448c2ecf20Sopenharmony_ci 6458c2ecf20Sopenharmony_ci err = rxe_net_ipv4_init(); 6468c2ecf20Sopenharmony_ci if (err) 6478c2ecf20Sopenharmony_ci return err; 6488c2ecf20Sopenharmony_ci err = rxe_net_ipv6_init(); 6498c2ecf20Sopenharmony_ci if (err) 6508c2ecf20Sopenharmony_ci goto err_out; 6518c2ecf20Sopenharmony_ci err = register_netdevice_notifier(&rxe_net_notifier); 6528c2ecf20Sopenharmony_ci if (err) { 6538c2ecf20Sopenharmony_ci pr_err("Failed to register netdev notifier\n"); 6548c2ecf20Sopenharmony_ci goto err_out; 6558c2ecf20Sopenharmony_ci } 6568c2ecf20Sopenharmony_ci return 0; 6578c2ecf20Sopenharmony_cierr_out: 6588c2ecf20Sopenharmony_ci rxe_net_exit(); 6598c2ecf20Sopenharmony_ci return err; 6608c2ecf20Sopenharmony_ci} 661