18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * This software is available to you under a choice of one of two 58c2ecf20Sopenharmony_ci * licenses. You may choose to be licensed under the terms of the GNU 68c2ecf20Sopenharmony_ci * General Public License (GPL) Version 2, available from the file 78c2ecf20Sopenharmony_ci * COPYING in the main directory of this source tree, or the 88c2ecf20Sopenharmony_ci * OpenIB.org BSD license below: 98c2ecf20Sopenharmony_ci * 108c2ecf20Sopenharmony_ci * Redistribution and use in source and binary forms, with or 118c2ecf20Sopenharmony_ci * without modification, are permitted provided that the following 128c2ecf20Sopenharmony_ci * conditions are met: 138c2ecf20Sopenharmony_ci * 148c2ecf20Sopenharmony_ci * - Redistributions of source code must retain the above 158c2ecf20Sopenharmony_ci * copyright notice, this list of conditions and the following 168c2ecf20Sopenharmony_ci * disclaimer. 178c2ecf20Sopenharmony_ci * 188c2ecf20Sopenharmony_ci * - Redistributions in binary form must reproduce the above 198c2ecf20Sopenharmony_ci * copyright notice, this list of conditions and the following 208c2ecf20Sopenharmony_ci * disclaimer in the documentation and/or other materials 218c2ecf20Sopenharmony_ci * provided with the distribution. 228c2ecf20Sopenharmony_ci * 238c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 248c2ecf20Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 258c2ecf20Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 268c2ecf20Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 278c2ecf20Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 288c2ecf20Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 298c2ecf20Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 308c2ecf20Sopenharmony_ci * SOFTWARE. 318c2ecf20Sopenharmony_ci * 328c2ecf20Sopenharmony_ci */ 338c2ecf20Sopenharmony_ci#include <linux/kernel.h> 348c2ecf20Sopenharmony_ci#include <linux/slab.h> 358c2ecf20Sopenharmony_ci#include <linux/in.h> 368c2ecf20Sopenharmony_ci#include <linux/module.h> 378c2ecf20Sopenharmony_ci#include <net/tcp.h> 388c2ecf20Sopenharmony_ci#include <net/net_namespace.h> 398c2ecf20Sopenharmony_ci#include <net/netns/generic.h> 408c2ecf20Sopenharmony_ci#include <net/addrconf.h> 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci#include "rds.h" 438c2ecf20Sopenharmony_ci#include "tcp.h" 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci/* only for info exporting */ 468c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(rds_tcp_tc_list_lock); 478c2ecf20Sopenharmony_cistatic LIST_HEAD(rds_tcp_tc_list); 488c2ecf20Sopenharmony_ci 498c2ecf20Sopenharmony_ci/* rds_tcp_tc_count counts only IPv4 connections. 508c2ecf20Sopenharmony_ci * rds6_tcp_tc_count counts both IPv4 and IPv6 connections. 518c2ecf20Sopenharmony_ci */ 528c2ecf20Sopenharmony_cistatic unsigned int rds_tcp_tc_count; 538c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 548c2ecf20Sopenharmony_cistatic unsigned int rds6_tcp_tc_count; 558c2ecf20Sopenharmony_ci#endif 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_ci/* Track rds_tcp_connection structs so they can be cleaned up */ 588c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(rds_tcp_conn_lock); 598c2ecf20Sopenharmony_cistatic LIST_HEAD(rds_tcp_conn_list); 608c2ecf20Sopenharmony_cistatic atomic_t rds_tcp_unloading = ATOMIC_INIT(0); 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_cistatic struct kmem_cache *rds_tcp_conn_slab; 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_cistatic int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, 658c2ecf20Sopenharmony_ci void *buffer, size_t *lenp, loff_t *fpos); 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_cistatic int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF; 688c2ecf20Sopenharmony_cistatic int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF; 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_cistatic struct ctl_table rds_tcp_sysctl_table[] = { 718c2ecf20Sopenharmony_ci#define RDS_TCP_SNDBUF 0 728c2ecf20Sopenharmony_ci { 738c2ecf20Sopenharmony_ci .procname = "rds_tcp_sndbuf", 748c2ecf20Sopenharmony_ci /* data is per-net pointer */ 758c2ecf20Sopenharmony_ci .maxlen = sizeof(int), 768c2ecf20Sopenharmony_ci .mode = 0644, 778c2ecf20Sopenharmony_ci .proc_handler = rds_tcp_skbuf_handler, 788c2ecf20Sopenharmony_ci .extra1 = &rds_tcp_min_sndbuf, 798c2ecf20Sopenharmony_ci }, 808c2ecf20Sopenharmony_ci#define RDS_TCP_RCVBUF 1 818c2ecf20Sopenharmony_ci { 828c2ecf20Sopenharmony_ci .procname = "rds_tcp_rcvbuf", 838c2ecf20Sopenharmony_ci /* data is per-net pointer */ 848c2ecf20Sopenharmony_ci .maxlen = sizeof(int), 858c2ecf20Sopenharmony_ci .mode = 0644, 868c2ecf20Sopenharmony_ci .proc_handler = rds_tcp_skbuf_handler, 878c2ecf20Sopenharmony_ci .extra1 = &rds_tcp_min_rcvbuf, 888c2ecf20Sopenharmony_ci }, 898c2ecf20Sopenharmony_ci { } 908c2ecf20Sopenharmony_ci}; 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ciu32 rds_tcp_write_seq(struct rds_tcp_connection *tc) 938c2ecf20Sopenharmony_ci{ 948c2ecf20Sopenharmony_ci /* seq# of the last byte of data in tcp send buffer */ 958c2ecf20Sopenharmony_ci return tcp_sk(tc->t_sock->sk)->write_seq; 968c2ecf20Sopenharmony_ci} 978c2ecf20Sopenharmony_ci 988c2ecf20Sopenharmony_ciu32 rds_tcp_snd_una(struct rds_tcp_connection *tc) 998c2ecf20Sopenharmony_ci{ 1008c2ecf20Sopenharmony_ci return tcp_sk(tc->t_sock->sk)->snd_una; 1018c2ecf20Sopenharmony_ci} 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_civoid rds_tcp_restore_callbacks(struct socket *sock, 1048c2ecf20Sopenharmony_ci struct rds_tcp_connection *tc) 1058c2ecf20Sopenharmony_ci{ 1068c2ecf20Sopenharmony_ci rdsdebug("restoring sock %p callbacks from tc %p\n", sock, tc); 1078c2ecf20Sopenharmony_ci write_lock_bh(&sock->sk->sk_callback_lock); 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci /* done under the callback_lock to serialize with write_space */ 1108c2ecf20Sopenharmony_ci spin_lock(&rds_tcp_tc_list_lock); 1118c2ecf20Sopenharmony_ci list_del_init(&tc->t_list_item); 1128c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 1138c2ecf20Sopenharmony_ci rds6_tcp_tc_count--; 1148c2ecf20Sopenharmony_ci#endif 1158c2ecf20Sopenharmony_ci if (!tc->t_cpath->cp_conn->c_isv6) 1168c2ecf20Sopenharmony_ci rds_tcp_tc_count--; 1178c2ecf20Sopenharmony_ci spin_unlock(&rds_tcp_tc_list_lock); 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_ci tc->t_sock = NULL; 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_ci sock->sk->sk_write_space = tc->t_orig_write_space; 1228c2ecf20Sopenharmony_ci sock->sk->sk_data_ready = tc->t_orig_data_ready; 1238c2ecf20Sopenharmony_ci sock->sk->sk_state_change = tc->t_orig_state_change; 1248c2ecf20Sopenharmony_ci sock->sk->sk_user_data = NULL; 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_ci write_unlock_bh(&sock->sk->sk_callback_lock); 1278c2ecf20Sopenharmony_ci} 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_ci/* 1308c2ecf20Sopenharmony_ci * rds_tcp_reset_callbacks() switches the to the new sock and 1318c2ecf20Sopenharmony_ci * returns the existing tc->t_sock. 1328c2ecf20Sopenharmony_ci * 1338c2ecf20Sopenharmony_ci * The only functions that set tc->t_sock are rds_tcp_set_callbacks 1348c2ecf20Sopenharmony_ci * and rds_tcp_reset_callbacks. Send and receive trust that 1358c2ecf20Sopenharmony_ci * it is set. The absence of RDS_CONN_UP bit protects those paths 1368c2ecf20Sopenharmony_ci * from being called while it isn't set. 1378c2ecf20Sopenharmony_ci */ 1388c2ecf20Sopenharmony_civoid rds_tcp_reset_callbacks(struct socket *sock, 1398c2ecf20Sopenharmony_ci struct rds_conn_path *cp) 1408c2ecf20Sopenharmony_ci{ 1418c2ecf20Sopenharmony_ci struct rds_tcp_connection *tc = cp->cp_transport_data; 1428c2ecf20Sopenharmony_ci struct socket *osock = tc->t_sock; 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_ci if (!osock) 1458c2ecf20Sopenharmony_ci goto newsock; 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci /* Need to resolve a duelling SYN between peers. 1488c2ecf20Sopenharmony_ci * We have an outstanding SYN to this peer, which may 1498c2ecf20Sopenharmony_ci * potentially have transitioned to the RDS_CONN_UP state, 1508c2ecf20Sopenharmony_ci * so we must quiesce any send threads before resetting 1518c2ecf20Sopenharmony_ci * cp_transport_data. We quiesce these threads by setting 1528c2ecf20Sopenharmony_ci * cp_state to something other than RDS_CONN_UP, and then 1538c2ecf20Sopenharmony_ci * waiting for any existing threads in rds_send_xmit to 1548c2ecf20Sopenharmony_ci * complete release_in_xmit(). (Subsequent threads entering 1558c2ecf20Sopenharmony_ci * rds_send_xmit() will bail on !rds_conn_up(). 1568c2ecf20Sopenharmony_ci * 1578c2ecf20Sopenharmony_ci * However an incoming syn-ack at this point would end up 1588c2ecf20Sopenharmony_ci * marking the conn as RDS_CONN_UP, and would again permit 1598c2ecf20Sopenharmony_ci * rds_send_xmi() threads through, so ideally we would 1608c2ecf20Sopenharmony_ci * synchronize on RDS_CONN_UP after lock_sock(), but cannot 1618c2ecf20Sopenharmony_ci * do that: waiting on !RDS_IN_XMIT after lock_sock() may 1628c2ecf20Sopenharmony_ci * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT 1638c2ecf20Sopenharmony_ci * would not get set. As a result, we set c_state to 1648c2ecf20Sopenharmony_ci * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change 1658c2ecf20Sopenharmony_ci * cannot mark rds_conn_path_up() in the window before lock_sock() 1668c2ecf20Sopenharmony_ci */ 1678c2ecf20Sopenharmony_ci atomic_set(&cp->cp_state, RDS_CONN_RESETTING); 1688c2ecf20Sopenharmony_ci wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags)); 1698c2ecf20Sopenharmony_ci /* reset receive side state for rds_tcp_data_recv() for osock */ 1708c2ecf20Sopenharmony_ci cancel_delayed_work_sync(&cp->cp_send_w); 1718c2ecf20Sopenharmony_ci cancel_delayed_work_sync(&cp->cp_recv_w); 1728c2ecf20Sopenharmony_ci lock_sock(osock->sk); 1738c2ecf20Sopenharmony_ci if (tc->t_tinc) { 1748c2ecf20Sopenharmony_ci rds_inc_put(&tc->t_tinc->ti_inc); 1758c2ecf20Sopenharmony_ci tc->t_tinc = NULL; 1768c2ecf20Sopenharmony_ci } 1778c2ecf20Sopenharmony_ci tc->t_tinc_hdr_rem = sizeof(struct rds_header); 1788c2ecf20Sopenharmony_ci tc->t_tinc_data_rem = 0; 1798c2ecf20Sopenharmony_ci rds_tcp_restore_callbacks(osock, tc); 1808c2ecf20Sopenharmony_ci release_sock(osock->sk); 1818c2ecf20Sopenharmony_ci sock_release(osock); 1828c2ecf20Sopenharmony_cinewsock: 1838c2ecf20Sopenharmony_ci rds_send_path_reset(cp); 1848c2ecf20Sopenharmony_ci lock_sock(sock->sk); 1858c2ecf20Sopenharmony_ci rds_tcp_set_callbacks(sock, cp); 1868c2ecf20Sopenharmony_ci release_sock(sock->sk); 1878c2ecf20Sopenharmony_ci} 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci/* Add tc to rds_tcp_tc_list and set tc->t_sock. See comments 1908c2ecf20Sopenharmony_ci * above rds_tcp_reset_callbacks for notes about synchronization 1918c2ecf20Sopenharmony_ci * with data path 1928c2ecf20Sopenharmony_ci */ 1938c2ecf20Sopenharmony_civoid rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) 1948c2ecf20Sopenharmony_ci{ 1958c2ecf20Sopenharmony_ci struct rds_tcp_connection *tc = cp->cp_transport_data; 1968c2ecf20Sopenharmony_ci 1978c2ecf20Sopenharmony_ci rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc); 1988c2ecf20Sopenharmony_ci write_lock_bh(&sock->sk->sk_callback_lock); 1998c2ecf20Sopenharmony_ci 2008c2ecf20Sopenharmony_ci /* done under the callback_lock to serialize with write_space */ 2018c2ecf20Sopenharmony_ci spin_lock(&rds_tcp_tc_list_lock); 2028c2ecf20Sopenharmony_ci list_add_tail(&tc->t_list_item, &rds_tcp_tc_list); 2038c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 2048c2ecf20Sopenharmony_ci rds6_tcp_tc_count++; 2058c2ecf20Sopenharmony_ci#endif 2068c2ecf20Sopenharmony_ci if (!tc->t_cpath->cp_conn->c_isv6) 2078c2ecf20Sopenharmony_ci rds_tcp_tc_count++; 2088c2ecf20Sopenharmony_ci spin_unlock(&rds_tcp_tc_list_lock); 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci /* accepted sockets need our listen data ready undone */ 2118c2ecf20Sopenharmony_ci if (sock->sk->sk_data_ready == rds_tcp_listen_data_ready) 2128c2ecf20Sopenharmony_ci sock->sk->sk_data_ready = sock->sk->sk_user_data; 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci tc->t_sock = sock; 2158c2ecf20Sopenharmony_ci tc->t_cpath = cp; 2168c2ecf20Sopenharmony_ci tc->t_orig_data_ready = sock->sk->sk_data_ready; 2178c2ecf20Sopenharmony_ci tc->t_orig_write_space = sock->sk->sk_write_space; 2188c2ecf20Sopenharmony_ci tc->t_orig_state_change = sock->sk->sk_state_change; 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci sock->sk->sk_user_data = cp; 2218c2ecf20Sopenharmony_ci sock->sk->sk_data_ready = rds_tcp_data_ready; 2228c2ecf20Sopenharmony_ci sock->sk->sk_write_space = rds_tcp_write_space; 2238c2ecf20Sopenharmony_ci sock->sk->sk_state_change = rds_tcp_state_change; 2248c2ecf20Sopenharmony_ci 2258c2ecf20Sopenharmony_ci write_unlock_bh(&sock->sk->sk_callback_lock); 2268c2ecf20Sopenharmony_ci} 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci/* Handle RDS_INFO_TCP_SOCKETS socket option. It only returns IPv4 2298c2ecf20Sopenharmony_ci * connections for backward compatibility. 2308c2ecf20Sopenharmony_ci */ 2318c2ecf20Sopenharmony_cistatic void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, 2328c2ecf20Sopenharmony_ci struct rds_info_iterator *iter, 2338c2ecf20Sopenharmony_ci struct rds_info_lengths *lens) 2348c2ecf20Sopenharmony_ci{ 2358c2ecf20Sopenharmony_ci struct rds_info_tcp_socket tsinfo; 2368c2ecf20Sopenharmony_ci struct rds_tcp_connection *tc; 2378c2ecf20Sopenharmony_ci unsigned long flags; 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_ci spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); 2408c2ecf20Sopenharmony_ci 2418c2ecf20Sopenharmony_ci if (len / sizeof(tsinfo) < rds_tcp_tc_count) 2428c2ecf20Sopenharmony_ci goto out; 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { 2458c2ecf20Sopenharmony_ci struct inet_sock *inet = inet_sk(tc->t_sock->sk); 2468c2ecf20Sopenharmony_ci 2478c2ecf20Sopenharmony_ci if (tc->t_cpath->cp_conn->c_isv6) 2488c2ecf20Sopenharmony_ci continue; 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ci tsinfo.local_addr = inet->inet_saddr; 2518c2ecf20Sopenharmony_ci tsinfo.local_port = inet->inet_sport; 2528c2ecf20Sopenharmony_ci tsinfo.peer_addr = inet->inet_daddr; 2538c2ecf20Sopenharmony_ci tsinfo.peer_port = inet->inet_dport; 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_ci tsinfo.hdr_rem = tc->t_tinc_hdr_rem; 2568c2ecf20Sopenharmony_ci tsinfo.data_rem = tc->t_tinc_data_rem; 2578c2ecf20Sopenharmony_ci tsinfo.last_sent_nxt = tc->t_last_sent_nxt; 2588c2ecf20Sopenharmony_ci tsinfo.last_expected_una = tc->t_last_expected_una; 2598c2ecf20Sopenharmony_ci tsinfo.last_seen_una = tc->t_last_seen_una; 2608c2ecf20Sopenharmony_ci tsinfo.tos = tc->t_cpath->cp_conn->c_tos; 2618c2ecf20Sopenharmony_ci 2628c2ecf20Sopenharmony_ci rds_info_copy(iter, &tsinfo, sizeof(tsinfo)); 2638c2ecf20Sopenharmony_ci } 2648c2ecf20Sopenharmony_ci 2658c2ecf20Sopenharmony_ciout: 2668c2ecf20Sopenharmony_ci lens->nr = rds_tcp_tc_count; 2678c2ecf20Sopenharmony_ci lens->each = sizeof(tsinfo); 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags); 2708c2ecf20Sopenharmony_ci} 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 2738c2ecf20Sopenharmony_ci/* Handle RDS6_INFO_TCP_SOCKETS socket option. It returns both IPv4 and 2748c2ecf20Sopenharmony_ci * IPv6 connections. IPv4 connection address is returned in an IPv4 mapped 2758c2ecf20Sopenharmony_ci * address. 2768c2ecf20Sopenharmony_ci */ 2778c2ecf20Sopenharmony_cistatic void rds6_tcp_tc_info(struct socket *sock, unsigned int len, 2788c2ecf20Sopenharmony_ci struct rds_info_iterator *iter, 2798c2ecf20Sopenharmony_ci struct rds_info_lengths *lens) 2808c2ecf20Sopenharmony_ci{ 2818c2ecf20Sopenharmony_ci struct rds6_info_tcp_socket tsinfo6; 2828c2ecf20Sopenharmony_ci struct rds_tcp_connection *tc; 2838c2ecf20Sopenharmony_ci unsigned long flags; 2848c2ecf20Sopenharmony_ci 2858c2ecf20Sopenharmony_ci spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_ci if (len / sizeof(tsinfo6) < rds6_tcp_tc_count) 2888c2ecf20Sopenharmony_ci goto out; 2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_ci list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { 2918c2ecf20Sopenharmony_ci struct sock *sk = tc->t_sock->sk; 2928c2ecf20Sopenharmony_ci struct inet_sock *inet = inet_sk(sk); 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci tsinfo6.local_addr = sk->sk_v6_rcv_saddr; 2958c2ecf20Sopenharmony_ci tsinfo6.local_port = inet->inet_sport; 2968c2ecf20Sopenharmony_ci tsinfo6.peer_addr = sk->sk_v6_daddr; 2978c2ecf20Sopenharmony_ci tsinfo6.peer_port = inet->inet_dport; 2988c2ecf20Sopenharmony_ci 2998c2ecf20Sopenharmony_ci tsinfo6.hdr_rem = tc->t_tinc_hdr_rem; 3008c2ecf20Sopenharmony_ci tsinfo6.data_rem = tc->t_tinc_data_rem; 3018c2ecf20Sopenharmony_ci tsinfo6.last_sent_nxt = tc->t_last_sent_nxt; 3028c2ecf20Sopenharmony_ci tsinfo6.last_expected_una = tc->t_last_expected_una; 3038c2ecf20Sopenharmony_ci tsinfo6.last_seen_una = tc->t_last_seen_una; 3048c2ecf20Sopenharmony_ci 3058c2ecf20Sopenharmony_ci rds_info_copy(iter, &tsinfo6, sizeof(tsinfo6)); 3068c2ecf20Sopenharmony_ci } 3078c2ecf20Sopenharmony_ci 3088c2ecf20Sopenharmony_ciout: 3098c2ecf20Sopenharmony_ci lens->nr = rds6_tcp_tc_count; 3108c2ecf20Sopenharmony_ci lens->each = sizeof(tsinfo6); 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags); 3138c2ecf20Sopenharmony_ci} 3148c2ecf20Sopenharmony_ci#endif 3158c2ecf20Sopenharmony_ci 3168c2ecf20Sopenharmony_ciint rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr, 3178c2ecf20Sopenharmony_ci __u32 scope_id) 3188c2ecf20Sopenharmony_ci{ 3198c2ecf20Sopenharmony_ci struct net_device *dev = NULL; 3208c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 3218c2ecf20Sopenharmony_ci int ret; 3228c2ecf20Sopenharmony_ci#endif 3238c2ecf20Sopenharmony_ci 3248c2ecf20Sopenharmony_ci if (ipv6_addr_v4mapped(addr)) { 3258c2ecf20Sopenharmony_ci if (inet_addr_type(net, addr->s6_addr32[3]) == RTN_LOCAL) 3268c2ecf20Sopenharmony_ci return 0; 3278c2ecf20Sopenharmony_ci return -EADDRNOTAVAIL; 3288c2ecf20Sopenharmony_ci } 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_ci /* If the scope_id is specified, check only those addresses 3318c2ecf20Sopenharmony_ci * hosted on the specified interface. 3328c2ecf20Sopenharmony_ci */ 3338c2ecf20Sopenharmony_ci if (scope_id != 0) { 3348c2ecf20Sopenharmony_ci rcu_read_lock(); 3358c2ecf20Sopenharmony_ci dev = dev_get_by_index_rcu(net, scope_id); 3368c2ecf20Sopenharmony_ci /* scope_id is not valid... */ 3378c2ecf20Sopenharmony_ci if (!dev) { 3388c2ecf20Sopenharmony_ci rcu_read_unlock(); 3398c2ecf20Sopenharmony_ci return -EADDRNOTAVAIL; 3408c2ecf20Sopenharmony_ci } 3418c2ecf20Sopenharmony_ci rcu_read_unlock(); 3428c2ecf20Sopenharmony_ci } 3438c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 3448c2ecf20Sopenharmony_ci ret = ipv6_chk_addr(net, addr, dev, 0); 3458c2ecf20Sopenharmony_ci if (ret) 3468c2ecf20Sopenharmony_ci return 0; 3478c2ecf20Sopenharmony_ci#endif 3488c2ecf20Sopenharmony_ci return -EADDRNOTAVAIL; 3498c2ecf20Sopenharmony_ci} 3508c2ecf20Sopenharmony_ci 3518c2ecf20Sopenharmony_cistatic void rds_tcp_conn_free(void *arg) 3528c2ecf20Sopenharmony_ci{ 3538c2ecf20Sopenharmony_ci struct rds_tcp_connection *tc = arg; 3548c2ecf20Sopenharmony_ci unsigned long flags; 3558c2ecf20Sopenharmony_ci 3568c2ecf20Sopenharmony_ci rdsdebug("freeing tc %p\n", tc); 3578c2ecf20Sopenharmony_ci 3588c2ecf20Sopenharmony_ci spin_lock_irqsave(&rds_tcp_conn_lock, flags); 3598c2ecf20Sopenharmony_ci if (!tc->t_tcp_node_detached) 3608c2ecf20Sopenharmony_ci list_del(&tc->t_tcp_node); 3618c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); 3628c2ecf20Sopenharmony_ci 3638c2ecf20Sopenharmony_ci kmem_cache_free(rds_tcp_conn_slab, tc); 3648c2ecf20Sopenharmony_ci} 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_cistatic int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) 3678c2ecf20Sopenharmony_ci{ 3688c2ecf20Sopenharmony_ci struct rds_tcp_connection *tc; 3698c2ecf20Sopenharmony_ci int i, j; 3708c2ecf20Sopenharmony_ci int ret = 0; 3718c2ecf20Sopenharmony_ci 3728c2ecf20Sopenharmony_ci for (i = 0; i < RDS_MPATH_WORKERS; i++) { 3738c2ecf20Sopenharmony_ci tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); 3748c2ecf20Sopenharmony_ci if (!tc) { 3758c2ecf20Sopenharmony_ci ret = -ENOMEM; 3768c2ecf20Sopenharmony_ci goto fail; 3778c2ecf20Sopenharmony_ci } 3788c2ecf20Sopenharmony_ci mutex_init(&tc->t_conn_path_lock); 3798c2ecf20Sopenharmony_ci tc->t_sock = NULL; 3808c2ecf20Sopenharmony_ci tc->t_tinc = NULL; 3818c2ecf20Sopenharmony_ci tc->t_tinc_hdr_rem = sizeof(struct rds_header); 3828c2ecf20Sopenharmony_ci tc->t_tinc_data_rem = 0; 3838c2ecf20Sopenharmony_ci 3848c2ecf20Sopenharmony_ci conn->c_path[i].cp_transport_data = tc; 3858c2ecf20Sopenharmony_ci tc->t_cpath = &conn->c_path[i]; 3868c2ecf20Sopenharmony_ci tc->t_tcp_node_detached = true; 3878c2ecf20Sopenharmony_ci 3888c2ecf20Sopenharmony_ci rdsdebug("rds_conn_path [%d] tc %p\n", i, 3898c2ecf20Sopenharmony_ci conn->c_path[i].cp_transport_data); 3908c2ecf20Sopenharmony_ci } 3918c2ecf20Sopenharmony_ci spin_lock_irq(&rds_tcp_conn_lock); 3928c2ecf20Sopenharmony_ci for (i = 0; i < RDS_MPATH_WORKERS; i++) { 3938c2ecf20Sopenharmony_ci tc = conn->c_path[i].cp_transport_data; 3948c2ecf20Sopenharmony_ci tc->t_tcp_node_detached = false; 3958c2ecf20Sopenharmony_ci list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); 3968c2ecf20Sopenharmony_ci } 3978c2ecf20Sopenharmony_ci spin_unlock_irq(&rds_tcp_conn_lock); 3988c2ecf20Sopenharmony_cifail: 3998c2ecf20Sopenharmony_ci if (ret) { 4008c2ecf20Sopenharmony_ci for (j = 0; j < i; j++) 4018c2ecf20Sopenharmony_ci rds_tcp_conn_free(conn->c_path[j].cp_transport_data); 4028c2ecf20Sopenharmony_ci } 4038c2ecf20Sopenharmony_ci return ret; 4048c2ecf20Sopenharmony_ci} 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_cistatic bool list_has_conn(struct list_head *list, struct rds_connection *conn) 4078c2ecf20Sopenharmony_ci{ 4088c2ecf20Sopenharmony_ci struct rds_tcp_connection *tc, *_tc; 4098c2ecf20Sopenharmony_ci 4108c2ecf20Sopenharmony_ci list_for_each_entry_safe(tc, _tc, list, t_tcp_node) { 4118c2ecf20Sopenharmony_ci if (tc->t_cpath->cp_conn == conn) 4128c2ecf20Sopenharmony_ci return true; 4138c2ecf20Sopenharmony_ci } 4148c2ecf20Sopenharmony_ci return false; 4158c2ecf20Sopenharmony_ci} 4168c2ecf20Sopenharmony_ci 4178c2ecf20Sopenharmony_cistatic void rds_tcp_set_unloading(void) 4188c2ecf20Sopenharmony_ci{ 4198c2ecf20Sopenharmony_ci atomic_set(&rds_tcp_unloading, 1); 4208c2ecf20Sopenharmony_ci} 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_cistatic bool rds_tcp_is_unloading(struct rds_connection *conn) 4238c2ecf20Sopenharmony_ci{ 4248c2ecf20Sopenharmony_ci return atomic_read(&rds_tcp_unloading) != 0; 4258c2ecf20Sopenharmony_ci} 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_cistatic void rds_tcp_destroy_conns(void) 4288c2ecf20Sopenharmony_ci{ 4298c2ecf20Sopenharmony_ci struct rds_tcp_connection *tc, *_tc; 4308c2ecf20Sopenharmony_ci LIST_HEAD(tmp_list); 4318c2ecf20Sopenharmony_ci 4328c2ecf20Sopenharmony_ci /* avoid calling conn_destroy with irqs off */ 4338c2ecf20Sopenharmony_ci spin_lock_irq(&rds_tcp_conn_lock); 4348c2ecf20Sopenharmony_ci list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { 4358c2ecf20Sopenharmony_ci if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) 4368c2ecf20Sopenharmony_ci list_move_tail(&tc->t_tcp_node, &tmp_list); 4378c2ecf20Sopenharmony_ci } 4388c2ecf20Sopenharmony_ci spin_unlock_irq(&rds_tcp_conn_lock); 4398c2ecf20Sopenharmony_ci 4408c2ecf20Sopenharmony_ci list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) 4418c2ecf20Sopenharmony_ci rds_conn_destroy(tc->t_cpath->cp_conn); 4428c2ecf20Sopenharmony_ci} 4438c2ecf20Sopenharmony_ci 4448c2ecf20Sopenharmony_cistatic void rds_tcp_exit(void); 4458c2ecf20Sopenharmony_ci 4468c2ecf20Sopenharmony_cistatic u8 rds_tcp_get_tos_map(u8 tos) 4478c2ecf20Sopenharmony_ci{ 4488c2ecf20Sopenharmony_ci /* all user tos mapped to default 0 for TCP transport */ 4498c2ecf20Sopenharmony_ci return 0; 4508c2ecf20Sopenharmony_ci} 4518c2ecf20Sopenharmony_ci 4528c2ecf20Sopenharmony_cistruct rds_transport rds_tcp_transport = { 4538c2ecf20Sopenharmony_ci .laddr_check = rds_tcp_laddr_check, 4548c2ecf20Sopenharmony_ci .xmit_path_prepare = rds_tcp_xmit_path_prepare, 4558c2ecf20Sopenharmony_ci .xmit_path_complete = rds_tcp_xmit_path_complete, 4568c2ecf20Sopenharmony_ci .xmit = rds_tcp_xmit, 4578c2ecf20Sopenharmony_ci .recv_path = rds_tcp_recv_path, 4588c2ecf20Sopenharmony_ci .conn_alloc = rds_tcp_conn_alloc, 4598c2ecf20Sopenharmony_ci .conn_free = rds_tcp_conn_free, 4608c2ecf20Sopenharmony_ci .conn_path_connect = rds_tcp_conn_path_connect, 4618c2ecf20Sopenharmony_ci .conn_path_shutdown = rds_tcp_conn_path_shutdown, 4628c2ecf20Sopenharmony_ci .inc_copy_to_user = rds_tcp_inc_copy_to_user, 4638c2ecf20Sopenharmony_ci .inc_free = rds_tcp_inc_free, 4648c2ecf20Sopenharmony_ci .stats_info_copy = rds_tcp_stats_info_copy, 4658c2ecf20Sopenharmony_ci .exit = rds_tcp_exit, 4668c2ecf20Sopenharmony_ci .get_tos_map = rds_tcp_get_tos_map, 4678c2ecf20Sopenharmony_ci .t_owner = THIS_MODULE, 4688c2ecf20Sopenharmony_ci .t_name = "tcp", 4698c2ecf20Sopenharmony_ci .t_type = RDS_TRANS_TCP, 4708c2ecf20Sopenharmony_ci .t_prefer_loopback = 1, 4718c2ecf20Sopenharmony_ci .t_mp_capable = 1, 4728c2ecf20Sopenharmony_ci .t_unloading = rds_tcp_is_unloading, 4738c2ecf20Sopenharmony_ci}; 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_cistatic unsigned int rds_tcp_netid; 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_ci/* per-network namespace private data for this module */ 4788c2ecf20Sopenharmony_cistruct rds_tcp_net { 4798c2ecf20Sopenharmony_ci struct socket *rds_tcp_listen_sock; 4808c2ecf20Sopenharmony_ci struct work_struct rds_tcp_accept_w; 4818c2ecf20Sopenharmony_ci struct ctl_table_header *rds_tcp_sysctl; 4828c2ecf20Sopenharmony_ci struct ctl_table *ctl_table; 4838c2ecf20Sopenharmony_ci int sndbuf_size; 4848c2ecf20Sopenharmony_ci int rcvbuf_size; 4858c2ecf20Sopenharmony_ci}; 4868c2ecf20Sopenharmony_ci 4878c2ecf20Sopenharmony_ci/* All module specific customizations to the RDS-TCP socket should be done in 4888c2ecf20Sopenharmony_ci * rds_tcp_tune() and applied after socket creation. 4898c2ecf20Sopenharmony_ci */ 4908c2ecf20Sopenharmony_civoid rds_tcp_tune(struct socket *sock) 4918c2ecf20Sopenharmony_ci{ 4928c2ecf20Sopenharmony_ci struct sock *sk = sock->sk; 4938c2ecf20Sopenharmony_ci struct net *net = sock_net(sk); 4948c2ecf20Sopenharmony_ci struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); 4958c2ecf20Sopenharmony_ci 4968c2ecf20Sopenharmony_ci tcp_sock_set_nodelay(sock->sk); 4978c2ecf20Sopenharmony_ci lock_sock(sk); 4988c2ecf20Sopenharmony_ci if (rtn->sndbuf_size > 0) { 4998c2ecf20Sopenharmony_ci sk->sk_sndbuf = rtn->sndbuf_size; 5008c2ecf20Sopenharmony_ci sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 5018c2ecf20Sopenharmony_ci } 5028c2ecf20Sopenharmony_ci if (rtn->rcvbuf_size > 0) { 5038c2ecf20Sopenharmony_ci sk->sk_rcvbuf = rtn->rcvbuf_size; 5048c2ecf20Sopenharmony_ci sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 5058c2ecf20Sopenharmony_ci } 5068c2ecf20Sopenharmony_ci release_sock(sk); 5078c2ecf20Sopenharmony_ci} 5088c2ecf20Sopenharmony_ci 5098c2ecf20Sopenharmony_cistatic void rds_tcp_accept_worker(struct work_struct *work) 5108c2ecf20Sopenharmony_ci{ 5118c2ecf20Sopenharmony_ci struct rds_tcp_net *rtn = container_of(work, 5128c2ecf20Sopenharmony_ci struct rds_tcp_net, 5138c2ecf20Sopenharmony_ci rds_tcp_accept_w); 5148c2ecf20Sopenharmony_ci 5158c2ecf20Sopenharmony_ci while (rds_tcp_accept_one(rtn->rds_tcp_listen_sock) == 0) 5168c2ecf20Sopenharmony_ci cond_resched(); 5178c2ecf20Sopenharmony_ci} 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_civoid rds_tcp_accept_work(struct sock *sk) 5208c2ecf20Sopenharmony_ci{ 5218c2ecf20Sopenharmony_ci struct net *net = sock_net(sk); 5228c2ecf20Sopenharmony_ci struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); 5238c2ecf20Sopenharmony_ci 5248c2ecf20Sopenharmony_ci queue_work(rds_wq, &rtn->rds_tcp_accept_w); 5258c2ecf20Sopenharmony_ci} 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_cistatic __net_init int rds_tcp_init_net(struct net *net) 5288c2ecf20Sopenharmony_ci{ 5298c2ecf20Sopenharmony_ci struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); 5308c2ecf20Sopenharmony_ci struct ctl_table *tbl; 5318c2ecf20Sopenharmony_ci int err = 0; 5328c2ecf20Sopenharmony_ci 5338c2ecf20Sopenharmony_ci memset(rtn, 0, sizeof(*rtn)); 5348c2ecf20Sopenharmony_ci 5358c2ecf20Sopenharmony_ci /* {snd, rcv}buf_size default to 0, which implies we let the 5368c2ecf20Sopenharmony_ci * stack pick the value, and permit auto-tuning of buffer size. 5378c2ecf20Sopenharmony_ci */ 5388c2ecf20Sopenharmony_ci if (net == &init_net) { 5398c2ecf20Sopenharmony_ci tbl = rds_tcp_sysctl_table; 5408c2ecf20Sopenharmony_ci } else { 5418c2ecf20Sopenharmony_ci tbl = kmemdup(rds_tcp_sysctl_table, 5428c2ecf20Sopenharmony_ci sizeof(rds_tcp_sysctl_table), GFP_KERNEL); 5438c2ecf20Sopenharmony_ci if (!tbl) { 5448c2ecf20Sopenharmony_ci pr_warn("could not set allocate sysctl table\n"); 5458c2ecf20Sopenharmony_ci return -ENOMEM; 5468c2ecf20Sopenharmony_ci } 5478c2ecf20Sopenharmony_ci rtn->ctl_table = tbl; 5488c2ecf20Sopenharmony_ci } 5498c2ecf20Sopenharmony_ci tbl[RDS_TCP_SNDBUF].data = &rtn->sndbuf_size; 5508c2ecf20Sopenharmony_ci tbl[RDS_TCP_RCVBUF].data = &rtn->rcvbuf_size; 5518c2ecf20Sopenharmony_ci rtn->rds_tcp_sysctl = register_net_sysctl(net, "net/rds/tcp", tbl); 5528c2ecf20Sopenharmony_ci if (!rtn->rds_tcp_sysctl) { 5538c2ecf20Sopenharmony_ci pr_warn("could not register sysctl\n"); 5548c2ecf20Sopenharmony_ci err = -ENOMEM; 5558c2ecf20Sopenharmony_ci goto fail; 5568c2ecf20Sopenharmony_ci } 5578c2ecf20Sopenharmony_ci 5588c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 5598c2ecf20Sopenharmony_ci rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, true); 5608c2ecf20Sopenharmony_ci#else 5618c2ecf20Sopenharmony_ci rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, false); 5628c2ecf20Sopenharmony_ci#endif 5638c2ecf20Sopenharmony_ci if (!rtn->rds_tcp_listen_sock) { 5648c2ecf20Sopenharmony_ci pr_warn("could not set up IPv6 listen sock\n"); 5658c2ecf20Sopenharmony_ci 5668c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 5678c2ecf20Sopenharmony_ci /* Try IPv4 as some systems disable IPv6 */ 5688c2ecf20Sopenharmony_ci rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, false); 5698c2ecf20Sopenharmony_ci if (!rtn->rds_tcp_listen_sock) { 5708c2ecf20Sopenharmony_ci#endif 5718c2ecf20Sopenharmony_ci unregister_net_sysctl_table(rtn->rds_tcp_sysctl); 5728c2ecf20Sopenharmony_ci rtn->rds_tcp_sysctl = NULL; 5738c2ecf20Sopenharmony_ci err = -EAFNOSUPPORT; 5748c2ecf20Sopenharmony_ci goto fail; 5758c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 5768c2ecf20Sopenharmony_ci } 5778c2ecf20Sopenharmony_ci#endif 5788c2ecf20Sopenharmony_ci } 5798c2ecf20Sopenharmony_ci INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker); 5808c2ecf20Sopenharmony_ci return 0; 5818c2ecf20Sopenharmony_ci 5828c2ecf20Sopenharmony_cifail: 5838c2ecf20Sopenharmony_ci if (net != &init_net) 5848c2ecf20Sopenharmony_ci kfree(tbl); 5858c2ecf20Sopenharmony_ci return err; 5868c2ecf20Sopenharmony_ci} 5878c2ecf20Sopenharmony_ci 5888c2ecf20Sopenharmony_cistatic void rds_tcp_kill_sock(struct net *net) 5898c2ecf20Sopenharmony_ci{ 5908c2ecf20Sopenharmony_ci struct rds_tcp_connection *tc, *_tc; 5918c2ecf20Sopenharmony_ci LIST_HEAD(tmp_list); 5928c2ecf20Sopenharmony_ci struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); 5938c2ecf20Sopenharmony_ci struct socket *lsock = rtn->rds_tcp_listen_sock; 5948c2ecf20Sopenharmony_ci 5958c2ecf20Sopenharmony_ci rtn->rds_tcp_listen_sock = NULL; 5968c2ecf20Sopenharmony_ci rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); 5978c2ecf20Sopenharmony_ci spin_lock_irq(&rds_tcp_conn_lock); 5988c2ecf20Sopenharmony_ci list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { 5998c2ecf20Sopenharmony_ci struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); 6008c2ecf20Sopenharmony_ci 6018c2ecf20Sopenharmony_ci if (net != c_net) 6028c2ecf20Sopenharmony_ci continue; 6038c2ecf20Sopenharmony_ci if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) { 6048c2ecf20Sopenharmony_ci list_move_tail(&tc->t_tcp_node, &tmp_list); 6058c2ecf20Sopenharmony_ci } else { 6068c2ecf20Sopenharmony_ci list_del(&tc->t_tcp_node); 6078c2ecf20Sopenharmony_ci tc->t_tcp_node_detached = true; 6088c2ecf20Sopenharmony_ci } 6098c2ecf20Sopenharmony_ci } 6108c2ecf20Sopenharmony_ci spin_unlock_irq(&rds_tcp_conn_lock); 6118c2ecf20Sopenharmony_ci list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) 6128c2ecf20Sopenharmony_ci rds_conn_destroy(tc->t_cpath->cp_conn); 6138c2ecf20Sopenharmony_ci} 6148c2ecf20Sopenharmony_ci 6158c2ecf20Sopenharmony_cistatic void __net_exit rds_tcp_exit_net(struct net *net) 6168c2ecf20Sopenharmony_ci{ 6178c2ecf20Sopenharmony_ci struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); 6188c2ecf20Sopenharmony_ci 6198c2ecf20Sopenharmony_ci rds_tcp_kill_sock(net); 6208c2ecf20Sopenharmony_ci 6218c2ecf20Sopenharmony_ci if (rtn->rds_tcp_sysctl) 6228c2ecf20Sopenharmony_ci unregister_net_sysctl_table(rtn->rds_tcp_sysctl); 6238c2ecf20Sopenharmony_ci 6248c2ecf20Sopenharmony_ci if (net != &init_net) 6258c2ecf20Sopenharmony_ci kfree(rtn->ctl_table); 6268c2ecf20Sopenharmony_ci} 6278c2ecf20Sopenharmony_ci 6288c2ecf20Sopenharmony_cistatic struct pernet_operations rds_tcp_net_ops = { 6298c2ecf20Sopenharmony_ci .init = rds_tcp_init_net, 6308c2ecf20Sopenharmony_ci .exit = rds_tcp_exit_net, 6318c2ecf20Sopenharmony_ci .id = &rds_tcp_netid, 6328c2ecf20Sopenharmony_ci .size = sizeof(struct rds_tcp_net), 6338c2ecf20Sopenharmony_ci}; 6348c2ecf20Sopenharmony_ci 6358c2ecf20Sopenharmony_civoid *rds_tcp_listen_sock_def_readable(struct net *net) 6368c2ecf20Sopenharmony_ci{ 6378c2ecf20Sopenharmony_ci struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); 6388c2ecf20Sopenharmony_ci struct socket *lsock = rtn->rds_tcp_listen_sock; 6398c2ecf20Sopenharmony_ci 6408c2ecf20Sopenharmony_ci if (!lsock) 6418c2ecf20Sopenharmony_ci return NULL; 6428c2ecf20Sopenharmony_ci 6438c2ecf20Sopenharmony_ci return lsock->sk->sk_user_data; 6448c2ecf20Sopenharmony_ci} 6458c2ecf20Sopenharmony_ci 6468c2ecf20Sopenharmony_ci/* when sysctl is used to modify some kernel socket parameters,this 6478c2ecf20Sopenharmony_ci * function resets the RDS connections in that netns so that we can 6488c2ecf20Sopenharmony_ci * restart with new parameters. The assumption is that such reset 6498c2ecf20Sopenharmony_ci * events are few and far-between. 6508c2ecf20Sopenharmony_ci */ 6518c2ecf20Sopenharmony_cistatic void rds_tcp_sysctl_reset(struct net *net) 6528c2ecf20Sopenharmony_ci{ 6538c2ecf20Sopenharmony_ci struct rds_tcp_connection *tc, *_tc; 6548c2ecf20Sopenharmony_ci 6558c2ecf20Sopenharmony_ci spin_lock_irq(&rds_tcp_conn_lock); 6568c2ecf20Sopenharmony_ci list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { 6578c2ecf20Sopenharmony_ci struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci if (net != c_net || !tc->t_sock) 6608c2ecf20Sopenharmony_ci continue; 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_ci /* reconnect with new parameters */ 6638c2ecf20Sopenharmony_ci rds_conn_path_drop(tc->t_cpath, false); 6648c2ecf20Sopenharmony_ci } 6658c2ecf20Sopenharmony_ci spin_unlock_irq(&rds_tcp_conn_lock); 6668c2ecf20Sopenharmony_ci} 6678c2ecf20Sopenharmony_ci 6688c2ecf20Sopenharmony_cistatic int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, 6698c2ecf20Sopenharmony_ci void *buffer, size_t *lenp, loff_t *fpos) 6708c2ecf20Sopenharmony_ci{ 6718c2ecf20Sopenharmony_ci struct net *net = current->nsproxy->net_ns; 6728c2ecf20Sopenharmony_ci int err; 6738c2ecf20Sopenharmony_ci 6748c2ecf20Sopenharmony_ci err = proc_dointvec_minmax(ctl, write, buffer, lenp, fpos); 6758c2ecf20Sopenharmony_ci if (err < 0) { 6768c2ecf20Sopenharmony_ci pr_warn("Invalid input. Must be >= %d\n", 6778c2ecf20Sopenharmony_ci *(int *)(ctl->extra1)); 6788c2ecf20Sopenharmony_ci return err; 6798c2ecf20Sopenharmony_ci } 6808c2ecf20Sopenharmony_ci if (write) 6818c2ecf20Sopenharmony_ci rds_tcp_sysctl_reset(net); 6828c2ecf20Sopenharmony_ci return 0; 6838c2ecf20Sopenharmony_ci} 6848c2ecf20Sopenharmony_ci 6858c2ecf20Sopenharmony_cistatic void rds_tcp_exit(void) 6868c2ecf20Sopenharmony_ci{ 6878c2ecf20Sopenharmony_ci rds_tcp_set_unloading(); 6888c2ecf20Sopenharmony_ci synchronize_rcu(); 6898c2ecf20Sopenharmony_ci rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); 6908c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 6918c2ecf20Sopenharmony_ci rds_info_deregister_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info); 6928c2ecf20Sopenharmony_ci#endif 6938c2ecf20Sopenharmony_ci unregister_pernet_device(&rds_tcp_net_ops); 6948c2ecf20Sopenharmony_ci rds_tcp_destroy_conns(); 6958c2ecf20Sopenharmony_ci rds_trans_unregister(&rds_tcp_transport); 6968c2ecf20Sopenharmony_ci rds_tcp_recv_exit(); 6978c2ecf20Sopenharmony_ci kmem_cache_destroy(rds_tcp_conn_slab); 6988c2ecf20Sopenharmony_ci} 6998c2ecf20Sopenharmony_cimodule_exit(rds_tcp_exit); 7008c2ecf20Sopenharmony_ci 7018c2ecf20Sopenharmony_cistatic int rds_tcp_init(void) 7028c2ecf20Sopenharmony_ci{ 7038c2ecf20Sopenharmony_ci int ret; 7048c2ecf20Sopenharmony_ci 7058c2ecf20Sopenharmony_ci rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection", 7068c2ecf20Sopenharmony_ci sizeof(struct rds_tcp_connection), 7078c2ecf20Sopenharmony_ci 0, 0, NULL); 7088c2ecf20Sopenharmony_ci if (!rds_tcp_conn_slab) { 7098c2ecf20Sopenharmony_ci ret = -ENOMEM; 7108c2ecf20Sopenharmony_ci goto out; 7118c2ecf20Sopenharmony_ci } 7128c2ecf20Sopenharmony_ci 7138c2ecf20Sopenharmony_ci ret = rds_tcp_recv_init(); 7148c2ecf20Sopenharmony_ci if (ret) 7158c2ecf20Sopenharmony_ci goto out_slab; 7168c2ecf20Sopenharmony_ci 7178c2ecf20Sopenharmony_ci ret = register_pernet_device(&rds_tcp_net_ops); 7188c2ecf20Sopenharmony_ci if (ret) 7198c2ecf20Sopenharmony_ci goto out_recv; 7208c2ecf20Sopenharmony_ci 7218c2ecf20Sopenharmony_ci rds_trans_register(&rds_tcp_transport); 7228c2ecf20Sopenharmony_ci 7238c2ecf20Sopenharmony_ci rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); 7248c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 7258c2ecf20Sopenharmony_ci rds_info_register_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info); 7268c2ecf20Sopenharmony_ci#endif 7278c2ecf20Sopenharmony_ci 7288c2ecf20Sopenharmony_ci goto out; 7298c2ecf20Sopenharmony_ciout_recv: 7308c2ecf20Sopenharmony_ci rds_tcp_recv_exit(); 7318c2ecf20Sopenharmony_ciout_slab: 7328c2ecf20Sopenharmony_ci kmem_cache_destroy(rds_tcp_conn_slab); 7338c2ecf20Sopenharmony_ciout: 7348c2ecf20Sopenharmony_ci return ret; 7358c2ecf20Sopenharmony_ci} 7368c2ecf20Sopenharmony_cimodule_init(rds_tcp_init); 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_ciMODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>"); 7398c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("RDS: TCP transport"); 7408c2ecf20Sopenharmony_ciMODULE_LICENSE("Dual BSD/GPL"); 741