// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
**  Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
**
**
*******************************************************************************
******************************************************************************/

/*
 * lowcomms.c
 *
 * This is the "low-level" comms layer.
 *
 * It is responsible for sending/receiving messages
 * from other nodes in the cluster.
 *
 * Cluster nodes are referred to by their nodeids. nodeids are
 * simply 32 bit numbers to the locking module - if they need to
 * be expanded for the cluster infrastructure then that is its
 * responsibility. It is this layer's
 * responsibility to resolve these into IP address or
 * whatever it needs for inter-node communication.
 *
 * The comms level is two kernel threads that deal mainly with
 * the receiving of messages from other nodes and passing them
 * up to the mid-level comms layer (which understands the
 * message format) for execution by the locking core, and
 * a send thread which does all the setting up of connections
 * to remote nodes and the sending of data. Threads are not allowed
 * to send their own data because it may cause them to wait in times
 * of high load. Also, this way, the sending thread can collect together
 * messages bound for one node and send them in one block.
 *
 * lowcomms will choose to use either TCP or SCTP as its transport layer
 * depending on the configuration variable 'protocol'. This should be set
 * to 0 (default) for TCP or 1 for SCTP. It should be configured using a
 * cluster-wide mechanism as it must be the same on all nodes of the cluster
 * for the DLM to function.
428c2ecf20Sopenharmony_ci * 438c2ecf20Sopenharmony_ci */ 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci#include <asm/ioctls.h> 468c2ecf20Sopenharmony_ci#include <net/sock.h> 478c2ecf20Sopenharmony_ci#include <net/tcp.h> 488c2ecf20Sopenharmony_ci#include <linux/pagemap.h> 498c2ecf20Sopenharmony_ci#include <linux/file.h> 508c2ecf20Sopenharmony_ci#include <linux/mutex.h> 518c2ecf20Sopenharmony_ci#include <linux/sctp.h> 528c2ecf20Sopenharmony_ci#include <linux/slab.h> 538c2ecf20Sopenharmony_ci#include <net/sctp/sctp.h> 548c2ecf20Sopenharmony_ci#include <net/ipv6.h> 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci#include "dlm_internal.h" 578c2ecf20Sopenharmony_ci#include "lowcomms.h" 588c2ecf20Sopenharmony_ci#include "midcomms.h" 598c2ecf20Sopenharmony_ci#include "config.h" 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci#define NEEDED_RMEM (4*1024*1024) 628c2ecf20Sopenharmony_ci#define CONN_HASH_SIZE 32 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci/* Number of messages to send before rescheduling */ 658c2ecf20Sopenharmony_ci#define MAX_SEND_MSG_COUNT 25 668c2ecf20Sopenharmony_ci#define DLM_SHUTDOWN_WAIT_TIMEOUT msecs_to_jiffies(10000) 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_cistruct connection { 698c2ecf20Sopenharmony_ci struct socket *sock; /* NULL if not connected */ 708c2ecf20Sopenharmony_ci uint32_t nodeid; /* So we know who we are in the list */ 718c2ecf20Sopenharmony_ci struct mutex sock_mutex; 728c2ecf20Sopenharmony_ci unsigned long flags; 738c2ecf20Sopenharmony_ci#define CF_READ_PENDING 1 748c2ecf20Sopenharmony_ci#define CF_WRITE_PENDING 2 758c2ecf20Sopenharmony_ci#define CF_INIT_PENDING 4 768c2ecf20Sopenharmony_ci#define CF_IS_OTHERCON 5 778c2ecf20Sopenharmony_ci#define CF_CLOSE 6 788c2ecf20Sopenharmony_ci#define CF_APP_LIMITED 7 798c2ecf20Sopenharmony_ci#define CF_CLOSING 8 808c2ecf20Sopenharmony_ci#define CF_SHUTDOWN 9 818c2ecf20Sopenharmony_ci struct list_head writequeue; /* List of outgoing writequeue_entries */ 
828c2ecf20Sopenharmony_ci spinlock_t writequeue_lock; 838c2ecf20Sopenharmony_ci int (*rx_action) (struct connection *); /* What to do when active */ 848c2ecf20Sopenharmony_ci void (*connect_action) (struct connection *); /* What to do to connect */ 858c2ecf20Sopenharmony_ci void (*shutdown_action)(struct connection *con); /* What to do to shutdown */ 868c2ecf20Sopenharmony_ci int retries; 878c2ecf20Sopenharmony_ci#define MAX_CONNECT_RETRIES 3 888c2ecf20Sopenharmony_ci struct hlist_node list; 898c2ecf20Sopenharmony_ci struct connection *othercon; 908c2ecf20Sopenharmony_ci struct work_struct rwork; /* Receive workqueue */ 918c2ecf20Sopenharmony_ci struct work_struct swork; /* Send workqueue */ 928c2ecf20Sopenharmony_ci wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */ 938c2ecf20Sopenharmony_ci unsigned char *rx_buf; 948c2ecf20Sopenharmony_ci int rx_buflen; 958c2ecf20Sopenharmony_ci int rx_leftover; 968c2ecf20Sopenharmony_ci struct rcu_head rcu; 978c2ecf20Sopenharmony_ci}; 988c2ecf20Sopenharmony_ci#define sock2con(x) ((struct connection *)(x)->sk_user_data) 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci/* An entry waiting to be sent */ 1018c2ecf20Sopenharmony_cistruct writequeue_entry { 1028c2ecf20Sopenharmony_ci struct list_head list; 1038c2ecf20Sopenharmony_ci struct page *page; 1048c2ecf20Sopenharmony_ci int offset; 1058c2ecf20Sopenharmony_ci int len; 1068c2ecf20Sopenharmony_ci int end; 1078c2ecf20Sopenharmony_ci int users; 1088c2ecf20Sopenharmony_ci struct connection *con; 1098c2ecf20Sopenharmony_ci}; 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_cistruct dlm_node_addr { 1128c2ecf20Sopenharmony_ci struct list_head list; 1138c2ecf20Sopenharmony_ci int nodeid; 1148c2ecf20Sopenharmony_ci int addr_count; 1158c2ecf20Sopenharmony_ci int curr_addr_index; 1168c2ecf20Sopenharmony_ci struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; 1178c2ecf20Sopenharmony_ci}; 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_cistatic struct 
listen_sock_callbacks { 1208c2ecf20Sopenharmony_ci void (*sk_error_report)(struct sock *); 1218c2ecf20Sopenharmony_ci void (*sk_data_ready)(struct sock *); 1228c2ecf20Sopenharmony_ci void (*sk_state_change)(struct sock *); 1238c2ecf20Sopenharmony_ci void (*sk_write_space)(struct sock *); 1248c2ecf20Sopenharmony_ci} listen_sock; 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_cistatic LIST_HEAD(dlm_node_addrs); 1278c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(dlm_node_addrs_spin); 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_cistatic struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; 1308c2ecf20Sopenharmony_cistatic int dlm_local_count; 1318c2ecf20Sopenharmony_cistatic int dlm_allow_conn; 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci/* Work queues */ 1348c2ecf20Sopenharmony_cistatic struct workqueue_struct *recv_workqueue; 1358c2ecf20Sopenharmony_cistatic struct workqueue_struct *send_workqueue; 1368c2ecf20Sopenharmony_ci 1378c2ecf20Sopenharmony_cistatic struct hlist_head connection_hash[CONN_HASH_SIZE]; 1388c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(connections_lock); 1398c2ecf20Sopenharmony_ciDEFINE_STATIC_SRCU(connections_srcu); 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_cistatic void process_recv_sockets(struct work_struct *work); 1428c2ecf20Sopenharmony_cistatic void process_send_sockets(struct work_struct *work); 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_ci/* This is deliberately very simple because most clusters have simple 1468c2ecf20Sopenharmony_ci sequential nodeids, so we should be able to go straight to a connection 1478c2ecf20Sopenharmony_ci struct in the array */ 1488c2ecf20Sopenharmony_cistatic inline int nodeid_hash(int nodeid) 1498c2ecf20Sopenharmony_ci{ 1508c2ecf20Sopenharmony_ci return nodeid & (CONN_HASH_SIZE-1); 1518c2ecf20Sopenharmony_ci} 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_cistatic struct connection *__find_con(int nodeid) 1548c2ecf20Sopenharmony_ci{ 
1558c2ecf20Sopenharmony_ci int r, idx; 1568c2ecf20Sopenharmony_ci struct connection *con; 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_ci r = nodeid_hash(nodeid); 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci idx = srcu_read_lock(&connections_srcu); 1618c2ecf20Sopenharmony_ci hlist_for_each_entry_rcu(con, &connection_hash[r], list) { 1628c2ecf20Sopenharmony_ci if (con->nodeid == nodeid) { 1638c2ecf20Sopenharmony_ci srcu_read_unlock(&connections_srcu, idx); 1648c2ecf20Sopenharmony_ci return con; 1658c2ecf20Sopenharmony_ci } 1668c2ecf20Sopenharmony_ci } 1678c2ecf20Sopenharmony_ci srcu_read_unlock(&connections_srcu, idx); 1688c2ecf20Sopenharmony_ci 1698c2ecf20Sopenharmony_ci return NULL; 1708c2ecf20Sopenharmony_ci} 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci/* 1738c2ecf20Sopenharmony_ci * If 'allocation' is zero then we don't attempt to create a new 1748c2ecf20Sopenharmony_ci * connection structure for this node. 1758c2ecf20Sopenharmony_ci */ 1768c2ecf20Sopenharmony_cistatic struct connection *nodeid2con(int nodeid, gfp_t alloc) 1778c2ecf20Sopenharmony_ci{ 1788c2ecf20Sopenharmony_ci struct connection *con, *tmp; 1798c2ecf20Sopenharmony_ci int r; 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_ci con = __find_con(nodeid); 1828c2ecf20Sopenharmony_ci if (con || !alloc) 1838c2ecf20Sopenharmony_ci return con; 1848c2ecf20Sopenharmony_ci 1858c2ecf20Sopenharmony_ci con = kzalloc(sizeof(*con), alloc); 1868c2ecf20Sopenharmony_ci if (!con) 1878c2ecf20Sopenharmony_ci return NULL; 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci con->rx_buflen = dlm_config.ci_buffer_size; 1908c2ecf20Sopenharmony_ci con->rx_buf = kmalloc(con->rx_buflen, GFP_NOFS); 1918c2ecf20Sopenharmony_ci if (!con->rx_buf) { 1928c2ecf20Sopenharmony_ci kfree(con); 1938c2ecf20Sopenharmony_ci return NULL; 1948c2ecf20Sopenharmony_ci } 1958c2ecf20Sopenharmony_ci 1968c2ecf20Sopenharmony_ci con->nodeid = nodeid; 1978c2ecf20Sopenharmony_ci mutex_init(&con->sock_mutex); 
1988c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&con->writequeue); 1998c2ecf20Sopenharmony_ci spin_lock_init(&con->writequeue_lock); 2008c2ecf20Sopenharmony_ci INIT_WORK(&con->swork, process_send_sockets); 2018c2ecf20Sopenharmony_ci INIT_WORK(&con->rwork, process_recv_sockets); 2028c2ecf20Sopenharmony_ci init_waitqueue_head(&con->shutdown_wait); 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_ci /* Setup action pointers for child sockets */ 2058c2ecf20Sopenharmony_ci if (con->nodeid) { 2068c2ecf20Sopenharmony_ci struct connection *zerocon = __find_con(0); 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_ci con->connect_action = zerocon->connect_action; 2098c2ecf20Sopenharmony_ci if (!con->rx_action) 2108c2ecf20Sopenharmony_ci con->rx_action = zerocon->rx_action; 2118c2ecf20Sopenharmony_ci } 2128c2ecf20Sopenharmony_ci 2138c2ecf20Sopenharmony_ci r = nodeid_hash(nodeid); 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci spin_lock(&connections_lock); 2168c2ecf20Sopenharmony_ci /* Because multiple workqueues/threads calls this function it can 2178c2ecf20Sopenharmony_ci * race on multiple cpu's. Instead of locking hot path __find_con() 2188c2ecf20Sopenharmony_ci * we just check in rare cases of recently added nodes again 2198c2ecf20Sopenharmony_ci * under protection of connections_lock. If this is the case we 2208c2ecf20Sopenharmony_ci * abort our connection creation and return the existing connection. 
2218c2ecf20Sopenharmony_ci */ 2228c2ecf20Sopenharmony_ci tmp = __find_con(nodeid); 2238c2ecf20Sopenharmony_ci if (tmp) { 2248c2ecf20Sopenharmony_ci spin_unlock(&connections_lock); 2258c2ecf20Sopenharmony_ci kfree(con->rx_buf); 2268c2ecf20Sopenharmony_ci kfree(con); 2278c2ecf20Sopenharmony_ci return tmp; 2288c2ecf20Sopenharmony_ci } 2298c2ecf20Sopenharmony_ci 2308c2ecf20Sopenharmony_ci hlist_add_head_rcu(&con->list, &connection_hash[r]); 2318c2ecf20Sopenharmony_ci spin_unlock(&connections_lock); 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_ci return con; 2348c2ecf20Sopenharmony_ci} 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_ci/* Loop round all connections */ 2378c2ecf20Sopenharmony_cistatic void foreach_conn(void (*conn_func)(struct connection *c)) 2388c2ecf20Sopenharmony_ci{ 2398c2ecf20Sopenharmony_ci int i, idx; 2408c2ecf20Sopenharmony_ci struct connection *con; 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci idx = srcu_read_lock(&connections_srcu); 2438c2ecf20Sopenharmony_ci for (i = 0; i < CONN_HASH_SIZE; i++) { 2448c2ecf20Sopenharmony_ci hlist_for_each_entry_rcu(con, &connection_hash[i], list) 2458c2ecf20Sopenharmony_ci conn_func(con); 2468c2ecf20Sopenharmony_ci } 2478c2ecf20Sopenharmony_ci srcu_read_unlock(&connections_srcu, idx); 2488c2ecf20Sopenharmony_ci} 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_cistatic struct dlm_node_addr *find_node_addr(int nodeid) 2518c2ecf20Sopenharmony_ci{ 2528c2ecf20Sopenharmony_ci struct dlm_node_addr *na; 2538c2ecf20Sopenharmony_ci 2548c2ecf20Sopenharmony_ci list_for_each_entry(na, &dlm_node_addrs, list) { 2558c2ecf20Sopenharmony_ci if (na->nodeid == nodeid) 2568c2ecf20Sopenharmony_ci return na; 2578c2ecf20Sopenharmony_ci } 2588c2ecf20Sopenharmony_ci return NULL; 2598c2ecf20Sopenharmony_ci} 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_cistatic int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) 2628c2ecf20Sopenharmony_ci{ 2638c2ecf20Sopenharmony_ci switch (x->ss_family) { 
2648c2ecf20Sopenharmony_ci case AF_INET: { 2658c2ecf20Sopenharmony_ci struct sockaddr_in *sinx = (struct sockaddr_in *)x; 2668c2ecf20Sopenharmony_ci struct sockaddr_in *siny = (struct sockaddr_in *)y; 2678c2ecf20Sopenharmony_ci if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) 2688c2ecf20Sopenharmony_ci return 0; 2698c2ecf20Sopenharmony_ci if (sinx->sin_port != siny->sin_port) 2708c2ecf20Sopenharmony_ci return 0; 2718c2ecf20Sopenharmony_ci break; 2728c2ecf20Sopenharmony_ci } 2738c2ecf20Sopenharmony_ci case AF_INET6: { 2748c2ecf20Sopenharmony_ci struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; 2758c2ecf20Sopenharmony_ci struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; 2768c2ecf20Sopenharmony_ci if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) 2778c2ecf20Sopenharmony_ci return 0; 2788c2ecf20Sopenharmony_ci if (sinx->sin6_port != siny->sin6_port) 2798c2ecf20Sopenharmony_ci return 0; 2808c2ecf20Sopenharmony_ci break; 2818c2ecf20Sopenharmony_ci } 2828c2ecf20Sopenharmony_ci default: 2838c2ecf20Sopenharmony_ci return 0; 2848c2ecf20Sopenharmony_ci } 2858c2ecf20Sopenharmony_ci return 1; 2868c2ecf20Sopenharmony_ci} 2878c2ecf20Sopenharmony_ci 2888c2ecf20Sopenharmony_cistatic int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, 2898c2ecf20Sopenharmony_ci struct sockaddr *sa_out, bool try_new_addr) 2908c2ecf20Sopenharmony_ci{ 2918c2ecf20Sopenharmony_ci struct sockaddr_storage sas; 2928c2ecf20Sopenharmony_ci struct dlm_node_addr *na; 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci if (!dlm_local_count) 2958c2ecf20Sopenharmony_ci return -1; 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci spin_lock(&dlm_node_addrs_spin); 2988c2ecf20Sopenharmony_ci na = find_node_addr(nodeid); 2998c2ecf20Sopenharmony_ci if (na && na->addr_count) { 3008c2ecf20Sopenharmony_ci memcpy(&sas, na->addr[na->curr_addr_index], 3018c2ecf20Sopenharmony_ci sizeof(struct sockaddr_storage)); 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_ci if (try_new_addr) { 
3048c2ecf20Sopenharmony_ci na->curr_addr_index++; 3058c2ecf20Sopenharmony_ci if (na->curr_addr_index == na->addr_count) 3068c2ecf20Sopenharmony_ci na->curr_addr_index = 0; 3078c2ecf20Sopenharmony_ci } 3088c2ecf20Sopenharmony_ci } 3098c2ecf20Sopenharmony_ci spin_unlock(&dlm_node_addrs_spin); 3108c2ecf20Sopenharmony_ci 3118c2ecf20Sopenharmony_ci if (!na) 3128c2ecf20Sopenharmony_ci return -EEXIST; 3138c2ecf20Sopenharmony_ci 3148c2ecf20Sopenharmony_ci if (!na->addr_count) 3158c2ecf20Sopenharmony_ci return -ENOENT; 3168c2ecf20Sopenharmony_ci 3178c2ecf20Sopenharmony_ci if (sas_out) 3188c2ecf20Sopenharmony_ci memcpy(sas_out, &sas, sizeof(struct sockaddr_storage)); 3198c2ecf20Sopenharmony_ci 3208c2ecf20Sopenharmony_ci if (!sa_out) 3218c2ecf20Sopenharmony_ci return 0; 3228c2ecf20Sopenharmony_ci 3238c2ecf20Sopenharmony_ci if (dlm_local_addr[0]->ss_family == AF_INET) { 3248c2ecf20Sopenharmony_ci struct sockaddr_in *in4 = (struct sockaddr_in *) &sas; 3258c2ecf20Sopenharmony_ci struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out; 3268c2ecf20Sopenharmony_ci ret4->sin_addr.s_addr = in4->sin_addr.s_addr; 3278c2ecf20Sopenharmony_ci } else { 3288c2ecf20Sopenharmony_ci struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &sas; 3298c2ecf20Sopenharmony_ci struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out; 3308c2ecf20Sopenharmony_ci ret6->sin6_addr = in6->sin6_addr; 3318c2ecf20Sopenharmony_ci } 3328c2ecf20Sopenharmony_ci 3338c2ecf20Sopenharmony_ci return 0; 3348c2ecf20Sopenharmony_ci} 3358c2ecf20Sopenharmony_ci 3368c2ecf20Sopenharmony_cistatic int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) 3378c2ecf20Sopenharmony_ci{ 3388c2ecf20Sopenharmony_ci struct dlm_node_addr *na; 3398c2ecf20Sopenharmony_ci int rv = -EEXIST; 3408c2ecf20Sopenharmony_ci int addr_i; 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci spin_lock(&dlm_node_addrs_spin); 3438c2ecf20Sopenharmony_ci list_for_each_entry(na, &dlm_node_addrs, list) { 3448c2ecf20Sopenharmony_ci if (!na->addr_count) 
3458c2ecf20Sopenharmony_ci continue; 3468c2ecf20Sopenharmony_ci 3478c2ecf20Sopenharmony_ci for (addr_i = 0; addr_i < na->addr_count; addr_i++) { 3488c2ecf20Sopenharmony_ci if (addr_compare(na->addr[addr_i], addr)) { 3498c2ecf20Sopenharmony_ci *nodeid = na->nodeid; 3508c2ecf20Sopenharmony_ci rv = 0; 3518c2ecf20Sopenharmony_ci goto unlock; 3528c2ecf20Sopenharmony_ci } 3538c2ecf20Sopenharmony_ci } 3548c2ecf20Sopenharmony_ci } 3558c2ecf20Sopenharmony_ciunlock: 3568c2ecf20Sopenharmony_ci spin_unlock(&dlm_node_addrs_spin); 3578c2ecf20Sopenharmony_ci return rv; 3588c2ecf20Sopenharmony_ci} 3598c2ecf20Sopenharmony_ci 3608c2ecf20Sopenharmony_ciint dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len) 3618c2ecf20Sopenharmony_ci{ 3628c2ecf20Sopenharmony_ci struct sockaddr_storage *new_addr; 3638c2ecf20Sopenharmony_ci struct dlm_node_addr *new_node, *na; 3648c2ecf20Sopenharmony_ci 3658c2ecf20Sopenharmony_ci new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS); 3668c2ecf20Sopenharmony_ci if (!new_node) 3678c2ecf20Sopenharmony_ci return -ENOMEM; 3688c2ecf20Sopenharmony_ci 3698c2ecf20Sopenharmony_ci new_addr = kzalloc(sizeof(struct sockaddr_storage), GFP_NOFS); 3708c2ecf20Sopenharmony_ci if (!new_addr) { 3718c2ecf20Sopenharmony_ci kfree(new_node); 3728c2ecf20Sopenharmony_ci return -ENOMEM; 3738c2ecf20Sopenharmony_ci } 3748c2ecf20Sopenharmony_ci 3758c2ecf20Sopenharmony_ci memcpy(new_addr, addr, len); 3768c2ecf20Sopenharmony_ci 3778c2ecf20Sopenharmony_ci spin_lock(&dlm_node_addrs_spin); 3788c2ecf20Sopenharmony_ci na = find_node_addr(nodeid); 3798c2ecf20Sopenharmony_ci if (!na) { 3808c2ecf20Sopenharmony_ci new_node->nodeid = nodeid; 3818c2ecf20Sopenharmony_ci new_node->addr[0] = new_addr; 3828c2ecf20Sopenharmony_ci new_node->addr_count = 1; 3838c2ecf20Sopenharmony_ci list_add(&new_node->list, &dlm_node_addrs); 3848c2ecf20Sopenharmony_ci spin_unlock(&dlm_node_addrs_spin); 3858c2ecf20Sopenharmony_ci return 0; 3868c2ecf20Sopenharmony_ci } 3878c2ecf20Sopenharmony_ci 
3888c2ecf20Sopenharmony_ci if (na->addr_count >= DLM_MAX_ADDR_COUNT) { 3898c2ecf20Sopenharmony_ci spin_unlock(&dlm_node_addrs_spin); 3908c2ecf20Sopenharmony_ci kfree(new_addr); 3918c2ecf20Sopenharmony_ci kfree(new_node); 3928c2ecf20Sopenharmony_ci return -ENOSPC; 3938c2ecf20Sopenharmony_ci } 3948c2ecf20Sopenharmony_ci 3958c2ecf20Sopenharmony_ci na->addr[na->addr_count++] = new_addr; 3968c2ecf20Sopenharmony_ci spin_unlock(&dlm_node_addrs_spin); 3978c2ecf20Sopenharmony_ci kfree(new_node); 3988c2ecf20Sopenharmony_ci return 0; 3998c2ecf20Sopenharmony_ci} 4008c2ecf20Sopenharmony_ci 4018c2ecf20Sopenharmony_ci/* Data available on socket or listen socket received a connect */ 4028c2ecf20Sopenharmony_cistatic void lowcomms_data_ready(struct sock *sk) 4038c2ecf20Sopenharmony_ci{ 4048c2ecf20Sopenharmony_ci struct connection *con; 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ci read_lock_bh(&sk->sk_callback_lock); 4078c2ecf20Sopenharmony_ci con = sock2con(sk); 4088c2ecf20Sopenharmony_ci if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags)) 4098c2ecf20Sopenharmony_ci queue_work(recv_workqueue, &con->rwork); 4108c2ecf20Sopenharmony_ci read_unlock_bh(&sk->sk_callback_lock); 4118c2ecf20Sopenharmony_ci} 4128c2ecf20Sopenharmony_ci 4138c2ecf20Sopenharmony_cistatic void lowcomms_write_space(struct sock *sk) 4148c2ecf20Sopenharmony_ci{ 4158c2ecf20Sopenharmony_ci struct connection *con; 4168c2ecf20Sopenharmony_ci 4178c2ecf20Sopenharmony_ci read_lock_bh(&sk->sk_callback_lock); 4188c2ecf20Sopenharmony_ci con = sock2con(sk); 4198c2ecf20Sopenharmony_ci if (!con) 4208c2ecf20Sopenharmony_ci goto out; 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_ci clear_bit(SOCK_NOSPACE, &con->sock->flags); 4238c2ecf20Sopenharmony_ci 4248c2ecf20Sopenharmony_ci if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) { 4258c2ecf20Sopenharmony_ci con->sock->sk->sk_write_pending--; 4268c2ecf20Sopenharmony_ci clear_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags); 4278c2ecf20Sopenharmony_ci } 
4288c2ecf20Sopenharmony_ci 4298c2ecf20Sopenharmony_ci queue_work(send_workqueue, &con->swork); 4308c2ecf20Sopenharmony_ciout: 4318c2ecf20Sopenharmony_ci read_unlock_bh(&sk->sk_callback_lock); 4328c2ecf20Sopenharmony_ci} 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_cistatic inline void lowcomms_connect_sock(struct connection *con) 4358c2ecf20Sopenharmony_ci{ 4368c2ecf20Sopenharmony_ci if (test_bit(CF_CLOSE, &con->flags)) 4378c2ecf20Sopenharmony_ci return; 4388c2ecf20Sopenharmony_ci queue_work(send_workqueue, &con->swork); 4398c2ecf20Sopenharmony_ci cond_resched(); 4408c2ecf20Sopenharmony_ci} 4418c2ecf20Sopenharmony_ci 4428c2ecf20Sopenharmony_cistatic void lowcomms_state_change(struct sock *sk) 4438c2ecf20Sopenharmony_ci{ 4448c2ecf20Sopenharmony_ci /* SCTP layer is not calling sk_data_ready when the connection 4458c2ecf20Sopenharmony_ci * is done, so we catch the signal through here. Also, it 4468c2ecf20Sopenharmony_ci * doesn't switch socket state when entering shutdown, so we 4478c2ecf20Sopenharmony_ci * skip the write in that case. 
4488c2ecf20Sopenharmony_ci */ 4498c2ecf20Sopenharmony_ci if (sk->sk_shutdown) { 4508c2ecf20Sopenharmony_ci if (sk->sk_shutdown == RCV_SHUTDOWN) 4518c2ecf20Sopenharmony_ci lowcomms_data_ready(sk); 4528c2ecf20Sopenharmony_ci } else if (sk->sk_state == TCP_ESTABLISHED) { 4538c2ecf20Sopenharmony_ci lowcomms_write_space(sk); 4548c2ecf20Sopenharmony_ci } 4558c2ecf20Sopenharmony_ci} 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_ciint dlm_lowcomms_connect_node(int nodeid) 4588c2ecf20Sopenharmony_ci{ 4598c2ecf20Sopenharmony_ci struct connection *con; 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_ci if (nodeid == dlm_our_nodeid()) 4628c2ecf20Sopenharmony_ci return 0; 4638c2ecf20Sopenharmony_ci 4648c2ecf20Sopenharmony_ci con = nodeid2con(nodeid, GFP_NOFS); 4658c2ecf20Sopenharmony_ci if (!con) 4668c2ecf20Sopenharmony_ci return -ENOMEM; 4678c2ecf20Sopenharmony_ci lowcomms_connect_sock(con); 4688c2ecf20Sopenharmony_ci return 0; 4698c2ecf20Sopenharmony_ci} 4708c2ecf20Sopenharmony_ci 4718c2ecf20Sopenharmony_cistatic void lowcomms_error_report(struct sock *sk) 4728c2ecf20Sopenharmony_ci{ 4738c2ecf20Sopenharmony_ci struct connection *con; 4748c2ecf20Sopenharmony_ci void (*orig_report)(struct sock *) = NULL; 4758c2ecf20Sopenharmony_ci struct inet_sock *inet; 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_ci read_lock_bh(&sk->sk_callback_lock); 4788c2ecf20Sopenharmony_ci con = sock2con(sk); 4798c2ecf20Sopenharmony_ci if (con == NULL) 4808c2ecf20Sopenharmony_ci goto out; 4818c2ecf20Sopenharmony_ci 4828c2ecf20Sopenharmony_ci orig_report = listen_sock.sk_error_report; 4838c2ecf20Sopenharmony_ci 4848c2ecf20Sopenharmony_ci inet = inet_sk(sk); 4858c2ecf20Sopenharmony_ci switch (sk->sk_family) { 4868c2ecf20Sopenharmony_ci case AF_INET: 4878c2ecf20Sopenharmony_ci printk_ratelimited(KERN_ERR "dlm: node %d: socket error " 4888c2ecf20Sopenharmony_ci "sending to node %d at %pI4, dport %d, " 4898c2ecf20Sopenharmony_ci "sk_err=%d/%d\n", dlm_our_nodeid(), 4908c2ecf20Sopenharmony_ci 
con->nodeid, &inet->inet_daddr, 4918c2ecf20Sopenharmony_ci ntohs(inet->inet_dport), sk->sk_err, 4928c2ecf20Sopenharmony_ci sk->sk_err_soft); 4938c2ecf20Sopenharmony_ci break; 4948c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 4958c2ecf20Sopenharmony_ci case AF_INET6: 4968c2ecf20Sopenharmony_ci printk_ratelimited(KERN_ERR "dlm: node %d: socket error " 4978c2ecf20Sopenharmony_ci "sending to node %d at %pI6c, " 4988c2ecf20Sopenharmony_ci "dport %d, sk_err=%d/%d\n", dlm_our_nodeid(), 4998c2ecf20Sopenharmony_ci con->nodeid, &sk->sk_v6_daddr, 5008c2ecf20Sopenharmony_ci ntohs(inet->inet_dport), sk->sk_err, 5018c2ecf20Sopenharmony_ci sk->sk_err_soft); 5028c2ecf20Sopenharmony_ci break; 5038c2ecf20Sopenharmony_ci#endif 5048c2ecf20Sopenharmony_ci default: 5058c2ecf20Sopenharmony_ci printk_ratelimited(KERN_ERR "dlm: node %d: socket error " 5068c2ecf20Sopenharmony_ci "invalid socket family %d set, " 5078c2ecf20Sopenharmony_ci "sk_err=%d/%d\n", dlm_our_nodeid(), 5088c2ecf20Sopenharmony_ci sk->sk_family, sk->sk_err, sk->sk_err_soft); 5098c2ecf20Sopenharmony_ci goto out; 5108c2ecf20Sopenharmony_ci } 5118c2ecf20Sopenharmony_ciout: 5128c2ecf20Sopenharmony_ci read_unlock_bh(&sk->sk_callback_lock); 5138c2ecf20Sopenharmony_ci if (orig_report) 5148c2ecf20Sopenharmony_ci orig_report(sk); 5158c2ecf20Sopenharmony_ci} 5168c2ecf20Sopenharmony_ci 5178c2ecf20Sopenharmony_ci/* Note: sk_callback_lock must be locked before calling this function. 
*/ 5188c2ecf20Sopenharmony_cistatic void save_listen_callbacks(struct socket *sock) 5198c2ecf20Sopenharmony_ci{ 5208c2ecf20Sopenharmony_ci struct sock *sk = sock->sk; 5218c2ecf20Sopenharmony_ci 5228c2ecf20Sopenharmony_ci listen_sock.sk_data_ready = sk->sk_data_ready; 5238c2ecf20Sopenharmony_ci listen_sock.sk_state_change = sk->sk_state_change; 5248c2ecf20Sopenharmony_ci listen_sock.sk_write_space = sk->sk_write_space; 5258c2ecf20Sopenharmony_ci listen_sock.sk_error_report = sk->sk_error_report; 5268c2ecf20Sopenharmony_ci} 5278c2ecf20Sopenharmony_ci 5288c2ecf20Sopenharmony_cistatic void restore_callbacks(struct socket *sock) 5298c2ecf20Sopenharmony_ci{ 5308c2ecf20Sopenharmony_ci struct sock *sk = sock->sk; 5318c2ecf20Sopenharmony_ci 5328c2ecf20Sopenharmony_ci write_lock_bh(&sk->sk_callback_lock); 5338c2ecf20Sopenharmony_ci sk->sk_user_data = NULL; 5348c2ecf20Sopenharmony_ci sk->sk_data_ready = listen_sock.sk_data_ready; 5358c2ecf20Sopenharmony_ci sk->sk_state_change = listen_sock.sk_state_change; 5368c2ecf20Sopenharmony_ci sk->sk_write_space = listen_sock.sk_write_space; 5378c2ecf20Sopenharmony_ci sk->sk_error_report = listen_sock.sk_error_report; 5388c2ecf20Sopenharmony_ci write_unlock_bh(&sk->sk_callback_lock); 5398c2ecf20Sopenharmony_ci} 5408c2ecf20Sopenharmony_ci 5418c2ecf20Sopenharmony_ci/* Make a socket active */ 5428c2ecf20Sopenharmony_cistatic void add_sock(struct socket *sock, struct connection *con) 5438c2ecf20Sopenharmony_ci{ 5448c2ecf20Sopenharmony_ci struct sock *sk = sock->sk; 5458c2ecf20Sopenharmony_ci 5468c2ecf20Sopenharmony_ci write_lock_bh(&sk->sk_callback_lock); 5478c2ecf20Sopenharmony_ci con->sock = sock; 5488c2ecf20Sopenharmony_ci 5498c2ecf20Sopenharmony_ci sk->sk_user_data = con; 5508c2ecf20Sopenharmony_ci /* Install a data_ready callback */ 5518c2ecf20Sopenharmony_ci sk->sk_data_ready = lowcomms_data_ready; 5528c2ecf20Sopenharmony_ci sk->sk_write_space = lowcomms_write_space; 5538c2ecf20Sopenharmony_ci sk->sk_state_change = 
lowcomms_state_change; 5548c2ecf20Sopenharmony_ci sk->sk_allocation = GFP_NOFS; 5558c2ecf20Sopenharmony_ci sk->sk_error_report = lowcomms_error_report; 5568c2ecf20Sopenharmony_ci write_unlock_bh(&sk->sk_callback_lock); 5578c2ecf20Sopenharmony_ci} 5588c2ecf20Sopenharmony_ci 5598c2ecf20Sopenharmony_ci/* Add the port number to an IPv6 or 4 sockaddr and return the address 5608c2ecf20Sopenharmony_ci length */ 5618c2ecf20Sopenharmony_cistatic void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port, 5628c2ecf20Sopenharmony_ci int *addr_len) 5638c2ecf20Sopenharmony_ci{ 5648c2ecf20Sopenharmony_ci saddr->ss_family = dlm_local_addr[0]->ss_family; 5658c2ecf20Sopenharmony_ci if (saddr->ss_family == AF_INET) { 5668c2ecf20Sopenharmony_ci struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr; 5678c2ecf20Sopenharmony_ci in4_addr->sin_port = cpu_to_be16(port); 5688c2ecf20Sopenharmony_ci *addr_len = sizeof(struct sockaddr_in); 5698c2ecf20Sopenharmony_ci memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero)); 5708c2ecf20Sopenharmony_ci } else { 5718c2ecf20Sopenharmony_ci struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr; 5728c2ecf20Sopenharmony_ci in6_addr->sin6_port = cpu_to_be16(port); 5738c2ecf20Sopenharmony_ci *addr_len = sizeof(struct sockaddr_in6); 5748c2ecf20Sopenharmony_ci } 5758c2ecf20Sopenharmony_ci memset((char *)saddr + *addr_len, 0, sizeof(struct sockaddr_storage) - *addr_len); 5768c2ecf20Sopenharmony_ci} 5778c2ecf20Sopenharmony_ci 5788c2ecf20Sopenharmony_ci/* Close a remote connection and tidy up */ 5798c2ecf20Sopenharmony_cistatic void close_connection(struct connection *con, bool and_other, 5808c2ecf20Sopenharmony_ci bool tx, bool rx) 5818c2ecf20Sopenharmony_ci{ 5828c2ecf20Sopenharmony_ci bool closing = test_and_set_bit(CF_CLOSING, &con->flags); 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci if (tx && !closing && cancel_work_sync(&con->swork)) { 5858c2ecf20Sopenharmony_ci log_print("canceled swork for node %d", con->nodeid); 
5868c2ecf20Sopenharmony_ci clear_bit(CF_WRITE_PENDING, &con->flags); 5878c2ecf20Sopenharmony_ci } 5888c2ecf20Sopenharmony_ci if (rx && !closing && cancel_work_sync(&con->rwork)) { 5898c2ecf20Sopenharmony_ci log_print("canceled rwork for node %d", con->nodeid); 5908c2ecf20Sopenharmony_ci clear_bit(CF_READ_PENDING, &con->flags); 5918c2ecf20Sopenharmony_ci } 5928c2ecf20Sopenharmony_ci 5938c2ecf20Sopenharmony_ci mutex_lock(&con->sock_mutex); 5948c2ecf20Sopenharmony_ci if (con->sock) { 5958c2ecf20Sopenharmony_ci restore_callbacks(con->sock); 5968c2ecf20Sopenharmony_ci sock_release(con->sock); 5978c2ecf20Sopenharmony_ci con->sock = NULL; 5988c2ecf20Sopenharmony_ci } 5998c2ecf20Sopenharmony_ci if (con->othercon && and_other) { 6008c2ecf20Sopenharmony_ci /* Will only re-enter once. */ 6018c2ecf20Sopenharmony_ci close_connection(con->othercon, false, tx, rx); 6028c2ecf20Sopenharmony_ci } 6038c2ecf20Sopenharmony_ci 6048c2ecf20Sopenharmony_ci con->rx_leftover = 0; 6058c2ecf20Sopenharmony_ci con->retries = 0; 6068c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 6078c2ecf20Sopenharmony_ci clear_bit(CF_CLOSING, &con->flags); 6088c2ecf20Sopenharmony_ci} 6098c2ecf20Sopenharmony_ci 6108c2ecf20Sopenharmony_cistatic void shutdown_connection(struct connection *con) 6118c2ecf20Sopenharmony_ci{ 6128c2ecf20Sopenharmony_ci int ret; 6138c2ecf20Sopenharmony_ci 6148c2ecf20Sopenharmony_ci flush_work(&con->swork); 6158c2ecf20Sopenharmony_ci 6168c2ecf20Sopenharmony_ci mutex_lock(&con->sock_mutex); 6178c2ecf20Sopenharmony_ci /* nothing to shutdown */ 6188c2ecf20Sopenharmony_ci if (!con->sock) { 6198c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 6208c2ecf20Sopenharmony_ci return; 6218c2ecf20Sopenharmony_ci } 6228c2ecf20Sopenharmony_ci 6238c2ecf20Sopenharmony_ci set_bit(CF_SHUTDOWN, &con->flags); 6248c2ecf20Sopenharmony_ci ret = kernel_sock_shutdown(con->sock, SHUT_WR); 6258c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 6268c2ecf20Sopenharmony_ci if (ret) { 
6278c2ecf20Sopenharmony_ci log_print("Connection %p failed to shutdown: %d will force close", 6288c2ecf20Sopenharmony_ci con, ret); 6298c2ecf20Sopenharmony_ci goto force_close; 6308c2ecf20Sopenharmony_ci } else { 6318c2ecf20Sopenharmony_ci ret = wait_event_timeout(con->shutdown_wait, 6328c2ecf20Sopenharmony_ci !test_bit(CF_SHUTDOWN, &con->flags), 6338c2ecf20Sopenharmony_ci DLM_SHUTDOWN_WAIT_TIMEOUT); 6348c2ecf20Sopenharmony_ci if (ret == 0) { 6358c2ecf20Sopenharmony_ci log_print("Connection %p shutdown timed out, will force close", 6368c2ecf20Sopenharmony_ci con); 6378c2ecf20Sopenharmony_ci goto force_close; 6388c2ecf20Sopenharmony_ci } 6398c2ecf20Sopenharmony_ci } 6408c2ecf20Sopenharmony_ci 6418c2ecf20Sopenharmony_ci return; 6428c2ecf20Sopenharmony_ci 6438c2ecf20Sopenharmony_ciforce_close: 6448c2ecf20Sopenharmony_ci clear_bit(CF_SHUTDOWN, &con->flags); 6458c2ecf20Sopenharmony_ci close_connection(con, false, true, true); 6468c2ecf20Sopenharmony_ci} 6478c2ecf20Sopenharmony_ci 6488c2ecf20Sopenharmony_cistatic void dlm_tcp_shutdown(struct connection *con) 6498c2ecf20Sopenharmony_ci{ 6508c2ecf20Sopenharmony_ci if (con->othercon) 6518c2ecf20Sopenharmony_ci shutdown_connection(con->othercon); 6528c2ecf20Sopenharmony_ci shutdown_connection(con); 6538c2ecf20Sopenharmony_ci} 6548c2ecf20Sopenharmony_ci 6558c2ecf20Sopenharmony_cistatic int con_realloc_receive_buf(struct connection *con, int newlen) 6568c2ecf20Sopenharmony_ci{ 6578c2ecf20Sopenharmony_ci unsigned char *newbuf; 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci newbuf = kmalloc(newlen, GFP_NOFS); 6608c2ecf20Sopenharmony_ci if (!newbuf) 6618c2ecf20Sopenharmony_ci return -ENOMEM; 6628c2ecf20Sopenharmony_ci 6638c2ecf20Sopenharmony_ci /* copy any leftover from last receive */ 6648c2ecf20Sopenharmony_ci if (con->rx_leftover) 6658c2ecf20Sopenharmony_ci memmove(newbuf, con->rx_buf, con->rx_leftover); 6668c2ecf20Sopenharmony_ci 6678c2ecf20Sopenharmony_ci /* swap to new buffer space */ 6688c2ecf20Sopenharmony_ci 
kfree(con->rx_buf); 6698c2ecf20Sopenharmony_ci con->rx_buflen = newlen; 6708c2ecf20Sopenharmony_ci con->rx_buf = newbuf; 6718c2ecf20Sopenharmony_ci 6728c2ecf20Sopenharmony_ci return 0; 6738c2ecf20Sopenharmony_ci} 6748c2ecf20Sopenharmony_ci 6758c2ecf20Sopenharmony_ci/* Data received from remote end */ 6768c2ecf20Sopenharmony_cistatic int receive_from_sock(struct connection *con) 6778c2ecf20Sopenharmony_ci{ 6788c2ecf20Sopenharmony_ci int call_again_soon = 0; 6798c2ecf20Sopenharmony_ci struct msghdr msg; 6808c2ecf20Sopenharmony_ci struct kvec iov; 6818c2ecf20Sopenharmony_ci int ret, buflen; 6828c2ecf20Sopenharmony_ci 6838c2ecf20Sopenharmony_ci mutex_lock(&con->sock_mutex); 6848c2ecf20Sopenharmony_ci 6858c2ecf20Sopenharmony_ci if (con->sock == NULL) { 6868c2ecf20Sopenharmony_ci ret = -EAGAIN; 6878c2ecf20Sopenharmony_ci goto out_close; 6888c2ecf20Sopenharmony_ci } 6898c2ecf20Sopenharmony_ci 6908c2ecf20Sopenharmony_ci if (con->nodeid == 0) { 6918c2ecf20Sopenharmony_ci ret = -EINVAL; 6928c2ecf20Sopenharmony_ci goto out_close; 6938c2ecf20Sopenharmony_ci } 6948c2ecf20Sopenharmony_ci 6958c2ecf20Sopenharmony_ci /* realloc if we get new buffer size to read out */ 6968c2ecf20Sopenharmony_ci buflen = dlm_config.ci_buffer_size; 6978c2ecf20Sopenharmony_ci if (con->rx_buflen != buflen && con->rx_leftover <= buflen) { 6988c2ecf20Sopenharmony_ci ret = con_realloc_receive_buf(con, buflen); 6998c2ecf20Sopenharmony_ci if (ret < 0) 7008c2ecf20Sopenharmony_ci goto out_resched; 7018c2ecf20Sopenharmony_ci } 7028c2ecf20Sopenharmony_ci 7038c2ecf20Sopenharmony_ci /* calculate new buffer parameter regarding last receive and 7048c2ecf20Sopenharmony_ci * possible leftover bytes 7058c2ecf20Sopenharmony_ci */ 7068c2ecf20Sopenharmony_ci iov.iov_base = con->rx_buf + con->rx_leftover; 7078c2ecf20Sopenharmony_ci iov.iov_len = con->rx_buflen - con->rx_leftover; 7088c2ecf20Sopenharmony_ci 7098c2ecf20Sopenharmony_ci memset(&msg, 0, sizeof(msg)); 7108c2ecf20Sopenharmony_ci msg.msg_flags = MSG_DONTWAIT | 
MSG_NOSIGNAL; 7118c2ecf20Sopenharmony_ci ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, 7128c2ecf20Sopenharmony_ci msg.msg_flags); 7138c2ecf20Sopenharmony_ci if (ret <= 0) 7148c2ecf20Sopenharmony_ci goto out_close; 7158c2ecf20Sopenharmony_ci else if (ret == iov.iov_len) 7168c2ecf20Sopenharmony_ci call_again_soon = 1; 7178c2ecf20Sopenharmony_ci 7188c2ecf20Sopenharmony_ci /* new buflen according readed bytes and leftover from last receive */ 7198c2ecf20Sopenharmony_ci buflen = ret + con->rx_leftover; 7208c2ecf20Sopenharmony_ci ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen); 7218c2ecf20Sopenharmony_ci if (ret < 0) 7228c2ecf20Sopenharmony_ci goto out_close; 7238c2ecf20Sopenharmony_ci 7248c2ecf20Sopenharmony_ci /* calculate leftover bytes from process and put it into begin of 7258c2ecf20Sopenharmony_ci * the receive buffer, so next receive we have the full message 7268c2ecf20Sopenharmony_ci * at the start address of the receive buffer. 7278c2ecf20Sopenharmony_ci */ 7288c2ecf20Sopenharmony_ci con->rx_leftover = buflen - ret; 7298c2ecf20Sopenharmony_ci if (con->rx_leftover) { 7308c2ecf20Sopenharmony_ci memmove(con->rx_buf, con->rx_buf + ret, 7318c2ecf20Sopenharmony_ci con->rx_leftover); 7328c2ecf20Sopenharmony_ci call_again_soon = true; 7338c2ecf20Sopenharmony_ci } 7348c2ecf20Sopenharmony_ci 7358c2ecf20Sopenharmony_ci if (call_again_soon) 7368c2ecf20Sopenharmony_ci goto out_resched; 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 7398c2ecf20Sopenharmony_ci return 0; 7408c2ecf20Sopenharmony_ci 7418c2ecf20Sopenharmony_ciout_resched: 7428c2ecf20Sopenharmony_ci if (!test_and_set_bit(CF_READ_PENDING, &con->flags)) 7438c2ecf20Sopenharmony_ci queue_work(recv_workqueue, &con->rwork); 7448c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 7458c2ecf20Sopenharmony_ci return -EAGAIN; 7468c2ecf20Sopenharmony_ci 7478c2ecf20Sopenharmony_ciout_close: 7488c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 
7498c2ecf20Sopenharmony_ci if (ret != -EAGAIN) { 7508c2ecf20Sopenharmony_ci /* Reconnect when there is something to send */ 7518c2ecf20Sopenharmony_ci close_connection(con, false, true, false); 7528c2ecf20Sopenharmony_ci if (ret == 0) { 7538c2ecf20Sopenharmony_ci log_print("connection %p got EOF from %d", 7548c2ecf20Sopenharmony_ci con, con->nodeid); 7558c2ecf20Sopenharmony_ci /* handling for tcp shutdown */ 7568c2ecf20Sopenharmony_ci clear_bit(CF_SHUTDOWN, &con->flags); 7578c2ecf20Sopenharmony_ci wake_up(&con->shutdown_wait); 7588c2ecf20Sopenharmony_ci /* signal to breaking receive worker */ 7598c2ecf20Sopenharmony_ci ret = -1; 7608c2ecf20Sopenharmony_ci } 7618c2ecf20Sopenharmony_ci } 7628c2ecf20Sopenharmony_ci return ret; 7638c2ecf20Sopenharmony_ci} 7648c2ecf20Sopenharmony_ci 7658c2ecf20Sopenharmony_ci/* Listening socket is busy, accept a connection */ 7668c2ecf20Sopenharmony_cistatic int accept_from_sock(struct connection *con) 7678c2ecf20Sopenharmony_ci{ 7688c2ecf20Sopenharmony_ci int result; 7698c2ecf20Sopenharmony_ci struct sockaddr_storage peeraddr; 7708c2ecf20Sopenharmony_ci struct socket *newsock; 7718c2ecf20Sopenharmony_ci int len; 7728c2ecf20Sopenharmony_ci int nodeid; 7738c2ecf20Sopenharmony_ci struct connection *newcon; 7748c2ecf20Sopenharmony_ci struct connection *addcon; 7758c2ecf20Sopenharmony_ci unsigned int mark; 7768c2ecf20Sopenharmony_ci 7778c2ecf20Sopenharmony_ci if (!dlm_allow_conn) { 7788c2ecf20Sopenharmony_ci return -1; 7798c2ecf20Sopenharmony_ci } 7808c2ecf20Sopenharmony_ci 7818c2ecf20Sopenharmony_ci mutex_lock_nested(&con->sock_mutex, 0); 7828c2ecf20Sopenharmony_ci 7838c2ecf20Sopenharmony_ci if (!con->sock) { 7848c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 7858c2ecf20Sopenharmony_ci return -ENOTCONN; 7868c2ecf20Sopenharmony_ci } 7878c2ecf20Sopenharmony_ci 7888c2ecf20Sopenharmony_ci result = kernel_accept(con->sock, &newsock, O_NONBLOCK); 7898c2ecf20Sopenharmony_ci if (result < 0) 7908c2ecf20Sopenharmony_ci goto accept_err; 
7918c2ecf20Sopenharmony_ci 7928c2ecf20Sopenharmony_ci /* Get the connected socket's peer */ 7938c2ecf20Sopenharmony_ci memset(&peeraddr, 0, sizeof(peeraddr)); 7948c2ecf20Sopenharmony_ci len = newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 2); 7958c2ecf20Sopenharmony_ci if (len < 0) { 7968c2ecf20Sopenharmony_ci result = -ECONNABORTED; 7978c2ecf20Sopenharmony_ci goto accept_err; 7988c2ecf20Sopenharmony_ci } 7998c2ecf20Sopenharmony_ci 8008c2ecf20Sopenharmony_ci /* Get the new node's NODEID */ 8018c2ecf20Sopenharmony_ci make_sockaddr(&peeraddr, 0, &len); 8028c2ecf20Sopenharmony_ci if (addr_to_nodeid(&peeraddr, &nodeid)) { 8038c2ecf20Sopenharmony_ci unsigned char *b=(unsigned char *)&peeraddr; 8048c2ecf20Sopenharmony_ci log_print("connect from non cluster node"); 8058c2ecf20Sopenharmony_ci print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, 8068c2ecf20Sopenharmony_ci b, sizeof(struct sockaddr_storage)); 8078c2ecf20Sopenharmony_ci sock_release(newsock); 8088c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 8098c2ecf20Sopenharmony_ci return -1; 8108c2ecf20Sopenharmony_ci } 8118c2ecf20Sopenharmony_ci 8128c2ecf20Sopenharmony_ci dlm_comm_mark(nodeid, &mark); 8138c2ecf20Sopenharmony_ci sock_set_mark(newsock->sk, mark); 8148c2ecf20Sopenharmony_ci 8158c2ecf20Sopenharmony_ci log_print("got connection from %d", nodeid); 8168c2ecf20Sopenharmony_ci 8178c2ecf20Sopenharmony_ci /* Check to see if we already have a connection to this node. This 8188c2ecf20Sopenharmony_ci * could happen if the two nodes initiate a connection at roughly 8198c2ecf20Sopenharmony_ci * the same time and the connections cross on the wire. 
8208c2ecf20Sopenharmony_ci * In this case we store the incoming one in "othercon" 8218c2ecf20Sopenharmony_ci */ 8228c2ecf20Sopenharmony_ci newcon = nodeid2con(nodeid, GFP_NOFS); 8238c2ecf20Sopenharmony_ci if (!newcon) { 8248c2ecf20Sopenharmony_ci result = -ENOMEM; 8258c2ecf20Sopenharmony_ci goto accept_err; 8268c2ecf20Sopenharmony_ci } 8278c2ecf20Sopenharmony_ci mutex_lock_nested(&newcon->sock_mutex, 1); 8288c2ecf20Sopenharmony_ci if (newcon->sock) { 8298c2ecf20Sopenharmony_ci struct connection *othercon = newcon->othercon; 8308c2ecf20Sopenharmony_ci 8318c2ecf20Sopenharmony_ci if (!othercon) { 8328c2ecf20Sopenharmony_ci othercon = kzalloc(sizeof(*othercon), GFP_NOFS); 8338c2ecf20Sopenharmony_ci if (!othercon) { 8348c2ecf20Sopenharmony_ci log_print("failed to allocate incoming socket"); 8358c2ecf20Sopenharmony_ci mutex_unlock(&newcon->sock_mutex); 8368c2ecf20Sopenharmony_ci result = -ENOMEM; 8378c2ecf20Sopenharmony_ci goto accept_err; 8388c2ecf20Sopenharmony_ci } 8398c2ecf20Sopenharmony_ci 8408c2ecf20Sopenharmony_ci othercon->rx_buflen = dlm_config.ci_buffer_size; 8418c2ecf20Sopenharmony_ci othercon->rx_buf = kmalloc(othercon->rx_buflen, GFP_NOFS); 8428c2ecf20Sopenharmony_ci if (!othercon->rx_buf) { 8438c2ecf20Sopenharmony_ci mutex_unlock(&newcon->sock_mutex); 8448c2ecf20Sopenharmony_ci kfree(othercon); 8458c2ecf20Sopenharmony_ci log_print("failed to allocate incoming socket receive buffer"); 8468c2ecf20Sopenharmony_ci result = -ENOMEM; 8478c2ecf20Sopenharmony_ci goto accept_err; 8488c2ecf20Sopenharmony_ci } 8498c2ecf20Sopenharmony_ci 8508c2ecf20Sopenharmony_ci othercon->nodeid = nodeid; 8518c2ecf20Sopenharmony_ci othercon->rx_action = receive_from_sock; 8528c2ecf20Sopenharmony_ci mutex_init(&othercon->sock_mutex); 8538c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&othercon->writequeue); 8548c2ecf20Sopenharmony_ci spin_lock_init(&othercon->writequeue_lock); 8558c2ecf20Sopenharmony_ci INIT_WORK(&othercon->swork, process_send_sockets); 8568c2ecf20Sopenharmony_ci 
INIT_WORK(&othercon->rwork, process_recv_sockets); 8578c2ecf20Sopenharmony_ci init_waitqueue_head(&othercon->shutdown_wait); 8588c2ecf20Sopenharmony_ci set_bit(CF_IS_OTHERCON, &othercon->flags); 8598c2ecf20Sopenharmony_ci } else { 8608c2ecf20Sopenharmony_ci /* close other sock con if we have something new */ 8618c2ecf20Sopenharmony_ci close_connection(othercon, false, true, false); 8628c2ecf20Sopenharmony_ci } 8638c2ecf20Sopenharmony_ci 8648c2ecf20Sopenharmony_ci mutex_lock_nested(&othercon->sock_mutex, 2); 8658c2ecf20Sopenharmony_ci newcon->othercon = othercon; 8668c2ecf20Sopenharmony_ci add_sock(newsock, othercon); 8678c2ecf20Sopenharmony_ci addcon = othercon; 8688c2ecf20Sopenharmony_ci mutex_unlock(&othercon->sock_mutex); 8698c2ecf20Sopenharmony_ci } 8708c2ecf20Sopenharmony_ci else { 8718c2ecf20Sopenharmony_ci newcon->rx_action = receive_from_sock; 8728c2ecf20Sopenharmony_ci /* accept copies the sk after we've saved the callbacks, so we 8738c2ecf20Sopenharmony_ci don't want to save them a second time or comm errors will 8748c2ecf20Sopenharmony_ci result in calling sk_error_report recursively. 
*/ 8758c2ecf20Sopenharmony_ci add_sock(newsock, newcon); 8768c2ecf20Sopenharmony_ci addcon = newcon; 8778c2ecf20Sopenharmony_ci } 8788c2ecf20Sopenharmony_ci 8798c2ecf20Sopenharmony_ci mutex_unlock(&newcon->sock_mutex); 8808c2ecf20Sopenharmony_ci 8818c2ecf20Sopenharmony_ci /* 8828c2ecf20Sopenharmony_ci * Add it to the active queue in case we got data 8838c2ecf20Sopenharmony_ci * between processing the accept adding the socket 8848c2ecf20Sopenharmony_ci * to the read_sockets list 8858c2ecf20Sopenharmony_ci */ 8868c2ecf20Sopenharmony_ci if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags)) 8878c2ecf20Sopenharmony_ci queue_work(recv_workqueue, &addcon->rwork); 8888c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 8898c2ecf20Sopenharmony_ci 8908c2ecf20Sopenharmony_ci return 0; 8918c2ecf20Sopenharmony_ci 8928c2ecf20Sopenharmony_ciaccept_err: 8938c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 8948c2ecf20Sopenharmony_ci if (newsock) 8958c2ecf20Sopenharmony_ci sock_release(newsock); 8968c2ecf20Sopenharmony_ci 8978c2ecf20Sopenharmony_ci if (result != -EAGAIN) 8988c2ecf20Sopenharmony_ci log_print("error accepting connection from node: %d", result); 8998c2ecf20Sopenharmony_ci return result; 9008c2ecf20Sopenharmony_ci} 9018c2ecf20Sopenharmony_ci 9028c2ecf20Sopenharmony_cistatic void free_entry(struct writequeue_entry *e) 9038c2ecf20Sopenharmony_ci{ 9048c2ecf20Sopenharmony_ci __free_page(e->page); 9058c2ecf20Sopenharmony_ci kfree(e); 9068c2ecf20Sopenharmony_ci} 9078c2ecf20Sopenharmony_ci 9088c2ecf20Sopenharmony_ci/* 9098c2ecf20Sopenharmony_ci * writequeue_entry_complete - try to delete and free write queue entry 9108c2ecf20Sopenharmony_ci * @e: write queue entry to try to delete 9118c2ecf20Sopenharmony_ci * @completed: bytes completed 9128c2ecf20Sopenharmony_ci * 9138c2ecf20Sopenharmony_ci * writequeue_lock must be held. 
9148c2ecf20Sopenharmony_ci */ 9158c2ecf20Sopenharmony_cistatic void writequeue_entry_complete(struct writequeue_entry *e, int completed) 9168c2ecf20Sopenharmony_ci{ 9178c2ecf20Sopenharmony_ci e->offset += completed; 9188c2ecf20Sopenharmony_ci e->len -= completed; 9198c2ecf20Sopenharmony_ci 9208c2ecf20Sopenharmony_ci if (e->len == 0 && e->users == 0) { 9218c2ecf20Sopenharmony_ci list_del(&e->list); 9228c2ecf20Sopenharmony_ci free_entry(e); 9238c2ecf20Sopenharmony_ci } 9248c2ecf20Sopenharmony_ci} 9258c2ecf20Sopenharmony_ci 9268c2ecf20Sopenharmony_ci/* 9278c2ecf20Sopenharmony_ci * sctp_bind_addrs - bind a SCTP socket to all our addresses 9288c2ecf20Sopenharmony_ci */ 9298c2ecf20Sopenharmony_cistatic int sctp_bind_addrs(struct connection *con, uint16_t port) 9308c2ecf20Sopenharmony_ci{ 9318c2ecf20Sopenharmony_ci struct sockaddr_storage localaddr; 9328c2ecf20Sopenharmony_ci struct sockaddr *addr = (struct sockaddr *)&localaddr; 9338c2ecf20Sopenharmony_ci int i, addr_len, result = 0; 9348c2ecf20Sopenharmony_ci 9358c2ecf20Sopenharmony_ci for (i = 0; i < dlm_local_count; i++) { 9368c2ecf20Sopenharmony_ci memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr)); 9378c2ecf20Sopenharmony_ci make_sockaddr(&localaddr, port, &addr_len); 9388c2ecf20Sopenharmony_ci 9398c2ecf20Sopenharmony_ci if (!i) 9408c2ecf20Sopenharmony_ci result = kernel_bind(con->sock, addr, addr_len); 9418c2ecf20Sopenharmony_ci else 9428c2ecf20Sopenharmony_ci result = sock_bind_add(con->sock->sk, addr, addr_len); 9438c2ecf20Sopenharmony_ci 9448c2ecf20Sopenharmony_ci if (result < 0) { 9458c2ecf20Sopenharmony_ci log_print("Can't bind to %d addr number %d, %d.\n", 9468c2ecf20Sopenharmony_ci port, i + 1, result); 9478c2ecf20Sopenharmony_ci break; 9488c2ecf20Sopenharmony_ci } 9498c2ecf20Sopenharmony_ci } 9508c2ecf20Sopenharmony_ci return result; 9518c2ecf20Sopenharmony_ci} 9528c2ecf20Sopenharmony_ci 9538c2ecf20Sopenharmony_ci/* Initiate an SCTP association. 
9548c2ecf20Sopenharmony_ci This is a special case of send_to_sock() in that we don't yet have a 9558c2ecf20Sopenharmony_ci peeled-off socket for this association, so we use the listening socket 9568c2ecf20Sopenharmony_ci and add the primary IP address of the remote node. 9578c2ecf20Sopenharmony_ci */ 9588c2ecf20Sopenharmony_cistatic void sctp_connect_to_sock(struct connection *con) 9598c2ecf20Sopenharmony_ci{ 9608c2ecf20Sopenharmony_ci struct sockaddr_storage daddr; 9618c2ecf20Sopenharmony_ci int result; 9628c2ecf20Sopenharmony_ci int addr_len; 9638c2ecf20Sopenharmony_ci struct socket *sock; 9648c2ecf20Sopenharmony_ci unsigned int mark; 9658c2ecf20Sopenharmony_ci 9668c2ecf20Sopenharmony_ci if (con->nodeid == 0) { 9678c2ecf20Sopenharmony_ci log_print("attempt to connect sock 0 foiled"); 9688c2ecf20Sopenharmony_ci return; 9698c2ecf20Sopenharmony_ci } 9708c2ecf20Sopenharmony_ci 9718c2ecf20Sopenharmony_ci dlm_comm_mark(con->nodeid, &mark); 9728c2ecf20Sopenharmony_ci 9738c2ecf20Sopenharmony_ci mutex_lock(&con->sock_mutex); 9748c2ecf20Sopenharmony_ci 9758c2ecf20Sopenharmony_ci /* Some odd races can cause double-connects, ignore them */ 9768c2ecf20Sopenharmony_ci if (con->retries++ > MAX_CONNECT_RETRIES) 9778c2ecf20Sopenharmony_ci goto out; 9788c2ecf20Sopenharmony_ci 9798c2ecf20Sopenharmony_ci if (con->sock) { 9808c2ecf20Sopenharmony_ci log_print("node %d already connected.", con->nodeid); 9818c2ecf20Sopenharmony_ci goto out; 9828c2ecf20Sopenharmony_ci } 9838c2ecf20Sopenharmony_ci 9848c2ecf20Sopenharmony_ci memset(&daddr, 0, sizeof(daddr)); 9858c2ecf20Sopenharmony_ci result = nodeid_to_addr(con->nodeid, &daddr, NULL, true); 9868c2ecf20Sopenharmony_ci if (result < 0) { 9878c2ecf20Sopenharmony_ci log_print("no address for nodeid %d", con->nodeid); 9888c2ecf20Sopenharmony_ci goto out; 9898c2ecf20Sopenharmony_ci } 9908c2ecf20Sopenharmony_ci 9918c2ecf20Sopenharmony_ci /* Create a socket to communicate with */ 9928c2ecf20Sopenharmony_ci result = sock_create_kern(&init_net, 
dlm_local_addr[0]->ss_family, 9938c2ecf20Sopenharmony_ci SOCK_STREAM, IPPROTO_SCTP, &sock); 9948c2ecf20Sopenharmony_ci if (result < 0) 9958c2ecf20Sopenharmony_ci goto socket_err; 9968c2ecf20Sopenharmony_ci 9978c2ecf20Sopenharmony_ci sock_set_mark(sock->sk, mark); 9988c2ecf20Sopenharmony_ci 9998c2ecf20Sopenharmony_ci con->rx_action = receive_from_sock; 10008c2ecf20Sopenharmony_ci con->connect_action = sctp_connect_to_sock; 10018c2ecf20Sopenharmony_ci add_sock(sock, con); 10028c2ecf20Sopenharmony_ci 10038c2ecf20Sopenharmony_ci /* Bind to all addresses. */ 10048c2ecf20Sopenharmony_ci if (sctp_bind_addrs(con, 0)) 10058c2ecf20Sopenharmony_ci goto bind_err; 10068c2ecf20Sopenharmony_ci 10078c2ecf20Sopenharmony_ci make_sockaddr(&daddr, dlm_config.ci_tcp_port, &addr_len); 10088c2ecf20Sopenharmony_ci 10098c2ecf20Sopenharmony_ci log_print("connecting to %d", con->nodeid); 10108c2ecf20Sopenharmony_ci 10118c2ecf20Sopenharmony_ci /* Turn off Nagle's algorithm */ 10128c2ecf20Sopenharmony_ci sctp_sock_set_nodelay(sock->sk); 10138c2ecf20Sopenharmony_ci 10148c2ecf20Sopenharmony_ci /* 10158c2ecf20Sopenharmony_ci * Make sock->ops->connect() function return in specified time, 10168c2ecf20Sopenharmony_ci * since O_NONBLOCK argument in connect() function does not work here, 10178c2ecf20Sopenharmony_ci * then, we should restore the default value of this attribute. 
10188c2ecf20Sopenharmony_ci */ 10198c2ecf20Sopenharmony_ci sock_set_sndtimeo(sock->sk, 5); 10208c2ecf20Sopenharmony_ci result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len, 10218c2ecf20Sopenharmony_ci 0); 10228c2ecf20Sopenharmony_ci sock_set_sndtimeo(sock->sk, 0); 10238c2ecf20Sopenharmony_ci 10248c2ecf20Sopenharmony_ci if (result == -EINPROGRESS) 10258c2ecf20Sopenharmony_ci result = 0; 10268c2ecf20Sopenharmony_ci if (result == 0) 10278c2ecf20Sopenharmony_ci goto out; 10288c2ecf20Sopenharmony_ci 10298c2ecf20Sopenharmony_cibind_err: 10308c2ecf20Sopenharmony_ci con->sock = NULL; 10318c2ecf20Sopenharmony_ci sock_release(sock); 10328c2ecf20Sopenharmony_ci 10338c2ecf20Sopenharmony_cisocket_err: 10348c2ecf20Sopenharmony_ci /* 10358c2ecf20Sopenharmony_ci * Some errors are fatal and this list might need adjusting. For other 10368c2ecf20Sopenharmony_ci * errors we try again until the max number of retries is reached. 10378c2ecf20Sopenharmony_ci */ 10388c2ecf20Sopenharmony_ci if (result != -EHOSTUNREACH && 10398c2ecf20Sopenharmony_ci result != -ENETUNREACH && 10408c2ecf20Sopenharmony_ci result != -ENETDOWN && 10418c2ecf20Sopenharmony_ci result != -EINVAL && 10428c2ecf20Sopenharmony_ci result != -EPROTONOSUPPORT) { 10438c2ecf20Sopenharmony_ci log_print("connect %d try %d error %d", con->nodeid, 10448c2ecf20Sopenharmony_ci con->retries, result); 10458c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 10468c2ecf20Sopenharmony_ci msleep(1000); 10478c2ecf20Sopenharmony_ci lowcomms_connect_sock(con); 10488c2ecf20Sopenharmony_ci return; 10498c2ecf20Sopenharmony_ci } 10508c2ecf20Sopenharmony_ci 10518c2ecf20Sopenharmony_ciout: 10528c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 10538c2ecf20Sopenharmony_ci} 10548c2ecf20Sopenharmony_ci 10558c2ecf20Sopenharmony_ci/* Connect a new socket to its peer */ 10568c2ecf20Sopenharmony_cistatic void tcp_connect_to_sock(struct connection *con) 10578c2ecf20Sopenharmony_ci{ 10588c2ecf20Sopenharmony_ci struct 
sockaddr_storage saddr, src_addr; 10598c2ecf20Sopenharmony_ci int addr_len; 10608c2ecf20Sopenharmony_ci struct socket *sock = NULL; 10618c2ecf20Sopenharmony_ci unsigned int mark; 10628c2ecf20Sopenharmony_ci int result; 10638c2ecf20Sopenharmony_ci 10648c2ecf20Sopenharmony_ci if (con->nodeid == 0) { 10658c2ecf20Sopenharmony_ci log_print("attempt to connect sock 0 foiled"); 10668c2ecf20Sopenharmony_ci return; 10678c2ecf20Sopenharmony_ci } 10688c2ecf20Sopenharmony_ci 10698c2ecf20Sopenharmony_ci dlm_comm_mark(con->nodeid, &mark); 10708c2ecf20Sopenharmony_ci 10718c2ecf20Sopenharmony_ci mutex_lock(&con->sock_mutex); 10728c2ecf20Sopenharmony_ci if (con->retries++ > MAX_CONNECT_RETRIES) 10738c2ecf20Sopenharmony_ci goto out; 10748c2ecf20Sopenharmony_ci 10758c2ecf20Sopenharmony_ci /* Some odd races can cause double-connects, ignore them */ 10768c2ecf20Sopenharmony_ci if (con->sock) 10778c2ecf20Sopenharmony_ci goto out; 10788c2ecf20Sopenharmony_ci 10798c2ecf20Sopenharmony_ci /* Create a socket to communicate with */ 10808c2ecf20Sopenharmony_ci result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, 10818c2ecf20Sopenharmony_ci SOCK_STREAM, IPPROTO_TCP, &sock); 10828c2ecf20Sopenharmony_ci if (result < 0) 10838c2ecf20Sopenharmony_ci goto out_err; 10848c2ecf20Sopenharmony_ci 10858c2ecf20Sopenharmony_ci sock_set_mark(sock->sk, mark); 10868c2ecf20Sopenharmony_ci 10878c2ecf20Sopenharmony_ci memset(&saddr, 0, sizeof(saddr)); 10888c2ecf20Sopenharmony_ci result = nodeid_to_addr(con->nodeid, &saddr, NULL, false); 10898c2ecf20Sopenharmony_ci if (result < 0) { 10908c2ecf20Sopenharmony_ci log_print("no address for nodeid %d", con->nodeid); 10918c2ecf20Sopenharmony_ci goto out_err; 10928c2ecf20Sopenharmony_ci } 10938c2ecf20Sopenharmony_ci 10948c2ecf20Sopenharmony_ci con->rx_action = receive_from_sock; 10958c2ecf20Sopenharmony_ci con->connect_action = tcp_connect_to_sock; 10968c2ecf20Sopenharmony_ci con->shutdown_action = dlm_tcp_shutdown; 10978c2ecf20Sopenharmony_ci add_sock(sock, 
con); 10988c2ecf20Sopenharmony_ci 10998c2ecf20Sopenharmony_ci /* Bind to our cluster-known address connecting to avoid 11008c2ecf20Sopenharmony_ci routing problems */ 11018c2ecf20Sopenharmony_ci memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr)); 11028c2ecf20Sopenharmony_ci make_sockaddr(&src_addr, 0, &addr_len); 11038c2ecf20Sopenharmony_ci result = sock->ops->bind(sock, (struct sockaddr *) &src_addr, 11048c2ecf20Sopenharmony_ci addr_len); 11058c2ecf20Sopenharmony_ci if (result < 0) { 11068c2ecf20Sopenharmony_ci log_print("could not bind for connect: %d", result); 11078c2ecf20Sopenharmony_ci /* This *may* not indicate a critical error */ 11088c2ecf20Sopenharmony_ci } 11098c2ecf20Sopenharmony_ci 11108c2ecf20Sopenharmony_ci make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); 11118c2ecf20Sopenharmony_ci 11128c2ecf20Sopenharmony_ci log_print("connecting to %d", con->nodeid); 11138c2ecf20Sopenharmony_ci 11148c2ecf20Sopenharmony_ci /* Turn off Nagle's algorithm */ 11158c2ecf20Sopenharmony_ci tcp_sock_set_nodelay(sock->sk); 11168c2ecf20Sopenharmony_ci 11178c2ecf20Sopenharmony_ci result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, 11188c2ecf20Sopenharmony_ci O_NONBLOCK); 11198c2ecf20Sopenharmony_ci if (result == -EINPROGRESS) 11208c2ecf20Sopenharmony_ci result = 0; 11218c2ecf20Sopenharmony_ci if (result == 0) 11228c2ecf20Sopenharmony_ci goto out; 11238c2ecf20Sopenharmony_ci 11248c2ecf20Sopenharmony_ciout_err: 11258c2ecf20Sopenharmony_ci if (con->sock) { 11268c2ecf20Sopenharmony_ci sock_release(con->sock); 11278c2ecf20Sopenharmony_ci con->sock = NULL; 11288c2ecf20Sopenharmony_ci } else if (sock) { 11298c2ecf20Sopenharmony_ci sock_release(sock); 11308c2ecf20Sopenharmony_ci } 11318c2ecf20Sopenharmony_ci /* 11328c2ecf20Sopenharmony_ci * Some errors are fatal and this list might need adjusting. For other 11338c2ecf20Sopenharmony_ci * errors we try again until the max number of retries is reached. 
11348c2ecf20Sopenharmony_ci */ 11358c2ecf20Sopenharmony_ci if (result != -EHOSTUNREACH && 11368c2ecf20Sopenharmony_ci result != -ENETUNREACH && 11378c2ecf20Sopenharmony_ci result != -ENETDOWN && 11388c2ecf20Sopenharmony_ci result != -EINVAL && 11398c2ecf20Sopenharmony_ci result != -EPROTONOSUPPORT) { 11408c2ecf20Sopenharmony_ci log_print("connect %d try %d error %d", con->nodeid, 11418c2ecf20Sopenharmony_ci con->retries, result); 11428c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 11438c2ecf20Sopenharmony_ci msleep(1000); 11448c2ecf20Sopenharmony_ci lowcomms_connect_sock(con); 11458c2ecf20Sopenharmony_ci return; 11468c2ecf20Sopenharmony_ci } 11478c2ecf20Sopenharmony_ciout: 11488c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 11498c2ecf20Sopenharmony_ci return; 11508c2ecf20Sopenharmony_ci} 11518c2ecf20Sopenharmony_ci 11528c2ecf20Sopenharmony_cistatic struct socket *tcp_create_listen_sock(struct connection *con, 11538c2ecf20Sopenharmony_ci struct sockaddr_storage *saddr) 11548c2ecf20Sopenharmony_ci{ 11558c2ecf20Sopenharmony_ci struct socket *sock = NULL; 11568c2ecf20Sopenharmony_ci int result = 0; 11578c2ecf20Sopenharmony_ci int addr_len; 11588c2ecf20Sopenharmony_ci 11598c2ecf20Sopenharmony_ci if (dlm_local_addr[0]->ss_family == AF_INET) 11608c2ecf20Sopenharmony_ci addr_len = sizeof(struct sockaddr_in); 11618c2ecf20Sopenharmony_ci else 11628c2ecf20Sopenharmony_ci addr_len = sizeof(struct sockaddr_in6); 11638c2ecf20Sopenharmony_ci 11648c2ecf20Sopenharmony_ci /* Create a socket to communicate with */ 11658c2ecf20Sopenharmony_ci result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, 11668c2ecf20Sopenharmony_ci SOCK_STREAM, IPPROTO_TCP, &sock); 11678c2ecf20Sopenharmony_ci if (result < 0) { 11688c2ecf20Sopenharmony_ci log_print("Can't create listening comms socket"); 11698c2ecf20Sopenharmony_ci goto create_out; 11708c2ecf20Sopenharmony_ci } 11718c2ecf20Sopenharmony_ci 11728c2ecf20Sopenharmony_ci sock_set_mark(sock->sk, dlm_config.ci_mark); 
11738c2ecf20Sopenharmony_ci 11748c2ecf20Sopenharmony_ci /* Turn off Nagle's algorithm */ 11758c2ecf20Sopenharmony_ci tcp_sock_set_nodelay(sock->sk); 11768c2ecf20Sopenharmony_ci 11778c2ecf20Sopenharmony_ci sock_set_reuseaddr(sock->sk); 11788c2ecf20Sopenharmony_ci 11798c2ecf20Sopenharmony_ci write_lock_bh(&sock->sk->sk_callback_lock); 11808c2ecf20Sopenharmony_ci sock->sk->sk_user_data = con; 11818c2ecf20Sopenharmony_ci save_listen_callbacks(sock); 11828c2ecf20Sopenharmony_ci con->rx_action = accept_from_sock; 11838c2ecf20Sopenharmony_ci con->connect_action = tcp_connect_to_sock; 11848c2ecf20Sopenharmony_ci write_unlock_bh(&sock->sk->sk_callback_lock); 11858c2ecf20Sopenharmony_ci 11868c2ecf20Sopenharmony_ci /* Bind to our port */ 11878c2ecf20Sopenharmony_ci make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); 11888c2ecf20Sopenharmony_ci result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); 11898c2ecf20Sopenharmony_ci if (result < 0) { 11908c2ecf20Sopenharmony_ci log_print("Can't bind to port %d", dlm_config.ci_tcp_port); 11918c2ecf20Sopenharmony_ci sock_release(sock); 11928c2ecf20Sopenharmony_ci sock = NULL; 11938c2ecf20Sopenharmony_ci con->sock = NULL; 11948c2ecf20Sopenharmony_ci goto create_out; 11958c2ecf20Sopenharmony_ci } 11968c2ecf20Sopenharmony_ci sock_set_keepalive(sock->sk); 11978c2ecf20Sopenharmony_ci 11988c2ecf20Sopenharmony_ci result = sock->ops->listen(sock, 5); 11998c2ecf20Sopenharmony_ci if (result < 0) { 12008c2ecf20Sopenharmony_ci log_print("Can't listen on port %d", dlm_config.ci_tcp_port); 12018c2ecf20Sopenharmony_ci sock_release(sock); 12028c2ecf20Sopenharmony_ci sock = NULL; 12038c2ecf20Sopenharmony_ci goto create_out; 12048c2ecf20Sopenharmony_ci } 12058c2ecf20Sopenharmony_ci 12068c2ecf20Sopenharmony_cicreate_out: 12078c2ecf20Sopenharmony_ci return sock; 12088c2ecf20Sopenharmony_ci} 12098c2ecf20Sopenharmony_ci 12108c2ecf20Sopenharmony_ci/* Get local addresses */ 12118c2ecf20Sopenharmony_cistatic void init_local(void) 
12128c2ecf20Sopenharmony_ci{ 12138c2ecf20Sopenharmony_ci struct sockaddr_storage sas, *addr; 12148c2ecf20Sopenharmony_ci int i; 12158c2ecf20Sopenharmony_ci 12168c2ecf20Sopenharmony_ci dlm_local_count = 0; 12178c2ecf20Sopenharmony_ci for (i = 0; i < DLM_MAX_ADDR_COUNT; i++) { 12188c2ecf20Sopenharmony_ci if (dlm_our_addr(&sas, i)) 12198c2ecf20Sopenharmony_ci break; 12208c2ecf20Sopenharmony_ci 12218c2ecf20Sopenharmony_ci addr = kmemdup(&sas, sizeof(*addr), GFP_NOFS); 12228c2ecf20Sopenharmony_ci if (!addr) 12238c2ecf20Sopenharmony_ci break; 12248c2ecf20Sopenharmony_ci dlm_local_addr[dlm_local_count++] = addr; 12258c2ecf20Sopenharmony_ci } 12268c2ecf20Sopenharmony_ci} 12278c2ecf20Sopenharmony_ci 12288c2ecf20Sopenharmony_cistatic void deinit_local(void) 12298c2ecf20Sopenharmony_ci{ 12308c2ecf20Sopenharmony_ci int i; 12318c2ecf20Sopenharmony_ci 12328c2ecf20Sopenharmony_ci for (i = 0; i < dlm_local_count; i++) 12338c2ecf20Sopenharmony_ci kfree(dlm_local_addr[i]); 12348c2ecf20Sopenharmony_ci} 12358c2ecf20Sopenharmony_ci 12368c2ecf20Sopenharmony_ci/* Initialise SCTP socket and bind to all interfaces */ 12378c2ecf20Sopenharmony_cistatic int sctp_listen_for_all(void) 12388c2ecf20Sopenharmony_ci{ 12398c2ecf20Sopenharmony_ci struct socket *sock = NULL; 12408c2ecf20Sopenharmony_ci int result = -EINVAL; 12418c2ecf20Sopenharmony_ci struct connection *con = nodeid2con(0, GFP_NOFS); 12428c2ecf20Sopenharmony_ci 12438c2ecf20Sopenharmony_ci if (!con) 12448c2ecf20Sopenharmony_ci return -ENOMEM; 12458c2ecf20Sopenharmony_ci 12468c2ecf20Sopenharmony_ci log_print("Using SCTP for communications"); 12478c2ecf20Sopenharmony_ci 12488c2ecf20Sopenharmony_ci result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, 12498c2ecf20Sopenharmony_ci SOCK_STREAM, IPPROTO_SCTP, &sock); 12508c2ecf20Sopenharmony_ci if (result < 0) { 12518c2ecf20Sopenharmony_ci log_print("Can't create comms socket, check SCTP is loaded"); 12528c2ecf20Sopenharmony_ci goto out; 12538c2ecf20Sopenharmony_ci } 
12548c2ecf20Sopenharmony_ci 12558c2ecf20Sopenharmony_ci sock_set_rcvbuf(sock->sk, NEEDED_RMEM); 12568c2ecf20Sopenharmony_ci sock_set_mark(sock->sk, dlm_config.ci_mark); 12578c2ecf20Sopenharmony_ci sctp_sock_set_nodelay(sock->sk); 12588c2ecf20Sopenharmony_ci 12598c2ecf20Sopenharmony_ci write_lock_bh(&sock->sk->sk_callback_lock); 12608c2ecf20Sopenharmony_ci /* Init con struct */ 12618c2ecf20Sopenharmony_ci sock->sk->sk_user_data = con; 12628c2ecf20Sopenharmony_ci save_listen_callbacks(sock); 12638c2ecf20Sopenharmony_ci con->sock = sock; 12648c2ecf20Sopenharmony_ci con->sock->sk->sk_data_ready = lowcomms_data_ready; 12658c2ecf20Sopenharmony_ci con->rx_action = accept_from_sock; 12668c2ecf20Sopenharmony_ci con->connect_action = sctp_connect_to_sock; 12678c2ecf20Sopenharmony_ci 12688c2ecf20Sopenharmony_ci write_unlock_bh(&sock->sk->sk_callback_lock); 12698c2ecf20Sopenharmony_ci 12708c2ecf20Sopenharmony_ci /* Bind to all addresses. */ 12718c2ecf20Sopenharmony_ci if (sctp_bind_addrs(con, dlm_config.ci_tcp_port)) 12728c2ecf20Sopenharmony_ci goto create_delsock; 12738c2ecf20Sopenharmony_ci 12748c2ecf20Sopenharmony_ci result = sock->ops->listen(sock, 5); 12758c2ecf20Sopenharmony_ci if (result < 0) { 12768c2ecf20Sopenharmony_ci log_print("Can't set socket listening"); 12778c2ecf20Sopenharmony_ci goto create_delsock; 12788c2ecf20Sopenharmony_ci } 12798c2ecf20Sopenharmony_ci 12808c2ecf20Sopenharmony_ci return 0; 12818c2ecf20Sopenharmony_ci 12828c2ecf20Sopenharmony_cicreate_delsock: 12838c2ecf20Sopenharmony_ci sock_release(sock); 12848c2ecf20Sopenharmony_ci con->sock = NULL; 12858c2ecf20Sopenharmony_ciout: 12868c2ecf20Sopenharmony_ci return result; 12878c2ecf20Sopenharmony_ci} 12888c2ecf20Sopenharmony_ci 12898c2ecf20Sopenharmony_cistatic int tcp_listen_for_all(void) 12908c2ecf20Sopenharmony_ci{ 12918c2ecf20Sopenharmony_ci struct socket *sock = NULL; 12928c2ecf20Sopenharmony_ci struct connection *con = nodeid2con(0, GFP_NOFS); 12938c2ecf20Sopenharmony_ci int result = -EINVAL; 
12948c2ecf20Sopenharmony_ci 12958c2ecf20Sopenharmony_ci if (!con) 12968c2ecf20Sopenharmony_ci return -ENOMEM; 12978c2ecf20Sopenharmony_ci 12988c2ecf20Sopenharmony_ci /* We don't support multi-homed hosts */ 12998c2ecf20Sopenharmony_ci if (dlm_local_addr[1] != NULL) { 13008c2ecf20Sopenharmony_ci log_print("TCP protocol can't handle multi-homed hosts, " 13018c2ecf20Sopenharmony_ci "try SCTP"); 13028c2ecf20Sopenharmony_ci return -EINVAL; 13038c2ecf20Sopenharmony_ci } 13048c2ecf20Sopenharmony_ci 13058c2ecf20Sopenharmony_ci log_print("Using TCP for communications"); 13068c2ecf20Sopenharmony_ci 13078c2ecf20Sopenharmony_ci sock = tcp_create_listen_sock(con, dlm_local_addr[0]); 13088c2ecf20Sopenharmony_ci if (sock) { 13098c2ecf20Sopenharmony_ci add_sock(sock, con); 13108c2ecf20Sopenharmony_ci result = 0; 13118c2ecf20Sopenharmony_ci } 13128c2ecf20Sopenharmony_ci else { 13138c2ecf20Sopenharmony_ci result = -EADDRINUSE; 13148c2ecf20Sopenharmony_ci } 13158c2ecf20Sopenharmony_ci 13168c2ecf20Sopenharmony_ci return result; 13178c2ecf20Sopenharmony_ci} 13188c2ecf20Sopenharmony_ci 13198c2ecf20Sopenharmony_ci 13208c2ecf20Sopenharmony_ci 13218c2ecf20Sopenharmony_cistatic struct writequeue_entry *new_writequeue_entry(struct connection *con, 13228c2ecf20Sopenharmony_ci gfp_t allocation) 13238c2ecf20Sopenharmony_ci{ 13248c2ecf20Sopenharmony_ci struct writequeue_entry *entry; 13258c2ecf20Sopenharmony_ci 13268c2ecf20Sopenharmony_ci entry = kmalloc(sizeof(struct writequeue_entry), allocation); 13278c2ecf20Sopenharmony_ci if (!entry) 13288c2ecf20Sopenharmony_ci return NULL; 13298c2ecf20Sopenharmony_ci 13308c2ecf20Sopenharmony_ci entry->page = alloc_page(allocation); 13318c2ecf20Sopenharmony_ci if (!entry->page) { 13328c2ecf20Sopenharmony_ci kfree(entry); 13338c2ecf20Sopenharmony_ci return NULL; 13348c2ecf20Sopenharmony_ci } 13358c2ecf20Sopenharmony_ci 13368c2ecf20Sopenharmony_ci entry->offset = 0; 13378c2ecf20Sopenharmony_ci entry->len = 0; 13388c2ecf20Sopenharmony_ci entry->end = 0; 
13398c2ecf20Sopenharmony_ci entry->users = 0; 13408c2ecf20Sopenharmony_ci entry->con = con; 13418c2ecf20Sopenharmony_ci 13428c2ecf20Sopenharmony_ci return entry; 13438c2ecf20Sopenharmony_ci} 13448c2ecf20Sopenharmony_ci 13458c2ecf20Sopenharmony_civoid *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) 13468c2ecf20Sopenharmony_ci{ 13478c2ecf20Sopenharmony_ci struct connection *con; 13488c2ecf20Sopenharmony_ci struct writequeue_entry *e; 13498c2ecf20Sopenharmony_ci int offset = 0; 13508c2ecf20Sopenharmony_ci 13518c2ecf20Sopenharmony_ci con = nodeid2con(nodeid, allocation); 13528c2ecf20Sopenharmony_ci if (!con) 13538c2ecf20Sopenharmony_ci return NULL; 13548c2ecf20Sopenharmony_ci 13558c2ecf20Sopenharmony_ci spin_lock(&con->writequeue_lock); 13568c2ecf20Sopenharmony_ci e = list_entry(con->writequeue.prev, struct writequeue_entry, list); 13578c2ecf20Sopenharmony_ci if ((&e->list == &con->writequeue) || 13588c2ecf20Sopenharmony_ci (PAGE_SIZE - e->end < len)) { 13598c2ecf20Sopenharmony_ci e = NULL; 13608c2ecf20Sopenharmony_ci } else { 13618c2ecf20Sopenharmony_ci offset = e->end; 13628c2ecf20Sopenharmony_ci e->end += len; 13638c2ecf20Sopenharmony_ci e->users++; 13648c2ecf20Sopenharmony_ci } 13658c2ecf20Sopenharmony_ci spin_unlock(&con->writequeue_lock); 13668c2ecf20Sopenharmony_ci 13678c2ecf20Sopenharmony_ci if (e) { 13688c2ecf20Sopenharmony_ci got_one: 13698c2ecf20Sopenharmony_ci *ppc = page_address(e->page) + offset; 13708c2ecf20Sopenharmony_ci return e; 13718c2ecf20Sopenharmony_ci } 13728c2ecf20Sopenharmony_ci 13738c2ecf20Sopenharmony_ci e = new_writequeue_entry(con, allocation); 13748c2ecf20Sopenharmony_ci if (e) { 13758c2ecf20Sopenharmony_ci spin_lock(&con->writequeue_lock); 13768c2ecf20Sopenharmony_ci offset = e->end; 13778c2ecf20Sopenharmony_ci e->end += len; 13788c2ecf20Sopenharmony_ci e->users++; 13798c2ecf20Sopenharmony_ci list_add_tail(&e->list, &con->writequeue); 13808c2ecf20Sopenharmony_ci spin_unlock(&con->writequeue_lock); 
13818c2ecf20Sopenharmony_ci goto got_one; 13828c2ecf20Sopenharmony_ci } 13838c2ecf20Sopenharmony_ci return NULL; 13848c2ecf20Sopenharmony_ci} 13858c2ecf20Sopenharmony_ci 13868c2ecf20Sopenharmony_civoid dlm_lowcomms_commit_buffer(void *mh) 13878c2ecf20Sopenharmony_ci{ 13888c2ecf20Sopenharmony_ci struct writequeue_entry *e = (struct writequeue_entry *)mh; 13898c2ecf20Sopenharmony_ci struct connection *con = e->con; 13908c2ecf20Sopenharmony_ci int users; 13918c2ecf20Sopenharmony_ci 13928c2ecf20Sopenharmony_ci spin_lock(&con->writequeue_lock); 13938c2ecf20Sopenharmony_ci users = --e->users; 13948c2ecf20Sopenharmony_ci if (users) 13958c2ecf20Sopenharmony_ci goto out; 13968c2ecf20Sopenharmony_ci e->len = e->end - e->offset; 13978c2ecf20Sopenharmony_ci spin_unlock(&con->writequeue_lock); 13988c2ecf20Sopenharmony_ci 13998c2ecf20Sopenharmony_ci queue_work(send_workqueue, &con->swork); 14008c2ecf20Sopenharmony_ci return; 14018c2ecf20Sopenharmony_ci 14028c2ecf20Sopenharmony_ciout: 14038c2ecf20Sopenharmony_ci spin_unlock(&con->writequeue_lock); 14048c2ecf20Sopenharmony_ci return; 14058c2ecf20Sopenharmony_ci} 14068c2ecf20Sopenharmony_ci 14078c2ecf20Sopenharmony_ci/* Send a message */ 14088c2ecf20Sopenharmony_cistatic void send_to_sock(struct connection *con) 14098c2ecf20Sopenharmony_ci{ 14108c2ecf20Sopenharmony_ci int ret = 0; 14118c2ecf20Sopenharmony_ci const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; 14128c2ecf20Sopenharmony_ci struct writequeue_entry *e; 14138c2ecf20Sopenharmony_ci int len, offset; 14148c2ecf20Sopenharmony_ci int count = 0; 14158c2ecf20Sopenharmony_ci 14168c2ecf20Sopenharmony_ci mutex_lock(&con->sock_mutex); 14178c2ecf20Sopenharmony_ci if (con->sock == NULL) 14188c2ecf20Sopenharmony_ci goto out_connect; 14198c2ecf20Sopenharmony_ci 14208c2ecf20Sopenharmony_ci spin_lock(&con->writequeue_lock); 14218c2ecf20Sopenharmony_ci for (;;) { 14228c2ecf20Sopenharmony_ci e = list_entry(con->writequeue.next, struct writequeue_entry, 14238c2ecf20Sopenharmony_ci list); 
14248c2ecf20Sopenharmony_ci if ((struct list_head *) e == &con->writequeue) 14258c2ecf20Sopenharmony_ci break; 14268c2ecf20Sopenharmony_ci 14278c2ecf20Sopenharmony_ci len = e->len; 14288c2ecf20Sopenharmony_ci offset = e->offset; 14298c2ecf20Sopenharmony_ci BUG_ON(len == 0 && e->users == 0); 14308c2ecf20Sopenharmony_ci spin_unlock(&con->writequeue_lock); 14318c2ecf20Sopenharmony_ci 14328c2ecf20Sopenharmony_ci ret = 0; 14338c2ecf20Sopenharmony_ci if (len) { 14348c2ecf20Sopenharmony_ci ret = kernel_sendpage(con->sock, e->page, offset, len, 14358c2ecf20Sopenharmony_ci msg_flags); 14368c2ecf20Sopenharmony_ci if (ret == -EAGAIN || ret == 0) { 14378c2ecf20Sopenharmony_ci if (ret == -EAGAIN && 14388c2ecf20Sopenharmony_ci test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) && 14398c2ecf20Sopenharmony_ci !test_and_set_bit(CF_APP_LIMITED, &con->flags)) { 14408c2ecf20Sopenharmony_ci /* Notify TCP that we're limited by the 14418c2ecf20Sopenharmony_ci * application window size. 14428c2ecf20Sopenharmony_ci */ 14438c2ecf20Sopenharmony_ci set_bit(SOCK_NOSPACE, &con->sock->flags); 14448c2ecf20Sopenharmony_ci con->sock->sk->sk_write_pending++; 14458c2ecf20Sopenharmony_ci } 14468c2ecf20Sopenharmony_ci cond_resched(); 14478c2ecf20Sopenharmony_ci goto out; 14488c2ecf20Sopenharmony_ci } else if (ret < 0) 14498c2ecf20Sopenharmony_ci goto send_error; 14508c2ecf20Sopenharmony_ci } 14518c2ecf20Sopenharmony_ci 14528c2ecf20Sopenharmony_ci /* Don't starve people filling buffers */ 14538c2ecf20Sopenharmony_ci if (++count >= MAX_SEND_MSG_COUNT) { 14548c2ecf20Sopenharmony_ci cond_resched(); 14558c2ecf20Sopenharmony_ci count = 0; 14568c2ecf20Sopenharmony_ci } 14578c2ecf20Sopenharmony_ci 14588c2ecf20Sopenharmony_ci spin_lock(&con->writequeue_lock); 14598c2ecf20Sopenharmony_ci writequeue_entry_complete(e, ret); 14608c2ecf20Sopenharmony_ci } 14618c2ecf20Sopenharmony_ci spin_unlock(&con->writequeue_lock); 14628c2ecf20Sopenharmony_ciout: 14638c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 
14648c2ecf20Sopenharmony_ci return; 14658c2ecf20Sopenharmony_ci 14668c2ecf20Sopenharmony_cisend_error: 14678c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 14688c2ecf20Sopenharmony_ci close_connection(con, false, false, true); 14698c2ecf20Sopenharmony_ci /* Requeue the send work. When the work daemon runs again, it will try 14708c2ecf20Sopenharmony_ci a new connection, then call this function again. */ 14718c2ecf20Sopenharmony_ci queue_work(send_workqueue, &con->swork); 14728c2ecf20Sopenharmony_ci return; 14738c2ecf20Sopenharmony_ci 14748c2ecf20Sopenharmony_ciout_connect: 14758c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 14768c2ecf20Sopenharmony_ci queue_work(send_workqueue, &con->swork); 14778c2ecf20Sopenharmony_ci cond_resched(); 14788c2ecf20Sopenharmony_ci} 14798c2ecf20Sopenharmony_ci 14808c2ecf20Sopenharmony_cistatic void clean_one_writequeue(struct connection *con) 14818c2ecf20Sopenharmony_ci{ 14828c2ecf20Sopenharmony_ci struct writequeue_entry *e, *safe; 14838c2ecf20Sopenharmony_ci 14848c2ecf20Sopenharmony_ci spin_lock(&con->writequeue_lock); 14858c2ecf20Sopenharmony_ci list_for_each_entry_safe(e, safe, &con->writequeue, list) { 14868c2ecf20Sopenharmony_ci list_del(&e->list); 14878c2ecf20Sopenharmony_ci free_entry(e); 14888c2ecf20Sopenharmony_ci } 14898c2ecf20Sopenharmony_ci spin_unlock(&con->writequeue_lock); 14908c2ecf20Sopenharmony_ci} 14918c2ecf20Sopenharmony_ci 14928c2ecf20Sopenharmony_ci/* Called from recovery when it knows that a node has 14938c2ecf20Sopenharmony_ci left the cluster */ 14948c2ecf20Sopenharmony_ciint dlm_lowcomms_close(int nodeid) 14958c2ecf20Sopenharmony_ci{ 14968c2ecf20Sopenharmony_ci struct connection *con; 14978c2ecf20Sopenharmony_ci struct dlm_node_addr *na; 14988c2ecf20Sopenharmony_ci 14998c2ecf20Sopenharmony_ci log_print("closing connection to node %d", nodeid); 15008c2ecf20Sopenharmony_ci con = nodeid2con(nodeid, 0); 15018c2ecf20Sopenharmony_ci if (con) { 15028c2ecf20Sopenharmony_ci set_bit(CF_CLOSE, 
&con->flags); 15038c2ecf20Sopenharmony_ci close_connection(con, true, true, true); 15048c2ecf20Sopenharmony_ci clean_one_writequeue(con); 15058c2ecf20Sopenharmony_ci } 15068c2ecf20Sopenharmony_ci 15078c2ecf20Sopenharmony_ci spin_lock(&dlm_node_addrs_spin); 15088c2ecf20Sopenharmony_ci na = find_node_addr(nodeid); 15098c2ecf20Sopenharmony_ci if (na) { 15108c2ecf20Sopenharmony_ci list_del(&na->list); 15118c2ecf20Sopenharmony_ci while (na->addr_count--) 15128c2ecf20Sopenharmony_ci kfree(na->addr[na->addr_count]); 15138c2ecf20Sopenharmony_ci kfree(na); 15148c2ecf20Sopenharmony_ci } 15158c2ecf20Sopenharmony_ci spin_unlock(&dlm_node_addrs_spin); 15168c2ecf20Sopenharmony_ci 15178c2ecf20Sopenharmony_ci return 0; 15188c2ecf20Sopenharmony_ci} 15198c2ecf20Sopenharmony_ci 15208c2ecf20Sopenharmony_ci/* Receive workqueue function */ 15218c2ecf20Sopenharmony_cistatic void process_recv_sockets(struct work_struct *work) 15228c2ecf20Sopenharmony_ci{ 15238c2ecf20Sopenharmony_ci struct connection *con = container_of(work, struct connection, rwork); 15248c2ecf20Sopenharmony_ci int err; 15258c2ecf20Sopenharmony_ci 15268c2ecf20Sopenharmony_ci clear_bit(CF_READ_PENDING, &con->flags); 15278c2ecf20Sopenharmony_ci do { 15288c2ecf20Sopenharmony_ci err = con->rx_action(con); 15298c2ecf20Sopenharmony_ci } while (!err); 15308c2ecf20Sopenharmony_ci} 15318c2ecf20Sopenharmony_ci 15328c2ecf20Sopenharmony_ci/* Send workqueue function */ 15338c2ecf20Sopenharmony_cistatic void process_send_sockets(struct work_struct *work) 15348c2ecf20Sopenharmony_ci{ 15358c2ecf20Sopenharmony_ci struct connection *con = container_of(work, struct connection, swork); 15368c2ecf20Sopenharmony_ci 15378c2ecf20Sopenharmony_ci clear_bit(CF_WRITE_PENDING, &con->flags); 15388c2ecf20Sopenharmony_ci if (con->sock == NULL) /* not mutex protected so check it inside too */ 15398c2ecf20Sopenharmony_ci con->connect_action(con); 15408c2ecf20Sopenharmony_ci if (!list_empty(&con->writequeue)) 15418c2ecf20Sopenharmony_ci send_to_sock(con); 
15428c2ecf20Sopenharmony_ci} 15438c2ecf20Sopenharmony_ci 15448c2ecf20Sopenharmony_cistatic void work_stop(void) 15458c2ecf20Sopenharmony_ci{ 15468c2ecf20Sopenharmony_ci if (recv_workqueue) 15478c2ecf20Sopenharmony_ci destroy_workqueue(recv_workqueue); 15488c2ecf20Sopenharmony_ci if (send_workqueue) 15498c2ecf20Sopenharmony_ci destroy_workqueue(send_workqueue); 15508c2ecf20Sopenharmony_ci} 15518c2ecf20Sopenharmony_ci 15528c2ecf20Sopenharmony_cistatic int work_start(void) 15538c2ecf20Sopenharmony_ci{ 15548c2ecf20Sopenharmony_ci recv_workqueue = alloc_workqueue("dlm_recv", 15558c2ecf20Sopenharmony_ci WQ_UNBOUND | WQ_MEM_RECLAIM, 1); 15568c2ecf20Sopenharmony_ci if (!recv_workqueue) { 15578c2ecf20Sopenharmony_ci log_print("can't start dlm_recv"); 15588c2ecf20Sopenharmony_ci return -ENOMEM; 15598c2ecf20Sopenharmony_ci } 15608c2ecf20Sopenharmony_ci 15618c2ecf20Sopenharmony_ci send_workqueue = alloc_workqueue("dlm_send", 15628c2ecf20Sopenharmony_ci WQ_UNBOUND | WQ_MEM_RECLAIM, 1); 15638c2ecf20Sopenharmony_ci if (!send_workqueue) { 15648c2ecf20Sopenharmony_ci log_print("can't start dlm_send"); 15658c2ecf20Sopenharmony_ci destroy_workqueue(recv_workqueue); 15668c2ecf20Sopenharmony_ci return -ENOMEM; 15678c2ecf20Sopenharmony_ci } 15688c2ecf20Sopenharmony_ci 15698c2ecf20Sopenharmony_ci return 0; 15708c2ecf20Sopenharmony_ci} 15718c2ecf20Sopenharmony_ci 15728c2ecf20Sopenharmony_cistatic void _stop_conn(struct connection *con, bool and_other) 15738c2ecf20Sopenharmony_ci{ 15748c2ecf20Sopenharmony_ci mutex_lock(&con->sock_mutex); 15758c2ecf20Sopenharmony_ci set_bit(CF_CLOSE, &con->flags); 15768c2ecf20Sopenharmony_ci set_bit(CF_READ_PENDING, &con->flags); 15778c2ecf20Sopenharmony_ci set_bit(CF_WRITE_PENDING, &con->flags); 15788c2ecf20Sopenharmony_ci if (con->sock && con->sock->sk) { 15798c2ecf20Sopenharmony_ci write_lock_bh(&con->sock->sk->sk_callback_lock); 15808c2ecf20Sopenharmony_ci con->sock->sk->sk_user_data = NULL; 15818c2ecf20Sopenharmony_ci 
write_unlock_bh(&con->sock->sk->sk_callback_lock); 15828c2ecf20Sopenharmony_ci } 15838c2ecf20Sopenharmony_ci if (con->othercon && and_other) 15848c2ecf20Sopenharmony_ci _stop_conn(con->othercon, false); 15858c2ecf20Sopenharmony_ci mutex_unlock(&con->sock_mutex); 15868c2ecf20Sopenharmony_ci} 15878c2ecf20Sopenharmony_ci 15888c2ecf20Sopenharmony_cistatic void stop_conn(struct connection *con) 15898c2ecf20Sopenharmony_ci{ 15908c2ecf20Sopenharmony_ci _stop_conn(con, true); 15918c2ecf20Sopenharmony_ci} 15928c2ecf20Sopenharmony_ci 15938c2ecf20Sopenharmony_cistatic void shutdown_conn(struct connection *con) 15948c2ecf20Sopenharmony_ci{ 15958c2ecf20Sopenharmony_ci if (con->shutdown_action) 15968c2ecf20Sopenharmony_ci con->shutdown_action(con); 15978c2ecf20Sopenharmony_ci} 15988c2ecf20Sopenharmony_ci 15998c2ecf20Sopenharmony_cistatic void connection_release(struct rcu_head *rcu) 16008c2ecf20Sopenharmony_ci{ 16018c2ecf20Sopenharmony_ci struct connection *con = container_of(rcu, struct connection, rcu); 16028c2ecf20Sopenharmony_ci 16038c2ecf20Sopenharmony_ci kfree(con->rx_buf); 16048c2ecf20Sopenharmony_ci kfree(con); 16058c2ecf20Sopenharmony_ci} 16068c2ecf20Sopenharmony_ci 16078c2ecf20Sopenharmony_cistatic void free_conn(struct connection *con) 16088c2ecf20Sopenharmony_ci{ 16098c2ecf20Sopenharmony_ci close_connection(con, true, true, true); 16108c2ecf20Sopenharmony_ci spin_lock(&connections_lock); 16118c2ecf20Sopenharmony_ci hlist_del_rcu(&con->list); 16128c2ecf20Sopenharmony_ci spin_unlock(&connections_lock); 16138c2ecf20Sopenharmony_ci if (con->othercon) { 16148c2ecf20Sopenharmony_ci clean_one_writequeue(con->othercon); 16158c2ecf20Sopenharmony_ci call_rcu(&con->othercon->rcu, connection_release); 16168c2ecf20Sopenharmony_ci } 16178c2ecf20Sopenharmony_ci clean_one_writequeue(con); 16188c2ecf20Sopenharmony_ci call_rcu(&con->rcu, connection_release); 16198c2ecf20Sopenharmony_ci} 16208c2ecf20Sopenharmony_ci 16218c2ecf20Sopenharmony_cistatic void work_flush(void) 
16228c2ecf20Sopenharmony_ci{ 16238c2ecf20Sopenharmony_ci int ok, idx; 16248c2ecf20Sopenharmony_ci int i; 16258c2ecf20Sopenharmony_ci struct connection *con; 16268c2ecf20Sopenharmony_ci 16278c2ecf20Sopenharmony_ci do { 16288c2ecf20Sopenharmony_ci ok = 1; 16298c2ecf20Sopenharmony_ci foreach_conn(stop_conn); 16308c2ecf20Sopenharmony_ci if (recv_workqueue) 16318c2ecf20Sopenharmony_ci flush_workqueue(recv_workqueue); 16328c2ecf20Sopenharmony_ci if (send_workqueue) 16338c2ecf20Sopenharmony_ci flush_workqueue(send_workqueue); 16348c2ecf20Sopenharmony_ci idx = srcu_read_lock(&connections_srcu); 16358c2ecf20Sopenharmony_ci for (i = 0; i < CONN_HASH_SIZE && ok; i++) { 16368c2ecf20Sopenharmony_ci hlist_for_each_entry_rcu(con, &connection_hash[i], 16378c2ecf20Sopenharmony_ci list) { 16388c2ecf20Sopenharmony_ci ok &= test_bit(CF_READ_PENDING, &con->flags); 16398c2ecf20Sopenharmony_ci ok &= test_bit(CF_WRITE_PENDING, &con->flags); 16408c2ecf20Sopenharmony_ci if (con->othercon) { 16418c2ecf20Sopenharmony_ci ok &= test_bit(CF_READ_PENDING, 16428c2ecf20Sopenharmony_ci &con->othercon->flags); 16438c2ecf20Sopenharmony_ci ok &= test_bit(CF_WRITE_PENDING, 16448c2ecf20Sopenharmony_ci &con->othercon->flags); 16458c2ecf20Sopenharmony_ci } 16468c2ecf20Sopenharmony_ci } 16478c2ecf20Sopenharmony_ci } 16488c2ecf20Sopenharmony_ci srcu_read_unlock(&connections_srcu, idx); 16498c2ecf20Sopenharmony_ci } while (!ok); 16508c2ecf20Sopenharmony_ci} 16518c2ecf20Sopenharmony_ci 16528c2ecf20Sopenharmony_civoid dlm_lowcomms_stop(void) 16538c2ecf20Sopenharmony_ci{ 16548c2ecf20Sopenharmony_ci /* Set all the flags to prevent any 16558c2ecf20Sopenharmony_ci socket activity. 
16568c2ecf20Sopenharmony_ci */ 16578c2ecf20Sopenharmony_ci dlm_allow_conn = 0; 16588c2ecf20Sopenharmony_ci 16598c2ecf20Sopenharmony_ci if (recv_workqueue) 16608c2ecf20Sopenharmony_ci flush_workqueue(recv_workqueue); 16618c2ecf20Sopenharmony_ci if (send_workqueue) 16628c2ecf20Sopenharmony_ci flush_workqueue(send_workqueue); 16638c2ecf20Sopenharmony_ci 16648c2ecf20Sopenharmony_ci foreach_conn(shutdown_conn); 16658c2ecf20Sopenharmony_ci work_flush(); 16668c2ecf20Sopenharmony_ci foreach_conn(free_conn); 16678c2ecf20Sopenharmony_ci work_stop(); 16688c2ecf20Sopenharmony_ci deinit_local(); 16698c2ecf20Sopenharmony_ci} 16708c2ecf20Sopenharmony_ci 16718c2ecf20Sopenharmony_ciint dlm_lowcomms_start(void) 16728c2ecf20Sopenharmony_ci{ 16738c2ecf20Sopenharmony_ci int error = -EINVAL; 16748c2ecf20Sopenharmony_ci struct connection *con; 16758c2ecf20Sopenharmony_ci int i; 16768c2ecf20Sopenharmony_ci 16778c2ecf20Sopenharmony_ci for (i = 0; i < CONN_HASH_SIZE; i++) 16788c2ecf20Sopenharmony_ci INIT_HLIST_HEAD(&connection_hash[i]); 16798c2ecf20Sopenharmony_ci 16808c2ecf20Sopenharmony_ci init_local(); 16818c2ecf20Sopenharmony_ci if (!dlm_local_count) { 16828c2ecf20Sopenharmony_ci error = -ENOTCONN; 16838c2ecf20Sopenharmony_ci log_print("no local IP address has been set"); 16848c2ecf20Sopenharmony_ci goto fail; 16858c2ecf20Sopenharmony_ci } 16868c2ecf20Sopenharmony_ci 16878c2ecf20Sopenharmony_ci error = work_start(); 16888c2ecf20Sopenharmony_ci if (error) 16898c2ecf20Sopenharmony_ci goto fail; 16908c2ecf20Sopenharmony_ci 16918c2ecf20Sopenharmony_ci dlm_allow_conn = 1; 16928c2ecf20Sopenharmony_ci 16938c2ecf20Sopenharmony_ci /* Start listening */ 16948c2ecf20Sopenharmony_ci if (dlm_config.ci_protocol == 0) 16958c2ecf20Sopenharmony_ci error = tcp_listen_for_all(); 16968c2ecf20Sopenharmony_ci else 16978c2ecf20Sopenharmony_ci error = sctp_listen_for_all(); 16988c2ecf20Sopenharmony_ci if (error) 16998c2ecf20Sopenharmony_ci goto fail_unlisten; 17008c2ecf20Sopenharmony_ci 
17018c2ecf20Sopenharmony_ci return 0; 17028c2ecf20Sopenharmony_ci 17038c2ecf20Sopenharmony_cifail_unlisten: 17048c2ecf20Sopenharmony_ci dlm_allow_conn = 0; 17058c2ecf20Sopenharmony_ci con = nodeid2con(0,0); 17068c2ecf20Sopenharmony_ci if (con) 17078c2ecf20Sopenharmony_ci free_conn(con); 17088c2ecf20Sopenharmony_cifail: 17098c2ecf20Sopenharmony_ci return error; 17108c2ecf20Sopenharmony_ci} 17118c2ecf20Sopenharmony_ci 17128c2ecf20Sopenharmony_civoid dlm_lowcomms_exit(void) 17138c2ecf20Sopenharmony_ci{ 17148c2ecf20Sopenharmony_ci struct dlm_node_addr *na, *safe; 17158c2ecf20Sopenharmony_ci 17168c2ecf20Sopenharmony_ci spin_lock(&dlm_node_addrs_spin); 17178c2ecf20Sopenharmony_ci list_for_each_entry_safe(na, safe, &dlm_node_addrs, list) { 17188c2ecf20Sopenharmony_ci list_del(&na->list); 17198c2ecf20Sopenharmony_ci while (na->addr_count--) 17208c2ecf20Sopenharmony_ci kfree(na->addr[na->addr_count]); 17218c2ecf20Sopenharmony_ci kfree(na); 17228c2ecf20Sopenharmony_ci } 17238c2ecf20Sopenharmony_ci spin_unlock(&dlm_node_addrs_spin); 17248c2ecf20Sopenharmony_ci} 1725