// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- mode: c; c-basic-offset: 8; -*-
 *
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * Copyright (C) 2004 Oracle. All rights reserved.
 *
 * ----
 *
 * Callers for this were originally written against a very simple synchronous
 * API. This implementation reflects those simple callers. Some day I'm sure
 * we'll need to move to a more robust posting/callback mechanism.
 *
 * Transmit calls pass in kernel virtual addresses and block copying this into
 * the socket's tx buffers via a usual blocking sendmsg. They'll block waiting
 * for a failed socket to timeout. TX callers can also pass in a pointer to an
 * 'int' which gets filled with an errno off the wire in response to the
 * message they send.
 *
 * Handlers for unsolicited messages are registered. Each socket has a page
 * that incoming data is copied into. First the header, then the data.
 * Handlers are called from only one thread with a reference to this per-socket
 * page. This page is destroyed after the handler call, so it can't be
 * referenced beyond the call. Handlers may block but are discouraged from
 * doing so.
 *
 * Any framing errors (bad magic, large payload lengths) close a connection.
 *
 * Our sock_container holds the state we associate with a socket. Its current
 * framing state is held there as well as the refcounting we do around when it
 * is safe to tear down the socket. The socket is only finally torn down from
 * the container when the container loses all of its references -- so as long
 * as you hold a ref on the container you can trust that the socket is valid
 * for use with kernel socket APIs.
 *
 * Connections are initiated between a pair of nodes when the node with the
 * higher node number gets a heartbeat callback which indicates that the lower
 * numbered node has started heartbeating. The lower numbered node is passive
 * and only accepts the connection if the higher numbered node is heartbeating.
408c2ecf20Sopenharmony_ci */ 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci#include <linux/kernel.h> 438c2ecf20Sopenharmony_ci#include <linux/sched/mm.h> 448c2ecf20Sopenharmony_ci#include <linux/jiffies.h> 458c2ecf20Sopenharmony_ci#include <linux/slab.h> 468c2ecf20Sopenharmony_ci#include <linux/idr.h> 478c2ecf20Sopenharmony_ci#include <linux/kref.h> 488c2ecf20Sopenharmony_ci#include <linux/net.h> 498c2ecf20Sopenharmony_ci#include <linux/export.h> 508c2ecf20Sopenharmony_ci#include <net/tcp.h> 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci#include <linux/uaccess.h> 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci#include "heartbeat.h" 558c2ecf20Sopenharmony_ci#include "tcp.h" 568c2ecf20Sopenharmony_ci#include "nodemanager.h" 578c2ecf20Sopenharmony_ci#define MLOG_MASK_PREFIX ML_TCP 588c2ecf20Sopenharmony_ci#include "masklog.h" 598c2ecf20Sopenharmony_ci#include "quorum.h" 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci#include "tcp_internal.h" 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_ci#define SC_NODEF_FMT "node %s (num %u) at %pI4:%u" 648c2ecf20Sopenharmony_ci#define SC_NODEF_ARGS(sc) sc->sc_node->nd_name, sc->sc_node->nd_num, \ 658c2ecf20Sopenharmony_ci &sc->sc_node->nd_ipv4_address, \ 668c2ecf20Sopenharmony_ci ntohs(sc->sc_node->nd_ipv4_port) 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_ci/* 698c2ecf20Sopenharmony_ci * In the following two log macros, the whitespace after the ',' just 708c2ecf20Sopenharmony_ci * before ##args is intentional. Otherwise, gcc 2.95 will eat the 718c2ecf20Sopenharmony_ci * previous token if args expands to nothing. 728c2ecf20Sopenharmony_ci */ 738c2ecf20Sopenharmony_ci#define msglog(hdr, fmt, args...) 
do { \ 748c2ecf20Sopenharmony_ci typeof(hdr) __hdr = (hdr); \ 758c2ecf20Sopenharmony_ci mlog(ML_MSG, "[mag %u len %u typ %u stat %d sys_stat %d " \ 768c2ecf20Sopenharmony_ci "key %08x num %u] " fmt, \ 778c2ecf20Sopenharmony_ci be16_to_cpu(__hdr->magic), be16_to_cpu(__hdr->data_len), \ 788c2ecf20Sopenharmony_ci be16_to_cpu(__hdr->msg_type), be32_to_cpu(__hdr->status), \ 798c2ecf20Sopenharmony_ci be32_to_cpu(__hdr->sys_status), be32_to_cpu(__hdr->key), \ 808c2ecf20Sopenharmony_ci be32_to_cpu(__hdr->msg_num) , ##args); \ 818c2ecf20Sopenharmony_ci} while (0) 828c2ecf20Sopenharmony_ci 838c2ecf20Sopenharmony_ci#define sclog(sc, fmt, args...) do { \ 848c2ecf20Sopenharmony_ci typeof(sc) __sc = (sc); \ 858c2ecf20Sopenharmony_ci mlog(ML_SOCKET, "[sc %p refs %d sock %p node %u page %p " \ 868c2ecf20Sopenharmony_ci "pg_off %zu] " fmt, __sc, \ 878c2ecf20Sopenharmony_ci kref_read(&__sc->sc_kref), __sc->sc_sock, \ 888c2ecf20Sopenharmony_ci __sc->sc_node->nd_num, __sc->sc_page, __sc->sc_page_off , \ 898c2ecf20Sopenharmony_ci ##args); \ 908c2ecf20Sopenharmony_ci} while (0) 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_cistatic DEFINE_RWLOCK(o2net_handler_lock); 938c2ecf20Sopenharmony_cistatic struct rb_root o2net_handler_tree = RB_ROOT; 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_cistatic struct o2net_node o2net_nodes[O2NM_MAX_NODES]; 968c2ecf20Sopenharmony_ci 978c2ecf20Sopenharmony_ci/* XXX someday we'll need better accounting */ 988c2ecf20Sopenharmony_cistatic struct socket *o2net_listen_sock; 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci/* 1018c2ecf20Sopenharmony_ci * listen work is only queued by the listening socket callbacks on the 1028c2ecf20Sopenharmony_ci * o2net_wq. teardown detaches the callbacks before destroying the workqueue. 1038c2ecf20Sopenharmony_ci * quorum work is queued as sock containers are shutdown.. 
stop_listening 1048c2ecf20Sopenharmony_ci * tears down all the node's sock containers, preventing future shutdowns 1058c2ecf20Sopenharmony_ci * and queued quroum work, before canceling delayed quorum work and 1068c2ecf20Sopenharmony_ci * destroying the work queue. 1078c2ecf20Sopenharmony_ci */ 1088c2ecf20Sopenharmony_cistatic struct workqueue_struct *o2net_wq; 1098c2ecf20Sopenharmony_cistatic struct work_struct o2net_listen_work; 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_cistatic struct o2hb_callback_func o2net_hb_up, o2net_hb_down; 1128c2ecf20Sopenharmony_ci#define O2NET_HB_PRI 0x1 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_cistatic struct o2net_handshake *o2net_hand; 1158c2ecf20Sopenharmony_cistatic struct o2net_msg *o2net_keep_req, *o2net_keep_resp; 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_cistatic int o2net_sys_err_translations[O2NET_ERR_MAX] = 1188c2ecf20Sopenharmony_ci {[O2NET_ERR_NONE] = 0, 1198c2ecf20Sopenharmony_ci [O2NET_ERR_NO_HNDLR] = -ENOPROTOOPT, 1208c2ecf20Sopenharmony_ci [O2NET_ERR_OVERFLOW] = -EOVERFLOW, 1218c2ecf20Sopenharmony_ci [O2NET_ERR_DIED] = -EHOSTDOWN,}; 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_ci/* can't quite avoid *all* internal declarations :/ */ 1248c2ecf20Sopenharmony_cistatic void o2net_sc_connect_completed(struct work_struct *work); 1258c2ecf20Sopenharmony_cistatic void o2net_rx_until_empty(struct work_struct *work); 1268c2ecf20Sopenharmony_cistatic void o2net_shutdown_sc(struct work_struct *work); 1278c2ecf20Sopenharmony_cistatic void o2net_listen_data_ready(struct sock *sk); 1288c2ecf20Sopenharmony_cistatic void o2net_sc_send_keep_req(struct work_struct *work); 1298c2ecf20Sopenharmony_cistatic void o2net_idle_timer(struct timer_list *t); 1308c2ecf20Sopenharmony_cistatic void o2net_sc_postpone_idle(struct o2net_sock_container *sc); 1318c2ecf20Sopenharmony_cistatic void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci#ifdef 
CONFIG_DEBUG_FS 1348c2ecf20Sopenharmony_cistatic void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, 1358c2ecf20Sopenharmony_ci u32 msgkey, struct task_struct *task, u8 node) 1368c2ecf20Sopenharmony_ci{ 1378c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&nst->st_net_debug_item); 1388c2ecf20Sopenharmony_ci nst->st_task = task; 1398c2ecf20Sopenharmony_ci nst->st_msg_type = msgtype; 1408c2ecf20Sopenharmony_ci nst->st_msg_key = msgkey; 1418c2ecf20Sopenharmony_ci nst->st_node = node; 1428c2ecf20Sopenharmony_ci} 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_cistatic inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) 1458c2ecf20Sopenharmony_ci{ 1468c2ecf20Sopenharmony_ci nst->st_sock_time = ktime_get(); 1478c2ecf20Sopenharmony_ci} 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_cistatic inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) 1508c2ecf20Sopenharmony_ci{ 1518c2ecf20Sopenharmony_ci nst->st_send_time = ktime_get(); 1528c2ecf20Sopenharmony_ci} 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_cistatic inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) 1558c2ecf20Sopenharmony_ci{ 1568c2ecf20Sopenharmony_ci nst->st_status_time = ktime_get(); 1578c2ecf20Sopenharmony_ci} 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_cistatic inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, 1608c2ecf20Sopenharmony_ci struct o2net_sock_container *sc) 1618c2ecf20Sopenharmony_ci{ 1628c2ecf20Sopenharmony_ci nst->st_sc = sc; 1638c2ecf20Sopenharmony_ci} 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_cistatic inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, 1668c2ecf20Sopenharmony_ci u32 msg_id) 1678c2ecf20Sopenharmony_ci{ 1688c2ecf20Sopenharmony_ci nst->st_id = msg_id; 1698c2ecf20Sopenharmony_ci} 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_cistatic inline void o2net_set_sock_timer(struct o2net_sock_container *sc) 1728c2ecf20Sopenharmony_ci{ 1738c2ecf20Sopenharmony_ci 
sc->sc_tv_timer = ktime_get(); 1748c2ecf20Sopenharmony_ci} 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_cistatic inline void o2net_set_data_ready_time(struct o2net_sock_container *sc) 1778c2ecf20Sopenharmony_ci{ 1788c2ecf20Sopenharmony_ci sc->sc_tv_data_ready = ktime_get(); 1798c2ecf20Sopenharmony_ci} 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_cistatic inline void o2net_set_advance_start_time(struct o2net_sock_container *sc) 1828c2ecf20Sopenharmony_ci{ 1838c2ecf20Sopenharmony_ci sc->sc_tv_advance_start = ktime_get(); 1848c2ecf20Sopenharmony_ci} 1858c2ecf20Sopenharmony_ci 1868c2ecf20Sopenharmony_cistatic inline void o2net_set_advance_stop_time(struct o2net_sock_container *sc) 1878c2ecf20Sopenharmony_ci{ 1888c2ecf20Sopenharmony_ci sc->sc_tv_advance_stop = ktime_get(); 1898c2ecf20Sopenharmony_ci} 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_cistatic inline void o2net_set_func_start_time(struct o2net_sock_container *sc) 1928c2ecf20Sopenharmony_ci{ 1938c2ecf20Sopenharmony_ci sc->sc_tv_func_start = ktime_get(); 1948c2ecf20Sopenharmony_ci} 1958c2ecf20Sopenharmony_ci 1968c2ecf20Sopenharmony_cistatic inline void o2net_set_func_stop_time(struct o2net_sock_container *sc) 1978c2ecf20Sopenharmony_ci{ 1988c2ecf20Sopenharmony_ci sc->sc_tv_func_stop = ktime_get(); 1998c2ecf20Sopenharmony_ci} 2008c2ecf20Sopenharmony_ci 2018c2ecf20Sopenharmony_ci#else /* CONFIG_DEBUG_FS */ 2028c2ecf20Sopenharmony_ci# define o2net_init_nst(a, b, c, d, e) 2038c2ecf20Sopenharmony_ci# define o2net_set_nst_sock_time(a) 2048c2ecf20Sopenharmony_ci# define o2net_set_nst_send_time(a) 2058c2ecf20Sopenharmony_ci# define o2net_set_nst_status_time(a) 2068c2ecf20Sopenharmony_ci# define o2net_set_nst_sock_container(a, b) 2078c2ecf20Sopenharmony_ci# define o2net_set_nst_msg_id(a, b) 2088c2ecf20Sopenharmony_ci# define o2net_set_sock_timer(a) 2098c2ecf20Sopenharmony_ci# define o2net_set_data_ready_time(a) 2108c2ecf20Sopenharmony_ci# define o2net_set_advance_start_time(a) 
2118c2ecf20Sopenharmony_ci# define o2net_set_advance_stop_time(a) 2128c2ecf20Sopenharmony_ci# define o2net_set_func_start_time(a) 2138c2ecf20Sopenharmony_ci# define o2net_set_func_stop_time(a) 2148c2ecf20Sopenharmony_ci#endif /* CONFIG_DEBUG_FS */ 2158c2ecf20Sopenharmony_ci 2168c2ecf20Sopenharmony_ci#ifdef CONFIG_OCFS2_FS_STATS 2178c2ecf20Sopenharmony_cistatic ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc) 2188c2ecf20Sopenharmony_ci{ 2198c2ecf20Sopenharmony_ci return ktime_sub(sc->sc_tv_func_stop, sc->sc_tv_func_start); 2208c2ecf20Sopenharmony_ci} 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_cistatic void o2net_update_send_stats(struct o2net_send_tracking *nst, 2238c2ecf20Sopenharmony_ci struct o2net_sock_container *sc) 2248c2ecf20Sopenharmony_ci{ 2258c2ecf20Sopenharmony_ci sc->sc_tv_status_total = ktime_add(sc->sc_tv_status_total, 2268c2ecf20Sopenharmony_ci ktime_sub(ktime_get(), 2278c2ecf20Sopenharmony_ci nst->st_status_time)); 2288c2ecf20Sopenharmony_ci sc->sc_tv_send_total = ktime_add(sc->sc_tv_send_total, 2298c2ecf20Sopenharmony_ci ktime_sub(nst->st_status_time, 2308c2ecf20Sopenharmony_ci nst->st_send_time)); 2318c2ecf20Sopenharmony_ci sc->sc_tv_acquiry_total = ktime_add(sc->sc_tv_acquiry_total, 2328c2ecf20Sopenharmony_ci ktime_sub(nst->st_send_time, 2338c2ecf20Sopenharmony_ci nst->st_sock_time)); 2348c2ecf20Sopenharmony_ci sc->sc_send_count++; 2358c2ecf20Sopenharmony_ci} 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_cistatic void o2net_update_recv_stats(struct o2net_sock_container *sc) 2388c2ecf20Sopenharmony_ci{ 2398c2ecf20Sopenharmony_ci sc->sc_tv_process_total = ktime_add(sc->sc_tv_process_total, 2408c2ecf20Sopenharmony_ci o2net_get_func_run_time(sc)); 2418c2ecf20Sopenharmony_ci sc->sc_recv_count++; 2428c2ecf20Sopenharmony_ci} 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci#else 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci# define o2net_update_send_stats(a, b) 2478c2ecf20Sopenharmony_ci 
2488c2ecf20Sopenharmony_ci# define o2net_update_recv_stats(sc) 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ci#endif /* CONFIG_OCFS2_FS_STATS */ 2518c2ecf20Sopenharmony_ci 2528c2ecf20Sopenharmony_cistatic inline unsigned int o2net_reconnect_delay(void) 2538c2ecf20Sopenharmony_ci{ 2548c2ecf20Sopenharmony_ci return o2nm_single_cluster->cl_reconnect_delay_ms; 2558c2ecf20Sopenharmony_ci} 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_cistatic inline unsigned int o2net_keepalive_delay(void) 2588c2ecf20Sopenharmony_ci{ 2598c2ecf20Sopenharmony_ci return o2nm_single_cluster->cl_keepalive_delay_ms; 2608c2ecf20Sopenharmony_ci} 2618c2ecf20Sopenharmony_ci 2628c2ecf20Sopenharmony_cistatic inline unsigned int o2net_idle_timeout(void) 2638c2ecf20Sopenharmony_ci{ 2648c2ecf20Sopenharmony_ci return o2nm_single_cluster->cl_idle_timeout_ms; 2658c2ecf20Sopenharmony_ci} 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_cistatic inline int o2net_sys_err_to_errno(enum o2net_system_error err) 2688c2ecf20Sopenharmony_ci{ 2698c2ecf20Sopenharmony_ci int trans; 2708c2ecf20Sopenharmony_ci BUG_ON(err >= O2NET_ERR_MAX); 2718c2ecf20Sopenharmony_ci trans = o2net_sys_err_translations[err]; 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_ci /* Just in case we mess up the translation table above */ 2748c2ecf20Sopenharmony_ci BUG_ON(err != O2NET_ERR_NONE && trans == 0); 2758c2ecf20Sopenharmony_ci return trans; 2768c2ecf20Sopenharmony_ci} 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_cistatic struct o2net_node * o2net_nn_from_num(u8 node_num) 2798c2ecf20Sopenharmony_ci{ 2808c2ecf20Sopenharmony_ci BUG_ON(node_num >= ARRAY_SIZE(o2net_nodes)); 2818c2ecf20Sopenharmony_ci return &o2net_nodes[node_num]; 2828c2ecf20Sopenharmony_ci} 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_cistatic u8 o2net_num_from_nn(struct o2net_node *nn) 2858c2ecf20Sopenharmony_ci{ 2868c2ecf20Sopenharmony_ci BUG_ON(nn == NULL); 2878c2ecf20Sopenharmony_ci return nn - o2net_nodes; 2888c2ecf20Sopenharmony_ci} 
2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_ci/* ------------------------------------------------------------ */ 2918c2ecf20Sopenharmony_ci 2928c2ecf20Sopenharmony_cistatic int o2net_prep_nsw(struct o2net_node *nn, struct o2net_status_wait *nsw) 2938c2ecf20Sopenharmony_ci{ 2948c2ecf20Sopenharmony_ci int ret; 2958c2ecf20Sopenharmony_ci 2968c2ecf20Sopenharmony_ci spin_lock(&nn->nn_lock); 2978c2ecf20Sopenharmony_ci ret = idr_alloc(&nn->nn_status_idr, nsw, 0, 0, GFP_ATOMIC); 2988c2ecf20Sopenharmony_ci if (ret >= 0) { 2998c2ecf20Sopenharmony_ci nsw->ns_id = ret; 3008c2ecf20Sopenharmony_ci list_add_tail(&nsw->ns_node_item, &nn->nn_status_list); 3018c2ecf20Sopenharmony_ci } 3028c2ecf20Sopenharmony_ci spin_unlock(&nn->nn_lock); 3038c2ecf20Sopenharmony_ci if (ret < 0) 3048c2ecf20Sopenharmony_ci return ret; 3058c2ecf20Sopenharmony_ci 3068c2ecf20Sopenharmony_ci init_waitqueue_head(&nsw->ns_wq); 3078c2ecf20Sopenharmony_ci nsw->ns_sys_status = O2NET_ERR_NONE; 3088c2ecf20Sopenharmony_ci nsw->ns_status = 0; 3098c2ecf20Sopenharmony_ci return 0; 3108c2ecf20Sopenharmony_ci} 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_cistatic void o2net_complete_nsw_locked(struct o2net_node *nn, 3138c2ecf20Sopenharmony_ci struct o2net_status_wait *nsw, 3148c2ecf20Sopenharmony_ci enum o2net_system_error sys_status, 3158c2ecf20Sopenharmony_ci s32 status) 3168c2ecf20Sopenharmony_ci{ 3178c2ecf20Sopenharmony_ci assert_spin_locked(&nn->nn_lock); 3188c2ecf20Sopenharmony_ci 3198c2ecf20Sopenharmony_ci if (!list_empty(&nsw->ns_node_item)) { 3208c2ecf20Sopenharmony_ci list_del_init(&nsw->ns_node_item); 3218c2ecf20Sopenharmony_ci nsw->ns_sys_status = sys_status; 3228c2ecf20Sopenharmony_ci nsw->ns_status = status; 3238c2ecf20Sopenharmony_ci idr_remove(&nn->nn_status_idr, nsw->ns_id); 3248c2ecf20Sopenharmony_ci wake_up(&nsw->ns_wq); 3258c2ecf20Sopenharmony_ci } 3268c2ecf20Sopenharmony_ci} 3278c2ecf20Sopenharmony_ci 3288c2ecf20Sopenharmony_cistatic void o2net_complete_nsw(struct o2net_node *nn, 
3298c2ecf20Sopenharmony_ci struct o2net_status_wait *nsw, 3308c2ecf20Sopenharmony_ci u64 id, enum o2net_system_error sys_status, 3318c2ecf20Sopenharmony_ci s32 status) 3328c2ecf20Sopenharmony_ci{ 3338c2ecf20Sopenharmony_ci spin_lock(&nn->nn_lock); 3348c2ecf20Sopenharmony_ci if (nsw == NULL) { 3358c2ecf20Sopenharmony_ci if (id > INT_MAX) 3368c2ecf20Sopenharmony_ci goto out; 3378c2ecf20Sopenharmony_ci 3388c2ecf20Sopenharmony_ci nsw = idr_find(&nn->nn_status_idr, id); 3398c2ecf20Sopenharmony_ci if (nsw == NULL) 3408c2ecf20Sopenharmony_ci goto out; 3418c2ecf20Sopenharmony_ci } 3428c2ecf20Sopenharmony_ci 3438c2ecf20Sopenharmony_ci o2net_complete_nsw_locked(nn, nsw, sys_status, status); 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ciout: 3468c2ecf20Sopenharmony_ci spin_unlock(&nn->nn_lock); 3478c2ecf20Sopenharmony_ci return; 3488c2ecf20Sopenharmony_ci} 3498c2ecf20Sopenharmony_ci 3508c2ecf20Sopenharmony_cistatic void o2net_complete_nodes_nsw(struct o2net_node *nn) 3518c2ecf20Sopenharmony_ci{ 3528c2ecf20Sopenharmony_ci struct o2net_status_wait *nsw, *tmp; 3538c2ecf20Sopenharmony_ci unsigned int num_kills = 0; 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_ci assert_spin_locked(&nn->nn_lock); 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_ci list_for_each_entry_safe(nsw, tmp, &nn->nn_status_list, ns_node_item) { 3588c2ecf20Sopenharmony_ci o2net_complete_nsw_locked(nn, nsw, O2NET_ERR_DIED, 0); 3598c2ecf20Sopenharmony_ci num_kills++; 3608c2ecf20Sopenharmony_ci } 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_ci mlog(0, "completed %d messages for node %u\n", num_kills, 3638c2ecf20Sopenharmony_ci o2net_num_from_nn(nn)); 3648c2ecf20Sopenharmony_ci} 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_cistatic int o2net_nsw_completed(struct o2net_node *nn, 3678c2ecf20Sopenharmony_ci struct o2net_status_wait *nsw) 3688c2ecf20Sopenharmony_ci{ 3698c2ecf20Sopenharmony_ci int completed; 3708c2ecf20Sopenharmony_ci spin_lock(&nn->nn_lock); 3718c2ecf20Sopenharmony_ci 
completed = list_empty(&nsw->ns_node_item); 3728c2ecf20Sopenharmony_ci spin_unlock(&nn->nn_lock); 3738c2ecf20Sopenharmony_ci return completed; 3748c2ecf20Sopenharmony_ci} 3758c2ecf20Sopenharmony_ci 3768c2ecf20Sopenharmony_ci/* ------------------------------------------------------------ */ 3778c2ecf20Sopenharmony_ci 3788c2ecf20Sopenharmony_cistatic void sc_kref_release(struct kref *kref) 3798c2ecf20Sopenharmony_ci{ 3808c2ecf20Sopenharmony_ci struct o2net_sock_container *sc = container_of(kref, 3818c2ecf20Sopenharmony_ci struct o2net_sock_container, sc_kref); 3828c2ecf20Sopenharmony_ci BUG_ON(timer_pending(&sc->sc_idle_timeout)); 3838c2ecf20Sopenharmony_ci 3848c2ecf20Sopenharmony_ci sclog(sc, "releasing\n"); 3858c2ecf20Sopenharmony_ci 3868c2ecf20Sopenharmony_ci if (sc->sc_sock) { 3878c2ecf20Sopenharmony_ci sock_release(sc->sc_sock); 3888c2ecf20Sopenharmony_ci sc->sc_sock = NULL; 3898c2ecf20Sopenharmony_ci } 3908c2ecf20Sopenharmony_ci 3918c2ecf20Sopenharmony_ci o2nm_undepend_item(&sc->sc_node->nd_item); 3928c2ecf20Sopenharmony_ci o2nm_node_put(sc->sc_node); 3938c2ecf20Sopenharmony_ci sc->sc_node = NULL; 3948c2ecf20Sopenharmony_ci 3958c2ecf20Sopenharmony_ci o2net_debug_del_sc(sc); 3968c2ecf20Sopenharmony_ci 3978c2ecf20Sopenharmony_ci if (sc->sc_page) 3988c2ecf20Sopenharmony_ci __free_page(sc->sc_page); 3998c2ecf20Sopenharmony_ci kfree(sc); 4008c2ecf20Sopenharmony_ci} 4018c2ecf20Sopenharmony_ci 4028c2ecf20Sopenharmony_cistatic void sc_put(struct o2net_sock_container *sc) 4038c2ecf20Sopenharmony_ci{ 4048c2ecf20Sopenharmony_ci sclog(sc, "put\n"); 4058c2ecf20Sopenharmony_ci kref_put(&sc->sc_kref, sc_kref_release); 4068c2ecf20Sopenharmony_ci} 4078c2ecf20Sopenharmony_cistatic void sc_get(struct o2net_sock_container *sc) 4088c2ecf20Sopenharmony_ci{ 4098c2ecf20Sopenharmony_ci sclog(sc, "get\n"); 4108c2ecf20Sopenharmony_ci kref_get(&sc->sc_kref); 4118c2ecf20Sopenharmony_ci} 4128c2ecf20Sopenharmony_cistatic struct o2net_sock_container *sc_alloc(struct o2nm_node *node) 
4138c2ecf20Sopenharmony_ci{ 4148c2ecf20Sopenharmony_ci struct o2net_sock_container *sc, *ret = NULL; 4158c2ecf20Sopenharmony_ci struct page *page = NULL; 4168c2ecf20Sopenharmony_ci int status = 0; 4178c2ecf20Sopenharmony_ci 4188c2ecf20Sopenharmony_ci page = alloc_page(GFP_NOFS); 4198c2ecf20Sopenharmony_ci sc = kzalloc(sizeof(*sc), GFP_NOFS); 4208c2ecf20Sopenharmony_ci if (sc == NULL || page == NULL) 4218c2ecf20Sopenharmony_ci goto out; 4228c2ecf20Sopenharmony_ci 4238c2ecf20Sopenharmony_ci kref_init(&sc->sc_kref); 4248c2ecf20Sopenharmony_ci o2nm_node_get(node); 4258c2ecf20Sopenharmony_ci sc->sc_node = node; 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_ci /* pin the node item of the remote node */ 4288c2ecf20Sopenharmony_ci status = o2nm_depend_item(&node->nd_item); 4298c2ecf20Sopenharmony_ci if (status) { 4308c2ecf20Sopenharmony_ci mlog_errno(status); 4318c2ecf20Sopenharmony_ci o2nm_node_put(node); 4328c2ecf20Sopenharmony_ci goto out; 4338c2ecf20Sopenharmony_ci } 4348c2ecf20Sopenharmony_ci INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed); 4358c2ecf20Sopenharmony_ci INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty); 4368c2ecf20Sopenharmony_ci INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc); 4378c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&sc->sc_keepalive_work, o2net_sc_send_keep_req); 4388c2ecf20Sopenharmony_ci 4398c2ecf20Sopenharmony_ci timer_setup(&sc->sc_idle_timeout, o2net_idle_timer, 0); 4408c2ecf20Sopenharmony_ci 4418c2ecf20Sopenharmony_ci sclog(sc, "alloced\n"); 4428c2ecf20Sopenharmony_ci 4438c2ecf20Sopenharmony_ci ret = sc; 4448c2ecf20Sopenharmony_ci sc->sc_page = page; 4458c2ecf20Sopenharmony_ci o2net_debug_add_sc(sc); 4468c2ecf20Sopenharmony_ci sc = NULL; 4478c2ecf20Sopenharmony_ci page = NULL; 4488c2ecf20Sopenharmony_ci 4498c2ecf20Sopenharmony_ciout: 4508c2ecf20Sopenharmony_ci if (page) 4518c2ecf20Sopenharmony_ci __free_page(page); 4528c2ecf20Sopenharmony_ci kfree(sc); 4538c2ecf20Sopenharmony_ci 4548c2ecf20Sopenharmony_ci return ret; 
4558c2ecf20Sopenharmony_ci} 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_ci/* ------------------------------------------------------------ */ 4588c2ecf20Sopenharmony_ci 4598c2ecf20Sopenharmony_cistatic void o2net_sc_queue_work(struct o2net_sock_container *sc, 4608c2ecf20Sopenharmony_ci struct work_struct *work) 4618c2ecf20Sopenharmony_ci{ 4628c2ecf20Sopenharmony_ci sc_get(sc); 4638c2ecf20Sopenharmony_ci if (!queue_work(o2net_wq, work)) 4648c2ecf20Sopenharmony_ci sc_put(sc); 4658c2ecf20Sopenharmony_ci} 4668c2ecf20Sopenharmony_cistatic void o2net_sc_queue_delayed_work(struct o2net_sock_container *sc, 4678c2ecf20Sopenharmony_ci struct delayed_work *work, 4688c2ecf20Sopenharmony_ci int delay) 4698c2ecf20Sopenharmony_ci{ 4708c2ecf20Sopenharmony_ci sc_get(sc); 4718c2ecf20Sopenharmony_ci if (!queue_delayed_work(o2net_wq, work, delay)) 4728c2ecf20Sopenharmony_ci sc_put(sc); 4738c2ecf20Sopenharmony_ci} 4748c2ecf20Sopenharmony_cistatic void o2net_sc_cancel_delayed_work(struct o2net_sock_container *sc, 4758c2ecf20Sopenharmony_ci struct delayed_work *work) 4768c2ecf20Sopenharmony_ci{ 4778c2ecf20Sopenharmony_ci if (cancel_delayed_work(work)) 4788c2ecf20Sopenharmony_ci sc_put(sc); 4798c2ecf20Sopenharmony_ci} 4808c2ecf20Sopenharmony_ci 4818c2ecf20Sopenharmony_cistatic atomic_t o2net_connected_peers = ATOMIC_INIT(0); 4828c2ecf20Sopenharmony_ci 4838c2ecf20Sopenharmony_ciint o2net_num_connected_peers(void) 4848c2ecf20Sopenharmony_ci{ 4858c2ecf20Sopenharmony_ci return atomic_read(&o2net_connected_peers); 4868c2ecf20Sopenharmony_ci} 4878c2ecf20Sopenharmony_ci 4888c2ecf20Sopenharmony_cistatic void o2net_set_nn_state(struct o2net_node *nn, 4898c2ecf20Sopenharmony_ci struct o2net_sock_container *sc, 4908c2ecf20Sopenharmony_ci unsigned valid, int err) 4918c2ecf20Sopenharmony_ci{ 4928c2ecf20Sopenharmony_ci int was_valid = nn->nn_sc_valid; 4938c2ecf20Sopenharmony_ci int was_err = nn->nn_persistent_error; 4948c2ecf20Sopenharmony_ci struct o2net_sock_container *old_sc = nn->nn_sc; 
4958c2ecf20Sopenharmony_ci 4968c2ecf20Sopenharmony_ci assert_spin_locked(&nn->nn_lock); 4978c2ecf20Sopenharmony_ci 4988c2ecf20Sopenharmony_ci if (old_sc && !sc) 4998c2ecf20Sopenharmony_ci atomic_dec(&o2net_connected_peers); 5008c2ecf20Sopenharmony_ci else if (!old_sc && sc) 5018c2ecf20Sopenharmony_ci atomic_inc(&o2net_connected_peers); 5028c2ecf20Sopenharmony_ci 5038c2ecf20Sopenharmony_ci /* the node num comparison and single connect/accept path should stop 5048c2ecf20Sopenharmony_ci * an non-null sc from being overwritten with another */ 5058c2ecf20Sopenharmony_ci BUG_ON(sc && nn->nn_sc && nn->nn_sc != sc); 5068c2ecf20Sopenharmony_ci mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid); 5078c2ecf20Sopenharmony_ci mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc); 5088c2ecf20Sopenharmony_ci 5098c2ecf20Sopenharmony_ci if (was_valid && !valid && err == 0) 5108c2ecf20Sopenharmony_ci err = -ENOTCONN; 5118c2ecf20Sopenharmony_ci 5128c2ecf20Sopenharmony_ci mlog(ML_CONN, "node %u sc: %p -> %p, valid %u -> %u, err %d -> %d\n", 5138c2ecf20Sopenharmony_ci o2net_num_from_nn(nn), nn->nn_sc, sc, nn->nn_sc_valid, valid, 5148c2ecf20Sopenharmony_ci nn->nn_persistent_error, err); 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_ci nn->nn_sc = sc; 5178c2ecf20Sopenharmony_ci nn->nn_sc_valid = valid ? 
1 : 0; 5188c2ecf20Sopenharmony_ci nn->nn_persistent_error = err; 5198c2ecf20Sopenharmony_ci 5208c2ecf20Sopenharmony_ci /* mirrors o2net_tx_can_proceed() */ 5218c2ecf20Sopenharmony_ci if (nn->nn_persistent_error || nn->nn_sc_valid) 5228c2ecf20Sopenharmony_ci wake_up(&nn->nn_sc_wq); 5238c2ecf20Sopenharmony_ci 5248c2ecf20Sopenharmony_ci if (was_valid && !was_err && nn->nn_persistent_error) { 5258c2ecf20Sopenharmony_ci o2quo_conn_err(o2net_num_from_nn(nn)); 5268c2ecf20Sopenharmony_ci queue_delayed_work(o2net_wq, &nn->nn_still_up, 5278c2ecf20Sopenharmony_ci msecs_to_jiffies(O2NET_QUORUM_DELAY_MS)); 5288c2ecf20Sopenharmony_ci } 5298c2ecf20Sopenharmony_ci 5308c2ecf20Sopenharmony_ci if (was_valid && !valid) { 5318c2ecf20Sopenharmony_ci if (old_sc) 5328c2ecf20Sopenharmony_ci printk(KERN_NOTICE "o2net: No longer connected to " 5338c2ecf20Sopenharmony_ci SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); 5348c2ecf20Sopenharmony_ci o2net_complete_nodes_nsw(nn); 5358c2ecf20Sopenharmony_ci } 5368c2ecf20Sopenharmony_ci 5378c2ecf20Sopenharmony_ci if (!was_valid && valid) { 5388c2ecf20Sopenharmony_ci o2quo_conn_up(o2net_num_from_nn(nn)); 5398c2ecf20Sopenharmony_ci cancel_delayed_work(&nn->nn_connect_expired); 5408c2ecf20Sopenharmony_ci printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n", 5418c2ecf20Sopenharmony_ci o2nm_this_node() > sc->sc_node->nd_num ? 5428c2ecf20Sopenharmony_ci "Connected to" : "Accepted connection from", 5438c2ecf20Sopenharmony_ci SC_NODEF_ARGS(sc)); 5448c2ecf20Sopenharmony_ci } 5458c2ecf20Sopenharmony_ci 5468c2ecf20Sopenharmony_ci /* trigger the connecting worker func as long as we're not valid, 5478c2ecf20Sopenharmony_ci * it will back off if it shouldn't connect. This can be called 5488c2ecf20Sopenharmony_ci * from node config teardown and so needs to be careful about 5498c2ecf20Sopenharmony_ci * the work queue actually being up. 
*/ 5508c2ecf20Sopenharmony_ci if (!valid && o2net_wq) { 5518c2ecf20Sopenharmony_ci unsigned long delay; 5528c2ecf20Sopenharmony_ci /* delay if we're within a RECONNECT_DELAY of the 5538c2ecf20Sopenharmony_ci * last attempt */ 5548c2ecf20Sopenharmony_ci delay = (nn->nn_last_connect_attempt + 5558c2ecf20Sopenharmony_ci msecs_to_jiffies(o2net_reconnect_delay())) 5568c2ecf20Sopenharmony_ci - jiffies; 5578c2ecf20Sopenharmony_ci if (delay > msecs_to_jiffies(o2net_reconnect_delay())) 5588c2ecf20Sopenharmony_ci delay = 0; 5598c2ecf20Sopenharmony_ci mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); 5608c2ecf20Sopenharmony_ci queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); 5618c2ecf20Sopenharmony_ci 5628c2ecf20Sopenharmony_ci /* 5638c2ecf20Sopenharmony_ci * Delay the expired work after idle timeout. 5648c2ecf20Sopenharmony_ci * 5658c2ecf20Sopenharmony_ci * We might have lots of failed connection attempts that run 5668c2ecf20Sopenharmony_ci * through here but we only cancel the connect_expired work when 5678c2ecf20Sopenharmony_ci * a connection attempt succeeds. So only the first enqueue of 5688c2ecf20Sopenharmony_ci * the connect_expired work will do anything. The rest will see 5698c2ecf20Sopenharmony_ci * that it's already queued and do nothing. 
5708c2ecf20Sopenharmony_ci */ 5718c2ecf20Sopenharmony_ci delay += msecs_to_jiffies(o2net_idle_timeout()); 5728c2ecf20Sopenharmony_ci queue_delayed_work(o2net_wq, &nn->nn_connect_expired, delay); 5738c2ecf20Sopenharmony_ci } 5748c2ecf20Sopenharmony_ci 5758c2ecf20Sopenharmony_ci /* keep track of the nn's sc ref for the caller */ 5768c2ecf20Sopenharmony_ci if ((old_sc == NULL) && sc) 5778c2ecf20Sopenharmony_ci sc_get(sc); 5788c2ecf20Sopenharmony_ci if (old_sc && (old_sc != sc)) { 5798c2ecf20Sopenharmony_ci o2net_sc_queue_work(old_sc, &old_sc->sc_shutdown_work); 5808c2ecf20Sopenharmony_ci sc_put(old_sc); 5818c2ecf20Sopenharmony_ci } 5828c2ecf20Sopenharmony_ci} 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci/* see o2net_register_callbacks() */ 5858c2ecf20Sopenharmony_cistatic void o2net_data_ready(struct sock *sk) 5868c2ecf20Sopenharmony_ci{ 5878c2ecf20Sopenharmony_ci void (*ready)(struct sock *sk); 5888c2ecf20Sopenharmony_ci struct o2net_sock_container *sc; 5898c2ecf20Sopenharmony_ci 5908c2ecf20Sopenharmony_ci read_lock_bh(&sk->sk_callback_lock); 5918c2ecf20Sopenharmony_ci sc = sk->sk_user_data; 5928c2ecf20Sopenharmony_ci if (sc) { 5938c2ecf20Sopenharmony_ci sclog(sc, "data_ready hit\n"); 5948c2ecf20Sopenharmony_ci o2net_set_data_ready_time(sc); 5958c2ecf20Sopenharmony_ci o2net_sc_queue_work(sc, &sc->sc_rx_work); 5968c2ecf20Sopenharmony_ci ready = sc->sc_data_ready; 5978c2ecf20Sopenharmony_ci } else { 5988c2ecf20Sopenharmony_ci ready = sk->sk_data_ready; 5998c2ecf20Sopenharmony_ci } 6008c2ecf20Sopenharmony_ci read_unlock_bh(&sk->sk_callback_lock); 6018c2ecf20Sopenharmony_ci 6028c2ecf20Sopenharmony_ci ready(sk); 6038c2ecf20Sopenharmony_ci} 6048c2ecf20Sopenharmony_ci 6058c2ecf20Sopenharmony_ci/* see o2net_register_callbacks() */ 6068c2ecf20Sopenharmony_cistatic void o2net_state_change(struct sock *sk) 6078c2ecf20Sopenharmony_ci{ 6088c2ecf20Sopenharmony_ci void (*state_change)(struct sock *sk); 6098c2ecf20Sopenharmony_ci struct o2net_sock_container *sc; 
6108c2ecf20Sopenharmony_ci 6118c2ecf20Sopenharmony_ci read_lock_bh(&sk->sk_callback_lock); 6128c2ecf20Sopenharmony_ci sc = sk->sk_user_data; 6138c2ecf20Sopenharmony_ci if (sc == NULL) { 6148c2ecf20Sopenharmony_ci state_change = sk->sk_state_change; 6158c2ecf20Sopenharmony_ci goto out; 6168c2ecf20Sopenharmony_ci } 6178c2ecf20Sopenharmony_ci 6188c2ecf20Sopenharmony_ci sclog(sc, "state_change to %d\n", sk->sk_state); 6198c2ecf20Sopenharmony_ci 6208c2ecf20Sopenharmony_ci state_change = sc->sc_state_change; 6218c2ecf20Sopenharmony_ci 6228c2ecf20Sopenharmony_ci switch(sk->sk_state) { 6238c2ecf20Sopenharmony_ci /* ignore connecting sockets as they make progress */ 6248c2ecf20Sopenharmony_ci case TCP_SYN_SENT: 6258c2ecf20Sopenharmony_ci case TCP_SYN_RECV: 6268c2ecf20Sopenharmony_ci break; 6278c2ecf20Sopenharmony_ci case TCP_ESTABLISHED: 6288c2ecf20Sopenharmony_ci o2net_sc_queue_work(sc, &sc->sc_connect_work); 6298c2ecf20Sopenharmony_ci break; 6308c2ecf20Sopenharmony_ci default: 6318c2ecf20Sopenharmony_ci printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT 6328c2ecf20Sopenharmony_ci " shutdown, state %d\n", 6338c2ecf20Sopenharmony_ci SC_NODEF_ARGS(sc), sk->sk_state); 6348c2ecf20Sopenharmony_ci o2net_sc_queue_work(sc, &sc->sc_shutdown_work); 6358c2ecf20Sopenharmony_ci break; 6368c2ecf20Sopenharmony_ci } 6378c2ecf20Sopenharmony_ciout: 6388c2ecf20Sopenharmony_ci read_unlock_bh(&sk->sk_callback_lock); 6398c2ecf20Sopenharmony_ci state_change(sk); 6408c2ecf20Sopenharmony_ci} 6418c2ecf20Sopenharmony_ci 6428c2ecf20Sopenharmony_ci/* 6438c2ecf20Sopenharmony_ci * we register callbacks so we can queue work on events before calling 6448c2ecf20Sopenharmony_ci * the original callbacks. our callbacks our careful to test user_data 6458c2ecf20Sopenharmony_ci * to discover when they've reaced with o2net_unregister_callbacks(). 
6468c2ecf20Sopenharmony_ci */ 6478c2ecf20Sopenharmony_cistatic void o2net_register_callbacks(struct sock *sk, 6488c2ecf20Sopenharmony_ci struct o2net_sock_container *sc) 6498c2ecf20Sopenharmony_ci{ 6508c2ecf20Sopenharmony_ci write_lock_bh(&sk->sk_callback_lock); 6518c2ecf20Sopenharmony_ci 6528c2ecf20Sopenharmony_ci /* accepted sockets inherit the old listen socket data ready */ 6538c2ecf20Sopenharmony_ci if (sk->sk_data_ready == o2net_listen_data_ready) { 6548c2ecf20Sopenharmony_ci sk->sk_data_ready = sk->sk_user_data; 6558c2ecf20Sopenharmony_ci sk->sk_user_data = NULL; 6568c2ecf20Sopenharmony_ci } 6578c2ecf20Sopenharmony_ci 6588c2ecf20Sopenharmony_ci BUG_ON(sk->sk_user_data != NULL); 6598c2ecf20Sopenharmony_ci sk->sk_user_data = sc; 6608c2ecf20Sopenharmony_ci sc_get(sc); 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_ci sc->sc_data_ready = sk->sk_data_ready; 6638c2ecf20Sopenharmony_ci sc->sc_state_change = sk->sk_state_change; 6648c2ecf20Sopenharmony_ci sk->sk_data_ready = o2net_data_ready; 6658c2ecf20Sopenharmony_ci sk->sk_state_change = o2net_state_change; 6668c2ecf20Sopenharmony_ci 6678c2ecf20Sopenharmony_ci mutex_init(&sc->sc_send_lock); 6688c2ecf20Sopenharmony_ci 6698c2ecf20Sopenharmony_ci write_unlock_bh(&sk->sk_callback_lock); 6708c2ecf20Sopenharmony_ci} 6718c2ecf20Sopenharmony_ci 6728c2ecf20Sopenharmony_cistatic int o2net_unregister_callbacks(struct sock *sk, 6738c2ecf20Sopenharmony_ci struct o2net_sock_container *sc) 6748c2ecf20Sopenharmony_ci{ 6758c2ecf20Sopenharmony_ci int ret = 0; 6768c2ecf20Sopenharmony_ci 6778c2ecf20Sopenharmony_ci write_lock_bh(&sk->sk_callback_lock); 6788c2ecf20Sopenharmony_ci if (sk->sk_user_data == sc) { 6798c2ecf20Sopenharmony_ci ret = 1; 6808c2ecf20Sopenharmony_ci sk->sk_user_data = NULL; 6818c2ecf20Sopenharmony_ci sk->sk_data_ready = sc->sc_data_ready; 6828c2ecf20Sopenharmony_ci sk->sk_state_change = sc->sc_state_change; 6838c2ecf20Sopenharmony_ci } 6848c2ecf20Sopenharmony_ci write_unlock_bh(&sk->sk_callback_lock); 
6858c2ecf20Sopenharmony_ci 6868c2ecf20Sopenharmony_ci return ret; 6878c2ecf20Sopenharmony_ci} 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_ci/* 6908c2ecf20Sopenharmony_ci * this is a little helper that is called by callers who have seen a problem 6918c2ecf20Sopenharmony_ci * with an sc and want to detach it from the nn if someone already hasn't beat 6928c2ecf20Sopenharmony_ci * them to it. if an error is given then the shutdown will be persistent 6938c2ecf20Sopenharmony_ci * and pending transmits will be canceled. 6948c2ecf20Sopenharmony_ci */ 6958c2ecf20Sopenharmony_cistatic void o2net_ensure_shutdown(struct o2net_node *nn, 6968c2ecf20Sopenharmony_ci struct o2net_sock_container *sc, 6978c2ecf20Sopenharmony_ci int err) 6988c2ecf20Sopenharmony_ci{ 6998c2ecf20Sopenharmony_ci spin_lock(&nn->nn_lock); 7008c2ecf20Sopenharmony_ci if (nn->nn_sc == sc) 7018c2ecf20Sopenharmony_ci o2net_set_nn_state(nn, NULL, 0, err); 7028c2ecf20Sopenharmony_ci spin_unlock(&nn->nn_lock); 7038c2ecf20Sopenharmony_ci} 7048c2ecf20Sopenharmony_ci 7058c2ecf20Sopenharmony_ci/* 7068c2ecf20Sopenharmony_ci * This work queue function performs the blocking parts of socket shutdown. A 7078c2ecf20Sopenharmony_ci * few paths lead here. set_nn_state will trigger this callback if it sees an 7088c2ecf20Sopenharmony_ci * sc detached from the nn. state_change will also trigger this callback 7098c2ecf20Sopenharmony_ci * directly when it sees errors. In that case we need to call set_nn_state 7108c2ecf20Sopenharmony_ci * ourselves as state_change couldn't get the nn_lock and call set_nn_state 7118c2ecf20Sopenharmony_ci * itself. 
7128c2ecf20Sopenharmony_ci */ 7138c2ecf20Sopenharmony_cistatic void o2net_shutdown_sc(struct work_struct *work) 7148c2ecf20Sopenharmony_ci{ 7158c2ecf20Sopenharmony_ci struct o2net_sock_container *sc = 7168c2ecf20Sopenharmony_ci container_of(work, struct o2net_sock_container, 7178c2ecf20Sopenharmony_ci sc_shutdown_work); 7188c2ecf20Sopenharmony_ci struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); 7198c2ecf20Sopenharmony_ci 7208c2ecf20Sopenharmony_ci sclog(sc, "shutting down\n"); 7218c2ecf20Sopenharmony_ci 7228c2ecf20Sopenharmony_ci /* drop the callbacks ref and call shutdown only once */ 7238c2ecf20Sopenharmony_ci if (o2net_unregister_callbacks(sc->sc_sock->sk, sc)) { 7248c2ecf20Sopenharmony_ci /* we shouldn't flush as we're in the thread, the 7258c2ecf20Sopenharmony_ci * races with pending sc work structs are harmless */ 7268c2ecf20Sopenharmony_ci del_timer_sync(&sc->sc_idle_timeout); 7278c2ecf20Sopenharmony_ci o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); 7288c2ecf20Sopenharmony_ci sc_put(sc); 7298c2ecf20Sopenharmony_ci kernel_sock_shutdown(sc->sc_sock, SHUT_RDWR); 7308c2ecf20Sopenharmony_ci } 7318c2ecf20Sopenharmony_ci 7328c2ecf20Sopenharmony_ci /* not fatal so failed connects before the other guy has our 7338c2ecf20Sopenharmony_ci * heartbeat can be retried */ 7348c2ecf20Sopenharmony_ci o2net_ensure_shutdown(nn, sc, 0); 7358c2ecf20Sopenharmony_ci sc_put(sc); 7368c2ecf20Sopenharmony_ci} 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_ci/* ------------------------------------------------------------ */ 7398c2ecf20Sopenharmony_ci 7408c2ecf20Sopenharmony_cistatic int o2net_handler_cmp(struct o2net_msg_handler *nmh, u32 msg_type, 7418c2ecf20Sopenharmony_ci u32 key) 7428c2ecf20Sopenharmony_ci{ 7438c2ecf20Sopenharmony_ci int ret = memcmp(&nmh->nh_key, &key, sizeof(key)); 7448c2ecf20Sopenharmony_ci 7458c2ecf20Sopenharmony_ci if (ret == 0) 7468c2ecf20Sopenharmony_ci ret = memcmp(&nmh->nh_msg_type, &msg_type, sizeof(msg_type)); 
7478c2ecf20Sopenharmony_ci 7488c2ecf20Sopenharmony_ci return ret; 7498c2ecf20Sopenharmony_ci} 7508c2ecf20Sopenharmony_ci 7518c2ecf20Sopenharmony_cistatic struct o2net_msg_handler * 7528c2ecf20Sopenharmony_cio2net_handler_tree_lookup(u32 msg_type, u32 key, struct rb_node ***ret_p, 7538c2ecf20Sopenharmony_ci struct rb_node **ret_parent) 7548c2ecf20Sopenharmony_ci{ 7558c2ecf20Sopenharmony_ci struct rb_node **p = &o2net_handler_tree.rb_node; 7568c2ecf20Sopenharmony_ci struct rb_node *parent = NULL; 7578c2ecf20Sopenharmony_ci struct o2net_msg_handler *nmh, *ret = NULL; 7588c2ecf20Sopenharmony_ci int cmp; 7598c2ecf20Sopenharmony_ci 7608c2ecf20Sopenharmony_ci while (*p) { 7618c2ecf20Sopenharmony_ci parent = *p; 7628c2ecf20Sopenharmony_ci nmh = rb_entry(parent, struct o2net_msg_handler, nh_node); 7638c2ecf20Sopenharmony_ci cmp = o2net_handler_cmp(nmh, msg_type, key); 7648c2ecf20Sopenharmony_ci 7658c2ecf20Sopenharmony_ci if (cmp < 0) 7668c2ecf20Sopenharmony_ci p = &(*p)->rb_left; 7678c2ecf20Sopenharmony_ci else if (cmp > 0) 7688c2ecf20Sopenharmony_ci p = &(*p)->rb_right; 7698c2ecf20Sopenharmony_ci else { 7708c2ecf20Sopenharmony_ci ret = nmh; 7718c2ecf20Sopenharmony_ci break; 7728c2ecf20Sopenharmony_ci } 7738c2ecf20Sopenharmony_ci } 7748c2ecf20Sopenharmony_ci 7758c2ecf20Sopenharmony_ci if (ret_p != NULL) 7768c2ecf20Sopenharmony_ci *ret_p = p; 7778c2ecf20Sopenharmony_ci if (ret_parent != NULL) 7788c2ecf20Sopenharmony_ci *ret_parent = parent; 7798c2ecf20Sopenharmony_ci 7808c2ecf20Sopenharmony_ci return ret; 7818c2ecf20Sopenharmony_ci} 7828c2ecf20Sopenharmony_ci 7838c2ecf20Sopenharmony_cistatic void o2net_handler_kref_release(struct kref *kref) 7848c2ecf20Sopenharmony_ci{ 7858c2ecf20Sopenharmony_ci struct o2net_msg_handler *nmh; 7868c2ecf20Sopenharmony_ci nmh = container_of(kref, struct o2net_msg_handler, nh_kref); 7878c2ecf20Sopenharmony_ci 7888c2ecf20Sopenharmony_ci kfree(nmh); 7898c2ecf20Sopenharmony_ci} 7908c2ecf20Sopenharmony_ci 7918c2ecf20Sopenharmony_cistatic void 
o2net_handler_put(struct o2net_msg_handler *nmh) 7928c2ecf20Sopenharmony_ci{ 7938c2ecf20Sopenharmony_ci kref_put(&nmh->nh_kref, o2net_handler_kref_release); 7948c2ecf20Sopenharmony_ci} 7958c2ecf20Sopenharmony_ci 7968c2ecf20Sopenharmony_ci/* max_len is protection for the handler func. incoming messages won't 7978c2ecf20Sopenharmony_ci * be given to the handler if their payload is longer than the max. */ 7988c2ecf20Sopenharmony_ciint o2net_register_handler(u32 msg_type, u32 key, u32 max_len, 7998c2ecf20Sopenharmony_ci o2net_msg_handler_func *func, void *data, 8008c2ecf20Sopenharmony_ci o2net_post_msg_handler_func *post_func, 8018c2ecf20Sopenharmony_ci struct list_head *unreg_list) 8028c2ecf20Sopenharmony_ci{ 8038c2ecf20Sopenharmony_ci struct o2net_msg_handler *nmh = NULL; 8048c2ecf20Sopenharmony_ci struct rb_node **p, *parent; 8058c2ecf20Sopenharmony_ci int ret = 0; 8068c2ecf20Sopenharmony_ci 8078c2ecf20Sopenharmony_ci if (max_len > O2NET_MAX_PAYLOAD_BYTES) { 8088c2ecf20Sopenharmony_ci mlog(0, "max_len for message handler out of range: %u\n", 8098c2ecf20Sopenharmony_ci max_len); 8108c2ecf20Sopenharmony_ci ret = -EINVAL; 8118c2ecf20Sopenharmony_ci goto out; 8128c2ecf20Sopenharmony_ci } 8138c2ecf20Sopenharmony_ci 8148c2ecf20Sopenharmony_ci if (!msg_type) { 8158c2ecf20Sopenharmony_ci mlog(0, "no message type provided: %u, %p\n", msg_type, func); 8168c2ecf20Sopenharmony_ci ret = -EINVAL; 8178c2ecf20Sopenharmony_ci goto out; 8188c2ecf20Sopenharmony_ci 8198c2ecf20Sopenharmony_ci } 8208c2ecf20Sopenharmony_ci if (!func) { 8218c2ecf20Sopenharmony_ci mlog(0, "no message handler provided: %u, %p\n", 8228c2ecf20Sopenharmony_ci msg_type, func); 8238c2ecf20Sopenharmony_ci ret = -EINVAL; 8248c2ecf20Sopenharmony_ci goto out; 8258c2ecf20Sopenharmony_ci } 8268c2ecf20Sopenharmony_ci 8278c2ecf20Sopenharmony_ci nmh = kzalloc(sizeof(struct o2net_msg_handler), GFP_NOFS); 8288c2ecf20Sopenharmony_ci if (nmh == NULL) { 8298c2ecf20Sopenharmony_ci ret = -ENOMEM; 8308c2ecf20Sopenharmony_ci goto 
out; 8318c2ecf20Sopenharmony_ci } 8328c2ecf20Sopenharmony_ci 8338c2ecf20Sopenharmony_ci nmh->nh_func = func; 8348c2ecf20Sopenharmony_ci nmh->nh_func_data = data; 8358c2ecf20Sopenharmony_ci nmh->nh_post_func = post_func; 8368c2ecf20Sopenharmony_ci nmh->nh_msg_type = msg_type; 8378c2ecf20Sopenharmony_ci nmh->nh_max_len = max_len; 8388c2ecf20Sopenharmony_ci nmh->nh_key = key; 8398c2ecf20Sopenharmony_ci /* the tree and list get this ref.. they're both removed in 8408c2ecf20Sopenharmony_ci * unregister when this ref is dropped */ 8418c2ecf20Sopenharmony_ci kref_init(&nmh->nh_kref); 8428c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&nmh->nh_unregister_item); 8438c2ecf20Sopenharmony_ci 8448c2ecf20Sopenharmony_ci write_lock(&o2net_handler_lock); 8458c2ecf20Sopenharmony_ci if (o2net_handler_tree_lookup(msg_type, key, &p, &parent)) 8468c2ecf20Sopenharmony_ci ret = -EEXIST; 8478c2ecf20Sopenharmony_ci else { 8488c2ecf20Sopenharmony_ci rb_link_node(&nmh->nh_node, parent, p); 8498c2ecf20Sopenharmony_ci rb_insert_color(&nmh->nh_node, &o2net_handler_tree); 8508c2ecf20Sopenharmony_ci list_add_tail(&nmh->nh_unregister_item, unreg_list); 8518c2ecf20Sopenharmony_ci 8528c2ecf20Sopenharmony_ci mlog(ML_TCP, "registered handler func %p type %u key %08x\n", 8538c2ecf20Sopenharmony_ci func, msg_type, key); 8548c2ecf20Sopenharmony_ci /* we've had some trouble with handlers seemingly vanishing. 
*/ 8558c2ecf20Sopenharmony_ci mlog_bug_on_msg(o2net_handler_tree_lookup(msg_type, key, &p, 8568c2ecf20Sopenharmony_ci &parent) == NULL, 8578c2ecf20Sopenharmony_ci "couldn't find handler we *just* registered " 8588c2ecf20Sopenharmony_ci "for type %u key %08x\n", msg_type, key); 8598c2ecf20Sopenharmony_ci } 8608c2ecf20Sopenharmony_ci write_unlock(&o2net_handler_lock); 8618c2ecf20Sopenharmony_ci 8628c2ecf20Sopenharmony_ciout: 8638c2ecf20Sopenharmony_ci if (ret) 8648c2ecf20Sopenharmony_ci kfree(nmh); 8658c2ecf20Sopenharmony_ci 8668c2ecf20Sopenharmony_ci return ret; 8678c2ecf20Sopenharmony_ci} 8688c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(o2net_register_handler); 8698c2ecf20Sopenharmony_ci 8708c2ecf20Sopenharmony_civoid o2net_unregister_handler_list(struct list_head *list) 8718c2ecf20Sopenharmony_ci{ 8728c2ecf20Sopenharmony_ci struct o2net_msg_handler *nmh, *n; 8738c2ecf20Sopenharmony_ci 8748c2ecf20Sopenharmony_ci write_lock(&o2net_handler_lock); 8758c2ecf20Sopenharmony_ci list_for_each_entry_safe(nmh, n, list, nh_unregister_item) { 8768c2ecf20Sopenharmony_ci mlog(ML_TCP, "unregistering handler func %p type %u key %08x\n", 8778c2ecf20Sopenharmony_ci nmh->nh_func, nmh->nh_msg_type, nmh->nh_key); 8788c2ecf20Sopenharmony_ci rb_erase(&nmh->nh_node, &o2net_handler_tree); 8798c2ecf20Sopenharmony_ci list_del_init(&nmh->nh_unregister_item); 8808c2ecf20Sopenharmony_ci kref_put(&nmh->nh_kref, o2net_handler_kref_release); 8818c2ecf20Sopenharmony_ci } 8828c2ecf20Sopenharmony_ci write_unlock(&o2net_handler_lock); 8838c2ecf20Sopenharmony_ci} 8848c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(o2net_unregister_handler_list); 8858c2ecf20Sopenharmony_ci 8868c2ecf20Sopenharmony_cistatic struct o2net_msg_handler *o2net_handler_get(u32 msg_type, u32 key) 8878c2ecf20Sopenharmony_ci{ 8888c2ecf20Sopenharmony_ci struct o2net_msg_handler *nmh; 8898c2ecf20Sopenharmony_ci 8908c2ecf20Sopenharmony_ci read_lock(&o2net_handler_lock); 8918c2ecf20Sopenharmony_ci nmh = o2net_handler_tree_lookup(msg_type, key, 
NULL, NULL); 8928c2ecf20Sopenharmony_ci if (nmh) 8938c2ecf20Sopenharmony_ci kref_get(&nmh->nh_kref); 8948c2ecf20Sopenharmony_ci read_unlock(&o2net_handler_lock); 8958c2ecf20Sopenharmony_ci 8968c2ecf20Sopenharmony_ci return nmh; 8978c2ecf20Sopenharmony_ci} 8988c2ecf20Sopenharmony_ci 8998c2ecf20Sopenharmony_ci/* ------------------------------------------------------------ */ 9008c2ecf20Sopenharmony_ci 9018c2ecf20Sopenharmony_cistatic int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len) 9028c2ecf20Sopenharmony_ci{ 9038c2ecf20Sopenharmony_ci struct kvec vec = { .iov_len = len, .iov_base = data, }; 9048c2ecf20Sopenharmony_ci struct msghdr msg = { .msg_flags = MSG_DONTWAIT, }; 9058c2ecf20Sopenharmony_ci iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, len); 9068c2ecf20Sopenharmony_ci return sock_recvmsg(sock, &msg, MSG_DONTWAIT); 9078c2ecf20Sopenharmony_ci} 9088c2ecf20Sopenharmony_ci 9098c2ecf20Sopenharmony_cistatic int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec, 9108c2ecf20Sopenharmony_ci size_t veclen, size_t total) 9118c2ecf20Sopenharmony_ci{ 9128c2ecf20Sopenharmony_ci int ret; 9138c2ecf20Sopenharmony_ci struct msghdr msg = {.msg_flags = 0,}; 9148c2ecf20Sopenharmony_ci 9158c2ecf20Sopenharmony_ci if (sock == NULL) { 9168c2ecf20Sopenharmony_ci ret = -EINVAL; 9178c2ecf20Sopenharmony_ci goto out; 9188c2ecf20Sopenharmony_ci } 9198c2ecf20Sopenharmony_ci 9208c2ecf20Sopenharmony_ci ret = kernel_sendmsg(sock, &msg, vec, veclen, total); 9218c2ecf20Sopenharmony_ci if (likely(ret == total)) 9228c2ecf20Sopenharmony_ci return 0; 9238c2ecf20Sopenharmony_ci mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret, total); 9248c2ecf20Sopenharmony_ci if (ret >= 0) 9258c2ecf20Sopenharmony_ci ret = -EPIPE; /* should be smarter, I bet */ 9268c2ecf20Sopenharmony_ciout: 9278c2ecf20Sopenharmony_ci mlog(0, "returning error: %d\n", ret); 9288c2ecf20Sopenharmony_ci return ret; 9298c2ecf20Sopenharmony_ci} 9308c2ecf20Sopenharmony_ci 9318c2ecf20Sopenharmony_cistatic void 
o2net_sendpage(struct o2net_sock_container *sc, 9328c2ecf20Sopenharmony_ci void *kmalloced_virt, 9338c2ecf20Sopenharmony_ci size_t size) 9348c2ecf20Sopenharmony_ci{ 9358c2ecf20Sopenharmony_ci struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); 9368c2ecf20Sopenharmony_ci ssize_t ret; 9378c2ecf20Sopenharmony_ci 9388c2ecf20Sopenharmony_ci while (1) { 9398c2ecf20Sopenharmony_ci mutex_lock(&sc->sc_send_lock); 9408c2ecf20Sopenharmony_ci ret = sc->sc_sock->ops->sendpage(sc->sc_sock, 9418c2ecf20Sopenharmony_ci virt_to_page(kmalloced_virt), 9428c2ecf20Sopenharmony_ci offset_in_page(kmalloced_virt), 9438c2ecf20Sopenharmony_ci size, MSG_DONTWAIT); 9448c2ecf20Sopenharmony_ci mutex_unlock(&sc->sc_send_lock); 9458c2ecf20Sopenharmony_ci if (ret == size) 9468c2ecf20Sopenharmony_ci break; 9478c2ecf20Sopenharmony_ci if (ret == (ssize_t)-EAGAIN) { 9488c2ecf20Sopenharmony_ci mlog(0, "sendpage of size %zu to " SC_NODEF_FMT 9498c2ecf20Sopenharmony_ci " returned EAGAIN\n", size, SC_NODEF_ARGS(sc)); 9508c2ecf20Sopenharmony_ci cond_resched(); 9518c2ecf20Sopenharmony_ci continue; 9528c2ecf20Sopenharmony_ci } 9538c2ecf20Sopenharmony_ci mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT 9548c2ecf20Sopenharmony_ci " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret); 9558c2ecf20Sopenharmony_ci o2net_ensure_shutdown(nn, sc, 0); 9568c2ecf20Sopenharmony_ci break; 9578c2ecf20Sopenharmony_ci } 9588c2ecf20Sopenharmony_ci} 9598c2ecf20Sopenharmony_ci 9608c2ecf20Sopenharmony_cistatic void o2net_init_msg(struct o2net_msg *msg, u16 data_len, u16 msg_type, u32 key) 9618c2ecf20Sopenharmony_ci{ 9628c2ecf20Sopenharmony_ci memset(msg, 0, sizeof(struct o2net_msg)); 9638c2ecf20Sopenharmony_ci msg->magic = cpu_to_be16(O2NET_MSG_MAGIC); 9648c2ecf20Sopenharmony_ci msg->data_len = cpu_to_be16(data_len); 9658c2ecf20Sopenharmony_ci msg->msg_type = cpu_to_be16(msg_type); 9668c2ecf20Sopenharmony_ci msg->sys_status = cpu_to_be32(O2NET_ERR_NONE); 9678c2ecf20Sopenharmony_ci msg->status = 0; 
9688c2ecf20Sopenharmony_ci msg->key = cpu_to_be32(key); 9698c2ecf20Sopenharmony_ci} 9708c2ecf20Sopenharmony_ci 9718c2ecf20Sopenharmony_cistatic int o2net_tx_can_proceed(struct o2net_node *nn, 9728c2ecf20Sopenharmony_ci struct o2net_sock_container **sc_ret, 9738c2ecf20Sopenharmony_ci int *error) 9748c2ecf20Sopenharmony_ci{ 9758c2ecf20Sopenharmony_ci int ret = 0; 9768c2ecf20Sopenharmony_ci 9778c2ecf20Sopenharmony_ci spin_lock(&nn->nn_lock); 9788c2ecf20Sopenharmony_ci if (nn->nn_persistent_error) { 9798c2ecf20Sopenharmony_ci ret = 1; 9808c2ecf20Sopenharmony_ci *sc_ret = NULL; 9818c2ecf20Sopenharmony_ci *error = nn->nn_persistent_error; 9828c2ecf20Sopenharmony_ci } else if (nn->nn_sc_valid) { 9838c2ecf20Sopenharmony_ci kref_get(&nn->nn_sc->sc_kref); 9848c2ecf20Sopenharmony_ci 9858c2ecf20Sopenharmony_ci ret = 1; 9868c2ecf20Sopenharmony_ci *sc_ret = nn->nn_sc; 9878c2ecf20Sopenharmony_ci *error = 0; 9888c2ecf20Sopenharmony_ci } 9898c2ecf20Sopenharmony_ci spin_unlock(&nn->nn_lock); 9908c2ecf20Sopenharmony_ci 9918c2ecf20Sopenharmony_ci return ret; 9928c2ecf20Sopenharmony_ci} 9938c2ecf20Sopenharmony_ci 9948c2ecf20Sopenharmony_ci/* Get a map of all nodes to which this node is currently connected to */ 9958c2ecf20Sopenharmony_civoid o2net_fill_node_map(unsigned long *map, unsigned bytes) 9968c2ecf20Sopenharmony_ci{ 9978c2ecf20Sopenharmony_ci struct o2net_sock_container *sc; 9988c2ecf20Sopenharmony_ci int node, ret; 9998c2ecf20Sopenharmony_ci 10008c2ecf20Sopenharmony_ci BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long))); 10018c2ecf20Sopenharmony_ci 10028c2ecf20Sopenharmony_ci memset(map, 0, bytes); 10038c2ecf20Sopenharmony_ci for (node = 0; node < O2NM_MAX_NODES; ++node) { 10048c2ecf20Sopenharmony_ci if (!o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret)) 10058c2ecf20Sopenharmony_ci continue; 10068c2ecf20Sopenharmony_ci if (!ret) { 10078c2ecf20Sopenharmony_ci set_bit(node, map); 10088c2ecf20Sopenharmony_ci sc_put(sc); 10098c2ecf20Sopenharmony_ci } 
10108c2ecf20Sopenharmony_ci } 10118c2ecf20Sopenharmony_ci} 10128c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(o2net_fill_node_map); 10138c2ecf20Sopenharmony_ci 10148c2ecf20Sopenharmony_ciint o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, 10158c2ecf20Sopenharmony_ci size_t caller_veclen, u8 target_node, int *status) 10168c2ecf20Sopenharmony_ci{ 10178c2ecf20Sopenharmony_ci int ret = 0; 10188c2ecf20Sopenharmony_ci struct o2net_msg *msg = NULL; 10198c2ecf20Sopenharmony_ci size_t veclen, caller_bytes = 0; 10208c2ecf20Sopenharmony_ci struct kvec *vec = NULL; 10218c2ecf20Sopenharmony_ci struct o2net_sock_container *sc = NULL; 10228c2ecf20Sopenharmony_ci struct o2net_node *nn = o2net_nn_from_num(target_node); 10238c2ecf20Sopenharmony_ci struct o2net_status_wait nsw = { 10248c2ecf20Sopenharmony_ci .ns_node_item = LIST_HEAD_INIT(nsw.ns_node_item), 10258c2ecf20Sopenharmony_ci }; 10268c2ecf20Sopenharmony_ci struct o2net_send_tracking nst; 10278c2ecf20Sopenharmony_ci 10288c2ecf20Sopenharmony_ci o2net_init_nst(&nst, msg_type, key, current, target_node); 10298c2ecf20Sopenharmony_ci 10308c2ecf20Sopenharmony_ci if (o2net_wq == NULL) { 10318c2ecf20Sopenharmony_ci mlog(0, "attempt to tx without o2netd running\n"); 10328c2ecf20Sopenharmony_ci ret = -ESRCH; 10338c2ecf20Sopenharmony_ci goto out; 10348c2ecf20Sopenharmony_ci } 10358c2ecf20Sopenharmony_ci 10368c2ecf20Sopenharmony_ci if (caller_veclen == 0) { 10378c2ecf20Sopenharmony_ci mlog(0, "bad kvec array length\n"); 10388c2ecf20Sopenharmony_ci ret = -EINVAL; 10398c2ecf20Sopenharmony_ci goto out; 10408c2ecf20Sopenharmony_ci } 10418c2ecf20Sopenharmony_ci 10428c2ecf20Sopenharmony_ci caller_bytes = iov_length((struct iovec *)caller_vec, caller_veclen); 10438c2ecf20Sopenharmony_ci if (caller_bytes > O2NET_MAX_PAYLOAD_BYTES) { 10448c2ecf20Sopenharmony_ci mlog(0, "total payload len %zu too large\n", caller_bytes); 10458c2ecf20Sopenharmony_ci ret = -EINVAL; 10468c2ecf20Sopenharmony_ci goto out; 10478c2ecf20Sopenharmony_ci } 
10488c2ecf20Sopenharmony_ci 10498c2ecf20Sopenharmony_ci if (target_node == o2nm_this_node()) { 10508c2ecf20Sopenharmony_ci ret = -ELOOP; 10518c2ecf20Sopenharmony_ci goto out; 10528c2ecf20Sopenharmony_ci } 10538c2ecf20Sopenharmony_ci 10548c2ecf20Sopenharmony_ci o2net_debug_add_nst(&nst); 10558c2ecf20Sopenharmony_ci 10568c2ecf20Sopenharmony_ci o2net_set_nst_sock_time(&nst); 10578c2ecf20Sopenharmony_ci 10588c2ecf20Sopenharmony_ci wait_event(nn->nn_sc_wq, o2net_tx_can_proceed(nn, &sc, &ret)); 10598c2ecf20Sopenharmony_ci if (ret) 10608c2ecf20Sopenharmony_ci goto out; 10618c2ecf20Sopenharmony_ci 10628c2ecf20Sopenharmony_ci o2net_set_nst_sock_container(&nst, sc); 10638c2ecf20Sopenharmony_ci 10648c2ecf20Sopenharmony_ci veclen = caller_veclen + 1; 10658c2ecf20Sopenharmony_ci vec = kmalloc_array(veclen, sizeof(struct kvec), GFP_ATOMIC); 10668c2ecf20Sopenharmony_ci if (vec == NULL) { 10678c2ecf20Sopenharmony_ci mlog(0, "failed to %zu element kvec!\n", veclen); 10688c2ecf20Sopenharmony_ci ret = -ENOMEM; 10698c2ecf20Sopenharmony_ci goto out; 10708c2ecf20Sopenharmony_ci } 10718c2ecf20Sopenharmony_ci 10728c2ecf20Sopenharmony_ci msg = kmalloc(sizeof(struct o2net_msg), GFP_ATOMIC); 10738c2ecf20Sopenharmony_ci if (!msg) { 10748c2ecf20Sopenharmony_ci mlog(0, "failed to allocate a o2net_msg!\n"); 10758c2ecf20Sopenharmony_ci ret = -ENOMEM; 10768c2ecf20Sopenharmony_ci goto out; 10778c2ecf20Sopenharmony_ci } 10788c2ecf20Sopenharmony_ci 10798c2ecf20Sopenharmony_ci o2net_init_msg(msg, caller_bytes, msg_type, key); 10808c2ecf20Sopenharmony_ci 10818c2ecf20Sopenharmony_ci vec[0].iov_len = sizeof(struct o2net_msg); 10828c2ecf20Sopenharmony_ci vec[0].iov_base = msg; 10838c2ecf20Sopenharmony_ci memcpy(&vec[1], caller_vec, caller_veclen * sizeof(struct kvec)); 10848c2ecf20Sopenharmony_ci 10858c2ecf20Sopenharmony_ci ret = o2net_prep_nsw(nn, &nsw); 10868c2ecf20Sopenharmony_ci if (ret) 10878c2ecf20Sopenharmony_ci goto out; 10888c2ecf20Sopenharmony_ci 10898c2ecf20Sopenharmony_ci msg->msg_num = 
cpu_to_be32(nsw.ns_id); 10908c2ecf20Sopenharmony_ci o2net_set_nst_msg_id(&nst, nsw.ns_id); 10918c2ecf20Sopenharmony_ci 10928c2ecf20Sopenharmony_ci o2net_set_nst_send_time(&nst); 10938c2ecf20Sopenharmony_ci 10948c2ecf20Sopenharmony_ci /* finally, convert the message header to network byte-order 10958c2ecf20Sopenharmony_ci * and send */ 10968c2ecf20Sopenharmony_ci mutex_lock(&sc->sc_send_lock); 10978c2ecf20Sopenharmony_ci ret = o2net_send_tcp_msg(sc->sc_sock, vec, veclen, 10988c2ecf20Sopenharmony_ci sizeof(struct o2net_msg) + caller_bytes); 10998c2ecf20Sopenharmony_ci mutex_unlock(&sc->sc_send_lock); 11008c2ecf20Sopenharmony_ci msglog(msg, "sending returned %d\n", ret); 11018c2ecf20Sopenharmony_ci if (ret < 0) { 11028c2ecf20Sopenharmony_ci mlog(0, "error returned from o2net_send_tcp_msg=%d\n", ret); 11038c2ecf20Sopenharmony_ci goto out; 11048c2ecf20Sopenharmony_ci } 11058c2ecf20Sopenharmony_ci 11068c2ecf20Sopenharmony_ci /* wait on other node's handler */ 11078c2ecf20Sopenharmony_ci o2net_set_nst_status_time(&nst); 11088c2ecf20Sopenharmony_ci wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw)); 11098c2ecf20Sopenharmony_ci 11108c2ecf20Sopenharmony_ci o2net_update_send_stats(&nst, sc); 11118c2ecf20Sopenharmony_ci 11128c2ecf20Sopenharmony_ci /* Note that we avoid overwriting the callers status return 11138c2ecf20Sopenharmony_ci * variable if a system error was reported on the other 11148c2ecf20Sopenharmony_ci * side. Callers beware. 
*/ 11158c2ecf20Sopenharmony_ci ret = o2net_sys_err_to_errno(nsw.ns_sys_status); 11168c2ecf20Sopenharmony_ci if (status && !ret) 11178c2ecf20Sopenharmony_ci *status = nsw.ns_status; 11188c2ecf20Sopenharmony_ci 11198c2ecf20Sopenharmony_ci mlog(0, "woken, returning system status %d, user status %d\n", 11208c2ecf20Sopenharmony_ci ret, nsw.ns_status); 11218c2ecf20Sopenharmony_ciout: 11228c2ecf20Sopenharmony_ci o2net_debug_del_nst(&nst); /* must be before dropping sc and node */ 11238c2ecf20Sopenharmony_ci if (sc) 11248c2ecf20Sopenharmony_ci sc_put(sc); 11258c2ecf20Sopenharmony_ci kfree(vec); 11268c2ecf20Sopenharmony_ci kfree(msg); 11278c2ecf20Sopenharmony_ci o2net_complete_nsw(nn, &nsw, 0, 0, 0); 11288c2ecf20Sopenharmony_ci return ret; 11298c2ecf20Sopenharmony_ci} 11308c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(o2net_send_message_vec); 11318c2ecf20Sopenharmony_ci 11328c2ecf20Sopenharmony_ciint o2net_send_message(u32 msg_type, u32 key, void *data, u32 len, 11338c2ecf20Sopenharmony_ci u8 target_node, int *status) 11348c2ecf20Sopenharmony_ci{ 11358c2ecf20Sopenharmony_ci struct kvec vec = { 11368c2ecf20Sopenharmony_ci .iov_base = data, 11378c2ecf20Sopenharmony_ci .iov_len = len, 11388c2ecf20Sopenharmony_ci }; 11398c2ecf20Sopenharmony_ci return o2net_send_message_vec(msg_type, key, &vec, 1, 11408c2ecf20Sopenharmony_ci target_node, status); 11418c2ecf20Sopenharmony_ci} 11428c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(o2net_send_message); 11438c2ecf20Sopenharmony_ci 11448c2ecf20Sopenharmony_cistatic int o2net_send_status_magic(struct socket *sock, struct o2net_msg *hdr, 11458c2ecf20Sopenharmony_ci enum o2net_system_error syserr, int err) 11468c2ecf20Sopenharmony_ci{ 11478c2ecf20Sopenharmony_ci struct kvec vec = { 11488c2ecf20Sopenharmony_ci .iov_base = hdr, 11498c2ecf20Sopenharmony_ci .iov_len = sizeof(struct o2net_msg), 11508c2ecf20Sopenharmony_ci }; 11518c2ecf20Sopenharmony_ci 11528c2ecf20Sopenharmony_ci BUG_ON(syserr >= O2NET_ERR_MAX); 11538c2ecf20Sopenharmony_ci 
11548c2ecf20Sopenharmony_ci /* leave other fields intact from the incoming message, msg_num 11558c2ecf20Sopenharmony_ci * in particular */ 11568c2ecf20Sopenharmony_ci hdr->sys_status = cpu_to_be32(syserr); 11578c2ecf20Sopenharmony_ci hdr->status = cpu_to_be32(err); 11588c2ecf20Sopenharmony_ci hdr->magic = cpu_to_be16(O2NET_MSG_STATUS_MAGIC); // twiddle the magic 11598c2ecf20Sopenharmony_ci hdr->data_len = 0; 11608c2ecf20Sopenharmony_ci 11618c2ecf20Sopenharmony_ci msglog(hdr, "about to send status magic %d\n", err); 11628c2ecf20Sopenharmony_ci /* hdr has been in host byteorder this whole time */ 11638c2ecf20Sopenharmony_ci return o2net_send_tcp_msg(sock, &vec, 1, sizeof(struct o2net_msg)); 11648c2ecf20Sopenharmony_ci} 11658c2ecf20Sopenharmony_ci 11668c2ecf20Sopenharmony_ci/* this returns -errno if the header was unknown or too large, etc. 11678c2ecf20Sopenharmony_ci * after this is called the buffer us reused for the next message */ 11688c2ecf20Sopenharmony_cistatic int o2net_process_message(struct o2net_sock_container *sc, 11698c2ecf20Sopenharmony_ci struct o2net_msg *hdr) 11708c2ecf20Sopenharmony_ci{ 11718c2ecf20Sopenharmony_ci struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); 11728c2ecf20Sopenharmony_ci int ret = 0, handler_status; 11738c2ecf20Sopenharmony_ci enum o2net_system_error syserr; 11748c2ecf20Sopenharmony_ci struct o2net_msg_handler *nmh = NULL; 11758c2ecf20Sopenharmony_ci void *ret_data = NULL; 11768c2ecf20Sopenharmony_ci 11778c2ecf20Sopenharmony_ci msglog(hdr, "processing message\n"); 11788c2ecf20Sopenharmony_ci 11798c2ecf20Sopenharmony_ci o2net_sc_postpone_idle(sc); 11808c2ecf20Sopenharmony_ci 11818c2ecf20Sopenharmony_ci switch(be16_to_cpu(hdr->magic)) { 11828c2ecf20Sopenharmony_ci case O2NET_MSG_STATUS_MAGIC: 11838c2ecf20Sopenharmony_ci /* special type for returning message status */ 11848c2ecf20Sopenharmony_ci o2net_complete_nsw(nn, NULL, 11858c2ecf20Sopenharmony_ci be32_to_cpu(hdr->msg_num), 11868c2ecf20Sopenharmony_ci 
be32_to_cpu(hdr->sys_status), 11878c2ecf20Sopenharmony_ci be32_to_cpu(hdr->status)); 11888c2ecf20Sopenharmony_ci goto out; 11898c2ecf20Sopenharmony_ci case O2NET_MSG_KEEP_REQ_MAGIC: 11908c2ecf20Sopenharmony_ci o2net_sendpage(sc, o2net_keep_resp, 11918c2ecf20Sopenharmony_ci sizeof(*o2net_keep_resp)); 11928c2ecf20Sopenharmony_ci goto out; 11938c2ecf20Sopenharmony_ci case O2NET_MSG_KEEP_RESP_MAGIC: 11948c2ecf20Sopenharmony_ci goto out; 11958c2ecf20Sopenharmony_ci case O2NET_MSG_MAGIC: 11968c2ecf20Sopenharmony_ci break; 11978c2ecf20Sopenharmony_ci default: 11988c2ecf20Sopenharmony_ci msglog(hdr, "bad magic\n"); 11998c2ecf20Sopenharmony_ci ret = -EINVAL; 12008c2ecf20Sopenharmony_ci goto out; 12018c2ecf20Sopenharmony_ci break; 12028c2ecf20Sopenharmony_ci } 12038c2ecf20Sopenharmony_ci 12048c2ecf20Sopenharmony_ci /* find a handler for it */ 12058c2ecf20Sopenharmony_ci handler_status = 0; 12068c2ecf20Sopenharmony_ci nmh = o2net_handler_get(be16_to_cpu(hdr->msg_type), 12078c2ecf20Sopenharmony_ci be32_to_cpu(hdr->key)); 12088c2ecf20Sopenharmony_ci if (!nmh) { 12098c2ecf20Sopenharmony_ci mlog(ML_TCP, "couldn't find handler for type %u key %08x\n", 12108c2ecf20Sopenharmony_ci be16_to_cpu(hdr->msg_type), be32_to_cpu(hdr->key)); 12118c2ecf20Sopenharmony_ci syserr = O2NET_ERR_NO_HNDLR; 12128c2ecf20Sopenharmony_ci goto out_respond; 12138c2ecf20Sopenharmony_ci } 12148c2ecf20Sopenharmony_ci 12158c2ecf20Sopenharmony_ci syserr = O2NET_ERR_NONE; 12168c2ecf20Sopenharmony_ci 12178c2ecf20Sopenharmony_ci if (be16_to_cpu(hdr->data_len) > nmh->nh_max_len) 12188c2ecf20Sopenharmony_ci syserr = O2NET_ERR_OVERFLOW; 12198c2ecf20Sopenharmony_ci 12208c2ecf20Sopenharmony_ci if (syserr != O2NET_ERR_NONE) 12218c2ecf20Sopenharmony_ci goto out_respond; 12228c2ecf20Sopenharmony_ci 12238c2ecf20Sopenharmony_ci o2net_set_func_start_time(sc); 12248c2ecf20Sopenharmony_ci sc->sc_msg_key = be32_to_cpu(hdr->key); 12258c2ecf20Sopenharmony_ci sc->sc_msg_type = be16_to_cpu(hdr->msg_type); 12268c2ecf20Sopenharmony_ci 
handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) + 12278c2ecf20Sopenharmony_ci be16_to_cpu(hdr->data_len), 12288c2ecf20Sopenharmony_ci nmh->nh_func_data, &ret_data); 12298c2ecf20Sopenharmony_ci o2net_set_func_stop_time(sc); 12308c2ecf20Sopenharmony_ci 12318c2ecf20Sopenharmony_ci o2net_update_recv_stats(sc); 12328c2ecf20Sopenharmony_ci 12338c2ecf20Sopenharmony_ciout_respond: 12348c2ecf20Sopenharmony_ci /* this destroys the hdr, so don't use it after this */ 12358c2ecf20Sopenharmony_ci mutex_lock(&sc->sc_send_lock); 12368c2ecf20Sopenharmony_ci ret = o2net_send_status_magic(sc->sc_sock, hdr, syserr, 12378c2ecf20Sopenharmony_ci handler_status); 12388c2ecf20Sopenharmony_ci mutex_unlock(&sc->sc_send_lock); 12398c2ecf20Sopenharmony_ci hdr = NULL; 12408c2ecf20Sopenharmony_ci mlog(0, "sending handler status %d, syserr %d returned %d\n", 12418c2ecf20Sopenharmony_ci handler_status, syserr, ret); 12428c2ecf20Sopenharmony_ci 12438c2ecf20Sopenharmony_ci if (nmh) { 12448c2ecf20Sopenharmony_ci BUG_ON(ret_data != NULL && nmh->nh_post_func == NULL); 12458c2ecf20Sopenharmony_ci if (nmh->nh_post_func) 12468c2ecf20Sopenharmony_ci (nmh->nh_post_func)(handler_status, nmh->nh_func_data, 12478c2ecf20Sopenharmony_ci ret_data); 12488c2ecf20Sopenharmony_ci } 12498c2ecf20Sopenharmony_ci 12508c2ecf20Sopenharmony_ciout: 12518c2ecf20Sopenharmony_ci if (nmh) 12528c2ecf20Sopenharmony_ci o2net_handler_put(nmh); 12538c2ecf20Sopenharmony_ci return ret; 12548c2ecf20Sopenharmony_ci} 12558c2ecf20Sopenharmony_ci 12568c2ecf20Sopenharmony_cistatic int o2net_check_handshake(struct o2net_sock_container *sc) 12578c2ecf20Sopenharmony_ci{ 12588c2ecf20Sopenharmony_ci struct o2net_handshake *hand = page_address(sc->sc_page); 12598c2ecf20Sopenharmony_ci struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); 12608c2ecf20Sopenharmony_ci 12618c2ecf20Sopenharmony_ci if (hand->protocol_version != cpu_to_be64(O2NET_PROTOCOL_VERSION)) { 12628c2ecf20Sopenharmony_ci printk(KERN_NOTICE "o2net: " 
SC_NODEF_FMT " Advertised net " 12638c2ecf20Sopenharmony_ci "protocol version %llu but %llu is required. " 12648c2ecf20Sopenharmony_ci "Disconnecting.\n", SC_NODEF_ARGS(sc), 12658c2ecf20Sopenharmony_ci (unsigned long long)be64_to_cpu(hand->protocol_version), 12668c2ecf20Sopenharmony_ci O2NET_PROTOCOL_VERSION); 12678c2ecf20Sopenharmony_ci 12688c2ecf20Sopenharmony_ci /* don't bother reconnecting if its the wrong version. */ 12698c2ecf20Sopenharmony_ci o2net_ensure_shutdown(nn, sc, -ENOTCONN); 12708c2ecf20Sopenharmony_ci return -1; 12718c2ecf20Sopenharmony_ci } 12728c2ecf20Sopenharmony_ci 12738c2ecf20Sopenharmony_ci /* 12748c2ecf20Sopenharmony_ci * Ensure timeouts are consistent with other nodes, otherwise 12758c2ecf20Sopenharmony_ci * we can end up with one node thinking that the other must be down, 12768c2ecf20Sopenharmony_ci * but isn't. This can ultimately cause corruption. 12778c2ecf20Sopenharmony_ci */ 12788c2ecf20Sopenharmony_ci if (be32_to_cpu(hand->o2net_idle_timeout_ms) != 12798c2ecf20Sopenharmony_ci o2net_idle_timeout()) { 12808c2ecf20Sopenharmony_ci printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a network " 12818c2ecf20Sopenharmony_ci "idle timeout of %u ms, but we use %u ms locally. " 12828c2ecf20Sopenharmony_ci "Disconnecting.\n", SC_NODEF_ARGS(sc), 12838c2ecf20Sopenharmony_ci be32_to_cpu(hand->o2net_idle_timeout_ms), 12848c2ecf20Sopenharmony_ci o2net_idle_timeout()); 12858c2ecf20Sopenharmony_ci o2net_ensure_shutdown(nn, sc, -ENOTCONN); 12868c2ecf20Sopenharmony_ci return -1; 12878c2ecf20Sopenharmony_ci } 12888c2ecf20Sopenharmony_ci 12898c2ecf20Sopenharmony_ci if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != 12908c2ecf20Sopenharmony_ci o2net_keepalive_delay()) { 12918c2ecf20Sopenharmony_ci printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a keepalive " 12928c2ecf20Sopenharmony_ci "delay of %u ms, but we use %u ms locally. 
" 12938c2ecf20Sopenharmony_ci "Disconnecting.\n", SC_NODEF_ARGS(sc), 12948c2ecf20Sopenharmony_ci be32_to_cpu(hand->o2net_keepalive_delay_ms), 12958c2ecf20Sopenharmony_ci o2net_keepalive_delay()); 12968c2ecf20Sopenharmony_ci o2net_ensure_shutdown(nn, sc, -ENOTCONN); 12978c2ecf20Sopenharmony_ci return -1; 12988c2ecf20Sopenharmony_ci } 12998c2ecf20Sopenharmony_ci 13008c2ecf20Sopenharmony_ci if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != 13018c2ecf20Sopenharmony_ci O2HB_MAX_WRITE_TIMEOUT_MS) { 13028c2ecf20Sopenharmony_ci printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a heartbeat " 13038c2ecf20Sopenharmony_ci "timeout of %u ms, but we use %u ms locally. " 13048c2ecf20Sopenharmony_ci "Disconnecting.\n", SC_NODEF_ARGS(sc), 13058c2ecf20Sopenharmony_ci be32_to_cpu(hand->o2hb_heartbeat_timeout_ms), 13068c2ecf20Sopenharmony_ci O2HB_MAX_WRITE_TIMEOUT_MS); 13078c2ecf20Sopenharmony_ci o2net_ensure_shutdown(nn, sc, -ENOTCONN); 13088c2ecf20Sopenharmony_ci return -1; 13098c2ecf20Sopenharmony_ci } 13108c2ecf20Sopenharmony_ci 13118c2ecf20Sopenharmony_ci sc->sc_handshake_ok = 1; 13128c2ecf20Sopenharmony_ci 13138c2ecf20Sopenharmony_ci spin_lock(&nn->nn_lock); 13148c2ecf20Sopenharmony_ci /* set valid and queue the idle timers only if it hasn't been 13158c2ecf20Sopenharmony_ci * shut down already */ 13168c2ecf20Sopenharmony_ci if (nn->nn_sc == sc) { 13178c2ecf20Sopenharmony_ci o2net_sc_reset_idle_timer(sc); 13188c2ecf20Sopenharmony_ci atomic_set(&nn->nn_timeout, 0); 13198c2ecf20Sopenharmony_ci o2net_set_nn_state(nn, sc, 1, 0); 13208c2ecf20Sopenharmony_ci } 13218c2ecf20Sopenharmony_ci spin_unlock(&nn->nn_lock); 13228c2ecf20Sopenharmony_ci 13238c2ecf20Sopenharmony_ci /* shift everything up as though it wasn't there */ 13248c2ecf20Sopenharmony_ci sc->sc_page_off -= sizeof(struct o2net_handshake); 13258c2ecf20Sopenharmony_ci if (sc->sc_page_off) 13268c2ecf20Sopenharmony_ci memmove(hand, hand + 1, sc->sc_page_off); 13278c2ecf20Sopenharmony_ci 13288c2ecf20Sopenharmony_ci return 0; 
13298c2ecf20Sopenharmony_ci} 13308c2ecf20Sopenharmony_ci 13318c2ecf20Sopenharmony_ci/* this demuxes the queued rx bytes into header or payload bits and calls 13328c2ecf20Sopenharmony_ci * handlers as each full message is read off the socket. it returns -error, 13338c2ecf20Sopenharmony_ci * == 0 eof, or > 0 for progress made.*/ 13348c2ecf20Sopenharmony_cistatic int o2net_advance_rx(struct o2net_sock_container *sc) 13358c2ecf20Sopenharmony_ci{ 13368c2ecf20Sopenharmony_ci struct o2net_msg *hdr; 13378c2ecf20Sopenharmony_ci int ret = 0; 13388c2ecf20Sopenharmony_ci void *data; 13398c2ecf20Sopenharmony_ci size_t datalen; 13408c2ecf20Sopenharmony_ci 13418c2ecf20Sopenharmony_ci sclog(sc, "receiving\n"); 13428c2ecf20Sopenharmony_ci o2net_set_advance_start_time(sc); 13438c2ecf20Sopenharmony_ci 13448c2ecf20Sopenharmony_ci if (unlikely(sc->sc_handshake_ok == 0)) { 13458c2ecf20Sopenharmony_ci if(sc->sc_page_off < sizeof(struct o2net_handshake)) { 13468c2ecf20Sopenharmony_ci data = page_address(sc->sc_page) + sc->sc_page_off; 13478c2ecf20Sopenharmony_ci datalen = sizeof(struct o2net_handshake) - sc->sc_page_off; 13488c2ecf20Sopenharmony_ci ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); 13498c2ecf20Sopenharmony_ci if (ret > 0) 13508c2ecf20Sopenharmony_ci sc->sc_page_off += ret; 13518c2ecf20Sopenharmony_ci } 13528c2ecf20Sopenharmony_ci 13538c2ecf20Sopenharmony_ci if (sc->sc_page_off == sizeof(struct o2net_handshake)) { 13548c2ecf20Sopenharmony_ci o2net_check_handshake(sc); 13558c2ecf20Sopenharmony_ci if (unlikely(sc->sc_handshake_ok == 0)) 13568c2ecf20Sopenharmony_ci ret = -EPROTO; 13578c2ecf20Sopenharmony_ci } 13588c2ecf20Sopenharmony_ci goto out; 13598c2ecf20Sopenharmony_ci } 13608c2ecf20Sopenharmony_ci 13618c2ecf20Sopenharmony_ci /* do we need more header? 
*/ 13628c2ecf20Sopenharmony_ci if (sc->sc_page_off < sizeof(struct o2net_msg)) { 13638c2ecf20Sopenharmony_ci data = page_address(sc->sc_page) + sc->sc_page_off; 13648c2ecf20Sopenharmony_ci datalen = sizeof(struct o2net_msg) - sc->sc_page_off; 13658c2ecf20Sopenharmony_ci ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); 13668c2ecf20Sopenharmony_ci if (ret > 0) { 13678c2ecf20Sopenharmony_ci sc->sc_page_off += ret; 13688c2ecf20Sopenharmony_ci /* only swab incoming here.. we can 13698c2ecf20Sopenharmony_ci * only get here once as we cross from 13708c2ecf20Sopenharmony_ci * being under to over */ 13718c2ecf20Sopenharmony_ci if (sc->sc_page_off == sizeof(struct o2net_msg)) { 13728c2ecf20Sopenharmony_ci hdr = page_address(sc->sc_page); 13738c2ecf20Sopenharmony_ci if (be16_to_cpu(hdr->data_len) > 13748c2ecf20Sopenharmony_ci O2NET_MAX_PAYLOAD_BYTES) 13758c2ecf20Sopenharmony_ci ret = -EOVERFLOW; 13768c2ecf20Sopenharmony_ci } 13778c2ecf20Sopenharmony_ci } 13788c2ecf20Sopenharmony_ci if (ret <= 0) 13798c2ecf20Sopenharmony_ci goto out; 13808c2ecf20Sopenharmony_ci } 13818c2ecf20Sopenharmony_ci 13828c2ecf20Sopenharmony_ci if (sc->sc_page_off < sizeof(struct o2net_msg)) { 13838c2ecf20Sopenharmony_ci /* oof, still don't have a header */ 13848c2ecf20Sopenharmony_ci goto out; 13858c2ecf20Sopenharmony_ci } 13868c2ecf20Sopenharmony_ci 13878c2ecf20Sopenharmony_ci /* this was swabbed above when we first read it */ 13888c2ecf20Sopenharmony_ci hdr = page_address(sc->sc_page); 13898c2ecf20Sopenharmony_ci 13908c2ecf20Sopenharmony_ci msglog(hdr, "at page_off %zu\n", sc->sc_page_off); 13918c2ecf20Sopenharmony_ci 13928c2ecf20Sopenharmony_ci /* do we need more payload? 
*/ 13938c2ecf20Sopenharmony_ci if (sc->sc_page_off - sizeof(struct o2net_msg) < be16_to_cpu(hdr->data_len)) { 13948c2ecf20Sopenharmony_ci /* need more payload */ 13958c2ecf20Sopenharmony_ci data = page_address(sc->sc_page) + sc->sc_page_off; 13968c2ecf20Sopenharmony_ci datalen = (sizeof(struct o2net_msg) + be16_to_cpu(hdr->data_len)) - 13978c2ecf20Sopenharmony_ci sc->sc_page_off; 13988c2ecf20Sopenharmony_ci ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); 13998c2ecf20Sopenharmony_ci if (ret > 0) 14008c2ecf20Sopenharmony_ci sc->sc_page_off += ret; 14018c2ecf20Sopenharmony_ci if (ret <= 0) 14028c2ecf20Sopenharmony_ci goto out; 14038c2ecf20Sopenharmony_ci } 14048c2ecf20Sopenharmony_ci 14058c2ecf20Sopenharmony_ci if (sc->sc_page_off - sizeof(struct o2net_msg) == be16_to_cpu(hdr->data_len)) { 14068c2ecf20Sopenharmony_ci /* we can only get here once, the first time we read 14078c2ecf20Sopenharmony_ci * the payload.. so set ret to progress if the handler 14088c2ecf20Sopenharmony_ci * works out. after calling this the message is toast */ 14098c2ecf20Sopenharmony_ci ret = o2net_process_message(sc, hdr); 14108c2ecf20Sopenharmony_ci if (ret == 0) 14118c2ecf20Sopenharmony_ci ret = 1; 14128c2ecf20Sopenharmony_ci sc->sc_page_off = 0; 14138c2ecf20Sopenharmony_ci } 14148c2ecf20Sopenharmony_ci 14158c2ecf20Sopenharmony_ciout: 14168c2ecf20Sopenharmony_ci sclog(sc, "ret = %d\n", ret); 14178c2ecf20Sopenharmony_ci o2net_set_advance_stop_time(sc); 14188c2ecf20Sopenharmony_ci return ret; 14198c2ecf20Sopenharmony_ci} 14208c2ecf20Sopenharmony_ci 14218c2ecf20Sopenharmony_ci/* this work func is triggerd by data ready. it reads until it can read no 14228c2ecf20Sopenharmony_ci * more. it interprets 0, eof, as fatal. if data_ready hits while we're doing 14238c2ecf20Sopenharmony_ci * our work the work struct will be marked and we'll be called again. 
*/ 14248c2ecf20Sopenharmony_cistatic void o2net_rx_until_empty(struct work_struct *work) 14258c2ecf20Sopenharmony_ci{ 14268c2ecf20Sopenharmony_ci struct o2net_sock_container *sc = 14278c2ecf20Sopenharmony_ci container_of(work, struct o2net_sock_container, sc_rx_work); 14288c2ecf20Sopenharmony_ci int ret; 14298c2ecf20Sopenharmony_ci 14308c2ecf20Sopenharmony_ci do { 14318c2ecf20Sopenharmony_ci ret = o2net_advance_rx(sc); 14328c2ecf20Sopenharmony_ci } while (ret > 0); 14338c2ecf20Sopenharmony_ci 14348c2ecf20Sopenharmony_ci if (ret <= 0 && ret != -EAGAIN) { 14358c2ecf20Sopenharmony_ci struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); 14368c2ecf20Sopenharmony_ci sclog(sc, "saw error %d, closing\n", ret); 14378c2ecf20Sopenharmony_ci /* not permanent so read failed handshake can retry */ 14388c2ecf20Sopenharmony_ci o2net_ensure_shutdown(nn, sc, 0); 14398c2ecf20Sopenharmony_ci } 14408c2ecf20Sopenharmony_ci 14418c2ecf20Sopenharmony_ci sc_put(sc); 14428c2ecf20Sopenharmony_ci} 14438c2ecf20Sopenharmony_ci 14448c2ecf20Sopenharmony_cistatic void o2net_initialize_handshake(void) 14458c2ecf20Sopenharmony_ci{ 14468c2ecf20Sopenharmony_ci o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( 14478c2ecf20Sopenharmony_ci O2HB_MAX_WRITE_TIMEOUT_MS); 14488c2ecf20Sopenharmony_ci o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(o2net_idle_timeout()); 14498c2ecf20Sopenharmony_ci o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32( 14508c2ecf20Sopenharmony_ci o2net_keepalive_delay()); 14518c2ecf20Sopenharmony_ci o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32( 14528c2ecf20Sopenharmony_ci o2net_reconnect_delay()); 14538c2ecf20Sopenharmony_ci} 14548c2ecf20Sopenharmony_ci 14558c2ecf20Sopenharmony_ci/* ------------------------------------------------------------ */ 14568c2ecf20Sopenharmony_ci 14578c2ecf20Sopenharmony_ci/* called when a connect completes and after a sock is accepted. 
the 14588c2ecf20Sopenharmony_ci * rx path will see the response and mark the sc valid */ 14598c2ecf20Sopenharmony_cistatic void o2net_sc_connect_completed(struct work_struct *work) 14608c2ecf20Sopenharmony_ci{ 14618c2ecf20Sopenharmony_ci struct o2net_sock_container *sc = 14628c2ecf20Sopenharmony_ci container_of(work, struct o2net_sock_container, 14638c2ecf20Sopenharmony_ci sc_connect_work); 14648c2ecf20Sopenharmony_ci 14658c2ecf20Sopenharmony_ci mlog(ML_MSG, "sc sending handshake with ver %llu id %llx\n", 14668c2ecf20Sopenharmony_ci (unsigned long long)O2NET_PROTOCOL_VERSION, 14678c2ecf20Sopenharmony_ci (unsigned long long)be64_to_cpu(o2net_hand->connector_id)); 14688c2ecf20Sopenharmony_ci 14698c2ecf20Sopenharmony_ci o2net_initialize_handshake(); 14708c2ecf20Sopenharmony_ci o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); 14718c2ecf20Sopenharmony_ci sc_put(sc); 14728c2ecf20Sopenharmony_ci} 14738c2ecf20Sopenharmony_ci 14748c2ecf20Sopenharmony_ci/* this is called as a work_struct func. */ 14758c2ecf20Sopenharmony_cistatic void o2net_sc_send_keep_req(struct work_struct *work) 14768c2ecf20Sopenharmony_ci{ 14778c2ecf20Sopenharmony_ci struct o2net_sock_container *sc = 14788c2ecf20Sopenharmony_ci container_of(work, struct o2net_sock_container, 14798c2ecf20Sopenharmony_ci sc_keepalive_work.work); 14808c2ecf20Sopenharmony_ci 14818c2ecf20Sopenharmony_ci o2net_sendpage(sc, o2net_keep_req, sizeof(*o2net_keep_req)); 14828c2ecf20Sopenharmony_ci sc_put(sc); 14838c2ecf20Sopenharmony_ci} 14848c2ecf20Sopenharmony_ci 14858c2ecf20Sopenharmony_ci/* socket shutdown does a del_timer_sync against this as it tears down. 
14868c2ecf20Sopenharmony_ci * we can't start this timer until we've got to the point in sc buildup 14878c2ecf20Sopenharmony_ci * where shutdown is going to be involved */ 14888c2ecf20Sopenharmony_cistatic void o2net_idle_timer(struct timer_list *t) 14898c2ecf20Sopenharmony_ci{ 14908c2ecf20Sopenharmony_ci struct o2net_sock_container *sc = from_timer(sc, t, sc_idle_timeout); 14918c2ecf20Sopenharmony_ci struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); 14928c2ecf20Sopenharmony_ci#ifdef CONFIG_DEBUG_FS 14938c2ecf20Sopenharmony_ci unsigned long msecs = ktime_to_ms(ktime_get()) - 14948c2ecf20Sopenharmony_ci ktime_to_ms(sc->sc_tv_timer); 14958c2ecf20Sopenharmony_ci#else 14968c2ecf20Sopenharmony_ci unsigned long msecs = o2net_idle_timeout(); 14978c2ecf20Sopenharmony_ci#endif 14988c2ecf20Sopenharmony_ci 14998c2ecf20Sopenharmony_ci printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been " 15008c2ecf20Sopenharmony_ci "idle for %lu.%lu secs.\n", 15018c2ecf20Sopenharmony_ci SC_NODEF_ARGS(sc), msecs / 1000, msecs % 1000); 15028c2ecf20Sopenharmony_ci 15038c2ecf20Sopenharmony_ci /* idle timerout happen, don't shutdown the connection, but 15048c2ecf20Sopenharmony_ci * make fence decision. Maybe the connection can recover before 15058c2ecf20Sopenharmony_ci * the decision is made. 
15068c2ecf20Sopenharmony_ci */ 15078c2ecf20Sopenharmony_ci atomic_set(&nn->nn_timeout, 1); 15088c2ecf20Sopenharmony_ci o2quo_conn_err(o2net_num_from_nn(nn)); 15098c2ecf20Sopenharmony_ci queue_delayed_work(o2net_wq, &nn->nn_still_up, 15108c2ecf20Sopenharmony_ci msecs_to_jiffies(O2NET_QUORUM_DELAY_MS)); 15118c2ecf20Sopenharmony_ci 15128c2ecf20Sopenharmony_ci o2net_sc_reset_idle_timer(sc); 15138c2ecf20Sopenharmony_ci 15148c2ecf20Sopenharmony_ci} 15158c2ecf20Sopenharmony_ci 15168c2ecf20Sopenharmony_cistatic void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) 15178c2ecf20Sopenharmony_ci{ 15188c2ecf20Sopenharmony_ci o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); 15198c2ecf20Sopenharmony_ci o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, 15208c2ecf20Sopenharmony_ci msecs_to_jiffies(o2net_keepalive_delay())); 15218c2ecf20Sopenharmony_ci o2net_set_sock_timer(sc); 15228c2ecf20Sopenharmony_ci mod_timer(&sc->sc_idle_timeout, 15238c2ecf20Sopenharmony_ci jiffies + msecs_to_jiffies(o2net_idle_timeout())); 15248c2ecf20Sopenharmony_ci} 15258c2ecf20Sopenharmony_ci 15268c2ecf20Sopenharmony_cistatic void o2net_sc_postpone_idle(struct o2net_sock_container *sc) 15278c2ecf20Sopenharmony_ci{ 15288c2ecf20Sopenharmony_ci struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); 15298c2ecf20Sopenharmony_ci 15308c2ecf20Sopenharmony_ci /* clear fence decision since the connection recover from timeout*/ 15318c2ecf20Sopenharmony_ci if (atomic_read(&nn->nn_timeout)) { 15328c2ecf20Sopenharmony_ci o2quo_conn_up(o2net_num_from_nn(nn)); 15338c2ecf20Sopenharmony_ci cancel_delayed_work(&nn->nn_still_up); 15348c2ecf20Sopenharmony_ci atomic_set(&nn->nn_timeout, 0); 15358c2ecf20Sopenharmony_ci } 15368c2ecf20Sopenharmony_ci 15378c2ecf20Sopenharmony_ci /* Only push out an existing timer */ 15388c2ecf20Sopenharmony_ci if (timer_pending(&sc->sc_idle_timeout)) 15398c2ecf20Sopenharmony_ci o2net_sc_reset_idle_timer(sc); 15408c2ecf20Sopenharmony_ci} 15418c2ecf20Sopenharmony_ci 
15428c2ecf20Sopenharmony_ci/* this work func is kicked whenever a path sets the nn state which doesn't 15438c2ecf20Sopenharmony_ci * have valid set. This includes seeing hb come up, losing a connection, 15448c2ecf20Sopenharmony_ci * having a connect attempt fail, etc. This centralizes the logic which decides 15458c2ecf20Sopenharmony_ci * if a connect attempt should be made or if we should give up and all future 15468c2ecf20Sopenharmony_ci * transmit attempts should fail */ 15478c2ecf20Sopenharmony_cistatic void o2net_start_connect(struct work_struct *work) 15488c2ecf20Sopenharmony_ci{ 15498c2ecf20Sopenharmony_ci struct o2net_node *nn = 15508c2ecf20Sopenharmony_ci container_of(work, struct o2net_node, nn_connect_work.work); 15518c2ecf20Sopenharmony_ci struct o2net_sock_container *sc = NULL; 15528c2ecf20Sopenharmony_ci struct o2nm_node *node = NULL, *mynode = NULL; 15538c2ecf20Sopenharmony_ci struct socket *sock = NULL; 15548c2ecf20Sopenharmony_ci struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; 15558c2ecf20Sopenharmony_ci int ret = 0, stop; 15568c2ecf20Sopenharmony_ci unsigned int timeout; 15578c2ecf20Sopenharmony_ci unsigned int nofs_flag; 15588c2ecf20Sopenharmony_ci 15598c2ecf20Sopenharmony_ci /* 15608c2ecf20Sopenharmony_ci * sock_create allocates the sock with GFP_KERNEL. We must 15618c2ecf20Sopenharmony_ci * prevent the filesystem from being reentered by memory reclaim. 
15628c2ecf20Sopenharmony_ci */ 15638c2ecf20Sopenharmony_ci nofs_flag = memalloc_nofs_save(); 15648c2ecf20Sopenharmony_ci /* if we're greater we initiate tx, otherwise we accept */ 15658c2ecf20Sopenharmony_ci if (o2nm_this_node() <= o2net_num_from_nn(nn)) 15668c2ecf20Sopenharmony_ci goto out; 15678c2ecf20Sopenharmony_ci 15688c2ecf20Sopenharmony_ci /* watch for racing with tearing a node down */ 15698c2ecf20Sopenharmony_ci node = o2nm_get_node_by_num(o2net_num_from_nn(nn)); 15708c2ecf20Sopenharmony_ci if (node == NULL) 15718c2ecf20Sopenharmony_ci goto out; 15728c2ecf20Sopenharmony_ci 15738c2ecf20Sopenharmony_ci mynode = o2nm_get_node_by_num(o2nm_this_node()); 15748c2ecf20Sopenharmony_ci if (mynode == NULL) 15758c2ecf20Sopenharmony_ci goto out; 15768c2ecf20Sopenharmony_ci 15778c2ecf20Sopenharmony_ci spin_lock(&nn->nn_lock); 15788c2ecf20Sopenharmony_ci /* 15798c2ecf20Sopenharmony_ci * see if we already have one pending or have given up. 15808c2ecf20Sopenharmony_ci * For nn_timeout, it is set when we close the connection 15818c2ecf20Sopenharmony_ci * because of the idle time out. So it means that we have 15828c2ecf20Sopenharmony_ci * at least connected to that node successfully once, 15838c2ecf20Sopenharmony_ci * now try to connect to it again. 
15848c2ecf20Sopenharmony_ci */ 15858c2ecf20Sopenharmony_ci timeout = atomic_read(&nn->nn_timeout); 15868c2ecf20Sopenharmony_ci stop = (nn->nn_sc || 15878c2ecf20Sopenharmony_ci (nn->nn_persistent_error && 15888c2ecf20Sopenharmony_ci (nn->nn_persistent_error != -ENOTCONN || timeout == 0))); 15898c2ecf20Sopenharmony_ci spin_unlock(&nn->nn_lock); 15908c2ecf20Sopenharmony_ci if (stop) 15918c2ecf20Sopenharmony_ci goto out; 15928c2ecf20Sopenharmony_ci 15938c2ecf20Sopenharmony_ci nn->nn_last_connect_attempt = jiffies; 15948c2ecf20Sopenharmony_ci 15958c2ecf20Sopenharmony_ci sc = sc_alloc(node); 15968c2ecf20Sopenharmony_ci if (sc == NULL) { 15978c2ecf20Sopenharmony_ci mlog(0, "couldn't allocate sc\n"); 15988c2ecf20Sopenharmony_ci ret = -ENOMEM; 15998c2ecf20Sopenharmony_ci goto out; 16008c2ecf20Sopenharmony_ci } 16018c2ecf20Sopenharmony_ci 16028c2ecf20Sopenharmony_ci ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); 16038c2ecf20Sopenharmony_ci if (ret < 0) { 16048c2ecf20Sopenharmony_ci mlog(0, "can't create socket: %d\n", ret); 16058c2ecf20Sopenharmony_ci goto out; 16068c2ecf20Sopenharmony_ci } 16078c2ecf20Sopenharmony_ci sc->sc_sock = sock; /* freed by sc_kref_release */ 16088c2ecf20Sopenharmony_ci 16098c2ecf20Sopenharmony_ci sock->sk->sk_allocation = GFP_ATOMIC; 16108c2ecf20Sopenharmony_ci 16118c2ecf20Sopenharmony_ci myaddr.sin_family = AF_INET; 16128c2ecf20Sopenharmony_ci myaddr.sin_addr.s_addr = mynode->nd_ipv4_address; 16138c2ecf20Sopenharmony_ci myaddr.sin_port = htons(0); /* any port */ 16148c2ecf20Sopenharmony_ci 16158c2ecf20Sopenharmony_ci ret = sock->ops->bind(sock, (struct sockaddr *)&myaddr, 16168c2ecf20Sopenharmony_ci sizeof(myaddr)); 16178c2ecf20Sopenharmony_ci if (ret) { 16188c2ecf20Sopenharmony_ci mlog(ML_ERROR, "bind failed with %d at address %pI4\n", 16198c2ecf20Sopenharmony_ci ret, &mynode->nd_ipv4_address); 16208c2ecf20Sopenharmony_ci goto out; 16218c2ecf20Sopenharmony_ci } 16228c2ecf20Sopenharmony_ci 16238c2ecf20Sopenharmony_ci 
tcp_sock_set_nodelay(sc->sc_sock->sk); 16248c2ecf20Sopenharmony_ci tcp_sock_set_user_timeout(sock->sk, O2NET_TCP_USER_TIMEOUT); 16258c2ecf20Sopenharmony_ci 16268c2ecf20Sopenharmony_ci o2net_register_callbacks(sc->sc_sock->sk, sc); 16278c2ecf20Sopenharmony_ci 16288c2ecf20Sopenharmony_ci spin_lock(&nn->nn_lock); 16298c2ecf20Sopenharmony_ci /* handshake completion will set nn->nn_sc_valid */ 16308c2ecf20Sopenharmony_ci o2net_set_nn_state(nn, sc, 0, 0); 16318c2ecf20Sopenharmony_ci spin_unlock(&nn->nn_lock); 16328c2ecf20Sopenharmony_ci 16338c2ecf20Sopenharmony_ci remoteaddr.sin_family = AF_INET; 16348c2ecf20Sopenharmony_ci remoteaddr.sin_addr.s_addr = node->nd_ipv4_address; 16358c2ecf20Sopenharmony_ci remoteaddr.sin_port = node->nd_ipv4_port; 16368c2ecf20Sopenharmony_ci 16378c2ecf20Sopenharmony_ci ret = sc->sc_sock->ops->connect(sc->sc_sock, 16388c2ecf20Sopenharmony_ci (struct sockaddr *)&remoteaddr, 16398c2ecf20Sopenharmony_ci sizeof(remoteaddr), 16408c2ecf20Sopenharmony_ci O_NONBLOCK); 16418c2ecf20Sopenharmony_ci if (ret == -EINPROGRESS) 16428c2ecf20Sopenharmony_ci ret = 0; 16438c2ecf20Sopenharmony_ci 16448c2ecf20Sopenharmony_ciout: 16458c2ecf20Sopenharmony_ci if (ret && sc) { 16468c2ecf20Sopenharmony_ci printk(KERN_NOTICE "o2net: Connect attempt to " SC_NODEF_FMT 16478c2ecf20Sopenharmony_ci " failed with errno %d\n", SC_NODEF_ARGS(sc), ret); 16488c2ecf20Sopenharmony_ci /* 0 err so that another will be queued and attempted 16498c2ecf20Sopenharmony_ci * from set_nn_state */ 16508c2ecf20Sopenharmony_ci o2net_ensure_shutdown(nn, sc, 0); 16518c2ecf20Sopenharmony_ci } 16528c2ecf20Sopenharmony_ci if (sc) 16538c2ecf20Sopenharmony_ci sc_put(sc); 16548c2ecf20Sopenharmony_ci if (node) 16558c2ecf20Sopenharmony_ci o2nm_node_put(node); 16568c2ecf20Sopenharmony_ci if (mynode) 16578c2ecf20Sopenharmony_ci o2nm_node_put(mynode); 16588c2ecf20Sopenharmony_ci 16598c2ecf20Sopenharmony_ci memalloc_nofs_restore(nofs_flag); 16608c2ecf20Sopenharmony_ci return; 16618c2ecf20Sopenharmony_ci} 
16628c2ecf20Sopenharmony_ci 16638c2ecf20Sopenharmony_cistatic void o2net_connect_expired(struct work_struct *work) 16648c2ecf20Sopenharmony_ci{ 16658c2ecf20Sopenharmony_ci struct o2net_node *nn = 16668c2ecf20Sopenharmony_ci container_of(work, struct o2net_node, nn_connect_expired.work); 16678c2ecf20Sopenharmony_ci 16688c2ecf20Sopenharmony_ci spin_lock(&nn->nn_lock); 16698c2ecf20Sopenharmony_ci if (!nn->nn_sc_valid) { 16708c2ecf20Sopenharmony_ci printk(KERN_NOTICE "o2net: No connection established with " 16718c2ecf20Sopenharmony_ci "node %u after %u.%u seconds, check network and" 16728c2ecf20Sopenharmony_ci " cluster configuration.\n", 16738c2ecf20Sopenharmony_ci o2net_num_from_nn(nn), 16748c2ecf20Sopenharmony_ci o2net_idle_timeout() / 1000, 16758c2ecf20Sopenharmony_ci o2net_idle_timeout() % 1000); 16768c2ecf20Sopenharmony_ci 16778c2ecf20Sopenharmony_ci o2net_set_nn_state(nn, NULL, 0, 0); 16788c2ecf20Sopenharmony_ci } 16798c2ecf20Sopenharmony_ci spin_unlock(&nn->nn_lock); 16808c2ecf20Sopenharmony_ci} 16818c2ecf20Sopenharmony_ci 16828c2ecf20Sopenharmony_cistatic void o2net_still_up(struct work_struct *work) 16838c2ecf20Sopenharmony_ci{ 16848c2ecf20Sopenharmony_ci struct o2net_node *nn = 16858c2ecf20Sopenharmony_ci container_of(work, struct o2net_node, nn_still_up.work); 16868c2ecf20Sopenharmony_ci 16878c2ecf20Sopenharmony_ci o2quo_hb_still_up(o2net_num_from_nn(nn)); 16888c2ecf20Sopenharmony_ci} 16898c2ecf20Sopenharmony_ci 16908c2ecf20Sopenharmony_ci/* ------------------------------------------------------------ */ 16918c2ecf20Sopenharmony_ci 16928c2ecf20Sopenharmony_civoid o2net_disconnect_node(struct o2nm_node *node) 16938c2ecf20Sopenharmony_ci{ 16948c2ecf20Sopenharmony_ci struct o2net_node *nn = o2net_nn_from_num(node->nd_num); 16958c2ecf20Sopenharmony_ci 16968c2ecf20Sopenharmony_ci /* don't reconnect until it's heartbeating again */ 16978c2ecf20Sopenharmony_ci spin_lock(&nn->nn_lock); 16988c2ecf20Sopenharmony_ci atomic_set(&nn->nn_timeout, 0); 
16998c2ecf20Sopenharmony_ci o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); 17008c2ecf20Sopenharmony_ci spin_unlock(&nn->nn_lock); 17018c2ecf20Sopenharmony_ci 17028c2ecf20Sopenharmony_ci if (o2net_wq) { 17038c2ecf20Sopenharmony_ci cancel_delayed_work(&nn->nn_connect_expired); 17048c2ecf20Sopenharmony_ci cancel_delayed_work(&nn->nn_connect_work); 17058c2ecf20Sopenharmony_ci cancel_delayed_work(&nn->nn_still_up); 17068c2ecf20Sopenharmony_ci flush_workqueue(o2net_wq); 17078c2ecf20Sopenharmony_ci } 17088c2ecf20Sopenharmony_ci} 17098c2ecf20Sopenharmony_ci 17108c2ecf20Sopenharmony_cistatic void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num, 17118c2ecf20Sopenharmony_ci void *data) 17128c2ecf20Sopenharmony_ci{ 17138c2ecf20Sopenharmony_ci o2quo_hb_down(node_num); 17148c2ecf20Sopenharmony_ci 17158c2ecf20Sopenharmony_ci if (!node) 17168c2ecf20Sopenharmony_ci return; 17178c2ecf20Sopenharmony_ci 17188c2ecf20Sopenharmony_ci if (node_num != o2nm_this_node()) 17198c2ecf20Sopenharmony_ci o2net_disconnect_node(node); 17208c2ecf20Sopenharmony_ci 17218c2ecf20Sopenharmony_ci BUG_ON(atomic_read(&o2net_connected_peers) < 0); 17228c2ecf20Sopenharmony_ci} 17238c2ecf20Sopenharmony_ci 17248c2ecf20Sopenharmony_cistatic void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, 17258c2ecf20Sopenharmony_ci void *data) 17268c2ecf20Sopenharmony_ci{ 17278c2ecf20Sopenharmony_ci struct o2net_node *nn = o2net_nn_from_num(node_num); 17288c2ecf20Sopenharmony_ci 17298c2ecf20Sopenharmony_ci o2quo_hb_up(node_num); 17308c2ecf20Sopenharmony_ci 17318c2ecf20Sopenharmony_ci BUG_ON(!node); 17328c2ecf20Sopenharmony_ci 17338c2ecf20Sopenharmony_ci /* ensure an immediate connect attempt */ 17348c2ecf20Sopenharmony_ci nn->nn_last_connect_attempt = jiffies - 17358c2ecf20Sopenharmony_ci (msecs_to_jiffies(o2net_reconnect_delay()) + 1); 17368c2ecf20Sopenharmony_ci 17378c2ecf20Sopenharmony_ci if (node_num != o2nm_this_node()) { 17388c2ecf20Sopenharmony_ci /* believe it or not, accept and node heartbeating 
testing 17398c2ecf20Sopenharmony_ci * can succeed for this node before we got here.. so 17408c2ecf20Sopenharmony_ci * only use set_nn_state to clear the persistent error 17418c2ecf20Sopenharmony_ci * if that hasn't already happened */ 17428c2ecf20Sopenharmony_ci spin_lock(&nn->nn_lock); 17438c2ecf20Sopenharmony_ci atomic_set(&nn->nn_timeout, 0); 17448c2ecf20Sopenharmony_ci if (nn->nn_persistent_error) 17458c2ecf20Sopenharmony_ci o2net_set_nn_state(nn, NULL, 0, 0); 17468c2ecf20Sopenharmony_ci spin_unlock(&nn->nn_lock); 17478c2ecf20Sopenharmony_ci } 17488c2ecf20Sopenharmony_ci} 17498c2ecf20Sopenharmony_ci 17508c2ecf20Sopenharmony_civoid o2net_unregister_hb_callbacks(void) 17518c2ecf20Sopenharmony_ci{ 17528c2ecf20Sopenharmony_ci o2hb_unregister_callback(NULL, &o2net_hb_up); 17538c2ecf20Sopenharmony_ci o2hb_unregister_callback(NULL, &o2net_hb_down); 17548c2ecf20Sopenharmony_ci} 17558c2ecf20Sopenharmony_ci 17568c2ecf20Sopenharmony_ciint o2net_register_hb_callbacks(void) 17578c2ecf20Sopenharmony_ci{ 17588c2ecf20Sopenharmony_ci int ret; 17598c2ecf20Sopenharmony_ci 17608c2ecf20Sopenharmony_ci o2hb_setup_callback(&o2net_hb_down, O2HB_NODE_DOWN_CB, 17618c2ecf20Sopenharmony_ci o2net_hb_node_down_cb, NULL, O2NET_HB_PRI); 17628c2ecf20Sopenharmony_ci o2hb_setup_callback(&o2net_hb_up, O2HB_NODE_UP_CB, 17638c2ecf20Sopenharmony_ci o2net_hb_node_up_cb, NULL, O2NET_HB_PRI); 17648c2ecf20Sopenharmony_ci 17658c2ecf20Sopenharmony_ci ret = o2hb_register_callback(NULL, &o2net_hb_up); 17668c2ecf20Sopenharmony_ci if (ret == 0) 17678c2ecf20Sopenharmony_ci ret = o2hb_register_callback(NULL, &o2net_hb_down); 17688c2ecf20Sopenharmony_ci 17698c2ecf20Sopenharmony_ci if (ret) 17708c2ecf20Sopenharmony_ci o2net_unregister_hb_callbacks(); 17718c2ecf20Sopenharmony_ci 17728c2ecf20Sopenharmony_ci return ret; 17738c2ecf20Sopenharmony_ci} 17748c2ecf20Sopenharmony_ci 17758c2ecf20Sopenharmony_ci/* ------------------------------------------------------------ */ 17768c2ecf20Sopenharmony_ci 
17778c2ecf20Sopenharmony_cistatic int o2net_accept_one(struct socket *sock, int *more) 17788c2ecf20Sopenharmony_ci{ 17798c2ecf20Sopenharmony_ci int ret; 17808c2ecf20Sopenharmony_ci struct sockaddr_in sin; 17818c2ecf20Sopenharmony_ci struct socket *new_sock = NULL; 17828c2ecf20Sopenharmony_ci struct o2nm_node *node = NULL; 17838c2ecf20Sopenharmony_ci struct o2nm_node *local_node = NULL; 17848c2ecf20Sopenharmony_ci struct o2net_sock_container *sc = NULL; 17858c2ecf20Sopenharmony_ci struct o2net_node *nn; 17868c2ecf20Sopenharmony_ci unsigned int nofs_flag; 17878c2ecf20Sopenharmony_ci 17888c2ecf20Sopenharmony_ci /* 17898c2ecf20Sopenharmony_ci * sock_create_lite allocates the sock with GFP_KERNEL. We must 17908c2ecf20Sopenharmony_ci * prevent the filesystem from being reentered by memory reclaim. 17918c2ecf20Sopenharmony_ci */ 17928c2ecf20Sopenharmony_ci nofs_flag = memalloc_nofs_save(); 17938c2ecf20Sopenharmony_ci 17948c2ecf20Sopenharmony_ci BUG_ON(sock == NULL); 17958c2ecf20Sopenharmony_ci *more = 0; 17968c2ecf20Sopenharmony_ci ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type, 17978c2ecf20Sopenharmony_ci sock->sk->sk_protocol, &new_sock); 17988c2ecf20Sopenharmony_ci if (ret) 17998c2ecf20Sopenharmony_ci goto out; 18008c2ecf20Sopenharmony_ci 18018c2ecf20Sopenharmony_ci new_sock->type = sock->type; 18028c2ecf20Sopenharmony_ci new_sock->ops = sock->ops; 18038c2ecf20Sopenharmony_ci ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false); 18048c2ecf20Sopenharmony_ci if (ret < 0) 18058c2ecf20Sopenharmony_ci goto out; 18068c2ecf20Sopenharmony_ci 18078c2ecf20Sopenharmony_ci *more = 1; 18088c2ecf20Sopenharmony_ci new_sock->sk->sk_allocation = GFP_ATOMIC; 18098c2ecf20Sopenharmony_ci 18108c2ecf20Sopenharmony_ci tcp_sock_set_nodelay(new_sock->sk); 18118c2ecf20Sopenharmony_ci tcp_sock_set_user_timeout(new_sock->sk, O2NET_TCP_USER_TIMEOUT); 18128c2ecf20Sopenharmony_ci 18138c2ecf20Sopenharmony_ci ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1); 
18148c2ecf20Sopenharmony_ci if (ret < 0) 18158c2ecf20Sopenharmony_ci goto out; 18168c2ecf20Sopenharmony_ci 18178c2ecf20Sopenharmony_ci node = o2nm_get_node_by_ip(sin.sin_addr.s_addr); 18188c2ecf20Sopenharmony_ci if (node == NULL) { 18198c2ecf20Sopenharmony_ci printk(KERN_NOTICE "o2net: Attempt to connect from unknown " 18208c2ecf20Sopenharmony_ci "node at %pI4:%d\n", &sin.sin_addr.s_addr, 18218c2ecf20Sopenharmony_ci ntohs(sin.sin_port)); 18228c2ecf20Sopenharmony_ci ret = -EINVAL; 18238c2ecf20Sopenharmony_ci goto out; 18248c2ecf20Sopenharmony_ci } 18258c2ecf20Sopenharmony_ci 18268c2ecf20Sopenharmony_ci if (o2nm_this_node() >= node->nd_num) { 18278c2ecf20Sopenharmony_ci local_node = o2nm_get_node_by_num(o2nm_this_node()); 18288c2ecf20Sopenharmony_ci if (local_node) 18298c2ecf20Sopenharmony_ci printk(KERN_NOTICE "o2net: Unexpected connect attempt " 18308c2ecf20Sopenharmony_ci "seen at node '%s' (%u, %pI4:%d) from " 18318c2ecf20Sopenharmony_ci "node '%s' (%u, %pI4:%d)\n", 18328c2ecf20Sopenharmony_ci local_node->nd_name, local_node->nd_num, 18338c2ecf20Sopenharmony_ci &(local_node->nd_ipv4_address), 18348c2ecf20Sopenharmony_ci ntohs(local_node->nd_ipv4_port), 18358c2ecf20Sopenharmony_ci node->nd_name, 18368c2ecf20Sopenharmony_ci node->nd_num, &sin.sin_addr.s_addr, 18378c2ecf20Sopenharmony_ci ntohs(sin.sin_port)); 18388c2ecf20Sopenharmony_ci ret = -EINVAL; 18398c2ecf20Sopenharmony_ci goto out; 18408c2ecf20Sopenharmony_ci } 18418c2ecf20Sopenharmony_ci 18428c2ecf20Sopenharmony_ci /* this happens all the time when the other node sees our heartbeat 18438c2ecf20Sopenharmony_ci * and tries to connect before we see their heartbeat */ 18448c2ecf20Sopenharmony_ci if (!o2hb_check_node_heartbeating_from_callback(node->nd_num)) { 18458c2ecf20Sopenharmony_ci mlog(ML_CONN, "attempt to connect from node '%s' at " 18468c2ecf20Sopenharmony_ci "%pI4:%d but it isn't heartbeating\n", 18478c2ecf20Sopenharmony_ci node->nd_name, &sin.sin_addr.s_addr, 18488c2ecf20Sopenharmony_ci 
ntohs(sin.sin_port)); 18498c2ecf20Sopenharmony_ci ret = -EINVAL; 18508c2ecf20Sopenharmony_ci goto out; 18518c2ecf20Sopenharmony_ci } 18528c2ecf20Sopenharmony_ci 18538c2ecf20Sopenharmony_ci nn = o2net_nn_from_num(node->nd_num); 18548c2ecf20Sopenharmony_ci 18558c2ecf20Sopenharmony_ci spin_lock(&nn->nn_lock); 18568c2ecf20Sopenharmony_ci if (nn->nn_sc) 18578c2ecf20Sopenharmony_ci ret = -EBUSY; 18588c2ecf20Sopenharmony_ci else 18598c2ecf20Sopenharmony_ci ret = 0; 18608c2ecf20Sopenharmony_ci spin_unlock(&nn->nn_lock); 18618c2ecf20Sopenharmony_ci if (ret) { 18628c2ecf20Sopenharmony_ci printk(KERN_NOTICE "o2net: Attempt to connect from node '%s' " 18638c2ecf20Sopenharmony_ci "at %pI4:%d but it already has an open connection\n", 18648c2ecf20Sopenharmony_ci node->nd_name, &sin.sin_addr.s_addr, 18658c2ecf20Sopenharmony_ci ntohs(sin.sin_port)); 18668c2ecf20Sopenharmony_ci goto out; 18678c2ecf20Sopenharmony_ci } 18688c2ecf20Sopenharmony_ci 18698c2ecf20Sopenharmony_ci sc = sc_alloc(node); 18708c2ecf20Sopenharmony_ci if (sc == NULL) { 18718c2ecf20Sopenharmony_ci ret = -ENOMEM; 18728c2ecf20Sopenharmony_ci goto out; 18738c2ecf20Sopenharmony_ci } 18748c2ecf20Sopenharmony_ci 18758c2ecf20Sopenharmony_ci sc->sc_sock = new_sock; 18768c2ecf20Sopenharmony_ci new_sock = NULL; 18778c2ecf20Sopenharmony_ci 18788c2ecf20Sopenharmony_ci spin_lock(&nn->nn_lock); 18798c2ecf20Sopenharmony_ci atomic_set(&nn->nn_timeout, 0); 18808c2ecf20Sopenharmony_ci o2net_set_nn_state(nn, sc, 0, 0); 18818c2ecf20Sopenharmony_ci spin_unlock(&nn->nn_lock); 18828c2ecf20Sopenharmony_ci 18838c2ecf20Sopenharmony_ci o2net_register_callbacks(sc->sc_sock->sk, sc); 18848c2ecf20Sopenharmony_ci o2net_sc_queue_work(sc, &sc->sc_rx_work); 18858c2ecf20Sopenharmony_ci 18868c2ecf20Sopenharmony_ci o2net_initialize_handshake(); 18878c2ecf20Sopenharmony_ci o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); 18888c2ecf20Sopenharmony_ci 18898c2ecf20Sopenharmony_ciout: 18908c2ecf20Sopenharmony_ci if (new_sock) 18918c2ecf20Sopenharmony_ci 
sock_release(new_sock); 18928c2ecf20Sopenharmony_ci if (node) 18938c2ecf20Sopenharmony_ci o2nm_node_put(node); 18948c2ecf20Sopenharmony_ci if (local_node) 18958c2ecf20Sopenharmony_ci o2nm_node_put(local_node); 18968c2ecf20Sopenharmony_ci if (sc) 18978c2ecf20Sopenharmony_ci sc_put(sc); 18988c2ecf20Sopenharmony_ci 18998c2ecf20Sopenharmony_ci memalloc_nofs_restore(nofs_flag); 19008c2ecf20Sopenharmony_ci return ret; 19018c2ecf20Sopenharmony_ci} 19028c2ecf20Sopenharmony_ci 19038c2ecf20Sopenharmony_ci/* 19048c2ecf20Sopenharmony_ci * This function is invoked in response to one or more 19058c2ecf20Sopenharmony_ci * pending accepts at softIRQ level. We must drain the 19068c2ecf20Sopenharmony_ci * entire que before returning. 19078c2ecf20Sopenharmony_ci */ 19088c2ecf20Sopenharmony_ci 19098c2ecf20Sopenharmony_cistatic void o2net_accept_many(struct work_struct *work) 19108c2ecf20Sopenharmony_ci{ 19118c2ecf20Sopenharmony_ci struct socket *sock = o2net_listen_sock; 19128c2ecf20Sopenharmony_ci int more; 19138c2ecf20Sopenharmony_ci 19148c2ecf20Sopenharmony_ci /* 19158c2ecf20Sopenharmony_ci * It is critical to note that due to interrupt moderation 19168c2ecf20Sopenharmony_ci * at the network driver level, we can't assume to get a 19178c2ecf20Sopenharmony_ci * softIRQ for every single conn since tcp SYN packets 19188c2ecf20Sopenharmony_ci * can arrive back-to-back, and therefore many pending 19198c2ecf20Sopenharmony_ci * accepts may result in just 1 softIRQ. If we terminate 19208c2ecf20Sopenharmony_ci * the o2net_accept_one() loop upon seeing an err, what happens 19218c2ecf20Sopenharmony_ci * to the rest of the conns in the queue? If no new SYN 19228c2ecf20Sopenharmony_ci * arrives for hours, no softIRQ will be delivered, 19238c2ecf20Sopenharmony_ci * and the connections will just sit in the queue. 
19248c2ecf20Sopenharmony_ci */ 19258c2ecf20Sopenharmony_ci 19268c2ecf20Sopenharmony_ci for (;;) { 19278c2ecf20Sopenharmony_ci o2net_accept_one(sock, &more); 19288c2ecf20Sopenharmony_ci if (!more) 19298c2ecf20Sopenharmony_ci break; 19308c2ecf20Sopenharmony_ci cond_resched(); 19318c2ecf20Sopenharmony_ci } 19328c2ecf20Sopenharmony_ci} 19338c2ecf20Sopenharmony_ci 19348c2ecf20Sopenharmony_cistatic void o2net_listen_data_ready(struct sock *sk) 19358c2ecf20Sopenharmony_ci{ 19368c2ecf20Sopenharmony_ci void (*ready)(struct sock *sk); 19378c2ecf20Sopenharmony_ci 19388c2ecf20Sopenharmony_ci read_lock_bh(&sk->sk_callback_lock); 19398c2ecf20Sopenharmony_ci ready = sk->sk_user_data; 19408c2ecf20Sopenharmony_ci if (ready == NULL) { /* check for teardown race */ 19418c2ecf20Sopenharmony_ci ready = sk->sk_data_ready; 19428c2ecf20Sopenharmony_ci goto out; 19438c2ecf20Sopenharmony_ci } 19448c2ecf20Sopenharmony_ci 19458c2ecf20Sopenharmony_ci /* This callback may called twice when a new connection 19468c2ecf20Sopenharmony_ci * is being established as a child socket inherits everything 19478c2ecf20Sopenharmony_ci * from a parent LISTEN socket, including the data_ready cb of 19488c2ecf20Sopenharmony_ci * the parent. This leads to a hazard. In o2net_accept_one() 19498c2ecf20Sopenharmony_ci * we are still initializing the child socket but have not 19508c2ecf20Sopenharmony_ci * changed the inherited data_ready callback yet when 19518c2ecf20Sopenharmony_ci * data starts arriving. 19528c2ecf20Sopenharmony_ci * We avoid this hazard by checking the state. 19538c2ecf20Sopenharmony_ci * For the listening socket, the state will be TCP_LISTEN; for the new 19548c2ecf20Sopenharmony_ci * socket, will be TCP_ESTABLISHED. Also, in this case, 19558c2ecf20Sopenharmony_ci * sk->sk_user_data is not a valid function pointer. 
19568c2ecf20Sopenharmony_ci */ 19578c2ecf20Sopenharmony_ci 19588c2ecf20Sopenharmony_ci if (sk->sk_state == TCP_LISTEN) { 19598c2ecf20Sopenharmony_ci queue_work(o2net_wq, &o2net_listen_work); 19608c2ecf20Sopenharmony_ci } else { 19618c2ecf20Sopenharmony_ci ready = NULL; 19628c2ecf20Sopenharmony_ci } 19638c2ecf20Sopenharmony_ci 19648c2ecf20Sopenharmony_ciout: 19658c2ecf20Sopenharmony_ci read_unlock_bh(&sk->sk_callback_lock); 19668c2ecf20Sopenharmony_ci if (ready != NULL) 19678c2ecf20Sopenharmony_ci ready(sk); 19688c2ecf20Sopenharmony_ci} 19698c2ecf20Sopenharmony_ci 19708c2ecf20Sopenharmony_cistatic int o2net_open_listening_sock(__be32 addr, __be16 port) 19718c2ecf20Sopenharmony_ci{ 19728c2ecf20Sopenharmony_ci struct socket *sock = NULL; 19738c2ecf20Sopenharmony_ci int ret; 19748c2ecf20Sopenharmony_ci struct sockaddr_in sin = { 19758c2ecf20Sopenharmony_ci .sin_family = PF_INET, 19768c2ecf20Sopenharmony_ci .sin_addr = { .s_addr = addr }, 19778c2ecf20Sopenharmony_ci .sin_port = port, 19788c2ecf20Sopenharmony_ci }; 19798c2ecf20Sopenharmony_ci 19808c2ecf20Sopenharmony_ci ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); 19818c2ecf20Sopenharmony_ci if (ret < 0) { 19828c2ecf20Sopenharmony_ci printk(KERN_ERR "o2net: Error %d while creating socket\n", ret); 19838c2ecf20Sopenharmony_ci goto out; 19848c2ecf20Sopenharmony_ci } 19858c2ecf20Sopenharmony_ci 19868c2ecf20Sopenharmony_ci sock->sk->sk_allocation = GFP_ATOMIC; 19878c2ecf20Sopenharmony_ci 19888c2ecf20Sopenharmony_ci write_lock_bh(&sock->sk->sk_callback_lock); 19898c2ecf20Sopenharmony_ci sock->sk->sk_user_data = sock->sk->sk_data_ready; 19908c2ecf20Sopenharmony_ci sock->sk->sk_data_ready = o2net_listen_data_ready; 19918c2ecf20Sopenharmony_ci write_unlock_bh(&sock->sk->sk_callback_lock); 19928c2ecf20Sopenharmony_ci 19938c2ecf20Sopenharmony_ci o2net_listen_sock = sock; 19948c2ecf20Sopenharmony_ci INIT_WORK(&o2net_listen_work, o2net_accept_many); 19958c2ecf20Sopenharmony_ci 19968c2ecf20Sopenharmony_ci 
sock->sk->sk_reuse = SK_CAN_REUSE; 19978c2ecf20Sopenharmony_ci ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); 19988c2ecf20Sopenharmony_ci if (ret < 0) { 19998c2ecf20Sopenharmony_ci printk(KERN_ERR "o2net: Error %d while binding socket at " 20008c2ecf20Sopenharmony_ci "%pI4:%u\n", ret, &addr, ntohs(port)); 20018c2ecf20Sopenharmony_ci goto out; 20028c2ecf20Sopenharmony_ci } 20038c2ecf20Sopenharmony_ci 20048c2ecf20Sopenharmony_ci ret = sock->ops->listen(sock, 64); 20058c2ecf20Sopenharmony_ci if (ret < 0) 20068c2ecf20Sopenharmony_ci printk(KERN_ERR "o2net: Error %d while listening on %pI4:%u\n", 20078c2ecf20Sopenharmony_ci ret, &addr, ntohs(port)); 20088c2ecf20Sopenharmony_ci 20098c2ecf20Sopenharmony_ciout: 20108c2ecf20Sopenharmony_ci if (ret) { 20118c2ecf20Sopenharmony_ci o2net_listen_sock = NULL; 20128c2ecf20Sopenharmony_ci if (sock) 20138c2ecf20Sopenharmony_ci sock_release(sock); 20148c2ecf20Sopenharmony_ci } 20158c2ecf20Sopenharmony_ci return ret; 20168c2ecf20Sopenharmony_ci} 20178c2ecf20Sopenharmony_ci 20188c2ecf20Sopenharmony_ci/* 20198c2ecf20Sopenharmony_ci * called from node manager when we should bring up our network listening 20208c2ecf20Sopenharmony_ci * socket. node manager handles all the serialization to only call this 20218c2ecf20Sopenharmony_ci * once and to match it with o2net_stop_listening(). note, 20228c2ecf20Sopenharmony_ci * o2nm_this_node() doesn't work yet as we're being called while it 20238c2ecf20Sopenharmony_ci * is being set up. 
20248c2ecf20Sopenharmony_ci */ 20258c2ecf20Sopenharmony_ciint o2net_start_listening(struct o2nm_node *node) 20268c2ecf20Sopenharmony_ci{ 20278c2ecf20Sopenharmony_ci int ret = 0; 20288c2ecf20Sopenharmony_ci 20298c2ecf20Sopenharmony_ci BUG_ON(o2net_wq != NULL); 20308c2ecf20Sopenharmony_ci BUG_ON(o2net_listen_sock != NULL); 20318c2ecf20Sopenharmony_ci 20328c2ecf20Sopenharmony_ci mlog(ML_KTHREAD, "starting o2net thread...\n"); 20338c2ecf20Sopenharmony_ci o2net_wq = alloc_ordered_workqueue("o2net", WQ_MEM_RECLAIM); 20348c2ecf20Sopenharmony_ci if (o2net_wq == NULL) { 20358c2ecf20Sopenharmony_ci mlog(ML_ERROR, "unable to launch o2net thread\n"); 20368c2ecf20Sopenharmony_ci return -ENOMEM; /* ? */ 20378c2ecf20Sopenharmony_ci } 20388c2ecf20Sopenharmony_ci 20398c2ecf20Sopenharmony_ci ret = o2net_open_listening_sock(node->nd_ipv4_address, 20408c2ecf20Sopenharmony_ci node->nd_ipv4_port); 20418c2ecf20Sopenharmony_ci if (ret) { 20428c2ecf20Sopenharmony_ci destroy_workqueue(o2net_wq); 20438c2ecf20Sopenharmony_ci o2net_wq = NULL; 20448c2ecf20Sopenharmony_ci } else 20458c2ecf20Sopenharmony_ci o2quo_conn_up(node->nd_num); 20468c2ecf20Sopenharmony_ci 20478c2ecf20Sopenharmony_ci return ret; 20488c2ecf20Sopenharmony_ci} 20498c2ecf20Sopenharmony_ci 20508c2ecf20Sopenharmony_ci/* again, o2nm_this_node() doesn't work here as we're involved in 20518c2ecf20Sopenharmony_ci * tearing it down */ 20528c2ecf20Sopenharmony_civoid o2net_stop_listening(struct o2nm_node *node) 20538c2ecf20Sopenharmony_ci{ 20548c2ecf20Sopenharmony_ci struct socket *sock = o2net_listen_sock; 20558c2ecf20Sopenharmony_ci size_t i; 20568c2ecf20Sopenharmony_ci 20578c2ecf20Sopenharmony_ci BUG_ON(o2net_wq == NULL); 20588c2ecf20Sopenharmony_ci BUG_ON(o2net_listen_sock == NULL); 20598c2ecf20Sopenharmony_ci 20608c2ecf20Sopenharmony_ci /* stop the listening socket from generating work */ 20618c2ecf20Sopenharmony_ci write_lock_bh(&sock->sk->sk_callback_lock); 20628c2ecf20Sopenharmony_ci sock->sk->sk_data_ready = 
sock->sk->sk_user_data; 20638c2ecf20Sopenharmony_ci sock->sk->sk_user_data = NULL; 20648c2ecf20Sopenharmony_ci write_unlock_bh(&sock->sk->sk_callback_lock); 20658c2ecf20Sopenharmony_ci 20668c2ecf20Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { 20678c2ecf20Sopenharmony_ci struct o2nm_node *node = o2nm_get_node_by_num(i); 20688c2ecf20Sopenharmony_ci if (node) { 20698c2ecf20Sopenharmony_ci o2net_disconnect_node(node); 20708c2ecf20Sopenharmony_ci o2nm_node_put(node); 20718c2ecf20Sopenharmony_ci } 20728c2ecf20Sopenharmony_ci } 20738c2ecf20Sopenharmony_ci 20748c2ecf20Sopenharmony_ci /* finish all work and tear down the work queue */ 20758c2ecf20Sopenharmony_ci mlog(ML_KTHREAD, "waiting for o2net thread to exit....\n"); 20768c2ecf20Sopenharmony_ci destroy_workqueue(o2net_wq); 20778c2ecf20Sopenharmony_ci o2net_wq = NULL; 20788c2ecf20Sopenharmony_ci 20798c2ecf20Sopenharmony_ci sock_release(o2net_listen_sock); 20808c2ecf20Sopenharmony_ci o2net_listen_sock = NULL; 20818c2ecf20Sopenharmony_ci 20828c2ecf20Sopenharmony_ci o2quo_conn_err(node->nd_num); 20838c2ecf20Sopenharmony_ci} 20848c2ecf20Sopenharmony_ci 20858c2ecf20Sopenharmony_ci/* ------------------------------------------------------------ */ 20868c2ecf20Sopenharmony_ci 20878c2ecf20Sopenharmony_ciint o2net_init(void) 20888c2ecf20Sopenharmony_ci{ 20898c2ecf20Sopenharmony_ci unsigned long i; 20908c2ecf20Sopenharmony_ci 20918c2ecf20Sopenharmony_ci o2quo_init(); 20928c2ecf20Sopenharmony_ci 20938c2ecf20Sopenharmony_ci o2net_debugfs_init(); 20948c2ecf20Sopenharmony_ci 20958c2ecf20Sopenharmony_ci o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); 20968c2ecf20Sopenharmony_ci o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); 20978c2ecf20Sopenharmony_ci o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); 20988c2ecf20Sopenharmony_ci if (!o2net_hand || !o2net_keep_req || !o2net_keep_resp) 20998c2ecf20Sopenharmony_ci goto out; 21008c2ecf20Sopenharmony_ci 
21018c2ecf20Sopenharmony_ci o2net_hand->protocol_version = cpu_to_be64(O2NET_PROTOCOL_VERSION); 21028c2ecf20Sopenharmony_ci o2net_hand->connector_id = cpu_to_be64(1); 21038c2ecf20Sopenharmony_ci 21048c2ecf20Sopenharmony_ci o2net_keep_req->magic = cpu_to_be16(O2NET_MSG_KEEP_REQ_MAGIC); 21058c2ecf20Sopenharmony_ci o2net_keep_resp->magic = cpu_to_be16(O2NET_MSG_KEEP_RESP_MAGIC); 21068c2ecf20Sopenharmony_ci 21078c2ecf20Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { 21088c2ecf20Sopenharmony_ci struct o2net_node *nn = o2net_nn_from_num(i); 21098c2ecf20Sopenharmony_ci 21108c2ecf20Sopenharmony_ci atomic_set(&nn->nn_timeout, 0); 21118c2ecf20Sopenharmony_ci spin_lock_init(&nn->nn_lock); 21128c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect); 21138c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&nn->nn_connect_expired, 21148c2ecf20Sopenharmony_ci o2net_connect_expired); 21158c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&nn->nn_still_up, o2net_still_up); 21168c2ecf20Sopenharmony_ci /* until we see hb from a node we'll return einval */ 21178c2ecf20Sopenharmony_ci nn->nn_persistent_error = -ENOTCONN; 21188c2ecf20Sopenharmony_ci init_waitqueue_head(&nn->nn_sc_wq); 21198c2ecf20Sopenharmony_ci idr_init(&nn->nn_status_idr); 21208c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&nn->nn_status_list); 21218c2ecf20Sopenharmony_ci } 21228c2ecf20Sopenharmony_ci 21238c2ecf20Sopenharmony_ci return 0; 21248c2ecf20Sopenharmony_ci 21258c2ecf20Sopenharmony_ciout: 21268c2ecf20Sopenharmony_ci kfree(o2net_hand); 21278c2ecf20Sopenharmony_ci kfree(o2net_keep_req); 21288c2ecf20Sopenharmony_ci kfree(o2net_keep_resp); 21298c2ecf20Sopenharmony_ci o2net_debugfs_exit(); 21308c2ecf20Sopenharmony_ci o2quo_exit(); 21318c2ecf20Sopenharmony_ci return -ENOMEM; 21328c2ecf20Sopenharmony_ci} 21338c2ecf20Sopenharmony_ci 21348c2ecf20Sopenharmony_civoid o2net_exit(void) 21358c2ecf20Sopenharmony_ci{ 21368c2ecf20Sopenharmony_ci o2quo_exit(); 21378c2ecf20Sopenharmony_ci kfree(o2net_hand); 
21388c2ecf20Sopenharmony_ci kfree(o2net_keep_req); 21398c2ecf20Sopenharmony_ci kfree(o2net_keep_resp); 21408c2ecf20Sopenharmony_ci o2net_debugfs_exit(); 21418c2ecf20Sopenharmony_ci} 2142