18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci#include <linux/ceph/ceph_debug.h> 38c2ecf20Sopenharmony_ci 48c2ecf20Sopenharmony_ci#include <linux/crc32c.h> 58c2ecf20Sopenharmony_ci#include <linux/ctype.h> 68c2ecf20Sopenharmony_ci#include <linux/highmem.h> 78c2ecf20Sopenharmony_ci#include <linux/inet.h> 88c2ecf20Sopenharmony_ci#include <linux/kthread.h> 98c2ecf20Sopenharmony_ci#include <linux/net.h> 108c2ecf20Sopenharmony_ci#include <linux/nsproxy.h> 118c2ecf20Sopenharmony_ci#include <linux/sched/mm.h> 128c2ecf20Sopenharmony_ci#include <linux/slab.h> 138c2ecf20Sopenharmony_ci#include <linux/socket.h> 148c2ecf20Sopenharmony_ci#include <linux/string.h> 158c2ecf20Sopenharmony_ci#ifdef CONFIG_BLOCK 168c2ecf20Sopenharmony_ci#include <linux/bio.h> 178c2ecf20Sopenharmony_ci#endif /* CONFIG_BLOCK */ 188c2ecf20Sopenharmony_ci#include <linux/dns_resolver.h> 198c2ecf20Sopenharmony_ci#include <net/tcp.h> 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_ci#include <linux/ceph/ceph_features.h> 228c2ecf20Sopenharmony_ci#include <linux/ceph/libceph.h> 238c2ecf20Sopenharmony_ci#include <linux/ceph/messenger.h> 248c2ecf20Sopenharmony_ci#include <linux/ceph/decode.h> 258c2ecf20Sopenharmony_ci#include <linux/ceph/pagelist.h> 268c2ecf20Sopenharmony_ci#include <linux/export.h> 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_ci/* 298c2ecf20Sopenharmony_ci * Ceph uses the messenger to exchange ceph_msg messages with other 308c2ecf20Sopenharmony_ci * hosts in the system. The messenger provides ordered and reliable 318c2ecf20Sopenharmony_ci * delivery. We tolerate TCP disconnects by reconnecting (with 328c2ecf20Sopenharmony_ci * exponential backoff) in the case of a fault (disconnection, bad 338c2ecf20Sopenharmony_ci * crc, protocol error). Acks allow sent messages to be discarded by 348c2ecf20Sopenharmony_ci * the sender. 358c2ecf20Sopenharmony_ci */ 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci/* 388c2ecf20Sopenharmony_ci * We track the state of the socket on a given connection using 398c2ecf20Sopenharmony_ci * values defined below. The transition to a new socket state is 408c2ecf20Sopenharmony_ci * handled by a function which verifies we aren't coming from an 418c2ecf20Sopenharmony_ci * unexpected state. 428c2ecf20Sopenharmony_ci * 438c2ecf20Sopenharmony_ci * -------- 448c2ecf20Sopenharmony_ci * | NEW* | transient initial state 458c2ecf20Sopenharmony_ci * -------- 468c2ecf20Sopenharmony_ci * | con_sock_state_init() 478c2ecf20Sopenharmony_ci * v 488c2ecf20Sopenharmony_ci * ---------- 498c2ecf20Sopenharmony_ci * | CLOSED | initialized, but no socket (and no 508c2ecf20Sopenharmony_ci * ---------- TCP connection) 518c2ecf20Sopenharmony_ci * ^ \ 528c2ecf20Sopenharmony_ci * | \ con_sock_state_connecting() 538c2ecf20Sopenharmony_ci * | ---------------------- 548c2ecf20Sopenharmony_ci * | \ 558c2ecf20Sopenharmony_ci * + con_sock_state_closed() \ 568c2ecf20Sopenharmony_ci * |+--------------------------- \ 578c2ecf20Sopenharmony_ci * | \ \ \ 588c2ecf20Sopenharmony_ci * | ----------- \ \ 598c2ecf20Sopenharmony_ci * | | CLOSING | socket event; \ \ 608c2ecf20Sopenharmony_ci * | ----------- await close \ \ 618c2ecf20Sopenharmony_ci * | ^ \ | 628c2ecf20Sopenharmony_ci * | | \ | 638c2ecf20Sopenharmony_ci * | + con_sock_state_closing() \ | 648c2ecf20Sopenharmony_ci * | / \ | | 658c2ecf20Sopenharmony_ci * | / --------------- | | 668c2ecf20Sopenharmony_ci * | / \ v v 678c2ecf20Sopenharmony_ci * | / -------------- 688c2ecf20Sopenharmony_ci * | / -----------------| CONNECTING | socket created, TCP 698c2ecf20Sopenharmony_ci * | | / -------------- connect initiated 708c2ecf20Sopenharmony_ci * | | | con_sock_state_connected() 718c2ecf20Sopenharmony_ci * | | v 728c2ecf20Sopenharmony_ci * ------------- 738c2ecf20Sopenharmony_ci * | CONNECTED | TCP connection established 748c2ecf20Sopenharmony_ci * ------------- 758c2ecf20Sopenharmony_ci * 768c2ecf20Sopenharmony_ci * State values for ceph_connection->sock_state; NEW is assumed to be 0. 778c2ecf20Sopenharmony_ci */ 788c2ecf20Sopenharmony_ci 798c2ecf20Sopenharmony_ci#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */ 808c2ecf20Sopenharmony_ci#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */ 818c2ecf20Sopenharmony_ci#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> CLOSING */ 828c2ecf20Sopenharmony_ci#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */ 838c2ecf20Sopenharmony_ci#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */ 848c2ecf20Sopenharmony_ci 858c2ecf20Sopenharmony_ci/* 868c2ecf20Sopenharmony_ci * connection states 878c2ecf20Sopenharmony_ci */ 888c2ecf20Sopenharmony_ci#define CON_STATE_CLOSED 1 /* -> PREOPEN */ 898c2ecf20Sopenharmony_ci#define CON_STATE_PREOPEN 2 /* -> CONNECTING, CLOSED */ 908c2ecf20Sopenharmony_ci#define CON_STATE_CONNECTING 3 /* -> NEGOTIATING, CLOSED */ 918c2ecf20Sopenharmony_ci#define CON_STATE_NEGOTIATING 4 /* -> OPEN, CLOSED */ 928c2ecf20Sopenharmony_ci#define CON_STATE_OPEN 5 /* -> STANDBY, CLOSED */ 938c2ecf20Sopenharmony_ci#define CON_STATE_STANDBY 6 /* -> PREOPEN, CLOSED */ 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_ci/* 968c2ecf20Sopenharmony_ci * ceph_connection flag bits 978c2ecf20Sopenharmony_ci */ 988c2ecf20Sopenharmony_ci#define CON_FLAG_LOSSYTX 0 /* we can close channel or drop 998c2ecf20Sopenharmony_ci * messages on errors */ 1008c2ecf20Sopenharmony_ci#define CON_FLAG_KEEPALIVE_PENDING 1 /* we need to send a keepalive */ 1018c2ecf20Sopenharmony_ci#define CON_FLAG_WRITE_PENDING 2 /* we have data ready to send */ 1028c2ecf20Sopenharmony_ci#define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */ 1038c2ecf20Sopenharmony_ci#define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */ 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_cistatic bool con_flag_valid(unsigned long con_flag) 1068c2ecf20Sopenharmony_ci{ 1078c2ecf20Sopenharmony_ci switch (con_flag) { 1088c2ecf20Sopenharmony_ci case CON_FLAG_LOSSYTX: 1098c2ecf20Sopenharmony_ci case CON_FLAG_KEEPALIVE_PENDING: 1108c2ecf20Sopenharmony_ci case CON_FLAG_WRITE_PENDING: 1118c2ecf20Sopenharmony_ci case CON_FLAG_SOCK_CLOSED: 1128c2ecf20Sopenharmony_ci case CON_FLAG_BACKOFF: 1138c2ecf20Sopenharmony_ci return true; 1148c2ecf20Sopenharmony_ci default: 1158c2ecf20Sopenharmony_ci return false; 1168c2ecf20Sopenharmony_ci } 1178c2ecf20Sopenharmony_ci} 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_cistatic void con_flag_clear(struct ceph_connection *con, unsigned long con_flag) 1208c2ecf20Sopenharmony_ci{ 1218c2ecf20Sopenharmony_ci BUG_ON(!con_flag_valid(con_flag)); 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_ci clear_bit(con_flag, &con->flags); 1248c2ecf20Sopenharmony_ci} 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_cistatic void con_flag_set(struct ceph_connection *con, unsigned long con_flag) 1278c2ecf20Sopenharmony_ci{ 1288c2ecf20Sopenharmony_ci BUG_ON(!con_flag_valid(con_flag)); 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci set_bit(con_flag, &con->flags); 1318c2ecf20Sopenharmony_ci} 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_cistatic bool con_flag_test(struct ceph_connection *con, unsigned long con_flag) 1348c2ecf20Sopenharmony_ci{ 1358c2ecf20Sopenharmony_ci BUG_ON(!con_flag_valid(con_flag)); 1368c2ecf20Sopenharmony_ci 1378c2ecf20Sopenharmony_ci return test_bit(con_flag, &con->flags); 1388c2ecf20Sopenharmony_ci} 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_cistatic bool con_flag_test_and_clear(struct ceph_connection *con, 1418c2ecf20Sopenharmony_ci unsigned long con_flag) 1428c2ecf20Sopenharmony_ci{ 1438c2ecf20Sopenharmony_ci BUG_ON(!con_flag_valid(con_flag)); 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_ci return test_and_clear_bit(con_flag, &con->flags); 1468c2ecf20Sopenharmony_ci} 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_cistatic bool con_flag_test_and_set(struct ceph_connection *con, 1498c2ecf20Sopenharmony_ci unsigned long con_flag) 1508c2ecf20Sopenharmony_ci{ 1518c2ecf20Sopenharmony_ci BUG_ON(!con_flag_valid(con_flag)); 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci return test_and_set_bit(con_flag, &con->flags); 1548c2ecf20Sopenharmony_ci} 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci/* Slab caches for frequently-allocated structures */ 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_cistatic struct kmem_cache *ceph_msg_cache; 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci/* static tag bytes (protocol control messages) */ 1618c2ecf20Sopenharmony_cistatic char tag_msg = CEPH_MSGR_TAG_MSG; 1628c2ecf20Sopenharmony_cistatic char tag_ack = CEPH_MSGR_TAG_ACK; 1638c2ecf20Sopenharmony_cistatic char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; 1648c2ecf20Sopenharmony_cistatic char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2; 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ci#ifdef CONFIG_LOCKDEP 1678c2ecf20Sopenharmony_cistatic struct lock_class_key socket_class; 1688c2ecf20Sopenharmony_ci#endif 1698c2ecf20Sopenharmony_ci 1708c2ecf20Sopenharmony_cistatic void queue_con(struct ceph_connection *con); 1718c2ecf20Sopenharmony_cistatic void cancel_con(struct ceph_connection *con); 1728c2ecf20Sopenharmony_cistatic void ceph_con_workfn(struct work_struct *); 1738c2ecf20Sopenharmony_cistatic void con_fault(struct ceph_connection *con); 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci/* 1768c2ecf20Sopenharmony_ci * Nicely render a sockaddr as a string. An array of formatted 1778c2ecf20Sopenharmony_ci * strings is used, to approximate reentrancy. 1788c2ecf20Sopenharmony_ci */ 1798c2ecf20Sopenharmony_ci#define ADDR_STR_COUNT_LOG 5 /* log2(# address strings in array) */ 1808c2ecf20Sopenharmony_ci#define ADDR_STR_COUNT (1 << ADDR_STR_COUNT_LOG) 1818c2ecf20Sopenharmony_ci#define ADDR_STR_COUNT_MASK (ADDR_STR_COUNT - 1) 1828c2ecf20Sopenharmony_ci#define MAX_ADDR_STR_LEN 64 /* 54 is enough */ 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_cistatic char addr_str[ADDR_STR_COUNT][MAX_ADDR_STR_LEN]; 1858c2ecf20Sopenharmony_cistatic atomic_t addr_str_seq = ATOMIC_INIT(0); 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_cistatic struct page *zero_page; /* used in certain error cases */ 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ciconst char *ceph_pr_addr(const struct ceph_entity_addr *addr) 1908c2ecf20Sopenharmony_ci{ 1918c2ecf20Sopenharmony_ci int i; 1928c2ecf20Sopenharmony_ci char *s; 1938c2ecf20Sopenharmony_ci struct sockaddr_storage ss = addr->in_addr; /* align */ 1948c2ecf20Sopenharmony_ci struct sockaddr_in *in4 = (struct sockaddr_in *)&ss; 1958c2ecf20Sopenharmony_ci struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)&ss; 1968c2ecf20Sopenharmony_ci 1978c2ecf20Sopenharmony_ci i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK; 1988c2ecf20Sopenharmony_ci s = addr_str[i]; 1998c2ecf20Sopenharmony_ci 2008c2ecf20Sopenharmony_ci switch (ss.ss_family) { 2018c2ecf20Sopenharmony_ci case AF_INET: 2028c2ecf20Sopenharmony_ci snprintf(s, MAX_ADDR_STR_LEN, "(%d)%pI4:%hu", 2038c2ecf20Sopenharmony_ci le32_to_cpu(addr->type), &in4->sin_addr, 2048c2ecf20Sopenharmony_ci ntohs(in4->sin_port)); 2058c2ecf20Sopenharmony_ci break; 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci case AF_INET6: 2088c2ecf20Sopenharmony_ci snprintf(s, MAX_ADDR_STR_LEN, "(%d)[%pI6c]:%hu", 2098c2ecf20Sopenharmony_ci le32_to_cpu(addr->type), &in6->sin6_addr, 2108c2ecf20Sopenharmony_ci ntohs(in6->sin6_port)); 2118c2ecf20Sopenharmony_ci break; 2128c2ecf20Sopenharmony_ci 2138c2ecf20Sopenharmony_ci default: 2148c2ecf20Sopenharmony_ci snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)", 2158c2ecf20Sopenharmony_ci ss.ss_family); 2168c2ecf20Sopenharmony_ci } 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci return s; 2198c2ecf20Sopenharmony_ci} 2208c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_pr_addr); 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_cistatic void encode_my_addr(struct ceph_messenger *msgr) 2238c2ecf20Sopenharmony_ci{ 2248c2ecf20Sopenharmony_ci memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr)); 2258c2ecf20Sopenharmony_ci ceph_encode_banner_addr(&msgr->my_enc_addr); 2268c2ecf20Sopenharmony_ci} 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci/* 2298c2ecf20Sopenharmony_ci * work queue for all reading and writing to/from the socket. 2308c2ecf20Sopenharmony_ci */ 2318c2ecf20Sopenharmony_cistatic struct workqueue_struct *ceph_msgr_wq; 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_cistatic int ceph_msgr_slab_init(void) 2348c2ecf20Sopenharmony_ci{ 2358c2ecf20Sopenharmony_ci BUG_ON(ceph_msg_cache); 2368c2ecf20Sopenharmony_ci ceph_msg_cache = KMEM_CACHE(ceph_msg, 0); 2378c2ecf20Sopenharmony_ci if (!ceph_msg_cache) 2388c2ecf20Sopenharmony_ci return -ENOMEM; 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_ci return 0; 2418c2ecf20Sopenharmony_ci} 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_cistatic void ceph_msgr_slab_exit(void) 2448c2ecf20Sopenharmony_ci{ 2458c2ecf20Sopenharmony_ci BUG_ON(!ceph_msg_cache); 2468c2ecf20Sopenharmony_ci kmem_cache_destroy(ceph_msg_cache); 2478c2ecf20Sopenharmony_ci ceph_msg_cache = NULL; 2488c2ecf20Sopenharmony_ci} 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_cistatic void _ceph_msgr_exit(void) 2518c2ecf20Sopenharmony_ci{ 2528c2ecf20Sopenharmony_ci if (ceph_msgr_wq) { 2538c2ecf20Sopenharmony_ci destroy_workqueue(ceph_msgr_wq); 2548c2ecf20Sopenharmony_ci ceph_msgr_wq = NULL; 2558c2ecf20Sopenharmony_ci } 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci BUG_ON(zero_page == NULL); 2588c2ecf20Sopenharmony_ci put_page(zero_page); 2598c2ecf20Sopenharmony_ci zero_page = NULL; 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_ci ceph_msgr_slab_exit(); 2628c2ecf20Sopenharmony_ci} 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_ciint __init ceph_msgr_init(void) 2658c2ecf20Sopenharmony_ci{ 2668c2ecf20Sopenharmony_ci if (ceph_msgr_slab_init()) 2678c2ecf20Sopenharmony_ci return -ENOMEM; 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ci BUG_ON(zero_page != NULL); 2708c2ecf20Sopenharmony_ci zero_page = ZERO_PAGE(0); 2718c2ecf20Sopenharmony_ci get_page(zero_page); 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_ci /* 2748c2ecf20Sopenharmony_ci * The number of active work items is limited by the number of 2758c2ecf20Sopenharmony_ci * connections, so leave @max_active at default. 2768c2ecf20Sopenharmony_ci */ 2778c2ecf20Sopenharmony_ci ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_MEM_RECLAIM, 0); 2788c2ecf20Sopenharmony_ci if (ceph_msgr_wq) 2798c2ecf20Sopenharmony_ci return 0; 2808c2ecf20Sopenharmony_ci 2818c2ecf20Sopenharmony_ci pr_err("msgr_init failed to create workqueue\n"); 2828c2ecf20Sopenharmony_ci _ceph_msgr_exit(); 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_ci return -ENOMEM; 2858c2ecf20Sopenharmony_ci} 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_civoid ceph_msgr_exit(void) 2888c2ecf20Sopenharmony_ci{ 2898c2ecf20Sopenharmony_ci BUG_ON(ceph_msgr_wq == NULL); 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_ci _ceph_msgr_exit(); 2928c2ecf20Sopenharmony_ci} 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_civoid ceph_msgr_flush(void) 2958c2ecf20Sopenharmony_ci{ 2968c2ecf20Sopenharmony_ci flush_workqueue(ceph_msgr_wq); 2978c2ecf20Sopenharmony_ci} 2988c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msgr_flush); 2998c2ecf20Sopenharmony_ci 3008c2ecf20Sopenharmony_ci/* Connection socket state transition functions */ 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_cistatic void con_sock_state_init(struct ceph_connection *con) 3038c2ecf20Sopenharmony_ci{ 3048c2ecf20Sopenharmony_ci int old_state; 3058c2ecf20Sopenharmony_ci 3068c2ecf20Sopenharmony_ci old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED); 3078c2ecf20Sopenharmony_ci if (WARN_ON(old_state != CON_SOCK_STATE_NEW)) 3088c2ecf20Sopenharmony_ci printk("%s: unexpected old state %d\n", __func__, old_state); 3098c2ecf20Sopenharmony_ci dout("%s con %p sock %d -> %d\n", __func__, con, old_state, 3108c2ecf20Sopenharmony_ci CON_SOCK_STATE_CLOSED); 3118c2ecf20Sopenharmony_ci} 3128c2ecf20Sopenharmony_ci 3138c2ecf20Sopenharmony_cistatic void con_sock_state_connecting(struct ceph_connection *con) 3148c2ecf20Sopenharmony_ci{ 3158c2ecf20Sopenharmony_ci int old_state; 3168c2ecf20Sopenharmony_ci 3178c2ecf20Sopenharmony_ci old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING); 3188c2ecf20Sopenharmony_ci if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED)) 3198c2ecf20Sopenharmony_ci printk("%s: unexpected old state %d\n", __func__, old_state); 3208c2ecf20Sopenharmony_ci dout("%s con %p sock %d -> %d\n", __func__, con, old_state, 3218c2ecf20Sopenharmony_ci CON_SOCK_STATE_CONNECTING); 3228c2ecf20Sopenharmony_ci} 3238c2ecf20Sopenharmony_ci 3248c2ecf20Sopenharmony_cistatic void con_sock_state_connected(struct ceph_connection *con) 3258c2ecf20Sopenharmony_ci{ 3268c2ecf20Sopenharmony_ci int old_state; 3278c2ecf20Sopenharmony_ci 3288c2ecf20Sopenharmony_ci old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED); 3298c2ecf20Sopenharmony_ci if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING)) 3308c2ecf20Sopenharmony_ci printk("%s: unexpected old state %d\n", __func__, old_state); 3318c2ecf20Sopenharmony_ci dout("%s con %p sock %d -> %d\n", __func__, con, old_state, 3328c2ecf20Sopenharmony_ci CON_SOCK_STATE_CONNECTED); 3338c2ecf20Sopenharmony_ci} 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_cistatic void con_sock_state_closing(struct ceph_connection *con) 3368c2ecf20Sopenharmony_ci{ 3378c2ecf20Sopenharmony_ci int old_state; 3388c2ecf20Sopenharmony_ci 3398c2ecf20Sopenharmony_ci old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING); 3408c2ecf20Sopenharmony_ci if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING && 3418c2ecf20Sopenharmony_ci old_state != CON_SOCK_STATE_CONNECTED && 3428c2ecf20Sopenharmony_ci old_state != CON_SOCK_STATE_CLOSING)) 3438c2ecf20Sopenharmony_ci printk("%s: unexpected old state %d\n", __func__, old_state); 3448c2ecf20Sopenharmony_ci dout("%s con %p sock %d -> %d\n", __func__, con, old_state, 3458c2ecf20Sopenharmony_ci CON_SOCK_STATE_CLOSING); 3468c2ecf20Sopenharmony_ci} 3478c2ecf20Sopenharmony_ci 3488c2ecf20Sopenharmony_cistatic void con_sock_state_closed(struct ceph_connection *con) 3498c2ecf20Sopenharmony_ci{ 3508c2ecf20Sopenharmony_ci int old_state; 3518c2ecf20Sopenharmony_ci 3528c2ecf20Sopenharmony_ci old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED); 3538c2ecf20Sopenharmony_ci if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED && 3548c2ecf20Sopenharmony_ci old_state != CON_SOCK_STATE_CLOSING && 3558c2ecf20Sopenharmony_ci old_state != CON_SOCK_STATE_CONNECTING && 3568c2ecf20Sopenharmony_ci old_state != CON_SOCK_STATE_CLOSED)) 3578c2ecf20Sopenharmony_ci printk("%s: unexpected old state %d\n", __func__, old_state); 3588c2ecf20Sopenharmony_ci dout("%s con %p sock %d -> %d\n", __func__, con, old_state, 3598c2ecf20Sopenharmony_ci CON_SOCK_STATE_CLOSED); 3608c2ecf20Sopenharmony_ci} 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_ci/* 3638c2ecf20Sopenharmony_ci * socket callback functions 3648c2ecf20Sopenharmony_ci */ 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_ci/* data available on socket, or listen socket received a connect */ 3678c2ecf20Sopenharmony_cistatic void ceph_sock_data_ready(struct sock *sk) 3688c2ecf20Sopenharmony_ci{ 3698c2ecf20Sopenharmony_ci struct ceph_connection *con = sk->sk_user_data; 3708c2ecf20Sopenharmony_ci if (atomic_read(&con->msgr->stopping)) { 3718c2ecf20Sopenharmony_ci return; 3728c2ecf20Sopenharmony_ci } 3738c2ecf20Sopenharmony_ci 3748c2ecf20Sopenharmony_ci if (sk->sk_state != TCP_CLOSE_WAIT) { 3758c2ecf20Sopenharmony_ci dout("%s on %p state = %lu, queueing work\n", __func__, 3768c2ecf20Sopenharmony_ci con, con->state); 3778c2ecf20Sopenharmony_ci queue_con(con); 3788c2ecf20Sopenharmony_ci } 3798c2ecf20Sopenharmony_ci} 3808c2ecf20Sopenharmony_ci 3818c2ecf20Sopenharmony_ci/* socket has buffer space for writing */ 3828c2ecf20Sopenharmony_cistatic void ceph_sock_write_space(struct sock *sk) 3838c2ecf20Sopenharmony_ci{ 3848c2ecf20Sopenharmony_ci struct ceph_connection *con = sk->sk_user_data; 3858c2ecf20Sopenharmony_ci 3868c2ecf20Sopenharmony_ci /* only queue to workqueue if there is data we want to write, 3878c2ecf20Sopenharmony_ci * and there is sufficient space in the socket buffer to accept 3888c2ecf20Sopenharmony_ci * more data. clear SOCK_NOSPACE so that ceph_sock_write_space() 3898c2ecf20Sopenharmony_ci * doesn't get called again until try_write() fills the socket 3908c2ecf20Sopenharmony_ci * buffer. See net/ipv4/tcp_input.c:tcp_check_space() 3918c2ecf20Sopenharmony_ci * and net/core/stream.c:sk_stream_write_space(). 3928c2ecf20Sopenharmony_ci */ 3938c2ecf20Sopenharmony_ci if (con_flag_test(con, CON_FLAG_WRITE_PENDING)) { 3948c2ecf20Sopenharmony_ci if (sk_stream_is_writeable(sk)) { 3958c2ecf20Sopenharmony_ci dout("%s %p queueing write work\n", __func__, con); 3968c2ecf20Sopenharmony_ci clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 3978c2ecf20Sopenharmony_ci queue_con(con); 3988c2ecf20Sopenharmony_ci } 3998c2ecf20Sopenharmony_ci } else { 4008c2ecf20Sopenharmony_ci dout("%s %p nothing to write\n", __func__, con); 4018c2ecf20Sopenharmony_ci } 4028c2ecf20Sopenharmony_ci} 4038c2ecf20Sopenharmony_ci 4048c2ecf20Sopenharmony_ci/* socket's state has changed */ 4058c2ecf20Sopenharmony_cistatic void ceph_sock_state_change(struct sock *sk) 4068c2ecf20Sopenharmony_ci{ 4078c2ecf20Sopenharmony_ci struct ceph_connection *con = sk->sk_user_data; 4088c2ecf20Sopenharmony_ci 4098c2ecf20Sopenharmony_ci dout("%s %p state = %lu sk_state = %u\n", __func__, 4108c2ecf20Sopenharmony_ci con, con->state, sk->sk_state); 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_ci switch (sk->sk_state) { 4138c2ecf20Sopenharmony_ci case TCP_CLOSE: 4148c2ecf20Sopenharmony_ci dout("%s TCP_CLOSE\n", __func__); 4158c2ecf20Sopenharmony_ci fallthrough; 4168c2ecf20Sopenharmony_ci case TCP_CLOSE_WAIT: 4178c2ecf20Sopenharmony_ci dout("%s TCP_CLOSE_WAIT\n", __func__); 4188c2ecf20Sopenharmony_ci con_sock_state_closing(con); 4198c2ecf20Sopenharmony_ci con_flag_set(con, CON_FLAG_SOCK_CLOSED); 4208c2ecf20Sopenharmony_ci queue_con(con); 4218c2ecf20Sopenharmony_ci break; 4228c2ecf20Sopenharmony_ci case TCP_ESTABLISHED: 4238c2ecf20Sopenharmony_ci dout("%s TCP_ESTABLISHED\n", __func__); 4248c2ecf20Sopenharmony_ci con_sock_state_connected(con); 4258c2ecf20Sopenharmony_ci queue_con(con); 4268c2ecf20Sopenharmony_ci break; 4278c2ecf20Sopenharmony_ci default: /* Everything else is uninteresting */ 4288c2ecf20Sopenharmony_ci break; 4298c2ecf20Sopenharmony_ci } 4308c2ecf20Sopenharmony_ci} 4318c2ecf20Sopenharmony_ci 4328c2ecf20Sopenharmony_ci/* 4338c2ecf20Sopenharmony_ci * set up socket callbacks 4348c2ecf20Sopenharmony_ci */ 4358c2ecf20Sopenharmony_cistatic void set_sock_callbacks(struct socket *sock, 4368c2ecf20Sopenharmony_ci struct ceph_connection *con) 4378c2ecf20Sopenharmony_ci{ 4388c2ecf20Sopenharmony_ci struct sock *sk = sock->sk; 4398c2ecf20Sopenharmony_ci sk->sk_user_data = con; 4408c2ecf20Sopenharmony_ci sk->sk_data_ready = ceph_sock_data_ready; 4418c2ecf20Sopenharmony_ci sk->sk_write_space = ceph_sock_write_space; 4428c2ecf20Sopenharmony_ci sk->sk_state_change = ceph_sock_state_change; 4438c2ecf20Sopenharmony_ci} 4448c2ecf20Sopenharmony_ci 4458c2ecf20Sopenharmony_ci 4468c2ecf20Sopenharmony_ci/* 4478c2ecf20Sopenharmony_ci * socket helpers 4488c2ecf20Sopenharmony_ci */ 4498c2ecf20Sopenharmony_ci 4508c2ecf20Sopenharmony_ci/* 4518c2ecf20Sopenharmony_ci * initiate connection to a remote socket. 4528c2ecf20Sopenharmony_ci */ 4538c2ecf20Sopenharmony_cistatic int ceph_tcp_connect(struct ceph_connection *con) 4548c2ecf20Sopenharmony_ci{ 4558c2ecf20Sopenharmony_ci struct sockaddr_storage ss = con->peer_addr.in_addr; /* align */ 4568c2ecf20Sopenharmony_ci struct socket *sock; 4578c2ecf20Sopenharmony_ci unsigned int noio_flag; 4588c2ecf20Sopenharmony_ci int ret; 4598c2ecf20Sopenharmony_ci 4608c2ecf20Sopenharmony_ci BUG_ON(con->sock); 4618c2ecf20Sopenharmony_ci 4628c2ecf20Sopenharmony_ci /* sock_create_kern() allocates with GFP_KERNEL */ 4638c2ecf20Sopenharmony_ci noio_flag = memalloc_noio_save(); 4648c2ecf20Sopenharmony_ci ret = sock_create_kern(read_pnet(&con->msgr->net), ss.ss_family, 4658c2ecf20Sopenharmony_ci SOCK_STREAM, IPPROTO_TCP, &sock); 4668c2ecf20Sopenharmony_ci memalloc_noio_restore(noio_flag); 4678c2ecf20Sopenharmony_ci if (ret) 4688c2ecf20Sopenharmony_ci return ret; 4698c2ecf20Sopenharmony_ci sock->sk->sk_allocation = GFP_NOFS; 4708c2ecf20Sopenharmony_ci 4718c2ecf20Sopenharmony_ci#ifdef CONFIG_LOCKDEP 4728c2ecf20Sopenharmony_ci lockdep_set_class(&sock->sk->sk_lock, &socket_class); 4738c2ecf20Sopenharmony_ci#endif 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_ci set_sock_callbacks(sock, con); 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_ci dout("connect %s\n", ceph_pr_addr(&con->peer_addr)); 4788c2ecf20Sopenharmony_ci 4798c2ecf20Sopenharmony_ci con_sock_state_connecting(con); 4808c2ecf20Sopenharmony_ci ret = kernel_connect(sock, (struct sockaddr *)&ss, sizeof(ss), 4818c2ecf20Sopenharmony_ci O_NONBLOCK); 4828c2ecf20Sopenharmony_ci if (ret == -EINPROGRESS) { 4838c2ecf20Sopenharmony_ci dout("connect %s EINPROGRESS sk_state = %u\n", 4848c2ecf20Sopenharmony_ci ceph_pr_addr(&con->peer_addr), 4858c2ecf20Sopenharmony_ci sock->sk->sk_state); 4868c2ecf20Sopenharmony_ci } else if (ret < 0) { 4878c2ecf20Sopenharmony_ci pr_err("connect %s error %d\n", 4888c2ecf20Sopenharmony_ci ceph_pr_addr(&con->peer_addr), ret); 4898c2ecf20Sopenharmony_ci sock_release(sock); 4908c2ecf20Sopenharmony_ci return ret; 4918c2ecf20Sopenharmony_ci } 4928c2ecf20Sopenharmony_ci 4938c2ecf20Sopenharmony_ci if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) 4948c2ecf20Sopenharmony_ci tcp_sock_set_nodelay(sock->sk); 4958c2ecf20Sopenharmony_ci 4968c2ecf20Sopenharmony_ci con->sock = sock; 4978c2ecf20Sopenharmony_ci return 0; 4988c2ecf20Sopenharmony_ci} 4998c2ecf20Sopenharmony_ci 5008c2ecf20Sopenharmony_ci/* 5018c2ecf20Sopenharmony_ci * If @buf is NULL, discard up to @len bytes. 5028c2ecf20Sopenharmony_ci */ 5038c2ecf20Sopenharmony_cistatic int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) 5048c2ecf20Sopenharmony_ci{ 5058c2ecf20Sopenharmony_ci struct kvec iov = {buf, len}; 5068c2ecf20Sopenharmony_ci struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 5078c2ecf20Sopenharmony_ci int r; 5088c2ecf20Sopenharmony_ci 5098c2ecf20Sopenharmony_ci if (!buf) 5108c2ecf20Sopenharmony_ci msg.msg_flags |= MSG_TRUNC; 5118c2ecf20Sopenharmony_ci 5128c2ecf20Sopenharmony_ci iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, len); 5138c2ecf20Sopenharmony_ci r = sock_recvmsg(sock, &msg, msg.msg_flags); 5148c2ecf20Sopenharmony_ci if (r == -EAGAIN) 5158c2ecf20Sopenharmony_ci r = 0; 5168c2ecf20Sopenharmony_ci return r; 5178c2ecf20Sopenharmony_ci} 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_cistatic int ceph_tcp_recvpage(struct socket *sock, struct page *page, 5208c2ecf20Sopenharmony_ci int page_offset, size_t length) 5218c2ecf20Sopenharmony_ci{ 5228c2ecf20Sopenharmony_ci struct bio_vec bvec = { 5238c2ecf20Sopenharmony_ci .bv_page = page, 5248c2ecf20Sopenharmony_ci .bv_offset = page_offset, 5258c2ecf20Sopenharmony_ci .bv_len = length 5268c2ecf20Sopenharmony_ci }; 5278c2ecf20Sopenharmony_ci struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 5288c2ecf20Sopenharmony_ci int r; 5298c2ecf20Sopenharmony_ci 5308c2ecf20Sopenharmony_ci BUG_ON(page_offset + length > PAGE_SIZE); 5318c2ecf20Sopenharmony_ci iov_iter_bvec(&msg.msg_iter, READ, &bvec, 1, length); 5328c2ecf20Sopenharmony_ci r = sock_recvmsg(sock, &msg, msg.msg_flags); 5338c2ecf20Sopenharmony_ci if (r == -EAGAIN) 5348c2ecf20Sopenharmony_ci r = 0; 5358c2ecf20Sopenharmony_ci return r; 5368c2ecf20Sopenharmony_ci} 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_ci/* 5398c2ecf20Sopenharmony_ci * write something. @more is true if caller will be sending more data 5408c2ecf20Sopenharmony_ci * shortly. 5418c2ecf20Sopenharmony_ci */ 5428c2ecf20Sopenharmony_cistatic int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, 5438c2ecf20Sopenharmony_ci size_t kvlen, size_t len, bool more) 5448c2ecf20Sopenharmony_ci{ 5458c2ecf20Sopenharmony_ci struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 5468c2ecf20Sopenharmony_ci int r; 5478c2ecf20Sopenharmony_ci 5488c2ecf20Sopenharmony_ci if (more) 5498c2ecf20Sopenharmony_ci msg.msg_flags |= MSG_MORE; 5508c2ecf20Sopenharmony_ci else 5518c2ecf20Sopenharmony_ci msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ 5528c2ecf20Sopenharmony_ci 5538c2ecf20Sopenharmony_ci r = kernel_sendmsg(sock, &msg, iov, kvlen, len); 5548c2ecf20Sopenharmony_ci if (r == -EAGAIN) 5558c2ecf20Sopenharmony_ci r = 0; 5568c2ecf20Sopenharmony_ci return r; 5578c2ecf20Sopenharmony_ci} 5588c2ecf20Sopenharmony_ci 5598c2ecf20Sopenharmony_ci/* 5608c2ecf20Sopenharmony_ci * @more: either or both of MSG_MORE and MSG_SENDPAGE_NOTLAST 5618c2ecf20Sopenharmony_ci */ 5628c2ecf20Sopenharmony_cistatic int ceph_tcp_sendpage(struct socket *sock, struct page *page, 5638c2ecf20Sopenharmony_ci int offset, size_t size, int more) 5648c2ecf20Sopenharmony_ci{ 5658c2ecf20Sopenharmony_ci ssize_t (*sendpage)(struct socket *sock, struct page *page, 5668c2ecf20Sopenharmony_ci int offset, size_t size, int flags); 5678c2ecf20Sopenharmony_ci int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more; 5688c2ecf20Sopenharmony_ci int ret; 5698c2ecf20Sopenharmony_ci 5708c2ecf20Sopenharmony_ci /* 5718c2ecf20Sopenharmony_ci * sendpage cannot properly handle pages with page_count == 0, 5728c2ecf20Sopenharmony_ci * we need to fall back to sendmsg if that's the case. 5738c2ecf20Sopenharmony_ci * 5748c2ecf20Sopenharmony_ci * Same goes for slab pages: skb_can_coalesce() allows 5758c2ecf20Sopenharmony_ci * coalescing neighboring slab objects into a single frag which 5768c2ecf20Sopenharmony_ci * triggers one of hardened usercopy checks. 5778c2ecf20Sopenharmony_ci */ 5788c2ecf20Sopenharmony_ci if (sendpage_ok(page)) 5798c2ecf20Sopenharmony_ci sendpage = sock->ops->sendpage; 5808c2ecf20Sopenharmony_ci else 5818c2ecf20Sopenharmony_ci sendpage = sock_no_sendpage; 5828c2ecf20Sopenharmony_ci 5838c2ecf20Sopenharmony_ci ret = sendpage(sock, page, offset, size, flags); 5848c2ecf20Sopenharmony_ci if (ret == -EAGAIN) 5858c2ecf20Sopenharmony_ci ret = 0; 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_ci return ret; 5888c2ecf20Sopenharmony_ci} 5898c2ecf20Sopenharmony_ci 5908c2ecf20Sopenharmony_ci/* 5918c2ecf20Sopenharmony_ci * Shutdown/close the socket for the given connection. 5928c2ecf20Sopenharmony_ci */ 5938c2ecf20Sopenharmony_cistatic int con_close_socket(struct ceph_connection *con) 5948c2ecf20Sopenharmony_ci{ 5958c2ecf20Sopenharmony_ci int rc = 0; 5968c2ecf20Sopenharmony_ci 5978c2ecf20Sopenharmony_ci dout("con_close_socket on %p sock %p\n", con, con->sock); 5988c2ecf20Sopenharmony_ci if (con->sock) { 5998c2ecf20Sopenharmony_ci rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR); 6008c2ecf20Sopenharmony_ci sock_release(con->sock); 6018c2ecf20Sopenharmony_ci con->sock = NULL; 6028c2ecf20Sopenharmony_ci } 6038c2ecf20Sopenharmony_ci 6048c2ecf20Sopenharmony_ci /* 6058c2ecf20Sopenharmony_ci * Forcibly clear the SOCK_CLOSED flag. It gets set 6068c2ecf20Sopenharmony_ci * independent of the connection mutex, and we could have 6078c2ecf20Sopenharmony_ci * received a socket close event before we had the chance to 6088c2ecf20Sopenharmony_ci * shut the socket down. 6098c2ecf20Sopenharmony_ci */ 6108c2ecf20Sopenharmony_ci con_flag_clear(con, CON_FLAG_SOCK_CLOSED); 6118c2ecf20Sopenharmony_ci 6128c2ecf20Sopenharmony_ci con_sock_state_closed(con); 6138c2ecf20Sopenharmony_ci return rc; 6148c2ecf20Sopenharmony_ci} 6158c2ecf20Sopenharmony_ci 6168c2ecf20Sopenharmony_ci/* 6178c2ecf20Sopenharmony_ci * Reset a connection. Discard all incoming and outgoing messages 6188c2ecf20Sopenharmony_ci * and clear *_seq state. 6198c2ecf20Sopenharmony_ci */ 6208c2ecf20Sopenharmony_cistatic void ceph_msg_remove(struct ceph_msg *msg) 6218c2ecf20Sopenharmony_ci{ 6228c2ecf20Sopenharmony_ci list_del_init(&msg->list_head); 6238c2ecf20Sopenharmony_ci 6248c2ecf20Sopenharmony_ci ceph_msg_put(msg); 6258c2ecf20Sopenharmony_ci} 6268c2ecf20Sopenharmony_cistatic void ceph_msg_remove_list(struct list_head *head) 6278c2ecf20Sopenharmony_ci{ 6288c2ecf20Sopenharmony_ci while (!list_empty(head)) { 6298c2ecf20Sopenharmony_ci struct ceph_msg *msg = list_first_entry(head, struct ceph_msg, 6308c2ecf20Sopenharmony_ci list_head); 6318c2ecf20Sopenharmony_ci ceph_msg_remove(msg); 6328c2ecf20Sopenharmony_ci } 6338c2ecf20Sopenharmony_ci} 6348c2ecf20Sopenharmony_ci 6358c2ecf20Sopenharmony_cistatic void reset_connection(struct ceph_connection *con) 6368c2ecf20Sopenharmony_ci{ 6378c2ecf20Sopenharmony_ci /* reset connection, out_queue, msg_ and connect_seq */ 6388c2ecf20Sopenharmony_ci /* discard existing out_queue and msg_seq */ 6398c2ecf20Sopenharmony_ci dout("reset_connection %p\n", con); 6408c2ecf20Sopenharmony_ci ceph_msg_remove_list(&con->out_queue); 6418c2ecf20Sopenharmony_ci ceph_msg_remove_list(&con->out_sent); 6428c2ecf20Sopenharmony_ci 6438c2ecf20Sopenharmony_ci if (con->in_msg) { 6448c2ecf20Sopenharmony_ci BUG_ON(con->in_msg->con != con); 6458c2ecf20Sopenharmony_ci ceph_msg_put(con->in_msg); 6468c2ecf20Sopenharmony_ci con->in_msg = NULL; 6478c2ecf20Sopenharmony_ci } 6488c2ecf20Sopenharmony_ci 6498c2ecf20Sopenharmony_ci con->connect_seq = 0; 6508c2ecf20Sopenharmony_ci con->out_seq = 0; 6518c2ecf20Sopenharmony_ci if (con->out_msg) { 6528c2ecf20Sopenharmony_ci BUG_ON(con->out_msg->con != con); 6538c2ecf20Sopenharmony_ci ceph_msg_put(con->out_msg); 6548c2ecf20Sopenharmony_ci con->out_msg = NULL; 6558c2ecf20Sopenharmony_ci } 6568c2ecf20Sopenharmony_ci con->in_seq = 0; 6578c2ecf20Sopenharmony_ci con->in_seq_acked = 0; 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci con->out_skip = 0; 6608c2ecf20Sopenharmony_ci} 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_ci/* 6638c2ecf20Sopenharmony_ci * mark a peer down. drop any open connections. 6648c2ecf20Sopenharmony_ci */ 6658c2ecf20Sopenharmony_civoid ceph_con_close(struct ceph_connection *con) 6668c2ecf20Sopenharmony_ci{ 6678c2ecf20Sopenharmony_ci mutex_lock(&con->mutex); 6688c2ecf20Sopenharmony_ci dout("con_close %p peer %s\n", con, ceph_pr_addr(&con->peer_addr)); 6698c2ecf20Sopenharmony_ci con->state = CON_STATE_CLOSED; 6708c2ecf20Sopenharmony_ci 6718c2ecf20Sopenharmony_ci con_flag_clear(con, CON_FLAG_LOSSYTX); /* so we retry next connect */ 6728c2ecf20Sopenharmony_ci con_flag_clear(con, CON_FLAG_KEEPALIVE_PENDING); 6738c2ecf20Sopenharmony_ci con_flag_clear(con, CON_FLAG_WRITE_PENDING); 6748c2ecf20Sopenharmony_ci con_flag_clear(con, CON_FLAG_BACKOFF); 6758c2ecf20Sopenharmony_ci 6768c2ecf20Sopenharmony_ci reset_connection(con); 6778c2ecf20Sopenharmony_ci con->peer_global_seq = 0; 6788c2ecf20Sopenharmony_ci cancel_con(con); 6798c2ecf20Sopenharmony_ci con_close_socket(con); 6808c2ecf20Sopenharmony_ci mutex_unlock(&con->mutex); 6818c2ecf20Sopenharmony_ci} 6828c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_con_close); 6838c2ecf20Sopenharmony_ci 6848c2ecf20Sopenharmony_ci/* 6858c2ecf20Sopenharmony_ci * Reopen a closed connection, with a new peer address. 6868c2ecf20Sopenharmony_ci */ 6878c2ecf20Sopenharmony_civoid ceph_con_open(struct ceph_connection *con, 6888c2ecf20Sopenharmony_ci __u8 entity_type, __u64 entity_num, 6898c2ecf20Sopenharmony_ci struct ceph_entity_addr *addr) 6908c2ecf20Sopenharmony_ci{ 6918c2ecf20Sopenharmony_ci mutex_lock(&con->mutex); 6928c2ecf20Sopenharmony_ci dout("con_open %p %s\n", con, ceph_pr_addr(addr)); 6938c2ecf20Sopenharmony_ci 6948c2ecf20Sopenharmony_ci WARN_ON(con->state != CON_STATE_CLOSED); 6958c2ecf20Sopenharmony_ci con->state = CON_STATE_PREOPEN; 6968c2ecf20Sopenharmony_ci 6978c2ecf20Sopenharmony_ci con->peer_name.type = (__u8) entity_type; 6988c2ecf20Sopenharmony_ci con->peer_name.num = cpu_to_le64(entity_num); 6998c2ecf20Sopenharmony_ci 7008c2ecf20Sopenharmony_ci memcpy(&con->peer_addr, addr, sizeof(*addr)); 7018c2ecf20Sopenharmony_ci con->delay = 0; /* reset backoff memory */ 7028c2ecf20Sopenharmony_ci mutex_unlock(&con->mutex); 7038c2ecf20Sopenharmony_ci queue_con(con); 7048c2ecf20Sopenharmony_ci} 7058c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_con_open); 7068c2ecf20Sopenharmony_ci 7078c2ecf20Sopenharmony_ci/* 7088c2ecf20Sopenharmony_ci * return true if this connection ever successfully opened 7098c2ecf20Sopenharmony_ci */ 7108c2ecf20Sopenharmony_cibool ceph_con_opened(struct ceph_connection *con) 7118c2ecf20Sopenharmony_ci{ 7128c2ecf20Sopenharmony_ci return con->connect_seq > 0; 7138c2ecf20Sopenharmony_ci} 7148c2ecf20Sopenharmony_ci 7158c2ecf20Sopenharmony_ci/* 7168c2ecf20Sopenharmony_ci * initialize a new connection. 7178c2ecf20Sopenharmony_ci */ 7188c2ecf20Sopenharmony_civoid ceph_con_init(struct ceph_connection *con, void *private, 7198c2ecf20Sopenharmony_ci const struct ceph_connection_operations *ops, 7208c2ecf20Sopenharmony_ci struct ceph_messenger *msgr) 7218c2ecf20Sopenharmony_ci{ 7228c2ecf20Sopenharmony_ci dout("con_init %p\n", con); 7238c2ecf20Sopenharmony_ci memset(con, 0, sizeof(*con)); 7248c2ecf20Sopenharmony_ci con->private = private; 7258c2ecf20Sopenharmony_ci con->ops = ops; 7268c2ecf20Sopenharmony_ci con->msgr = msgr; 7278c2ecf20Sopenharmony_ci 7288c2ecf20Sopenharmony_ci con_sock_state_init(con); 7298c2ecf20Sopenharmony_ci 7308c2ecf20Sopenharmony_ci mutex_init(&con->mutex); 7318c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&con->out_queue); 7328c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&con->out_sent); 7338c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&con->work, ceph_con_workfn); 7348c2ecf20Sopenharmony_ci 7358c2ecf20Sopenharmony_ci con->state = CON_STATE_CLOSED; 7368c2ecf20Sopenharmony_ci} 7378c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_con_init); 7388c2ecf20Sopenharmony_ci 7398c2ecf20Sopenharmony_ci 7408c2ecf20Sopenharmony_ci/* 7418c2ecf20Sopenharmony_ci * We maintain a global counter to order connection attempts. Get 7428c2ecf20Sopenharmony_ci * a unique seq greater than @gt. 7438c2ecf20Sopenharmony_ci */ 7448c2ecf20Sopenharmony_cistatic u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) 7458c2ecf20Sopenharmony_ci{ 7468c2ecf20Sopenharmony_ci u32 ret; 7478c2ecf20Sopenharmony_ci 7488c2ecf20Sopenharmony_ci spin_lock(&msgr->global_seq_lock); 7498c2ecf20Sopenharmony_ci if (msgr->global_seq < gt) 7508c2ecf20Sopenharmony_ci msgr->global_seq = gt; 7518c2ecf20Sopenharmony_ci ret = ++msgr->global_seq; 7528c2ecf20Sopenharmony_ci spin_unlock(&msgr->global_seq_lock); 7538c2ecf20Sopenharmony_ci return ret; 7548c2ecf20Sopenharmony_ci} 7558c2ecf20Sopenharmony_ci 7568c2ecf20Sopenharmony_cistatic void con_out_kvec_reset(struct ceph_connection *con) 7578c2ecf20Sopenharmony_ci{ 7588c2ecf20Sopenharmony_ci BUG_ON(con->out_skip); 7598c2ecf20Sopenharmony_ci 7608c2ecf20Sopenharmony_ci con->out_kvec_left = 0; 7618c2ecf20Sopenharmony_ci con->out_kvec_bytes = 0; 7628c2ecf20Sopenharmony_ci con->out_kvec_cur = &con->out_kvec[0]; 7638c2ecf20Sopenharmony_ci} 7648c2ecf20Sopenharmony_ci 7658c2ecf20Sopenharmony_cistatic void con_out_kvec_add(struct ceph_connection *con, 7668c2ecf20Sopenharmony_ci size_t size, void *data) 7678c2ecf20Sopenharmony_ci{ 7688c2ecf20Sopenharmony_ci int index = con->out_kvec_left; 7698c2ecf20Sopenharmony_ci 7708c2ecf20Sopenharmony_ci BUG_ON(con->out_skip); 7718c2ecf20Sopenharmony_ci BUG_ON(index >= ARRAY_SIZE(con->out_kvec)); 7728c2ecf20Sopenharmony_ci 7738c2ecf20Sopenharmony_ci con->out_kvec[index].iov_len = size; 7748c2ecf20Sopenharmony_ci con->out_kvec[index].iov_base = data; 7758c2ecf20Sopenharmony_ci con->out_kvec_left++; 7768c2ecf20Sopenharmony_ci con->out_kvec_bytes += size; 7778c2ecf20Sopenharmony_ci} 7788c2ecf20Sopenharmony_ci 7798c2ecf20Sopenharmony_ci/* 7808c2ecf20Sopenharmony_ci * Chop off a kvec from the end. Return residual number of bytes for 7818c2ecf20Sopenharmony_ci * that kvec, i.e. how many bytes would have been written if the kvec 7828c2ecf20Sopenharmony_ci * hadn't been nuked. 7838c2ecf20Sopenharmony_ci */ 7848c2ecf20Sopenharmony_cistatic int con_out_kvec_skip(struct ceph_connection *con) 7858c2ecf20Sopenharmony_ci{ 7868c2ecf20Sopenharmony_ci int off = con->out_kvec_cur - con->out_kvec; 7878c2ecf20Sopenharmony_ci int skip = 0; 7888c2ecf20Sopenharmony_ci 7898c2ecf20Sopenharmony_ci if (con->out_kvec_bytes > 0) { 7908c2ecf20Sopenharmony_ci skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len; 7918c2ecf20Sopenharmony_ci BUG_ON(con->out_kvec_bytes < skip); 7928c2ecf20Sopenharmony_ci BUG_ON(!con->out_kvec_left); 7938c2ecf20Sopenharmony_ci con->out_kvec_bytes -= skip; 7948c2ecf20Sopenharmony_ci con->out_kvec_left--; 7958c2ecf20Sopenharmony_ci } 7968c2ecf20Sopenharmony_ci 7978c2ecf20Sopenharmony_ci return skip; 7988c2ecf20Sopenharmony_ci} 7998c2ecf20Sopenharmony_ci 8008c2ecf20Sopenharmony_ci#ifdef CONFIG_BLOCK 8018c2ecf20Sopenharmony_ci 8028c2ecf20Sopenharmony_ci/* 8038c2ecf20Sopenharmony_ci * For a bio data item, a piece is whatever remains of the next 8048c2ecf20Sopenharmony_ci * entry in the current bio iovec, or the first entry in the next 8058c2ecf20Sopenharmony_ci * bio in the list. 8068c2ecf20Sopenharmony_ci */ 8078c2ecf20Sopenharmony_cistatic void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor, 8088c2ecf20Sopenharmony_ci size_t length) 8098c2ecf20Sopenharmony_ci{ 8108c2ecf20Sopenharmony_ci struct ceph_msg_data *data = cursor->data; 8118c2ecf20Sopenharmony_ci struct ceph_bio_iter *it = &cursor->bio_iter; 8128c2ecf20Sopenharmony_ci 8138c2ecf20Sopenharmony_ci cursor->resid = min_t(size_t, length, data->bio_length); 8148c2ecf20Sopenharmony_ci *it = data->bio_pos; 8158c2ecf20Sopenharmony_ci if (cursor->resid < it->iter.bi_size) 8168c2ecf20Sopenharmony_ci it->iter.bi_size = cursor->resid; 8178c2ecf20Sopenharmony_ci 8188c2ecf20Sopenharmony_ci BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter)); 8198c2ecf20Sopenharmony_ci cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter); 8208c2ecf20Sopenharmony_ci} 8218c2ecf20Sopenharmony_ci 8228c2ecf20Sopenharmony_cistatic struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor, 8238c2ecf20Sopenharmony_ci size_t *page_offset, 8248c2ecf20Sopenharmony_ci size_t *length) 8258c2ecf20Sopenharmony_ci{ 8268c2ecf20Sopenharmony_ci struct bio_vec bv = bio_iter_iovec(cursor->bio_iter.bio, 8278c2ecf20Sopenharmony_ci cursor->bio_iter.iter); 8288c2ecf20Sopenharmony_ci 8298c2ecf20Sopenharmony_ci *page_offset = bv.bv_offset; 8308c2ecf20Sopenharmony_ci *length = bv.bv_len; 8318c2ecf20Sopenharmony_ci return bv.bv_page; 8328c2ecf20Sopenharmony_ci} 8338c2ecf20Sopenharmony_ci 8348c2ecf20Sopenharmony_cistatic bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, 8358c2ecf20Sopenharmony_ci size_t bytes) 8368c2ecf20Sopenharmony_ci{ 8378c2ecf20Sopenharmony_ci struct ceph_bio_iter *it = &cursor->bio_iter; 8388c2ecf20Sopenharmony_ci struct page *page = bio_iter_page(it->bio, it->iter); 8398c2ecf20Sopenharmony_ci 8408c2ecf20Sopenharmony_ci BUG_ON(bytes > cursor->resid); 8418c2ecf20Sopenharmony_ci BUG_ON(bytes > bio_iter_len(it->bio, it->iter)); 8428c2ecf20Sopenharmony_ci cursor->resid -= bytes; 8438c2ecf20Sopenharmony_ci bio_advance_iter(it->bio, &it->iter, bytes); 8448c2ecf20Sopenharmony_ci 8458c2ecf20Sopenharmony_ci if (!cursor->resid) { 8468c2ecf20Sopenharmony_ci BUG_ON(!cursor->last_piece); 8478c2ecf20Sopenharmony_ci return false; /* no more data */ 8488c2ecf20Sopenharmony_ci } 8498c2ecf20Sopenharmony_ci 8508c2ecf20Sopenharmony_ci if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done && 8518c2ecf20Sopenharmony_ci page == bio_iter_page(it->bio, it->iter))) 8528c2ecf20Sopenharmony_ci return false; /* more bytes to process in this segment */ 8538c2ecf20Sopenharmony_ci 8548c2ecf20Sopenharmony_ci if (!it->iter.bi_size) { 8558c2ecf20Sopenharmony_ci it->bio = it->bio->bi_next; 8568c2ecf20Sopenharmony_ci it->iter = it->bio->bi_iter; 8578c2ecf20Sopenharmony_ci if (cursor->resid < it->iter.bi_size) 8588c2ecf20Sopenharmony_ci it->iter.bi_size = cursor->resid; 8598c2ecf20Sopenharmony_ci } 8608c2ecf20Sopenharmony_ci 8618c2ecf20Sopenharmony_ci BUG_ON(cursor->last_piece); 8628c2ecf20Sopenharmony_ci BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter)); 8638c2ecf20Sopenharmony_ci cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter); 8648c2ecf20Sopenharmony_ci return true; 8658c2ecf20Sopenharmony_ci} 8668c2ecf20Sopenharmony_ci#endif /* CONFIG_BLOCK */ 8678c2ecf20Sopenharmony_ci 8688c2ecf20Sopenharmony_cistatic void ceph_msg_data_bvecs_cursor_init(struct ceph_msg_data_cursor *cursor, 8698c2ecf20Sopenharmony_ci size_t length) 8708c2ecf20Sopenharmony_ci{ 8718c2ecf20Sopenharmony_ci struct ceph_msg_data *data = cursor->data; 8728c2ecf20Sopenharmony_ci struct bio_vec *bvecs = data->bvec_pos.bvecs; 8738c2ecf20Sopenharmony_ci 8748c2ecf20Sopenharmony_ci cursor->resid = min_t(size_t, length, data->bvec_pos.iter.bi_size); 8758c2ecf20Sopenharmony_ci cursor->bvec_iter = data->bvec_pos.iter; 8768c2ecf20Sopenharmony_ci cursor->bvec_iter.bi_size = cursor->resid; 8778c2ecf20Sopenharmony_ci 8788c2ecf20Sopenharmony_ci BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter)); 8798c2ecf20Sopenharmony_ci cursor->last_piece = 8808c2ecf20Sopenharmony_ci cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter); 8818c2ecf20Sopenharmony_ci} 8828c2ecf20Sopenharmony_ci 8838c2ecf20Sopenharmony_cistatic struct page *ceph_msg_data_bvecs_next(struct ceph_msg_data_cursor *cursor, 8848c2ecf20Sopenharmony_ci size_t *page_offset, 8858c2ecf20Sopenharmony_ci size_t *length) 8868c2ecf20Sopenharmony_ci{ 8878c2ecf20Sopenharmony_ci struct bio_vec bv = bvec_iter_bvec(cursor->data->bvec_pos.bvecs, 8888c2ecf20Sopenharmony_ci cursor->bvec_iter); 8898c2ecf20Sopenharmony_ci 8908c2ecf20Sopenharmony_ci *page_offset = bv.bv_offset; 8918c2ecf20Sopenharmony_ci *length = bv.bv_len; 8928c2ecf20Sopenharmony_ci return bv.bv_page; 8938c2ecf20Sopenharmony_ci} 8948c2ecf20Sopenharmony_ci 8958c2ecf20Sopenharmony_cistatic bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor, 8968c2ecf20Sopenharmony_ci size_t bytes) 8978c2ecf20Sopenharmony_ci{ 8988c2ecf20Sopenharmony_ci struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs; 8998c2ecf20Sopenharmony_ci struct page *page = bvec_iter_page(bvecs, cursor->bvec_iter); 9008c2ecf20Sopenharmony_ci 9018c2ecf20Sopenharmony_ci BUG_ON(bytes > cursor->resid); 9028c2ecf20Sopenharmony_ci BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter)); 9038c2ecf20Sopenharmony_ci cursor->resid -= bytes; 9048c2ecf20Sopenharmony_ci bvec_iter_advance(bvecs, &cursor->bvec_iter, bytes); 9058c2ecf20Sopenharmony_ci 9068c2ecf20Sopenharmony_ci if (!cursor->resid) { 9078c2ecf20Sopenharmony_ci BUG_ON(!cursor->last_piece); 9088c2ecf20Sopenharmony_ci return false; /* no more data */ 9098c2ecf20Sopenharmony_ci } 9108c2ecf20Sopenharmony_ci 9118c2ecf20Sopenharmony_ci if (!bytes || (cursor->bvec_iter.bi_bvec_done && 9128c2ecf20Sopenharmony_ci page == bvec_iter_page(bvecs, cursor->bvec_iter))) 9138c2ecf20Sopenharmony_ci return false; /* more bytes to process in this segment */ 9148c2ecf20Sopenharmony_ci 9158c2ecf20Sopenharmony_ci BUG_ON(cursor->last_piece); 9168c2ecf20Sopenharmony_ci BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter)); 9178c2ecf20Sopenharmony_ci cursor->last_piece = 9188c2ecf20Sopenharmony_ci cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter); 9198c2ecf20Sopenharmony_ci return true; 9208c2ecf20Sopenharmony_ci} 9218c2ecf20Sopenharmony_ci 9228c2ecf20Sopenharmony_ci/* 9238c2ecf20Sopenharmony_ci * For a page array, a piece comes from the first page in the array 9248c2ecf20Sopenharmony_ci * that has not already been fully consumed. 9258c2ecf20Sopenharmony_ci */ 9268c2ecf20Sopenharmony_cistatic void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor, 9278c2ecf20Sopenharmony_ci size_t length) 9288c2ecf20Sopenharmony_ci{ 9298c2ecf20Sopenharmony_ci struct ceph_msg_data *data = cursor->data; 9308c2ecf20Sopenharmony_ci int page_count; 9318c2ecf20Sopenharmony_ci 9328c2ecf20Sopenharmony_ci BUG_ON(data->type != CEPH_MSG_DATA_PAGES); 9338c2ecf20Sopenharmony_ci 9348c2ecf20Sopenharmony_ci BUG_ON(!data->pages); 9358c2ecf20Sopenharmony_ci BUG_ON(!data->length); 9368c2ecf20Sopenharmony_ci 9378c2ecf20Sopenharmony_ci cursor->resid = min(length, data->length); 9388c2ecf20Sopenharmony_ci page_count = calc_pages_for(data->alignment, (u64)data->length); 9398c2ecf20Sopenharmony_ci cursor->page_offset = data->alignment & ~PAGE_MASK; 9408c2ecf20Sopenharmony_ci cursor->page_index = 0; 9418c2ecf20Sopenharmony_ci BUG_ON(page_count > (int)USHRT_MAX); 9428c2ecf20Sopenharmony_ci cursor->page_count = (unsigned short)page_count; 9438c2ecf20Sopenharmony_ci BUG_ON(length > SIZE_MAX - cursor->page_offset); 9448c2ecf20Sopenharmony_ci cursor->last_piece = cursor->page_offset + cursor->resid <= PAGE_SIZE; 9458c2ecf20Sopenharmony_ci} 9468c2ecf20Sopenharmony_ci 9478c2ecf20Sopenharmony_cistatic struct page * 9488c2ecf20Sopenharmony_ciceph_msg_data_pages_next(struct ceph_msg_data_cursor *cursor, 9498c2ecf20Sopenharmony_ci size_t *page_offset, size_t *length) 9508c2ecf20Sopenharmony_ci{ 9518c2ecf20Sopenharmony_ci struct ceph_msg_data *data = cursor->data; 9528c2ecf20Sopenharmony_ci 9538c2ecf20Sopenharmony_ci BUG_ON(data->type != CEPH_MSG_DATA_PAGES); 9548c2ecf20Sopenharmony_ci 9558c2ecf20Sopenharmony_ci BUG_ON(cursor->page_index >= cursor->page_count); 9568c2ecf20Sopenharmony_ci BUG_ON(cursor->page_offset >= PAGE_SIZE); 9578c2ecf20Sopenharmony_ci 9588c2ecf20Sopenharmony_ci *page_offset = cursor->page_offset; 9598c2ecf20Sopenharmony_ci if (cursor->last_piece) 9608c2ecf20Sopenharmony_ci *length = cursor->resid; 9618c2ecf20Sopenharmony_ci else 9628c2ecf20Sopenharmony_ci *length = PAGE_SIZE - *page_offset; 9638c2ecf20Sopenharmony_ci 9648c2ecf20Sopenharmony_ci return data->pages[cursor->page_index]; 9658c2ecf20Sopenharmony_ci} 9668c2ecf20Sopenharmony_ci 9678c2ecf20Sopenharmony_cistatic bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor, 9688c2ecf20Sopenharmony_ci size_t bytes) 9698c2ecf20Sopenharmony_ci{ 9708c2ecf20Sopenharmony_ci BUG_ON(cursor->data->type != CEPH_MSG_DATA_PAGES); 9718c2ecf20Sopenharmony_ci 9728c2ecf20Sopenharmony_ci BUG_ON(cursor->page_offset + bytes > PAGE_SIZE); 9738c2ecf20Sopenharmony_ci 9748c2ecf20Sopenharmony_ci /* Advance the cursor page offset */ 9758c2ecf20Sopenharmony_ci 9768c2ecf20Sopenharmony_ci cursor->resid -= bytes; 9778c2ecf20Sopenharmony_ci cursor->page_offset = (cursor->page_offset + bytes) & ~PAGE_MASK; 9788c2ecf20Sopenharmony_ci if (!bytes || cursor->page_offset) 9798c2ecf20Sopenharmony_ci return false; /* more bytes to process in the current page */ 9808c2ecf20Sopenharmony_ci 9818c2ecf20Sopenharmony_ci if (!cursor->resid) 9828c2ecf20Sopenharmony_ci return false; /* no more data */ 9838c2ecf20Sopenharmony_ci 9848c2ecf20Sopenharmony_ci /* Move on to the next page; offset is already at 0 */ 9858c2ecf20Sopenharmony_ci 9868c2ecf20Sopenharmony_ci BUG_ON(cursor->page_index >= cursor->page_count); 9878c2ecf20Sopenharmony_ci cursor->page_index++; 9888c2ecf20Sopenharmony_ci cursor->last_piece = cursor->resid <= PAGE_SIZE; 9898c2ecf20Sopenharmony_ci 9908c2ecf20Sopenharmony_ci return true; 9918c2ecf20Sopenharmony_ci} 9928c2ecf20Sopenharmony_ci 9938c2ecf20Sopenharmony_ci/* 9948c2ecf20Sopenharmony_ci * For a pagelist, a piece is whatever remains to be consumed in the 9958c2ecf20Sopenharmony_ci * first page in the list, or the front of the next page. 9968c2ecf20Sopenharmony_ci */ 9978c2ecf20Sopenharmony_cistatic void 9988c2ecf20Sopenharmony_ciceph_msg_data_pagelist_cursor_init(struct ceph_msg_data_cursor *cursor, 9998c2ecf20Sopenharmony_ci size_t length) 10008c2ecf20Sopenharmony_ci{ 10018c2ecf20Sopenharmony_ci struct ceph_msg_data *data = cursor->data; 10028c2ecf20Sopenharmony_ci struct ceph_pagelist *pagelist; 10038c2ecf20Sopenharmony_ci struct page *page; 10048c2ecf20Sopenharmony_ci 10058c2ecf20Sopenharmony_ci BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST); 10068c2ecf20Sopenharmony_ci 10078c2ecf20Sopenharmony_ci pagelist = data->pagelist; 10088c2ecf20Sopenharmony_ci BUG_ON(!pagelist); 10098c2ecf20Sopenharmony_ci 10108c2ecf20Sopenharmony_ci if (!length) 10118c2ecf20Sopenharmony_ci return; /* pagelist can be assigned but empty */ 10128c2ecf20Sopenharmony_ci 10138c2ecf20Sopenharmony_ci BUG_ON(list_empty(&pagelist->head)); 10148c2ecf20Sopenharmony_ci page = list_first_entry(&pagelist->head, struct page, lru); 10158c2ecf20Sopenharmony_ci 10168c2ecf20Sopenharmony_ci cursor->resid = min(length, pagelist->length); 10178c2ecf20Sopenharmony_ci cursor->page = page; 10188c2ecf20Sopenharmony_ci cursor->offset = 0; 10198c2ecf20Sopenharmony_ci cursor->last_piece = cursor->resid <= PAGE_SIZE; 10208c2ecf20Sopenharmony_ci} 10218c2ecf20Sopenharmony_ci 10228c2ecf20Sopenharmony_cistatic struct page * 10238c2ecf20Sopenharmony_ciceph_msg_data_pagelist_next(struct ceph_msg_data_cursor *cursor, 10248c2ecf20Sopenharmony_ci size_t *page_offset, size_t *length) 10258c2ecf20Sopenharmony_ci{ 10268c2ecf20Sopenharmony_ci struct ceph_msg_data *data = cursor->data; 10278c2ecf20Sopenharmony_ci struct ceph_pagelist *pagelist; 10288c2ecf20Sopenharmony_ci 10298c2ecf20Sopenharmony_ci BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST); 10308c2ecf20Sopenharmony_ci 10318c2ecf20Sopenharmony_ci pagelist = data->pagelist; 10328c2ecf20Sopenharmony_ci BUG_ON(!pagelist); 10338c2ecf20Sopenharmony_ci 10348c2ecf20Sopenharmony_ci BUG_ON(!cursor->page); 10358c2ecf20Sopenharmony_ci BUG_ON(cursor->offset + cursor->resid != pagelist->length); 10368c2ecf20Sopenharmony_ci 10378c2ecf20Sopenharmony_ci /* offset of first page in pagelist is always 0 */ 10388c2ecf20Sopenharmony_ci *page_offset = cursor->offset & ~PAGE_MASK; 10398c2ecf20Sopenharmony_ci if (cursor->last_piece) 10408c2ecf20Sopenharmony_ci *length = cursor->resid; 10418c2ecf20Sopenharmony_ci else 10428c2ecf20Sopenharmony_ci *length = PAGE_SIZE - *page_offset; 10438c2ecf20Sopenharmony_ci 10448c2ecf20Sopenharmony_ci return cursor->page; 10458c2ecf20Sopenharmony_ci} 10468c2ecf20Sopenharmony_ci 10478c2ecf20Sopenharmony_cistatic bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor, 10488c2ecf20Sopenharmony_ci size_t bytes) 10498c2ecf20Sopenharmony_ci{ 10508c2ecf20Sopenharmony_ci struct ceph_msg_data *data = cursor->data; 10518c2ecf20Sopenharmony_ci struct ceph_pagelist *pagelist; 10528c2ecf20Sopenharmony_ci 10538c2ecf20Sopenharmony_ci BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST); 10548c2ecf20Sopenharmony_ci 10558c2ecf20Sopenharmony_ci pagelist = data->pagelist; 10568c2ecf20Sopenharmony_ci BUG_ON(!pagelist); 10578c2ecf20Sopenharmony_ci 10588c2ecf20Sopenharmony_ci BUG_ON(cursor->offset + cursor->resid != pagelist->length); 10598c2ecf20Sopenharmony_ci BUG_ON((cursor->offset & ~PAGE_MASK) + bytes > PAGE_SIZE); 10608c2ecf20Sopenharmony_ci 10618c2ecf20Sopenharmony_ci /* Advance the cursor offset */ 10628c2ecf20Sopenharmony_ci 10638c2ecf20Sopenharmony_ci cursor->resid -= bytes; 10648c2ecf20Sopenharmony_ci cursor->offset += bytes; 10658c2ecf20Sopenharmony_ci /* offset of first page in pagelist is always 0 */ 10668c2ecf20Sopenharmony_ci if (!bytes || cursor->offset & ~PAGE_MASK) 10678c2ecf20Sopenharmony_ci return false; /* more bytes to process in the current page */ 10688c2ecf20Sopenharmony_ci 10698c2ecf20Sopenharmony_ci if (!cursor->resid) 10708c2ecf20Sopenharmony_ci return false; /* no more data */ 10718c2ecf20Sopenharmony_ci 10728c2ecf20Sopenharmony_ci /* Move on to the next page */ 10738c2ecf20Sopenharmony_ci 10748c2ecf20Sopenharmony_ci BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head)); 10758c2ecf20Sopenharmony_ci cursor->page = list_next_entry(cursor->page, lru); 10768c2ecf20Sopenharmony_ci cursor->last_piece = cursor->resid <= PAGE_SIZE; 10778c2ecf20Sopenharmony_ci 10788c2ecf20Sopenharmony_ci return true; 10798c2ecf20Sopenharmony_ci} 10808c2ecf20Sopenharmony_ci 10818c2ecf20Sopenharmony_ci/* 10828c2ecf20Sopenharmony_ci * Message data is handled (sent or received) in pieces, where each 10838c2ecf20Sopenharmony_ci * piece resides on a single page. The network layer might not 10848c2ecf20Sopenharmony_ci * consume an entire piece at once. A data item's cursor keeps 10858c2ecf20Sopenharmony_ci * track of which piece is next to process and how much remains to 10868c2ecf20Sopenharmony_ci * be processed in that piece. It also tracks whether the current 10878c2ecf20Sopenharmony_ci * piece is the last one in the data item. 10888c2ecf20Sopenharmony_ci */ 10898c2ecf20Sopenharmony_cistatic void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor) 10908c2ecf20Sopenharmony_ci{ 10918c2ecf20Sopenharmony_ci size_t length = cursor->total_resid; 10928c2ecf20Sopenharmony_ci 10938c2ecf20Sopenharmony_ci switch (cursor->data->type) { 10948c2ecf20Sopenharmony_ci case CEPH_MSG_DATA_PAGELIST: 10958c2ecf20Sopenharmony_ci ceph_msg_data_pagelist_cursor_init(cursor, length); 10968c2ecf20Sopenharmony_ci break; 10978c2ecf20Sopenharmony_ci case CEPH_MSG_DATA_PAGES: 10988c2ecf20Sopenharmony_ci ceph_msg_data_pages_cursor_init(cursor, length); 10998c2ecf20Sopenharmony_ci break; 11008c2ecf20Sopenharmony_ci#ifdef CONFIG_BLOCK 11018c2ecf20Sopenharmony_ci case CEPH_MSG_DATA_BIO: 11028c2ecf20Sopenharmony_ci ceph_msg_data_bio_cursor_init(cursor, length); 11038c2ecf20Sopenharmony_ci break; 11048c2ecf20Sopenharmony_ci#endif /* CONFIG_BLOCK */ 11058c2ecf20Sopenharmony_ci case CEPH_MSG_DATA_BVECS: 11068c2ecf20Sopenharmony_ci ceph_msg_data_bvecs_cursor_init(cursor, length); 11078c2ecf20Sopenharmony_ci break; 11088c2ecf20Sopenharmony_ci case CEPH_MSG_DATA_NONE: 11098c2ecf20Sopenharmony_ci default: 11108c2ecf20Sopenharmony_ci /* BUG(); */ 11118c2ecf20Sopenharmony_ci break; 11128c2ecf20Sopenharmony_ci } 11138c2ecf20Sopenharmony_ci cursor->need_crc = true; 11148c2ecf20Sopenharmony_ci} 11158c2ecf20Sopenharmony_ci 11168c2ecf20Sopenharmony_cistatic void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length) 11178c2ecf20Sopenharmony_ci{ 11188c2ecf20Sopenharmony_ci struct ceph_msg_data_cursor *cursor = &msg->cursor; 11198c2ecf20Sopenharmony_ci 11208c2ecf20Sopenharmony_ci BUG_ON(!length); 11218c2ecf20Sopenharmony_ci BUG_ON(length > msg->data_length); 11228c2ecf20Sopenharmony_ci BUG_ON(!msg->num_data_items); 11238c2ecf20Sopenharmony_ci 11248c2ecf20Sopenharmony_ci cursor->total_resid = length; 11258c2ecf20Sopenharmony_ci cursor->data = msg->data; 11268c2ecf20Sopenharmony_ci 11278c2ecf20Sopenharmony_ci __ceph_msg_data_cursor_init(cursor); 11288c2ecf20Sopenharmony_ci} 11298c2ecf20Sopenharmony_ci 11308c2ecf20Sopenharmony_ci/* 11318c2ecf20Sopenharmony_ci * Return the page containing the next piece to process for a given 11328c2ecf20Sopenharmony_ci * data item, and supply the page offset and length of that piece. 11338c2ecf20Sopenharmony_ci * Indicate whether this is the last piece in this data item. 11348c2ecf20Sopenharmony_ci */ 11358c2ecf20Sopenharmony_cistatic struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, 11368c2ecf20Sopenharmony_ci size_t *page_offset, size_t *length, 11378c2ecf20Sopenharmony_ci bool *last_piece) 11388c2ecf20Sopenharmony_ci{ 11398c2ecf20Sopenharmony_ci struct page *page; 11408c2ecf20Sopenharmony_ci 11418c2ecf20Sopenharmony_ci switch (cursor->data->type) { 11428c2ecf20Sopenharmony_ci case CEPH_MSG_DATA_PAGELIST: 11438c2ecf20Sopenharmony_ci page = ceph_msg_data_pagelist_next(cursor, page_offset, length); 11448c2ecf20Sopenharmony_ci break; 11458c2ecf20Sopenharmony_ci case CEPH_MSG_DATA_PAGES: 11468c2ecf20Sopenharmony_ci page = ceph_msg_data_pages_next(cursor, page_offset, length); 11478c2ecf20Sopenharmony_ci break; 11488c2ecf20Sopenharmony_ci#ifdef CONFIG_BLOCK 11498c2ecf20Sopenharmony_ci case CEPH_MSG_DATA_BIO: 11508c2ecf20Sopenharmony_ci page = ceph_msg_data_bio_next(cursor, page_offset, length); 11518c2ecf20Sopenharmony_ci break; 11528c2ecf20Sopenharmony_ci#endif /* CONFIG_BLOCK */ 11538c2ecf20Sopenharmony_ci case CEPH_MSG_DATA_BVECS: 11548c2ecf20Sopenharmony_ci page = ceph_msg_data_bvecs_next(cursor, page_offset, length); 11558c2ecf20Sopenharmony_ci break; 11568c2ecf20Sopenharmony_ci case CEPH_MSG_DATA_NONE: 11578c2ecf20Sopenharmony_ci default: 11588c2ecf20Sopenharmony_ci page = NULL; 11598c2ecf20Sopenharmony_ci break; 11608c2ecf20Sopenharmony_ci } 11618c2ecf20Sopenharmony_ci 11628c2ecf20Sopenharmony_ci BUG_ON(!page); 11638c2ecf20Sopenharmony_ci BUG_ON(*page_offset + *length > PAGE_SIZE); 11648c2ecf20Sopenharmony_ci BUG_ON(!*length); 11658c2ecf20Sopenharmony_ci BUG_ON(*length > cursor->resid); 11668c2ecf20Sopenharmony_ci if (last_piece) 11678c2ecf20Sopenharmony_ci *last_piece = cursor->last_piece; 11688c2ecf20Sopenharmony_ci 11698c2ecf20Sopenharmony_ci return page; 11708c2ecf20Sopenharmony_ci} 11718c2ecf20Sopenharmony_ci 11728c2ecf20Sopenharmony_ci/* 11738c2ecf20Sopenharmony_ci * Returns true if the result moves the cursor on to the next piece 11748c2ecf20Sopenharmony_ci * of the data item. 11758c2ecf20Sopenharmony_ci */ 11768c2ecf20Sopenharmony_cistatic void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, 11778c2ecf20Sopenharmony_ci size_t bytes) 11788c2ecf20Sopenharmony_ci{ 11798c2ecf20Sopenharmony_ci bool new_piece; 11808c2ecf20Sopenharmony_ci 11818c2ecf20Sopenharmony_ci BUG_ON(bytes > cursor->resid); 11828c2ecf20Sopenharmony_ci switch (cursor->data->type) { 11838c2ecf20Sopenharmony_ci case CEPH_MSG_DATA_PAGELIST: 11848c2ecf20Sopenharmony_ci new_piece = ceph_msg_data_pagelist_advance(cursor, bytes); 11858c2ecf20Sopenharmony_ci break; 11868c2ecf20Sopenharmony_ci case CEPH_MSG_DATA_PAGES: 11878c2ecf20Sopenharmony_ci new_piece = ceph_msg_data_pages_advance(cursor, bytes); 11888c2ecf20Sopenharmony_ci break; 11898c2ecf20Sopenharmony_ci#ifdef CONFIG_BLOCK 11908c2ecf20Sopenharmony_ci case CEPH_MSG_DATA_BIO: 11918c2ecf20Sopenharmony_ci new_piece = ceph_msg_data_bio_advance(cursor, bytes); 11928c2ecf20Sopenharmony_ci break; 11938c2ecf20Sopenharmony_ci#endif /* CONFIG_BLOCK */ 11948c2ecf20Sopenharmony_ci case CEPH_MSG_DATA_BVECS: 11958c2ecf20Sopenharmony_ci new_piece = ceph_msg_data_bvecs_advance(cursor, bytes); 11968c2ecf20Sopenharmony_ci break; 11978c2ecf20Sopenharmony_ci case CEPH_MSG_DATA_NONE: 11988c2ecf20Sopenharmony_ci default: 11998c2ecf20Sopenharmony_ci BUG(); 12008c2ecf20Sopenharmony_ci break; 12018c2ecf20Sopenharmony_ci } 12028c2ecf20Sopenharmony_ci cursor->total_resid -= bytes; 12038c2ecf20Sopenharmony_ci 12048c2ecf20Sopenharmony_ci if (!cursor->resid && cursor->total_resid) { 12058c2ecf20Sopenharmony_ci WARN_ON(!cursor->last_piece); 12068c2ecf20Sopenharmony_ci cursor->data++; 12078c2ecf20Sopenharmony_ci __ceph_msg_data_cursor_init(cursor); 12088c2ecf20Sopenharmony_ci new_piece = true; 12098c2ecf20Sopenharmony_ci } 12108c2ecf20Sopenharmony_ci cursor->need_crc = new_piece; 12118c2ecf20Sopenharmony_ci} 12128c2ecf20Sopenharmony_ci 12138c2ecf20Sopenharmony_cistatic size_t sizeof_footer(struct ceph_connection *con) 12148c2ecf20Sopenharmony_ci{ 12158c2ecf20Sopenharmony_ci return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ? 12168c2ecf20Sopenharmony_ci sizeof(struct ceph_msg_footer) : 12178c2ecf20Sopenharmony_ci sizeof(struct ceph_msg_footer_old); 12188c2ecf20Sopenharmony_ci} 12198c2ecf20Sopenharmony_ci 12208c2ecf20Sopenharmony_cistatic void prepare_message_data(struct ceph_msg *msg, u32 data_len) 12218c2ecf20Sopenharmony_ci{ 12228c2ecf20Sopenharmony_ci /* Initialize data cursor */ 12238c2ecf20Sopenharmony_ci 12248c2ecf20Sopenharmony_ci ceph_msg_data_cursor_init(msg, (size_t)data_len); 12258c2ecf20Sopenharmony_ci} 12268c2ecf20Sopenharmony_ci 12278c2ecf20Sopenharmony_ci/* 12288c2ecf20Sopenharmony_ci * Prepare footer for currently outgoing message, and finish things 12298c2ecf20Sopenharmony_ci * off. Assumes out_kvec* are already valid.. we just add on to the end. 12308c2ecf20Sopenharmony_ci */ 12318c2ecf20Sopenharmony_cistatic void prepare_write_message_footer(struct ceph_connection *con) 12328c2ecf20Sopenharmony_ci{ 12338c2ecf20Sopenharmony_ci struct ceph_msg *m = con->out_msg; 12348c2ecf20Sopenharmony_ci 12358c2ecf20Sopenharmony_ci m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; 12368c2ecf20Sopenharmony_ci 12378c2ecf20Sopenharmony_ci dout("prepare_write_message_footer %p\n", con); 12388c2ecf20Sopenharmony_ci con_out_kvec_add(con, sizeof_footer(con), &m->footer); 12398c2ecf20Sopenharmony_ci if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { 12408c2ecf20Sopenharmony_ci if (con->ops->sign_message) 12418c2ecf20Sopenharmony_ci con->ops->sign_message(m); 12428c2ecf20Sopenharmony_ci else 12438c2ecf20Sopenharmony_ci m->footer.sig = 0; 12448c2ecf20Sopenharmony_ci } else { 12458c2ecf20Sopenharmony_ci m->old_footer.flags = m->footer.flags; 12468c2ecf20Sopenharmony_ci } 12478c2ecf20Sopenharmony_ci con->out_more = m->more_to_follow; 12488c2ecf20Sopenharmony_ci con->out_msg_done = true; 12498c2ecf20Sopenharmony_ci} 12508c2ecf20Sopenharmony_ci 12518c2ecf20Sopenharmony_ci/* 12528c2ecf20Sopenharmony_ci * Prepare headers for the next outgoing message. 12538c2ecf20Sopenharmony_ci */ 12548c2ecf20Sopenharmony_cistatic void prepare_write_message(struct ceph_connection *con) 12558c2ecf20Sopenharmony_ci{ 12568c2ecf20Sopenharmony_ci struct ceph_msg *m; 12578c2ecf20Sopenharmony_ci u32 crc; 12588c2ecf20Sopenharmony_ci 12598c2ecf20Sopenharmony_ci con_out_kvec_reset(con); 12608c2ecf20Sopenharmony_ci con->out_msg_done = false; 12618c2ecf20Sopenharmony_ci 12628c2ecf20Sopenharmony_ci /* Sneak an ack in there first? If we can get it into the same 12638c2ecf20Sopenharmony_ci * TCP packet that's a good thing. */ 12648c2ecf20Sopenharmony_ci if (con->in_seq > con->in_seq_acked) { 12658c2ecf20Sopenharmony_ci con->in_seq_acked = con->in_seq; 12668c2ecf20Sopenharmony_ci con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); 12678c2ecf20Sopenharmony_ci con->out_temp_ack = cpu_to_le64(con->in_seq_acked); 12688c2ecf20Sopenharmony_ci con_out_kvec_add(con, sizeof (con->out_temp_ack), 12698c2ecf20Sopenharmony_ci &con->out_temp_ack); 12708c2ecf20Sopenharmony_ci } 12718c2ecf20Sopenharmony_ci 12728c2ecf20Sopenharmony_ci BUG_ON(list_empty(&con->out_queue)); 12738c2ecf20Sopenharmony_ci m = list_first_entry(&con->out_queue, struct ceph_msg, list_head); 12748c2ecf20Sopenharmony_ci con->out_msg = m; 12758c2ecf20Sopenharmony_ci BUG_ON(m->con != con); 12768c2ecf20Sopenharmony_ci 12778c2ecf20Sopenharmony_ci /* put message on sent list */ 12788c2ecf20Sopenharmony_ci ceph_msg_get(m); 12798c2ecf20Sopenharmony_ci list_move_tail(&m->list_head, &con->out_sent); 12808c2ecf20Sopenharmony_ci 12818c2ecf20Sopenharmony_ci /* 12828c2ecf20Sopenharmony_ci * only assign outgoing seq # if we haven't sent this message 12838c2ecf20Sopenharmony_ci * yet. if it is requeued, resend with it's original seq. 12848c2ecf20Sopenharmony_ci */ 12858c2ecf20Sopenharmony_ci if (m->needs_out_seq) { 12868c2ecf20Sopenharmony_ci m->hdr.seq = cpu_to_le64(++con->out_seq); 12878c2ecf20Sopenharmony_ci m->needs_out_seq = false; 12888c2ecf20Sopenharmony_ci 12898c2ecf20Sopenharmony_ci if (con->ops->reencode_message) 12908c2ecf20Sopenharmony_ci con->ops->reencode_message(m); 12918c2ecf20Sopenharmony_ci } 12928c2ecf20Sopenharmony_ci 12938c2ecf20Sopenharmony_ci dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", 12948c2ecf20Sopenharmony_ci m, con->out_seq, le16_to_cpu(m->hdr.type), 12958c2ecf20Sopenharmony_ci le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), 12968c2ecf20Sopenharmony_ci m->data_length); 12978c2ecf20Sopenharmony_ci WARN_ON(m->front.iov_len != le32_to_cpu(m->hdr.front_len)); 12988c2ecf20Sopenharmony_ci WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len)); 12998c2ecf20Sopenharmony_ci 13008c2ecf20Sopenharmony_ci /* tag + hdr + front + middle */ 13018c2ecf20Sopenharmony_ci con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); 13028c2ecf20Sopenharmony_ci con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr); 13038c2ecf20Sopenharmony_ci con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); 13048c2ecf20Sopenharmony_ci 13058c2ecf20Sopenharmony_ci if (m->middle) 13068c2ecf20Sopenharmony_ci con_out_kvec_add(con, m->middle->vec.iov_len, 13078c2ecf20Sopenharmony_ci m->middle->vec.iov_base); 13088c2ecf20Sopenharmony_ci 13098c2ecf20Sopenharmony_ci /* fill in hdr crc and finalize hdr */ 13108c2ecf20Sopenharmony_ci crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); 13118c2ecf20Sopenharmony_ci con->out_msg->hdr.crc = cpu_to_le32(crc); 13128c2ecf20Sopenharmony_ci memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr)); 13138c2ecf20Sopenharmony_ci 13148c2ecf20Sopenharmony_ci /* fill in front and middle crc, footer */ 13158c2ecf20Sopenharmony_ci crc = crc32c(0, m->front.iov_base, m->front.iov_len); 13168c2ecf20Sopenharmony_ci con->out_msg->footer.front_crc = cpu_to_le32(crc); 13178c2ecf20Sopenharmony_ci if (m->middle) { 13188c2ecf20Sopenharmony_ci crc = crc32c(0, m->middle->vec.iov_base, 13198c2ecf20Sopenharmony_ci m->middle->vec.iov_len); 13208c2ecf20Sopenharmony_ci con->out_msg->footer.middle_crc = cpu_to_le32(crc); 13218c2ecf20Sopenharmony_ci } else 13228c2ecf20Sopenharmony_ci con->out_msg->footer.middle_crc = 0; 13238c2ecf20Sopenharmony_ci dout("%s front_crc %u middle_crc %u\n", __func__, 13248c2ecf20Sopenharmony_ci le32_to_cpu(con->out_msg->footer.front_crc), 13258c2ecf20Sopenharmony_ci le32_to_cpu(con->out_msg->footer.middle_crc)); 13268c2ecf20Sopenharmony_ci con->out_msg->footer.flags = 0; 13278c2ecf20Sopenharmony_ci 13288c2ecf20Sopenharmony_ci /* is there a data payload? */ 13298c2ecf20Sopenharmony_ci con->out_msg->footer.data_crc = 0; 13308c2ecf20Sopenharmony_ci if (m->data_length) { 13318c2ecf20Sopenharmony_ci prepare_message_data(con->out_msg, m->data_length); 13328c2ecf20Sopenharmony_ci con->out_more = 1; /* data + footer will follow */ 13338c2ecf20Sopenharmony_ci } else { 13348c2ecf20Sopenharmony_ci /* no, queue up footer too and be done */ 13358c2ecf20Sopenharmony_ci prepare_write_message_footer(con); 13368c2ecf20Sopenharmony_ci } 13378c2ecf20Sopenharmony_ci 13388c2ecf20Sopenharmony_ci con_flag_set(con, CON_FLAG_WRITE_PENDING); 13398c2ecf20Sopenharmony_ci} 13408c2ecf20Sopenharmony_ci 13418c2ecf20Sopenharmony_ci/* 13428c2ecf20Sopenharmony_ci * Prepare an ack. 13438c2ecf20Sopenharmony_ci */ 13448c2ecf20Sopenharmony_cistatic void prepare_write_ack(struct ceph_connection *con) 13458c2ecf20Sopenharmony_ci{ 13468c2ecf20Sopenharmony_ci dout("prepare_write_ack %p %llu -> %llu\n", con, 13478c2ecf20Sopenharmony_ci con->in_seq_acked, con->in_seq); 13488c2ecf20Sopenharmony_ci con->in_seq_acked = con->in_seq; 13498c2ecf20Sopenharmony_ci 13508c2ecf20Sopenharmony_ci con_out_kvec_reset(con); 13518c2ecf20Sopenharmony_ci 13528c2ecf20Sopenharmony_ci con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); 13538c2ecf20Sopenharmony_ci 13548c2ecf20Sopenharmony_ci con->out_temp_ack = cpu_to_le64(con->in_seq_acked); 13558c2ecf20Sopenharmony_ci con_out_kvec_add(con, sizeof (con->out_temp_ack), 13568c2ecf20Sopenharmony_ci &con->out_temp_ack); 13578c2ecf20Sopenharmony_ci 13588c2ecf20Sopenharmony_ci con->out_more = 1; /* more will follow.. eventually.. */ 13598c2ecf20Sopenharmony_ci con_flag_set(con, CON_FLAG_WRITE_PENDING); 13608c2ecf20Sopenharmony_ci} 13618c2ecf20Sopenharmony_ci 13628c2ecf20Sopenharmony_ci/* 13638c2ecf20Sopenharmony_ci * Prepare to share the seq during handshake 13648c2ecf20Sopenharmony_ci */ 13658c2ecf20Sopenharmony_cistatic void prepare_write_seq(struct ceph_connection *con) 13668c2ecf20Sopenharmony_ci{ 13678c2ecf20Sopenharmony_ci dout("prepare_write_seq %p %llu -> %llu\n", con, 13688c2ecf20Sopenharmony_ci con->in_seq_acked, con->in_seq); 13698c2ecf20Sopenharmony_ci con->in_seq_acked = con->in_seq; 13708c2ecf20Sopenharmony_ci 13718c2ecf20Sopenharmony_ci con_out_kvec_reset(con); 13728c2ecf20Sopenharmony_ci 13738c2ecf20Sopenharmony_ci con->out_temp_ack = cpu_to_le64(con->in_seq_acked); 13748c2ecf20Sopenharmony_ci con_out_kvec_add(con, sizeof (con->out_temp_ack), 13758c2ecf20Sopenharmony_ci &con->out_temp_ack); 13768c2ecf20Sopenharmony_ci 13778c2ecf20Sopenharmony_ci con_flag_set(con, CON_FLAG_WRITE_PENDING); 13788c2ecf20Sopenharmony_ci} 13798c2ecf20Sopenharmony_ci 13808c2ecf20Sopenharmony_ci/* 13818c2ecf20Sopenharmony_ci * Prepare to write keepalive byte. 13828c2ecf20Sopenharmony_ci */ 13838c2ecf20Sopenharmony_cistatic void prepare_write_keepalive(struct ceph_connection *con) 13848c2ecf20Sopenharmony_ci{ 13858c2ecf20Sopenharmony_ci dout("prepare_write_keepalive %p\n", con); 13868c2ecf20Sopenharmony_ci con_out_kvec_reset(con); 13878c2ecf20Sopenharmony_ci if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { 13888c2ecf20Sopenharmony_ci struct timespec64 now; 13898c2ecf20Sopenharmony_ci 13908c2ecf20Sopenharmony_ci ktime_get_real_ts64(&now); 13918c2ecf20Sopenharmony_ci con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); 13928c2ecf20Sopenharmony_ci ceph_encode_timespec64(&con->out_temp_keepalive2, &now); 13938c2ecf20Sopenharmony_ci con_out_kvec_add(con, sizeof(con->out_temp_keepalive2), 13948c2ecf20Sopenharmony_ci &con->out_temp_keepalive2); 13958c2ecf20Sopenharmony_ci } else { 13968c2ecf20Sopenharmony_ci con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); 13978c2ecf20Sopenharmony_ci } 13988c2ecf20Sopenharmony_ci con_flag_set(con, CON_FLAG_WRITE_PENDING); 13998c2ecf20Sopenharmony_ci} 14008c2ecf20Sopenharmony_ci 14018c2ecf20Sopenharmony_ci/* 14028c2ecf20Sopenharmony_ci * Connection negotiation. 14038c2ecf20Sopenharmony_ci */ 14048c2ecf20Sopenharmony_ci 14058c2ecf20Sopenharmony_cistatic int get_connect_authorizer(struct ceph_connection *con) 14068c2ecf20Sopenharmony_ci{ 14078c2ecf20Sopenharmony_ci struct ceph_auth_handshake *auth; 14088c2ecf20Sopenharmony_ci int auth_proto; 14098c2ecf20Sopenharmony_ci 14108c2ecf20Sopenharmony_ci if (!con->ops->get_authorizer) { 14118c2ecf20Sopenharmony_ci con->auth = NULL; 14128c2ecf20Sopenharmony_ci con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN; 14138c2ecf20Sopenharmony_ci con->out_connect.authorizer_len = 0; 14148c2ecf20Sopenharmony_ci return 0; 14158c2ecf20Sopenharmony_ci } 14168c2ecf20Sopenharmony_ci 14178c2ecf20Sopenharmony_ci auth = con->ops->get_authorizer(con, &auth_proto, con->auth_retry); 14188c2ecf20Sopenharmony_ci if (IS_ERR(auth)) 14198c2ecf20Sopenharmony_ci return PTR_ERR(auth); 14208c2ecf20Sopenharmony_ci 14218c2ecf20Sopenharmony_ci con->auth = auth; 14228c2ecf20Sopenharmony_ci con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto); 14238c2ecf20Sopenharmony_ci con->out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len); 14248c2ecf20Sopenharmony_ci return 0; 14258c2ecf20Sopenharmony_ci} 14268c2ecf20Sopenharmony_ci 14278c2ecf20Sopenharmony_ci/* 14288c2ecf20Sopenharmony_ci * We connected to a peer and are saying hello. 14298c2ecf20Sopenharmony_ci */ 14308c2ecf20Sopenharmony_cistatic void prepare_write_banner(struct ceph_connection *con) 14318c2ecf20Sopenharmony_ci{ 14328c2ecf20Sopenharmony_ci con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); 14338c2ecf20Sopenharmony_ci con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr), 14348c2ecf20Sopenharmony_ci &con->msgr->my_enc_addr); 14358c2ecf20Sopenharmony_ci 14368c2ecf20Sopenharmony_ci con->out_more = 0; 14378c2ecf20Sopenharmony_ci con_flag_set(con, CON_FLAG_WRITE_PENDING); 14388c2ecf20Sopenharmony_ci} 14398c2ecf20Sopenharmony_ci 14408c2ecf20Sopenharmony_cistatic void __prepare_write_connect(struct ceph_connection *con) 14418c2ecf20Sopenharmony_ci{ 14428c2ecf20Sopenharmony_ci con_out_kvec_add(con, sizeof(con->out_connect), &con->out_connect); 14438c2ecf20Sopenharmony_ci if (con->auth) 14448c2ecf20Sopenharmony_ci con_out_kvec_add(con, con->auth->authorizer_buf_len, 14458c2ecf20Sopenharmony_ci con->auth->authorizer_buf); 14468c2ecf20Sopenharmony_ci 14478c2ecf20Sopenharmony_ci con->out_more = 0; 14488c2ecf20Sopenharmony_ci con_flag_set(con, CON_FLAG_WRITE_PENDING); 14498c2ecf20Sopenharmony_ci} 14508c2ecf20Sopenharmony_ci 14518c2ecf20Sopenharmony_cistatic int prepare_write_connect(struct ceph_connection *con) 14528c2ecf20Sopenharmony_ci{ 14538c2ecf20Sopenharmony_ci unsigned int global_seq = get_global_seq(con->msgr, 0); 14548c2ecf20Sopenharmony_ci int proto; 14558c2ecf20Sopenharmony_ci int ret; 14568c2ecf20Sopenharmony_ci 14578c2ecf20Sopenharmony_ci switch (con->peer_name.type) { 14588c2ecf20Sopenharmony_ci case CEPH_ENTITY_TYPE_MON: 14598c2ecf20Sopenharmony_ci proto = CEPH_MONC_PROTOCOL; 14608c2ecf20Sopenharmony_ci break; 14618c2ecf20Sopenharmony_ci case CEPH_ENTITY_TYPE_OSD: 14628c2ecf20Sopenharmony_ci proto = CEPH_OSDC_PROTOCOL; 14638c2ecf20Sopenharmony_ci break; 14648c2ecf20Sopenharmony_ci case CEPH_ENTITY_TYPE_MDS: 14658c2ecf20Sopenharmony_ci proto = CEPH_MDSC_PROTOCOL; 14668c2ecf20Sopenharmony_ci break; 14678c2ecf20Sopenharmony_ci default: 14688c2ecf20Sopenharmony_ci BUG(); 14698c2ecf20Sopenharmony_ci } 14708c2ecf20Sopenharmony_ci 14718c2ecf20Sopenharmony_ci dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, 14728c2ecf20Sopenharmony_ci con->connect_seq, global_seq, proto); 14738c2ecf20Sopenharmony_ci 14748c2ecf20Sopenharmony_ci con->out_connect.features = 14758c2ecf20Sopenharmony_ci cpu_to_le64(from_msgr(con->msgr)->supported_features); 14768c2ecf20Sopenharmony_ci con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); 14778c2ecf20Sopenharmony_ci con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); 14788c2ecf20Sopenharmony_ci con->out_connect.global_seq = cpu_to_le32(global_seq); 14798c2ecf20Sopenharmony_ci con->out_connect.protocol_version = cpu_to_le32(proto); 14808c2ecf20Sopenharmony_ci con->out_connect.flags = 0; 14818c2ecf20Sopenharmony_ci 14828c2ecf20Sopenharmony_ci ret = get_connect_authorizer(con); 14838c2ecf20Sopenharmony_ci if (ret) 14848c2ecf20Sopenharmony_ci return ret; 14858c2ecf20Sopenharmony_ci 14868c2ecf20Sopenharmony_ci __prepare_write_connect(con); 14878c2ecf20Sopenharmony_ci return 0; 14888c2ecf20Sopenharmony_ci} 14898c2ecf20Sopenharmony_ci 14908c2ecf20Sopenharmony_ci/* 14918c2ecf20Sopenharmony_ci * write as much of pending kvecs to the socket as we can. 14928c2ecf20Sopenharmony_ci * 1 -> done 14938c2ecf20Sopenharmony_ci * 0 -> socket full, but more to do 14948c2ecf20Sopenharmony_ci * <0 -> error 14958c2ecf20Sopenharmony_ci */ 14968c2ecf20Sopenharmony_cistatic int write_partial_kvec(struct ceph_connection *con) 14978c2ecf20Sopenharmony_ci{ 14988c2ecf20Sopenharmony_ci int ret; 14998c2ecf20Sopenharmony_ci 15008c2ecf20Sopenharmony_ci dout("write_partial_kvec %p %d left\n", con, con->out_kvec_bytes); 15018c2ecf20Sopenharmony_ci while (con->out_kvec_bytes > 0) { 15028c2ecf20Sopenharmony_ci ret = ceph_tcp_sendmsg(con->sock, con->out_kvec_cur, 15038c2ecf20Sopenharmony_ci con->out_kvec_left, con->out_kvec_bytes, 15048c2ecf20Sopenharmony_ci con->out_more); 15058c2ecf20Sopenharmony_ci if (ret <= 0) 15068c2ecf20Sopenharmony_ci goto out; 15078c2ecf20Sopenharmony_ci con->out_kvec_bytes -= ret; 15088c2ecf20Sopenharmony_ci if (con->out_kvec_bytes == 0) 15098c2ecf20Sopenharmony_ci break; /* done */ 15108c2ecf20Sopenharmony_ci 15118c2ecf20Sopenharmony_ci /* account for full iov entries consumed */ 15128c2ecf20Sopenharmony_ci while (ret >= con->out_kvec_cur->iov_len) { 15138c2ecf20Sopenharmony_ci BUG_ON(!con->out_kvec_left); 15148c2ecf20Sopenharmony_ci ret -= con->out_kvec_cur->iov_len; 15158c2ecf20Sopenharmony_ci con->out_kvec_cur++; 15168c2ecf20Sopenharmony_ci con->out_kvec_left--; 15178c2ecf20Sopenharmony_ci } 15188c2ecf20Sopenharmony_ci /* and for a partially-consumed entry */ 15198c2ecf20Sopenharmony_ci if (ret) { 15208c2ecf20Sopenharmony_ci con->out_kvec_cur->iov_len -= ret; 15218c2ecf20Sopenharmony_ci con->out_kvec_cur->iov_base += ret; 15228c2ecf20Sopenharmony_ci } 15238c2ecf20Sopenharmony_ci } 15248c2ecf20Sopenharmony_ci con->out_kvec_left = 0; 15258c2ecf20Sopenharmony_ci ret = 1; 15268c2ecf20Sopenharmony_ciout: 15278c2ecf20Sopenharmony_ci dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con, 15288c2ecf20Sopenharmony_ci con->out_kvec_bytes, con->out_kvec_left, ret); 15298c2ecf20Sopenharmony_ci return ret; /* done! */ 15308c2ecf20Sopenharmony_ci} 15318c2ecf20Sopenharmony_ci 15328c2ecf20Sopenharmony_cistatic u32 ceph_crc32c_page(u32 crc, struct page *page, 15338c2ecf20Sopenharmony_ci unsigned int page_offset, 15348c2ecf20Sopenharmony_ci unsigned int length) 15358c2ecf20Sopenharmony_ci{ 15368c2ecf20Sopenharmony_ci char *kaddr; 15378c2ecf20Sopenharmony_ci 15388c2ecf20Sopenharmony_ci kaddr = kmap(page); 15398c2ecf20Sopenharmony_ci BUG_ON(kaddr == NULL); 15408c2ecf20Sopenharmony_ci crc = crc32c(crc, kaddr + page_offset, length); 15418c2ecf20Sopenharmony_ci kunmap(page); 15428c2ecf20Sopenharmony_ci 15438c2ecf20Sopenharmony_ci return crc; 15448c2ecf20Sopenharmony_ci} 15458c2ecf20Sopenharmony_ci/* 15468c2ecf20Sopenharmony_ci * Write as much message data payload as we can. If we finish, queue 15478c2ecf20Sopenharmony_ci * up the footer. 15488c2ecf20Sopenharmony_ci * 1 -> done, footer is now queued in out_kvec[]. 15498c2ecf20Sopenharmony_ci * 0 -> socket full, but more to do 15508c2ecf20Sopenharmony_ci * <0 -> error 15518c2ecf20Sopenharmony_ci */ 15528c2ecf20Sopenharmony_cistatic int write_partial_message_data(struct ceph_connection *con) 15538c2ecf20Sopenharmony_ci{ 15548c2ecf20Sopenharmony_ci struct ceph_msg *msg = con->out_msg; 15558c2ecf20Sopenharmony_ci struct ceph_msg_data_cursor *cursor = &msg->cursor; 15568c2ecf20Sopenharmony_ci bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); 15578c2ecf20Sopenharmony_ci int more = MSG_MORE | MSG_SENDPAGE_NOTLAST; 15588c2ecf20Sopenharmony_ci u32 crc; 15598c2ecf20Sopenharmony_ci 15608c2ecf20Sopenharmony_ci dout("%s %p msg %p\n", __func__, con, msg); 15618c2ecf20Sopenharmony_ci 15628c2ecf20Sopenharmony_ci if (!msg->num_data_items) 15638c2ecf20Sopenharmony_ci return -EINVAL; 15648c2ecf20Sopenharmony_ci 15658c2ecf20Sopenharmony_ci /* 15668c2ecf20Sopenharmony_ci * Iterate through each page that contains data to be 15678c2ecf20Sopenharmony_ci * written, and send as much as possible for each. 15688c2ecf20Sopenharmony_ci * 15698c2ecf20Sopenharmony_ci * If we are calculating the data crc (the default), we will 15708c2ecf20Sopenharmony_ci * need to map the page. If we have no pages, they have 15718c2ecf20Sopenharmony_ci * been revoked, so use the zero page. 15728c2ecf20Sopenharmony_ci */ 15738c2ecf20Sopenharmony_ci crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0; 15748c2ecf20Sopenharmony_ci while (cursor->total_resid) { 15758c2ecf20Sopenharmony_ci struct page *page; 15768c2ecf20Sopenharmony_ci size_t page_offset; 15778c2ecf20Sopenharmony_ci size_t length; 15788c2ecf20Sopenharmony_ci int ret; 15798c2ecf20Sopenharmony_ci 15808c2ecf20Sopenharmony_ci if (!cursor->resid) { 15818c2ecf20Sopenharmony_ci ceph_msg_data_advance(cursor, 0); 15828c2ecf20Sopenharmony_ci continue; 15838c2ecf20Sopenharmony_ci } 15848c2ecf20Sopenharmony_ci 15858c2ecf20Sopenharmony_ci page = ceph_msg_data_next(cursor, &page_offset, &length, NULL); 15868c2ecf20Sopenharmony_ci if (length == cursor->total_resid) 15878c2ecf20Sopenharmony_ci more = MSG_MORE; 15888c2ecf20Sopenharmony_ci ret = ceph_tcp_sendpage(con->sock, page, page_offset, length, 15898c2ecf20Sopenharmony_ci more); 15908c2ecf20Sopenharmony_ci if (ret <= 0) { 15918c2ecf20Sopenharmony_ci if (do_datacrc) 15928c2ecf20Sopenharmony_ci msg->footer.data_crc = cpu_to_le32(crc); 15938c2ecf20Sopenharmony_ci 15948c2ecf20Sopenharmony_ci return ret; 15958c2ecf20Sopenharmony_ci } 15968c2ecf20Sopenharmony_ci if (do_datacrc && cursor->need_crc) 15978c2ecf20Sopenharmony_ci crc = ceph_crc32c_page(crc, page, page_offset, length); 15988c2ecf20Sopenharmony_ci ceph_msg_data_advance(cursor, (size_t)ret); 15998c2ecf20Sopenharmony_ci } 16008c2ecf20Sopenharmony_ci 16018c2ecf20Sopenharmony_ci dout("%s %p msg %p done\n", __func__, con, msg); 16028c2ecf20Sopenharmony_ci 16038c2ecf20Sopenharmony_ci /* prepare and queue up footer, too */ 16048c2ecf20Sopenharmony_ci if (do_datacrc) 16058c2ecf20Sopenharmony_ci msg->footer.data_crc = cpu_to_le32(crc); 16068c2ecf20Sopenharmony_ci else 16078c2ecf20Sopenharmony_ci msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; 16088c2ecf20Sopenharmony_ci con_out_kvec_reset(con); 16098c2ecf20Sopenharmony_ci prepare_write_message_footer(con); 16108c2ecf20Sopenharmony_ci 16118c2ecf20Sopenharmony_ci return 1; /* must return > 0 to indicate success */ 16128c2ecf20Sopenharmony_ci} 16138c2ecf20Sopenharmony_ci 16148c2ecf20Sopenharmony_ci/* 16158c2ecf20Sopenharmony_ci * write some zeros 16168c2ecf20Sopenharmony_ci */ 16178c2ecf20Sopenharmony_cistatic int write_partial_skip(struct ceph_connection *con) 16188c2ecf20Sopenharmony_ci{ 16198c2ecf20Sopenharmony_ci int more = MSG_MORE | MSG_SENDPAGE_NOTLAST; 16208c2ecf20Sopenharmony_ci int ret; 16218c2ecf20Sopenharmony_ci 16228c2ecf20Sopenharmony_ci dout("%s %p %d left\n", __func__, con, con->out_skip); 16238c2ecf20Sopenharmony_ci while (con->out_skip > 0) { 16248c2ecf20Sopenharmony_ci size_t size = min(con->out_skip, (int) PAGE_SIZE); 16258c2ecf20Sopenharmony_ci 16268c2ecf20Sopenharmony_ci if (size == con->out_skip) 16278c2ecf20Sopenharmony_ci more = MSG_MORE; 16288c2ecf20Sopenharmony_ci ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, more); 16298c2ecf20Sopenharmony_ci if (ret <= 0) 16308c2ecf20Sopenharmony_ci goto out; 16318c2ecf20Sopenharmony_ci con->out_skip -= ret; 16328c2ecf20Sopenharmony_ci } 16338c2ecf20Sopenharmony_ci ret = 1; 16348c2ecf20Sopenharmony_ciout: 16358c2ecf20Sopenharmony_ci return ret; 16368c2ecf20Sopenharmony_ci} 16378c2ecf20Sopenharmony_ci 16388c2ecf20Sopenharmony_ci/* 16398c2ecf20Sopenharmony_ci * Prepare to read connection handshake, or an ack. 16408c2ecf20Sopenharmony_ci */ 16418c2ecf20Sopenharmony_cistatic void prepare_read_banner(struct ceph_connection *con) 16428c2ecf20Sopenharmony_ci{ 16438c2ecf20Sopenharmony_ci dout("prepare_read_banner %p\n", con); 16448c2ecf20Sopenharmony_ci con->in_base_pos = 0; 16458c2ecf20Sopenharmony_ci} 16468c2ecf20Sopenharmony_ci 16478c2ecf20Sopenharmony_cistatic void prepare_read_connect(struct ceph_connection *con) 16488c2ecf20Sopenharmony_ci{ 16498c2ecf20Sopenharmony_ci dout("prepare_read_connect %p\n", con); 16508c2ecf20Sopenharmony_ci con->in_base_pos = 0; 16518c2ecf20Sopenharmony_ci} 16528c2ecf20Sopenharmony_ci 16538c2ecf20Sopenharmony_cistatic void prepare_read_ack(struct ceph_connection *con) 16548c2ecf20Sopenharmony_ci{ 16558c2ecf20Sopenharmony_ci dout("prepare_read_ack %p\n", con); 16568c2ecf20Sopenharmony_ci con->in_base_pos = 0; 16578c2ecf20Sopenharmony_ci} 16588c2ecf20Sopenharmony_ci 16598c2ecf20Sopenharmony_cistatic void prepare_read_seq(struct ceph_connection *con) 16608c2ecf20Sopenharmony_ci{ 16618c2ecf20Sopenharmony_ci dout("prepare_read_seq %p\n", con); 16628c2ecf20Sopenharmony_ci con->in_base_pos = 0; 16638c2ecf20Sopenharmony_ci con->in_tag = CEPH_MSGR_TAG_SEQ; 16648c2ecf20Sopenharmony_ci} 16658c2ecf20Sopenharmony_ci 16668c2ecf20Sopenharmony_cistatic void prepare_read_tag(struct ceph_connection *con) 16678c2ecf20Sopenharmony_ci{ 16688c2ecf20Sopenharmony_ci dout("prepare_read_tag %p\n", con); 16698c2ecf20Sopenharmony_ci con->in_base_pos = 0; 16708c2ecf20Sopenharmony_ci con->in_tag = CEPH_MSGR_TAG_READY; 16718c2ecf20Sopenharmony_ci} 16728c2ecf20Sopenharmony_ci 16738c2ecf20Sopenharmony_cistatic void prepare_read_keepalive_ack(struct ceph_connection *con) 16748c2ecf20Sopenharmony_ci{ 16758c2ecf20Sopenharmony_ci dout("prepare_read_keepalive_ack %p\n", con); 16768c2ecf20Sopenharmony_ci con->in_base_pos = 0; 16778c2ecf20Sopenharmony_ci} 16788c2ecf20Sopenharmony_ci 16798c2ecf20Sopenharmony_ci/* 16808c2ecf20Sopenharmony_ci * Prepare to read a message. 16818c2ecf20Sopenharmony_ci */ 16828c2ecf20Sopenharmony_cistatic int prepare_read_message(struct ceph_connection *con) 16838c2ecf20Sopenharmony_ci{ 16848c2ecf20Sopenharmony_ci dout("prepare_read_message %p\n", con); 16858c2ecf20Sopenharmony_ci BUG_ON(con->in_msg != NULL); 16868c2ecf20Sopenharmony_ci con->in_base_pos = 0; 16878c2ecf20Sopenharmony_ci con->in_front_crc = con->in_middle_crc = con->in_data_crc = 0; 16888c2ecf20Sopenharmony_ci return 0; 16898c2ecf20Sopenharmony_ci} 16908c2ecf20Sopenharmony_ci 16918c2ecf20Sopenharmony_ci 16928c2ecf20Sopenharmony_cistatic int read_partial(struct ceph_connection *con, 16938c2ecf20Sopenharmony_ci int end, int size, void *object) 16948c2ecf20Sopenharmony_ci{ 16958c2ecf20Sopenharmony_ci while (con->in_base_pos < end) { 16968c2ecf20Sopenharmony_ci int left = end - con->in_base_pos; 16978c2ecf20Sopenharmony_ci int have = size - left; 16988c2ecf20Sopenharmony_ci int ret = ceph_tcp_recvmsg(con->sock, object + have, left); 16998c2ecf20Sopenharmony_ci if (ret <= 0) 17008c2ecf20Sopenharmony_ci return ret; 17018c2ecf20Sopenharmony_ci con->in_base_pos += ret; 17028c2ecf20Sopenharmony_ci } 17038c2ecf20Sopenharmony_ci return 1; 17048c2ecf20Sopenharmony_ci} 17058c2ecf20Sopenharmony_ci 17068c2ecf20Sopenharmony_ci 17078c2ecf20Sopenharmony_ci/* 17088c2ecf20Sopenharmony_ci * Read all or part of the connect-side handshake on a new connection 17098c2ecf20Sopenharmony_ci */ 17108c2ecf20Sopenharmony_cistatic int read_partial_banner(struct ceph_connection *con) 17118c2ecf20Sopenharmony_ci{ 17128c2ecf20Sopenharmony_ci int size; 17138c2ecf20Sopenharmony_ci int end; 17148c2ecf20Sopenharmony_ci int ret; 17158c2ecf20Sopenharmony_ci 17168c2ecf20Sopenharmony_ci dout("read_partial_banner %p at %d\n", con, con->in_base_pos); 17178c2ecf20Sopenharmony_ci 17188c2ecf20Sopenharmony_ci /* peer's banner */ 17198c2ecf20Sopenharmony_ci size = strlen(CEPH_BANNER); 17208c2ecf20Sopenharmony_ci end = size; 17218c2ecf20Sopenharmony_ci ret = read_partial(con, end, size, con->in_banner); 17228c2ecf20Sopenharmony_ci if (ret <= 0) 17238c2ecf20Sopenharmony_ci goto out; 17248c2ecf20Sopenharmony_ci 17258c2ecf20Sopenharmony_ci size = sizeof (con->actual_peer_addr); 17268c2ecf20Sopenharmony_ci end += size; 17278c2ecf20Sopenharmony_ci ret = read_partial(con, end, size, &con->actual_peer_addr); 17288c2ecf20Sopenharmony_ci if (ret <= 0) 17298c2ecf20Sopenharmony_ci goto out; 17308c2ecf20Sopenharmony_ci ceph_decode_banner_addr(&con->actual_peer_addr); 17318c2ecf20Sopenharmony_ci 17328c2ecf20Sopenharmony_ci size = sizeof (con->peer_addr_for_me); 17338c2ecf20Sopenharmony_ci end += size; 17348c2ecf20Sopenharmony_ci ret = read_partial(con, end, size, &con->peer_addr_for_me); 17358c2ecf20Sopenharmony_ci if (ret <= 0) 17368c2ecf20Sopenharmony_ci goto out; 17378c2ecf20Sopenharmony_ci ceph_decode_banner_addr(&con->peer_addr_for_me); 17388c2ecf20Sopenharmony_ci 17398c2ecf20Sopenharmony_ciout: 17408c2ecf20Sopenharmony_ci return ret; 17418c2ecf20Sopenharmony_ci} 17428c2ecf20Sopenharmony_ci 17438c2ecf20Sopenharmony_cistatic int read_partial_connect(struct ceph_connection *con) 17448c2ecf20Sopenharmony_ci{ 17458c2ecf20Sopenharmony_ci int size; 17468c2ecf20Sopenharmony_ci int end; 17478c2ecf20Sopenharmony_ci int ret; 17488c2ecf20Sopenharmony_ci 17498c2ecf20Sopenharmony_ci dout("read_partial_connect %p at %d\n", con, con->in_base_pos); 17508c2ecf20Sopenharmony_ci 17518c2ecf20Sopenharmony_ci size = sizeof (con->in_reply); 17528c2ecf20Sopenharmony_ci end = size; 17538c2ecf20Sopenharmony_ci ret = read_partial(con, end, size, &con->in_reply); 17548c2ecf20Sopenharmony_ci if (ret <= 0) 17558c2ecf20Sopenharmony_ci goto out; 17568c2ecf20Sopenharmony_ci 17578c2ecf20Sopenharmony_ci if (con->auth) { 17588c2ecf20Sopenharmony_ci size = le32_to_cpu(con->in_reply.authorizer_len); 17598c2ecf20Sopenharmony_ci if (size > con->auth->authorizer_reply_buf_len) { 17608c2ecf20Sopenharmony_ci pr_err("authorizer reply too big: %d > %zu\n", size, 17618c2ecf20Sopenharmony_ci con->auth->authorizer_reply_buf_len); 17628c2ecf20Sopenharmony_ci ret = -EINVAL; 17638c2ecf20Sopenharmony_ci goto out; 17648c2ecf20Sopenharmony_ci } 17658c2ecf20Sopenharmony_ci 17668c2ecf20Sopenharmony_ci end += size; 17678c2ecf20Sopenharmony_ci ret = read_partial(con, end, size, 17688c2ecf20Sopenharmony_ci con->auth->authorizer_reply_buf); 17698c2ecf20Sopenharmony_ci if (ret <= 0) 17708c2ecf20Sopenharmony_ci goto out; 17718c2ecf20Sopenharmony_ci } 17728c2ecf20Sopenharmony_ci 17738c2ecf20Sopenharmony_ci dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", 17748c2ecf20Sopenharmony_ci con, (int)con->in_reply.tag, 17758c2ecf20Sopenharmony_ci le32_to_cpu(con->in_reply.connect_seq), 17768c2ecf20Sopenharmony_ci le32_to_cpu(con->in_reply.global_seq)); 17778c2ecf20Sopenharmony_ciout: 17788c2ecf20Sopenharmony_ci return ret; 17798c2ecf20Sopenharmony_ci} 17808c2ecf20Sopenharmony_ci 17818c2ecf20Sopenharmony_ci/* 17828c2ecf20Sopenharmony_ci * Verify the hello banner looks okay. 17838c2ecf20Sopenharmony_ci */ 17848c2ecf20Sopenharmony_cistatic int verify_hello(struct ceph_connection *con) 17858c2ecf20Sopenharmony_ci{ 17868c2ecf20Sopenharmony_ci if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { 17878c2ecf20Sopenharmony_ci pr_err("connect to %s got bad banner\n", 17888c2ecf20Sopenharmony_ci ceph_pr_addr(&con->peer_addr)); 17898c2ecf20Sopenharmony_ci con->error_msg = "protocol error, bad banner"; 17908c2ecf20Sopenharmony_ci return -1; 17918c2ecf20Sopenharmony_ci } 17928c2ecf20Sopenharmony_ci return 0; 17938c2ecf20Sopenharmony_ci} 17948c2ecf20Sopenharmony_ci 17958c2ecf20Sopenharmony_cistatic bool addr_is_blank(struct ceph_entity_addr *addr) 17968c2ecf20Sopenharmony_ci{ 17978c2ecf20Sopenharmony_ci struct sockaddr_storage ss = addr->in_addr; /* align */ 17988c2ecf20Sopenharmony_ci struct in_addr *addr4 = &((struct sockaddr_in *)&ss)->sin_addr; 17998c2ecf20Sopenharmony_ci struct in6_addr *addr6 = &((struct sockaddr_in6 *)&ss)->sin6_addr; 18008c2ecf20Sopenharmony_ci 18018c2ecf20Sopenharmony_ci switch (ss.ss_family) { 18028c2ecf20Sopenharmony_ci case AF_INET: 18038c2ecf20Sopenharmony_ci return addr4->s_addr == htonl(INADDR_ANY); 18048c2ecf20Sopenharmony_ci case AF_INET6: 18058c2ecf20Sopenharmony_ci return ipv6_addr_any(addr6); 18068c2ecf20Sopenharmony_ci default: 18078c2ecf20Sopenharmony_ci return true; 18088c2ecf20Sopenharmony_ci } 18098c2ecf20Sopenharmony_ci} 18108c2ecf20Sopenharmony_ci 18118c2ecf20Sopenharmony_cistatic int addr_port(struct ceph_entity_addr *addr) 18128c2ecf20Sopenharmony_ci{ 18138c2ecf20Sopenharmony_ci switch (get_unaligned(&addr->in_addr.ss_family)) { 18148c2ecf20Sopenharmony_ci case AF_INET: 18158c2ecf20Sopenharmony_ci return ntohs(get_unaligned(&((struct sockaddr_in *)&addr->in_addr)->sin_port)); 18168c2ecf20Sopenharmony_ci case AF_INET6: 18178c2ecf20Sopenharmony_ci return ntohs(get_unaligned(&((struct sockaddr_in6 *)&addr->in_addr)->sin6_port)); 18188c2ecf20Sopenharmony_ci } 18198c2ecf20Sopenharmony_ci return 0; 18208c2ecf20Sopenharmony_ci} 18218c2ecf20Sopenharmony_ci 18228c2ecf20Sopenharmony_cistatic void addr_set_port(struct ceph_entity_addr *addr, int p) 18238c2ecf20Sopenharmony_ci{ 18248c2ecf20Sopenharmony_ci switch (get_unaligned(&addr->in_addr.ss_family)) { 18258c2ecf20Sopenharmony_ci case AF_INET: 18268c2ecf20Sopenharmony_ci put_unaligned(htons(p), &((struct sockaddr_in *)&addr->in_addr)->sin_port); 18278c2ecf20Sopenharmony_ci break; 18288c2ecf20Sopenharmony_ci case AF_INET6: 18298c2ecf20Sopenharmony_ci put_unaligned(htons(p), &((struct sockaddr_in6 *)&addr->in_addr)->sin6_port); 18308c2ecf20Sopenharmony_ci break; 18318c2ecf20Sopenharmony_ci } 18328c2ecf20Sopenharmony_ci} 18338c2ecf20Sopenharmony_ci 18348c2ecf20Sopenharmony_ci/* 18358c2ecf20Sopenharmony_ci * Unlike other *_pton function semantics, zero indicates success. 18368c2ecf20Sopenharmony_ci */ 18378c2ecf20Sopenharmony_cistatic int ceph_pton(const char *str, size_t len, struct ceph_entity_addr *addr, 18388c2ecf20Sopenharmony_ci char delim, const char **ipend) 18398c2ecf20Sopenharmony_ci{ 18408c2ecf20Sopenharmony_ci memset(&addr->in_addr, 0, sizeof(addr->in_addr)); 18418c2ecf20Sopenharmony_ci 18428c2ecf20Sopenharmony_ci if (in4_pton(str, len, (u8 *)&((struct sockaddr_in *)&addr->in_addr)->sin_addr.s_addr, delim, ipend)) { 18438c2ecf20Sopenharmony_ci put_unaligned(AF_INET, &addr->in_addr.ss_family); 18448c2ecf20Sopenharmony_ci return 0; 18458c2ecf20Sopenharmony_ci } 18468c2ecf20Sopenharmony_ci 18478c2ecf20Sopenharmony_ci if (in6_pton(str, len, (u8 *)&((struct sockaddr_in6 *)&addr->in_addr)->sin6_addr.s6_addr, delim, ipend)) { 18488c2ecf20Sopenharmony_ci put_unaligned(AF_INET6, &addr->in_addr.ss_family); 18498c2ecf20Sopenharmony_ci return 0; 18508c2ecf20Sopenharmony_ci } 18518c2ecf20Sopenharmony_ci 18528c2ecf20Sopenharmony_ci return -EINVAL; 18538c2ecf20Sopenharmony_ci} 18548c2ecf20Sopenharmony_ci 18558c2ecf20Sopenharmony_ci/* 18568c2ecf20Sopenharmony_ci * Extract hostname string and resolve using kernel DNS facility. 18578c2ecf20Sopenharmony_ci */ 18588c2ecf20Sopenharmony_ci#ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER 18598c2ecf20Sopenharmony_cistatic int ceph_dns_resolve_name(const char *name, size_t namelen, 18608c2ecf20Sopenharmony_ci struct ceph_entity_addr *addr, char delim, const char **ipend) 18618c2ecf20Sopenharmony_ci{ 18628c2ecf20Sopenharmony_ci const char *end, *delim_p; 18638c2ecf20Sopenharmony_ci char *colon_p, *ip_addr = NULL; 18648c2ecf20Sopenharmony_ci int ip_len, ret; 18658c2ecf20Sopenharmony_ci 18668c2ecf20Sopenharmony_ci /* 18678c2ecf20Sopenharmony_ci * The end of the hostname occurs immediately preceding the delimiter or 18688c2ecf20Sopenharmony_ci * the port marker (':') where the delimiter takes precedence. 18698c2ecf20Sopenharmony_ci */ 18708c2ecf20Sopenharmony_ci delim_p = memchr(name, delim, namelen); 18718c2ecf20Sopenharmony_ci colon_p = memchr(name, ':', namelen); 18728c2ecf20Sopenharmony_ci 18738c2ecf20Sopenharmony_ci if (delim_p && colon_p) 18748c2ecf20Sopenharmony_ci end = delim_p < colon_p ? delim_p : colon_p; 18758c2ecf20Sopenharmony_ci else if (!delim_p && colon_p) 18768c2ecf20Sopenharmony_ci end = colon_p; 18778c2ecf20Sopenharmony_ci else { 18788c2ecf20Sopenharmony_ci end = delim_p; 18798c2ecf20Sopenharmony_ci if (!end) /* case: hostname:/ */ 18808c2ecf20Sopenharmony_ci end = name + namelen; 18818c2ecf20Sopenharmony_ci } 18828c2ecf20Sopenharmony_ci 18838c2ecf20Sopenharmony_ci if (end <= name) 18848c2ecf20Sopenharmony_ci return -EINVAL; 18858c2ecf20Sopenharmony_ci 18868c2ecf20Sopenharmony_ci /* do dns_resolve upcall */ 18878c2ecf20Sopenharmony_ci ip_len = dns_query(current->nsproxy->net_ns, 18888c2ecf20Sopenharmony_ci NULL, name, end - name, NULL, &ip_addr, NULL, false); 18898c2ecf20Sopenharmony_ci if (ip_len > 0) 18908c2ecf20Sopenharmony_ci ret = ceph_pton(ip_addr, ip_len, addr, -1, NULL); 18918c2ecf20Sopenharmony_ci else 18928c2ecf20Sopenharmony_ci ret = -ESRCH; 18938c2ecf20Sopenharmony_ci 18948c2ecf20Sopenharmony_ci kfree(ip_addr); 18958c2ecf20Sopenharmony_ci 18968c2ecf20Sopenharmony_ci *ipend = end; 18978c2ecf20Sopenharmony_ci 18988c2ecf20Sopenharmony_ci pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name, 18998c2ecf20Sopenharmony_ci ret, ret ? "failed" : ceph_pr_addr(addr)); 19008c2ecf20Sopenharmony_ci 19018c2ecf20Sopenharmony_ci return ret; 19028c2ecf20Sopenharmony_ci} 19038c2ecf20Sopenharmony_ci#else 19048c2ecf20Sopenharmony_cistatic inline int ceph_dns_resolve_name(const char *name, size_t namelen, 19058c2ecf20Sopenharmony_ci struct ceph_entity_addr *addr, char delim, const char **ipend) 19068c2ecf20Sopenharmony_ci{ 19078c2ecf20Sopenharmony_ci return -EINVAL; 19088c2ecf20Sopenharmony_ci} 19098c2ecf20Sopenharmony_ci#endif 19108c2ecf20Sopenharmony_ci 19118c2ecf20Sopenharmony_ci/* 19128c2ecf20Sopenharmony_ci * Parse a server name (IP or hostname). If a valid IP address is not found 19138c2ecf20Sopenharmony_ci * then try to extract a hostname to resolve using userspace DNS upcall. 19148c2ecf20Sopenharmony_ci */ 19158c2ecf20Sopenharmony_cistatic int ceph_parse_server_name(const char *name, size_t namelen, 19168c2ecf20Sopenharmony_ci struct ceph_entity_addr *addr, char delim, const char **ipend) 19178c2ecf20Sopenharmony_ci{ 19188c2ecf20Sopenharmony_ci int ret; 19198c2ecf20Sopenharmony_ci 19208c2ecf20Sopenharmony_ci ret = ceph_pton(name, namelen, addr, delim, ipend); 19218c2ecf20Sopenharmony_ci if (ret) 19228c2ecf20Sopenharmony_ci ret = ceph_dns_resolve_name(name, namelen, addr, delim, ipend); 19238c2ecf20Sopenharmony_ci 19248c2ecf20Sopenharmony_ci return ret; 19258c2ecf20Sopenharmony_ci} 19268c2ecf20Sopenharmony_ci 19278c2ecf20Sopenharmony_ci/* 19288c2ecf20Sopenharmony_ci * Parse an ip[:port] list into an addr array. Use the default 19298c2ecf20Sopenharmony_ci * monitor port if a port isn't specified. 19308c2ecf20Sopenharmony_ci */ 19318c2ecf20Sopenharmony_ciint ceph_parse_ips(const char *c, const char *end, 19328c2ecf20Sopenharmony_ci struct ceph_entity_addr *addr, 19338c2ecf20Sopenharmony_ci int max_count, int *count) 19348c2ecf20Sopenharmony_ci{ 19358c2ecf20Sopenharmony_ci int i, ret = -EINVAL; 19368c2ecf20Sopenharmony_ci const char *p = c; 19378c2ecf20Sopenharmony_ci 19388c2ecf20Sopenharmony_ci dout("parse_ips on '%.*s'\n", (int)(end-c), c); 19398c2ecf20Sopenharmony_ci for (i = 0; i < max_count; i++) { 19408c2ecf20Sopenharmony_ci const char *ipend; 19418c2ecf20Sopenharmony_ci int port; 19428c2ecf20Sopenharmony_ci char delim = ','; 19438c2ecf20Sopenharmony_ci 19448c2ecf20Sopenharmony_ci if (*p == '[') { 19458c2ecf20Sopenharmony_ci delim = ']'; 19468c2ecf20Sopenharmony_ci p++; 19478c2ecf20Sopenharmony_ci } 19488c2ecf20Sopenharmony_ci 19498c2ecf20Sopenharmony_ci ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend); 19508c2ecf20Sopenharmony_ci if (ret) 19518c2ecf20Sopenharmony_ci goto bad; 19528c2ecf20Sopenharmony_ci ret = -EINVAL; 19538c2ecf20Sopenharmony_ci 19548c2ecf20Sopenharmony_ci p = ipend; 19558c2ecf20Sopenharmony_ci 19568c2ecf20Sopenharmony_ci if (delim == ']') { 19578c2ecf20Sopenharmony_ci if (*p != ']') { 19588c2ecf20Sopenharmony_ci dout("missing matching ']'\n"); 19598c2ecf20Sopenharmony_ci goto bad; 19608c2ecf20Sopenharmony_ci } 19618c2ecf20Sopenharmony_ci p++; 19628c2ecf20Sopenharmony_ci } 19638c2ecf20Sopenharmony_ci 19648c2ecf20Sopenharmony_ci /* port? */ 19658c2ecf20Sopenharmony_ci if (p < end && *p == ':') { 19668c2ecf20Sopenharmony_ci port = 0; 19678c2ecf20Sopenharmony_ci p++; 19688c2ecf20Sopenharmony_ci while (p < end && *p >= '0' && *p <= '9') { 19698c2ecf20Sopenharmony_ci port = (port * 10) + (*p - '0'); 19708c2ecf20Sopenharmony_ci p++; 19718c2ecf20Sopenharmony_ci } 19728c2ecf20Sopenharmony_ci if (port == 0) 19738c2ecf20Sopenharmony_ci port = CEPH_MON_PORT; 19748c2ecf20Sopenharmony_ci else if (port > 65535) 19758c2ecf20Sopenharmony_ci goto bad; 19768c2ecf20Sopenharmony_ci } else { 19778c2ecf20Sopenharmony_ci port = CEPH_MON_PORT; 19788c2ecf20Sopenharmony_ci } 19798c2ecf20Sopenharmony_ci 19808c2ecf20Sopenharmony_ci addr_set_port(&addr[i], port); 19818c2ecf20Sopenharmony_ci addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY; 19828c2ecf20Sopenharmony_ci 19838c2ecf20Sopenharmony_ci dout("parse_ips got %s\n", ceph_pr_addr(&addr[i])); 19848c2ecf20Sopenharmony_ci 19858c2ecf20Sopenharmony_ci if (p == end) 19868c2ecf20Sopenharmony_ci break; 19878c2ecf20Sopenharmony_ci if (*p != ',') 19888c2ecf20Sopenharmony_ci goto bad; 19898c2ecf20Sopenharmony_ci p++; 19908c2ecf20Sopenharmony_ci } 19918c2ecf20Sopenharmony_ci 19928c2ecf20Sopenharmony_ci if (p != end) 19938c2ecf20Sopenharmony_ci goto bad; 19948c2ecf20Sopenharmony_ci 19958c2ecf20Sopenharmony_ci if (count) 19968c2ecf20Sopenharmony_ci *count = i + 1; 19978c2ecf20Sopenharmony_ci return 0; 19988c2ecf20Sopenharmony_ci 19998c2ecf20Sopenharmony_cibad: 20008c2ecf20Sopenharmony_ci return ret; 20018c2ecf20Sopenharmony_ci} 20028c2ecf20Sopenharmony_ci 20038c2ecf20Sopenharmony_cistatic int process_banner(struct ceph_connection *con) 20048c2ecf20Sopenharmony_ci{ 20058c2ecf20Sopenharmony_ci dout("process_banner on %p\n", con); 20068c2ecf20Sopenharmony_ci 20078c2ecf20Sopenharmony_ci if (verify_hello(con) < 0) 20088c2ecf20Sopenharmony_ci return -1; 20098c2ecf20Sopenharmony_ci 20108c2ecf20Sopenharmony_ci /* 20118c2ecf20Sopenharmony_ci * Make sure the other end is who we wanted. note that the other 20128c2ecf20Sopenharmony_ci * end may not yet know their ip address, so if it's 0.0.0.0, give 20138c2ecf20Sopenharmony_ci * them the benefit of the doubt. 20148c2ecf20Sopenharmony_ci */ 20158c2ecf20Sopenharmony_ci if (memcmp(&con->peer_addr, &con->actual_peer_addr, 20168c2ecf20Sopenharmony_ci sizeof(con->peer_addr)) != 0 && 20178c2ecf20Sopenharmony_ci !(addr_is_blank(&con->actual_peer_addr) && 20188c2ecf20Sopenharmony_ci con->actual_peer_addr.nonce == con->peer_addr.nonce)) { 20198c2ecf20Sopenharmony_ci pr_warn("wrong peer, want %s/%u, got %s/%u\n", 20208c2ecf20Sopenharmony_ci ceph_pr_addr(&con->peer_addr), 20218c2ecf20Sopenharmony_ci le32_to_cpu(con->peer_addr.nonce), 20228c2ecf20Sopenharmony_ci ceph_pr_addr(&con->actual_peer_addr), 20238c2ecf20Sopenharmony_ci le32_to_cpu(con->actual_peer_addr.nonce)); 20248c2ecf20Sopenharmony_ci con->error_msg = "wrong peer at address"; 20258c2ecf20Sopenharmony_ci return -1; 20268c2ecf20Sopenharmony_ci } 20278c2ecf20Sopenharmony_ci 20288c2ecf20Sopenharmony_ci /* 20298c2ecf20Sopenharmony_ci * did we learn our address? 20308c2ecf20Sopenharmony_ci */ 20318c2ecf20Sopenharmony_ci if (addr_is_blank(&con->msgr->inst.addr)) { 20328c2ecf20Sopenharmony_ci int port = addr_port(&con->msgr->inst.addr); 20338c2ecf20Sopenharmony_ci 20348c2ecf20Sopenharmony_ci memcpy(&con->msgr->inst.addr.in_addr, 20358c2ecf20Sopenharmony_ci &con->peer_addr_for_me.in_addr, 20368c2ecf20Sopenharmony_ci sizeof(con->peer_addr_for_me.in_addr)); 20378c2ecf20Sopenharmony_ci addr_set_port(&con->msgr->inst.addr, port); 20388c2ecf20Sopenharmony_ci encode_my_addr(con->msgr); 20398c2ecf20Sopenharmony_ci dout("process_banner learned my addr is %s\n", 20408c2ecf20Sopenharmony_ci ceph_pr_addr(&con->msgr->inst.addr)); 20418c2ecf20Sopenharmony_ci } 20428c2ecf20Sopenharmony_ci 20438c2ecf20Sopenharmony_ci return 0; 20448c2ecf20Sopenharmony_ci} 20458c2ecf20Sopenharmony_ci 20468c2ecf20Sopenharmony_cistatic int process_connect(struct ceph_connection *con) 20478c2ecf20Sopenharmony_ci{ 20488c2ecf20Sopenharmony_ci u64 sup_feat = from_msgr(con->msgr)->supported_features; 20498c2ecf20Sopenharmony_ci u64 req_feat = from_msgr(con->msgr)->required_features; 20508c2ecf20Sopenharmony_ci u64 server_feat = le64_to_cpu(con->in_reply.features); 20518c2ecf20Sopenharmony_ci int ret; 20528c2ecf20Sopenharmony_ci 20538c2ecf20Sopenharmony_ci dout("process_connect on %p tag %d\n", con, (int)con->in_tag); 20548c2ecf20Sopenharmony_ci 20558c2ecf20Sopenharmony_ci if (con->auth) { 20568c2ecf20Sopenharmony_ci int len = le32_to_cpu(con->in_reply.authorizer_len); 20578c2ecf20Sopenharmony_ci 20588c2ecf20Sopenharmony_ci /* 20598c2ecf20Sopenharmony_ci * Any connection that defines ->get_authorizer() 20608c2ecf20Sopenharmony_ci * should also define ->add_authorizer_challenge() and 20618c2ecf20Sopenharmony_ci * ->verify_authorizer_reply(). 20628c2ecf20Sopenharmony_ci * 20638c2ecf20Sopenharmony_ci * See get_connect_authorizer(). 20648c2ecf20Sopenharmony_ci */ 20658c2ecf20Sopenharmony_ci if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { 20668c2ecf20Sopenharmony_ci ret = con->ops->add_authorizer_challenge( 20678c2ecf20Sopenharmony_ci con, con->auth->authorizer_reply_buf, len); 20688c2ecf20Sopenharmony_ci if (ret < 0) 20698c2ecf20Sopenharmony_ci return ret; 20708c2ecf20Sopenharmony_ci 20718c2ecf20Sopenharmony_ci con_out_kvec_reset(con); 20728c2ecf20Sopenharmony_ci __prepare_write_connect(con); 20738c2ecf20Sopenharmony_ci prepare_read_connect(con); 20748c2ecf20Sopenharmony_ci return 0; 20758c2ecf20Sopenharmony_ci } 20768c2ecf20Sopenharmony_ci 20778c2ecf20Sopenharmony_ci if (len) { 20788c2ecf20Sopenharmony_ci ret = con->ops->verify_authorizer_reply(con); 20798c2ecf20Sopenharmony_ci if (ret < 0) { 20808c2ecf20Sopenharmony_ci con->error_msg = "bad authorize reply"; 20818c2ecf20Sopenharmony_ci return ret; 20828c2ecf20Sopenharmony_ci } 20838c2ecf20Sopenharmony_ci } 20848c2ecf20Sopenharmony_ci } 20858c2ecf20Sopenharmony_ci 20868c2ecf20Sopenharmony_ci switch (con->in_reply.tag) { 20878c2ecf20Sopenharmony_ci case CEPH_MSGR_TAG_FEATURES: 20888c2ecf20Sopenharmony_ci pr_err("%s%lld %s feature set mismatch," 20898c2ecf20Sopenharmony_ci " my %llx < server's %llx, missing %llx\n", 20908c2ecf20Sopenharmony_ci ENTITY_NAME(con->peer_name), 20918c2ecf20Sopenharmony_ci ceph_pr_addr(&con->peer_addr), 20928c2ecf20Sopenharmony_ci sup_feat, server_feat, server_feat & ~sup_feat); 20938c2ecf20Sopenharmony_ci con->error_msg = "missing required protocol features"; 20948c2ecf20Sopenharmony_ci reset_connection(con); 20958c2ecf20Sopenharmony_ci return -1; 20968c2ecf20Sopenharmony_ci 20978c2ecf20Sopenharmony_ci case CEPH_MSGR_TAG_BADPROTOVER: 20988c2ecf20Sopenharmony_ci pr_err("%s%lld %s protocol version mismatch," 20998c2ecf20Sopenharmony_ci " my %d != server's %d\n", 21008c2ecf20Sopenharmony_ci ENTITY_NAME(con->peer_name), 21018c2ecf20Sopenharmony_ci ceph_pr_addr(&con->peer_addr), 21028c2ecf20Sopenharmony_ci le32_to_cpu(con->out_connect.protocol_version), 21038c2ecf20Sopenharmony_ci le32_to_cpu(con->in_reply.protocol_version)); 21048c2ecf20Sopenharmony_ci con->error_msg = "protocol version mismatch"; 21058c2ecf20Sopenharmony_ci reset_connection(con); 21068c2ecf20Sopenharmony_ci return -1; 21078c2ecf20Sopenharmony_ci 21088c2ecf20Sopenharmony_ci case CEPH_MSGR_TAG_BADAUTHORIZER: 21098c2ecf20Sopenharmony_ci con->auth_retry++; 21108c2ecf20Sopenharmony_ci dout("process_connect %p got BADAUTHORIZER attempt %d\n", con, 21118c2ecf20Sopenharmony_ci con->auth_retry); 21128c2ecf20Sopenharmony_ci if (con->auth_retry == 2) { 21138c2ecf20Sopenharmony_ci con->error_msg = "connect authorization failure"; 21148c2ecf20Sopenharmony_ci return -1; 21158c2ecf20Sopenharmony_ci } 21168c2ecf20Sopenharmony_ci con_out_kvec_reset(con); 21178c2ecf20Sopenharmony_ci ret = prepare_write_connect(con); 21188c2ecf20Sopenharmony_ci if (ret < 0) 21198c2ecf20Sopenharmony_ci return ret; 21208c2ecf20Sopenharmony_ci prepare_read_connect(con); 21218c2ecf20Sopenharmony_ci break; 21228c2ecf20Sopenharmony_ci 21238c2ecf20Sopenharmony_ci case CEPH_MSGR_TAG_RESETSESSION: 21248c2ecf20Sopenharmony_ci /* 21258c2ecf20Sopenharmony_ci * If we connected with a large connect_seq but the peer 21268c2ecf20Sopenharmony_ci * has no record of a session with us (no connection, or 21278c2ecf20Sopenharmony_ci * connect_seq == 0), they will send RESETSESION to indicate 21288c2ecf20Sopenharmony_ci * that they must have reset their session, and may have 21298c2ecf20Sopenharmony_ci * dropped messages. 21308c2ecf20Sopenharmony_ci */ 21318c2ecf20Sopenharmony_ci dout("process_connect got RESET peer seq %u\n", 21328c2ecf20Sopenharmony_ci le32_to_cpu(con->in_reply.connect_seq)); 21338c2ecf20Sopenharmony_ci pr_err("%s%lld %s connection reset\n", 21348c2ecf20Sopenharmony_ci ENTITY_NAME(con->peer_name), 21358c2ecf20Sopenharmony_ci ceph_pr_addr(&con->peer_addr)); 21368c2ecf20Sopenharmony_ci reset_connection(con); 21378c2ecf20Sopenharmony_ci con_out_kvec_reset(con); 21388c2ecf20Sopenharmony_ci ret = prepare_write_connect(con); 21398c2ecf20Sopenharmony_ci if (ret < 0) 21408c2ecf20Sopenharmony_ci return ret; 21418c2ecf20Sopenharmony_ci prepare_read_connect(con); 21428c2ecf20Sopenharmony_ci 21438c2ecf20Sopenharmony_ci /* Tell ceph about it. */ 21448c2ecf20Sopenharmony_ci mutex_unlock(&con->mutex); 21458c2ecf20Sopenharmony_ci pr_info("reset on %s%lld\n", ENTITY_NAME(con->peer_name)); 21468c2ecf20Sopenharmony_ci if (con->ops->peer_reset) 21478c2ecf20Sopenharmony_ci con->ops->peer_reset(con); 21488c2ecf20Sopenharmony_ci mutex_lock(&con->mutex); 21498c2ecf20Sopenharmony_ci if (con->state != CON_STATE_NEGOTIATING) 21508c2ecf20Sopenharmony_ci return -EAGAIN; 21518c2ecf20Sopenharmony_ci break; 21528c2ecf20Sopenharmony_ci 21538c2ecf20Sopenharmony_ci case CEPH_MSGR_TAG_RETRY_SESSION: 21548c2ecf20Sopenharmony_ci /* 21558c2ecf20Sopenharmony_ci * If we sent a smaller connect_seq than the peer has, try 21568c2ecf20Sopenharmony_ci * again with a larger value. 21578c2ecf20Sopenharmony_ci */ 21588c2ecf20Sopenharmony_ci dout("process_connect got RETRY_SESSION my seq %u, peer %u\n", 21598c2ecf20Sopenharmony_ci le32_to_cpu(con->out_connect.connect_seq), 21608c2ecf20Sopenharmony_ci le32_to_cpu(con->in_reply.connect_seq)); 21618c2ecf20Sopenharmony_ci con->connect_seq = le32_to_cpu(con->in_reply.connect_seq); 21628c2ecf20Sopenharmony_ci con_out_kvec_reset(con); 21638c2ecf20Sopenharmony_ci ret = prepare_write_connect(con); 21648c2ecf20Sopenharmony_ci if (ret < 0) 21658c2ecf20Sopenharmony_ci return ret; 21668c2ecf20Sopenharmony_ci prepare_read_connect(con); 21678c2ecf20Sopenharmony_ci break; 21688c2ecf20Sopenharmony_ci 21698c2ecf20Sopenharmony_ci case CEPH_MSGR_TAG_RETRY_GLOBAL: 21708c2ecf20Sopenharmony_ci /* 21718c2ecf20Sopenharmony_ci * If we sent a smaller global_seq than the peer has, try 21728c2ecf20Sopenharmony_ci * again with a larger value. 21738c2ecf20Sopenharmony_ci */ 21748c2ecf20Sopenharmony_ci dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n", 21758c2ecf20Sopenharmony_ci con->peer_global_seq, 21768c2ecf20Sopenharmony_ci le32_to_cpu(con->in_reply.global_seq)); 21778c2ecf20Sopenharmony_ci get_global_seq(con->msgr, 21788c2ecf20Sopenharmony_ci le32_to_cpu(con->in_reply.global_seq)); 21798c2ecf20Sopenharmony_ci con_out_kvec_reset(con); 21808c2ecf20Sopenharmony_ci ret = prepare_write_connect(con); 21818c2ecf20Sopenharmony_ci if (ret < 0) 21828c2ecf20Sopenharmony_ci return ret; 21838c2ecf20Sopenharmony_ci prepare_read_connect(con); 21848c2ecf20Sopenharmony_ci break; 21858c2ecf20Sopenharmony_ci 21868c2ecf20Sopenharmony_ci case CEPH_MSGR_TAG_SEQ: 21878c2ecf20Sopenharmony_ci case CEPH_MSGR_TAG_READY: 21888c2ecf20Sopenharmony_ci if (req_feat & ~server_feat) { 21898c2ecf20Sopenharmony_ci pr_err("%s%lld %s protocol feature mismatch," 21908c2ecf20Sopenharmony_ci " my required %llx > server's %llx, need %llx\n", 21918c2ecf20Sopenharmony_ci ENTITY_NAME(con->peer_name), 21928c2ecf20Sopenharmony_ci ceph_pr_addr(&con->peer_addr), 21938c2ecf20Sopenharmony_ci req_feat, server_feat, req_feat & ~server_feat); 21948c2ecf20Sopenharmony_ci con->error_msg = "missing required protocol features"; 21958c2ecf20Sopenharmony_ci reset_connection(con); 21968c2ecf20Sopenharmony_ci return -1; 21978c2ecf20Sopenharmony_ci } 21988c2ecf20Sopenharmony_ci 21998c2ecf20Sopenharmony_ci WARN_ON(con->state != CON_STATE_NEGOTIATING); 22008c2ecf20Sopenharmony_ci con->state = CON_STATE_OPEN; 22018c2ecf20Sopenharmony_ci con->auth_retry = 0; /* we authenticated; clear flag */ 22028c2ecf20Sopenharmony_ci con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); 22038c2ecf20Sopenharmony_ci con->connect_seq++; 22048c2ecf20Sopenharmony_ci con->peer_features = server_feat; 22058c2ecf20Sopenharmony_ci dout("process_connect got READY gseq %d cseq %d (%d)\n", 22068c2ecf20Sopenharmony_ci con->peer_global_seq, 22078c2ecf20Sopenharmony_ci le32_to_cpu(con->in_reply.connect_seq), 22088c2ecf20Sopenharmony_ci con->connect_seq); 22098c2ecf20Sopenharmony_ci WARN_ON(con->connect_seq != 22108c2ecf20Sopenharmony_ci le32_to_cpu(con->in_reply.connect_seq)); 22118c2ecf20Sopenharmony_ci 22128c2ecf20Sopenharmony_ci if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY) 22138c2ecf20Sopenharmony_ci con_flag_set(con, CON_FLAG_LOSSYTX); 22148c2ecf20Sopenharmony_ci 22158c2ecf20Sopenharmony_ci con->delay = 0; /* reset backoff memory */ 22168c2ecf20Sopenharmony_ci 22178c2ecf20Sopenharmony_ci if (con->in_reply.tag == CEPH_MSGR_TAG_SEQ) { 22188c2ecf20Sopenharmony_ci prepare_write_seq(con); 22198c2ecf20Sopenharmony_ci prepare_read_seq(con); 22208c2ecf20Sopenharmony_ci } else { 22218c2ecf20Sopenharmony_ci prepare_read_tag(con); 22228c2ecf20Sopenharmony_ci } 22238c2ecf20Sopenharmony_ci break; 22248c2ecf20Sopenharmony_ci 22258c2ecf20Sopenharmony_ci case CEPH_MSGR_TAG_WAIT: 22268c2ecf20Sopenharmony_ci /* 22278c2ecf20Sopenharmony_ci * If there is a connection race (we are opening 22288c2ecf20Sopenharmony_ci * connections to each other), one of us may just have 22298c2ecf20Sopenharmony_ci * to WAIT. This shouldn't happen if we are the 22308c2ecf20Sopenharmony_ci * client. 22318c2ecf20Sopenharmony_ci */ 22328c2ecf20Sopenharmony_ci con->error_msg = "protocol error, got WAIT as client"; 22338c2ecf20Sopenharmony_ci return -1; 22348c2ecf20Sopenharmony_ci 22358c2ecf20Sopenharmony_ci default: 22368c2ecf20Sopenharmony_ci con->error_msg = "protocol error, garbage tag during connect"; 22378c2ecf20Sopenharmony_ci return -1; 22388c2ecf20Sopenharmony_ci } 22398c2ecf20Sopenharmony_ci return 0; 22408c2ecf20Sopenharmony_ci} 22418c2ecf20Sopenharmony_ci 22428c2ecf20Sopenharmony_ci 22438c2ecf20Sopenharmony_ci/* 22448c2ecf20Sopenharmony_ci * read (part of) an ack 22458c2ecf20Sopenharmony_ci */ 22468c2ecf20Sopenharmony_cistatic int read_partial_ack(struct ceph_connection *con) 22478c2ecf20Sopenharmony_ci{ 22488c2ecf20Sopenharmony_ci int size = sizeof (con->in_temp_ack); 22498c2ecf20Sopenharmony_ci int end = size; 22508c2ecf20Sopenharmony_ci 22518c2ecf20Sopenharmony_ci return read_partial(con, end, size, &con->in_temp_ack); 22528c2ecf20Sopenharmony_ci} 22538c2ecf20Sopenharmony_ci 22548c2ecf20Sopenharmony_ci/* 22558c2ecf20Sopenharmony_ci * We can finally discard anything that's been acked. 22568c2ecf20Sopenharmony_ci */ 22578c2ecf20Sopenharmony_cistatic void process_ack(struct ceph_connection *con) 22588c2ecf20Sopenharmony_ci{ 22598c2ecf20Sopenharmony_ci struct ceph_msg *m; 22608c2ecf20Sopenharmony_ci u64 ack = le64_to_cpu(con->in_temp_ack); 22618c2ecf20Sopenharmony_ci u64 seq; 22628c2ecf20Sopenharmony_ci bool reconnect = (con->in_tag == CEPH_MSGR_TAG_SEQ); 22638c2ecf20Sopenharmony_ci struct list_head *list = reconnect ? &con->out_queue : &con->out_sent; 22648c2ecf20Sopenharmony_ci 22658c2ecf20Sopenharmony_ci /* 22668c2ecf20Sopenharmony_ci * In the reconnect case, con_fault() has requeued messages 22678c2ecf20Sopenharmony_ci * in out_sent. We should cleanup old messages according to 22688c2ecf20Sopenharmony_ci * the reconnect seq. 22698c2ecf20Sopenharmony_ci */ 22708c2ecf20Sopenharmony_ci while (!list_empty(list)) { 22718c2ecf20Sopenharmony_ci m = list_first_entry(list, struct ceph_msg, list_head); 22728c2ecf20Sopenharmony_ci if (reconnect && m->needs_out_seq) 22738c2ecf20Sopenharmony_ci break; 22748c2ecf20Sopenharmony_ci seq = le64_to_cpu(m->hdr.seq); 22758c2ecf20Sopenharmony_ci if (seq > ack) 22768c2ecf20Sopenharmony_ci break; 22778c2ecf20Sopenharmony_ci dout("got ack for seq %llu type %d at %p\n", seq, 22788c2ecf20Sopenharmony_ci le16_to_cpu(m->hdr.type), m); 22798c2ecf20Sopenharmony_ci m->ack_stamp = jiffies; 22808c2ecf20Sopenharmony_ci ceph_msg_remove(m); 22818c2ecf20Sopenharmony_ci } 22828c2ecf20Sopenharmony_ci 22838c2ecf20Sopenharmony_ci prepare_read_tag(con); 22848c2ecf20Sopenharmony_ci} 22858c2ecf20Sopenharmony_ci 22868c2ecf20Sopenharmony_ci 22878c2ecf20Sopenharmony_cistatic int read_partial_message_section(struct ceph_connection *con, 22888c2ecf20Sopenharmony_ci struct kvec *section, 22898c2ecf20Sopenharmony_ci unsigned int sec_len, u32 *crc) 22908c2ecf20Sopenharmony_ci{ 22918c2ecf20Sopenharmony_ci int ret, left; 22928c2ecf20Sopenharmony_ci 22938c2ecf20Sopenharmony_ci BUG_ON(!section); 22948c2ecf20Sopenharmony_ci 22958c2ecf20Sopenharmony_ci while (section->iov_len < sec_len) { 22968c2ecf20Sopenharmony_ci BUG_ON(section->iov_base == NULL); 22978c2ecf20Sopenharmony_ci left = sec_len - section->iov_len; 22988c2ecf20Sopenharmony_ci ret = ceph_tcp_recvmsg(con->sock, (char *)section->iov_base + 22998c2ecf20Sopenharmony_ci section->iov_len, left); 23008c2ecf20Sopenharmony_ci if (ret <= 0) 23018c2ecf20Sopenharmony_ci return ret; 23028c2ecf20Sopenharmony_ci section->iov_len += ret; 23038c2ecf20Sopenharmony_ci } 23048c2ecf20Sopenharmony_ci if (section->iov_len == sec_len) 23058c2ecf20Sopenharmony_ci *crc = crc32c(0, section->iov_base, section->iov_len); 23068c2ecf20Sopenharmony_ci 23078c2ecf20Sopenharmony_ci return 1; 23088c2ecf20Sopenharmony_ci} 23098c2ecf20Sopenharmony_ci 23108c2ecf20Sopenharmony_cistatic int read_partial_msg_data(struct ceph_connection *con) 23118c2ecf20Sopenharmony_ci{ 23128c2ecf20Sopenharmony_ci struct ceph_msg *msg = con->in_msg; 23138c2ecf20Sopenharmony_ci struct ceph_msg_data_cursor *cursor = &msg->cursor; 23148c2ecf20Sopenharmony_ci bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); 23158c2ecf20Sopenharmony_ci struct page *page; 23168c2ecf20Sopenharmony_ci size_t page_offset; 23178c2ecf20Sopenharmony_ci size_t length; 23188c2ecf20Sopenharmony_ci u32 crc = 0; 23198c2ecf20Sopenharmony_ci int ret; 23208c2ecf20Sopenharmony_ci 23218c2ecf20Sopenharmony_ci if (!msg->num_data_items) 23228c2ecf20Sopenharmony_ci return -EIO; 23238c2ecf20Sopenharmony_ci 23248c2ecf20Sopenharmony_ci if (do_datacrc) 23258c2ecf20Sopenharmony_ci crc = con->in_data_crc; 23268c2ecf20Sopenharmony_ci while (cursor->total_resid) { 23278c2ecf20Sopenharmony_ci if (!cursor->resid) { 23288c2ecf20Sopenharmony_ci ceph_msg_data_advance(cursor, 0); 23298c2ecf20Sopenharmony_ci continue; 23308c2ecf20Sopenharmony_ci } 23318c2ecf20Sopenharmony_ci 23328c2ecf20Sopenharmony_ci page = ceph_msg_data_next(cursor, &page_offset, &length, NULL); 23338c2ecf20Sopenharmony_ci ret = ceph_tcp_recvpage(con->sock, page, page_offset, length); 23348c2ecf20Sopenharmony_ci if (ret <= 0) { 23358c2ecf20Sopenharmony_ci if (do_datacrc) 23368c2ecf20Sopenharmony_ci con->in_data_crc = crc; 23378c2ecf20Sopenharmony_ci 23388c2ecf20Sopenharmony_ci return ret; 23398c2ecf20Sopenharmony_ci } 23408c2ecf20Sopenharmony_ci 23418c2ecf20Sopenharmony_ci if (do_datacrc) 23428c2ecf20Sopenharmony_ci crc = ceph_crc32c_page(crc, page, page_offset, ret); 23438c2ecf20Sopenharmony_ci ceph_msg_data_advance(cursor, (size_t)ret); 23448c2ecf20Sopenharmony_ci } 23458c2ecf20Sopenharmony_ci if (do_datacrc) 23468c2ecf20Sopenharmony_ci con->in_data_crc = crc; 23478c2ecf20Sopenharmony_ci 23488c2ecf20Sopenharmony_ci return 1; /* must return > 0 to indicate success */ 23498c2ecf20Sopenharmony_ci} 23508c2ecf20Sopenharmony_ci 23518c2ecf20Sopenharmony_ci/* 23528c2ecf20Sopenharmony_ci * read (part of) a message. 23538c2ecf20Sopenharmony_ci */ 23548c2ecf20Sopenharmony_cistatic int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip); 23558c2ecf20Sopenharmony_ci 23568c2ecf20Sopenharmony_cistatic int read_partial_message(struct ceph_connection *con) 23578c2ecf20Sopenharmony_ci{ 23588c2ecf20Sopenharmony_ci struct ceph_msg *m = con->in_msg; 23598c2ecf20Sopenharmony_ci int size; 23608c2ecf20Sopenharmony_ci int end; 23618c2ecf20Sopenharmony_ci int ret; 23628c2ecf20Sopenharmony_ci unsigned int front_len, middle_len, data_len; 23638c2ecf20Sopenharmony_ci bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); 23648c2ecf20Sopenharmony_ci bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH); 23658c2ecf20Sopenharmony_ci u64 seq; 23668c2ecf20Sopenharmony_ci u32 crc; 23678c2ecf20Sopenharmony_ci 23688c2ecf20Sopenharmony_ci dout("read_partial_message con %p msg %p\n", con, m); 23698c2ecf20Sopenharmony_ci 23708c2ecf20Sopenharmony_ci /* header */ 23718c2ecf20Sopenharmony_ci size = sizeof (con->in_hdr); 23728c2ecf20Sopenharmony_ci end = size; 23738c2ecf20Sopenharmony_ci ret = read_partial(con, end, size, &con->in_hdr); 23748c2ecf20Sopenharmony_ci if (ret <= 0) 23758c2ecf20Sopenharmony_ci return ret; 23768c2ecf20Sopenharmony_ci 23778c2ecf20Sopenharmony_ci crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc)); 23788c2ecf20Sopenharmony_ci if (cpu_to_le32(crc) != con->in_hdr.crc) { 23798c2ecf20Sopenharmony_ci pr_err("read_partial_message bad hdr crc %u != expected %u\n", 23808c2ecf20Sopenharmony_ci crc, con->in_hdr.crc); 23818c2ecf20Sopenharmony_ci return -EBADMSG; 23828c2ecf20Sopenharmony_ci } 23838c2ecf20Sopenharmony_ci 23848c2ecf20Sopenharmony_ci front_len = le32_to_cpu(con->in_hdr.front_len); 23858c2ecf20Sopenharmony_ci if (front_len > CEPH_MSG_MAX_FRONT_LEN) 23868c2ecf20Sopenharmony_ci return -EIO; 23878c2ecf20Sopenharmony_ci middle_len = le32_to_cpu(con->in_hdr.middle_len); 23888c2ecf20Sopenharmony_ci if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN) 23898c2ecf20Sopenharmony_ci return -EIO; 23908c2ecf20Sopenharmony_ci data_len = le32_to_cpu(con->in_hdr.data_len); 23918c2ecf20Sopenharmony_ci if (data_len > CEPH_MSG_MAX_DATA_LEN) 23928c2ecf20Sopenharmony_ci return -EIO; 23938c2ecf20Sopenharmony_ci 23948c2ecf20Sopenharmony_ci /* verify seq# */ 23958c2ecf20Sopenharmony_ci seq = le64_to_cpu(con->in_hdr.seq); 23968c2ecf20Sopenharmony_ci if ((s64)seq - (s64)con->in_seq < 1) { 23978c2ecf20Sopenharmony_ci pr_info("skipping %s%lld %s seq %lld expected %lld\n", 23988c2ecf20Sopenharmony_ci ENTITY_NAME(con->peer_name), 23998c2ecf20Sopenharmony_ci ceph_pr_addr(&con->peer_addr), 24008c2ecf20Sopenharmony_ci seq, con->in_seq + 1); 24018c2ecf20Sopenharmony_ci con->in_base_pos = -front_len - middle_len - data_len - 24028c2ecf20Sopenharmony_ci sizeof_footer(con); 24038c2ecf20Sopenharmony_ci con->in_tag = CEPH_MSGR_TAG_READY; 24048c2ecf20Sopenharmony_ci return 1; 24058c2ecf20Sopenharmony_ci } else if ((s64)seq - (s64)con->in_seq > 1) { 24068c2ecf20Sopenharmony_ci pr_err("read_partial_message bad seq %lld expected %lld\n", 24078c2ecf20Sopenharmony_ci seq, con->in_seq + 1); 24088c2ecf20Sopenharmony_ci con->error_msg = "bad message sequence # for incoming message"; 24098c2ecf20Sopenharmony_ci return -EBADE; 24108c2ecf20Sopenharmony_ci } 24118c2ecf20Sopenharmony_ci 24128c2ecf20Sopenharmony_ci /* allocate message? */ 24138c2ecf20Sopenharmony_ci if (!con->in_msg) { 24148c2ecf20Sopenharmony_ci int skip = 0; 24158c2ecf20Sopenharmony_ci 24168c2ecf20Sopenharmony_ci dout("got hdr type %d front %d data %d\n", con->in_hdr.type, 24178c2ecf20Sopenharmony_ci front_len, data_len); 24188c2ecf20Sopenharmony_ci ret = ceph_con_in_msg_alloc(con, &skip); 24198c2ecf20Sopenharmony_ci if (ret < 0) 24208c2ecf20Sopenharmony_ci return ret; 24218c2ecf20Sopenharmony_ci 24228c2ecf20Sopenharmony_ci BUG_ON(!con->in_msg ^ skip); 24238c2ecf20Sopenharmony_ci if (skip) { 24248c2ecf20Sopenharmony_ci /* skip this message */ 24258c2ecf20Sopenharmony_ci dout("alloc_msg said skip message\n"); 24268c2ecf20Sopenharmony_ci con->in_base_pos = -front_len - middle_len - data_len - 24278c2ecf20Sopenharmony_ci sizeof_footer(con); 24288c2ecf20Sopenharmony_ci con->in_tag = CEPH_MSGR_TAG_READY; 24298c2ecf20Sopenharmony_ci con->in_seq++; 24308c2ecf20Sopenharmony_ci return 1; 24318c2ecf20Sopenharmony_ci } 24328c2ecf20Sopenharmony_ci 24338c2ecf20Sopenharmony_ci BUG_ON(!con->in_msg); 24348c2ecf20Sopenharmony_ci BUG_ON(con->in_msg->con != con); 24358c2ecf20Sopenharmony_ci m = con->in_msg; 24368c2ecf20Sopenharmony_ci m->front.iov_len = 0; /* haven't read it yet */ 24378c2ecf20Sopenharmony_ci if (m->middle) 24388c2ecf20Sopenharmony_ci m->middle->vec.iov_len = 0; 24398c2ecf20Sopenharmony_ci 24408c2ecf20Sopenharmony_ci /* prepare for data payload, if any */ 24418c2ecf20Sopenharmony_ci 24428c2ecf20Sopenharmony_ci if (data_len) 24438c2ecf20Sopenharmony_ci prepare_message_data(con->in_msg, data_len); 24448c2ecf20Sopenharmony_ci } 24458c2ecf20Sopenharmony_ci 24468c2ecf20Sopenharmony_ci /* front */ 24478c2ecf20Sopenharmony_ci ret = read_partial_message_section(con, &m->front, front_len, 24488c2ecf20Sopenharmony_ci &con->in_front_crc); 24498c2ecf20Sopenharmony_ci if (ret <= 0) 24508c2ecf20Sopenharmony_ci return ret; 24518c2ecf20Sopenharmony_ci 24528c2ecf20Sopenharmony_ci /* middle */ 24538c2ecf20Sopenharmony_ci if (m->middle) { 24548c2ecf20Sopenharmony_ci ret = read_partial_message_section(con, &m->middle->vec, 24558c2ecf20Sopenharmony_ci middle_len, 24568c2ecf20Sopenharmony_ci &con->in_middle_crc); 24578c2ecf20Sopenharmony_ci if (ret <= 0) 24588c2ecf20Sopenharmony_ci return ret; 24598c2ecf20Sopenharmony_ci } 24608c2ecf20Sopenharmony_ci 24618c2ecf20Sopenharmony_ci /* (page) data */ 24628c2ecf20Sopenharmony_ci if (data_len) { 24638c2ecf20Sopenharmony_ci ret = read_partial_msg_data(con); 24648c2ecf20Sopenharmony_ci if (ret <= 0) 24658c2ecf20Sopenharmony_ci return ret; 24668c2ecf20Sopenharmony_ci } 24678c2ecf20Sopenharmony_ci 24688c2ecf20Sopenharmony_ci /* footer */ 24698c2ecf20Sopenharmony_ci size = sizeof_footer(con); 24708c2ecf20Sopenharmony_ci end += size; 24718c2ecf20Sopenharmony_ci ret = read_partial(con, end, size, &m->footer); 24728c2ecf20Sopenharmony_ci if (ret <= 0) 24738c2ecf20Sopenharmony_ci return ret; 24748c2ecf20Sopenharmony_ci 24758c2ecf20Sopenharmony_ci if (!need_sign) { 24768c2ecf20Sopenharmony_ci m->footer.flags = m->old_footer.flags; 24778c2ecf20Sopenharmony_ci m->footer.sig = 0; 24788c2ecf20Sopenharmony_ci } 24798c2ecf20Sopenharmony_ci 24808c2ecf20Sopenharmony_ci dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n", 24818c2ecf20Sopenharmony_ci m, front_len, m->footer.front_crc, middle_len, 24828c2ecf20Sopenharmony_ci m->footer.middle_crc, data_len, m->footer.data_crc); 24838c2ecf20Sopenharmony_ci 24848c2ecf20Sopenharmony_ci /* crc ok? */ 24858c2ecf20Sopenharmony_ci if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) { 24868c2ecf20Sopenharmony_ci pr_err("read_partial_message %p front crc %u != exp. %u\n", 24878c2ecf20Sopenharmony_ci m, con->in_front_crc, m->footer.front_crc); 24888c2ecf20Sopenharmony_ci return -EBADMSG; 24898c2ecf20Sopenharmony_ci } 24908c2ecf20Sopenharmony_ci if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) { 24918c2ecf20Sopenharmony_ci pr_err("read_partial_message %p middle crc %u != exp %u\n", 24928c2ecf20Sopenharmony_ci m, con->in_middle_crc, m->footer.middle_crc); 24938c2ecf20Sopenharmony_ci return -EBADMSG; 24948c2ecf20Sopenharmony_ci } 24958c2ecf20Sopenharmony_ci if (do_datacrc && 24968c2ecf20Sopenharmony_ci (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && 24978c2ecf20Sopenharmony_ci con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { 24988c2ecf20Sopenharmony_ci pr_err("read_partial_message %p data crc %u != exp. %u\n", m, 24998c2ecf20Sopenharmony_ci con->in_data_crc, le32_to_cpu(m->footer.data_crc)); 25008c2ecf20Sopenharmony_ci return -EBADMSG; 25018c2ecf20Sopenharmony_ci } 25028c2ecf20Sopenharmony_ci 25038c2ecf20Sopenharmony_ci if (need_sign && con->ops->check_message_signature && 25048c2ecf20Sopenharmony_ci con->ops->check_message_signature(m)) { 25058c2ecf20Sopenharmony_ci pr_err("read_partial_message %p signature check failed\n", m); 25068c2ecf20Sopenharmony_ci return -EBADMSG; 25078c2ecf20Sopenharmony_ci } 25088c2ecf20Sopenharmony_ci 25098c2ecf20Sopenharmony_ci return 1; /* done! */ 25108c2ecf20Sopenharmony_ci} 25118c2ecf20Sopenharmony_ci 25128c2ecf20Sopenharmony_ci/* 25138c2ecf20Sopenharmony_ci * Process message. This happens in the worker thread. The callback should 25148c2ecf20Sopenharmony_ci * be careful not to do anything that waits on other incoming messages or it 25158c2ecf20Sopenharmony_ci * may deadlock. 25168c2ecf20Sopenharmony_ci */ 25178c2ecf20Sopenharmony_cistatic void process_message(struct ceph_connection *con) 25188c2ecf20Sopenharmony_ci{ 25198c2ecf20Sopenharmony_ci struct ceph_msg *msg = con->in_msg; 25208c2ecf20Sopenharmony_ci 25218c2ecf20Sopenharmony_ci BUG_ON(con->in_msg->con != con); 25228c2ecf20Sopenharmony_ci con->in_msg = NULL; 25238c2ecf20Sopenharmony_ci 25248c2ecf20Sopenharmony_ci /* if first message, set peer_name */ 25258c2ecf20Sopenharmony_ci if (con->peer_name.type == 0) 25268c2ecf20Sopenharmony_ci con->peer_name = msg->hdr.src; 25278c2ecf20Sopenharmony_ci 25288c2ecf20Sopenharmony_ci con->in_seq++; 25298c2ecf20Sopenharmony_ci mutex_unlock(&con->mutex); 25308c2ecf20Sopenharmony_ci 25318c2ecf20Sopenharmony_ci dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n", 25328c2ecf20Sopenharmony_ci msg, le64_to_cpu(msg->hdr.seq), 25338c2ecf20Sopenharmony_ci ENTITY_NAME(msg->hdr.src), 25348c2ecf20Sopenharmony_ci le16_to_cpu(msg->hdr.type), 25358c2ecf20Sopenharmony_ci ceph_msg_type_name(le16_to_cpu(msg->hdr.type)), 25368c2ecf20Sopenharmony_ci le32_to_cpu(msg->hdr.front_len), 25378c2ecf20Sopenharmony_ci le32_to_cpu(msg->hdr.data_len), 25388c2ecf20Sopenharmony_ci con->in_front_crc, con->in_middle_crc, con->in_data_crc); 25398c2ecf20Sopenharmony_ci con->ops->dispatch(con, msg); 25408c2ecf20Sopenharmony_ci 25418c2ecf20Sopenharmony_ci mutex_lock(&con->mutex); 25428c2ecf20Sopenharmony_ci} 25438c2ecf20Sopenharmony_ci 25448c2ecf20Sopenharmony_cistatic int read_keepalive_ack(struct ceph_connection *con) 25458c2ecf20Sopenharmony_ci{ 25468c2ecf20Sopenharmony_ci struct ceph_timespec ceph_ts; 25478c2ecf20Sopenharmony_ci size_t size = sizeof(ceph_ts); 25488c2ecf20Sopenharmony_ci int ret = read_partial(con, size, size, &ceph_ts); 25498c2ecf20Sopenharmony_ci if (ret <= 0) 25508c2ecf20Sopenharmony_ci return ret; 25518c2ecf20Sopenharmony_ci ceph_decode_timespec64(&con->last_keepalive_ack, &ceph_ts); 25528c2ecf20Sopenharmony_ci prepare_read_tag(con); 25538c2ecf20Sopenharmony_ci return 1; 25548c2ecf20Sopenharmony_ci} 25558c2ecf20Sopenharmony_ci 25568c2ecf20Sopenharmony_ci/* 25578c2ecf20Sopenharmony_ci * Write something to the socket. Called in a worker thread when the 25588c2ecf20Sopenharmony_ci * socket appears to be writeable and we have something ready to send. 25598c2ecf20Sopenharmony_ci */ 25608c2ecf20Sopenharmony_cistatic int try_write(struct ceph_connection *con) 25618c2ecf20Sopenharmony_ci{ 25628c2ecf20Sopenharmony_ci int ret = 1; 25638c2ecf20Sopenharmony_ci 25648c2ecf20Sopenharmony_ci dout("try_write start %p state %lu\n", con, con->state); 25658c2ecf20Sopenharmony_ci if (con->state != CON_STATE_PREOPEN && 25668c2ecf20Sopenharmony_ci con->state != CON_STATE_CONNECTING && 25678c2ecf20Sopenharmony_ci con->state != CON_STATE_NEGOTIATING && 25688c2ecf20Sopenharmony_ci con->state != CON_STATE_OPEN) 25698c2ecf20Sopenharmony_ci return 0; 25708c2ecf20Sopenharmony_ci 25718c2ecf20Sopenharmony_ci /* open the socket first? */ 25728c2ecf20Sopenharmony_ci if (con->state == CON_STATE_PREOPEN) { 25738c2ecf20Sopenharmony_ci BUG_ON(con->sock); 25748c2ecf20Sopenharmony_ci con->state = CON_STATE_CONNECTING; 25758c2ecf20Sopenharmony_ci 25768c2ecf20Sopenharmony_ci con_out_kvec_reset(con); 25778c2ecf20Sopenharmony_ci prepare_write_banner(con); 25788c2ecf20Sopenharmony_ci prepare_read_banner(con); 25798c2ecf20Sopenharmony_ci 25808c2ecf20Sopenharmony_ci BUG_ON(con->in_msg); 25818c2ecf20Sopenharmony_ci con->in_tag = CEPH_MSGR_TAG_READY; 25828c2ecf20Sopenharmony_ci dout("try_write initiating connect on %p new state %lu\n", 25838c2ecf20Sopenharmony_ci con, con->state); 25848c2ecf20Sopenharmony_ci ret = ceph_tcp_connect(con); 25858c2ecf20Sopenharmony_ci if (ret < 0) { 25868c2ecf20Sopenharmony_ci con->error_msg = "connect error"; 25878c2ecf20Sopenharmony_ci goto out; 25888c2ecf20Sopenharmony_ci } 25898c2ecf20Sopenharmony_ci } 25908c2ecf20Sopenharmony_ci 25918c2ecf20Sopenharmony_cimore: 25928c2ecf20Sopenharmony_ci dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); 25938c2ecf20Sopenharmony_ci BUG_ON(!con->sock); 25948c2ecf20Sopenharmony_ci 25958c2ecf20Sopenharmony_ci /* kvec data queued? */ 25968c2ecf20Sopenharmony_ci if (con->out_kvec_left) { 25978c2ecf20Sopenharmony_ci ret = write_partial_kvec(con); 25988c2ecf20Sopenharmony_ci if (ret <= 0) 25998c2ecf20Sopenharmony_ci goto out; 26008c2ecf20Sopenharmony_ci } 26018c2ecf20Sopenharmony_ci if (con->out_skip) { 26028c2ecf20Sopenharmony_ci ret = write_partial_skip(con); 26038c2ecf20Sopenharmony_ci if (ret <= 0) 26048c2ecf20Sopenharmony_ci goto out; 26058c2ecf20Sopenharmony_ci } 26068c2ecf20Sopenharmony_ci 26078c2ecf20Sopenharmony_ci /* msg pages? */ 26088c2ecf20Sopenharmony_ci if (con->out_msg) { 26098c2ecf20Sopenharmony_ci if (con->out_msg_done) { 26108c2ecf20Sopenharmony_ci ceph_msg_put(con->out_msg); 26118c2ecf20Sopenharmony_ci con->out_msg = NULL; /* we're done with this one */ 26128c2ecf20Sopenharmony_ci goto do_next; 26138c2ecf20Sopenharmony_ci } 26148c2ecf20Sopenharmony_ci 26158c2ecf20Sopenharmony_ci ret = write_partial_message_data(con); 26168c2ecf20Sopenharmony_ci if (ret == 1) 26178c2ecf20Sopenharmony_ci goto more; /* we need to send the footer, too! */ 26188c2ecf20Sopenharmony_ci if (ret == 0) 26198c2ecf20Sopenharmony_ci goto out; 26208c2ecf20Sopenharmony_ci if (ret < 0) { 26218c2ecf20Sopenharmony_ci dout("try_write write_partial_message_data err %d\n", 26228c2ecf20Sopenharmony_ci ret); 26238c2ecf20Sopenharmony_ci goto out; 26248c2ecf20Sopenharmony_ci } 26258c2ecf20Sopenharmony_ci } 26268c2ecf20Sopenharmony_ci 26278c2ecf20Sopenharmony_cido_next: 26288c2ecf20Sopenharmony_ci if (con->state == CON_STATE_OPEN) { 26298c2ecf20Sopenharmony_ci if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { 26308c2ecf20Sopenharmony_ci prepare_write_keepalive(con); 26318c2ecf20Sopenharmony_ci goto more; 26328c2ecf20Sopenharmony_ci } 26338c2ecf20Sopenharmony_ci /* is anything else pending? */ 26348c2ecf20Sopenharmony_ci if (!list_empty(&con->out_queue)) { 26358c2ecf20Sopenharmony_ci prepare_write_message(con); 26368c2ecf20Sopenharmony_ci goto more; 26378c2ecf20Sopenharmony_ci } 26388c2ecf20Sopenharmony_ci if (con->in_seq > con->in_seq_acked) { 26398c2ecf20Sopenharmony_ci prepare_write_ack(con); 26408c2ecf20Sopenharmony_ci goto more; 26418c2ecf20Sopenharmony_ci } 26428c2ecf20Sopenharmony_ci } 26438c2ecf20Sopenharmony_ci 26448c2ecf20Sopenharmony_ci /* Nothing to do! */ 26458c2ecf20Sopenharmony_ci con_flag_clear(con, CON_FLAG_WRITE_PENDING); 26468c2ecf20Sopenharmony_ci dout("try_write nothing else to write.\n"); 26478c2ecf20Sopenharmony_ci ret = 0; 26488c2ecf20Sopenharmony_ciout: 26498c2ecf20Sopenharmony_ci dout("try_write done on %p ret %d\n", con, ret); 26508c2ecf20Sopenharmony_ci return ret; 26518c2ecf20Sopenharmony_ci} 26528c2ecf20Sopenharmony_ci 26538c2ecf20Sopenharmony_ci/* 26548c2ecf20Sopenharmony_ci * Read what we can from the socket. 26558c2ecf20Sopenharmony_ci */ 26568c2ecf20Sopenharmony_cistatic int try_read(struct ceph_connection *con) 26578c2ecf20Sopenharmony_ci{ 26588c2ecf20Sopenharmony_ci int ret = -1; 26598c2ecf20Sopenharmony_ci 26608c2ecf20Sopenharmony_cimore: 26618c2ecf20Sopenharmony_ci dout("try_read start on %p state %lu\n", con, con->state); 26628c2ecf20Sopenharmony_ci if (con->state != CON_STATE_CONNECTING && 26638c2ecf20Sopenharmony_ci con->state != CON_STATE_NEGOTIATING && 26648c2ecf20Sopenharmony_ci con->state != CON_STATE_OPEN) 26658c2ecf20Sopenharmony_ci return 0; 26668c2ecf20Sopenharmony_ci 26678c2ecf20Sopenharmony_ci BUG_ON(!con->sock); 26688c2ecf20Sopenharmony_ci 26698c2ecf20Sopenharmony_ci dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, 26708c2ecf20Sopenharmony_ci con->in_base_pos); 26718c2ecf20Sopenharmony_ci 26728c2ecf20Sopenharmony_ci if (con->state == CON_STATE_CONNECTING) { 26738c2ecf20Sopenharmony_ci dout("try_read connecting\n"); 26748c2ecf20Sopenharmony_ci ret = read_partial_banner(con); 26758c2ecf20Sopenharmony_ci if (ret <= 0) 26768c2ecf20Sopenharmony_ci goto out; 26778c2ecf20Sopenharmony_ci ret = process_banner(con); 26788c2ecf20Sopenharmony_ci if (ret < 0) 26798c2ecf20Sopenharmony_ci goto out; 26808c2ecf20Sopenharmony_ci 26818c2ecf20Sopenharmony_ci con->state = CON_STATE_NEGOTIATING; 26828c2ecf20Sopenharmony_ci 26838c2ecf20Sopenharmony_ci /* 26848c2ecf20Sopenharmony_ci * Received banner is good, exchange connection info. 26858c2ecf20Sopenharmony_ci * Do not reset out_kvec, as sending our banner raced 26868c2ecf20Sopenharmony_ci * with receiving peer banner after connect completed. 26878c2ecf20Sopenharmony_ci */ 26888c2ecf20Sopenharmony_ci ret = prepare_write_connect(con); 26898c2ecf20Sopenharmony_ci if (ret < 0) 26908c2ecf20Sopenharmony_ci goto out; 26918c2ecf20Sopenharmony_ci prepare_read_connect(con); 26928c2ecf20Sopenharmony_ci 26938c2ecf20Sopenharmony_ci /* Send connection info before awaiting response */ 26948c2ecf20Sopenharmony_ci goto out; 26958c2ecf20Sopenharmony_ci } 26968c2ecf20Sopenharmony_ci 26978c2ecf20Sopenharmony_ci if (con->state == CON_STATE_NEGOTIATING) { 26988c2ecf20Sopenharmony_ci dout("try_read negotiating\n"); 26998c2ecf20Sopenharmony_ci ret = read_partial_connect(con); 27008c2ecf20Sopenharmony_ci if (ret <= 0) 27018c2ecf20Sopenharmony_ci goto out; 27028c2ecf20Sopenharmony_ci ret = process_connect(con); 27038c2ecf20Sopenharmony_ci if (ret < 0) 27048c2ecf20Sopenharmony_ci goto out; 27058c2ecf20Sopenharmony_ci goto more; 27068c2ecf20Sopenharmony_ci } 27078c2ecf20Sopenharmony_ci 27088c2ecf20Sopenharmony_ci WARN_ON(con->state != CON_STATE_OPEN); 27098c2ecf20Sopenharmony_ci 27108c2ecf20Sopenharmony_ci if (con->in_base_pos < 0) { 27118c2ecf20Sopenharmony_ci /* 27128c2ecf20Sopenharmony_ci * skipping + discarding content. 27138c2ecf20Sopenharmony_ci */ 27148c2ecf20Sopenharmony_ci ret = ceph_tcp_recvmsg(con->sock, NULL, -con->in_base_pos); 27158c2ecf20Sopenharmony_ci if (ret <= 0) 27168c2ecf20Sopenharmony_ci goto out; 27178c2ecf20Sopenharmony_ci dout("skipped %d / %d bytes\n", ret, -con->in_base_pos); 27188c2ecf20Sopenharmony_ci con->in_base_pos += ret; 27198c2ecf20Sopenharmony_ci if (con->in_base_pos) 27208c2ecf20Sopenharmony_ci goto more; 27218c2ecf20Sopenharmony_ci } 27228c2ecf20Sopenharmony_ci if (con->in_tag == CEPH_MSGR_TAG_READY) { 27238c2ecf20Sopenharmony_ci /* 27248c2ecf20Sopenharmony_ci * what's next? 27258c2ecf20Sopenharmony_ci */ 27268c2ecf20Sopenharmony_ci ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1); 27278c2ecf20Sopenharmony_ci if (ret <= 0) 27288c2ecf20Sopenharmony_ci goto out; 27298c2ecf20Sopenharmony_ci dout("try_read got tag %d\n", (int)con->in_tag); 27308c2ecf20Sopenharmony_ci switch (con->in_tag) { 27318c2ecf20Sopenharmony_ci case CEPH_MSGR_TAG_MSG: 27328c2ecf20Sopenharmony_ci prepare_read_message(con); 27338c2ecf20Sopenharmony_ci break; 27348c2ecf20Sopenharmony_ci case CEPH_MSGR_TAG_ACK: 27358c2ecf20Sopenharmony_ci prepare_read_ack(con); 27368c2ecf20Sopenharmony_ci break; 27378c2ecf20Sopenharmony_ci case CEPH_MSGR_TAG_KEEPALIVE2_ACK: 27388c2ecf20Sopenharmony_ci prepare_read_keepalive_ack(con); 27398c2ecf20Sopenharmony_ci break; 27408c2ecf20Sopenharmony_ci case CEPH_MSGR_TAG_CLOSE: 27418c2ecf20Sopenharmony_ci con_close_socket(con); 27428c2ecf20Sopenharmony_ci con->state = CON_STATE_CLOSED; 27438c2ecf20Sopenharmony_ci goto out; 27448c2ecf20Sopenharmony_ci default: 27458c2ecf20Sopenharmony_ci goto bad_tag; 27468c2ecf20Sopenharmony_ci } 27478c2ecf20Sopenharmony_ci } 27488c2ecf20Sopenharmony_ci if (con->in_tag == CEPH_MSGR_TAG_MSG) { 27498c2ecf20Sopenharmony_ci ret = read_partial_message(con); 27508c2ecf20Sopenharmony_ci if (ret <= 0) { 27518c2ecf20Sopenharmony_ci switch (ret) { 27528c2ecf20Sopenharmony_ci case -EBADMSG: 27538c2ecf20Sopenharmony_ci con->error_msg = "bad crc/signature"; 27548c2ecf20Sopenharmony_ci fallthrough; 27558c2ecf20Sopenharmony_ci case -EBADE: 27568c2ecf20Sopenharmony_ci ret = -EIO; 27578c2ecf20Sopenharmony_ci break; 27588c2ecf20Sopenharmony_ci case -EIO: 27598c2ecf20Sopenharmony_ci con->error_msg = "io error"; 27608c2ecf20Sopenharmony_ci break; 27618c2ecf20Sopenharmony_ci } 27628c2ecf20Sopenharmony_ci goto out; 27638c2ecf20Sopenharmony_ci } 27648c2ecf20Sopenharmony_ci if (con->in_tag == CEPH_MSGR_TAG_READY) 27658c2ecf20Sopenharmony_ci goto more; 27668c2ecf20Sopenharmony_ci process_message(con); 27678c2ecf20Sopenharmony_ci if (con->state == CON_STATE_OPEN) 27688c2ecf20Sopenharmony_ci prepare_read_tag(con); 27698c2ecf20Sopenharmony_ci goto more; 27708c2ecf20Sopenharmony_ci } 27718c2ecf20Sopenharmony_ci if (con->in_tag == CEPH_MSGR_TAG_ACK || 27728c2ecf20Sopenharmony_ci con->in_tag == CEPH_MSGR_TAG_SEQ) { 27738c2ecf20Sopenharmony_ci /* 27748c2ecf20Sopenharmony_ci * the final handshake seq exchange is semantically 27758c2ecf20Sopenharmony_ci * equivalent to an ACK 27768c2ecf20Sopenharmony_ci */ 27778c2ecf20Sopenharmony_ci ret = read_partial_ack(con); 27788c2ecf20Sopenharmony_ci if (ret <= 0) 27798c2ecf20Sopenharmony_ci goto out; 27808c2ecf20Sopenharmony_ci process_ack(con); 27818c2ecf20Sopenharmony_ci goto more; 27828c2ecf20Sopenharmony_ci } 27838c2ecf20Sopenharmony_ci if (con->in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) { 27848c2ecf20Sopenharmony_ci ret = read_keepalive_ack(con); 27858c2ecf20Sopenharmony_ci if (ret <= 0) 27868c2ecf20Sopenharmony_ci goto out; 27878c2ecf20Sopenharmony_ci goto more; 27888c2ecf20Sopenharmony_ci } 27898c2ecf20Sopenharmony_ci 27908c2ecf20Sopenharmony_ciout: 27918c2ecf20Sopenharmony_ci dout("try_read done on %p ret %d\n", con, ret); 27928c2ecf20Sopenharmony_ci return ret; 27938c2ecf20Sopenharmony_ci 27948c2ecf20Sopenharmony_cibad_tag: 27958c2ecf20Sopenharmony_ci pr_err("try_read bad con->in_tag = %d\n", (int)con->in_tag); 27968c2ecf20Sopenharmony_ci con->error_msg = "protocol error, garbage tag"; 27978c2ecf20Sopenharmony_ci ret = -1; 27988c2ecf20Sopenharmony_ci goto out; 27998c2ecf20Sopenharmony_ci} 28008c2ecf20Sopenharmony_ci 28018c2ecf20Sopenharmony_ci 28028c2ecf20Sopenharmony_ci/* 28038c2ecf20Sopenharmony_ci * Atomically queue work on a connection after the specified delay. 28048c2ecf20Sopenharmony_ci * Bump @con reference to avoid races with connection teardown. 28058c2ecf20Sopenharmony_ci * Returns 0 if work was queued, or an error code otherwise. 28068c2ecf20Sopenharmony_ci */ 28078c2ecf20Sopenharmony_cistatic int queue_con_delay(struct ceph_connection *con, unsigned long delay) 28088c2ecf20Sopenharmony_ci{ 28098c2ecf20Sopenharmony_ci if (!con->ops->get(con)) { 28108c2ecf20Sopenharmony_ci dout("%s %p ref count 0\n", __func__, con); 28118c2ecf20Sopenharmony_ci return -ENOENT; 28128c2ecf20Sopenharmony_ci } 28138c2ecf20Sopenharmony_ci 28148c2ecf20Sopenharmony_ci dout("%s %p %lu\n", __func__, con, delay); 28158c2ecf20Sopenharmony_ci if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { 28168c2ecf20Sopenharmony_ci dout("%s %p - already queued\n", __func__, con); 28178c2ecf20Sopenharmony_ci con->ops->put(con); 28188c2ecf20Sopenharmony_ci return -EBUSY; 28198c2ecf20Sopenharmony_ci } 28208c2ecf20Sopenharmony_ci 28218c2ecf20Sopenharmony_ci return 0; 28228c2ecf20Sopenharmony_ci} 28238c2ecf20Sopenharmony_ci 28248c2ecf20Sopenharmony_cistatic void queue_con(struct ceph_connection *con) 28258c2ecf20Sopenharmony_ci{ 28268c2ecf20Sopenharmony_ci (void) queue_con_delay(con, 0); 28278c2ecf20Sopenharmony_ci} 28288c2ecf20Sopenharmony_ci 28298c2ecf20Sopenharmony_cistatic void cancel_con(struct ceph_connection *con) 28308c2ecf20Sopenharmony_ci{ 28318c2ecf20Sopenharmony_ci if (cancel_delayed_work(&con->work)) { 28328c2ecf20Sopenharmony_ci dout("%s %p\n", __func__, con); 28338c2ecf20Sopenharmony_ci con->ops->put(con); 28348c2ecf20Sopenharmony_ci } 28358c2ecf20Sopenharmony_ci} 28368c2ecf20Sopenharmony_ci 28378c2ecf20Sopenharmony_cistatic bool con_sock_closed(struct ceph_connection *con) 28388c2ecf20Sopenharmony_ci{ 28398c2ecf20Sopenharmony_ci if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED)) 28408c2ecf20Sopenharmony_ci return false; 28418c2ecf20Sopenharmony_ci 28428c2ecf20Sopenharmony_ci#define CASE(x) \ 28438c2ecf20Sopenharmony_ci case CON_STATE_ ## x: \ 28448c2ecf20Sopenharmony_ci con->error_msg = "socket closed (con state " #x ")"; \ 28458c2ecf20Sopenharmony_ci break; 28468c2ecf20Sopenharmony_ci 28478c2ecf20Sopenharmony_ci switch (con->state) { 28488c2ecf20Sopenharmony_ci CASE(CLOSED); 28498c2ecf20Sopenharmony_ci CASE(PREOPEN); 28508c2ecf20Sopenharmony_ci CASE(CONNECTING); 28518c2ecf20Sopenharmony_ci CASE(NEGOTIATING); 28528c2ecf20Sopenharmony_ci CASE(OPEN); 28538c2ecf20Sopenharmony_ci CASE(STANDBY); 28548c2ecf20Sopenharmony_ci default: 28558c2ecf20Sopenharmony_ci pr_warn("%s con %p unrecognized state %lu\n", 28568c2ecf20Sopenharmony_ci __func__, con, con->state); 28578c2ecf20Sopenharmony_ci con->error_msg = "unrecognized con state"; 28588c2ecf20Sopenharmony_ci BUG(); 28598c2ecf20Sopenharmony_ci break; 28608c2ecf20Sopenharmony_ci } 28618c2ecf20Sopenharmony_ci#undef CASE 28628c2ecf20Sopenharmony_ci 28638c2ecf20Sopenharmony_ci return true; 28648c2ecf20Sopenharmony_ci} 28658c2ecf20Sopenharmony_ci 28668c2ecf20Sopenharmony_cistatic bool con_backoff(struct ceph_connection *con) 28678c2ecf20Sopenharmony_ci{ 28688c2ecf20Sopenharmony_ci int ret; 28698c2ecf20Sopenharmony_ci 28708c2ecf20Sopenharmony_ci if (!con_flag_test_and_clear(con, CON_FLAG_BACKOFF)) 28718c2ecf20Sopenharmony_ci return false; 28728c2ecf20Sopenharmony_ci 28738c2ecf20Sopenharmony_ci ret = queue_con_delay(con, round_jiffies_relative(con->delay)); 28748c2ecf20Sopenharmony_ci if (ret) { 28758c2ecf20Sopenharmony_ci dout("%s: con %p FAILED to back off %lu\n", __func__, 28768c2ecf20Sopenharmony_ci con, con->delay); 28778c2ecf20Sopenharmony_ci BUG_ON(ret == -ENOENT); 28788c2ecf20Sopenharmony_ci con_flag_set(con, CON_FLAG_BACKOFF); 28798c2ecf20Sopenharmony_ci } 28808c2ecf20Sopenharmony_ci 28818c2ecf20Sopenharmony_ci return true; 28828c2ecf20Sopenharmony_ci} 28838c2ecf20Sopenharmony_ci 28848c2ecf20Sopenharmony_ci/* Finish fault handling; con->mutex must *not* be held here */ 28858c2ecf20Sopenharmony_ci 28868c2ecf20Sopenharmony_cistatic void con_fault_finish(struct ceph_connection *con) 28878c2ecf20Sopenharmony_ci{ 28888c2ecf20Sopenharmony_ci dout("%s %p\n", __func__, con); 28898c2ecf20Sopenharmony_ci 28908c2ecf20Sopenharmony_ci /* 28918c2ecf20Sopenharmony_ci * in case we faulted due to authentication, invalidate our 28928c2ecf20Sopenharmony_ci * current tickets so that we can get new ones. 28938c2ecf20Sopenharmony_ci */ 28948c2ecf20Sopenharmony_ci if (con->auth_retry) { 28958c2ecf20Sopenharmony_ci dout("auth_retry %d, invalidating\n", con->auth_retry); 28968c2ecf20Sopenharmony_ci if (con->ops->invalidate_authorizer) 28978c2ecf20Sopenharmony_ci con->ops->invalidate_authorizer(con); 28988c2ecf20Sopenharmony_ci con->auth_retry = 0; 28998c2ecf20Sopenharmony_ci } 29008c2ecf20Sopenharmony_ci 29018c2ecf20Sopenharmony_ci if (con->ops->fault) 29028c2ecf20Sopenharmony_ci con->ops->fault(con); 29038c2ecf20Sopenharmony_ci} 29048c2ecf20Sopenharmony_ci 29058c2ecf20Sopenharmony_ci/* 29068c2ecf20Sopenharmony_ci * Do some work on a connection. Drop a connection ref when we're done. 29078c2ecf20Sopenharmony_ci */ 29088c2ecf20Sopenharmony_cistatic void ceph_con_workfn(struct work_struct *work) 29098c2ecf20Sopenharmony_ci{ 29108c2ecf20Sopenharmony_ci struct ceph_connection *con = container_of(work, struct ceph_connection, 29118c2ecf20Sopenharmony_ci work.work); 29128c2ecf20Sopenharmony_ci bool fault; 29138c2ecf20Sopenharmony_ci 29148c2ecf20Sopenharmony_ci mutex_lock(&con->mutex); 29158c2ecf20Sopenharmony_ci while (true) { 29168c2ecf20Sopenharmony_ci int ret; 29178c2ecf20Sopenharmony_ci 29188c2ecf20Sopenharmony_ci if ((fault = con_sock_closed(con))) { 29198c2ecf20Sopenharmony_ci dout("%s: con %p SOCK_CLOSED\n", __func__, con); 29208c2ecf20Sopenharmony_ci break; 29218c2ecf20Sopenharmony_ci } 29228c2ecf20Sopenharmony_ci if (con_backoff(con)) { 29238c2ecf20Sopenharmony_ci dout("%s: con %p BACKOFF\n", __func__, con); 29248c2ecf20Sopenharmony_ci break; 29258c2ecf20Sopenharmony_ci } 29268c2ecf20Sopenharmony_ci if (con->state == CON_STATE_STANDBY) { 29278c2ecf20Sopenharmony_ci dout("%s: con %p STANDBY\n", __func__, con); 29288c2ecf20Sopenharmony_ci break; 29298c2ecf20Sopenharmony_ci } 29308c2ecf20Sopenharmony_ci if (con->state == CON_STATE_CLOSED) { 29318c2ecf20Sopenharmony_ci dout("%s: con %p CLOSED\n", __func__, con); 29328c2ecf20Sopenharmony_ci BUG_ON(con->sock); 29338c2ecf20Sopenharmony_ci break; 29348c2ecf20Sopenharmony_ci } 29358c2ecf20Sopenharmony_ci if (con->state == CON_STATE_PREOPEN) { 29368c2ecf20Sopenharmony_ci dout("%s: con %p PREOPEN\n", __func__, con); 29378c2ecf20Sopenharmony_ci BUG_ON(con->sock); 29388c2ecf20Sopenharmony_ci } 29398c2ecf20Sopenharmony_ci 29408c2ecf20Sopenharmony_ci ret = try_read(con); 29418c2ecf20Sopenharmony_ci if (ret < 0) { 29428c2ecf20Sopenharmony_ci if (ret == -EAGAIN) 29438c2ecf20Sopenharmony_ci continue; 29448c2ecf20Sopenharmony_ci if (!con->error_msg) 29458c2ecf20Sopenharmony_ci con->error_msg = "socket error on read"; 29468c2ecf20Sopenharmony_ci fault = true; 29478c2ecf20Sopenharmony_ci break; 29488c2ecf20Sopenharmony_ci } 29498c2ecf20Sopenharmony_ci 29508c2ecf20Sopenharmony_ci ret = try_write(con); 29518c2ecf20Sopenharmony_ci if (ret < 0) { 29528c2ecf20Sopenharmony_ci if (ret == -EAGAIN) 29538c2ecf20Sopenharmony_ci continue; 29548c2ecf20Sopenharmony_ci if (!con->error_msg) 29558c2ecf20Sopenharmony_ci con->error_msg = "socket error on write"; 29568c2ecf20Sopenharmony_ci fault = true; 29578c2ecf20Sopenharmony_ci } 29588c2ecf20Sopenharmony_ci 29598c2ecf20Sopenharmony_ci break; /* If we make it to here, we're done */ 29608c2ecf20Sopenharmony_ci } 29618c2ecf20Sopenharmony_ci if (fault) 29628c2ecf20Sopenharmony_ci con_fault(con); 29638c2ecf20Sopenharmony_ci mutex_unlock(&con->mutex); 29648c2ecf20Sopenharmony_ci 29658c2ecf20Sopenharmony_ci if (fault) 29668c2ecf20Sopenharmony_ci con_fault_finish(con); 29678c2ecf20Sopenharmony_ci 29688c2ecf20Sopenharmony_ci con->ops->put(con); 29698c2ecf20Sopenharmony_ci} 29708c2ecf20Sopenharmony_ci 29718c2ecf20Sopenharmony_ci/* 29728c2ecf20Sopenharmony_ci * Generic error/fault handler. A retry mechanism is used with 29738c2ecf20Sopenharmony_ci * exponential backoff 29748c2ecf20Sopenharmony_ci */ 29758c2ecf20Sopenharmony_cistatic void con_fault(struct ceph_connection *con) 29768c2ecf20Sopenharmony_ci{ 29778c2ecf20Sopenharmony_ci dout("fault %p state %lu to peer %s\n", 29788c2ecf20Sopenharmony_ci con, con->state, ceph_pr_addr(&con->peer_addr)); 29798c2ecf20Sopenharmony_ci 29808c2ecf20Sopenharmony_ci pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), 29818c2ecf20Sopenharmony_ci ceph_pr_addr(&con->peer_addr), con->error_msg); 29828c2ecf20Sopenharmony_ci con->error_msg = NULL; 29838c2ecf20Sopenharmony_ci 29848c2ecf20Sopenharmony_ci WARN_ON(con->state != CON_STATE_CONNECTING && 29858c2ecf20Sopenharmony_ci con->state != CON_STATE_NEGOTIATING && 29868c2ecf20Sopenharmony_ci con->state != CON_STATE_OPEN); 29878c2ecf20Sopenharmony_ci 29888c2ecf20Sopenharmony_ci con_close_socket(con); 29898c2ecf20Sopenharmony_ci 29908c2ecf20Sopenharmony_ci if (con_flag_test(con, CON_FLAG_LOSSYTX)) { 29918c2ecf20Sopenharmony_ci dout("fault on LOSSYTX channel, marking CLOSED\n"); 29928c2ecf20Sopenharmony_ci con->state = CON_STATE_CLOSED; 29938c2ecf20Sopenharmony_ci return; 29948c2ecf20Sopenharmony_ci } 29958c2ecf20Sopenharmony_ci 29968c2ecf20Sopenharmony_ci if (con->in_msg) { 29978c2ecf20Sopenharmony_ci BUG_ON(con->in_msg->con != con); 29988c2ecf20Sopenharmony_ci ceph_msg_put(con->in_msg); 29998c2ecf20Sopenharmony_ci con->in_msg = NULL; 30008c2ecf20Sopenharmony_ci } 30018c2ecf20Sopenharmony_ci if (con->out_msg) { 30028c2ecf20Sopenharmony_ci BUG_ON(con->out_msg->con != con); 30038c2ecf20Sopenharmony_ci ceph_msg_put(con->out_msg); 30048c2ecf20Sopenharmony_ci con->out_msg = NULL; 30058c2ecf20Sopenharmony_ci } 30068c2ecf20Sopenharmony_ci 30078c2ecf20Sopenharmony_ci /* Requeue anything that hasn't been acked */ 30088c2ecf20Sopenharmony_ci list_splice_init(&con->out_sent, &con->out_queue); 30098c2ecf20Sopenharmony_ci 30108c2ecf20Sopenharmony_ci /* If there are no messages queued or keepalive pending, place 30118c2ecf20Sopenharmony_ci * the connection in a STANDBY state */ 30128c2ecf20Sopenharmony_ci if (list_empty(&con->out_queue) && 30138c2ecf20Sopenharmony_ci !con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING)) { 30148c2ecf20Sopenharmony_ci dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); 30158c2ecf20Sopenharmony_ci con_flag_clear(con, CON_FLAG_WRITE_PENDING); 30168c2ecf20Sopenharmony_ci con->state = CON_STATE_STANDBY; 30178c2ecf20Sopenharmony_ci } else { 30188c2ecf20Sopenharmony_ci /* retry after a delay. */ 30198c2ecf20Sopenharmony_ci con->state = CON_STATE_PREOPEN; 30208c2ecf20Sopenharmony_ci if (con->delay == 0) 30218c2ecf20Sopenharmony_ci con->delay = BASE_DELAY_INTERVAL; 30228c2ecf20Sopenharmony_ci else if (con->delay < MAX_DELAY_INTERVAL) 30238c2ecf20Sopenharmony_ci con->delay *= 2; 30248c2ecf20Sopenharmony_ci con_flag_set(con, CON_FLAG_BACKOFF); 30258c2ecf20Sopenharmony_ci queue_con(con); 30268c2ecf20Sopenharmony_ci } 30278c2ecf20Sopenharmony_ci} 30288c2ecf20Sopenharmony_ci 30298c2ecf20Sopenharmony_ci 30308c2ecf20Sopenharmony_civoid ceph_messenger_reset_nonce(struct ceph_messenger *msgr) 30318c2ecf20Sopenharmony_ci{ 30328c2ecf20Sopenharmony_ci u32 nonce = le32_to_cpu(msgr->inst.addr.nonce) + 1000000; 30338c2ecf20Sopenharmony_ci msgr->inst.addr.nonce = cpu_to_le32(nonce); 30348c2ecf20Sopenharmony_ci encode_my_addr(msgr); 30358c2ecf20Sopenharmony_ci} 30368c2ecf20Sopenharmony_ci 30378c2ecf20Sopenharmony_ci/* 30388c2ecf20Sopenharmony_ci * initialize a new messenger instance 30398c2ecf20Sopenharmony_ci */ 30408c2ecf20Sopenharmony_civoid ceph_messenger_init(struct ceph_messenger *msgr, 30418c2ecf20Sopenharmony_ci struct ceph_entity_addr *myaddr) 30428c2ecf20Sopenharmony_ci{ 30438c2ecf20Sopenharmony_ci spin_lock_init(&msgr->global_seq_lock); 30448c2ecf20Sopenharmony_ci 30458c2ecf20Sopenharmony_ci if (myaddr) 30468c2ecf20Sopenharmony_ci msgr->inst.addr = *myaddr; 30478c2ecf20Sopenharmony_ci 30488c2ecf20Sopenharmony_ci /* select a random nonce */ 30498c2ecf20Sopenharmony_ci msgr->inst.addr.type = 0; 30508c2ecf20Sopenharmony_ci get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce)); 30518c2ecf20Sopenharmony_ci encode_my_addr(msgr); 30528c2ecf20Sopenharmony_ci 30538c2ecf20Sopenharmony_ci atomic_set(&msgr->stopping, 0); 30548c2ecf20Sopenharmony_ci write_pnet(&msgr->net, get_net(current->nsproxy->net_ns)); 30558c2ecf20Sopenharmony_ci 30568c2ecf20Sopenharmony_ci dout("%s %p\n", __func__, msgr); 30578c2ecf20Sopenharmony_ci} 30588c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_messenger_init); 30598c2ecf20Sopenharmony_ci 30608c2ecf20Sopenharmony_civoid ceph_messenger_fini(struct ceph_messenger *msgr) 30618c2ecf20Sopenharmony_ci{ 30628c2ecf20Sopenharmony_ci put_net(read_pnet(&msgr->net)); 30638c2ecf20Sopenharmony_ci} 30648c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_messenger_fini); 30658c2ecf20Sopenharmony_ci 30668c2ecf20Sopenharmony_cistatic void msg_con_set(struct ceph_msg *msg, struct ceph_connection *con) 30678c2ecf20Sopenharmony_ci{ 30688c2ecf20Sopenharmony_ci if (msg->con) 30698c2ecf20Sopenharmony_ci msg->con->ops->put(msg->con); 30708c2ecf20Sopenharmony_ci 30718c2ecf20Sopenharmony_ci msg->con = con ? con->ops->get(con) : NULL; 30728c2ecf20Sopenharmony_ci BUG_ON(msg->con != con); 30738c2ecf20Sopenharmony_ci} 30748c2ecf20Sopenharmony_ci 30758c2ecf20Sopenharmony_cistatic void clear_standby(struct ceph_connection *con) 30768c2ecf20Sopenharmony_ci{ 30778c2ecf20Sopenharmony_ci /* come back from STANDBY? */ 30788c2ecf20Sopenharmony_ci if (con->state == CON_STATE_STANDBY) { 30798c2ecf20Sopenharmony_ci dout("clear_standby %p and ++connect_seq\n", con); 30808c2ecf20Sopenharmony_ci con->state = CON_STATE_PREOPEN; 30818c2ecf20Sopenharmony_ci con->connect_seq++; 30828c2ecf20Sopenharmony_ci WARN_ON(con_flag_test(con, CON_FLAG_WRITE_PENDING)); 30838c2ecf20Sopenharmony_ci WARN_ON(con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING)); 30848c2ecf20Sopenharmony_ci } 30858c2ecf20Sopenharmony_ci} 30868c2ecf20Sopenharmony_ci 30878c2ecf20Sopenharmony_ci/* 30888c2ecf20Sopenharmony_ci * Queue up an outgoing message on the given connection. 30898c2ecf20Sopenharmony_ci */ 30908c2ecf20Sopenharmony_civoid ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) 30918c2ecf20Sopenharmony_ci{ 30928c2ecf20Sopenharmony_ci /* set src+dst */ 30938c2ecf20Sopenharmony_ci msg->hdr.src = con->msgr->inst.name; 30948c2ecf20Sopenharmony_ci BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); 30958c2ecf20Sopenharmony_ci msg->needs_out_seq = true; 30968c2ecf20Sopenharmony_ci 30978c2ecf20Sopenharmony_ci mutex_lock(&con->mutex); 30988c2ecf20Sopenharmony_ci 30998c2ecf20Sopenharmony_ci if (con->state == CON_STATE_CLOSED) { 31008c2ecf20Sopenharmony_ci dout("con_send %p closed, dropping %p\n", con, msg); 31018c2ecf20Sopenharmony_ci ceph_msg_put(msg); 31028c2ecf20Sopenharmony_ci mutex_unlock(&con->mutex); 31038c2ecf20Sopenharmony_ci return; 31048c2ecf20Sopenharmony_ci } 31058c2ecf20Sopenharmony_ci 31068c2ecf20Sopenharmony_ci msg_con_set(msg, con); 31078c2ecf20Sopenharmony_ci 31088c2ecf20Sopenharmony_ci BUG_ON(!list_empty(&msg->list_head)); 31098c2ecf20Sopenharmony_ci list_add_tail(&msg->list_head, &con->out_queue); 31108c2ecf20Sopenharmony_ci dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg, 31118c2ecf20Sopenharmony_ci ENTITY_NAME(con->peer_name), le16_to_cpu(msg->hdr.type), 31128c2ecf20Sopenharmony_ci ceph_msg_type_name(le16_to_cpu(msg->hdr.type)), 31138c2ecf20Sopenharmony_ci le32_to_cpu(msg->hdr.front_len), 31148c2ecf20Sopenharmony_ci le32_to_cpu(msg->hdr.middle_len), 31158c2ecf20Sopenharmony_ci le32_to_cpu(msg->hdr.data_len)); 31168c2ecf20Sopenharmony_ci 31178c2ecf20Sopenharmony_ci clear_standby(con); 31188c2ecf20Sopenharmony_ci mutex_unlock(&con->mutex); 31198c2ecf20Sopenharmony_ci 31208c2ecf20Sopenharmony_ci /* if there wasn't anything waiting to send before, queue 31218c2ecf20Sopenharmony_ci * new work */ 31228c2ecf20Sopenharmony_ci if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) 31238c2ecf20Sopenharmony_ci queue_con(con); 31248c2ecf20Sopenharmony_ci} 31258c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_con_send); 31268c2ecf20Sopenharmony_ci 31278c2ecf20Sopenharmony_ci/* 31288c2ecf20Sopenharmony_ci * Revoke a message that was previously queued for send 31298c2ecf20Sopenharmony_ci */ 31308c2ecf20Sopenharmony_civoid ceph_msg_revoke(struct ceph_msg *msg) 31318c2ecf20Sopenharmony_ci{ 31328c2ecf20Sopenharmony_ci struct ceph_connection *con = msg->con; 31338c2ecf20Sopenharmony_ci 31348c2ecf20Sopenharmony_ci if (!con) { 31358c2ecf20Sopenharmony_ci dout("%s msg %p null con\n", __func__, msg); 31368c2ecf20Sopenharmony_ci return; /* Message not in our possession */ 31378c2ecf20Sopenharmony_ci } 31388c2ecf20Sopenharmony_ci 31398c2ecf20Sopenharmony_ci mutex_lock(&con->mutex); 31408c2ecf20Sopenharmony_ci if (!list_empty(&msg->list_head)) { 31418c2ecf20Sopenharmony_ci dout("%s %p msg %p - was on queue\n", __func__, con, msg); 31428c2ecf20Sopenharmony_ci list_del_init(&msg->list_head); 31438c2ecf20Sopenharmony_ci msg->hdr.seq = 0; 31448c2ecf20Sopenharmony_ci 31458c2ecf20Sopenharmony_ci ceph_msg_put(msg); 31468c2ecf20Sopenharmony_ci } 31478c2ecf20Sopenharmony_ci if (con->out_msg == msg) { 31488c2ecf20Sopenharmony_ci BUG_ON(con->out_skip); 31498c2ecf20Sopenharmony_ci /* footer */ 31508c2ecf20Sopenharmony_ci if (con->out_msg_done) { 31518c2ecf20Sopenharmony_ci con->out_skip += con_out_kvec_skip(con); 31528c2ecf20Sopenharmony_ci } else { 31538c2ecf20Sopenharmony_ci BUG_ON(!msg->data_length); 31548c2ecf20Sopenharmony_ci con->out_skip += sizeof_footer(con); 31558c2ecf20Sopenharmony_ci } 31568c2ecf20Sopenharmony_ci /* data, middle, front */ 31578c2ecf20Sopenharmony_ci if (msg->data_length) 31588c2ecf20Sopenharmony_ci con->out_skip += msg->cursor.total_resid; 31598c2ecf20Sopenharmony_ci if (msg->middle) 31608c2ecf20Sopenharmony_ci con->out_skip += con_out_kvec_skip(con); 31618c2ecf20Sopenharmony_ci con->out_skip += con_out_kvec_skip(con); 31628c2ecf20Sopenharmony_ci 31638c2ecf20Sopenharmony_ci dout("%s %p msg %p - was sending, will write %d skip %d\n", 31648c2ecf20Sopenharmony_ci __func__, con, msg, con->out_kvec_bytes, con->out_skip); 31658c2ecf20Sopenharmony_ci msg->hdr.seq = 0; 31668c2ecf20Sopenharmony_ci con->out_msg = NULL; 31678c2ecf20Sopenharmony_ci ceph_msg_put(msg); 31688c2ecf20Sopenharmony_ci } 31698c2ecf20Sopenharmony_ci 31708c2ecf20Sopenharmony_ci mutex_unlock(&con->mutex); 31718c2ecf20Sopenharmony_ci} 31728c2ecf20Sopenharmony_ci 31738c2ecf20Sopenharmony_ci/* 31748c2ecf20Sopenharmony_ci * Revoke a message that we may be reading data into 31758c2ecf20Sopenharmony_ci */ 31768c2ecf20Sopenharmony_civoid ceph_msg_revoke_incoming(struct ceph_msg *msg) 31778c2ecf20Sopenharmony_ci{ 31788c2ecf20Sopenharmony_ci struct ceph_connection *con = msg->con; 31798c2ecf20Sopenharmony_ci 31808c2ecf20Sopenharmony_ci if (!con) { 31818c2ecf20Sopenharmony_ci dout("%s msg %p null con\n", __func__, msg); 31828c2ecf20Sopenharmony_ci return; /* Message not in our possession */ 31838c2ecf20Sopenharmony_ci } 31848c2ecf20Sopenharmony_ci 31858c2ecf20Sopenharmony_ci mutex_lock(&con->mutex); 31868c2ecf20Sopenharmony_ci if (con->in_msg == msg) { 31878c2ecf20Sopenharmony_ci unsigned int front_len = le32_to_cpu(con->in_hdr.front_len); 31888c2ecf20Sopenharmony_ci unsigned int middle_len = le32_to_cpu(con->in_hdr.middle_len); 31898c2ecf20Sopenharmony_ci unsigned int data_len = le32_to_cpu(con->in_hdr.data_len); 31908c2ecf20Sopenharmony_ci 31918c2ecf20Sopenharmony_ci /* skip rest of message */ 31928c2ecf20Sopenharmony_ci dout("%s %p msg %p revoked\n", __func__, con, msg); 31938c2ecf20Sopenharmony_ci con->in_base_pos = con->in_base_pos - 31948c2ecf20Sopenharmony_ci sizeof(struct ceph_msg_header) - 31958c2ecf20Sopenharmony_ci front_len - 31968c2ecf20Sopenharmony_ci middle_len - 31978c2ecf20Sopenharmony_ci data_len - 31988c2ecf20Sopenharmony_ci sizeof(struct ceph_msg_footer); 31998c2ecf20Sopenharmony_ci ceph_msg_put(con->in_msg); 32008c2ecf20Sopenharmony_ci con->in_msg = NULL; 32018c2ecf20Sopenharmony_ci con->in_tag = CEPH_MSGR_TAG_READY; 32028c2ecf20Sopenharmony_ci con->in_seq++; 32038c2ecf20Sopenharmony_ci } else { 32048c2ecf20Sopenharmony_ci dout("%s %p in_msg %p msg %p no-op\n", 32058c2ecf20Sopenharmony_ci __func__, con, con->in_msg, msg); 32068c2ecf20Sopenharmony_ci } 32078c2ecf20Sopenharmony_ci mutex_unlock(&con->mutex); 32088c2ecf20Sopenharmony_ci} 32098c2ecf20Sopenharmony_ci 32108c2ecf20Sopenharmony_ci/* 32118c2ecf20Sopenharmony_ci * Queue a keepalive byte to ensure the tcp connection is alive. 32128c2ecf20Sopenharmony_ci */ 32138c2ecf20Sopenharmony_civoid ceph_con_keepalive(struct ceph_connection *con) 32148c2ecf20Sopenharmony_ci{ 32158c2ecf20Sopenharmony_ci dout("con_keepalive %p\n", con); 32168c2ecf20Sopenharmony_ci mutex_lock(&con->mutex); 32178c2ecf20Sopenharmony_ci clear_standby(con); 32188c2ecf20Sopenharmony_ci con_flag_set(con, CON_FLAG_KEEPALIVE_PENDING); 32198c2ecf20Sopenharmony_ci mutex_unlock(&con->mutex); 32208c2ecf20Sopenharmony_ci 32218c2ecf20Sopenharmony_ci if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) 32228c2ecf20Sopenharmony_ci queue_con(con); 32238c2ecf20Sopenharmony_ci} 32248c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_con_keepalive); 32258c2ecf20Sopenharmony_ci 32268c2ecf20Sopenharmony_cibool ceph_con_keepalive_expired(struct ceph_connection *con, 32278c2ecf20Sopenharmony_ci unsigned long interval) 32288c2ecf20Sopenharmony_ci{ 32298c2ecf20Sopenharmony_ci if (interval > 0 && 32308c2ecf20Sopenharmony_ci (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) { 32318c2ecf20Sopenharmony_ci struct timespec64 now; 32328c2ecf20Sopenharmony_ci struct timespec64 ts; 32338c2ecf20Sopenharmony_ci ktime_get_real_ts64(&now); 32348c2ecf20Sopenharmony_ci jiffies_to_timespec64(interval, &ts); 32358c2ecf20Sopenharmony_ci ts = timespec64_add(con->last_keepalive_ack, ts); 32368c2ecf20Sopenharmony_ci return timespec64_compare(&now, &ts) >= 0; 32378c2ecf20Sopenharmony_ci } 32388c2ecf20Sopenharmony_ci return false; 32398c2ecf20Sopenharmony_ci} 32408c2ecf20Sopenharmony_ci 32418c2ecf20Sopenharmony_cistatic struct ceph_msg_data *ceph_msg_data_add(struct ceph_msg *msg) 32428c2ecf20Sopenharmony_ci{ 32438c2ecf20Sopenharmony_ci BUG_ON(msg->num_data_items >= msg->max_data_items); 32448c2ecf20Sopenharmony_ci return &msg->data[msg->num_data_items++]; 32458c2ecf20Sopenharmony_ci} 32468c2ecf20Sopenharmony_ci 32478c2ecf20Sopenharmony_cistatic void ceph_msg_data_destroy(struct ceph_msg_data *data) 32488c2ecf20Sopenharmony_ci{ 32498c2ecf20Sopenharmony_ci if (data->type == CEPH_MSG_DATA_PAGES && data->own_pages) { 32508c2ecf20Sopenharmony_ci int num_pages = calc_pages_for(data->alignment, data->length); 32518c2ecf20Sopenharmony_ci ceph_release_page_vector(data->pages, num_pages); 32528c2ecf20Sopenharmony_ci } else if (data->type == CEPH_MSG_DATA_PAGELIST) { 32538c2ecf20Sopenharmony_ci ceph_pagelist_release(data->pagelist); 32548c2ecf20Sopenharmony_ci } 32558c2ecf20Sopenharmony_ci} 32568c2ecf20Sopenharmony_ci 32578c2ecf20Sopenharmony_civoid ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, 32588c2ecf20Sopenharmony_ci size_t length, size_t alignment, bool own_pages) 32598c2ecf20Sopenharmony_ci{ 32608c2ecf20Sopenharmony_ci struct ceph_msg_data *data; 32618c2ecf20Sopenharmony_ci 32628c2ecf20Sopenharmony_ci BUG_ON(!pages); 32638c2ecf20Sopenharmony_ci BUG_ON(!length); 32648c2ecf20Sopenharmony_ci 32658c2ecf20Sopenharmony_ci data = ceph_msg_data_add(msg); 32668c2ecf20Sopenharmony_ci data->type = CEPH_MSG_DATA_PAGES; 32678c2ecf20Sopenharmony_ci data->pages = pages; 32688c2ecf20Sopenharmony_ci data->length = length; 32698c2ecf20Sopenharmony_ci data->alignment = alignment & ~PAGE_MASK; 32708c2ecf20Sopenharmony_ci data->own_pages = own_pages; 32718c2ecf20Sopenharmony_ci 32728c2ecf20Sopenharmony_ci msg->data_length += length; 32738c2ecf20Sopenharmony_ci} 32748c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_data_add_pages); 32758c2ecf20Sopenharmony_ci 32768c2ecf20Sopenharmony_civoid ceph_msg_data_add_pagelist(struct ceph_msg *msg, 32778c2ecf20Sopenharmony_ci struct ceph_pagelist *pagelist) 32788c2ecf20Sopenharmony_ci{ 32798c2ecf20Sopenharmony_ci struct ceph_msg_data *data; 32808c2ecf20Sopenharmony_ci 32818c2ecf20Sopenharmony_ci BUG_ON(!pagelist); 32828c2ecf20Sopenharmony_ci BUG_ON(!pagelist->length); 32838c2ecf20Sopenharmony_ci 32848c2ecf20Sopenharmony_ci data = ceph_msg_data_add(msg); 32858c2ecf20Sopenharmony_ci data->type = CEPH_MSG_DATA_PAGELIST; 32868c2ecf20Sopenharmony_ci refcount_inc(&pagelist->refcnt); 32878c2ecf20Sopenharmony_ci data->pagelist = pagelist; 32888c2ecf20Sopenharmony_ci 32898c2ecf20Sopenharmony_ci msg->data_length += pagelist->length; 32908c2ecf20Sopenharmony_ci} 32918c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_data_add_pagelist); 32928c2ecf20Sopenharmony_ci 32938c2ecf20Sopenharmony_ci#ifdef CONFIG_BLOCK 32948c2ecf20Sopenharmony_civoid ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, 32958c2ecf20Sopenharmony_ci u32 length) 32968c2ecf20Sopenharmony_ci{ 32978c2ecf20Sopenharmony_ci struct ceph_msg_data *data; 32988c2ecf20Sopenharmony_ci 32998c2ecf20Sopenharmony_ci data = ceph_msg_data_add(msg); 33008c2ecf20Sopenharmony_ci data->type = CEPH_MSG_DATA_BIO; 33018c2ecf20Sopenharmony_ci data->bio_pos = *bio_pos; 33028c2ecf20Sopenharmony_ci data->bio_length = length; 33038c2ecf20Sopenharmony_ci 33048c2ecf20Sopenharmony_ci msg->data_length += length; 33058c2ecf20Sopenharmony_ci} 33068c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_data_add_bio); 33078c2ecf20Sopenharmony_ci#endif /* CONFIG_BLOCK */ 33088c2ecf20Sopenharmony_ci 33098c2ecf20Sopenharmony_civoid ceph_msg_data_add_bvecs(struct ceph_msg *msg, 33108c2ecf20Sopenharmony_ci struct ceph_bvec_iter *bvec_pos) 33118c2ecf20Sopenharmony_ci{ 33128c2ecf20Sopenharmony_ci struct ceph_msg_data *data; 33138c2ecf20Sopenharmony_ci 33148c2ecf20Sopenharmony_ci data = ceph_msg_data_add(msg); 33158c2ecf20Sopenharmony_ci data->type = CEPH_MSG_DATA_BVECS; 33168c2ecf20Sopenharmony_ci data->bvec_pos = *bvec_pos; 33178c2ecf20Sopenharmony_ci 33188c2ecf20Sopenharmony_ci msg->data_length += bvec_pos->iter.bi_size; 33198c2ecf20Sopenharmony_ci} 33208c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_data_add_bvecs); 33218c2ecf20Sopenharmony_ci 33228c2ecf20Sopenharmony_ci/* 33238c2ecf20Sopenharmony_ci * construct a new message with given type, size 33248c2ecf20Sopenharmony_ci * the new msg has a ref count of 1. 33258c2ecf20Sopenharmony_ci */ 33268c2ecf20Sopenharmony_cistruct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items, 33278c2ecf20Sopenharmony_ci gfp_t flags, bool can_fail) 33288c2ecf20Sopenharmony_ci{ 33298c2ecf20Sopenharmony_ci struct ceph_msg *m; 33308c2ecf20Sopenharmony_ci 33318c2ecf20Sopenharmony_ci m = kmem_cache_zalloc(ceph_msg_cache, flags); 33328c2ecf20Sopenharmony_ci if (m == NULL) 33338c2ecf20Sopenharmony_ci goto out; 33348c2ecf20Sopenharmony_ci 33358c2ecf20Sopenharmony_ci m->hdr.type = cpu_to_le16(type); 33368c2ecf20Sopenharmony_ci m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); 33378c2ecf20Sopenharmony_ci m->hdr.front_len = cpu_to_le32(front_len); 33388c2ecf20Sopenharmony_ci 33398c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&m->list_head); 33408c2ecf20Sopenharmony_ci kref_init(&m->kref); 33418c2ecf20Sopenharmony_ci 33428c2ecf20Sopenharmony_ci /* front */ 33438c2ecf20Sopenharmony_ci if (front_len) { 33448c2ecf20Sopenharmony_ci m->front.iov_base = ceph_kvmalloc(front_len, flags); 33458c2ecf20Sopenharmony_ci if (m->front.iov_base == NULL) { 33468c2ecf20Sopenharmony_ci dout("ceph_msg_new can't allocate %d bytes\n", 33478c2ecf20Sopenharmony_ci front_len); 33488c2ecf20Sopenharmony_ci goto out2; 33498c2ecf20Sopenharmony_ci } 33508c2ecf20Sopenharmony_ci } else { 33518c2ecf20Sopenharmony_ci m->front.iov_base = NULL; 33528c2ecf20Sopenharmony_ci } 33538c2ecf20Sopenharmony_ci m->front_alloc_len = m->front.iov_len = front_len; 33548c2ecf20Sopenharmony_ci 33558c2ecf20Sopenharmony_ci if (max_data_items) { 33568c2ecf20Sopenharmony_ci m->data = kmalloc_array(max_data_items, sizeof(*m->data), 33578c2ecf20Sopenharmony_ci flags); 33588c2ecf20Sopenharmony_ci if (!m->data) 33598c2ecf20Sopenharmony_ci goto out2; 33608c2ecf20Sopenharmony_ci 33618c2ecf20Sopenharmony_ci m->max_data_items = max_data_items; 33628c2ecf20Sopenharmony_ci } 33638c2ecf20Sopenharmony_ci 33648c2ecf20Sopenharmony_ci dout("ceph_msg_new %p front %d\n", m, front_len); 33658c2ecf20Sopenharmony_ci return m; 33668c2ecf20Sopenharmony_ci 33678c2ecf20Sopenharmony_ciout2: 33688c2ecf20Sopenharmony_ci ceph_msg_put(m); 33698c2ecf20Sopenharmony_ciout: 33708c2ecf20Sopenharmony_ci if (!can_fail) { 33718c2ecf20Sopenharmony_ci pr_err("msg_new can't create type %d front %d\n", type, 33728c2ecf20Sopenharmony_ci front_len); 33738c2ecf20Sopenharmony_ci WARN_ON(1); 33748c2ecf20Sopenharmony_ci } else { 33758c2ecf20Sopenharmony_ci dout("msg_new can't create type %d front %d\n", type, 33768c2ecf20Sopenharmony_ci front_len); 33778c2ecf20Sopenharmony_ci } 33788c2ecf20Sopenharmony_ci return NULL; 33798c2ecf20Sopenharmony_ci} 33808c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_new2); 33818c2ecf20Sopenharmony_ci 33828c2ecf20Sopenharmony_cistruct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, 33838c2ecf20Sopenharmony_ci bool can_fail) 33848c2ecf20Sopenharmony_ci{ 33858c2ecf20Sopenharmony_ci return ceph_msg_new2(type, front_len, 0, flags, can_fail); 33868c2ecf20Sopenharmony_ci} 33878c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_new); 33888c2ecf20Sopenharmony_ci 33898c2ecf20Sopenharmony_ci/* 33908c2ecf20Sopenharmony_ci * Allocate "middle" portion of a message, if it is needed and wasn't 33918c2ecf20Sopenharmony_ci * allocated by alloc_msg. This allows us to read a small fixed-size 33928c2ecf20Sopenharmony_ci * per-type header in the front and then gracefully fail (i.e., 33938c2ecf20Sopenharmony_ci * propagate the error to the caller based on info in the front) when 33948c2ecf20Sopenharmony_ci * the middle is too large. 33958c2ecf20Sopenharmony_ci */ 33968c2ecf20Sopenharmony_cistatic int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg) 33978c2ecf20Sopenharmony_ci{ 33988c2ecf20Sopenharmony_ci int type = le16_to_cpu(msg->hdr.type); 33998c2ecf20Sopenharmony_ci int middle_len = le32_to_cpu(msg->hdr.middle_len); 34008c2ecf20Sopenharmony_ci 34018c2ecf20Sopenharmony_ci dout("alloc_middle %p type %d %s middle_len %d\n", msg, type, 34028c2ecf20Sopenharmony_ci ceph_msg_type_name(type), middle_len); 34038c2ecf20Sopenharmony_ci BUG_ON(!middle_len); 34048c2ecf20Sopenharmony_ci BUG_ON(msg->middle); 34058c2ecf20Sopenharmony_ci 34068c2ecf20Sopenharmony_ci msg->middle = ceph_buffer_new(middle_len, GFP_NOFS); 34078c2ecf20Sopenharmony_ci if (!msg->middle) 34088c2ecf20Sopenharmony_ci return -ENOMEM; 34098c2ecf20Sopenharmony_ci return 0; 34108c2ecf20Sopenharmony_ci} 34118c2ecf20Sopenharmony_ci 34128c2ecf20Sopenharmony_ci/* 34138c2ecf20Sopenharmony_ci * Allocate a message for receiving an incoming message on a 34148c2ecf20Sopenharmony_ci * connection, and save the result in con->in_msg. Uses the 34158c2ecf20Sopenharmony_ci * connection's private alloc_msg op if available. 34168c2ecf20Sopenharmony_ci * 34178c2ecf20Sopenharmony_ci * Returns 0 on success, or a negative error code. 34188c2ecf20Sopenharmony_ci * 34198c2ecf20Sopenharmony_ci * On success, if we set *skip = 1: 34208c2ecf20Sopenharmony_ci * - the next message should be skipped and ignored. 34218c2ecf20Sopenharmony_ci * - con->in_msg == NULL 34228c2ecf20Sopenharmony_ci * or if we set *skip = 0: 34238c2ecf20Sopenharmony_ci * - con->in_msg is non-null. 34248c2ecf20Sopenharmony_ci * On error (ENOMEM, EAGAIN, ...), 34258c2ecf20Sopenharmony_ci * - con->in_msg == NULL 34268c2ecf20Sopenharmony_ci */ 34278c2ecf20Sopenharmony_cistatic int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) 34288c2ecf20Sopenharmony_ci{ 34298c2ecf20Sopenharmony_ci struct ceph_msg_header *hdr = &con->in_hdr; 34308c2ecf20Sopenharmony_ci int middle_len = le32_to_cpu(hdr->middle_len); 34318c2ecf20Sopenharmony_ci struct ceph_msg *msg; 34328c2ecf20Sopenharmony_ci int ret = 0; 34338c2ecf20Sopenharmony_ci 34348c2ecf20Sopenharmony_ci BUG_ON(con->in_msg != NULL); 34358c2ecf20Sopenharmony_ci BUG_ON(!con->ops->alloc_msg); 34368c2ecf20Sopenharmony_ci 34378c2ecf20Sopenharmony_ci mutex_unlock(&con->mutex); 34388c2ecf20Sopenharmony_ci msg = con->ops->alloc_msg(con, hdr, skip); 34398c2ecf20Sopenharmony_ci mutex_lock(&con->mutex); 34408c2ecf20Sopenharmony_ci if (con->state != CON_STATE_OPEN) { 34418c2ecf20Sopenharmony_ci if (msg) 34428c2ecf20Sopenharmony_ci ceph_msg_put(msg); 34438c2ecf20Sopenharmony_ci return -EAGAIN; 34448c2ecf20Sopenharmony_ci } 34458c2ecf20Sopenharmony_ci if (msg) { 34468c2ecf20Sopenharmony_ci BUG_ON(*skip); 34478c2ecf20Sopenharmony_ci msg_con_set(msg, con); 34488c2ecf20Sopenharmony_ci con->in_msg = msg; 34498c2ecf20Sopenharmony_ci } else { 34508c2ecf20Sopenharmony_ci /* 34518c2ecf20Sopenharmony_ci * Null message pointer means either we should skip 34528c2ecf20Sopenharmony_ci * this message or we couldn't allocate memory. The 34538c2ecf20Sopenharmony_ci * former is not an error. 34548c2ecf20Sopenharmony_ci */ 34558c2ecf20Sopenharmony_ci if (*skip) 34568c2ecf20Sopenharmony_ci return 0; 34578c2ecf20Sopenharmony_ci 34588c2ecf20Sopenharmony_ci con->error_msg = "error allocating memory for incoming message"; 34598c2ecf20Sopenharmony_ci return -ENOMEM; 34608c2ecf20Sopenharmony_ci } 34618c2ecf20Sopenharmony_ci memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); 34628c2ecf20Sopenharmony_ci 34638c2ecf20Sopenharmony_ci if (middle_len && !con->in_msg->middle) { 34648c2ecf20Sopenharmony_ci ret = ceph_alloc_middle(con, con->in_msg); 34658c2ecf20Sopenharmony_ci if (ret < 0) { 34668c2ecf20Sopenharmony_ci ceph_msg_put(con->in_msg); 34678c2ecf20Sopenharmony_ci con->in_msg = NULL; 34688c2ecf20Sopenharmony_ci } 34698c2ecf20Sopenharmony_ci } 34708c2ecf20Sopenharmony_ci 34718c2ecf20Sopenharmony_ci return ret; 34728c2ecf20Sopenharmony_ci} 34738c2ecf20Sopenharmony_ci 34748c2ecf20Sopenharmony_ci 34758c2ecf20Sopenharmony_ci/* 34768c2ecf20Sopenharmony_ci * Free a generically kmalloc'd message. 34778c2ecf20Sopenharmony_ci */ 34788c2ecf20Sopenharmony_cistatic void ceph_msg_free(struct ceph_msg *m) 34798c2ecf20Sopenharmony_ci{ 34808c2ecf20Sopenharmony_ci dout("%s %p\n", __func__, m); 34818c2ecf20Sopenharmony_ci kvfree(m->front.iov_base); 34828c2ecf20Sopenharmony_ci kfree(m->data); 34838c2ecf20Sopenharmony_ci kmem_cache_free(ceph_msg_cache, m); 34848c2ecf20Sopenharmony_ci} 34858c2ecf20Sopenharmony_ci 34868c2ecf20Sopenharmony_cistatic void ceph_msg_release(struct kref *kref) 34878c2ecf20Sopenharmony_ci{ 34888c2ecf20Sopenharmony_ci struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); 34898c2ecf20Sopenharmony_ci int i; 34908c2ecf20Sopenharmony_ci 34918c2ecf20Sopenharmony_ci dout("%s %p\n", __func__, m); 34928c2ecf20Sopenharmony_ci WARN_ON(!list_empty(&m->list_head)); 34938c2ecf20Sopenharmony_ci 34948c2ecf20Sopenharmony_ci msg_con_set(m, NULL); 34958c2ecf20Sopenharmony_ci 34968c2ecf20Sopenharmony_ci /* drop middle, data, if any */ 34978c2ecf20Sopenharmony_ci if (m->middle) { 34988c2ecf20Sopenharmony_ci ceph_buffer_put(m->middle); 34998c2ecf20Sopenharmony_ci m->middle = NULL; 35008c2ecf20Sopenharmony_ci } 35018c2ecf20Sopenharmony_ci 35028c2ecf20Sopenharmony_ci for (i = 0; i < m->num_data_items; i++) 35038c2ecf20Sopenharmony_ci ceph_msg_data_destroy(&m->data[i]); 35048c2ecf20Sopenharmony_ci 35058c2ecf20Sopenharmony_ci if (m->pool) 35068c2ecf20Sopenharmony_ci ceph_msgpool_put(m->pool, m); 35078c2ecf20Sopenharmony_ci else 35088c2ecf20Sopenharmony_ci ceph_msg_free(m); 35098c2ecf20Sopenharmony_ci} 35108c2ecf20Sopenharmony_ci 35118c2ecf20Sopenharmony_cistruct ceph_msg *ceph_msg_get(struct ceph_msg *msg) 35128c2ecf20Sopenharmony_ci{ 35138c2ecf20Sopenharmony_ci dout("%s %p (was %d)\n", __func__, msg, 35148c2ecf20Sopenharmony_ci kref_read(&msg->kref)); 35158c2ecf20Sopenharmony_ci kref_get(&msg->kref); 35168c2ecf20Sopenharmony_ci return msg; 35178c2ecf20Sopenharmony_ci} 35188c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_get); 35198c2ecf20Sopenharmony_ci 35208c2ecf20Sopenharmony_civoid ceph_msg_put(struct ceph_msg *msg) 35218c2ecf20Sopenharmony_ci{ 35228c2ecf20Sopenharmony_ci dout("%s %p (was %d)\n", __func__, msg, 35238c2ecf20Sopenharmony_ci kref_read(&msg->kref)); 35248c2ecf20Sopenharmony_ci kref_put(&msg->kref, ceph_msg_release); 35258c2ecf20Sopenharmony_ci} 35268c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_put); 35278c2ecf20Sopenharmony_ci 35288c2ecf20Sopenharmony_civoid ceph_msg_dump(struct ceph_msg *msg) 35298c2ecf20Sopenharmony_ci{ 35308c2ecf20Sopenharmony_ci pr_debug("msg_dump %p (front_alloc_len %d length %zd)\n", msg, 35318c2ecf20Sopenharmony_ci msg->front_alloc_len, msg->data_length); 35328c2ecf20Sopenharmony_ci print_hex_dump(KERN_DEBUG, "header: ", 35338c2ecf20Sopenharmony_ci DUMP_PREFIX_OFFSET, 16, 1, 35348c2ecf20Sopenharmony_ci &msg->hdr, sizeof(msg->hdr), true); 35358c2ecf20Sopenharmony_ci print_hex_dump(KERN_DEBUG, " front: ", 35368c2ecf20Sopenharmony_ci DUMP_PREFIX_OFFSET, 16, 1, 35378c2ecf20Sopenharmony_ci msg->front.iov_base, msg->front.iov_len, true); 35388c2ecf20Sopenharmony_ci if (msg->middle) 35398c2ecf20Sopenharmony_ci print_hex_dump(KERN_DEBUG, "middle: ", 35408c2ecf20Sopenharmony_ci DUMP_PREFIX_OFFSET, 16, 1, 35418c2ecf20Sopenharmony_ci msg->middle->vec.iov_base, 35428c2ecf20Sopenharmony_ci msg->middle->vec.iov_len, true); 35438c2ecf20Sopenharmony_ci print_hex_dump(KERN_DEBUG, "footer: ", 35448c2ecf20Sopenharmony_ci DUMP_PREFIX_OFFSET, 16, 1, 35458c2ecf20Sopenharmony_ci &msg->footer, sizeof(msg->footer), true); 35468c2ecf20Sopenharmony_ci} 35478c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_dump); 3548