18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci#include <linux/ceph/ceph_debug.h>
38c2ecf20Sopenharmony_ci
48c2ecf20Sopenharmony_ci#include <linux/crc32c.h>
58c2ecf20Sopenharmony_ci#include <linux/ctype.h>
68c2ecf20Sopenharmony_ci#include <linux/highmem.h>
78c2ecf20Sopenharmony_ci#include <linux/inet.h>
88c2ecf20Sopenharmony_ci#include <linux/kthread.h>
98c2ecf20Sopenharmony_ci#include <linux/net.h>
108c2ecf20Sopenharmony_ci#include <linux/nsproxy.h>
118c2ecf20Sopenharmony_ci#include <linux/sched/mm.h>
128c2ecf20Sopenharmony_ci#include <linux/slab.h>
138c2ecf20Sopenharmony_ci#include <linux/socket.h>
148c2ecf20Sopenharmony_ci#include <linux/string.h>
158c2ecf20Sopenharmony_ci#ifdef	CONFIG_BLOCK
168c2ecf20Sopenharmony_ci#include <linux/bio.h>
178c2ecf20Sopenharmony_ci#endif	/* CONFIG_BLOCK */
188c2ecf20Sopenharmony_ci#include <linux/dns_resolver.h>
198c2ecf20Sopenharmony_ci#include <net/tcp.h>
208c2ecf20Sopenharmony_ci
218c2ecf20Sopenharmony_ci#include <linux/ceph/ceph_features.h>
228c2ecf20Sopenharmony_ci#include <linux/ceph/libceph.h>
238c2ecf20Sopenharmony_ci#include <linux/ceph/messenger.h>
248c2ecf20Sopenharmony_ci#include <linux/ceph/decode.h>
258c2ecf20Sopenharmony_ci#include <linux/ceph/pagelist.h>
268c2ecf20Sopenharmony_ci#include <linux/export.h>
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_ci/*
298c2ecf20Sopenharmony_ci * Ceph uses the messenger to exchange ceph_msg messages with other
308c2ecf20Sopenharmony_ci * hosts in the system.  The messenger provides ordered and reliable
318c2ecf20Sopenharmony_ci * delivery.  We tolerate TCP disconnects by reconnecting (with
328c2ecf20Sopenharmony_ci * exponential backoff) in the case of a fault (disconnection, bad
338c2ecf20Sopenharmony_ci * crc, protocol error).  Acks allow sent messages to be discarded by
348c2ecf20Sopenharmony_ci * the sender.
358c2ecf20Sopenharmony_ci */
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_ci/*
388c2ecf20Sopenharmony_ci * We track the state of the socket on a given connection using
398c2ecf20Sopenharmony_ci * values defined below.  The transition to a new socket state is
408c2ecf20Sopenharmony_ci * handled by a function which verifies we aren't coming from an
418c2ecf20Sopenharmony_ci * unexpected state.
428c2ecf20Sopenharmony_ci *
438c2ecf20Sopenharmony_ci *      --------
448c2ecf20Sopenharmony_ci *      | NEW* |  transient initial state
458c2ecf20Sopenharmony_ci *      --------
468c2ecf20Sopenharmony_ci *          | con_sock_state_init()
478c2ecf20Sopenharmony_ci *          v
488c2ecf20Sopenharmony_ci *      ----------
498c2ecf20Sopenharmony_ci *      | CLOSED |  initialized, but no socket (and no
508c2ecf20Sopenharmony_ci *      ----------  TCP connection)
518c2ecf20Sopenharmony_ci *       ^      \
528c2ecf20Sopenharmony_ci *       |       \ con_sock_state_connecting()
538c2ecf20Sopenharmony_ci *       |        ----------------------
548c2ecf20Sopenharmony_ci *       |                              \
558c2ecf20Sopenharmony_ci *       + con_sock_state_closed()       \
568c2ecf20Sopenharmony_ci *       |+---------------------------    \
578c2ecf20Sopenharmony_ci *       | \                          \    \
588c2ecf20Sopenharmony_ci *       |  -----------                \    \
598c2ecf20Sopenharmony_ci *       |  | CLOSING |  socket event;  \    \
608c2ecf20Sopenharmony_ci *       |  -----------  await close     \    \
618c2ecf20Sopenharmony_ci *       |       ^                        \   |
628c2ecf20Sopenharmony_ci *       |       |                         \  |
638c2ecf20Sopenharmony_ci *       |       + con_sock_state_closing() \ |
648c2ecf20Sopenharmony_ci *       |      / \                         | |
658c2ecf20Sopenharmony_ci *       |     /   ---------------          | |
668c2ecf20Sopenharmony_ci *       |    /                   \         v v
678c2ecf20Sopenharmony_ci *       |   /                    --------------
688c2ecf20Sopenharmony_ci *       |  /    -----------------| CONNECTING |  socket created, TCP
698c2ecf20Sopenharmony_ci *       |  |   /                 --------------  connect initiated
708c2ecf20Sopenharmony_ci *       |  |   | con_sock_state_connected()
718c2ecf20Sopenharmony_ci *       |  |   v
728c2ecf20Sopenharmony_ci *      -------------
738c2ecf20Sopenharmony_ci *      | CONNECTED |  TCP connection established
748c2ecf20Sopenharmony_ci *      -------------
758c2ecf20Sopenharmony_ci *
768c2ecf20Sopenharmony_ci * State values for ceph_connection->sock_state; NEW is assumed to be 0.
778c2ecf20Sopenharmony_ci */
788c2ecf20Sopenharmony_ci
/* Socket states; the trailing comment lists the permitted next state(s). */
#define CON_SOCK_STATE_NEW		0	/* next: CLOSED */
#define CON_SOCK_STATE_CLOSED		1	/* next: CONNECTING */
#define CON_SOCK_STATE_CONNECTING	2	/* next: CONNECTED or CLOSING */
#define CON_SOCK_STATE_CONNECTED	3	/* next: CLOSING or CLOSED */
#define CON_SOCK_STATE_CLOSING		4	/* next: CLOSED */
848c2ecf20Sopenharmony_ci
/*
 * Connection states.  The trailing comment on each value lists the
 * states that may follow it.
 */
#define CON_STATE_CLOSED	1	/* next: PREOPEN */
#define CON_STATE_PREOPEN	2	/* next: CONNECTING, CLOSED */
#define CON_STATE_CONNECTING	3	/* next: NEGOTIATING, CLOSED */
#define CON_STATE_NEGOTIATING	4	/* next: OPEN, CLOSED */
#define CON_STATE_OPEN		5	/* next: STANDBY, CLOSED */
#define CON_STATE_STANDBY	6	/* next: PREOPEN, CLOSED */
948c2ecf20Sopenharmony_ci
/*
 * Bit numbers within ceph_connection->flags.
 */
/* we can close channel or drop messages on errors */
#define CON_FLAG_LOSSYTX		0
/* we need to send a keepalive */
#define CON_FLAG_KEEPALIVE_PENDING	1
/* we have data ready to send */
#define CON_FLAG_WRITE_PENDING		2
/* socket state changed to closed */
#define CON_FLAG_SOCK_CLOSED		3
/* need to retry queuing delayed work */
#define CON_FLAG_BACKOFF		4
1048c2ecf20Sopenharmony_ci
1058c2ecf20Sopenharmony_cistatic bool con_flag_valid(unsigned long con_flag)
1068c2ecf20Sopenharmony_ci{
1078c2ecf20Sopenharmony_ci	switch (con_flag) {
1088c2ecf20Sopenharmony_ci	case CON_FLAG_LOSSYTX:
1098c2ecf20Sopenharmony_ci	case CON_FLAG_KEEPALIVE_PENDING:
1108c2ecf20Sopenharmony_ci	case CON_FLAG_WRITE_PENDING:
1118c2ecf20Sopenharmony_ci	case CON_FLAG_SOCK_CLOSED:
1128c2ecf20Sopenharmony_ci	case CON_FLAG_BACKOFF:
1138c2ecf20Sopenharmony_ci		return true;
1148c2ecf20Sopenharmony_ci	default:
1158c2ecf20Sopenharmony_ci		return false;
1168c2ecf20Sopenharmony_ci	}
1178c2ecf20Sopenharmony_ci}
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_cistatic void con_flag_clear(struct ceph_connection *con, unsigned long con_flag)
1208c2ecf20Sopenharmony_ci{
1218c2ecf20Sopenharmony_ci	BUG_ON(!con_flag_valid(con_flag));
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ci	clear_bit(con_flag, &con->flags);
1248c2ecf20Sopenharmony_ci}
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_cistatic void con_flag_set(struct ceph_connection *con, unsigned long con_flag)
1278c2ecf20Sopenharmony_ci{
1288c2ecf20Sopenharmony_ci	BUG_ON(!con_flag_valid(con_flag));
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci	set_bit(con_flag, &con->flags);
1318c2ecf20Sopenharmony_ci}
1328c2ecf20Sopenharmony_ci
1338c2ecf20Sopenharmony_cistatic bool con_flag_test(struct ceph_connection *con, unsigned long con_flag)
1348c2ecf20Sopenharmony_ci{
1358c2ecf20Sopenharmony_ci	BUG_ON(!con_flag_valid(con_flag));
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_ci	return test_bit(con_flag, &con->flags);
1388c2ecf20Sopenharmony_ci}
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_cistatic bool con_flag_test_and_clear(struct ceph_connection *con,
1418c2ecf20Sopenharmony_ci					unsigned long con_flag)
1428c2ecf20Sopenharmony_ci{
1438c2ecf20Sopenharmony_ci	BUG_ON(!con_flag_valid(con_flag));
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_ci	return test_and_clear_bit(con_flag, &con->flags);
1468c2ecf20Sopenharmony_ci}
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_cistatic bool con_flag_test_and_set(struct ceph_connection *con,
1498c2ecf20Sopenharmony_ci					unsigned long con_flag)
1508c2ecf20Sopenharmony_ci{
1518c2ecf20Sopenharmony_ci	BUG_ON(!con_flag_valid(con_flag));
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci	return test_and_set_bit(con_flag, &con->flags);
1548c2ecf20Sopenharmony_ci}
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_ci/* Slab caches for frequently-allocated structures */
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_cistatic struct kmem_cache	*ceph_msg_cache;
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci/* static tag bytes (protocol control messages) */
1618c2ecf20Sopenharmony_cistatic char tag_msg = CEPH_MSGR_TAG_MSG;
1628c2ecf20Sopenharmony_cistatic char tag_ack = CEPH_MSGR_TAG_ACK;
1638c2ecf20Sopenharmony_cistatic char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
1648c2ecf20Sopenharmony_cistatic char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2;
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_ci#ifdef CONFIG_LOCKDEP
1678c2ecf20Sopenharmony_cistatic struct lock_class_key socket_class;
1688c2ecf20Sopenharmony_ci#endif
1698c2ecf20Sopenharmony_ci
/* Forward declarations of static helpers used before their definitions */
static void ceph_con_workfn(struct work_struct *work);
static void con_fault(struct ceph_connection *con);
static void queue_con(struct ceph_connection *con);
static void cancel_con(struct ceph_connection *con);
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci/*
1768c2ecf20Sopenharmony_ci * Nicely render a sockaddr as a string.  An array of formatted
1778c2ecf20Sopenharmony_ci * strings is used, to approximate reentrancy.
1788c2ecf20Sopenharmony_ci */
1798c2ecf20Sopenharmony_ci#define ADDR_STR_COUNT_LOG	5	/* log2(# address strings in array) */
1808c2ecf20Sopenharmony_ci#define ADDR_STR_COUNT		(1 << ADDR_STR_COUNT_LOG)
1818c2ecf20Sopenharmony_ci#define ADDR_STR_COUNT_MASK	(ADDR_STR_COUNT - 1)
1828c2ecf20Sopenharmony_ci#define MAX_ADDR_STR_LEN	64	/* 54 is enough */
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_cistatic char addr_str[ADDR_STR_COUNT][MAX_ADDR_STR_LEN];
1858c2ecf20Sopenharmony_cistatic atomic_t addr_str_seq = ATOMIC_INIT(0);
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_cistatic struct page *zero_page;		/* used in certain error cases */
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_ciconst char *ceph_pr_addr(const struct ceph_entity_addr *addr)
1908c2ecf20Sopenharmony_ci{
1918c2ecf20Sopenharmony_ci	int i;
1928c2ecf20Sopenharmony_ci	char *s;
1938c2ecf20Sopenharmony_ci	struct sockaddr_storage ss = addr->in_addr; /* align */
1948c2ecf20Sopenharmony_ci	struct sockaddr_in *in4 = (struct sockaddr_in *)&ss;
1958c2ecf20Sopenharmony_ci	struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)&ss;
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_ci	i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK;
1988c2ecf20Sopenharmony_ci	s = addr_str[i];
1998c2ecf20Sopenharmony_ci
2008c2ecf20Sopenharmony_ci	switch (ss.ss_family) {
2018c2ecf20Sopenharmony_ci	case AF_INET:
2028c2ecf20Sopenharmony_ci		snprintf(s, MAX_ADDR_STR_LEN, "(%d)%pI4:%hu",
2038c2ecf20Sopenharmony_ci			 le32_to_cpu(addr->type), &in4->sin_addr,
2048c2ecf20Sopenharmony_ci			 ntohs(in4->sin_port));
2058c2ecf20Sopenharmony_ci		break;
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_ci	case AF_INET6:
2088c2ecf20Sopenharmony_ci		snprintf(s, MAX_ADDR_STR_LEN, "(%d)[%pI6c]:%hu",
2098c2ecf20Sopenharmony_ci			 le32_to_cpu(addr->type), &in6->sin6_addr,
2108c2ecf20Sopenharmony_ci			 ntohs(in6->sin6_port));
2118c2ecf20Sopenharmony_ci		break;
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_ci	default:
2148c2ecf20Sopenharmony_ci		snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)",
2158c2ecf20Sopenharmony_ci			 ss.ss_family);
2168c2ecf20Sopenharmony_ci	}
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	return s;
2198c2ecf20Sopenharmony_ci}
2208c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_pr_addr);
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_cistatic void encode_my_addr(struct ceph_messenger *msgr)
2238c2ecf20Sopenharmony_ci{
2248c2ecf20Sopenharmony_ci	memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr));
2258c2ecf20Sopenharmony_ci	ceph_encode_banner_addr(&msgr->my_enc_addr);
2268c2ecf20Sopenharmony_ci}
2278c2ecf20Sopenharmony_ci
/*
 * Work queue used for all reading from and writing to the socket.
 * Allocated in ceph_msgr_init() and destroyed in _ceph_msgr_exit().
 */
static struct workqueue_struct *ceph_msgr_wq;
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_cistatic int ceph_msgr_slab_init(void)
2348c2ecf20Sopenharmony_ci{
2358c2ecf20Sopenharmony_ci	BUG_ON(ceph_msg_cache);
2368c2ecf20Sopenharmony_ci	ceph_msg_cache = KMEM_CACHE(ceph_msg, 0);
2378c2ecf20Sopenharmony_ci	if (!ceph_msg_cache)
2388c2ecf20Sopenharmony_ci		return -ENOMEM;
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ci	return 0;
2418c2ecf20Sopenharmony_ci}
2428c2ecf20Sopenharmony_ci
2438c2ecf20Sopenharmony_cistatic void ceph_msgr_slab_exit(void)
2448c2ecf20Sopenharmony_ci{
2458c2ecf20Sopenharmony_ci	BUG_ON(!ceph_msg_cache);
2468c2ecf20Sopenharmony_ci	kmem_cache_destroy(ceph_msg_cache);
2478c2ecf20Sopenharmony_ci	ceph_msg_cache = NULL;
2488c2ecf20Sopenharmony_ci}
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_cistatic void _ceph_msgr_exit(void)
2518c2ecf20Sopenharmony_ci{
2528c2ecf20Sopenharmony_ci	if (ceph_msgr_wq) {
2538c2ecf20Sopenharmony_ci		destroy_workqueue(ceph_msgr_wq);
2548c2ecf20Sopenharmony_ci		ceph_msgr_wq = NULL;
2558c2ecf20Sopenharmony_ci	}
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci	BUG_ON(zero_page == NULL);
2588c2ecf20Sopenharmony_ci	put_page(zero_page);
2598c2ecf20Sopenharmony_ci	zero_page = NULL;
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_ci	ceph_msgr_slab_exit();
2628c2ecf20Sopenharmony_ci}
2638c2ecf20Sopenharmony_ci
2648c2ecf20Sopenharmony_ciint __init ceph_msgr_init(void)
2658c2ecf20Sopenharmony_ci{
2668c2ecf20Sopenharmony_ci	if (ceph_msgr_slab_init())
2678c2ecf20Sopenharmony_ci		return -ENOMEM;
2688c2ecf20Sopenharmony_ci
2698c2ecf20Sopenharmony_ci	BUG_ON(zero_page != NULL);
2708c2ecf20Sopenharmony_ci	zero_page = ZERO_PAGE(0);
2718c2ecf20Sopenharmony_ci	get_page(zero_page);
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_ci	/*
2748c2ecf20Sopenharmony_ci	 * The number of active work items is limited by the number of
2758c2ecf20Sopenharmony_ci	 * connections, so leave @max_active at default.
2768c2ecf20Sopenharmony_ci	 */
2778c2ecf20Sopenharmony_ci	ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_MEM_RECLAIM, 0);
2788c2ecf20Sopenharmony_ci	if (ceph_msgr_wq)
2798c2ecf20Sopenharmony_ci		return 0;
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_ci	pr_err("msgr_init failed to create workqueue\n");
2828c2ecf20Sopenharmony_ci	_ceph_msgr_exit();
2838c2ecf20Sopenharmony_ci
2848c2ecf20Sopenharmony_ci	return -ENOMEM;
2858c2ecf20Sopenharmony_ci}
2868c2ecf20Sopenharmony_ci
2878c2ecf20Sopenharmony_civoid ceph_msgr_exit(void)
2888c2ecf20Sopenharmony_ci{
2898c2ecf20Sopenharmony_ci	BUG_ON(ceph_msgr_wq == NULL);
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_ci	_ceph_msgr_exit();
2928c2ecf20Sopenharmony_ci}
2938c2ecf20Sopenharmony_ci
2948c2ecf20Sopenharmony_civoid ceph_msgr_flush(void)
2958c2ecf20Sopenharmony_ci{
2968c2ecf20Sopenharmony_ci	flush_workqueue(ceph_msgr_wq);
2978c2ecf20Sopenharmony_ci}
2988c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msgr_flush);
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_ci/* Connection socket state transition functions */
3018c2ecf20Sopenharmony_ci
3028c2ecf20Sopenharmony_cistatic void con_sock_state_init(struct ceph_connection *con)
3038c2ecf20Sopenharmony_ci{
3048c2ecf20Sopenharmony_ci	int old_state;
3058c2ecf20Sopenharmony_ci
3068c2ecf20Sopenharmony_ci	old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
3078c2ecf20Sopenharmony_ci	if (WARN_ON(old_state != CON_SOCK_STATE_NEW))
3088c2ecf20Sopenharmony_ci		printk("%s: unexpected old state %d\n", __func__, old_state);
3098c2ecf20Sopenharmony_ci	dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
3108c2ecf20Sopenharmony_ci	     CON_SOCK_STATE_CLOSED);
3118c2ecf20Sopenharmony_ci}
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_cistatic void con_sock_state_connecting(struct ceph_connection *con)
3148c2ecf20Sopenharmony_ci{
3158c2ecf20Sopenharmony_ci	int old_state;
3168c2ecf20Sopenharmony_ci
3178c2ecf20Sopenharmony_ci	old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING);
3188c2ecf20Sopenharmony_ci	if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED))
3198c2ecf20Sopenharmony_ci		printk("%s: unexpected old state %d\n", __func__, old_state);
3208c2ecf20Sopenharmony_ci	dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
3218c2ecf20Sopenharmony_ci	     CON_SOCK_STATE_CONNECTING);
3228c2ecf20Sopenharmony_ci}
3238c2ecf20Sopenharmony_ci
3248c2ecf20Sopenharmony_cistatic void con_sock_state_connected(struct ceph_connection *con)
3258c2ecf20Sopenharmony_ci{
3268c2ecf20Sopenharmony_ci	int old_state;
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_ci	old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED);
3298c2ecf20Sopenharmony_ci	if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING))
3308c2ecf20Sopenharmony_ci		printk("%s: unexpected old state %d\n", __func__, old_state);
3318c2ecf20Sopenharmony_ci	dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
3328c2ecf20Sopenharmony_ci	     CON_SOCK_STATE_CONNECTED);
3338c2ecf20Sopenharmony_ci}
3348c2ecf20Sopenharmony_ci
3358c2ecf20Sopenharmony_cistatic void con_sock_state_closing(struct ceph_connection *con)
3368c2ecf20Sopenharmony_ci{
3378c2ecf20Sopenharmony_ci	int old_state;
3388c2ecf20Sopenharmony_ci
3398c2ecf20Sopenharmony_ci	old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING);
3408c2ecf20Sopenharmony_ci	if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING &&
3418c2ecf20Sopenharmony_ci			old_state != CON_SOCK_STATE_CONNECTED &&
3428c2ecf20Sopenharmony_ci			old_state != CON_SOCK_STATE_CLOSING))
3438c2ecf20Sopenharmony_ci		printk("%s: unexpected old state %d\n", __func__, old_state);
3448c2ecf20Sopenharmony_ci	dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
3458c2ecf20Sopenharmony_ci	     CON_SOCK_STATE_CLOSING);
3468c2ecf20Sopenharmony_ci}
3478c2ecf20Sopenharmony_ci
3488c2ecf20Sopenharmony_cistatic void con_sock_state_closed(struct ceph_connection *con)
3498c2ecf20Sopenharmony_ci{
3508c2ecf20Sopenharmony_ci	int old_state;
3518c2ecf20Sopenharmony_ci
3528c2ecf20Sopenharmony_ci	old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
3538c2ecf20Sopenharmony_ci	if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED &&
3548c2ecf20Sopenharmony_ci		    old_state != CON_SOCK_STATE_CLOSING &&
3558c2ecf20Sopenharmony_ci		    old_state != CON_SOCK_STATE_CONNECTING &&
3568c2ecf20Sopenharmony_ci		    old_state != CON_SOCK_STATE_CLOSED))
3578c2ecf20Sopenharmony_ci		printk("%s: unexpected old state %d\n", __func__, old_state);
3588c2ecf20Sopenharmony_ci	dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
3598c2ecf20Sopenharmony_ci	     CON_SOCK_STATE_CLOSED);
3608c2ecf20Sopenharmony_ci}
3618c2ecf20Sopenharmony_ci
3628c2ecf20Sopenharmony_ci/*
3638c2ecf20Sopenharmony_ci * socket callback functions
3648c2ecf20Sopenharmony_ci */
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_ci/* data available on socket, or listen socket received a connect */
3678c2ecf20Sopenharmony_cistatic void ceph_sock_data_ready(struct sock *sk)
3688c2ecf20Sopenharmony_ci{
3698c2ecf20Sopenharmony_ci	struct ceph_connection *con = sk->sk_user_data;
3708c2ecf20Sopenharmony_ci	if (atomic_read(&con->msgr->stopping)) {
3718c2ecf20Sopenharmony_ci		return;
3728c2ecf20Sopenharmony_ci	}
3738c2ecf20Sopenharmony_ci
3748c2ecf20Sopenharmony_ci	if (sk->sk_state != TCP_CLOSE_WAIT) {
3758c2ecf20Sopenharmony_ci		dout("%s on %p state = %lu, queueing work\n", __func__,
3768c2ecf20Sopenharmony_ci		     con, con->state);
3778c2ecf20Sopenharmony_ci		queue_con(con);
3788c2ecf20Sopenharmony_ci	}
3798c2ecf20Sopenharmony_ci}
3808c2ecf20Sopenharmony_ci
3818c2ecf20Sopenharmony_ci/* socket has buffer space for writing */
3828c2ecf20Sopenharmony_cistatic void ceph_sock_write_space(struct sock *sk)
3838c2ecf20Sopenharmony_ci{
3848c2ecf20Sopenharmony_ci	struct ceph_connection *con = sk->sk_user_data;
3858c2ecf20Sopenharmony_ci
3868c2ecf20Sopenharmony_ci	/* only queue to workqueue if there is data we want to write,
3878c2ecf20Sopenharmony_ci	 * and there is sufficient space in the socket buffer to accept
3888c2ecf20Sopenharmony_ci	 * more data.  clear SOCK_NOSPACE so that ceph_sock_write_space()
3898c2ecf20Sopenharmony_ci	 * doesn't get called again until try_write() fills the socket
3908c2ecf20Sopenharmony_ci	 * buffer. See net/ipv4/tcp_input.c:tcp_check_space()
3918c2ecf20Sopenharmony_ci	 * and net/core/stream.c:sk_stream_write_space().
3928c2ecf20Sopenharmony_ci	 */
3938c2ecf20Sopenharmony_ci	if (con_flag_test(con, CON_FLAG_WRITE_PENDING)) {
3948c2ecf20Sopenharmony_ci		if (sk_stream_is_writeable(sk)) {
3958c2ecf20Sopenharmony_ci			dout("%s %p queueing write work\n", __func__, con);
3968c2ecf20Sopenharmony_ci			clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
3978c2ecf20Sopenharmony_ci			queue_con(con);
3988c2ecf20Sopenharmony_ci		}
3998c2ecf20Sopenharmony_ci	} else {
4008c2ecf20Sopenharmony_ci		dout("%s %p nothing to write\n", __func__, con);
4018c2ecf20Sopenharmony_ci	}
4028c2ecf20Sopenharmony_ci}
4038c2ecf20Sopenharmony_ci
4048c2ecf20Sopenharmony_ci/* socket's state has changed */
4058c2ecf20Sopenharmony_cistatic void ceph_sock_state_change(struct sock *sk)
4068c2ecf20Sopenharmony_ci{
4078c2ecf20Sopenharmony_ci	struct ceph_connection *con = sk->sk_user_data;
4088c2ecf20Sopenharmony_ci
4098c2ecf20Sopenharmony_ci	dout("%s %p state = %lu sk_state = %u\n", __func__,
4108c2ecf20Sopenharmony_ci	     con, con->state, sk->sk_state);
4118c2ecf20Sopenharmony_ci
4128c2ecf20Sopenharmony_ci	switch (sk->sk_state) {
4138c2ecf20Sopenharmony_ci	case TCP_CLOSE:
4148c2ecf20Sopenharmony_ci		dout("%s TCP_CLOSE\n", __func__);
4158c2ecf20Sopenharmony_ci		fallthrough;
4168c2ecf20Sopenharmony_ci	case TCP_CLOSE_WAIT:
4178c2ecf20Sopenharmony_ci		dout("%s TCP_CLOSE_WAIT\n", __func__);
4188c2ecf20Sopenharmony_ci		con_sock_state_closing(con);
4198c2ecf20Sopenharmony_ci		con_flag_set(con, CON_FLAG_SOCK_CLOSED);
4208c2ecf20Sopenharmony_ci		queue_con(con);
4218c2ecf20Sopenharmony_ci		break;
4228c2ecf20Sopenharmony_ci	case TCP_ESTABLISHED:
4238c2ecf20Sopenharmony_ci		dout("%s TCP_ESTABLISHED\n", __func__);
4248c2ecf20Sopenharmony_ci		con_sock_state_connected(con);
4258c2ecf20Sopenharmony_ci		queue_con(con);
4268c2ecf20Sopenharmony_ci		break;
4278c2ecf20Sopenharmony_ci	default:	/* Everything else is uninteresting */
4288c2ecf20Sopenharmony_ci		break;
4298c2ecf20Sopenharmony_ci	}
4308c2ecf20Sopenharmony_ci}
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_ci/*
4338c2ecf20Sopenharmony_ci * set up socket callbacks
4348c2ecf20Sopenharmony_ci */
4358c2ecf20Sopenharmony_cistatic void set_sock_callbacks(struct socket *sock,
4368c2ecf20Sopenharmony_ci			       struct ceph_connection *con)
4378c2ecf20Sopenharmony_ci{
4388c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
4398c2ecf20Sopenharmony_ci	sk->sk_user_data = con;
4408c2ecf20Sopenharmony_ci	sk->sk_data_ready = ceph_sock_data_ready;
4418c2ecf20Sopenharmony_ci	sk->sk_write_space = ceph_sock_write_space;
4428c2ecf20Sopenharmony_ci	sk->sk_state_change = ceph_sock_state_change;
4438c2ecf20Sopenharmony_ci}
4448c2ecf20Sopenharmony_ci
4458c2ecf20Sopenharmony_ci
4468c2ecf20Sopenharmony_ci/*
4478c2ecf20Sopenharmony_ci * socket helpers
4488c2ecf20Sopenharmony_ci */
4498c2ecf20Sopenharmony_ci
4508c2ecf20Sopenharmony_ci/*
4518c2ecf20Sopenharmony_ci * initiate connection to a remote socket.
4528c2ecf20Sopenharmony_ci */
4538c2ecf20Sopenharmony_cistatic int ceph_tcp_connect(struct ceph_connection *con)
4548c2ecf20Sopenharmony_ci{
4558c2ecf20Sopenharmony_ci	struct sockaddr_storage ss = con->peer_addr.in_addr; /* align */
4568c2ecf20Sopenharmony_ci	struct socket *sock;
4578c2ecf20Sopenharmony_ci	unsigned int noio_flag;
4588c2ecf20Sopenharmony_ci	int ret;
4598c2ecf20Sopenharmony_ci
4608c2ecf20Sopenharmony_ci	BUG_ON(con->sock);
4618c2ecf20Sopenharmony_ci
4628c2ecf20Sopenharmony_ci	/* sock_create_kern() allocates with GFP_KERNEL */
4638c2ecf20Sopenharmony_ci	noio_flag = memalloc_noio_save();
4648c2ecf20Sopenharmony_ci	ret = sock_create_kern(read_pnet(&con->msgr->net), ss.ss_family,
4658c2ecf20Sopenharmony_ci			       SOCK_STREAM, IPPROTO_TCP, &sock);
4668c2ecf20Sopenharmony_ci	memalloc_noio_restore(noio_flag);
4678c2ecf20Sopenharmony_ci	if (ret)
4688c2ecf20Sopenharmony_ci		return ret;
4698c2ecf20Sopenharmony_ci	sock->sk->sk_allocation = GFP_NOFS;
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_ci#ifdef CONFIG_LOCKDEP
4728c2ecf20Sopenharmony_ci	lockdep_set_class(&sock->sk->sk_lock, &socket_class);
4738c2ecf20Sopenharmony_ci#endif
4748c2ecf20Sopenharmony_ci
4758c2ecf20Sopenharmony_ci	set_sock_callbacks(sock, con);
4768c2ecf20Sopenharmony_ci
4778c2ecf20Sopenharmony_ci	dout("connect %s\n", ceph_pr_addr(&con->peer_addr));
4788c2ecf20Sopenharmony_ci
4798c2ecf20Sopenharmony_ci	con_sock_state_connecting(con);
4808c2ecf20Sopenharmony_ci	ret = kernel_connect(sock, (struct sockaddr *)&ss, sizeof(ss),
4818c2ecf20Sopenharmony_ci			     O_NONBLOCK);
4828c2ecf20Sopenharmony_ci	if (ret == -EINPROGRESS) {
4838c2ecf20Sopenharmony_ci		dout("connect %s EINPROGRESS sk_state = %u\n",
4848c2ecf20Sopenharmony_ci		     ceph_pr_addr(&con->peer_addr),
4858c2ecf20Sopenharmony_ci		     sock->sk->sk_state);
4868c2ecf20Sopenharmony_ci	} else if (ret < 0) {
4878c2ecf20Sopenharmony_ci		pr_err("connect %s error %d\n",
4888c2ecf20Sopenharmony_ci		       ceph_pr_addr(&con->peer_addr), ret);
4898c2ecf20Sopenharmony_ci		sock_release(sock);
4908c2ecf20Sopenharmony_ci		return ret;
4918c2ecf20Sopenharmony_ci	}
4928c2ecf20Sopenharmony_ci
4938c2ecf20Sopenharmony_ci	if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY))
4948c2ecf20Sopenharmony_ci		tcp_sock_set_nodelay(sock->sk);
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_ci	con->sock = sock;
4978c2ecf20Sopenharmony_ci	return 0;
4988c2ecf20Sopenharmony_ci}
4998c2ecf20Sopenharmony_ci
5008c2ecf20Sopenharmony_ci/*
5018c2ecf20Sopenharmony_ci * If @buf is NULL, discard up to @len bytes.
5028c2ecf20Sopenharmony_ci */
5038c2ecf20Sopenharmony_cistatic int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
5048c2ecf20Sopenharmony_ci{
5058c2ecf20Sopenharmony_ci	struct kvec iov = {buf, len};
5068c2ecf20Sopenharmony_ci	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
5078c2ecf20Sopenharmony_ci	int r;
5088c2ecf20Sopenharmony_ci
5098c2ecf20Sopenharmony_ci	if (!buf)
5108c2ecf20Sopenharmony_ci		msg.msg_flags |= MSG_TRUNC;
5118c2ecf20Sopenharmony_ci
5128c2ecf20Sopenharmony_ci	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, len);
5138c2ecf20Sopenharmony_ci	r = sock_recvmsg(sock, &msg, msg.msg_flags);
5148c2ecf20Sopenharmony_ci	if (r == -EAGAIN)
5158c2ecf20Sopenharmony_ci		r = 0;
5168c2ecf20Sopenharmony_ci	return r;
5178c2ecf20Sopenharmony_ci}
5188c2ecf20Sopenharmony_ci
5198c2ecf20Sopenharmony_cistatic int ceph_tcp_recvpage(struct socket *sock, struct page *page,
5208c2ecf20Sopenharmony_ci		     int page_offset, size_t length)
5218c2ecf20Sopenharmony_ci{
5228c2ecf20Sopenharmony_ci	struct bio_vec bvec = {
5238c2ecf20Sopenharmony_ci		.bv_page = page,
5248c2ecf20Sopenharmony_ci		.bv_offset = page_offset,
5258c2ecf20Sopenharmony_ci		.bv_len = length
5268c2ecf20Sopenharmony_ci	};
5278c2ecf20Sopenharmony_ci	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
5288c2ecf20Sopenharmony_ci	int r;
5298c2ecf20Sopenharmony_ci
5308c2ecf20Sopenharmony_ci	BUG_ON(page_offset + length > PAGE_SIZE);
5318c2ecf20Sopenharmony_ci	iov_iter_bvec(&msg.msg_iter, READ, &bvec, 1, length);
5328c2ecf20Sopenharmony_ci	r = sock_recvmsg(sock, &msg, msg.msg_flags);
5338c2ecf20Sopenharmony_ci	if (r == -EAGAIN)
5348c2ecf20Sopenharmony_ci		r = 0;
5358c2ecf20Sopenharmony_ci	return r;
5368c2ecf20Sopenharmony_ci}
5378c2ecf20Sopenharmony_ci
5388c2ecf20Sopenharmony_ci/*
5398c2ecf20Sopenharmony_ci * write something.  @more is true if caller will be sending more data
5408c2ecf20Sopenharmony_ci * shortly.
5418c2ecf20Sopenharmony_ci */
5428c2ecf20Sopenharmony_cistatic int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
5438c2ecf20Sopenharmony_ci			    size_t kvlen, size_t len, bool more)
5448c2ecf20Sopenharmony_ci{
5458c2ecf20Sopenharmony_ci	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
5468c2ecf20Sopenharmony_ci	int r;
5478c2ecf20Sopenharmony_ci
5488c2ecf20Sopenharmony_ci	if (more)
5498c2ecf20Sopenharmony_ci		msg.msg_flags |= MSG_MORE;
5508c2ecf20Sopenharmony_ci	else
5518c2ecf20Sopenharmony_ci		msg.msg_flags |= MSG_EOR;  /* superfluous, but what the hell */
5528c2ecf20Sopenharmony_ci
5538c2ecf20Sopenharmony_ci	r = kernel_sendmsg(sock, &msg, iov, kvlen, len);
5548c2ecf20Sopenharmony_ci	if (r == -EAGAIN)
5558c2ecf20Sopenharmony_ci		r = 0;
5568c2ecf20Sopenharmony_ci	return r;
5578c2ecf20Sopenharmony_ci}
5588c2ecf20Sopenharmony_ci
5598c2ecf20Sopenharmony_ci/*
5608c2ecf20Sopenharmony_ci * @more: either or both of MSG_MORE and MSG_SENDPAGE_NOTLAST
5618c2ecf20Sopenharmony_ci */
5628c2ecf20Sopenharmony_cistatic int ceph_tcp_sendpage(struct socket *sock, struct page *page,
5638c2ecf20Sopenharmony_ci			     int offset, size_t size, int more)
5648c2ecf20Sopenharmony_ci{
5658c2ecf20Sopenharmony_ci	ssize_t (*sendpage)(struct socket *sock, struct page *page,
5668c2ecf20Sopenharmony_ci			    int offset, size_t size, int flags);
5678c2ecf20Sopenharmony_ci	int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more;
5688c2ecf20Sopenharmony_ci	int ret;
5698c2ecf20Sopenharmony_ci
5708c2ecf20Sopenharmony_ci	/*
5718c2ecf20Sopenharmony_ci	 * sendpage cannot properly handle pages with page_count == 0,
5728c2ecf20Sopenharmony_ci	 * we need to fall back to sendmsg if that's the case.
5738c2ecf20Sopenharmony_ci	 *
5748c2ecf20Sopenharmony_ci	 * Same goes for slab pages: skb_can_coalesce() allows
5758c2ecf20Sopenharmony_ci	 * coalescing neighboring slab objects into a single frag which
5768c2ecf20Sopenharmony_ci	 * triggers one of hardened usercopy checks.
5778c2ecf20Sopenharmony_ci	 */
5788c2ecf20Sopenharmony_ci	if (sendpage_ok(page))
5798c2ecf20Sopenharmony_ci		sendpage = sock->ops->sendpage;
5808c2ecf20Sopenharmony_ci	else
5818c2ecf20Sopenharmony_ci		sendpage = sock_no_sendpage;
5828c2ecf20Sopenharmony_ci
5838c2ecf20Sopenharmony_ci	ret = sendpage(sock, page, offset, size, flags);
5848c2ecf20Sopenharmony_ci	if (ret == -EAGAIN)
5858c2ecf20Sopenharmony_ci		ret = 0;
5868c2ecf20Sopenharmony_ci
5878c2ecf20Sopenharmony_ci	return ret;
5888c2ecf20Sopenharmony_ci}
5898c2ecf20Sopenharmony_ci
5908c2ecf20Sopenharmony_ci/*
5918c2ecf20Sopenharmony_ci * Shutdown/close the socket for the given connection.
5928c2ecf20Sopenharmony_ci */
5938c2ecf20Sopenharmony_cistatic int con_close_socket(struct ceph_connection *con)
5948c2ecf20Sopenharmony_ci{
5958c2ecf20Sopenharmony_ci	int rc = 0;
5968c2ecf20Sopenharmony_ci
5978c2ecf20Sopenharmony_ci	dout("con_close_socket on %p sock %p\n", con, con->sock);
5988c2ecf20Sopenharmony_ci	if (con->sock) {
5998c2ecf20Sopenharmony_ci		rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
6008c2ecf20Sopenharmony_ci		sock_release(con->sock);
6018c2ecf20Sopenharmony_ci		con->sock = NULL;
6028c2ecf20Sopenharmony_ci	}
6038c2ecf20Sopenharmony_ci
6048c2ecf20Sopenharmony_ci	/*
6058c2ecf20Sopenharmony_ci	 * Forcibly clear the SOCK_CLOSED flag.  It gets set
6068c2ecf20Sopenharmony_ci	 * independent of the connection mutex, and we could have
6078c2ecf20Sopenharmony_ci	 * received a socket close event before we had the chance to
6088c2ecf20Sopenharmony_ci	 * shut the socket down.
6098c2ecf20Sopenharmony_ci	 */
6108c2ecf20Sopenharmony_ci	con_flag_clear(con, CON_FLAG_SOCK_CLOSED);
6118c2ecf20Sopenharmony_ci
6128c2ecf20Sopenharmony_ci	con_sock_state_closed(con);
6138c2ecf20Sopenharmony_ci	return rc;
6148c2ecf20Sopenharmony_ci}
6158c2ecf20Sopenharmony_ci
6168c2ecf20Sopenharmony_ci/*
6178c2ecf20Sopenharmony_ci * Reset a connection.  Discard all incoming and outgoing messages
6188c2ecf20Sopenharmony_ci * and clear *_seq state.
6198c2ecf20Sopenharmony_ci */
6208c2ecf20Sopenharmony_cistatic void ceph_msg_remove(struct ceph_msg *msg)
6218c2ecf20Sopenharmony_ci{
6228c2ecf20Sopenharmony_ci	list_del_init(&msg->list_head);
6238c2ecf20Sopenharmony_ci
6248c2ecf20Sopenharmony_ci	ceph_msg_put(msg);
6258c2ecf20Sopenharmony_ci}
6268c2ecf20Sopenharmony_cistatic void ceph_msg_remove_list(struct list_head *head)
6278c2ecf20Sopenharmony_ci{
6288c2ecf20Sopenharmony_ci	while (!list_empty(head)) {
6298c2ecf20Sopenharmony_ci		struct ceph_msg *msg = list_first_entry(head, struct ceph_msg,
6308c2ecf20Sopenharmony_ci							list_head);
6318c2ecf20Sopenharmony_ci		ceph_msg_remove(msg);
6328c2ecf20Sopenharmony_ci	}
6338c2ecf20Sopenharmony_ci}
6348c2ecf20Sopenharmony_ci
6358c2ecf20Sopenharmony_cistatic void reset_connection(struct ceph_connection *con)
6368c2ecf20Sopenharmony_ci{
6378c2ecf20Sopenharmony_ci	/* reset connection, out_queue, msg_ and connect_seq */
6388c2ecf20Sopenharmony_ci	/* discard existing out_queue and msg_seq */
6398c2ecf20Sopenharmony_ci	dout("reset_connection %p\n", con);
6408c2ecf20Sopenharmony_ci	ceph_msg_remove_list(&con->out_queue);
6418c2ecf20Sopenharmony_ci	ceph_msg_remove_list(&con->out_sent);
6428c2ecf20Sopenharmony_ci
6438c2ecf20Sopenharmony_ci	if (con->in_msg) {
6448c2ecf20Sopenharmony_ci		BUG_ON(con->in_msg->con != con);
6458c2ecf20Sopenharmony_ci		ceph_msg_put(con->in_msg);
6468c2ecf20Sopenharmony_ci		con->in_msg = NULL;
6478c2ecf20Sopenharmony_ci	}
6488c2ecf20Sopenharmony_ci
6498c2ecf20Sopenharmony_ci	con->connect_seq = 0;
6508c2ecf20Sopenharmony_ci	con->out_seq = 0;
6518c2ecf20Sopenharmony_ci	if (con->out_msg) {
6528c2ecf20Sopenharmony_ci		BUG_ON(con->out_msg->con != con);
6538c2ecf20Sopenharmony_ci		ceph_msg_put(con->out_msg);
6548c2ecf20Sopenharmony_ci		con->out_msg = NULL;
6558c2ecf20Sopenharmony_ci	}
6568c2ecf20Sopenharmony_ci	con->in_seq = 0;
6578c2ecf20Sopenharmony_ci	con->in_seq_acked = 0;
6588c2ecf20Sopenharmony_ci
6598c2ecf20Sopenharmony_ci	con->out_skip = 0;
6608c2ecf20Sopenharmony_ci}
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_ci/*
6638c2ecf20Sopenharmony_ci * mark a peer down.  drop any open connections.
6648c2ecf20Sopenharmony_ci */
6658c2ecf20Sopenharmony_civoid ceph_con_close(struct ceph_connection *con)
6668c2ecf20Sopenharmony_ci{
6678c2ecf20Sopenharmony_ci	mutex_lock(&con->mutex);
6688c2ecf20Sopenharmony_ci	dout("con_close %p peer %s\n", con, ceph_pr_addr(&con->peer_addr));
6698c2ecf20Sopenharmony_ci	con->state = CON_STATE_CLOSED;
6708c2ecf20Sopenharmony_ci
6718c2ecf20Sopenharmony_ci	con_flag_clear(con, CON_FLAG_LOSSYTX);	/* so we retry next connect */
6728c2ecf20Sopenharmony_ci	con_flag_clear(con, CON_FLAG_KEEPALIVE_PENDING);
6738c2ecf20Sopenharmony_ci	con_flag_clear(con, CON_FLAG_WRITE_PENDING);
6748c2ecf20Sopenharmony_ci	con_flag_clear(con, CON_FLAG_BACKOFF);
6758c2ecf20Sopenharmony_ci
6768c2ecf20Sopenharmony_ci	reset_connection(con);
6778c2ecf20Sopenharmony_ci	con->peer_global_seq = 0;
6788c2ecf20Sopenharmony_ci	cancel_con(con);
6798c2ecf20Sopenharmony_ci	con_close_socket(con);
6808c2ecf20Sopenharmony_ci	mutex_unlock(&con->mutex);
6818c2ecf20Sopenharmony_ci}
6828c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_con_close);
6838c2ecf20Sopenharmony_ci
6848c2ecf20Sopenharmony_ci/*
6858c2ecf20Sopenharmony_ci * Reopen a closed connection, with a new peer address.
6868c2ecf20Sopenharmony_ci */
6878c2ecf20Sopenharmony_civoid ceph_con_open(struct ceph_connection *con,
6888c2ecf20Sopenharmony_ci		   __u8 entity_type, __u64 entity_num,
6898c2ecf20Sopenharmony_ci		   struct ceph_entity_addr *addr)
6908c2ecf20Sopenharmony_ci{
6918c2ecf20Sopenharmony_ci	mutex_lock(&con->mutex);
6928c2ecf20Sopenharmony_ci	dout("con_open %p %s\n", con, ceph_pr_addr(addr));
6938c2ecf20Sopenharmony_ci
6948c2ecf20Sopenharmony_ci	WARN_ON(con->state != CON_STATE_CLOSED);
6958c2ecf20Sopenharmony_ci	con->state = CON_STATE_PREOPEN;
6968c2ecf20Sopenharmony_ci
6978c2ecf20Sopenharmony_ci	con->peer_name.type = (__u8) entity_type;
6988c2ecf20Sopenharmony_ci	con->peer_name.num = cpu_to_le64(entity_num);
6998c2ecf20Sopenharmony_ci
7008c2ecf20Sopenharmony_ci	memcpy(&con->peer_addr, addr, sizeof(*addr));
7018c2ecf20Sopenharmony_ci	con->delay = 0;      /* reset backoff memory */
7028c2ecf20Sopenharmony_ci	mutex_unlock(&con->mutex);
7038c2ecf20Sopenharmony_ci	queue_con(con);
7048c2ecf20Sopenharmony_ci}
7058c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_con_open);
7068c2ecf20Sopenharmony_ci
7078c2ecf20Sopenharmony_ci/*
7088c2ecf20Sopenharmony_ci * return true if this connection ever successfully opened
7098c2ecf20Sopenharmony_ci */
7108c2ecf20Sopenharmony_cibool ceph_con_opened(struct ceph_connection *con)
7118c2ecf20Sopenharmony_ci{
7128c2ecf20Sopenharmony_ci	return con->connect_seq > 0;
7138c2ecf20Sopenharmony_ci}
7148c2ecf20Sopenharmony_ci
7158c2ecf20Sopenharmony_ci/*
7168c2ecf20Sopenharmony_ci * initialize a new connection.
7178c2ecf20Sopenharmony_ci */
7188c2ecf20Sopenharmony_civoid ceph_con_init(struct ceph_connection *con, void *private,
7198c2ecf20Sopenharmony_ci	const struct ceph_connection_operations *ops,
7208c2ecf20Sopenharmony_ci	struct ceph_messenger *msgr)
7218c2ecf20Sopenharmony_ci{
7228c2ecf20Sopenharmony_ci	dout("con_init %p\n", con);
7238c2ecf20Sopenharmony_ci	memset(con, 0, sizeof(*con));
7248c2ecf20Sopenharmony_ci	con->private = private;
7258c2ecf20Sopenharmony_ci	con->ops = ops;
7268c2ecf20Sopenharmony_ci	con->msgr = msgr;
7278c2ecf20Sopenharmony_ci
7288c2ecf20Sopenharmony_ci	con_sock_state_init(con);
7298c2ecf20Sopenharmony_ci
7308c2ecf20Sopenharmony_ci	mutex_init(&con->mutex);
7318c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&con->out_queue);
7328c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&con->out_sent);
7338c2ecf20Sopenharmony_ci	INIT_DELAYED_WORK(&con->work, ceph_con_workfn);
7348c2ecf20Sopenharmony_ci
7358c2ecf20Sopenharmony_ci	con->state = CON_STATE_CLOSED;
7368c2ecf20Sopenharmony_ci}
7378c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_con_init);
7388c2ecf20Sopenharmony_ci
7398c2ecf20Sopenharmony_ci
7408c2ecf20Sopenharmony_ci/*
7418c2ecf20Sopenharmony_ci * We maintain a global counter to order connection attempts.  Get
7428c2ecf20Sopenharmony_ci * a unique seq greater than @gt.
7438c2ecf20Sopenharmony_ci */
7448c2ecf20Sopenharmony_cistatic u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
7458c2ecf20Sopenharmony_ci{
7468c2ecf20Sopenharmony_ci	u32 ret;
7478c2ecf20Sopenharmony_ci
7488c2ecf20Sopenharmony_ci	spin_lock(&msgr->global_seq_lock);
7498c2ecf20Sopenharmony_ci	if (msgr->global_seq < gt)
7508c2ecf20Sopenharmony_ci		msgr->global_seq = gt;
7518c2ecf20Sopenharmony_ci	ret = ++msgr->global_seq;
7528c2ecf20Sopenharmony_ci	spin_unlock(&msgr->global_seq_lock);
7538c2ecf20Sopenharmony_ci	return ret;
7548c2ecf20Sopenharmony_ci}
7558c2ecf20Sopenharmony_ci
7568c2ecf20Sopenharmony_cistatic void con_out_kvec_reset(struct ceph_connection *con)
7578c2ecf20Sopenharmony_ci{
7588c2ecf20Sopenharmony_ci	BUG_ON(con->out_skip);
7598c2ecf20Sopenharmony_ci
7608c2ecf20Sopenharmony_ci	con->out_kvec_left = 0;
7618c2ecf20Sopenharmony_ci	con->out_kvec_bytes = 0;
7628c2ecf20Sopenharmony_ci	con->out_kvec_cur = &con->out_kvec[0];
7638c2ecf20Sopenharmony_ci}
7648c2ecf20Sopenharmony_ci
7658c2ecf20Sopenharmony_cistatic void con_out_kvec_add(struct ceph_connection *con,
7668c2ecf20Sopenharmony_ci				size_t size, void *data)
7678c2ecf20Sopenharmony_ci{
7688c2ecf20Sopenharmony_ci	int index = con->out_kvec_left;
7698c2ecf20Sopenharmony_ci
7708c2ecf20Sopenharmony_ci	BUG_ON(con->out_skip);
7718c2ecf20Sopenharmony_ci	BUG_ON(index >= ARRAY_SIZE(con->out_kvec));
7728c2ecf20Sopenharmony_ci
7738c2ecf20Sopenharmony_ci	con->out_kvec[index].iov_len = size;
7748c2ecf20Sopenharmony_ci	con->out_kvec[index].iov_base = data;
7758c2ecf20Sopenharmony_ci	con->out_kvec_left++;
7768c2ecf20Sopenharmony_ci	con->out_kvec_bytes += size;
7778c2ecf20Sopenharmony_ci}
7788c2ecf20Sopenharmony_ci
7798c2ecf20Sopenharmony_ci/*
7808c2ecf20Sopenharmony_ci * Chop off a kvec from the end.  Return residual number of bytes for
7818c2ecf20Sopenharmony_ci * that kvec, i.e. how many bytes would have been written if the kvec
7828c2ecf20Sopenharmony_ci * hadn't been nuked.
7838c2ecf20Sopenharmony_ci */
7848c2ecf20Sopenharmony_cistatic int con_out_kvec_skip(struct ceph_connection *con)
7858c2ecf20Sopenharmony_ci{
7868c2ecf20Sopenharmony_ci	int off = con->out_kvec_cur - con->out_kvec;
7878c2ecf20Sopenharmony_ci	int skip = 0;
7888c2ecf20Sopenharmony_ci
7898c2ecf20Sopenharmony_ci	if (con->out_kvec_bytes > 0) {
7908c2ecf20Sopenharmony_ci		skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len;
7918c2ecf20Sopenharmony_ci		BUG_ON(con->out_kvec_bytes < skip);
7928c2ecf20Sopenharmony_ci		BUG_ON(!con->out_kvec_left);
7938c2ecf20Sopenharmony_ci		con->out_kvec_bytes -= skip;
7948c2ecf20Sopenharmony_ci		con->out_kvec_left--;
7958c2ecf20Sopenharmony_ci	}
7968c2ecf20Sopenharmony_ci
7978c2ecf20Sopenharmony_ci	return skip;
7988c2ecf20Sopenharmony_ci}
7998c2ecf20Sopenharmony_ci
8008c2ecf20Sopenharmony_ci#ifdef CONFIG_BLOCK
8018c2ecf20Sopenharmony_ci
8028c2ecf20Sopenharmony_ci/*
8038c2ecf20Sopenharmony_ci * For a bio data item, a piece is whatever remains of the next
8048c2ecf20Sopenharmony_ci * entry in the current bio iovec, or the first entry in the next
8058c2ecf20Sopenharmony_ci * bio in the list.
8068c2ecf20Sopenharmony_ci */
8078c2ecf20Sopenharmony_cistatic void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
8088c2ecf20Sopenharmony_ci					size_t length)
8098c2ecf20Sopenharmony_ci{
8108c2ecf20Sopenharmony_ci	struct ceph_msg_data *data = cursor->data;
8118c2ecf20Sopenharmony_ci	struct ceph_bio_iter *it = &cursor->bio_iter;
8128c2ecf20Sopenharmony_ci
8138c2ecf20Sopenharmony_ci	cursor->resid = min_t(size_t, length, data->bio_length);
8148c2ecf20Sopenharmony_ci	*it = data->bio_pos;
8158c2ecf20Sopenharmony_ci	if (cursor->resid < it->iter.bi_size)
8168c2ecf20Sopenharmony_ci		it->iter.bi_size = cursor->resid;
8178c2ecf20Sopenharmony_ci
8188c2ecf20Sopenharmony_ci	BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
8198c2ecf20Sopenharmony_ci	cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
8208c2ecf20Sopenharmony_ci}
8218c2ecf20Sopenharmony_ci
8228c2ecf20Sopenharmony_cistatic struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
8238c2ecf20Sopenharmony_ci						size_t *page_offset,
8248c2ecf20Sopenharmony_ci						size_t *length)
8258c2ecf20Sopenharmony_ci{
8268c2ecf20Sopenharmony_ci	struct bio_vec bv = bio_iter_iovec(cursor->bio_iter.bio,
8278c2ecf20Sopenharmony_ci					   cursor->bio_iter.iter);
8288c2ecf20Sopenharmony_ci
8298c2ecf20Sopenharmony_ci	*page_offset = bv.bv_offset;
8308c2ecf20Sopenharmony_ci	*length = bv.bv_len;
8318c2ecf20Sopenharmony_ci	return bv.bv_page;
8328c2ecf20Sopenharmony_ci}
8338c2ecf20Sopenharmony_ci
8348c2ecf20Sopenharmony_cistatic bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
8358c2ecf20Sopenharmony_ci					size_t bytes)
8368c2ecf20Sopenharmony_ci{
8378c2ecf20Sopenharmony_ci	struct ceph_bio_iter *it = &cursor->bio_iter;
8388c2ecf20Sopenharmony_ci	struct page *page = bio_iter_page(it->bio, it->iter);
8398c2ecf20Sopenharmony_ci
8408c2ecf20Sopenharmony_ci	BUG_ON(bytes > cursor->resid);
8418c2ecf20Sopenharmony_ci	BUG_ON(bytes > bio_iter_len(it->bio, it->iter));
8428c2ecf20Sopenharmony_ci	cursor->resid -= bytes;
8438c2ecf20Sopenharmony_ci	bio_advance_iter(it->bio, &it->iter, bytes);
8448c2ecf20Sopenharmony_ci
8458c2ecf20Sopenharmony_ci	if (!cursor->resid) {
8468c2ecf20Sopenharmony_ci		BUG_ON(!cursor->last_piece);
8478c2ecf20Sopenharmony_ci		return false;   /* no more data */
8488c2ecf20Sopenharmony_ci	}
8498c2ecf20Sopenharmony_ci
8508c2ecf20Sopenharmony_ci	if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done &&
8518c2ecf20Sopenharmony_ci		       page == bio_iter_page(it->bio, it->iter)))
8528c2ecf20Sopenharmony_ci		return false;	/* more bytes to process in this segment */
8538c2ecf20Sopenharmony_ci
8548c2ecf20Sopenharmony_ci	if (!it->iter.bi_size) {
8558c2ecf20Sopenharmony_ci		it->bio = it->bio->bi_next;
8568c2ecf20Sopenharmony_ci		it->iter = it->bio->bi_iter;
8578c2ecf20Sopenharmony_ci		if (cursor->resid < it->iter.bi_size)
8588c2ecf20Sopenharmony_ci			it->iter.bi_size = cursor->resid;
8598c2ecf20Sopenharmony_ci	}
8608c2ecf20Sopenharmony_ci
8618c2ecf20Sopenharmony_ci	BUG_ON(cursor->last_piece);
8628c2ecf20Sopenharmony_ci	BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
8638c2ecf20Sopenharmony_ci	cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
8648c2ecf20Sopenharmony_ci	return true;
8658c2ecf20Sopenharmony_ci}
8668c2ecf20Sopenharmony_ci#endif /* CONFIG_BLOCK */
8678c2ecf20Sopenharmony_ci
8688c2ecf20Sopenharmony_cistatic void ceph_msg_data_bvecs_cursor_init(struct ceph_msg_data_cursor *cursor,
8698c2ecf20Sopenharmony_ci					size_t length)
8708c2ecf20Sopenharmony_ci{
8718c2ecf20Sopenharmony_ci	struct ceph_msg_data *data = cursor->data;
8728c2ecf20Sopenharmony_ci	struct bio_vec *bvecs = data->bvec_pos.bvecs;
8738c2ecf20Sopenharmony_ci
8748c2ecf20Sopenharmony_ci	cursor->resid = min_t(size_t, length, data->bvec_pos.iter.bi_size);
8758c2ecf20Sopenharmony_ci	cursor->bvec_iter = data->bvec_pos.iter;
8768c2ecf20Sopenharmony_ci	cursor->bvec_iter.bi_size = cursor->resid;
8778c2ecf20Sopenharmony_ci
8788c2ecf20Sopenharmony_ci	BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
8798c2ecf20Sopenharmony_ci	cursor->last_piece =
8808c2ecf20Sopenharmony_ci	    cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
8818c2ecf20Sopenharmony_ci}
8828c2ecf20Sopenharmony_ci
8838c2ecf20Sopenharmony_cistatic struct page *ceph_msg_data_bvecs_next(struct ceph_msg_data_cursor *cursor,
8848c2ecf20Sopenharmony_ci						size_t *page_offset,
8858c2ecf20Sopenharmony_ci						size_t *length)
8868c2ecf20Sopenharmony_ci{
8878c2ecf20Sopenharmony_ci	struct bio_vec bv = bvec_iter_bvec(cursor->data->bvec_pos.bvecs,
8888c2ecf20Sopenharmony_ci					   cursor->bvec_iter);
8898c2ecf20Sopenharmony_ci
8908c2ecf20Sopenharmony_ci	*page_offset = bv.bv_offset;
8918c2ecf20Sopenharmony_ci	*length = bv.bv_len;
8928c2ecf20Sopenharmony_ci	return bv.bv_page;
8938c2ecf20Sopenharmony_ci}
8948c2ecf20Sopenharmony_ci
8958c2ecf20Sopenharmony_cistatic bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor,
8968c2ecf20Sopenharmony_ci					size_t bytes)
8978c2ecf20Sopenharmony_ci{
8988c2ecf20Sopenharmony_ci	struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs;
8998c2ecf20Sopenharmony_ci	struct page *page = bvec_iter_page(bvecs, cursor->bvec_iter);
9008c2ecf20Sopenharmony_ci
9018c2ecf20Sopenharmony_ci	BUG_ON(bytes > cursor->resid);
9028c2ecf20Sopenharmony_ci	BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter));
9038c2ecf20Sopenharmony_ci	cursor->resid -= bytes;
9048c2ecf20Sopenharmony_ci	bvec_iter_advance(bvecs, &cursor->bvec_iter, bytes);
9058c2ecf20Sopenharmony_ci
9068c2ecf20Sopenharmony_ci	if (!cursor->resid) {
9078c2ecf20Sopenharmony_ci		BUG_ON(!cursor->last_piece);
9088c2ecf20Sopenharmony_ci		return false;   /* no more data */
9098c2ecf20Sopenharmony_ci	}
9108c2ecf20Sopenharmony_ci
9118c2ecf20Sopenharmony_ci	if (!bytes || (cursor->bvec_iter.bi_bvec_done &&
9128c2ecf20Sopenharmony_ci		       page == bvec_iter_page(bvecs, cursor->bvec_iter)))
9138c2ecf20Sopenharmony_ci		return false;	/* more bytes to process in this segment */
9148c2ecf20Sopenharmony_ci
9158c2ecf20Sopenharmony_ci	BUG_ON(cursor->last_piece);
9168c2ecf20Sopenharmony_ci	BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
9178c2ecf20Sopenharmony_ci	cursor->last_piece =
9188c2ecf20Sopenharmony_ci	    cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
9198c2ecf20Sopenharmony_ci	return true;
9208c2ecf20Sopenharmony_ci}
9218c2ecf20Sopenharmony_ci
9228c2ecf20Sopenharmony_ci/*
9238c2ecf20Sopenharmony_ci * For a page array, a piece comes from the first page in the array
9248c2ecf20Sopenharmony_ci * that has not already been fully consumed.
9258c2ecf20Sopenharmony_ci */
9268c2ecf20Sopenharmony_cistatic void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
9278c2ecf20Sopenharmony_ci					size_t length)
9288c2ecf20Sopenharmony_ci{
9298c2ecf20Sopenharmony_ci	struct ceph_msg_data *data = cursor->data;
9308c2ecf20Sopenharmony_ci	int page_count;
9318c2ecf20Sopenharmony_ci
9328c2ecf20Sopenharmony_ci	BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
9338c2ecf20Sopenharmony_ci
9348c2ecf20Sopenharmony_ci	BUG_ON(!data->pages);
9358c2ecf20Sopenharmony_ci	BUG_ON(!data->length);
9368c2ecf20Sopenharmony_ci
9378c2ecf20Sopenharmony_ci	cursor->resid = min(length, data->length);
9388c2ecf20Sopenharmony_ci	page_count = calc_pages_for(data->alignment, (u64)data->length);
9398c2ecf20Sopenharmony_ci	cursor->page_offset = data->alignment & ~PAGE_MASK;
9408c2ecf20Sopenharmony_ci	cursor->page_index = 0;
9418c2ecf20Sopenharmony_ci	BUG_ON(page_count > (int)USHRT_MAX);
9428c2ecf20Sopenharmony_ci	cursor->page_count = (unsigned short)page_count;
9438c2ecf20Sopenharmony_ci	BUG_ON(length > SIZE_MAX - cursor->page_offset);
9448c2ecf20Sopenharmony_ci	cursor->last_piece = cursor->page_offset + cursor->resid <= PAGE_SIZE;
9458c2ecf20Sopenharmony_ci}
9468c2ecf20Sopenharmony_ci
9478c2ecf20Sopenharmony_cistatic struct page *
9488c2ecf20Sopenharmony_ciceph_msg_data_pages_next(struct ceph_msg_data_cursor *cursor,
9498c2ecf20Sopenharmony_ci					size_t *page_offset, size_t *length)
9508c2ecf20Sopenharmony_ci{
9518c2ecf20Sopenharmony_ci	struct ceph_msg_data *data = cursor->data;
9528c2ecf20Sopenharmony_ci
9538c2ecf20Sopenharmony_ci	BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
9548c2ecf20Sopenharmony_ci
9558c2ecf20Sopenharmony_ci	BUG_ON(cursor->page_index >= cursor->page_count);
9568c2ecf20Sopenharmony_ci	BUG_ON(cursor->page_offset >= PAGE_SIZE);
9578c2ecf20Sopenharmony_ci
9588c2ecf20Sopenharmony_ci	*page_offset = cursor->page_offset;
9598c2ecf20Sopenharmony_ci	if (cursor->last_piece)
9608c2ecf20Sopenharmony_ci		*length = cursor->resid;
9618c2ecf20Sopenharmony_ci	else
9628c2ecf20Sopenharmony_ci		*length = PAGE_SIZE - *page_offset;
9638c2ecf20Sopenharmony_ci
9648c2ecf20Sopenharmony_ci	return data->pages[cursor->page_index];
9658c2ecf20Sopenharmony_ci}
9668c2ecf20Sopenharmony_ci
9678c2ecf20Sopenharmony_cistatic bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor,
9688c2ecf20Sopenharmony_ci						size_t bytes)
9698c2ecf20Sopenharmony_ci{
9708c2ecf20Sopenharmony_ci	BUG_ON(cursor->data->type != CEPH_MSG_DATA_PAGES);
9718c2ecf20Sopenharmony_ci
9728c2ecf20Sopenharmony_ci	BUG_ON(cursor->page_offset + bytes > PAGE_SIZE);
9738c2ecf20Sopenharmony_ci
9748c2ecf20Sopenharmony_ci	/* Advance the cursor page offset */
9758c2ecf20Sopenharmony_ci
9768c2ecf20Sopenharmony_ci	cursor->resid -= bytes;
9778c2ecf20Sopenharmony_ci	cursor->page_offset = (cursor->page_offset + bytes) & ~PAGE_MASK;
9788c2ecf20Sopenharmony_ci	if (!bytes || cursor->page_offset)
9798c2ecf20Sopenharmony_ci		return false;	/* more bytes to process in the current page */
9808c2ecf20Sopenharmony_ci
9818c2ecf20Sopenharmony_ci	if (!cursor->resid)
9828c2ecf20Sopenharmony_ci		return false;   /* no more data */
9838c2ecf20Sopenharmony_ci
9848c2ecf20Sopenharmony_ci	/* Move on to the next page; offset is already at 0 */
9858c2ecf20Sopenharmony_ci
9868c2ecf20Sopenharmony_ci	BUG_ON(cursor->page_index >= cursor->page_count);
9878c2ecf20Sopenharmony_ci	cursor->page_index++;
9888c2ecf20Sopenharmony_ci	cursor->last_piece = cursor->resid <= PAGE_SIZE;
9898c2ecf20Sopenharmony_ci
9908c2ecf20Sopenharmony_ci	return true;
9918c2ecf20Sopenharmony_ci}
9928c2ecf20Sopenharmony_ci
9938c2ecf20Sopenharmony_ci/*
9948c2ecf20Sopenharmony_ci * For a pagelist, a piece is whatever remains to be consumed in the
9958c2ecf20Sopenharmony_ci * first page in the list, or the front of the next page.
9968c2ecf20Sopenharmony_ci */
9978c2ecf20Sopenharmony_cistatic void
9988c2ecf20Sopenharmony_ciceph_msg_data_pagelist_cursor_init(struct ceph_msg_data_cursor *cursor,
9998c2ecf20Sopenharmony_ci					size_t length)
10008c2ecf20Sopenharmony_ci{
10018c2ecf20Sopenharmony_ci	struct ceph_msg_data *data = cursor->data;
10028c2ecf20Sopenharmony_ci	struct ceph_pagelist *pagelist;
10038c2ecf20Sopenharmony_ci	struct page *page;
10048c2ecf20Sopenharmony_ci
10058c2ecf20Sopenharmony_ci	BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
10068c2ecf20Sopenharmony_ci
10078c2ecf20Sopenharmony_ci	pagelist = data->pagelist;
10088c2ecf20Sopenharmony_ci	BUG_ON(!pagelist);
10098c2ecf20Sopenharmony_ci
10108c2ecf20Sopenharmony_ci	if (!length)
10118c2ecf20Sopenharmony_ci		return;		/* pagelist can be assigned but empty */
10128c2ecf20Sopenharmony_ci
10138c2ecf20Sopenharmony_ci	BUG_ON(list_empty(&pagelist->head));
10148c2ecf20Sopenharmony_ci	page = list_first_entry(&pagelist->head, struct page, lru);
10158c2ecf20Sopenharmony_ci
10168c2ecf20Sopenharmony_ci	cursor->resid = min(length, pagelist->length);
10178c2ecf20Sopenharmony_ci	cursor->page = page;
10188c2ecf20Sopenharmony_ci	cursor->offset = 0;
10198c2ecf20Sopenharmony_ci	cursor->last_piece = cursor->resid <= PAGE_SIZE;
10208c2ecf20Sopenharmony_ci}
10218c2ecf20Sopenharmony_ci
10228c2ecf20Sopenharmony_cistatic struct page *
10238c2ecf20Sopenharmony_ciceph_msg_data_pagelist_next(struct ceph_msg_data_cursor *cursor,
10248c2ecf20Sopenharmony_ci				size_t *page_offset, size_t *length)
10258c2ecf20Sopenharmony_ci{
10268c2ecf20Sopenharmony_ci	struct ceph_msg_data *data = cursor->data;
10278c2ecf20Sopenharmony_ci	struct ceph_pagelist *pagelist;
10288c2ecf20Sopenharmony_ci
10298c2ecf20Sopenharmony_ci	BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
10308c2ecf20Sopenharmony_ci
10318c2ecf20Sopenharmony_ci	pagelist = data->pagelist;
10328c2ecf20Sopenharmony_ci	BUG_ON(!pagelist);
10338c2ecf20Sopenharmony_ci
10348c2ecf20Sopenharmony_ci	BUG_ON(!cursor->page);
10358c2ecf20Sopenharmony_ci	BUG_ON(cursor->offset + cursor->resid != pagelist->length);
10368c2ecf20Sopenharmony_ci
10378c2ecf20Sopenharmony_ci	/* offset of first page in pagelist is always 0 */
10388c2ecf20Sopenharmony_ci	*page_offset = cursor->offset & ~PAGE_MASK;
10398c2ecf20Sopenharmony_ci	if (cursor->last_piece)
10408c2ecf20Sopenharmony_ci		*length = cursor->resid;
10418c2ecf20Sopenharmony_ci	else
10428c2ecf20Sopenharmony_ci		*length = PAGE_SIZE - *page_offset;
10438c2ecf20Sopenharmony_ci
10448c2ecf20Sopenharmony_ci	return cursor->page;
10458c2ecf20Sopenharmony_ci}
10468c2ecf20Sopenharmony_ci
10478c2ecf20Sopenharmony_cistatic bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
10488c2ecf20Sopenharmony_ci						size_t bytes)
10498c2ecf20Sopenharmony_ci{
10508c2ecf20Sopenharmony_ci	struct ceph_msg_data *data = cursor->data;
10518c2ecf20Sopenharmony_ci	struct ceph_pagelist *pagelist;
10528c2ecf20Sopenharmony_ci
10538c2ecf20Sopenharmony_ci	BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
10548c2ecf20Sopenharmony_ci
10558c2ecf20Sopenharmony_ci	pagelist = data->pagelist;
10568c2ecf20Sopenharmony_ci	BUG_ON(!pagelist);
10578c2ecf20Sopenharmony_ci
10588c2ecf20Sopenharmony_ci	BUG_ON(cursor->offset + cursor->resid != pagelist->length);
10598c2ecf20Sopenharmony_ci	BUG_ON((cursor->offset & ~PAGE_MASK) + bytes > PAGE_SIZE);
10608c2ecf20Sopenharmony_ci
10618c2ecf20Sopenharmony_ci	/* Advance the cursor offset */
10628c2ecf20Sopenharmony_ci
10638c2ecf20Sopenharmony_ci	cursor->resid -= bytes;
10648c2ecf20Sopenharmony_ci	cursor->offset += bytes;
10658c2ecf20Sopenharmony_ci	/* offset of first page in pagelist is always 0 */
10668c2ecf20Sopenharmony_ci	if (!bytes || cursor->offset & ~PAGE_MASK)
10678c2ecf20Sopenharmony_ci		return false;	/* more bytes to process in the current page */
10688c2ecf20Sopenharmony_ci
10698c2ecf20Sopenharmony_ci	if (!cursor->resid)
10708c2ecf20Sopenharmony_ci		return false;   /* no more data */
10718c2ecf20Sopenharmony_ci
10728c2ecf20Sopenharmony_ci	/* Move on to the next page */
10738c2ecf20Sopenharmony_ci
10748c2ecf20Sopenharmony_ci	BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
10758c2ecf20Sopenharmony_ci	cursor->page = list_next_entry(cursor->page, lru);
10768c2ecf20Sopenharmony_ci	cursor->last_piece = cursor->resid <= PAGE_SIZE;
10778c2ecf20Sopenharmony_ci
10788c2ecf20Sopenharmony_ci	return true;
10798c2ecf20Sopenharmony_ci}
10808c2ecf20Sopenharmony_ci
10818c2ecf20Sopenharmony_ci/*
10828c2ecf20Sopenharmony_ci * Message data is handled (sent or received) in pieces, where each
10838c2ecf20Sopenharmony_ci * piece resides on a single page.  The network layer might not
10848c2ecf20Sopenharmony_ci * consume an entire piece at once.  A data item's cursor keeps
10858c2ecf20Sopenharmony_ci * track of which piece is next to process and how much remains to
10868c2ecf20Sopenharmony_ci * be processed in that piece.  It also tracks whether the current
10878c2ecf20Sopenharmony_ci * piece is the last one in the data item.
10888c2ecf20Sopenharmony_ci */
10898c2ecf20Sopenharmony_cistatic void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
10908c2ecf20Sopenharmony_ci{
10918c2ecf20Sopenharmony_ci	size_t length = cursor->total_resid;
10928c2ecf20Sopenharmony_ci
10938c2ecf20Sopenharmony_ci	switch (cursor->data->type) {
10948c2ecf20Sopenharmony_ci	case CEPH_MSG_DATA_PAGELIST:
10958c2ecf20Sopenharmony_ci		ceph_msg_data_pagelist_cursor_init(cursor, length);
10968c2ecf20Sopenharmony_ci		break;
10978c2ecf20Sopenharmony_ci	case CEPH_MSG_DATA_PAGES:
10988c2ecf20Sopenharmony_ci		ceph_msg_data_pages_cursor_init(cursor, length);
10998c2ecf20Sopenharmony_ci		break;
11008c2ecf20Sopenharmony_ci#ifdef CONFIG_BLOCK
11018c2ecf20Sopenharmony_ci	case CEPH_MSG_DATA_BIO:
11028c2ecf20Sopenharmony_ci		ceph_msg_data_bio_cursor_init(cursor, length);
11038c2ecf20Sopenharmony_ci		break;
11048c2ecf20Sopenharmony_ci#endif /* CONFIG_BLOCK */
11058c2ecf20Sopenharmony_ci	case CEPH_MSG_DATA_BVECS:
11068c2ecf20Sopenharmony_ci		ceph_msg_data_bvecs_cursor_init(cursor, length);
11078c2ecf20Sopenharmony_ci		break;
11088c2ecf20Sopenharmony_ci	case CEPH_MSG_DATA_NONE:
11098c2ecf20Sopenharmony_ci	default:
11108c2ecf20Sopenharmony_ci		/* BUG(); */
11118c2ecf20Sopenharmony_ci		break;
11128c2ecf20Sopenharmony_ci	}
11138c2ecf20Sopenharmony_ci	cursor->need_crc = true;
11148c2ecf20Sopenharmony_ci}
11158c2ecf20Sopenharmony_ci
11168c2ecf20Sopenharmony_cistatic void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length)
11178c2ecf20Sopenharmony_ci{
11188c2ecf20Sopenharmony_ci	struct ceph_msg_data_cursor *cursor = &msg->cursor;
11198c2ecf20Sopenharmony_ci
11208c2ecf20Sopenharmony_ci	BUG_ON(!length);
11218c2ecf20Sopenharmony_ci	BUG_ON(length > msg->data_length);
11228c2ecf20Sopenharmony_ci	BUG_ON(!msg->num_data_items);
11238c2ecf20Sopenharmony_ci
11248c2ecf20Sopenharmony_ci	cursor->total_resid = length;
11258c2ecf20Sopenharmony_ci	cursor->data = msg->data;
11268c2ecf20Sopenharmony_ci
11278c2ecf20Sopenharmony_ci	__ceph_msg_data_cursor_init(cursor);
11288c2ecf20Sopenharmony_ci}
11298c2ecf20Sopenharmony_ci
11308c2ecf20Sopenharmony_ci/*
11318c2ecf20Sopenharmony_ci * Return the page containing the next piece to process for a given
11328c2ecf20Sopenharmony_ci * data item, and supply the page offset and length of that piece.
11338c2ecf20Sopenharmony_ci * Indicate whether this is the last piece in this data item.
11348c2ecf20Sopenharmony_ci */
11358c2ecf20Sopenharmony_cistatic struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
11368c2ecf20Sopenharmony_ci					size_t *page_offset, size_t *length,
11378c2ecf20Sopenharmony_ci					bool *last_piece)
11388c2ecf20Sopenharmony_ci{
11398c2ecf20Sopenharmony_ci	struct page *page;
11408c2ecf20Sopenharmony_ci
11418c2ecf20Sopenharmony_ci	switch (cursor->data->type) {
11428c2ecf20Sopenharmony_ci	case CEPH_MSG_DATA_PAGELIST:
11438c2ecf20Sopenharmony_ci		page = ceph_msg_data_pagelist_next(cursor, page_offset, length);
11448c2ecf20Sopenharmony_ci		break;
11458c2ecf20Sopenharmony_ci	case CEPH_MSG_DATA_PAGES:
11468c2ecf20Sopenharmony_ci		page = ceph_msg_data_pages_next(cursor, page_offset, length);
11478c2ecf20Sopenharmony_ci		break;
11488c2ecf20Sopenharmony_ci#ifdef CONFIG_BLOCK
11498c2ecf20Sopenharmony_ci	case CEPH_MSG_DATA_BIO:
11508c2ecf20Sopenharmony_ci		page = ceph_msg_data_bio_next(cursor, page_offset, length);
11518c2ecf20Sopenharmony_ci		break;
11528c2ecf20Sopenharmony_ci#endif /* CONFIG_BLOCK */
11538c2ecf20Sopenharmony_ci	case CEPH_MSG_DATA_BVECS:
11548c2ecf20Sopenharmony_ci		page = ceph_msg_data_bvecs_next(cursor, page_offset, length);
11558c2ecf20Sopenharmony_ci		break;
11568c2ecf20Sopenharmony_ci	case CEPH_MSG_DATA_NONE:
11578c2ecf20Sopenharmony_ci	default:
11588c2ecf20Sopenharmony_ci		page = NULL;
11598c2ecf20Sopenharmony_ci		break;
11608c2ecf20Sopenharmony_ci	}
11618c2ecf20Sopenharmony_ci
11628c2ecf20Sopenharmony_ci	BUG_ON(!page);
11638c2ecf20Sopenharmony_ci	BUG_ON(*page_offset + *length > PAGE_SIZE);
11648c2ecf20Sopenharmony_ci	BUG_ON(!*length);
11658c2ecf20Sopenharmony_ci	BUG_ON(*length > cursor->resid);
11668c2ecf20Sopenharmony_ci	if (last_piece)
11678c2ecf20Sopenharmony_ci		*last_piece = cursor->last_piece;
11688c2ecf20Sopenharmony_ci
11698c2ecf20Sopenharmony_ci	return page;
11708c2ecf20Sopenharmony_ci}
11718c2ecf20Sopenharmony_ci
11728c2ecf20Sopenharmony_ci/*
11738c2ecf20Sopenharmony_ci * Returns true if the result moves the cursor on to the next piece
11748c2ecf20Sopenharmony_ci * of the data item.
11758c2ecf20Sopenharmony_ci */
11768c2ecf20Sopenharmony_cistatic void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
11778c2ecf20Sopenharmony_ci				  size_t bytes)
11788c2ecf20Sopenharmony_ci{
11798c2ecf20Sopenharmony_ci	bool new_piece;
11808c2ecf20Sopenharmony_ci
11818c2ecf20Sopenharmony_ci	BUG_ON(bytes > cursor->resid);
11828c2ecf20Sopenharmony_ci	switch (cursor->data->type) {
11838c2ecf20Sopenharmony_ci	case CEPH_MSG_DATA_PAGELIST:
11848c2ecf20Sopenharmony_ci		new_piece = ceph_msg_data_pagelist_advance(cursor, bytes);
11858c2ecf20Sopenharmony_ci		break;
11868c2ecf20Sopenharmony_ci	case CEPH_MSG_DATA_PAGES:
11878c2ecf20Sopenharmony_ci		new_piece = ceph_msg_data_pages_advance(cursor, bytes);
11888c2ecf20Sopenharmony_ci		break;
11898c2ecf20Sopenharmony_ci#ifdef CONFIG_BLOCK
11908c2ecf20Sopenharmony_ci	case CEPH_MSG_DATA_BIO:
11918c2ecf20Sopenharmony_ci		new_piece = ceph_msg_data_bio_advance(cursor, bytes);
11928c2ecf20Sopenharmony_ci		break;
11938c2ecf20Sopenharmony_ci#endif /* CONFIG_BLOCK */
11948c2ecf20Sopenharmony_ci	case CEPH_MSG_DATA_BVECS:
11958c2ecf20Sopenharmony_ci		new_piece = ceph_msg_data_bvecs_advance(cursor, bytes);
11968c2ecf20Sopenharmony_ci		break;
11978c2ecf20Sopenharmony_ci	case CEPH_MSG_DATA_NONE:
11988c2ecf20Sopenharmony_ci	default:
11998c2ecf20Sopenharmony_ci		BUG();
12008c2ecf20Sopenharmony_ci		break;
12018c2ecf20Sopenharmony_ci	}
12028c2ecf20Sopenharmony_ci	cursor->total_resid -= bytes;
12038c2ecf20Sopenharmony_ci
12048c2ecf20Sopenharmony_ci	if (!cursor->resid && cursor->total_resid) {
12058c2ecf20Sopenharmony_ci		WARN_ON(!cursor->last_piece);
12068c2ecf20Sopenharmony_ci		cursor->data++;
12078c2ecf20Sopenharmony_ci		__ceph_msg_data_cursor_init(cursor);
12088c2ecf20Sopenharmony_ci		new_piece = true;
12098c2ecf20Sopenharmony_ci	}
12108c2ecf20Sopenharmony_ci	cursor->need_crc = new_piece;
12118c2ecf20Sopenharmony_ci}
12128c2ecf20Sopenharmony_ci
12138c2ecf20Sopenharmony_cistatic size_t sizeof_footer(struct ceph_connection *con)
12148c2ecf20Sopenharmony_ci{
12158c2ecf20Sopenharmony_ci	return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ?
12168c2ecf20Sopenharmony_ci	    sizeof(struct ceph_msg_footer) :
12178c2ecf20Sopenharmony_ci	    sizeof(struct ceph_msg_footer_old);
12188c2ecf20Sopenharmony_ci}
12198c2ecf20Sopenharmony_ci
12208c2ecf20Sopenharmony_cistatic void prepare_message_data(struct ceph_msg *msg, u32 data_len)
12218c2ecf20Sopenharmony_ci{
12228c2ecf20Sopenharmony_ci	/* Initialize data cursor */
12238c2ecf20Sopenharmony_ci
12248c2ecf20Sopenharmony_ci	ceph_msg_data_cursor_init(msg, (size_t)data_len);
12258c2ecf20Sopenharmony_ci}
12268c2ecf20Sopenharmony_ci
12278c2ecf20Sopenharmony_ci/*
12288c2ecf20Sopenharmony_ci * Prepare footer for currently outgoing message, and finish things
12298c2ecf20Sopenharmony_ci * off.  Assumes out_kvec* are already valid.. we just add on to the end.
12308c2ecf20Sopenharmony_ci */
12318c2ecf20Sopenharmony_cistatic void prepare_write_message_footer(struct ceph_connection *con)
12328c2ecf20Sopenharmony_ci{
12338c2ecf20Sopenharmony_ci	struct ceph_msg *m = con->out_msg;
12348c2ecf20Sopenharmony_ci
12358c2ecf20Sopenharmony_ci	m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
12368c2ecf20Sopenharmony_ci
12378c2ecf20Sopenharmony_ci	dout("prepare_write_message_footer %p\n", con);
12388c2ecf20Sopenharmony_ci	con_out_kvec_add(con, sizeof_footer(con), &m->footer);
12398c2ecf20Sopenharmony_ci	if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
12408c2ecf20Sopenharmony_ci		if (con->ops->sign_message)
12418c2ecf20Sopenharmony_ci			con->ops->sign_message(m);
12428c2ecf20Sopenharmony_ci		else
12438c2ecf20Sopenharmony_ci			m->footer.sig = 0;
12448c2ecf20Sopenharmony_ci	} else {
12458c2ecf20Sopenharmony_ci		m->old_footer.flags = m->footer.flags;
12468c2ecf20Sopenharmony_ci	}
12478c2ecf20Sopenharmony_ci	con->out_more = m->more_to_follow;
12488c2ecf20Sopenharmony_ci	con->out_msg_done = true;
12498c2ecf20Sopenharmony_ci}
12508c2ecf20Sopenharmony_ci
12518c2ecf20Sopenharmony_ci/*
12528c2ecf20Sopenharmony_ci * Prepare headers for the next outgoing message.
12538c2ecf20Sopenharmony_ci */
12548c2ecf20Sopenharmony_cistatic void prepare_write_message(struct ceph_connection *con)
12558c2ecf20Sopenharmony_ci{
12568c2ecf20Sopenharmony_ci	struct ceph_msg *m;
12578c2ecf20Sopenharmony_ci	u32 crc;
12588c2ecf20Sopenharmony_ci
12598c2ecf20Sopenharmony_ci	con_out_kvec_reset(con);
12608c2ecf20Sopenharmony_ci	con->out_msg_done = false;
12618c2ecf20Sopenharmony_ci
12628c2ecf20Sopenharmony_ci	/* Sneak an ack in there first?  If we can get it into the same
12638c2ecf20Sopenharmony_ci	 * TCP packet that's a good thing. */
12648c2ecf20Sopenharmony_ci	if (con->in_seq > con->in_seq_acked) {
12658c2ecf20Sopenharmony_ci		con->in_seq_acked = con->in_seq;
12668c2ecf20Sopenharmony_ci		con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
12678c2ecf20Sopenharmony_ci		con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
12688c2ecf20Sopenharmony_ci		con_out_kvec_add(con, sizeof (con->out_temp_ack),
12698c2ecf20Sopenharmony_ci			&con->out_temp_ack);
12708c2ecf20Sopenharmony_ci	}
12718c2ecf20Sopenharmony_ci
12728c2ecf20Sopenharmony_ci	BUG_ON(list_empty(&con->out_queue));
12738c2ecf20Sopenharmony_ci	m = list_first_entry(&con->out_queue, struct ceph_msg, list_head);
12748c2ecf20Sopenharmony_ci	con->out_msg = m;
12758c2ecf20Sopenharmony_ci	BUG_ON(m->con != con);
12768c2ecf20Sopenharmony_ci
12778c2ecf20Sopenharmony_ci	/* put message on sent list */
12788c2ecf20Sopenharmony_ci	ceph_msg_get(m);
12798c2ecf20Sopenharmony_ci	list_move_tail(&m->list_head, &con->out_sent);
12808c2ecf20Sopenharmony_ci
12818c2ecf20Sopenharmony_ci	/*
12828c2ecf20Sopenharmony_ci	 * only assign outgoing seq # if we haven't sent this message
12838c2ecf20Sopenharmony_ci	 * yet.  if it is requeued, resend with it's original seq.
12848c2ecf20Sopenharmony_ci	 */
12858c2ecf20Sopenharmony_ci	if (m->needs_out_seq) {
12868c2ecf20Sopenharmony_ci		m->hdr.seq = cpu_to_le64(++con->out_seq);
12878c2ecf20Sopenharmony_ci		m->needs_out_seq = false;
12888c2ecf20Sopenharmony_ci
12898c2ecf20Sopenharmony_ci		if (con->ops->reencode_message)
12908c2ecf20Sopenharmony_ci			con->ops->reencode_message(m);
12918c2ecf20Sopenharmony_ci	}
12928c2ecf20Sopenharmony_ci
12938c2ecf20Sopenharmony_ci	dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
12948c2ecf20Sopenharmony_ci	     m, con->out_seq, le16_to_cpu(m->hdr.type),
12958c2ecf20Sopenharmony_ci	     le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len),
12968c2ecf20Sopenharmony_ci	     m->data_length);
12978c2ecf20Sopenharmony_ci	WARN_ON(m->front.iov_len != le32_to_cpu(m->hdr.front_len));
12988c2ecf20Sopenharmony_ci	WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
12998c2ecf20Sopenharmony_ci
13008c2ecf20Sopenharmony_ci	/* tag + hdr + front + middle */
13018c2ecf20Sopenharmony_ci	con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
13028c2ecf20Sopenharmony_ci	con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr);
13038c2ecf20Sopenharmony_ci	con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
13048c2ecf20Sopenharmony_ci
13058c2ecf20Sopenharmony_ci	if (m->middle)
13068c2ecf20Sopenharmony_ci		con_out_kvec_add(con, m->middle->vec.iov_len,
13078c2ecf20Sopenharmony_ci			m->middle->vec.iov_base);
13088c2ecf20Sopenharmony_ci
13098c2ecf20Sopenharmony_ci	/* fill in hdr crc and finalize hdr */
13108c2ecf20Sopenharmony_ci	crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
13118c2ecf20Sopenharmony_ci	con->out_msg->hdr.crc = cpu_to_le32(crc);
13128c2ecf20Sopenharmony_ci	memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr));
13138c2ecf20Sopenharmony_ci
13148c2ecf20Sopenharmony_ci	/* fill in front and middle crc, footer */
13158c2ecf20Sopenharmony_ci	crc = crc32c(0, m->front.iov_base, m->front.iov_len);
13168c2ecf20Sopenharmony_ci	con->out_msg->footer.front_crc = cpu_to_le32(crc);
13178c2ecf20Sopenharmony_ci	if (m->middle) {
13188c2ecf20Sopenharmony_ci		crc = crc32c(0, m->middle->vec.iov_base,
13198c2ecf20Sopenharmony_ci				m->middle->vec.iov_len);
13208c2ecf20Sopenharmony_ci		con->out_msg->footer.middle_crc = cpu_to_le32(crc);
13218c2ecf20Sopenharmony_ci	} else
13228c2ecf20Sopenharmony_ci		con->out_msg->footer.middle_crc = 0;
13238c2ecf20Sopenharmony_ci	dout("%s front_crc %u middle_crc %u\n", __func__,
13248c2ecf20Sopenharmony_ci	     le32_to_cpu(con->out_msg->footer.front_crc),
13258c2ecf20Sopenharmony_ci	     le32_to_cpu(con->out_msg->footer.middle_crc));
13268c2ecf20Sopenharmony_ci	con->out_msg->footer.flags = 0;
13278c2ecf20Sopenharmony_ci
13288c2ecf20Sopenharmony_ci	/* is there a data payload? */
13298c2ecf20Sopenharmony_ci	con->out_msg->footer.data_crc = 0;
13308c2ecf20Sopenharmony_ci	if (m->data_length) {
13318c2ecf20Sopenharmony_ci		prepare_message_data(con->out_msg, m->data_length);
13328c2ecf20Sopenharmony_ci		con->out_more = 1;  /* data + footer will follow */
13338c2ecf20Sopenharmony_ci	} else {
13348c2ecf20Sopenharmony_ci		/* no, queue up footer too and be done */
13358c2ecf20Sopenharmony_ci		prepare_write_message_footer(con);
13368c2ecf20Sopenharmony_ci	}
13378c2ecf20Sopenharmony_ci
13388c2ecf20Sopenharmony_ci	con_flag_set(con, CON_FLAG_WRITE_PENDING);
13398c2ecf20Sopenharmony_ci}
13408c2ecf20Sopenharmony_ci
13418c2ecf20Sopenharmony_ci/*
13428c2ecf20Sopenharmony_ci * Prepare an ack.
13438c2ecf20Sopenharmony_ci */
13448c2ecf20Sopenharmony_cistatic void prepare_write_ack(struct ceph_connection *con)
13458c2ecf20Sopenharmony_ci{
13468c2ecf20Sopenharmony_ci	dout("prepare_write_ack %p %llu -> %llu\n", con,
13478c2ecf20Sopenharmony_ci	     con->in_seq_acked, con->in_seq);
13488c2ecf20Sopenharmony_ci	con->in_seq_acked = con->in_seq;
13498c2ecf20Sopenharmony_ci
13508c2ecf20Sopenharmony_ci	con_out_kvec_reset(con);
13518c2ecf20Sopenharmony_ci
13528c2ecf20Sopenharmony_ci	con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
13538c2ecf20Sopenharmony_ci
13548c2ecf20Sopenharmony_ci	con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
13558c2ecf20Sopenharmony_ci	con_out_kvec_add(con, sizeof (con->out_temp_ack),
13568c2ecf20Sopenharmony_ci				&con->out_temp_ack);
13578c2ecf20Sopenharmony_ci
13588c2ecf20Sopenharmony_ci	con->out_more = 1;  /* more will follow.. eventually.. */
13598c2ecf20Sopenharmony_ci	con_flag_set(con, CON_FLAG_WRITE_PENDING);
13608c2ecf20Sopenharmony_ci}
13618c2ecf20Sopenharmony_ci
13628c2ecf20Sopenharmony_ci/*
13638c2ecf20Sopenharmony_ci * Prepare to share the seq during handshake
13648c2ecf20Sopenharmony_ci */
13658c2ecf20Sopenharmony_cistatic void prepare_write_seq(struct ceph_connection *con)
13668c2ecf20Sopenharmony_ci{
13678c2ecf20Sopenharmony_ci	dout("prepare_write_seq %p %llu -> %llu\n", con,
13688c2ecf20Sopenharmony_ci	     con->in_seq_acked, con->in_seq);
13698c2ecf20Sopenharmony_ci	con->in_seq_acked = con->in_seq;
13708c2ecf20Sopenharmony_ci
13718c2ecf20Sopenharmony_ci	con_out_kvec_reset(con);
13728c2ecf20Sopenharmony_ci
13738c2ecf20Sopenharmony_ci	con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
13748c2ecf20Sopenharmony_ci	con_out_kvec_add(con, sizeof (con->out_temp_ack),
13758c2ecf20Sopenharmony_ci			 &con->out_temp_ack);
13768c2ecf20Sopenharmony_ci
13778c2ecf20Sopenharmony_ci	con_flag_set(con, CON_FLAG_WRITE_PENDING);
13788c2ecf20Sopenharmony_ci}
13798c2ecf20Sopenharmony_ci
13808c2ecf20Sopenharmony_ci/*
13818c2ecf20Sopenharmony_ci * Prepare to write keepalive byte.
13828c2ecf20Sopenharmony_ci */
13838c2ecf20Sopenharmony_cistatic void prepare_write_keepalive(struct ceph_connection *con)
13848c2ecf20Sopenharmony_ci{
13858c2ecf20Sopenharmony_ci	dout("prepare_write_keepalive %p\n", con);
13868c2ecf20Sopenharmony_ci	con_out_kvec_reset(con);
13878c2ecf20Sopenharmony_ci	if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) {
13888c2ecf20Sopenharmony_ci		struct timespec64 now;
13898c2ecf20Sopenharmony_ci
13908c2ecf20Sopenharmony_ci		ktime_get_real_ts64(&now);
13918c2ecf20Sopenharmony_ci		con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2);
13928c2ecf20Sopenharmony_ci		ceph_encode_timespec64(&con->out_temp_keepalive2, &now);
13938c2ecf20Sopenharmony_ci		con_out_kvec_add(con, sizeof(con->out_temp_keepalive2),
13948c2ecf20Sopenharmony_ci				 &con->out_temp_keepalive2);
13958c2ecf20Sopenharmony_ci	} else {
13968c2ecf20Sopenharmony_ci		con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive);
13978c2ecf20Sopenharmony_ci	}
13988c2ecf20Sopenharmony_ci	con_flag_set(con, CON_FLAG_WRITE_PENDING);
13998c2ecf20Sopenharmony_ci}
14008c2ecf20Sopenharmony_ci
14018c2ecf20Sopenharmony_ci/*
14028c2ecf20Sopenharmony_ci * Connection negotiation.
14038c2ecf20Sopenharmony_ci */
14048c2ecf20Sopenharmony_ci
14058c2ecf20Sopenharmony_cistatic int get_connect_authorizer(struct ceph_connection *con)
14068c2ecf20Sopenharmony_ci{
14078c2ecf20Sopenharmony_ci	struct ceph_auth_handshake *auth;
14088c2ecf20Sopenharmony_ci	int auth_proto;
14098c2ecf20Sopenharmony_ci
14108c2ecf20Sopenharmony_ci	if (!con->ops->get_authorizer) {
14118c2ecf20Sopenharmony_ci		con->auth = NULL;
14128c2ecf20Sopenharmony_ci		con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
14138c2ecf20Sopenharmony_ci		con->out_connect.authorizer_len = 0;
14148c2ecf20Sopenharmony_ci		return 0;
14158c2ecf20Sopenharmony_ci	}
14168c2ecf20Sopenharmony_ci
14178c2ecf20Sopenharmony_ci	auth = con->ops->get_authorizer(con, &auth_proto, con->auth_retry);
14188c2ecf20Sopenharmony_ci	if (IS_ERR(auth))
14198c2ecf20Sopenharmony_ci		return PTR_ERR(auth);
14208c2ecf20Sopenharmony_ci
14218c2ecf20Sopenharmony_ci	con->auth = auth;
14228c2ecf20Sopenharmony_ci	con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
14238c2ecf20Sopenharmony_ci	con->out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len);
14248c2ecf20Sopenharmony_ci	return 0;
14258c2ecf20Sopenharmony_ci}
14268c2ecf20Sopenharmony_ci
14278c2ecf20Sopenharmony_ci/*
14288c2ecf20Sopenharmony_ci * We connected to a peer and are saying hello.
14298c2ecf20Sopenharmony_ci */
14308c2ecf20Sopenharmony_cistatic void prepare_write_banner(struct ceph_connection *con)
14318c2ecf20Sopenharmony_ci{
14328c2ecf20Sopenharmony_ci	con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
14338c2ecf20Sopenharmony_ci	con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
14348c2ecf20Sopenharmony_ci					&con->msgr->my_enc_addr);
14358c2ecf20Sopenharmony_ci
14368c2ecf20Sopenharmony_ci	con->out_more = 0;
14378c2ecf20Sopenharmony_ci	con_flag_set(con, CON_FLAG_WRITE_PENDING);
14388c2ecf20Sopenharmony_ci}
14398c2ecf20Sopenharmony_ci
14408c2ecf20Sopenharmony_cistatic void __prepare_write_connect(struct ceph_connection *con)
14418c2ecf20Sopenharmony_ci{
14428c2ecf20Sopenharmony_ci	con_out_kvec_add(con, sizeof(con->out_connect), &con->out_connect);
14438c2ecf20Sopenharmony_ci	if (con->auth)
14448c2ecf20Sopenharmony_ci		con_out_kvec_add(con, con->auth->authorizer_buf_len,
14458c2ecf20Sopenharmony_ci				 con->auth->authorizer_buf);
14468c2ecf20Sopenharmony_ci
14478c2ecf20Sopenharmony_ci	con->out_more = 0;
14488c2ecf20Sopenharmony_ci	con_flag_set(con, CON_FLAG_WRITE_PENDING);
14498c2ecf20Sopenharmony_ci}
14508c2ecf20Sopenharmony_ci
14518c2ecf20Sopenharmony_cistatic int prepare_write_connect(struct ceph_connection *con)
14528c2ecf20Sopenharmony_ci{
14538c2ecf20Sopenharmony_ci	unsigned int global_seq = get_global_seq(con->msgr, 0);
14548c2ecf20Sopenharmony_ci	int proto;
14558c2ecf20Sopenharmony_ci	int ret;
14568c2ecf20Sopenharmony_ci
14578c2ecf20Sopenharmony_ci	switch (con->peer_name.type) {
14588c2ecf20Sopenharmony_ci	case CEPH_ENTITY_TYPE_MON:
14598c2ecf20Sopenharmony_ci		proto = CEPH_MONC_PROTOCOL;
14608c2ecf20Sopenharmony_ci		break;
14618c2ecf20Sopenharmony_ci	case CEPH_ENTITY_TYPE_OSD:
14628c2ecf20Sopenharmony_ci		proto = CEPH_OSDC_PROTOCOL;
14638c2ecf20Sopenharmony_ci		break;
14648c2ecf20Sopenharmony_ci	case CEPH_ENTITY_TYPE_MDS:
14658c2ecf20Sopenharmony_ci		proto = CEPH_MDSC_PROTOCOL;
14668c2ecf20Sopenharmony_ci		break;
14678c2ecf20Sopenharmony_ci	default:
14688c2ecf20Sopenharmony_ci		BUG();
14698c2ecf20Sopenharmony_ci	}
14708c2ecf20Sopenharmony_ci
14718c2ecf20Sopenharmony_ci	dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
14728c2ecf20Sopenharmony_ci	     con->connect_seq, global_seq, proto);
14738c2ecf20Sopenharmony_ci
14748c2ecf20Sopenharmony_ci	con->out_connect.features =
14758c2ecf20Sopenharmony_ci	    cpu_to_le64(from_msgr(con->msgr)->supported_features);
14768c2ecf20Sopenharmony_ci	con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
14778c2ecf20Sopenharmony_ci	con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
14788c2ecf20Sopenharmony_ci	con->out_connect.global_seq = cpu_to_le32(global_seq);
14798c2ecf20Sopenharmony_ci	con->out_connect.protocol_version = cpu_to_le32(proto);
14808c2ecf20Sopenharmony_ci	con->out_connect.flags = 0;
14818c2ecf20Sopenharmony_ci
14828c2ecf20Sopenharmony_ci	ret = get_connect_authorizer(con);
14838c2ecf20Sopenharmony_ci	if (ret)
14848c2ecf20Sopenharmony_ci		return ret;
14858c2ecf20Sopenharmony_ci
14868c2ecf20Sopenharmony_ci	__prepare_write_connect(con);
14878c2ecf20Sopenharmony_ci	return 0;
14888c2ecf20Sopenharmony_ci}
14898c2ecf20Sopenharmony_ci
14908c2ecf20Sopenharmony_ci/*
14918c2ecf20Sopenharmony_ci * write as much of pending kvecs to the socket as we can.
14928c2ecf20Sopenharmony_ci *  1 -> done
14938c2ecf20Sopenharmony_ci *  0 -> socket full, but more to do
14948c2ecf20Sopenharmony_ci * <0 -> error
14958c2ecf20Sopenharmony_ci */
14968c2ecf20Sopenharmony_cistatic int write_partial_kvec(struct ceph_connection *con)
14978c2ecf20Sopenharmony_ci{
14988c2ecf20Sopenharmony_ci	int ret;
14998c2ecf20Sopenharmony_ci
15008c2ecf20Sopenharmony_ci	dout("write_partial_kvec %p %d left\n", con, con->out_kvec_bytes);
15018c2ecf20Sopenharmony_ci	while (con->out_kvec_bytes > 0) {
15028c2ecf20Sopenharmony_ci		ret = ceph_tcp_sendmsg(con->sock, con->out_kvec_cur,
15038c2ecf20Sopenharmony_ci				       con->out_kvec_left, con->out_kvec_bytes,
15048c2ecf20Sopenharmony_ci				       con->out_more);
15058c2ecf20Sopenharmony_ci		if (ret <= 0)
15068c2ecf20Sopenharmony_ci			goto out;
15078c2ecf20Sopenharmony_ci		con->out_kvec_bytes -= ret;
15088c2ecf20Sopenharmony_ci		if (con->out_kvec_bytes == 0)
15098c2ecf20Sopenharmony_ci			break;            /* done */
15108c2ecf20Sopenharmony_ci
15118c2ecf20Sopenharmony_ci		/* account for full iov entries consumed */
15128c2ecf20Sopenharmony_ci		while (ret >= con->out_kvec_cur->iov_len) {
15138c2ecf20Sopenharmony_ci			BUG_ON(!con->out_kvec_left);
15148c2ecf20Sopenharmony_ci			ret -= con->out_kvec_cur->iov_len;
15158c2ecf20Sopenharmony_ci			con->out_kvec_cur++;
15168c2ecf20Sopenharmony_ci			con->out_kvec_left--;
15178c2ecf20Sopenharmony_ci		}
15188c2ecf20Sopenharmony_ci		/* and for a partially-consumed entry */
15198c2ecf20Sopenharmony_ci		if (ret) {
15208c2ecf20Sopenharmony_ci			con->out_kvec_cur->iov_len -= ret;
15218c2ecf20Sopenharmony_ci			con->out_kvec_cur->iov_base += ret;
15228c2ecf20Sopenharmony_ci		}
15238c2ecf20Sopenharmony_ci	}
15248c2ecf20Sopenharmony_ci	con->out_kvec_left = 0;
15258c2ecf20Sopenharmony_ci	ret = 1;
15268c2ecf20Sopenharmony_ciout:
15278c2ecf20Sopenharmony_ci	dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
15288c2ecf20Sopenharmony_ci	     con->out_kvec_bytes, con->out_kvec_left, ret);
15298c2ecf20Sopenharmony_ci	return ret;  /* done! */
15308c2ecf20Sopenharmony_ci}
15318c2ecf20Sopenharmony_ci
15328c2ecf20Sopenharmony_cistatic u32 ceph_crc32c_page(u32 crc, struct page *page,
15338c2ecf20Sopenharmony_ci				unsigned int page_offset,
15348c2ecf20Sopenharmony_ci				unsigned int length)
15358c2ecf20Sopenharmony_ci{
15368c2ecf20Sopenharmony_ci	char *kaddr;
15378c2ecf20Sopenharmony_ci
15388c2ecf20Sopenharmony_ci	kaddr = kmap(page);
15398c2ecf20Sopenharmony_ci	BUG_ON(kaddr == NULL);
15408c2ecf20Sopenharmony_ci	crc = crc32c(crc, kaddr + page_offset, length);
15418c2ecf20Sopenharmony_ci	kunmap(page);
15428c2ecf20Sopenharmony_ci
15438c2ecf20Sopenharmony_ci	return crc;
15448c2ecf20Sopenharmony_ci}
15458c2ecf20Sopenharmony_ci/*
15468c2ecf20Sopenharmony_ci * Write as much message data payload as we can.  If we finish, queue
15478c2ecf20Sopenharmony_ci * up the footer.
15488c2ecf20Sopenharmony_ci *  1 -> done, footer is now queued in out_kvec[].
15498c2ecf20Sopenharmony_ci *  0 -> socket full, but more to do
15508c2ecf20Sopenharmony_ci * <0 -> error
15518c2ecf20Sopenharmony_ci */
15528c2ecf20Sopenharmony_cistatic int write_partial_message_data(struct ceph_connection *con)
15538c2ecf20Sopenharmony_ci{
15548c2ecf20Sopenharmony_ci	struct ceph_msg *msg = con->out_msg;
15558c2ecf20Sopenharmony_ci	struct ceph_msg_data_cursor *cursor = &msg->cursor;
15568c2ecf20Sopenharmony_ci	bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
15578c2ecf20Sopenharmony_ci	int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
15588c2ecf20Sopenharmony_ci	u32 crc;
15598c2ecf20Sopenharmony_ci
15608c2ecf20Sopenharmony_ci	dout("%s %p msg %p\n", __func__, con, msg);
15618c2ecf20Sopenharmony_ci
15628c2ecf20Sopenharmony_ci	if (!msg->num_data_items)
15638c2ecf20Sopenharmony_ci		return -EINVAL;
15648c2ecf20Sopenharmony_ci
15658c2ecf20Sopenharmony_ci	/*
15668c2ecf20Sopenharmony_ci	 * Iterate through each page that contains data to be
15678c2ecf20Sopenharmony_ci	 * written, and send as much as possible for each.
15688c2ecf20Sopenharmony_ci	 *
15698c2ecf20Sopenharmony_ci	 * If we are calculating the data crc (the default), we will
15708c2ecf20Sopenharmony_ci	 * need to map the page.  If we have no pages, they have
15718c2ecf20Sopenharmony_ci	 * been revoked, so use the zero page.
15728c2ecf20Sopenharmony_ci	 */
15738c2ecf20Sopenharmony_ci	crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
15748c2ecf20Sopenharmony_ci	while (cursor->total_resid) {
15758c2ecf20Sopenharmony_ci		struct page *page;
15768c2ecf20Sopenharmony_ci		size_t page_offset;
15778c2ecf20Sopenharmony_ci		size_t length;
15788c2ecf20Sopenharmony_ci		int ret;
15798c2ecf20Sopenharmony_ci
15808c2ecf20Sopenharmony_ci		if (!cursor->resid) {
15818c2ecf20Sopenharmony_ci			ceph_msg_data_advance(cursor, 0);
15828c2ecf20Sopenharmony_ci			continue;
15838c2ecf20Sopenharmony_ci		}
15848c2ecf20Sopenharmony_ci
15858c2ecf20Sopenharmony_ci		page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
15868c2ecf20Sopenharmony_ci		if (length == cursor->total_resid)
15878c2ecf20Sopenharmony_ci			more = MSG_MORE;
15888c2ecf20Sopenharmony_ci		ret = ceph_tcp_sendpage(con->sock, page, page_offset, length,
15898c2ecf20Sopenharmony_ci					more);
15908c2ecf20Sopenharmony_ci		if (ret <= 0) {
15918c2ecf20Sopenharmony_ci			if (do_datacrc)
15928c2ecf20Sopenharmony_ci				msg->footer.data_crc = cpu_to_le32(crc);
15938c2ecf20Sopenharmony_ci
15948c2ecf20Sopenharmony_ci			return ret;
15958c2ecf20Sopenharmony_ci		}
15968c2ecf20Sopenharmony_ci		if (do_datacrc && cursor->need_crc)
15978c2ecf20Sopenharmony_ci			crc = ceph_crc32c_page(crc, page, page_offset, length);
15988c2ecf20Sopenharmony_ci		ceph_msg_data_advance(cursor, (size_t)ret);
15998c2ecf20Sopenharmony_ci	}
16008c2ecf20Sopenharmony_ci
16018c2ecf20Sopenharmony_ci	dout("%s %p msg %p done\n", __func__, con, msg);
16028c2ecf20Sopenharmony_ci
16038c2ecf20Sopenharmony_ci	/* prepare and queue up footer, too */
16048c2ecf20Sopenharmony_ci	if (do_datacrc)
16058c2ecf20Sopenharmony_ci		msg->footer.data_crc = cpu_to_le32(crc);
16068c2ecf20Sopenharmony_ci	else
16078c2ecf20Sopenharmony_ci		msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
16088c2ecf20Sopenharmony_ci	con_out_kvec_reset(con);
16098c2ecf20Sopenharmony_ci	prepare_write_message_footer(con);
16108c2ecf20Sopenharmony_ci
16118c2ecf20Sopenharmony_ci	return 1;	/* must return > 0 to indicate success */
16128c2ecf20Sopenharmony_ci}
16138c2ecf20Sopenharmony_ci
16148c2ecf20Sopenharmony_ci/*
16158c2ecf20Sopenharmony_ci * write some zeros
16168c2ecf20Sopenharmony_ci */
16178c2ecf20Sopenharmony_cistatic int write_partial_skip(struct ceph_connection *con)
16188c2ecf20Sopenharmony_ci{
16198c2ecf20Sopenharmony_ci	int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
16208c2ecf20Sopenharmony_ci	int ret;
16218c2ecf20Sopenharmony_ci
16228c2ecf20Sopenharmony_ci	dout("%s %p %d left\n", __func__, con, con->out_skip);
16238c2ecf20Sopenharmony_ci	while (con->out_skip > 0) {
16248c2ecf20Sopenharmony_ci		size_t size = min(con->out_skip, (int) PAGE_SIZE);
16258c2ecf20Sopenharmony_ci
16268c2ecf20Sopenharmony_ci		if (size == con->out_skip)
16278c2ecf20Sopenharmony_ci			more = MSG_MORE;
16288c2ecf20Sopenharmony_ci		ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, more);
16298c2ecf20Sopenharmony_ci		if (ret <= 0)
16308c2ecf20Sopenharmony_ci			goto out;
16318c2ecf20Sopenharmony_ci		con->out_skip -= ret;
16328c2ecf20Sopenharmony_ci	}
16338c2ecf20Sopenharmony_ci	ret = 1;
16348c2ecf20Sopenharmony_ciout:
16358c2ecf20Sopenharmony_ci	return ret;
16368c2ecf20Sopenharmony_ci}
16378c2ecf20Sopenharmony_ci
16388c2ecf20Sopenharmony_ci/*
16398c2ecf20Sopenharmony_ci * Prepare to read connection handshake, or an ack.
16408c2ecf20Sopenharmony_ci */
16418c2ecf20Sopenharmony_cistatic void prepare_read_banner(struct ceph_connection *con)
16428c2ecf20Sopenharmony_ci{
16438c2ecf20Sopenharmony_ci	dout("prepare_read_banner %p\n", con);
16448c2ecf20Sopenharmony_ci	con->in_base_pos = 0;
16458c2ecf20Sopenharmony_ci}
16468c2ecf20Sopenharmony_ci
16478c2ecf20Sopenharmony_cistatic void prepare_read_connect(struct ceph_connection *con)
16488c2ecf20Sopenharmony_ci{
16498c2ecf20Sopenharmony_ci	dout("prepare_read_connect %p\n", con);
16508c2ecf20Sopenharmony_ci	con->in_base_pos = 0;
16518c2ecf20Sopenharmony_ci}
16528c2ecf20Sopenharmony_ci
16538c2ecf20Sopenharmony_cistatic void prepare_read_ack(struct ceph_connection *con)
16548c2ecf20Sopenharmony_ci{
16558c2ecf20Sopenharmony_ci	dout("prepare_read_ack %p\n", con);
16568c2ecf20Sopenharmony_ci	con->in_base_pos = 0;
16578c2ecf20Sopenharmony_ci}
16588c2ecf20Sopenharmony_ci
16598c2ecf20Sopenharmony_cistatic void prepare_read_seq(struct ceph_connection *con)
16608c2ecf20Sopenharmony_ci{
16618c2ecf20Sopenharmony_ci	dout("prepare_read_seq %p\n", con);
16628c2ecf20Sopenharmony_ci	con->in_base_pos = 0;
16638c2ecf20Sopenharmony_ci	con->in_tag = CEPH_MSGR_TAG_SEQ;
16648c2ecf20Sopenharmony_ci}
16658c2ecf20Sopenharmony_ci
16668c2ecf20Sopenharmony_cistatic void prepare_read_tag(struct ceph_connection *con)
16678c2ecf20Sopenharmony_ci{
16688c2ecf20Sopenharmony_ci	dout("prepare_read_tag %p\n", con);
16698c2ecf20Sopenharmony_ci	con->in_base_pos = 0;
16708c2ecf20Sopenharmony_ci	con->in_tag = CEPH_MSGR_TAG_READY;
16718c2ecf20Sopenharmony_ci}
16728c2ecf20Sopenharmony_ci
16738c2ecf20Sopenharmony_cistatic void prepare_read_keepalive_ack(struct ceph_connection *con)
16748c2ecf20Sopenharmony_ci{
16758c2ecf20Sopenharmony_ci	dout("prepare_read_keepalive_ack %p\n", con);
16768c2ecf20Sopenharmony_ci	con->in_base_pos = 0;
16778c2ecf20Sopenharmony_ci}
16788c2ecf20Sopenharmony_ci
16798c2ecf20Sopenharmony_ci/*
16808c2ecf20Sopenharmony_ci * Prepare to read a message.
16818c2ecf20Sopenharmony_ci */
16828c2ecf20Sopenharmony_cistatic int prepare_read_message(struct ceph_connection *con)
16838c2ecf20Sopenharmony_ci{
16848c2ecf20Sopenharmony_ci	dout("prepare_read_message %p\n", con);
16858c2ecf20Sopenharmony_ci	BUG_ON(con->in_msg != NULL);
16868c2ecf20Sopenharmony_ci	con->in_base_pos = 0;
16878c2ecf20Sopenharmony_ci	con->in_front_crc = con->in_middle_crc = con->in_data_crc = 0;
16888c2ecf20Sopenharmony_ci	return 0;
16898c2ecf20Sopenharmony_ci}
16908c2ecf20Sopenharmony_ci
16918c2ecf20Sopenharmony_ci
16928c2ecf20Sopenharmony_cistatic int read_partial(struct ceph_connection *con,
16938c2ecf20Sopenharmony_ci			int end, int size, void *object)
16948c2ecf20Sopenharmony_ci{
16958c2ecf20Sopenharmony_ci	while (con->in_base_pos < end) {
16968c2ecf20Sopenharmony_ci		int left = end - con->in_base_pos;
16978c2ecf20Sopenharmony_ci		int have = size - left;
16988c2ecf20Sopenharmony_ci		int ret = ceph_tcp_recvmsg(con->sock, object + have, left);
16998c2ecf20Sopenharmony_ci		if (ret <= 0)
17008c2ecf20Sopenharmony_ci			return ret;
17018c2ecf20Sopenharmony_ci		con->in_base_pos += ret;
17028c2ecf20Sopenharmony_ci	}
17038c2ecf20Sopenharmony_ci	return 1;
17048c2ecf20Sopenharmony_ci}
17058c2ecf20Sopenharmony_ci
17068c2ecf20Sopenharmony_ci
17078c2ecf20Sopenharmony_ci/*
17088c2ecf20Sopenharmony_ci * Read all or part of the connect-side handshake on a new connection
17098c2ecf20Sopenharmony_ci */
17108c2ecf20Sopenharmony_cistatic int read_partial_banner(struct ceph_connection *con)
17118c2ecf20Sopenharmony_ci{
17128c2ecf20Sopenharmony_ci	int size;
17138c2ecf20Sopenharmony_ci	int end;
17148c2ecf20Sopenharmony_ci	int ret;
17158c2ecf20Sopenharmony_ci
17168c2ecf20Sopenharmony_ci	dout("read_partial_banner %p at %d\n", con, con->in_base_pos);
17178c2ecf20Sopenharmony_ci
17188c2ecf20Sopenharmony_ci	/* peer's banner */
17198c2ecf20Sopenharmony_ci	size = strlen(CEPH_BANNER);
17208c2ecf20Sopenharmony_ci	end = size;
17218c2ecf20Sopenharmony_ci	ret = read_partial(con, end, size, con->in_banner);
17228c2ecf20Sopenharmony_ci	if (ret <= 0)
17238c2ecf20Sopenharmony_ci		goto out;
17248c2ecf20Sopenharmony_ci
17258c2ecf20Sopenharmony_ci	size = sizeof (con->actual_peer_addr);
17268c2ecf20Sopenharmony_ci	end += size;
17278c2ecf20Sopenharmony_ci	ret = read_partial(con, end, size, &con->actual_peer_addr);
17288c2ecf20Sopenharmony_ci	if (ret <= 0)
17298c2ecf20Sopenharmony_ci		goto out;
17308c2ecf20Sopenharmony_ci	ceph_decode_banner_addr(&con->actual_peer_addr);
17318c2ecf20Sopenharmony_ci
17328c2ecf20Sopenharmony_ci	size = sizeof (con->peer_addr_for_me);
17338c2ecf20Sopenharmony_ci	end += size;
17348c2ecf20Sopenharmony_ci	ret = read_partial(con, end, size, &con->peer_addr_for_me);
17358c2ecf20Sopenharmony_ci	if (ret <= 0)
17368c2ecf20Sopenharmony_ci		goto out;
17378c2ecf20Sopenharmony_ci	ceph_decode_banner_addr(&con->peer_addr_for_me);
17388c2ecf20Sopenharmony_ci
17398c2ecf20Sopenharmony_ciout:
17408c2ecf20Sopenharmony_ci	return ret;
17418c2ecf20Sopenharmony_ci}
17428c2ecf20Sopenharmony_ci
17438c2ecf20Sopenharmony_cistatic int read_partial_connect(struct ceph_connection *con)
17448c2ecf20Sopenharmony_ci{
17458c2ecf20Sopenharmony_ci	int size;
17468c2ecf20Sopenharmony_ci	int end;
17478c2ecf20Sopenharmony_ci	int ret;
17488c2ecf20Sopenharmony_ci
17498c2ecf20Sopenharmony_ci	dout("read_partial_connect %p at %d\n", con, con->in_base_pos);
17508c2ecf20Sopenharmony_ci
17518c2ecf20Sopenharmony_ci	size = sizeof (con->in_reply);
17528c2ecf20Sopenharmony_ci	end = size;
17538c2ecf20Sopenharmony_ci	ret = read_partial(con, end, size, &con->in_reply);
17548c2ecf20Sopenharmony_ci	if (ret <= 0)
17558c2ecf20Sopenharmony_ci		goto out;
17568c2ecf20Sopenharmony_ci
17578c2ecf20Sopenharmony_ci	if (con->auth) {
17588c2ecf20Sopenharmony_ci		size = le32_to_cpu(con->in_reply.authorizer_len);
17598c2ecf20Sopenharmony_ci		if (size > con->auth->authorizer_reply_buf_len) {
17608c2ecf20Sopenharmony_ci			pr_err("authorizer reply too big: %d > %zu\n", size,
17618c2ecf20Sopenharmony_ci			       con->auth->authorizer_reply_buf_len);
17628c2ecf20Sopenharmony_ci			ret = -EINVAL;
17638c2ecf20Sopenharmony_ci			goto out;
17648c2ecf20Sopenharmony_ci		}
17658c2ecf20Sopenharmony_ci
17668c2ecf20Sopenharmony_ci		end += size;
17678c2ecf20Sopenharmony_ci		ret = read_partial(con, end, size,
17688c2ecf20Sopenharmony_ci				   con->auth->authorizer_reply_buf);
17698c2ecf20Sopenharmony_ci		if (ret <= 0)
17708c2ecf20Sopenharmony_ci			goto out;
17718c2ecf20Sopenharmony_ci	}
17728c2ecf20Sopenharmony_ci
17738c2ecf20Sopenharmony_ci	dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n",
17748c2ecf20Sopenharmony_ci	     con, (int)con->in_reply.tag,
17758c2ecf20Sopenharmony_ci	     le32_to_cpu(con->in_reply.connect_seq),
17768c2ecf20Sopenharmony_ci	     le32_to_cpu(con->in_reply.global_seq));
17778c2ecf20Sopenharmony_ciout:
17788c2ecf20Sopenharmony_ci	return ret;
17798c2ecf20Sopenharmony_ci}
17808c2ecf20Sopenharmony_ci
17818c2ecf20Sopenharmony_ci/*
17828c2ecf20Sopenharmony_ci * Verify the hello banner looks okay.
17838c2ecf20Sopenharmony_ci */
17848c2ecf20Sopenharmony_cistatic int verify_hello(struct ceph_connection *con)
17858c2ecf20Sopenharmony_ci{
17868c2ecf20Sopenharmony_ci	if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) {
17878c2ecf20Sopenharmony_ci		pr_err("connect to %s got bad banner\n",
17888c2ecf20Sopenharmony_ci		       ceph_pr_addr(&con->peer_addr));
17898c2ecf20Sopenharmony_ci		con->error_msg = "protocol error, bad banner";
17908c2ecf20Sopenharmony_ci		return -1;
17918c2ecf20Sopenharmony_ci	}
17928c2ecf20Sopenharmony_ci	return 0;
17938c2ecf20Sopenharmony_ci}
17948c2ecf20Sopenharmony_ci
17958c2ecf20Sopenharmony_cistatic bool addr_is_blank(struct ceph_entity_addr *addr)
17968c2ecf20Sopenharmony_ci{
17978c2ecf20Sopenharmony_ci	struct sockaddr_storage ss = addr->in_addr; /* align */
17988c2ecf20Sopenharmony_ci	struct in_addr *addr4 = &((struct sockaddr_in *)&ss)->sin_addr;
17998c2ecf20Sopenharmony_ci	struct in6_addr *addr6 = &((struct sockaddr_in6 *)&ss)->sin6_addr;
18008c2ecf20Sopenharmony_ci
18018c2ecf20Sopenharmony_ci	switch (ss.ss_family) {
18028c2ecf20Sopenharmony_ci	case AF_INET:
18038c2ecf20Sopenharmony_ci		return addr4->s_addr == htonl(INADDR_ANY);
18048c2ecf20Sopenharmony_ci	case AF_INET6:
18058c2ecf20Sopenharmony_ci		return ipv6_addr_any(addr6);
18068c2ecf20Sopenharmony_ci	default:
18078c2ecf20Sopenharmony_ci		return true;
18088c2ecf20Sopenharmony_ci	}
18098c2ecf20Sopenharmony_ci}
18108c2ecf20Sopenharmony_ci
18118c2ecf20Sopenharmony_cistatic int addr_port(struct ceph_entity_addr *addr)
18128c2ecf20Sopenharmony_ci{
18138c2ecf20Sopenharmony_ci	switch (get_unaligned(&addr->in_addr.ss_family)) {
18148c2ecf20Sopenharmony_ci	case AF_INET:
18158c2ecf20Sopenharmony_ci		return ntohs(get_unaligned(&((struct sockaddr_in *)&addr->in_addr)->sin_port));
18168c2ecf20Sopenharmony_ci	case AF_INET6:
18178c2ecf20Sopenharmony_ci		return ntohs(get_unaligned(&((struct sockaddr_in6 *)&addr->in_addr)->sin6_port));
18188c2ecf20Sopenharmony_ci	}
18198c2ecf20Sopenharmony_ci	return 0;
18208c2ecf20Sopenharmony_ci}
18218c2ecf20Sopenharmony_ci
18228c2ecf20Sopenharmony_cistatic void addr_set_port(struct ceph_entity_addr *addr, int p)
18238c2ecf20Sopenharmony_ci{
18248c2ecf20Sopenharmony_ci	switch (get_unaligned(&addr->in_addr.ss_family)) {
18258c2ecf20Sopenharmony_ci	case AF_INET:
18268c2ecf20Sopenharmony_ci		put_unaligned(htons(p), &((struct sockaddr_in *)&addr->in_addr)->sin_port);
18278c2ecf20Sopenharmony_ci		break;
18288c2ecf20Sopenharmony_ci	case AF_INET6:
18298c2ecf20Sopenharmony_ci		put_unaligned(htons(p), &((struct sockaddr_in6 *)&addr->in_addr)->sin6_port);
18308c2ecf20Sopenharmony_ci		break;
18318c2ecf20Sopenharmony_ci	}
18328c2ecf20Sopenharmony_ci}
18338c2ecf20Sopenharmony_ci
18348c2ecf20Sopenharmony_ci/*
18358c2ecf20Sopenharmony_ci * Unlike other *_pton function semantics, zero indicates success.
18368c2ecf20Sopenharmony_ci */
18378c2ecf20Sopenharmony_cistatic int ceph_pton(const char *str, size_t len, struct ceph_entity_addr *addr,
18388c2ecf20Sopenharmony_ci		char delim, const char **ipend)
18398c2ecf20Sopenharmony_ci{
18408c2ecf20Sopenharmony_ci	memset(&addr->in_addr, 0, sizeof(addr->in_addr));
18418c2ecf20Sopenharmony_ci
18428c2ecf20Sopenharmony_ci	if (in4_pton(str, len, (u8 *)&((struct sockaddr_in *)&addr->in_addr)->sin_addr.s_addr, delim, ipend)) {
18438c2ecf20Sopenharmony_ci		put_unaligned(AF_INET, &addr->in_addr.ss_family);
18448c2ecf20Sopenharmony_ci		return 0;
18458c2ecf20Sopenharmony_ci	}
18468c2ecf20Sopenharmony_ci
18478c2ecf20Sopenharmony_ci	if (in6_pton(str, len, (u8 *)&((struct sockaddr_in6 *)&addr->in_addr)->sin6_addr.s6_addr, delim, ipend)) {
18488c2ecf20Sopenharmony_ci		put_unaligned(AF_INET6, &addr->in_addr.ss_family);
18498c2ecf20Sopenharmony_ci		return 0;
18508c2ecf20Sopenharmony_ci	}
18518c2ecf20Sopenharmony_ci
18528c2ecf20Sopenharmony_ci	return -EINVAL;
18538c2ecf20Sopenharmony_ci}
18548c2ecf20Sopenharmony_ci
18558c2ecf20Sopenharmony_ci/*
18568c2ecf20Sopenharmony_ci * Extract hostname string and resolve using kernel DNS facility.
18578c2ecf20Sopenharmony_ci */
18588c2ecf20Sopenharmony_ci#ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER
18598c2ecf20Sopenharmony_cistatic int ceph_dns_resolve_name(const char *name, size_t namelen,
18608c2ecf20Sopenharmony_ci		struct ceph_entity_addr *addr, char delim, const char **ipend)
18618c2ecf20Sopenharmony_ci{
18628c2ecf20Sopenharmony_ci	const char *end, *delim_p;
18638c2ecf20Sopenharmony_ci	char *colon_p, *ip_addr = NULL;
18648c2ecf20Sopenharmony_ci	int ip_len, ret;
18658c2ecf20Sopenharmony_ci
18668c2ecf20Sopenharmony_ci	/*
18678c2ecf20Sopenharmony_ci	 * The end of the hostname occurs immediately preceding the delimiter or
18688c2ecf20Sopenharmony_ci	 * the port marker (':') where the delimiter takes precedence.
18698c2ecf20Sopenharmony_ci	 */
18708c2ecf20Sopenharmony_ci	delim_p = memchr(name, delim, namelen);
18718c2ecf20Sopenharmony_ci	colon_p = memchr(name, ':', namelen);
18728c2ecf20Sopenharmony_ci
18738c2ecf20Sopenharmony_ci	if (delim_p && colon_p)
18748c2ecf20Sopenharmony_ci		end = delim_p < colon_p ? delim_p : colon_p;
18758c2ecf20Sopenharmony_ci	else if (!delim_p && colon_p)
18768c2ecf20Sopenharmony_ci		end = colon_p;
18778c2ecf20Sopenharmony_ci	else {
18788c2ecf20Sopenharmony_ci		end = delim_p;
18798c2ecf20Sopenharmony_ci		if (!end) /* case: hostname:/ */
18808c2ecf20Sopenharmony_ci			end = name + namelen;
18818c2ecf20Sopenharmony_ci	}
18828c2ecf20Sopenharmony_ci
18838c2ecf20Sopenharmony_ci	if (end <= name)
18848c2ecf20Sopenharmony_ci		return -EINVAL;
18858c2ecf20Sopenharmony_ci
18868c2ecf20Sopenharmony_ci	/* do dns_resolve upcall */
18878c2ecf20Sopenharmony_ci	ip_len = dns_query(current->nsproxy->net_ns,
18888c2ecf20Sopenharmony_ci			   NULL, name, end - name, NULL, &ip_addr, NULL, false);
18898c2ecf20Sopenharmony_ci	if (ip_len > 0)
18908c2ecf20Sopenharmony_ci		ret = ceph_pton(ip_addr, ip_len, addr, -1, NULL);
18918c2ecf20Sopenharmony_ci	else
18928c2ecf20Sopenharmony_ci		ret = -ESRCH;
18938c2ecf20Sopenharmony_ci
18948c2ecf20Sopenharmony_ci	kfree(ip_addr);
18958c2ecf20Sopenharmony_ci
18968c2ecf20Sopenharmony_ci	*ipend = end;
18978c2ecf20Sopenharmony_ci
18988c2ecf20Sopenharmony_ci	pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name,
18998c2ecf20Sopenharmony_ci			ret, ret ? "failed" : ceph_pr_addr(addr));
19008c2ecf20Sopenharmony_ci
19018c2ecf20Sopenharmony_ci	return ret;
19028c2ecf20Sopenharmony_ci}
19038c2ecf20Sopenharmony_ci#else
19048c2ecf20Sopenharmony_cistatic inline int ceph_dns_resolve_name(const char *name, size_t namelen,
19058c2ecf20Sopenharmony_ci		struct ceph_entity_addr *addr, char delim, const char **ipend)
19068c2ecf20Sopenharmony_ci{
19078c2ecf20Sopenharmony_ci	return -EINVAL;
19088c2ecf20Sopenharmony_ci}
19098c2ecf20Sopenharmony_ci#endif
19108c2ecf20Sopenharmony_ci
19118c2ecf20Sopenharmony_ci/*
19128c2ecf20Sopenharmony_ci * Parse a server name (IP or hostname). If a valid IP address is not found
19138c2ecf20Sopenharmony_ci * then try to extract a hostname to resolve using userspace DNS upcall.
19148c2ecf20Sopenharmony_ci */
19158c2ecf20Sopenharmony_cistatic int ceph_parse_server_name(const char *name, size_t namelen,
19168c2ecf20Sopenharmony_ci		struct ceph_entity_addr *addr, char delim, const char **ipend)
19178c2ecf20Sopenharmony_ci{
19188c2ecf20Sopenharmony_ci	int ret;
19198c2ecf20Sopenharmony_ci
19208c2ecf20Sopenharmony_ci	ret = ceph_pton(name, namelen, addr, delim, ipend);
19218c2ecf20Sopenharmony_ci	if (ret)
19228c2ecf20Sopenharmony_ci		ret = ceph_dns_resolve_name(name, namelen, addr, delim, ipend);
19238c2ecf20Sopenharmony_ci
19248c2ecf20Sopenharmony_ci	return ret;
19258c2ecf20Sopenharmony_ci}
19268c2ecf20Sopenharmony_ci
19278c2ecf20Sopenharmony_ci/*
19288c2ecf20Sopenharmony_ci * Parse an ip[:port] list into an addr array.  Use the default
19298c2ecf20Sopenharmony_ci * monitor port if a port isn't specified.
19308c2ecf20Sopenharmony_ci */
19318c2ecf20Sopenharmony_ciint ceph_parse_ips(const char *c, const char *end,
19328c2ecf20Sopenharmony_ci		   struct ceph_entity_addr *addr,
19338c2ecf20Sopenharmony_ci		   int max_count, int *count)
19348c2ecf20Sopenharmony_ci{
19358c2ecf20Sopenharmony_ci	int i, ret = -EINVAL;
19368c2ecf20Sopenharmony_ci	const char *p = c;
19378c2ecf20Sopenharmony_ci
19388c2ecf20Sopenharmony_ci	dout("parse_ips on '%.*s'\n", (int)(end-c), c);
19398c2ecf20Sopenharmony_ci	for (i = 0; i < max_count; i++) {
19408c2ecf20Sopenharmony_ci		const char *ipend;
19418c2ecf20Sopenharmony_ci		int port;
19428c2ecf20Sopenharmony_ci		char delim = ',';
19438c2ecf20Sopenharmony_ci
19448c2ecf20Sopenharmony_ci		if (*p == '[') {
19458c2ecf20Sopenharmony_ci			delim = ']';
19468c2ecf20Sopenharmony_ci			p++;
19478c2ecf20Sopenharmony_ci		}
19488c2ecf20Sopenharmony_ci
19498c2ecf20Sopenharmony_ci		ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend);
19508c2ecf20Sopenharmony_ci		if (ret)
19518c2ecf20Sopenharmony_ci			goto bad;
19528c2ecf20Sopenharmony_ci		ret = -EINVAL;
19538c2ecf20Sopenharmony_ci
19548c2ecf20Sopenharmony_ci		p = ipend;
19558c2ecf20Sopenharmony_ci
19568c2ecf20Sopenharmony_ci		if (delim == ']') {
19578c2ecf20Sopenharmony_ci			if (*p != ']') {
19588c2ecf20Sopenharmony_ci				dout("missing matching ']'\n");
19598c2ecf20Sopenharmony_ci				goto bad;
19608c2ecf20Sopenharmony_ci			}
19618c2ecf20Sopenharmony_ci			p++;
19628c2ecf20Sopenharmony_ci		}
19638c2ecf20Sopenharmony_ci
19648c2ecf20Sopenharmony_ci		/* port? */
19658c2ecf20Sopenharmony_ci		if (p < end && *p == ':') {
19668c2ecf20Sopenharmony_ci			port = 0;
19678c2ecf20Sopenharmony_ci			p++;
19688c2ecf20Sopenharmony_ci			while (p < end && *p >= '0' && *p <= '9') {
19698c2ecf20Sopenharmony_ci				port = (port * 10) + (*p - '0');
19708c2ecf20Sopenharmony_ci				p++;
19718c2ecf20Sopenharmony_ci			}
19728c2ecf20Sopenharmony_ci			if (port == 0)
19738c2ecf20Sopenharmony_ci				port = CEPH_MON_PORT;
19748c2ecf20Sopenharmony_ci			else if (port > 65535)
19758c2ecf20Sopenharmony_ci				goto bad;
19768c2ecf20Sopenharmony_ci		} else {
19778c2ecf20Sopenharmony_ci			port = CEPH_MON_PORT;
19788c2ecf20Sopenharmony_ci		}
19798c2ecf20Sopenharmony_ci
19808c2ecf20Sopenharmony_ci		addr_set_port(&addr[i], port);
19818c2ecf20Sopenharmony_ci		addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
19828c2ecf20Sopenharmony_ci
19838c2ecf20Sopenharmony_ci		dout("parse_ips got %s\n", ceph_pr_addr(&addr[i]));
19848c2ecf20Sopenharmony_ci
19858c2ecf20Sopenharmony_ci		if (p == end)
19868c2ecf20Sopenharmony_ci			break;
19878c2ecf20Sopenharmony_ci		if (*p != ',')
19888c2ecf20Sopenharmony_ci			goto bad;
19898c2ecf20Sopenharmony_ci		p++;
19908c2ecf20Sopenharmony_ci	}
19918c2ecf20Sopenharmony_ci
19928c2ecf20Sopenharmony_ci	if (p != end)
19938c2ecf20Sopenharmony_ci		goto bad;
19948c2ecf20Sopenharmony_ci
19958c2ecf20Sopenharmony_ci	if (count)
19968c2ecf20Sopenharmony_ci		*count = i + 1;
19978c2ecf20Sopenharmony_ci	return 0;
19988c2ecf20Sopenharmony_ci
19998c2ecf20Sopenharmony_cibad:
20008c2ecf20Sopenharmony_ci	return ret;
20018c2ecf20Sopenharmony_ci}
20028c2ecf20Sopenharmony_ci
20038c2ecf20Sopenharmony_cistatic int process_banner(struct ceph_connection *con)
20048c2ecf20Sopenharmony_ci{
20058c2ecf20Sopenharmony_ci	dout("process_banner on %p\n", con);
20068c2ecf20Sopenharmony_ci
20078c2ecf20Sopenharmony_ci	if (verify_hello(con) < 0)
20088c2ecf20Sopenharmony_ci		return -1;
20098c2ecf20Sopenharmony_ci
20108c2ecf20Sopenharmony_ci	/*
20118c2ecf20Sopenharmony_ci	 * Make sure the other end is who we wanted.  note that the other
20128c2ecf20Sopenharmony_ci	 * end may not yet know their ip address, so if it's 0.0.0.0, give
20138c2ecf20Sopenharmony_ci	 * them the benefit of the doubt.
20148c2ecf20Sopenharmony_ci	 */
20158c2ecf20Sopenharmony_ci	if (memcmp(&con->peer_addr, &con->actual_peer_addr,
20168c2ecf20Sopenharmony_ci		   sizeof(con->peer_addr)) != 0 &&
20178c2ecf20Sopenharmony_ci	    !(addr_is_blank(&con->actual_peer_addr) &&
20188c2ecf20Sopenharmony_ci	      con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
20198c2ecf20Sopenharmony_ci		pr_warn("wrong peer, want %s/%u, got %s/%u\n",
20208c2ecf20Sopenharmony_ci			ceph_pr_addr(&con->peer_addr),
20218c2ecf20Sopenharmony_ci			le32_to_cpu(con->peer_addr.nonce),
20228c2ecf20Sopenharmony_ci			ceph_pr_addr(&con->actual_peer_addr),
20238c2ecf20Sopenharmony_ci			le32_to_cpu(con->actual_peer_addr.nonce));
20248c2ecf20Sopenharmony_ci		con->error_msg = "wrong peer at address";
20258c2ecf20Sopenharmony_ci		return -1;
20268c2ecf20Sopenharmony_ci	}
20278c2ecf20Sopenharmony_ci
20288c2ecf20Sopenharmony_ci	/*
20298c2ecf20Sopenharmony_ci	 * did we learn our address?
20308c2ecf20Sopenharmony_ci	 */
20318c2ecf20Sopenharmony_ci	if (addr_is_blank(&con->msgr->inst.addr)) {
20328c2ecf20Sopenharmony_ci		int port = addr_port(&con->msgr->inst.addr);
20338c2ecf20Sopenharmony_ci
20348c2ecf20Sopenharmony_ci		memcpy(&con->msgr->inst.addr.in_addr,
20358c2ecf20Sopenharmony_ci		       &con->peer_addr_for_me.in_addr,
20368c2ecf20Sopenharmony_ci		       sizeof(con->peer_addr_for_me.in_addr));
20378c2ecf20Sopenharmony_ci		addr_set_port(&con->msgr->inst.addr, port);
20388c2ecf20Sopenharmony_ci		encode_my_addr(con->msgr);
20398c2ecf20Sopenharmony_ci		dout("process_banner learned my addr is %s\n",
20408c2ecf20Sopenharmony_ci		     ceph_pr_addr(&con->msgr->inst.addr));
20418c2ecf20Sopenharmony_ci	}
20428c2ecf20Sopenharmony_ci
20438c2ecf20Sopenharmony_ci	return 0;
20448c2ecf20Sopenharmony_ci}
20458c2ecf20Sopenharmony_ci
20468c2ecf20Sopenharmony_cistatic int process_connect(struct ceph_connection *con)
20478c2ecf20Sopenharmony_ci{
20488c2ecf20Sopenharmony_ci	u64 sup_feat = from_msgr(con->msgr)->supported_features;
20498c2ecf20Sopenharmony_ci	u64 req_feat = from_msgr(con->msgr)->required_features;
20508c2ecf20Sopenharmony_ci	u64 server_feat = le64_to_cpu(con->in_reply.features);
20518c2ecf20Sopenharmony_ci	int ret;
20528c2ecf20Sopenharmony_ci
20538c2ecf20Sopenharmony_ci	dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
20548c2ecf20Sopenharmony_ci
20558c2ecf20Sopenharmony_ci	if (con->auth) {
20568c2ecf20Sopenharmony_ci		int len = le32_to_cpu(con->in_reply.authorizer_len);
20578c2ecf20Sopenharmony_ci
20588c2ecf20Sopenharmony_ci		/*
20598c2ecf20Sopenharmony_ci		 * Any connection that defines ->get_authorizer()
20608c2ecf20Sopenharmony_ci		 * should also define ->add_authorizer_challenge() and
20618c2ecf20Sopenharmony_ci		 * ->verify_authorizer_reply().
20628c2ecf20Sopenharmony_ci		 *
20638c2ecf20Sopenharmony_ci		 * See get_connect_authorizer().
20648c2ecf20Sopenharmony_ci		 */
20658c2ecf20Sopenharmony_ci		if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) {
20668c2ecf20Sopenharmony_ci			ret = con->ops->add_authorizer_challenge(
20678c2ecf20Sopenharmony_ci				    con, con->auth->authorizer_reply_buf, len);
20688c2ecf20Sopenharmony_ci			if (ret < 0)
20698c2ecf20Sopenharmony_ci				return ret;
20708c2ecf20Sopenharmony_ci
20718c2ecf20Sopenharmony_ci			con_out_kvec_reset(con);
20728c2ecf20Sopenharmony_ci			__prepare_write_connect(con);
20738c2ecf20Sopenharmony_ci			prepare_read_connect(con);
20748c2ecf20Sopenharmony_ci			return 0;
20758c2ecf20Sopenharmony_ci		}
20768c2ecf20Sopenharmony_ci
20778c2ecf20Sopenharmony_ci		if (len) {
20788c2ecf20Sopenharmony_ci			ret = con->ops->verify_authorizer_reply(con);
20798c2ecf20Sopenharmony_ci			if (ret < 0) {
20808c2ecf20Sopenharmony_ci				con->error_msg = "bad authorize reply";
20818c2ecf20Sopenharmony_ci				return ret;
20828c2ecf20Sopenharmony_ci			}
20838c2ecf20Sopenharmony_ci		}
20848c2ecf20Sopenharmony_ci	}
20858c2ecf20Sopenharmony_ci
20868c2ecf20Sopenharmony_ci	switch (con->in_reply.tag) {
20878c2ecf20Sopenharmony_ci	case CEPH_MSGR_TAG_FEATURES:
20888c2ecf20Sopenharmony_ci		pr_err("%s%lld %s feature set mismatch,"
20898c2ecf20Sopenharmony_ci		       " my %llx < server's %llx, missing %llx\n",
20908c2ecf20Sopenharmony_ci		       ENTITY_NAME(con->peer_name),
20918c2ecf20Sopenharmony_ci		       ceph_pr_addr(&con->peer_addr),
20928c2ecf20Sopenharmony_ci		       sup_feat, server_feat, server_feat & ~sup_feat);
20938c2ecf20Sopenharmony_ci		con->error_msg = "missing required protocol features";
20948c2ecf20Sopenharmony_ci		reset_connection(con);
20958c2ecf20Sopenharmony_ci		return -1;
20968c2ecf20Sopenharmony_ci
20978c2ecf20Sopenharmony_ci	case CEPH_MSGR_TAG_BADPROTOVER:
20988c2ecf20Sopenharmony_ci		pr_err("%s%lld %s protocol version mismatch,"
20998c2ecf20Sopenharmony_ci		       " my %d != server's %d\n",
21008c2ecf20Sopenharmony_ci		       ENTITY_NAME(con->peer_name),
21018c2ecf20Sopenharmony_ci		       ceph_pr_addr(&con->peer_addr),
21028c2ecf20Sopenharmony_ci		       le32_to_cpu(con->out_connect.protocol_version),
21038c2ecf20Sopenharmony_ci		       le32_to_cpu(con->in_reply.protocol_version));
21048c2ecf20Sopenharmony_ci		con->error_msg = "protocol version mismatch";
21058c2ecf20Sopenharmony_ci		reset_connection(con);
21068c2ecf20Sopenharmony_ci		return -1;
21078c2ecf20Sopenharmony_ci
21088c2ecf20Sopenharmony_ci	case CEPH_MSGR_TAG_BADAUTHORIZER:
21098c2ecf20Sopenharmony_ci		con->auth_retry++;
21108c2ecf20Sopenharmony_ci		dout("process_connect %p got BADAUTHORIZER attempt %d\n", con,
21118c2ecf20Sopenharmony_ci		     con->auth_retry);
21128c2ecf20Sopenharmony_ci		if (con->auth_retry == 2) {
21138c2ecf20Sopenharmony_ci			con->error_msg = "connect authorization failure";
21148c2ecf20Sopenharmony_ci			return -1;
21158c2ecf20Sopenharmony_ci		}
21168c2ecf20Sopenharmony_ci		con_out_kvec_reset(con);
21178c2ecf20Sopenharmony_ci		ret = prepare_write_connect(con);
21188c2ecf20Sopenharmony_ci		if (ret < 0)
21198c2ecf20Sopenharmony_ci			return ret;
21208c2ecf20Sopenharmony_ci		prepare_read_connect(con);
21218c2ecf20Sopenharmony_ci		break;
21228c2ecf20Sopenharmony_ci
21238c2ecf20Sopenharmony_ci	case CEPH_MSGR_TAG_RESETSESSION:
21248c2ecf20Sopenharmony_ci		/*
21258c2ecf20Sopenharmony_ci		 * If we connected with a large connect_seq but the peer
21268c2ecf20Sopenharmony_ci		 * has no record of a session with us (no connection, or
21278c2ecf20Sopenharmony_ci		 * connect_seq == 0), they will send RESETSESION to indicate
21288c2ecf20Sopenharmony_ci		 * that they must have reset their session, and may have
21298c2ecf20Sopenharmony_ci		 * dropped messages.
21308c2ecf20Sopenharmony_ci		 */
21318c2ecf20Sopenharmony_ci		dout("process_connect got RESET peer seq %u\n",
21328c2ecf20Sopenharmony_ci		     le32_to_cpu(con->in_reply.connect_seq));
21338c2ecf20Sopenharmony_ci		pr_err("%s%lld %s connection reset\n",
21348c2ecf20Sopenharmony_ci		       ENTITY_NAME(con->peer_name),
21358c2ecf20Sopenharmony_ci		       ceph_pr_addr(&con->peer_addr));
21368c2ecf20Sopenharmony_ci		reset_connection(con);
21378c2ecf20Sopenharmony_ci		con_out_kvec_reset(con);
21388c2ecf20Sopenharmony_ci		ret = prepare_write_connect(con);
21398c2ecf20Sopenharmony_ci		if (ret < 0)
21408c2ecf20Sopenharmony_ci			return ret;
21418c2ecf20Sopenharmony_ci		prepare_read_connect(con);
21428c2ecf20Sopenharmony_ci
21438c2ecf20Sopenharmony_ci		/* Tell ceph about it. */
21448c2ecf20Sopenharmony_ci		mutex_unlock(&con->mutex);
21458c2ecf20Sopenharmony_ci		pr_info("reset on %s%lld\n", ENTITY_NAME(con->peer_name));
21468c2ecf20Sopenharmony_ci		if (con->ops->peer_reset)
21478c2ecf20Sopenharmony_ci			con->ops->peer_reset(con);
21488c2ecf20Sopenharmony_ci		mutex_lock(&con->mutex);
21498c2ecf20Sopenharmony_ci		if (con->state != CON_STATE_NEGOTIATING)
21508c2ecf20Sopenharmony_ci			return -EAGAIN;
21518c2ecf20Sopenharmony_ci		break;
21528c2ecf20Sopenharmony_ci
21538c2ecf20Sopenharmony_ci	case CEPH_MSGR_TAG_RETRY_SESSION:
21548c2ecf20Sopenharmony_ci		/*
21558c2ecf20Sopenharmony_ci		 * If we sent a smaller connect_seq than the peer has, try
21568c2ecf20Sopenharmony_ci		 * again with a larger value.
21578c2ecf20Sopenharmony_ci		 */
21588c2ecf20Sopenharmony_ci		dout("process_connect got RETRY_SESSION my seq %u, peer %u\n",
21598c2ecf20Sopenharmony_ci		     le32_to_cpu(con->out_connect.connect_seq),
21608c2ecf20Sopenharmony_ci		     le32_to_cpu(con->in_reply.connect_seq));
21618c2ecf20Sopenharmony_ci		con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
21628c2ecf20Sopenharmony_ci		con_out_kvec_reset(con);
21638c2ecf20Sopenharmony_ci		ret = prepare_write_connect(con);
21648c2ecf20Sopenharmony_ci		if (ret < 0)
21658c2ecf20Sopenharmony_ci			return ret;
21668c2ecf20Sopenharmony_ci		prepare_read_connect(con);
21678c2ecf20Sopenharmony_ci		break;
21688c2ecf20Sopenharmony_ci
21698c2ecf20Sopenharmony_ci	case CEPH_MSGR_TAG_RETRY_GLOBAL:
21708c2ecf20Sopenharmony_ci		/*
21718c2ecf20Sopenharmony_ci		 * If we sent a smaller global_seq than the peer has, try
21728c2ecf20Sopenharmony_ci		 * again with a larger value.
21738c2ecf20Sopenharmony_ci		 */
21748c2ecf20Sopenharmony_ci		dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",
21758c2ecf20Sopenharmony_ci		     con->peer_global_seq,
21768c2ecf20Sopenharmony_ci		     le32_to_cpu(con->in_reply.global_seq));
21778c2ecf20Sopenharmony_ci		get_global_seq(con->msgr,
21788c2ecf20Sopenharmony_ci			       le32_to_cpu(con->in_reply.global_seq));
21798c2ecf20Sopenharmony_ci		con_out_kvec_reset(con);
21808c2ecf20Sopenharmony_ci		ret = prepare_write_connect(con);
21818c2ecf20Sopenharmony_ci		if (ret < 0)
21828c2ecf20Sopenharmony_ci			return ret;
21838c2ecf20Sopenharmony_ci		prepare_read_connect(con);
21848c2ecf20Sopenharmony_ci		break;
21858c2ecf20Sopenharmony_ci
21868c2ecf20Sopenharmony_ci	case CEPH_MSGR_TAG_SEQ:
21878c2ecf20Sopenharmony_ci	case CEPH_MSGR_TAG_READY:
21888c2ecf20Sopenharmony_ci		if (req_feat & ~server_feat) {
21898c2ecf20Sopenharmony_ci			pr_err("%s%lld %s protocol feature mismatch,"
21908c2ecf20Sopenharmony_ci			       " my required %llx > server's %llx, need %llx\n",
21918c2ecf20Sopenharmony_ci			       ENTITY_NAME(con->peer_name),
21928c2ecf20Sopenharmony_ci			       ceph_pr_addr(&con->peer_addr),
21938c2ecf20Sopenharmony_ci			       req_feat, server_feat, req_feat & ~server_feat);
21948c2ecf20Sopenharmony_ci			con->error_msg = "missing required protocol features";
21958c2ecf20Sopenharmony_ci			reset_connection(con);
21968c2ecf20Sopenharmony_ci			return -1;
21978c2ecf20Sopenharmony_ci		}
21988c2ecf20Sopenharmony_ci
21998c2ecf20Sopenharmony_ci		WARN_ON(con->state != CON_STATE_NEGOTIATING);
22008c2ecf20Sopenharmony_ci		con->state = CON_STATE_OPEN;
22018c2ecf20Sopenharmony_ci		con->auth_retry = 0;    /* we authenticated; clear flag */
22028c2ecf20Sopenharmony_ci		con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
22038c2ecf20Sopenharmony_ci		con->connect_seq++;
22048c2ecf20Sopenharmony_ci		con->peer_features = server_feat;
22058c2ecf20Sopenharmony_ci		dout("process_connect got READY gseq %d cseq %d (%d)\n",
22068c2ecf20Sopenharmony_ci		     con->peer_global_seq,
22078c2ecf20Sopenharmony_ci		     le32_to_cpu(con->in_reply.connect_seq),
22088c2ecf20Sopenharmony_ci		     con->connect_seq);
22098c2ecf20Sopenharmony_ci		WARN_ON(con->connect_seq !=
22108c2ecf20Sopenharmony_ci			le32_to_cpu(con->in_reply.connect_seq));
22118c2ecf20Sopenharmony_ci
22128c2ecf20Sopenharmony_ci		if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
22138c2ecf20Sopenharmony_ci			con_flag_set(con, CON_FLAG_LOSSYTX);
22148c2ecf20Sopenharmony_ci
22158c2ecf20Sopenharmony_ci		con->delay = 0;      /* reset backoff memory */
22168c2ecf20Sopenharmony_ci
22178c2ecf20Sopenharmony_ci		if (con->in_reply.tag == CEPH_MSGR_TAG_SEQ) {
22188c2ecf20Sopenharmony_ci			prepare_write_seq(con);
22198c2ecf20Sopenharmony_ci			prepare_read_seq(con);
22208c2ecf20Sopenharmony_ci		} else {
22218c2ecf20Sopenharmony_ci			prepare_read_tag(con);
22228c2ecf20Sopenharmony_ci		}
22238c2ecf20Sopenharmony_ci		break;
22248c2ecf20Sopenharmony_ci
22258c2ecf20Sopenharmony_ci	case CEPH_MSGR_TAG_WAIT:
22268c2ecf20Sopenharmony_ci		/*
22278c2ecf20Sopenharmony_ci		 * If there is a connection race (we are opening
22288c2ecf20Sopenharmony_ci		 * connections to each other), one of us may just have
22298c2ecf20Sopenharmony_ci		 * to WAIT.  This shouldn't happen if we are the
22308c2ecf20Sopenharmony_ci		 * client.
22318c2ecf20Sopenharmony_ci		 */
22328c2ecf20Sopenharmony_ci		con->error_msg = "protocol error, got WAIT as client";
22338c2ecf20Sopenharmony_ci		return -1;
22348c2ecf20Sopenharmony_ci
22358c2ecf20Sopenharmony_ci	default:
22368c2ecf20Sopenharmony_ci		con->error_msg = "protocol error, garbage tag during connect";
22378c2ecf20Sopenharmony_ci		return -1;
22388c2ecf20Sopenharmony_ci	}
22398c2ecf20Sopenharmony_ci	return 0;
22408c2ecf20Sopenharmony_ci}
22418c2ecf20Sopenharmony_ci
22428c2ecf20Sopenharmony_ci
22438c2ecf20Sopenharmony_ci/*
22448c2ecf20Sopenharmony_ci * read (part of) an ack
22458c2ecf20Sopenharmony_ci */
22468c2ecf20Sopenharmony_cistatic int read_partial_ack(struct ceph_connection *con)
22478c2ecf20Sopenharmony_ci{
22488c2ecf20Sopenharmony_ci	int size = sizeof (con->in_temp_ack);
22498c2ecf20Sopenharmony_ci	int end = size;
22508c2ecf20Sopenharmony_ci
22518c2ecf20Sopenharmony_ci	return read_partial(con, end, size, &con->in_temp_ack);
22528c2ecf20Sopenharmony_ci}
22538c2ecf20Sopenharmony_ci
22548c2ecf20Sopenharmony_ci/*
22558c2ecf20Sopenharmony_ci * We can finally discard anything that's been acked.
22568c2ecf20Sopenharmony_ci */
22578c2ecf20Sopenharmony_cistatic void process_ack(struct ceph_connection *con)
22588c2ecf20Sopenharmony_ci{
22598c2ecf20Sopenharmony_ci	struct ceph_msg *m;
22608c2ecf20Sopenharmony_ci	u64 ack = le64_to_cpu(con->in_temp_ack);
22618c2ecf20Sopenharmony_ci	u64 seq;
22628c2ecf20Sopenharmony_ci	bool reconnect = (con->in_tag == CEPH_MSGR_TAG_SEQ);
22638c2ecf20Sopenharmony_ci	struct list_head *list = reconnect ? &con->out_queue : &con->out_sent;
22648c2ecf20Sopenharmony_ci
22658c2ecf20Sopenharmony_ci	/*
22668c2ecf20Sopenharmony_ci	 * In the reconnect case, con_fault() has requeued messages
22678c2ecf20Sopenharmony_ci	 * in out_sent. We should cleanup old messages according to
22688c2ecf20Sopenharmony_ci	 * the reconnect seq.
22698c2ecf20Sopenharmony_ci	 */
22708c2ecf20Sopenharmony_ci	while (!list_empty(list)) {
22718c2ecf20Sopenharmony_ci		m = list_first_entry(list, struct ceph_msg, list_head);
22728c2ecf20Sopenharmony_ci		if (reconnect && m->needs_out_seq)
22738c2ecf20Sopenharmony_ci			break;
22748c2ecf20Sopenharmony_ci		seq = le64_to_cpu(m->hdr.seq);
22758c2ecf20Sopenharmony_ci		if (seq > ack)
22768c2ecf20Sopenharmony_ci			break;
22778c2ecf20Sopenharmony_ci		dout("got ack for seq %llu type %d at %p\n", seq,
22788c2ecf20Sopenharmony_ci		     le16_to_cpu(m->hdr.type), m);
22798c2ecf20Sopenharmony_ci		m->ack_stamp = jiffies;
22808c2ecf20Sopenharmony_ci		ceph_msg_remove(m);
22818c2ecf20Sopenharmony_ci	}
22828c2ecf20Sopenharmony_ci
22838c2ecf20Sopenharmony_ci	prepare_read_tag(con);
22848c2ecf20Sopenharmony_ci}
22858c2ecf20Sopenharmony_ci
22868c2ecf20Sopenharmony_ci
22878c2ecf20Sopenharmony_cistatic int read_partial_message_section(struct ceph_connection *con,
22888c2ecf20Sopenharmony_ci					struct kvec *section,
22898c2ecf20Sopenharmony_ci					unsigned int sec_len, u32 *crc)
22908c2ecf20Sopenharmony_ci{
22918c2ecf20Sopenharmony_ci	int ret, left;
22928c2ecf20Sopenharmony_ci
22938c2ecf20Sopenharmony_ci	BUG_ON(!section);
22948c2ecf20Sopenharmony_ci
22958c2ecf20Sopenharmony_ci	while (section->iov_len < sec_len) {
22968c2ecf20Sopenharmony_ci		BUG_ON(section->iov_base == NULL);
22978c2ecf20Sopenharmony_ci		left = sec_len - section->iov_len;
22988c2ecf20Sopenharmony_ci		ret = ceph_tcp_recvmsg(con->sock, (char *)section->iov_base +
22998c2ecf20Sopenharmony_ci				       section->iov_len, left);
23008c2ecf20Sopenharmony_ci		if (ret <= 0)
23018c2ecf20Sopenharmony_ci			return ret;
23028c2ecf20Sopenharmony_ci		section->iov_len += ret;
23038c2ecf20Sopenharmony_ci	}
23048c2ecf20Sopenharmony_ci	if (section->iov_len == sec_len)
23058c2ecf20Sopenharmony_ci		*crc = crc32c(0, section->iov_base, section->iov_len);
23068c2ecf20Sopenharmony_ci
23078c2ecf20Sopenharmony_ci	return 1;
23088c2ecf20Sopenharmony_ci}
23098c2ecf20Sopenharmony_ci
23108c2ecf20Sopenharmony_cistatic int read_partial_msg_data(struct ceph_connection *con)
23118c2ecf20Sopenharmony_ci{
23128c2ecf20Sopenharmony_ci	struct ceph_msg *msg = con->in_msg;
23138c2ecf20Sopenharmony_ci	struct ceph_msg_data_cursor *cursor = &msg->cursor;
23148c2ecf20Sopenharmony_ci	bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
23158c2ecf20Sopenharmony_ci	struct page *page;
23168c2ecf20Sopenharmony_ci	size_t page_offset;
23178c2ecf20Sopenharmony_ci	size_t length;
23188c2ecf20Sopenharmony_ci	u32 crc = 0;
23198c2ecf20Sopenharmony_ci	int ret;
23208c2ecf20Sopenharmony_ci
23218c2ecf20Sopenharmony_ci	if (!msg->num_data_items)
23228c2ecf20Sopenharmony_ci		return -EIO;
23238c2ecf20Sopenharmony_ci
23248c2ecf20Sopenharmony_ci	if (do_datacrc)
23258c2ecf20Sopenharmony_ci		crc = con->in_data_crc;
23268c2ecf20Sopenharmony_ci	while (cursor->total_resid) {
23278c2ecf20Sopenharmony_ci		if (!cursor->resid) {
23288c2ecf20Sopenharmony_ci			ceph_msg_data_advance(cursor, 0);
23298c2ecf20Sopenharmony_ci			continue;
23308c2ecf20Sopenharmony_ci		}
23318c2ecf20Sopenharmony_ci
23328c2ecf20Sopenharmony_ci		page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
23338c2ecf20Sopenharmony_ci		ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
23348c2ecf20Sopenharmony_ci		if (ret <= 0) {
23358c2ecf20Sopenharmony_ci			if (do_datacrc)
23368c2ecf20Sopenharmony_ci				con->in_data_crc = crc;
23378c2ecf20Sopenharmony_ci
23388c2ecf20Sopenharmony_ci			return ret;
23398c2ecf20Sopenharmony_ci		}
23408c2ecf20Sopenharmony_ci
23418c2ecf20Sopenharmony_ci		if (do_datacrc)
23428c2ecf20Sopenharmony_ci			crc = ceph_crc32c_page(crc, page, page_offset, ret);
23438c2ecf20Sopenharmony_ci		ceph_msg_data_advance(cursor, (size_t)ret);
23448c2ecf20Sopenharmony_ci	}
23458c2ecf20Sopenharmony_ci	if (do_datacrc)
23468c2ecf20Sopenharmony_ci		con->in_data_crc = crc;
23478c2ecf20Sopenharmony_ci
23488c2ecf20Sopenharmony_ci	return 1;	/* must return > 0 to indicate success */
23498c2ecf20Sopenharmony_ci}
23508c2ecf20Sopenharmony_ci
23518c2ecf20Sopenharmony_ci/*
23528c2ecf20Sopenharmony_ci * read (part of) a message.
23538c2ecf20Sopenharmony_ci */
23548c2ecf20Sopenharmony_cistatic int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
23558c2ecf20Sopenharmony_ci
23568c2ecf20Sopenharmony_cistatic int read_partial_message(struct ceph_connection *con)
23578c2ecf20Sopenharmony_ci{
23588c2ecf20Sopenharmony_ci	struct ceph_msg *m = con->in_msg;
23598c2ecf20Sopenharmony_ci	int size;
23608c2ecf20Sopenharmony_ci	int end;
23618c2ecf20Sopenharmony_ci	int ret;
23628c2ecf20Sopenharmony_ci	unsigned int front_len, middle_len, data_len;
23638c2ecf20Sopenharmony_ci	bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
23648c2ecf20Sopenharmony_ci	bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH);
23658c2ecf20Sopenharmony_ci	u64 seq;
23668c2ecf20Sopenharmony_ci	u32 crc;
23678c2ecf20Sopenharmony_ci
23688c2ecf20Sopenharmony_ci	dout("read_partial_message con %p msg %p\n", con, m);
23698c2ecf20Sopenharmony_ci
23708c2ecf20Sopenharmony_ci	/* header */
23718c2ecf20Sopenharmony_ci	size = sizeof (con->in_hdr);
23728c2ecf20Sopenharmony_ci	end = size;
23738c2ecf20Sopenharmony_ci	ret = read_partial(con, end, size, &con->in_hdr);
23748c2ecf20Sopenharmony_ci	if (ret <= 0)
23758c2ecf20Sopenharmony_ci		return ret;
23768c2ecf20Sopenharmony_ci
23778c2ecf20Sopenharmony_ci	crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc));
23788c2ecf20Sopenharmony_ci	if (cpu_to_le32(crc) != con->in_hdr.crc) {
23798c2ecf20Sopenharmony_ci		pr_err("read_partial_message bad hdr crc %u != expected %u\n",
23808c2ecf20Sopenharmony_ci		       crc, con->in_hdr.crc);
23818c2ecf20Sopenharmony_ci		return -EBADMSG;
23828c2ecf20Sopenharmony_ci	}
23838c2ecf20Sopenharmony_ci
23848c2ecf20Sopenharmony_ci	front_len = le32_to_cpu(con->in_hdr.front_len);
23858c2ecf20Sopenharmony_ci	if (front_len > CEPH_MSG_MAX_FRONT_LEN)
23868c2ecf20Sopenharmony_ci		return -EIO;
23878c2ecf20Sopenharmony_ci	middle_len = le32_to_cpu(con->in_hdr.middle_len);
23888c2ecf20Sopenharmony_ci	if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN)
23898c2ecf20Sopenharmony_ci		return -EIO;
23908c2ecf20Sopenharmony_ci	data_len = le32_to_cpu(con->in_hdr.data_len);
23918c2ecf20Sopenharmony_ci	if (data_len > CEPH_MSG_MAX_DATA_LEN)
23928c2ecf20Sopenharmony_ci		return -EIO;
23938c2ecf20Sopenharmony_ci
23948c2ecf20Sopenharmony_ci	/* verify seq# */
23958c2ecf20Sopenharmony_ci	seq = le64_to_cpu(con->in_hdr.seq);
23968c2ecf20Sopenharmony_ci	if ((s64)seq - (s64)con->in_seq < 1) {
23978c2ecf20Sopenharmony_ci		pr_info("skipping %s%lld %s seq %lld expected %lld\n",
23988c2ecf20Sopenharmony_ci			ENTITY_NAME(con->peer_name),
23998c2ecf20Sopenharmony_ci			ceph_pr_addr(&con->peer_addr),
24008c2ecf20Sopenharmony_ci			seq, con->in_seq + 1);
24018c2ecf20Sopenharmony_ci		con->in_base_pos = -front_len - middle_len - data_len -
24028c2ecf20Sopenharmony_ci			sizeof_footer(con);
24038c2ecf20Sopenharmony_ci		con->in_tag = CEPH_MSGR_TAG_READY;
24048c2ecf20Sopenharmony_ci		return 1;
24058c2ecf20Sopenharmony_ci	} else if ((s64)seq - (s64)con->in_seq > 1) {
24068c2ecf20Sopenharmony_ci		pr_err("read_partial_message bad seq %lld expected %lld\n",
24078c2ecf20Sopenharmony_ci		       seq, con->in_seq + 1);
24088c2ecf20Sopenharmony_ci		con->error_msg = "bad message sequence # for incoming message";
24098c2ecf20Sopenharmony_ci		return -EBADE;
24108c2ecf20Sopenharmony_ci	}
24118c2ecf20Sopenharmony_ci
24128c2ecf20Sopenharmony_ci	/* allocate message? */
24138c2ecf20Sopenharmony_ci	if (!con->in_msg) {
24148c2ecf20Sopenharmony_ci		int skip = 0;
24158c2ecf20Sopenharmony_ci
24168c2ecf20Sopenharmony_ci		dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
24178c2ecf20Sopenharmony_ci		     front_len, data_len);
24188c2ecf20Sopenharmony_ci		ret = ceph_con_in_msg_alloc(con, &skip);
24198c2ecf20Sopenharmony_ci		if (ret < 0)
24208c2ecf20Sopenharmony_ci			return ret;
24218c2ecf20Sopenharmony_ci
24228c2ecf20Sopenharmony_ci		BUG_ON(!con->in_msg ^ skip);
24238c2ecf20Sopenharmony_ci		if (skip) {
24248c2ecf20Sopenharmony_ci			/* skip this message */
24258c2ecf20Sopenharmony_ci			dout("alloc_msg said skip message\n");
24268c2ecf20Sopenharmony_ci			con->in_base_pos = -front_len - middle_len - data_len -
24278c2ecf20Sopenharmony_ci				sizeof_footer(con);
24288c2ecf20Sopenharmony_ci			con->in_tag = CEPH_MSGR_TAG_READY;
24298c2ecf20Sopenharmony_ci			con->in_seq++;
24308c2ecf20Sopenharmony_ci			return 1;
24318c2ecf20Sopenharmony_ci		}
24328c2ecf20Sopenharmony_ci
24338c2ecf20Sopenharmony_ci		BUG_ON(!con->in_msg);
24348c2ecf20Sopenharmony_ci		BUG_ON(con->in_msg->con != con);
24358c2ecf20Sopenharmony_ci		m = con->in_msg;
24368c2ecf20Sopenharmony_ci		m->front.iov_len = 0;    /* haven't read it yet */
24378c2ecf20Sopenharmony_ci		if (m->middle)
24388c2ecf20Sopenharmony_ci			m->middle->vec.iov_len = 0;
24398c2ecf20Sopenharmony_ci
24408c2ecf20Sopenharmony_ci		/* prepare for data payload, if any */
24418c2ecf20Sopenharmony_ci
24428c2ecf20Sopenharmony_ci		if (data_len)
24438c2ecf20Sopenharmony_ci			prepare_message_data(con->in_msg, data_len);
24448c2ecf20Sopenharmony_ci	}
24458c2ecf20Sopenharmony_ci
24468c2ecf20Sopenharmony_ci	/* front */
24478c2ecf20Sopenharmony_ci	ret = read_partial_message_section(con, &m->front, front_len,
24488c2ecf20Sopenharmony_ci					   &con->in_front_crc);
24498c2ecf20Sopenharmony_ci	if (ret <= 0)
24508c2ecf20Sopenharmony_ci		return ret;
24518c2ecf20Sopenharmony_ci
24528c2ecf20Sopenharmony_ci	/* middle */
24538c2ecf20Sopenharmony_ci	if (m->middle) {
24548c2ecf20Sopenharmony_ci		ret = read_partial_message_section(con, &m->middle->vec,
24558c2ecf20Sopenharmony_ci						   middle_len,
24568c2ecf20Sopenharmony_ci						   &con->in_middle_crc);
24578c2ecf20Sopenharmony_ci		if (ret <= 0)
24588c2ecf20Sopenharmony_ci			return ret;
24598c2ecf20Sopenharmony_ci	}
24608c2ecf20Sopenharmony_ci
24618c2ecf20Sopenharmony_ci	/* (page) data */
24628c2ecf20Sopenharmony_ci	if (data_len) {
24638c2ecf20Sopenharmony_ci		ret = read_partial_msg_data(con);
24648c2ecf20Sopenharmony_ci		if (ret <= 0)
24658c2ecf20Sopenharmony_ci			return ret;
24668c2ecf20Sopenharmony_ci	}
24678c2ecf20Sopenharmony_ci
24688c2ecf20Sopenharmony_ci	/* footer */
24698c2ecf20Sopenharmony_ci	size = sizeof_footer(con);
24708c2ecf20Sopenharmony_ci	end += size;
24718c2ecf20Sopenharmony_ci	ret = read_partial(con, end, size, &m->footer);
24728c2ecf20Sopenharmony_ci	if (ret <= 0)
24738c2ecf20Sopenharmony_ci		return ret;
24748c2ecf20Sopenharmony_ci
24758c2ecf20Sopenharmony_ci	if (!need_sign) {
24768c2ecf20Sopenharmony_ci		m->footer.flags = m->old_footer.flags;
24778c2ecf20Sopenharmony_ci		m->footer.sig = 0;
24788c2ecf20Sopenharmony_ci	}
24798c2ecf20Sopenharmony_ci
24808c2ecf20Sopenharmony_ci	dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n",
24818c2ecf20Sopenharmony_ci	     m, front_len, m->footer.front_crc, middle_len,
24828c2ecf20Sopenharmony_ci	     m->footer.middle_crc, data_len, m->footer.data_crc);
24838c2ecf20Sopenharmony_ci
24848c2ecf20Sopenharmony_ci	/* crc ok? */
24858c2ecf20Sopenharmony_ci	if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) {
24868c2ecf20Sopenharmony_ci		pr_err("read_partial_message %p front crc %u != exp. %u\n",
24878c2ecf20Sopenharmony_ci		       m, con->in_front_crc, m->footer.front_crc);
24888c2ecf20Sopenharmony_ci		return -EBADMSG;
24898c2ecf20Sopenharmony_ci	}
24908c2ecf20Sopenharmony_ci	if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) {
24918c2ecf20Sopenharmony_ci		pr_err("read_partial_message %p middle crc %u != exp %u\n",
24928c2ecf20Sopenharmony_ci		       m, con->in_middle_crc, m->footer.middle_crc);
24938c2ecf20Sopenharmony_ci		return -EBADMSG;
24948c2ecf20Sopenharmony_ci	}
24958c2ecf20Sopenharmony_ci	if (do_datacrc &&
24968c2ecf20Sopenharmony_ci	    (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 &&
24978c2ecf20Sopenharmony_ci	    con->in_data_crc != le32_to_cpu(m->footer.data_crc)) {
24988c2ecf20Sopenharmony_ci		pr_err("read_partial_message %p data crc %u != exp. %u\n", m,
24998c2ecf20Sopenharmony_ci		       con->in_data_crc, le32_to_cpu(m->footer.data_crc));
25008c2ecf20Sopenharmony_ci		return -EBADMSG;
25018c2ecf20Sopenharmony_ci	}
25028c2ecf20Sopenharmony_ci
25038c2ecf20Sopenharmony_ci	if (need_sign && con->ops->check_message_signature &&
25048c2ecf20Sopenharmony_ci	    con->ops->check_message_signature(m)) {
25058c2ecf20Sopenharmony_ci		pr_err("read_partial_message %p signature check failed\n", m);
25068c2ecf20Sopenharmony_ci		return -EBADMSG;
25078c2ecf20Sopenharmony_ci	}
25088c2ecf20Sopenharmony_ci
25098c2ecf20Sopenharmony_ci	return 1; /* done! */
25108c2ecf20Sopenharmony_ci}
25118c2ecf20Sopenharmony_ci
25128c2ecf20Sopenharmony_ci/*
25138c2ecf20Sopenharmony_ci * Process message.  This happens in the worker thread.  The callback should
25148c2ecf20Sopenharmony_ci * be careful not to do anything that waits on other incoming messages or it
25158c2ecf20Sopenharmony_ci * may deadlock.
25168c2ecf20Sopenharmony_ci */
25178c2ecf20Sopenharmony_cistatic void process_message(struct ceph_connection *con)
25188c2ecf20Sopenharmony_ci{
25198c2ecf20Sopenharmony_ci	struct ceph_msg *msg = con->in_msg;
25208c2ecf20Sopenharmony_ci
25218c2ecf20Sopenharmony_ci	BUG_ON(con->in_msg->con != con);
25228c2ecf20Sopenharmony_ci	con->in_msg = NULL;
25238c2ecf20Sopenharmony_ci
25248c2ecf20Sopenharmony_ci	/* if first message, set peer_name */
25258c2ecf20Sopenharmony_ci	if (con->peer_name.type == 0)
25268c2ecf20Sopenharmony_ci		con->peer_name = msg->hdr.src;
25278c2ecf20Sopenharmony_ci
25288c2ecf20Sopenharmony_ci	con->in_seq++;
25298c2ecf20Sopenharmony_ci	mutex_unlock(&con->mutex);
25308c2ecf20Sopenharmony_ci
25318c2ecf20Sopenharmony_ci	dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n",
25328c2ecf20Sopenharmony_ci	     msg, le64_to_cpu(msg->hdr.seq),
25338c2ecf20Sopenharmony_ci	     ENTITY_NAME(msg->hdr.src),
25348c2ecf20Sopenharmony_ci	     le16_to_cpu(msg->hdr.type),
25358c2ecf20Sopenharmony_ci	     ceph_msg_type_name(le16_to_cpu(msg->hdr.type)),
25368c2ecf20Sopenharmony_ci	     le32_to_cpu(msg->hdr.front_len),
25378c2ecf20Sopenharmony_ci	     le32_to_cpu(msg->hdr.data_len),
25388c2ecf20Sopenharmony_ci	     con->in_front_crc, con->in_middle_crc, con->in_data_crc);
25398c2ecf20Sopenharmony_ci	con->ops->dispatch(con, msg);
25408c2ecf20Sopenharmony_ci
25418c2ecf20Sopenharmony_ci	mutex_lock(&con->mutex);
25428c2ecf20Sopenharmony_ci}
25438c2ecf20Sopenharmony_ci
25448c2ecf20Sopenharmony_cistatic int read_keepalive_ack(struct ceph_connection *con)
25458c2ecf20Sopenharmony_ci{
25468c2ecf20Sopenharmony_ci	struct ceph_timespec ceph_ts;
25478c2ecf20Sopenharmony_ci	size_t size = sizeof(ceph_ts);
25488c2ecf20Sopenharmony_ci	int ret = read_partial(con, size, size, &ceph_ts);
25498c2ecf20Sopenharmony_ci	if (ret <= 0)
25508c2ecf20Sopenharmony_ci		return ret;
25518c2ecf20Sopenharmony_ci	ceph_decode_timespec64(&con->last_keepalive_ack, &ceph_ts);
25528c2ecf20Sopenharmony_ci	prepare_read_tag(con);
25538c2ecf20Sopenharmony_ci	return 1;
25548c2ecf20Sopenharmony_ci}
25558c2ecf20Sopenharmony_ci
/*
 * Write something to the socket.  Called in a worker thread when the
 * socket appears to be writeable and we have something ready to send.
 *
 * Returns 0 when the connection is not in a state that allows writing,
 * when a partial-write helper reports 0, or when there is nothing left
 * to send (in which case CON_FLAG_WRITE_PENDING is cleared).  A
 * non-positive result from the connect or partial-write helpers is
 * returned as-is; the caller (ceph_con_workfn()) treats negative values
 * other than -EAGAIN as a fault.
 */
static int try_write(struct ceph_connection *con)
{
	int ret = 1;

	dout("try_write start %p state %lu\n", con, con->state);
	/* only these four states have anything to write */
	if (con->state != CON_STATE_PREOPEN &&
	    con->state != CON_STATE_CONNECTING &&
	    con->state != CON_STATE_NEGOTIATING &&
	    con->state != CON_STATE_OPEN)
		return 0;

	/* open the socket first? */
	if (con->state == CON_STATE_PREOPEN) {
		BUG_ON(con->sock);
		con->state = CON_STATE_CONNECTING;

		/* queue our banner and get ready to read the peer's */
		con_out_kvec_reset(con);
		prepare_write_banner(con);
		prepare_read_banner(con);

		BUG_ON(con->in_msg);
		con->in_tag = CEPH_MSGR_TAG_READY;
		dout("try_write initiating connect on %p new state %lu\n",
		     con, con->state);
		ret = ceph_tcp_connect(con);
		if (ret < 0) {
			con->error_msg = "connect error";
			goto out;
		}
	}

more:
	dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
	BUG_ON(!con->sock);

	/* kvec data queued? */
	if (con->out_kvec_left) {
		ret = write_partial_kvec(con);
		if (ret <= 0)
			goto out;
	}
	/* pending skip bytes are written before any message data */
	if (con->out_skip) {
		ret = write_partial_skip(con);
		if (ret <= 0)
			goto out;
	}

	/* msg pages? */
	if (con->out_msg) {
		if (con->out_msg_done) {
			ceph_msg_put(con->out_msg);
			con->out_msg = NULL;   /* we're done with this one */
			goto do_next;
		}

		ret = write_partial_message_data(con);
		if (ret == 1)
			goto more;  /* we need to send the footer, too! */
		if (ret == 0)
			goto out;
		if (ret < 0) {
			dout("try_write write_partial_message_data err %d\n",
			     ret);
			goto out;
		}
	}

do_next:
	/*
	 * Pick the next thing to send, checked in this order: a pending
	 * keepalive, the next queued message, then an ack for received
	 * messages not yet acknowledged.
	 */
	if (con->state == CON_STATE_OPEN) {
		if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) {
			prepare_write_keepalive(con);
			goto more;
		}
		/* is anything else pending? */
		if (!list_empty(&con->out_queue)) {
			prepare_write_message(con);
			goto more;
		}
		if (con->in_seq > con->in_seq_acked) {
			prepare_write_ack(con);
			goto more;
		}
	}

	/* Nothing to do! */
	con_flag_clear(con, CON_FLAG_WRITE_PENDING);
	dout("try_write nothing else to write.\n");
	ret = 0;
out:
	dout("try_write done on %p ret %d\n", con, ret);
	return ret;
}
26528c2ecf20Sopenharmony_ci
/*
 * Read what we can from the socket.
 *
 * Drives the receive side of the connection: banner exchange while
 * CONNECTING, connect reply while NEGOTIATING, and tag-prefixed frames
 * (message, ack/seq, keepalive ack, close) once OPEN.
 *
 * Returns 0 if the connection is not in a readable state; otherwise the
 * result of the last helper called.  -EBADMSG and -EBADE from
 * read_partial_message() are converted to -EIO, and an unknown tag
 * yields -1.  The caller (ceph_con_workfn()) treats negative values
 * other than -EAGAIN as a fault.
 */
static int try_read(struct ceph_connection *con)
{
	int ret = -1;

more:
	dout("try_read start on %p state %lu\n", con, con->state);
	if (con->state != CON_STATE_CONNECTING &&
	    con->state != CON_STATE_NEGOTIATING &&
	    con->state != CON_STATE_OPEN)
		return 0;

	BUG_ON(!con->sock);

	dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
	     con->in_base_pos);

	if (con->state == CON_STATE_CONNECTING) {
		dout("try_read connecting\n");
		ret = read_partial_banner(con);
		if (ret <= 0)
			goto out;
		ret = process_banner(con);
		if (ret < 0)
			goto out;

		con->state = CON_STATE_NEGOTIATING;

		/*
		 * Received banner is good, exchange connection info.
		 * Do not reset out_kvec, as sending our banner raced
		 * with receiving peer banner after connect completed.
		 */
		ret = prepare_write_connect(con);
		if (ret < 0)
			goto out;
		prepare_read_connect(con);

		/* Send connection info before awaiting response */
		goto out;
	}

	if (con->state == CON_STATE_NEGOTIATING) {
		dout("try_read negotiating\n");
		ret = read_partial_connect(con);
		if (ret <= 0)
			goto out;
		ret = process_connect(con);
		if (ret < 0)
			goto out;
		/* re-evaluate con->state from the top after process_connect() */
		goto more;
	}

	WARN_ON(con->state != CON_STATE_OPEN);

	/* a negative in_base_pos is the number of bytes still to discard */
	if (con->in_base_pos < 0) {
		/*
		 * skipping + discarding content.
		 */
		ret = ceph_tcp_recvmsg(con->sock, NULL, -con->in_base_pos);
		if (ret <= 0)
			goto out;
		dout("skipped %d / %d bytes\n", ret, -con->in_base_pos);
		con->in_base_pos += ret;
		if (con->in_base_pos)
			goto more;
	}
	if (con->in_tag == CEPH_MSGR_TAG_READY) {
		/*
		 * what's next?
		 */
		ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1);
		if (ret <= 0)
			goto out;
		dout("try_read got tag %d\n", (int)con->in_tag);
		switch (con->in_tag) {
		case CEPH_MSGR_TAG_MSG:
			prepare_read_message(con);
			break;
		case CEPH_MSGR_TAG_ACK:
			prepare_read_ack(con);
			break;
		case CEPH_MSGR_TAG_KEEPALIVE2_ACK:
			prepare_read_keepalive_ack(con);
			break;
		case CEPH_MSGR_TAG_CLOSE:
			/* peer asked to close: drop the socket and stop reading */
			con_close_socket(con);
			con->state = CON_STATE_CLOSED;
			goto out;
		default:
			goto bad_tag;
		}
	}
	if (con->in_tag == CEPH_MSGR_TAG_MSG) {
		ret = read_partial_message(con);
		if (ret <= 0) {
			/* map message-level errors onto -EIO for the caller */
			switch (ret) {
			case -EBADMSG:
				con->error_msg = "bad crc/signature";
				fallthrough;
			case -EBADE:
				ret = -EIO;
				break;
			case -EIO:
				con->error_msg = "io error";
				break;
			}
			goto out;
		}
		/*
		 * read_partial_message() resets in_tag to READY when the
		 * message is to be skipped; there is nothing to dispatch.
		 */
		if (con->in_tag == CEPH_MSGR_TAG_READY)
			goto more;
		process_message(con);
		/*
		 * process_message() drops and retakes con->mutex, so
		 * re-check the state before preparing the next read.
		 */
		if (con->state == CON_STATE_OPEN)
			prepare_read_tag(con);
		goto more;
	}
	if (con->in_tag == CEPH_MSGR_TAG_ACK ||
	    con->in_tag == CEPH_MSGR_TAG_SEQ) {
		/*
		 * the final handshake seq exchange is semantically
		 * equivalent to an ACK
		 */
		ret = read_partial_ack(con);
		if (ret <= 0)
			goto out;
		process_ack(con);
		goto more;
	}
	if (con->in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) {
		ret = read_keepalive_ack(con);
		if (ret <= 0)
			goto out;
		goto more;
	}

out:
	dout("try_read done on %p ret %d\n", con, ret);
	return ret;

bad_tag:
	pr_err("try_read bad con->in_tag = %d\n", (int)con->in_tag);
	con->error_msg = "protocol error, garbage tag";
	ret = -1;
	goto out;
}
28008c2ecf20Sopenharmony_ci
28018c2ecf20Sopenharmony_ci
28028c2ecf20Sopenharmony_ci/*
28038c2ecf20Sopenharmony_ci * Atomically queue work on a connection after the specified delay.
28048c2ecf20Sopenharmony_ci * Bump @con reference to avoid races with connection teardown.
28058c2ecf20Sopenharmony_ci * Returns 0 if work was queued, or an error code otherwise.
28068c2ecf20Sopenharmony_ci */
28078c2ecf20Sopenharmony_cistatic int queue_con_delay(struct ceph_connection *con, unsigned long delay)
28088c2ecf20Sopenharmony_ci{
28098c2ecf20Sopenharmony_ci	if (!con->ops->get(con)) {
28108c2ecf20Sopenharmony_ci		dout("%s %p ref count 0\n", __func__, con);
28118c2ecf20Sopenharmony_ci		return -ENOENT;
28128c2ecf20Sopenharmony_ci	}
28138c2ecf20Sopenharmony_ci
28148c2ecf20Sopenharmony_ci	dout("%s %p %lu\n", __func__, con, delay);
28158c2ecf20Sopenharmony_ci	if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) {
28168c2ecf20Sopenharmony_ci		dout("%s %p - already queued\n", __func__, con);
28178c2ecf20Sopenharmony_ci		con->ops->put(con);
28188c2ecf20Sopenharmony_ci		return -EBUSY;
28198c2ecf20Sopenharmony_ci	}
28208c2ecf20Sopenharmony_ci
28218c2ecf20Sopenharmony_ci	return 0;
28228c2ecf20Sopenharmony_ci}
28238c2ecf20Sopenharmony_ci
/*
 * Queue connection work with no delay.  The result of
 * queue_con_delay() is deliberately ignored.
 */
static void queue_con(struct ceph_connection *con)
{
	queue_con_delay(con, 0);
}
28288c2ecf20Sopenharmony_ci
28298c2ecf20Sopenharmony_cistatic void cancel_con(struct ceph_connection *con)
28308c2ecf20Sopenharmony_ci{
28318c2ecf20Sopenharmony_ci	if (cancel_delayed_work(&con->work)) {
28328c2ecf20Sopenharmony_ci		dout("%s %p\n", __func__, con);
28338c2ecf20Sopenharmony_ci		con->ops->put(con);
28348c2ecf20Sopenharmony_ci	}
28358c2ecf20Sopenharmony_ci}
28368c2ecf20Sopenharmony_ci
28378c2ecf20Sopenharmony_cistatic bool con_sock_closed(struct ceph_connection *con)
28388c2ecf20Sopenharmony_ci{
28398c2ecf20Sopenharmony_ci	if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED))
28408c2ecf20Sopenharmony_ci		return false;
28418c2ecf20Sopenharmony_ci
28428c2ecf20Sopenharmony_ci#define CASE(x)								\
28438c2ecf20Sopenharmony_ci	case CON_STATE_ ## x:						\
28448c2ecf20Sopenharmony_ci		con->error_msg = "socket closed (con state " #x ")";	\
28458c2ecf20Sopenharmony_ci		break;
28468c2ecf20Sopenharmony_ci
28478c2ecf20Sopenharmony_ci	switch (con->state) {
28488c2ecf20Sopenharmony_ci	CASE(CLOSED);
28498c2ecf20Sopenharmony_ci	CASE(PREOPEN);
28508c2ecf20Sopenharmony_ci	CASE(CONNECTING);
28518c2ecf20Sopenharmony_ci	CASE(NEGOTIATING);
28528c2ecf20Sopenharmony_ci	CASE(OPEN);
28538c2ecf20Sopenharmony_ci	CASE(STANDBY);
28548c2ecf20Sopenharmony_ci	default:
28558c2ecf20Sopenharmony_ci		pr_warn("%s con %p unrecognized state %lu\n",
28568c2ecf20Sopenharmony_ci			__func__, con, con->state);
28578c2ecf20Sopenharmony_ci		con->error_msg = "unrecognized con state";
28588c2ecf20Sopenharmony_ci		BUG();
28598c2ecf20Sopenharmony_ci		break;
28608c2ecf20Sopenharmony_ci	}
28618c2ecf20Sopenharmony_ci#undef CASE
28628c2ecf20Sopenharmony_ci
28638c2ecf20Sopenharmony_ci	return true;
28648c2ecf20Sopenharmony_ci}
28658c2ecf20Sopenharmony_ci
28668c2ecf20Sopenharmony_cistatic bool con_backoff(struct ceph_connection *con)
28678c2ecf20Sopenharmony_ci{
28688c2ecf20Sopenharmony_ci	int ret;
28698c2ecf20Sopenharmony_ci
28708c2ecf20Sopenharmony_ci	if (!con_flag_test_and_clear(con, CON_FLAG_BACKOFF))
28718c2ecf20Sopenharmony_ci		return false;
28728c2ecf20Sopenharmony_ci
28738c2ecf20Sopenharmony_ci	ret = queue_con_delay(con, round_jiffies_relative(con->delay));
28748c2ecf20Sopenharmony_ci	if (ret) {
28758c2ecf20Sopenharmony_ci		dout("%s: con %p FAILED to back off %lu\n", __func__,
28768c2ecf20Sopenharmony_ci			con, con->delay);
28778c2ecf20Sopenharmony_ci		BUG_ON(ret == -ENOENT);
28788c2ecf20Sopenharmony_ci		con_flag_set(con, CON_FLAG_BACKOFF);
28798c2ecf20Sopenharmony_ci	}
28808c2ecf20Sopenharmony_ci
28818c2ecf20Sopenharmony_ci	return true;
28828c2ecf20Sopenharmony_ci}
28838c2ecf20Sopenharmony_ci
28848c2ecf20Sopenharmony_ci/* Finish fault handling; con->mutex must *not* be held here */
28858c2ecf20Sopenharmony_ci
28868c2ecf20Sopenharmony_cistatic void con_fault_finish(struct ceph_connection *con)
28878c2ecf20Sopenharmony_ci{
28888c2ecf20Sopenharmony_ci	dout("%s %p\n", __func__, con);
28898c2ecf20Sopenharmony_ci
28908c2ecf20Sopenharmony_ci	/*
28918c2ecf20Sopenharmony_ci	 * in case we faulted due to authentication, invalidate our
28928c2ecf20Sopenharmony_ci	 * current tickets so that we can get new ones.
28938c2ecf20Sopenharmony_ci	 */
28948c2ecf20Sopenharmony_ci	if (con->auth_retry) {
28958c2ecf20Sopenharmony_ci		dout("auth_retry %d, invalidating\n", con->auth_retry);
28968c2ecf20Sopenharmony_ci		if (con->ops->invalidate_authorizer)
28978c2ecf20Sopenharmony_ci			con->ops->invalidate_authorizer(con);
28988c2ecf20Sopenharmony_ci		con->auth_retry = 0;
28998c2ecf20Sopenharmony_ci	}
29008c2ecf20Sopenharmony_ci
29018c2ecf20Sopenharmony_ci	if (con->ops->fault)
29028c2ecf20Sopenharmony_ci		con->ops->fault(con);
29038c2ecf20Sopenharmony_ci}
29048c2ecf20Sopenharmony_ci
29058c2ecf20Sopenharmony_ci/*
29068c2ecf20Sopenharmony_ci * Do some work on a connection.  Drop a connection ref when we're done.
29078c2ecf20Sopenharmony_ci */
29088c2ecf20Sopenharmony_cistatic void ceph_con_workfn(struct work_struct *work)
29098c2ecf20Sopenharmony_ci{
29108c2ecf20Sopenharmony_ci	struct ceph_connection *con = container_of(work, struct ceph_connection,
29118c2ecf20Sopenharmony_ci						   work.work);
29128c2ecf20Sopenharmony_ci	bool fault;
29138c2ecf20Sopenharmony_ci
29148c2ecf20Sopenharmony_ci	mutex_lock(&con->mutex);
29158c2ecf20Sopenharmony_ci	while (true) {
29168c2ecf20Sopenharmony_ci		int ret;
29178c2ecf20Sopenharmony_ci
29188c2ecf20Sopenharmony_ci		if ((fault = con_sock_closed(con))) {
29198c2ecf20Sopenharmony_ci			dout("%s: con %p SOCK_CLOSED\n", __func__, con);
29208c2ecf20Sopenharmony_ci			break;
29218c2ecf20Sopenharmony_ci		}
29228c2ecf20Sopenharmony_ci		if (con_backoff(con)) {
29238c2ecf20Sopenharmony_ci			dout("%s: con %p BACKOFF\n", __func__, con);
29248c2ecf20Sopenharmony_ci			break;
29258c2ecf20Sopenharmony_ci		}
29268c2ecf20Sopenharmony_ci		if (con->state == CON_STATE_STANDBY) {
29278c2ecf20Sopenharmony_ci			dout("%s: con %p STANDBY\n", __func__, con);
29288c2ecf20Sopenharmony_ci			break;
29298c2ecf20Sopenharmony_ci		}
29308c2ecf20Sopenharmony_ci		if (con->state == CON_STATE_CLOSED) {
29318c2ecf20Sopenharmony_ci			dout("%s: con %p CLOSED\n", __func__, con);
29328c2ecf20Sopenharmony_ci			BUG_ON(con->sock);
29338c2ecf20Sopenharmony_ci			break;
29348c2ecf20Sopenharmony_ci		}
29358c2ecf20Sopenharmony_ci		if (con->state == CON_STATE_PREOPEN) {
29368c2ecf20Sopenharmony_ci			dout("%s: con %p PREOPEN\n", __func__, con);
29378c2ecf20Sopenharmony_ci			BUG_ON(con->sock);
29388c2ecf20Sopenharmony_ci		}
29398c2ecf20Sopenharmony_ci
29408c2ecf20Sopenharmony_ci		ret = try_read(con);
29418c2ecf20Sopenharmony_ci		if (ret < 0) {
29428c2ecf20Sopenharmony_ci			if (ret == -EAGAIN)
29438c2ecf20Sopenharmony_ci				continue;
29448c2ecf20Sopenharmony_ci			if (!con->error_msg)
29458c2ecf20Sopenharmony_ci				con->error_msg = "socket error on read";
29468c2ecf20Sopenharmony_ci			fault = true;
29478c2ecf20Sopenharmony_ci			break;
29488c2ecf20Sopenharmony_ci		}
29498c2ecf20Sopenharmony_ci
29508c2ecf20Sopenharmony_ci		ret = try_write(con);
29518c2ecf20Sopenharmony_ci		if (ret < 0) {
29528c2ecf20Sopenharmony_ci			if (ret == -EAGAIN)
29538c2ecf20Sopenharmony_ci				continue;
29548c2ecf20Sopenharmony_ci			if (!con->error_msg)
29558c2ecf20Sopenharmony_ci				con->error_msg = "socket error on write";
29568c2ecf20Sopenharmony_ci			fault = true;
29578c2ecf20Sopenharmony_ci		}
29588c2ecf20Sopenharmony_ci
29598c2ecf20Sopenharmony_ci		break;	/* If we make it to here, we're done */
29608c2ecf20Sopenharmony_ci	}
29618c2ecf20Sopenharmony_ci	if (fault)
29628c2ecf20Sopenharmony_ci		con_fault(con);
29638c2ecf20Sopenharmony_ci	mutex_unlock(&con->mutex);
29648c2ecf20Sopenharmony_ci
29658c2ecf20Sopenharmony_ci	if (fault)
29668c2ecf20Sopenharmony_ci		con_fault_finish(con);
29678c2ecf20Sopenharmony_ci
29688c2ecf20Sopenharmony_ci	con->ops->put(con);
29698c2ecf20Sopenharmony_ci}
29708c2ecf20Sopenharmony_ci
29718c2ecf20Sopenharmony_ci/*
29728c2ecf20Sopenharmony_ci * Generic error/fault handler.  A retry mechanism is used with
29738c2ecf20Sopenharmony_ci * exponential backoff
29748c2ecf20Sopenharmony_ci */
29758c2ecf20Sopenharmony_cistatic void con_fault(struct ceph_connection *con)
29768c2ecf20Sopenharmony_ci{
29778c2ecf20Sopenharmony_ci	dout("fault %p state %lu to peer %s\n",
29788c2ecf20Sopenharmony_ci	     con, con->state, ceph_pr_addr(&con->peer_addr));
29798c2ecf20Sopenharmony_ci
29808c2ecf20Sopenharmony_ci	pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
29818c2ecf20Sopenharmony_ci		ceph_pr_addr(&con->peer_addr), con->error_msg);
29828c2ecf20Sopenharmony_ci	con->error_msg = NULL;
29838c2ecf20Sopenharmony_ci
29848c2ecf20Sopenharmony_ci	WARN_ON(con->state != CON_STATE_CONNECTING &&
29858c2ecf20Sopenharmony_ci	       con->state != CON_STATE_NEGOTIATING &&
29868c2ecf20Sopenharmony_ci	       con->state != CON_STATE_OPEN);
29878c2ecf20Sopenharmony_ci
29888c2ecf20Sopenharmony_ci	con_close_socket(con);
29898c2ecf20Sopenharmony_ci
29908c2ecf20Sopenharmony_ci	if (con_flag_test(con, CON_FLAG_LOSSYTX)) {
29918c2ecf20Sopenharmony_ci		dout("fault on LOSSYTX channel, marking CLOSED\n");
29928c2ecf20Sopenharmony_ci		con->state = CON_STATE_CLOSED;
29938c2ecf20Sopenharmony_ci		return;
29948c2ecf20Sopenharmony_ci	}
29958c2ecf20Sopenharmony_ci
29968c2ecf20Sopenharmony_ci	if (con->in_msg) {
29978c2ecf20Sopenharmony_ci		BUG_ON(con->in_msg->con != con);
29988c2ecf20Sopenharmony_ci		ceph_msg_put(con->in_msg);
29998c2ecf20Sopenharmony_ci		con->in_msg = NULL;
30008c2ecf20Sopenharmony_ci	}
30018c2ecf20Sopenharmony_ci	if (con->out_msg) {
30028c2ecf20Sopenharmony_ci		BUG_ON(con->out_msg->con != con);
30038c2ecf20Sopenharmony_ci		ceph_msg_put(con->out_msg);
30048c2ecf20Sopenharmony_ci		con->out_msg = NULL;
30058c2ecf20Sopenharmony_ci	}
30068c2ecf20Sopenharmony_ci
30078c2ecf20Sopenharmony_ci	/* Requeue anything that hasn't been acked */
30088c2ecf20Sopenharmony_ci	list_splice_init(&con->out_sent, &con->out_queue);
30098c2ecf20Sopenharmony_ci
30108c2ecf20Sopenharmony_ci	/* If there are no messages queued or keepalive pending, place
30118c2ecf20Sopenharmony_ci	 * the connection in a STANDBY state */
30128c2ecf20Sopenharmony_ci	if (list_empty(&con->out_queue) &&
30138c2ecf20Sopenharmony_ci	    !con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING)) {
30148c2ecf20Sopenharmony_ci		dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
30158c2ecf20Sopenharmony_ci		con_flag_clear(con, CON_FLAG_WRITE_PENDING);
30168c2ecf20Sopenharmony_ci		con->state = CON_STATE_STANDBY;
30178c2ecf20Sopenharmony_ci	} else {
30188c2ecf20Sopenharmony_ci		/* retry after a delay. */
30198c2ecf20Sopenharmony_ci		con->state = CON_STATE_PREOPEN;
30208c2ecf20Sopenharmony_ci		if (con->delay == 0)
30218c2ecf20Sopenharmony_ci			con->delay = BASE_DELAY_INTERVAL;
30228c2ecf20Sopenharmony_ci		else if (con->delay < MAX_DELAY_INTERVAL)
30238c2ecf20Sopenharmony_ci			con->delay *= 2;
30248c2ecf20Sopenharmony_ci		con_flag_set(con, CON_FLAG_BACKOFF);
30258c2ecf20Sopenharmony_ci		queue_con(con);
30268c2ecf20Sopenharmony_ci	}
30278c2ecf20Sopenharmony_ci}
30288c2ecf20Sopenharmony_ci
30298c2ecf20Sopenharmony_ci
30308c2ecf20Sopenharmony_civoid ceph_messenger_reset_nonce(struct ceph_messenger *msgr)
30318c2ecf20Sopenharmony_ci{
30328c2ecf20Sopenharmony_ci	u32 nonce = le32_to_cpu(msgr->inst.addr.nonce) + 1000000;
30338c2ecf20Sopenharmony_ci	msgr->inst.addr.nonce = cpu_to_le32(nonce);
30348c2ecf20Sopenharmony_ci	encode_my_addr(msgr);
30358c2ecf20Sopenharmony_ci}
30368c2ecf20Sopenharmony_ci
30378c2ecf20Sopenharmony_ci/*
30388c2ecf20Sopenharmony_ci * initialize a new messenger instance
30398c2ecf20Sopenharmony_ci */
30408c2ecf20Sopenharmony_civoid ceph_messenger_init(struct ceph_messenger *msgr,
30418c2ecf20Sopenharmony_ci			 struct ceph_entity_addr *myaddr)
30428c2ecf20Sopenharmony_ci{
30438c2ecf20Sopenharmony_ci	spin_lock_init(&msgr->global_seq_lock);
30448c2ecf20Sopenharmony_ci
30458c2ecf20Sopenharmony_ci	if (myaddr)
30468c2ecf20Sopenharmony_ci		msgr->inst.addr = *myaddr;
30478c2ecf20Sopenharmony_ci
30488c2ecf20Sopenharmony_ci	/* select a random nonce */
30498c2ecf20Sopenharmony_ci	msgr->inst.addr.type = 0;
30508c2ecf20Sopenharmony_ci	get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
30518c2ecf20Sopenharmony_ci	encode_my_addr(msgr);
30528c2ecf20Sopenharmony_ci
30538c2ecf20Sopenharmony_ci	atomic_set(&msgr->stopping, 0);
30548c2ecf20Sopenharmony_ci	write_pnet(&msgr->net, get_net(current->nsproxy->net_ns));
30558c2ecf20Sopenharmony_ci
30568c2ecf20Sopenharmony_ci	dout("%s %p\n", __func__, msgr);
30578c2ecf20Sopenharmony_ci}
30588c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_messenger_init);
30598c2ecf20Sopenharmony_ci
30608c2ecf20Sopenharmony_civoid ceph_messenger_fini(struct ceph_messenger *msgr)
30618c2ecf20Sopenharmony_ci{
30628c2ecf20Sopenharmony_ci	put_net(read_pnet(&msgr->net));
30638c2ecf20Sopenharmony_ci}
30648c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_messenger_fini);
30658c2ecf20Sopenharmony_ci
30668c2ecf20Sopenharmony_cistatic void msg_con_set(struct ceph_msg *msg, struct ceph_connection *con)
30678c2ecf20Sopenharmony_ci{
30688c2ecf20Sopenharmony_ci	if (msg->con)
30698c2ecf20Sopenharmony_ci		msg->con->ops->put(msg->con);
30708c2ecf20Sopenharmony_ci
30718c2ecf20Sopenharmony_ci	msg->con = con ? con->ops->get(con) : NULL;
30728c2ecf20Sopenharmony_ci	BUG_ON(msg->con != con);
30738c2ecf20Sopenharmony_ci}
30748c2ecf20Sopenharmony_ci
30758c2ecf20Sopenharmony_cistatic void clear_standby(struct ceph_connection *con)
30768c2ecf20Sopenharmony_ci{
30778c2ecf20Sopenharmony_ci	/* come back from STANDBY? */
30788c2ecf20Sopenharmony_ci	if (con->state == CON_STATE_STANDBY) {
30798c2ecf20Sopenharmony_ci		dout("clear_standby %p and ++connect_seq\n", con);
30808c2ecf20Sopenharmony_ci		con->state = CON_STATE_PREOPEN;
30818c2ecf20Sopenharmony_ci		con->connect_seq++;
30828c2ecf20Sopenharmony_ci		WARN_ON(con_flag_test(con, CON_FLAG_WRITE_PENDING));
30838c2ecf20Sopenharmony_ci		WARN_ON(con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING));
30848c2ecf20Sopenharmony_ci	}
30858c2ecf20Sopenharmony_ci}
30868c2ecf20Sopenharmony_ci
30878c2ecf20Sopenharmony_ci/*
30888c2ecf20Sopenharmony_ci * Queue up an outgoing message on the given connection.
30898c2ecf20Sopenharmony_ci */
30908c2ecf20Sopenharmony_civoid ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
30918c2ecf20Sopenharmony_ci{
30928c2ecf20Sopenharmony_ci	/* set src+dst */
30938c2ecf20Sopenharmony_ci	msg->hdr.src = con->msgr->inst.name;
30948c2ecf20Sopenharmony_ci	BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
30958c2ecf20Sopenharmony_ci	msg->needs_out_seq = true;
30968c2ecf20Sopenharmony_ci
30978c2ecf20Sopenharmony_ci	mutex_lock(&con->mutex);
30988c2ecf20Sopenharmony_ci
30998c2ecf20Sopenharmony_ci	if (con->state == CON_STATE_CLOSED) {
31008c2ecf20Sopenharmony_ci		dout("con_send %p closed, dropping %p\n", con, msg);
31018c2ecf20Sopenharmony_ci		ceph_msg_put(msg);
31028c2ecf20Sopenharmony_ci		mutex_unlock(&con->mutex);
31038c2ecf20Sopenharmony_ci		return;
31048c2ecf20Sopenharmony_ci	}
31058c2ecf20Sopenharmony_ci
31068c2ecf20Sopenharmony_ci	msg_con_set(msg, con);
31078c2ecf20Sopenharmony_ci
31088c2ecf20Sopenharmony_ci	BUG_ON(!list_empty(&msg->list_head));
31098c2ecf20Sopenharmony_ci	list_add_tail(&msg->list_head, &con->out_queue);
31108c2ecf20Sopenharmony_ci	dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
31118c2ecf20Sopenharmony_ci	     ENTITY_NAME(con->peer_name), le16_to_cpu(msg->hdr.type),
31128c2ecf20Sopenharmony_ci	     ceph_msg_type_name(le16_to_cpu(msg->hdr.type)),
31138c2ecf20Sopenharmony_ci	     le32_to_cpu(msg->hdr.front_len),
31148c2ecf20Sopenharmony_ci	     le32_to_cpu(msg->hdr.middle_len),
31158c2ecf20Sopenharmony_ci	     le32_to_cpu(msg->hdr.data_len));
31168c2ecf20Sopenharmony_ci
31178c2ecf20Sopenharmony_ci	clear_standby(con);
31188c2ecf20Sopenharmony_ci	mutex_unlock(&con->mutex);
31198c2ecf20Sopenharmony_ci
31208c2ecf20Sopenharmony_ci	/* if there wasn't anything waiting to send before, queue
31218c2ecf20Sopenharmony_ci	 * new work */
31228c2ecf20Sopenharmony_ci	if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0)
31238c2ecf20Sopenharmony_ci		queue_con(con);
31248c2ecf20Sopenharmony_ci}
31258c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_con_send);
31268c2ecf20Sopenharmony_ci
31278c2ecf20Sopenharmony_ci/*
31288c2ecf20Sopenharmony_ci * Revoke a message that was previously queued for send
31298c2ecf20Sopenharmony_ci */
31308c2ecf20Sopenharmony_civoid ceph_msg_revoke(struct ceph_msg *msg)
31318c2ecf20Sopenharmony_ci{
31328c2ecf20Sopenharmony_ci	struct ceph_connection *con = msg->con;
31338c2ecf20Sopenharmony_ci
31348c2ecf20Sopenharmony_ci	if (!con) {
31358c2ecf20Sopenharmony_ci		dout("%s msg %p null con\n", __func__, msg);
31368c2ecf20Sopenharmony_ci		return;		/* Message not in our possession */
31378c2ecf20Sopenharmony_ci	}
31388c2ecf20Sopenharmony_ci
31398c2ecf20Sopenharmony_ci	mutex_lock(&con->mutex);
31408c2ecf20Sopenharmony_ci	if (!list_empty(&msg->list_head)) {
31418c2ecf20Sopenharmony_ci		dout("%s %p msg %p - was on queue\n", __func__, con, msg);
31428c2ecf20Sopenharmony_ci		list_del_init(&msg->list_head);
31438c2ecf20Sopenharmony_ci		msg->hdr.seq = 0;
31448c2ecf20Sopenharmony_ci
31458c2ecf20Sopenharmony_ci		ceph_msg_put(msg);
31468c2ecf20Sopenharmony_ci	}
31478c2ecf20Sopenharmony_ci	if (con->out_msg == msg) {
31488c2ecf20Sopenharmony_ci		BUG_ON(con->out_skip);
31498c2ecf20Sopenharmony_ci		/* footer */
31508c2ecf20Sopenharmony_ci		if (con->out_msg_done) {
31518c2ecf20Sopenharmony_ci			con->out_skip += con_out_kvec_skip(con);
31528c2ecf20Sopenharmony_ci		} else {
31538c2ecf20Sopenharmony_ci			BUG_ON(!msg->data_length);
31548c2ecf20Sopenharmony_ci			con->out_skip += sizeof_footer(con);
31558c2ecf20Sopenharmony_ci		}
31568c2ecf20Sopenharmony_ci		/* data, middle, front */
31578c2ecf20Sopenharmony_ci		if (msg->data_length)
31588c2ecf20Sopenharmony_ci			con->out_skip += msg->cursor.total_resid;
31598c2ecf20Sopenharmony_ci		if (msg->middle)
31608c2ecf20Sopenharmony_ci			con->out_skip += con_out_kvec_skip(con);
31618c2ecf20Sopenharmony_ci		con->out_skip += con_out_kvec_skip(con);
31628c2ecf20Sopenharmony_ci
31638c2ecf20Sopenharmony_ci		dout("%s %p msg %p - was sending, will write %d skip %d\n",
31648c2ecf20Sopenharmony_ci		     __func__, con, msg, con->out_kvec_bytes, con->out_skip);
31658c2ecf20Sopenharmony_ci		msg->hdr.seq = 0;
31668c2ecf20Sopenharmony_ci		con->out_msg = NULL;
31678c2ecf20Sopenharmony_ci		ceph_msg_put(msg);
31688c2ecf20Sopenharmony_ci	}
31698c2ecf20Sopenharmony_ci
31708c2ecf20Sopenharmony_ci	mutex_unlock(&con->mutex);
31718c2ecf20Sopenharmony_ci}
31728c2ecf20Sopenharmony_ci
31738c2ecf20Sopenharmony_ci/*
31748c2ecf20Sopenharmony_ci * Revoke a message that we may be reading data into
31758c2ecf20Sopenharmony_ci */
31768c2ecf20Sopenharmony_civoid ceph_msg_revoke_incoming(struct ceph_msg *msg)
31778c2ecf20Sopenharmony_ci{
31788c2ecf20Sopenharmony_ci	struct ceph_connection *con = msg->con;
31798c2ecf20Sopenharmony_ci
31808c2ecf20Sopenharmony_ci	if (!con) {
31818c2ecf20Sopenharmony_ci		dout("%s msg %p null con\n", __func__, msg);
31828c2ecf20Sopenharmony_ci		return;		/* Message not in our possession */
31838c2ecf20Sopenharmony_ci	}
31848c2ecf20Sopenharmony_ci
31858c2ecf20Sopenharmony_ci	mutex_lock(&con->mutex);
31868c2ecf20Sopenharmony_ci	if (con->in_msg == msg) {
31878c2ecf20Sopenharmony_ci		unsigned int front_len = le32_to_cpu(con->in_hdr.front_len);
31888c2ecf20Sopenharmony_ci		unsigned int middle_len = le32_to_cpu(con->in_hdr.middle_len);
31898c2ecf20Sopenharmony_ci		unsigned int data_len = le32_to_cpu(con->in_hdr.data_len);
31908c2ecf20Sopenharmony_ci
31918c2ecf20Sopenharmony_ci		/* skip rest of message */
31928c2ecf20Sopenharmony_ci		dout("%s %p msg %p revoked\n", __func__, con, msg);
31938c2ecf20Sopenharmony_ci		con->in_base_pos = con->in_base_pos -
31948c2ecf20Sopenharmony_ci				sizeof(struct ceph_msg_header) -
31958c2ecf20Sopenharmony_ci				front_len -
31968c2ecf20Sopenharmony_ci				middle_len -
31978c2ecf20Sopenharmony_ci				data_len -
31988c2ecf20Sopenharmony_ci				sizeof(struct ceph_msg_footer);
31998c2ecf20Sopenharmony_ci		ceph_msg_put(con->in_msg);
32008c2ecf20Sopenharmony_ci		con->in_msg = NULL;
32018c2ecf20Sopenharmony_ci		con->in_tag = CEPH_MSGR_TAG_READY;
32028c2ecf20Sopenharmony_ci		con->in_seq++;
32038c2ecf20Sopenharmony_ci	} else {
32048c2ecf20Sopenharmony_ci		dout("%s %p in_msg %p msg %p no-op\n",
32058c2ecf20Sopenharmony_ci		     __func__, con, con->in_msg, msg);
32068c2ecf20Sopenharmony_ci	}
32078c2ecf20Sopenharmony_ci	mutex_unlock(&con->mutex);
32088c2ecf20Sopenharmony_ci}
32098c2ecf20Sopenharmony_ci
32108c2ecf20Sopenharmony_ci/*
32118c2ecf20Sopenharmony_ci * Queue a keepalive byte to ensure the tcp connection is alive.
32128c2ecf20Sopenharmony_ci */
32138c2ecf20Sopenharmony_civoid ceph_con_keepalive(struct ceph_connection *con)
32148c2ecf20Sopenharmony_ci{
32158c2ecf20Sopenharmony_ci	dout("con_keepalive %p\n", con);
32168c2ecf20Sopenharmony_ci	mutex_lock(&con->mutex);
32178c2ecf20Sopenharmony_ci	clear_standby(con);
32188c2ecf20Sopenharmony_ci	con_flag_set(con, CON_FLAG_KEEPALIVE_PENDING);
32198c2ecf20Sopenharmony_ci	mutex_unlock(&con->mutex);
32208c2ecf20Sopenharmony_ci
32218c2ecf20Sopenharmony_ci	if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0)
32228c2ecf20Sopenharmony_ci		queue_con(con);
32238c2ecf20Sopenharmony_ci}
32248c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_con_keepalive);
32258c2ecf20Sopenharmony_ci
32268c2ecf20Sopenharmony_cibool ceph_con_keepalive_expired(struct ceph_connection *con,
32278c2ecf20Sopenharmony_ci			       unsigned long interval)
32288c2ecf20Sopenharmony_ci{
32298c2ecf20Sopenharmony_ci	if (interval > 0 &&
32308c2ecf20Sopenharmony_ci	    (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) {
32318c2ecf20Sopenharmony_ci		struct timespec64 now;
32328c2ecf20Sopenharmony_ci		struct timespec64 ts;
32338c2ecf20Sopenharmony_ci		ktime_get_real_ts64(&now);
32348c2ecf20Sopenharmony_ci		jiffies_to_timespec64(interval, &ts);
32358c2ecf20Sopenharmony_ci		ts = timespec64_add(con->last_keepalive_ack, ts);
32368c2ecf20Sopenharmony_ci		return timespec64_compare(&now, &ts) >= 0;
32378c2ecf20Sopenharmony_ci	}
32388c2ecf20Sopenharmony_ci	return false;
32398c2ecf20Sopenharmony_ci}
32408c2ecf20Sopenharmony_ci
32418c2ecf20Sopenharmony_cistatic struct ceph_msg_data *ceph_msg_data_add(struct ceph_msg *msg)
32428c2ecf20Sopenharmony_ci{
32438c2ecf20Sopenharmony_ci	BUG_ON(msg->num_data_items >= msg->max_data_items);
32448c2ecf20Sopenharmony_ci	return &msg->data[msg->num_data_items++];
32458c2ecf20Sopenharmony_ci}
32468c2ecf20Sopenharmony_ci
32478c2ecf20Sopenharmony_cistatic void ceph_msg_data_destroy(struct ceph_msg_data *data)
32488c2ecf20Sopenharmony_ci{
32498c2ecf20Sopenharmony_ci	if (data->type == CEPH_MSG_DATA_PAGES && data->own_pages) {
32508c2ecf20Sopenharmony_ci		int num_pages = calc_pages_for(data->alignment, data->length);
32518c2ecf20Sopenharmony_ci		ceph_release_page_vector(data->pages, num_pages);
32528c2ecf20Sopenharmony_ci	} else if (data->type == CEPH_MSG_DATA_PAGELIST) {
32538c2ecf20Sopenharmony_ci		ceph_pagelist_release(data->pagelist);
32548c2ecf20Sopenharmony_ci	}
32558c2ecf20Sopenharmony_ci}
32568c2ecf20Sopenharmony_ci
32578c2ecf20Sopenharmony_civoid ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
32588c2ecf20Sopenharmony_ci			     size_t length, size_t alignment, bool own_pages)
32598c2ecf20Sopenharmony_ci{
32608c2ecf20Sopenharmony_ci	struct ceph_msg_data *data;
32618c2ecf20Sopenharmony_ci
32628c2ecf20Sopenharmony_ci	BUG_ON(!pages);
32638c2ecf20Sopenharmony_ci	BUG_ON(!length);
32648c2ecf20Sopenharmony_ci
32658c2ecf20Sopenharmony_ci	data = ceph_msg_data_add(msg);
32668c2ecf20Sopenharmony_ci	data->type = CEPH_MSG_DATA_PAGES;
32678c2ecf20Sopenharmony_ci	data->pages = pages;
32688c2ecf20Sopenharmony_ci	data->length = length;
32698c2ecf20Sopenharmony_ci	data->alignment = alignment & ~PAGE_MASK;
32708c2ecf20Sopenharmony_ci	data->own_pages = own_pages;
32718c2ecf20Sopenharmony_ci
32728c2ecf20Sopenharmony_ci	msg->data_length += length;
32738c2ecf20Sopenharmony_ci}
32748c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_data_add_pages);
32758c2ecf20Sopenharmony_ci
32768c2ecf20Sopenharmony_civoid ceph_msg_data_add_pagelist(struct ceph_msg *msg,
32778c2ecf20Sopenharmony_ci				struct ceph_pagelist *pagelist)
32788c2ecf20Sopenharmony_ci{
32798c2ecf20Sopenharmony_ci	struct ceph_msg_data *data;
32808c2ecf20Sopenharmony_ci
32818c2ecf20Sopenharmony_ci	BUG_ON(!pagelist);
32828c2ecf20Sopenharmony_ci	BUG_ON(!pagelist->length);
32838c2ecf20Sopenharmony_ci
32848c2ecf20Sopenharmony_ci	data = ceph_msg_data_add(msg);
32858c2ecf20Sopenharmony_ci	data->type = CEPH_MSG_DATA_PAGELIST;
32868c2ecf20Sopenharmony_ci	refcount_inc(&pagelist->refcnt);
32878c2ecf20Sopenharmony_ci	data->pagelist = pagelist;
32888c2ecf20Sopenharmony_ci
32898c2ecf20Sopenharmony_ci	msg->data_length += pagelist->length;
32908c2ecf20Sopenharmony_ci}
32918c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_data_add_pagelist);
32928c2ecf20Sopenharmony_ci
#ifdef	CONFIG_BLOCK
/*
 * Attach a bio range to a message as a new data item.  The iterator
 * is copied by value; @length bytes are added to the message's data
 * length.
 */
void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
			   u32 length)
{
	struct ceph_msg_data *item = ceph_msg_data_add(msg);

	item->type = CEPH_MSG_DATA_BIO;
	item->bio_length = length;
	item->bio_pos = *bio_pos;

	msg->data_length += length;
}
EXPORT_SYMBOL(ceph_msg_data_add_bio);
#endif	/* CONFIG_BLOCK */
33088c2ecf20Sopenharmony_ci
33098c2ecf20Sopenharmony_civoid ceph_msg_data_add_bvecs(struct ceph_msg *msg,
33108c2ecf20Sopenharmony_ci			     struct ceph_bvec_iter *bvec_pos)
33118c2ecf20Sopenharmony_ci{
33128c2ecf20Sopenharmony_ci	struct ceph_msg_data *data;
33138c2ecf20Sopenharmony_ci
33148c2ecf20Sopenharmony_ci	data = ceph_msg_data_add(msg);
33158c2ecf20Sopenharmony_ci	data->type = CEPH_MSG_DATA_BVECS;
33168c2ecf20Sopenharmony_ci	data->bvec_pos = *bvec_pos;
33178c2ecf20Sopenharmony_ci
33188c2ecf20Sopenharmony_ci	msg->data_length += bvec_pos->iter.bi_size;
33198c2ecf20Sopenharmony_ci}
33208c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_data_add_bvecs);
33218c2ecf20Sopenharmony_ci
33228c2ecf20Sopenharmony_ci/*
33238c2ecf20Sopenharmony_ci * construct a new message with given type, size
33248c2ecf20Sopenharmony_ci * the new msg has a ref count of 1.
33258c2ecf20Sopenharmony_ci */
33268c2ecf20Sopenharmony_cistruct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
33278c2ecf20Sopenharmony_ci			       gfp_t flags, bool can_fail)
33288c2ecf20Sopenharmony_ci{
33298c2ecf20Sopenharmony_ci	struct ceph_msg *m;
33308c2ecf20Sopenharmony_ci
33318c2ecf20Sopenharmony_ci	m = kmem_cache_zalloc(ceph_msg_cache, flags);
33328c2ecf20Sopenharmony_ci	if (m == NULL)
33338c2ecf20Sopenharmony_ci		goto out;
33348c2ecf20Sopenharmony_ci
33358c2ecf20Sopenharmony_ci	m->hdr.type = cpu_to_le16(type);
33368c2ecf20Sopenharmony_ci	m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
33378c2ecf20Sopenharmony_ci	m->hdr.front_len = cpu_to_le32(front_len);
33388c2ecf20Sopenharmony_ci
33398c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&m->list_head);
33408c2ecf20Sopenharmony_ci	kref_init(&m->kref);
33418c2ecf20Sopenharmony_ci
33428c2ecf20Sopenharmony_ci	/* front */
33438c2ecf20Sopenharmony_ci	if (front_len) {
33448c2ecf20Sopenharmony_ci		m->front.iov_base = ceph_kvmalloc(front_len, flags);
33458c2ecf20Sopenharmony_ci		if (m->front.iov_base == NULL) {
33468c2ecf20Sopenharmony_ci			dout("ceph_msg_new can't allocate %d bytes\n",
33478c2ecf20Sopenharmony_ci			     front_len);
33488c2ecf20Sopenharmony_ci			goto out2;
33498c2ecf20Sopenharmony_ci		}
33508c2ecf20Sopenharmony_ci	} else {
33518c2ecf20Sopenharmony_ci		m->front.iov_base = NULL;
33528c2ecf20Sopenharmony_ci	}
33538c2ecf20Sopenharmony_ci	m->front_alloc_len = m->front.iov_len = front_len;
33548c2ecf20Sopenharmony_ci
33558c2ecf20Sopenharmony_ci	if (max_data_items) {
33568c2ecf20Sopenharmony_ci		m->data = kmalloc_array(max_data_items, sizeof(*m->data),
33578c2ecf20Sopenharmony_ci					flags);
33588c2ecf20Sopenharmony_ci		if (!m->data)
33598c2ecf20Sopenharmony_ci			goto out2;
33608c2ecf20Sopenharmony_ci
33618c2ecf20Sopenharmony_ci		m->max_data_items = max_data_items;
33628c2ecf20Sopenharmony_ci	}
33638c2ecf20Sopenharmony_ci
33648c2ecf20Sopenharmony_ci	dout("ceph_msg_new %p front %d\n", m, front_len);
33658c2ecf20Sopenharmony_ci	return m;
33668c2ecf20Sopenharmony_ci
33678c2ecf20Sopenharmony_ciout2:
33688c2ecf20Sopenharmony_ci	ceph_msg_put(m);
33698c2ecf20Sopenharmony_ciout:
33708c2ecf20Sopenharmony_ci	if (!can_fail) {
33718c2ecf20Sopenharmony_ci		pr_err("msg_new can't create type %d front %d\n", type,
33728c2ecf20Sopenharmony_ci		       front_len);
33738c2ecf20Sopenharmony_ci		WARN_ON(1);
33748c2ecf20Sopenharmony_ci	} else {
33758c2ecf20Sopenharmony_ci		dout("msg_new can't create type %d front %d\n", type,
33768c2ecf20Sopenharmony_ci		     front_len);
33778c2ecf20Sopenharmony_ci	}
33788c2ecf20Sopenharmony_ci	return NULL;
33798c2ecf20Sopenharmony_ci}
33808c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_new2);
33818c2ecf20Sopenharmony_ci
33828c2ecf20Sopenharmony_cistruct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
33838c2ecf20Sopenharmony_ci			      bool can_fail)
33848c2ecf20Sopenharmony_ci{
33858c2ecf20Sopenharmony_ci	return ceph_msg_new2(type, front_len, 0, flags, can_fail);
33868c2ecf20Sopenharmony_ci}
33878c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_new);
33888c2ecf20Sopenharmony_ci
33898c2ecf20Sopenharmony_ci/*
33908c2ecf20Sopenharmony_ci * Allocate "middle" portion of a message, if it is needed and wasn't
33918c2ecf20Sopenharmony_ci * allocated by alloc_msg.  This allows us to read a small fixed-size
33928c2ecf20Sopenharmony_ci * per-type header in the front and then gracefully fail (i.e.,
33938c2ecf20Sopenharmony_ci * propagate the error to the caller based on info in the front) when
33948c2ecf20Sopenharmony_ci * the middle is too large.
33958c2ecf20Sopenharmony_ci */
33968c2ecf20Sopenharmony_cistatic int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg)
33978c2ecf20Sopenharmony_ci{
33988c2ecf20Sopenharmony_ci	int type = le16_to_cpu(msg->hdr.type);
33998c2ecf20Sopenharmony_ci	int middle_len = le32_to_cpu(msg->hdr.middle_len);
34008c2ecf20Sopenharmony_ci
34018c2ecf20Sopenharmony_ci	dout("alloc_middle %p type %d %s middle_len %d\n", msg, type,
34028c2ecf20Sopenharmony_ci	     ceph_msg_type_name(type), middle_len);
34038c2ecf20Sopenharmony_ci	BUG_ON(!middle_len);
34048c2ecf20Sopenharmony_ci	BUG_ON(msg->middle);
34058c2ecf20Sopenharmony_ci
34068c2ecf20Sopenharmony_ci	msg->middle = ceph_buffer_new(middle_len, GFP_NOFS);
34078c2ecf20Sopenharmony_ci	if (!msg->middle)
34088c2ecf20Sopenharmony_ci		return -ENOMEM;
34098c2ecf20Sopenharmony_ci	return 0;
34108c2ecf20Sopenharmony_ci}
34118c2ecf20Sopenharmony_ci
34128c2ecf20Sopenharmony_ci/*
34138c2ecf20Sopenharmony_ci * Allocate a message for receiving an incoming message on a
34148c2ecf20Sopenharmony_ci * connection, and save the result in con->in_msg.  Uses the
34158c2ecf20Sopenharmony_ci * connection's private alloc_msg op if available.
34168c2ecf20Sopenharmony_ci *
34178c2ecf20Sopenharmony_ci * Returns 0 on success, or a negative error code.
34188c2ecf20Sopenharmony_ci *
34198c2ecf20Sopenharmony_ci * On success, if we set *skip = 1:
34208c2ecf20Sopenharmony_ci *  - the next message should be skipped and ignored.
34218c2ecf20Sopenharmony_ci *  - con->in_msg == NULL
34228c2ecf20Sopenharmony_ci * or if we set *skip = 0:
34238c2ecf20Sopenharmony_ci *  - con->in_msg is non-null.
34248c2ecf20Sopenharmony_ci * On error (ENOMEM, EAGAIN, ...),
34258c2ecf20Sopenharmony_ci *  - con->in_msg == NULL
34268c2ecf20Sopenharmony_ci */
34278c2ecf20Sopenharmony_cistatic int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
34288c2ecf20Sopenharmony_ci{
34298c2ecf20Sopenharmony_ci	struct ceph_msg_header *hdr = &con->in_hdr;
34308c2ecf20Sopenharmony_ci	int middle_len = le32_to_cpu(hdr->middle_len);
34318c2ecf20Sopenharmony_ci	struct ceph_msg *msg;
34328c2ecf20Sopenharmony_ci	int ret = 0;
34338c2ecf20Sopenharmony_ci
34348c2ecf20Sopenharmony_ci	BUG_ON(con->in_msg != NULL);
34358c2ecf20Sopenharmony_ci	BUG_ON(!con->ops->alloc_msg);
34368c2ecf20Sopenharmony_ci
34378c2ecf20Sopenharmony_ci	mutex_unlock(&con->mutex);
34388c2ecf20Sopenharmony_ci	msg = con->ops->alloc_msg(con, hdr, skip);
34398c2ecf20Sopenharmony_ci	mutex_lock(&con->mutex);
34408c2ecf20Sopenharmony_ci	if (con->state != CON_STATE_OPEN) {
34418c2ecf20Sopenharmony_ci		if (msg)
34428c2ecf20Sopenharmony_ci			ceph_msg_put(msg);
34438c2ecf20Sopenharmony_ci		return -EAGAIN;
34448c2ecf20Sopenharmony_ci	}
34458c2ecf20Sopenharmony_ci	if (msg) {
34468c2ecf20Sopenharmony_ci		BUG_ON(*skip);
34478c2ecf20Sopenharmony_ci		msg_con_set(msg, con);
34488c2ecf20Sopenharmony_ci		con->in_msg = msg;
34498c2ecf20Sopenharmony_ci	} else {
34508c2ecf20Sopenharmony_ci		/*
34518c2ecf20Sopenharmony_ci		 * Null message pointer means either we should skip
34528c2ecf20Sopenharmony_ci		 * this message or we couldn't allocate memory.  The
34538c2ecf20Sopenharmony_ci		 * former is not an error.
34548c2ecf20Sopenharmony_ci		 */
34558c2ecf20Sopenharmony_ci		if (*skip)
34568c2ecf20Sopenharmony_ci			return 0;
34578c2ecf20Sopenharmony_ci
34588c2ecf20Sopenharmony_ci		con->error_msg = "error allocating memory for incoming message";
34598c2ecf20Sopenharmony_ci		return -ENOMEM;
34608c2ecf20Sopenharmony_ci	}
34618c2ecf20Sopenharmony_ci	memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
34628c2ecf20Sopenharmony_ci
34638c2ecf20Sopenharmony_ci	if (middle_len && !con->in_msg->middle) {
34648c2ecf20Sopenharmony_ci		ret = ceph_alloc_middle(con, con->in_msg);
34658c2ecf20Sopenharmony_ci		if (ret < 0) {
34668c2ecf20Sopenharmony_ci			ceph_msg_put(con->in_msg);
34678c2ecf20Sopenharmony_ci			con->in_msg = NULL;
34688c2ecf20Sopenharmony_ci		}
34698c2ecf20Sopenharmony_ci	}
34708c2ecf20Sopenharmony_ci
34718c2ecf20Sopenharmony_ci	return ret;
34728c2ecf20Sopenharmony_ci}
34738c2ecf20Sopenharmony_ci
34748c2ecf20Sopenharmony_ci
34758c2ecf20Sopenharmony_ci/*
34768c2ecf20Sopenharmony_ci * Free a generically kmalloc'd message.
34778c2ecf20Sopenharmony_ci */
34788c2ecf20Sopenharmony_cistatic void ceph_msg_free(struct ceph_msg *m)
34798c2ecf20Sopenharmony_ci{
34808c2ecf20Sopenharmony_ci	dout("%s %p\n", __func__, m);
34818c2ecf20Sopenharmony_ci	kvfree(m->front.iov_base);
34828c2ecf20Sopenharmony_ci	kfree(m->data);
34838c2ecf20Sopenharmony_ci	kmem_cache_free(ceph_msg_cache, m);
34848c2ecf20Sopenharmony_ci}
34858c2ecf20Sopenharmony_ci
34868c2ecf20Sopenharmony_cistatic void ceph_msg_release(struct kref *kref)
34878c2ecf20Sopenharmony_ci{
34888c2ecf20Sopenharmony_ci	struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
34898c2ecf20Sopenharmony_ci	int i;
34908c2ecf20Sopenharmony_ci
34918c2ecf20Sopenharmony_ci	dout("%s %p\n", __func__, m);
34928c2ecf20Sopenharmony_ci	WARN_ON(!list_empty(&m->list_head));
34938c2ecf20Sopenharmony_ci
34948c2ecf20Sopenharmony_ci	msg_con_set(m, NULL);
34958c2ecf20Sopenharmony_ci
34968c2ecf20Sopenharmony_ci	/* drop middle, data, if any */
34978c2ecf20Sopenharmony_ci	if (m->middle) {
34988c2ecf20Sopenharmony_ci		ceph_buffer_put(m->middle);
34998c2ecf20Sopenharmony_ci		m->middle = NULL;
35008c2ecf20Sopenharmony_ci	}
35018c2ecf20Sopenharmony_ci
35028c2ecf20Sopenharmony_ci	for (i = 0; i < m->num_data_items; i++)
35038c2ecf20Sopenharmony_ci		ceph_msg_data_destroy(&m->data[i]);
35048c2ecf20Sopenharmony_ci
35058c2ecf20Sopenharmony_ci	if (m->pool)
35068c2ecf20Sopenharmony_ci		ceph_msgpool_put(m->pool, m);
35078c2ecf20Sopenharmony_ci	else
35088c2ecf20Sopenharmony_ci		ceph_msg_free(m);
35098c2ecf20Sopenharmony_ci}
35108c2ecf20Sopenharmony_ci
35118c2ecf20Sopenharmony_cistruct ceph_msg *ceph_msg_get(struct ceph_msg *msg)
35128c2ecf20Sopenharmony_ci{
35138c2ecf20Sopenharmony_ci	dout("%s %p (was %d)\n", __func__, msg,
35148c2ecf20Sopenharmony_ci	     kref_read(&msg->kref));
35158c2ecf20Sopenharmony_ci	kref_get(&msg->kref);
35168c2ecf20Sopenharmony_ci	return msg;
35178c2ecf20Sopenharmony_ci}
35188c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_get);
35198c2ecf20Sopenharmony_ci
35208c2ecf20Sopenharmony_civoid ceph_msg_put(struct ceph_msg *msg)
35218c2ecf20Sopenharmony_ci{
35228c2ecf20Sopenharmony_ci	dout("%s %p (was %d)\n", __func__, msg,
35238c2ecf20Sopenharmony_ci	     kref_read(&msg->kref));
35248c2ecf20Sopenharmony_ci	kref_put(&msg->kref, ceph_msg_release);
35258c2ecf20Sopenharmony_ci}
35268c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_put);
35278c2ecf20Sopenharmony_ci
35288c2ecf20Sopenharmony_civoid ceph_msg_dump(struct ceph_msg *msg)
35298c2ecf20Sopenharmony_ci{
35308c2ecf20Sopenharmony_ci	pr_debug("msg_dump %p (front_alloc_len %d length %zd)\n", msg,
35318c2ecf20Sopenharmony_ci		 msg->front_alloc_len, msg->data_length);
35328c2ecf20Sopenharmony_ci	print_hex_dump(KERN_DEBUG, "header: ",
35338c2ecf20Sopenharmony_ci		       DUMP_PREFIX_OFFSET, 16, 1,
35348c2ecf20Sopenharmony_ci		       &msg->hdr, sizeof(msg->hdr), true);
35358c2ecf20Sopenharmony_ci	print_hex_dump(KERN_DEBUG, " front: ",
35368c2ecf20Sopenharmony_ci		       DUMP_PREFIX_OFFSET, 16, 1,
35378c2ecf20Sopenharmony_ci		       msg->front.iov_base, msg->front.iov_len, true);
35388c2ecf20Sopenharmony_ci	if (msg->middle)
35398c2ecf20Sopenharmony_ci		print_hex_dump(KERN_DEBUG, "middle: ",
35408c2ecf20Sopenharmony_ci			       DUMP_PREFIX_OFFSET, 16, 1,
35418c2ecf20Sopenharmony_ci			       msg->middle->vec.iov_base,
35428c2ecf20Sopenharmony_ci			       msg->middle->vec.iov_len, true);
35438c2ecf20Sopenharmony_ci	print_hex_dump(KERN_DEBUG, "footer: ",
35448c2ecf20Sopenharmony_ci		       DUMP_PREFIX_OFFSET, 16, 1,
35458c2ecf20Sopenharmony_ci		       &msg->footer, sizeof(msg->footer), true);
35468c2ecf20Sopenharmony_ci}
35478c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_msg_dump);
3548