18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Shared Memory Communications over RDMA (SMC-R) and RoCE
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Manage RMBE
68c2ecf20Sopenharmony_ci * copy new RMBE data into user space
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci * Copyright IBM Corp. 2016
98c2ecf20Sopenharmony_ci *
108c2ecf20Sopenharmony_ci * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
118c2ecf20Sopenharmony_ci */
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci#include <linux/net.h>
148c2ecf20Sopenharmony_ci#include <linux/rcupdate.h>
158c2ecf20Sopenharmony_ci#include <linux/sched/signal.h>
168c2ecf20Sopenharmony_ci
178c2ecf20Sopenharmony_ci#include <net/sock.h>
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_ci#include "smc.h"
208c2ecf20Sopenharmony_ci#include "smc_core.h"
218c2ecf20Sopenharmony_ci#include "smc_cdc.h"
228c2ecf20Sopenharmony_ci#include "smc_tx.h" /* smc_tx_consumer_update() */
238c2ecf20Sopenharmony_ci#include "smc_rx.h"
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci/* callback implementation to wakeup consumers blocked with smc_rx_wait().
268c2ecf20Sopenharmony_ci * indirectly called by smc_cdc_msg_recv_action().
278c2ecf20Sopenharmony_ci */
288c2ecf20Sopenharmony_cistatic void smc_rx_wake_up(struct sock *sk)
298c2ecf20Sopenharmony_ci{
308c2ecf20Sopenharmony_ci	struct socket_wq *wq;
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci	/* derived from sock_def_readable() */
338c2ecf20Sopenharmony_ci	/* called already in smc_listen_work() */
348c2ecf20Sopenharmony_ci	rcu_read_lock();
358c2ecf20Sopenharmony_ci	wq = rcu_dereference(sk->sk_wq);
368c2ecf20Sopenharmony_ci	if (skwq_has_sleeper(wq))
378c2ecf20Sopenharmony_ci		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
388c2ecf20Sopenharmony_ci						EPOLLRDNORM | EPOLLRDBAND);
398c2ecf20Sopenharmony_ci	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
408c2ecf20Sopenharmony_ci	if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
418c2ecf20Sopenharmony_ci	    (sk->sk_state == SMC_CLOSED))
428c2ecf20Sopenharmony_ci		sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
438c2ecf20Sopenharmony_ci	rcu_read_unlock();
448c2ecf20Sopenharmony_ci}
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_ci/* Update consumer cursor
478c2ecf20Sopenharmony_ci *   @conn   connection to update
488c2ecf20Sopenharmony_ci *   @cons   consumer cursor
498c2ecf20Sopenharmony_ci *   @len    number of Bytes consumed
508c2ecf20Sopenharmony_ci *   Returns:
518c2ecf20Sopenharmony_ci *   1 if we should end our receive, 0 otherwise
528c2ecf20Sopenharmony_ci */
538c2ecf20Sopenharmony_cistatic int smc_rx_update_consumer(struct smc_sock *smc,
548c2ecf20Sopenharmony_ci				  union smc_host_cursor cons, size_t len)
558c2ecf20Sopenharmony_ci{
568c2ecf20Sopenharmony_ci	struct smc_connection *conn = &smc->conn;
578c2ecf20Sopenharmony_ci	struct sock *sk = &smc->sk;
588c2ecf20Sopenharmony_ci	bool force = false;
598c2ecf20Sopenharmony_ci	int diff, rc = 0;
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ci	smc_curs_add(conn->rmb_desc->len, &cons, len);
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_ci	/* did we process urgent data? */
648c2ecf20Sopenharmony_ci	if (conn->urg_state == SMC_URG_VALID || conn->urg_rx_skip_pend) {
658c2ecf20Sopenharmony_ci		diff = smc_curs_comp(conn->rmb_desc->len, &cons,
668c2ecf20Sopenharmony_ci				     &conn->urg_curs);
678c2ecf20Sopenharmony_ci		if (sock_flag(sk, SOCK_URGINLINE)) {
688c2ecf20Sopenharmony_ci			if (diff == 0) {
698c2ecf20Sopenharmony_ci				force = true;
708c2ecf20Sopenharmony_ci				rc = 1;
718c2ecf20Sopenharmony_ci				conn->urg_state = SMC_URG_READ;
728c2ecf20Sopenharmony_ci			}
738c2ecf20Sopenharmony_ci		} else {
748c2ecf20Sopenharmony_ci			if (diff == 1) {
758c2ecf20Sopenharmony_ci				/* skip urgent byte */
768c2ecf20Sopenharmony_ci				force = true;
778c2ecf20Sopenharmony_ci				smc_curs_add(conn->rmb_desc->len, &cons, 1);
788c2ecf20Sopenharmony_ci				conn->urg_rx_skip_pend = false;
798c2ecf20Sopenharmony_ci			} else if (diff < -1)
808c2ecf20Sopenharmony_ci				/* we read past urgent byte */
818c2ecf20Sopenharmony_ci				conn->urg_state = SMC_URG_READ;
828c2ecf20Sopenharmony_ci		}
838c2ecf20Sopenharmony_ci	}
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci	smc_curs_copy(&conn->local_tx_ctrl.cons, &cons, conn);
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_ci	/* send consumer cursor update if required */
888c2ecf20Sopenharmony_ci	/* similar to advertising new TCP rcv_wnd if required */
898c2ecf20Sopenharmony_ci	smc_tx_consumer_update(conn, force);
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_ci	return rc;
928c2ecf20Sopenharmony_ci}
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_cistatic void smc_rx_update_cons(struct smc_sock *smc, size_t len)
958c2ecf20Sopenharmony_ci{
968c2ecf20Sopenharmony_ci	struct smc_connection *conn = &smc->conn;
978c2ecf20Sopenharmony_ci	union smc_host_cursor cons;
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci	smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
1008c2ecf20Sopenharmony_ci	smc_rx_update_consumer(smc, cons, len);
1018c2ecf20Sopenharmony_ci}
1028c2ecf20Sopenharmony_ci
/* Private data attached to a pipe buffer created by smc_rx_splice();
 * read and freed by smc_rx_pipe_buf_release() when that buffer is released.
 */
struct smc_spd_priv {
	struct smc_sock *smc;	/* socket the spliced bytes were taken from */
	size_t		 len;	/* number of bytes described by the buffer */
};
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_cistatic void smc_rx_pipe_buf_release(struct pipe_inode_info *pipe,
1098c2ecf20Sopenharmony_ci				    struct pipe_buffer *buf)
1108c2ecf20Sopenharmony_ci{
1118c2ecf20Sopenharmony_ci	struct smc_spd_priv *priv = (struct smc_spd_priv *)buf->private;
1128c2ecf20Sopenharmony_ci	struct smc_sock *smc = priv->smc;
1138c2ecf20Sopenharmony_ci	struct smc_connection *conn;
1148c2ecf20Sopenharmony_ci	struct sock *sk = &smc->sk;
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_ci	if (sk->sk_state == SMC_CLOSED ||
1178c2ecf20Sopenharmony_ci	    sk->sk_state == SMC_PEERFINCLOSEWAIT ||
1188c2ecf20Sopenharmony_ci	    sk->sk_state == SMC_APPFINCLOSEWAIT)
1198c2ecf20Sopenharmony_ci		goto out;
1208c2ecf20Sopenharmony_ci	conn = &smc->conn;
1218c2ecf20Sopenharmony_ci	lock_sock(sk);
1228c2ecf20Sopenharmony_ci	smc_rx_update_cons(smc, priv->len);
1238c2ecf20Sopenharmony_ci	release_sock(sk);
1248c2ecf20Sopenharmony_ci	if (atomic_sub_and_test(priv->len, &conn->splice_pending))
1258c2ecf20Sopenharmony_ci		smc_rx_wake_up(sk);
1268c2ecf20Sopenharmony_ciout:
1278c2ecf20Sopenharmony_ci	kfree(priv);
1288c2ecf20Sopenharmony_ci	put_page(buf->page);
1298c2ecf20Sopenharmony_ci	sock_put(sk);
1308c2ecf20Sopenharmony_ci}
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_cistatic const struct pipe_buf_operations smc_pipe_ops = {
1338c2ecf20Sopenharmony_ci	.release = smc_rx_pipe_buf_release,
1348c2ecf20Sopenharmony_ci	.get = generic_pipe_buf_get
1358c2ecf20Sopenharmony_ci};
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_cistatic void smc_rx_spd_release(struct splice_pipe_desc *spd,
1388c2ecf20Sopenharmony_ci			       unsigned int i)
1398c2ecf20Sopenharmony_ci{
1408c2ecf20Sopenharmony_ci	put_page(spd->pages[i]);
1418c2ecf20Sopenharmony_ci}
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_cistatic int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len,
1448c2ecf20Sopenharmony_ci			 struct smc_sock *smc)
1458c2ecf20Sopenharmony_ci{
1468c2ecf20Sopenharmony_ci	struct splice_pipe_desc spd;
1478c2ecf20Sopenharmony_ci	struct partial_page partial;
1488c2ecf20Sopenharmony_ci	struct smc_spd_priv *priv;
1498c2ecf20Sopenharmony_ci	int bytes;
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_ci	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
1528c2ecf20Sopenharmony_ci	if (!priv)
1538c2ecf20Sopenharmony_ci		return -ENOMEM;
1548c2ecf20Sopenharmony_ci	priv->len = len;
1558c2ecf20Sopenharmony_ci	priv->smc = smc;
1568c2ecf20Sopenharmony_ci	partial.offset = src - (char *)smc->conn.rmb_desc->cpu_addr;
1578c2ecf20Sopenharmony_ci	partial.len = len;
1588c2ecf20Sopenharmony_ci	partial.private = (unsigned long)priv;
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci	spd.nr_pages_max = 1;
1618c2ecf20Sopenharmony_ci	spd.nr_pages = 1;
1628c2ecf20Sopenharmony_ci	spd.pages = &smc->conn.rmb_desc->pages;
1638c2ecf20Sopenharmony_ci	spd.partial = &partial;
1648c2ecf20Sopenharmony_ci	spd.ops = &smc_pipe_ops;
1658c2ecf20Sopenharmony_ci	spd.spd_release = smc_rx_spd_release;
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_ci	bytes = splice_to_pipe(pipe, &spd);
1688c2ecf20Sopenharmony_ci	if (bytes > 0) {
1698c2ecf20Sopenharmony_ci		sock_hold(&smc->sk);
1708c2ecf20Sopenharmony_ci		get_page(smc->conn.rmb_desc->pages);
1718c2ecf20Sopenharmony_ci		atomic_add(bytes, &smc->conn.splice_pending);
1728c2ecf20Sopenharmony_ci	}
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_ci	return bytes;
1758c2ecf20Sopenharmony_ci}
1768c2ecf20Sopenharmony_ci
1778c2ecf20Sopenharmony_cistatic int smc_rx_data_available_and_no_splice_pend(struct smc_connection *conn)
1788c2ecf20Sopenharmony_ci{
1798c2ecf20Sopenharmony_ci	return atomic_read(&conn->bytes_to_rcv) &&
1808c2ecf20Sopenharmony_ci	       !atomic_read(&conn->splice_pending);
1818c2ecf20Sopenharmony_ci}
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci/* blocks rcvbuf consumer until >=len bytes available or timeout or interrupted
1848c2ecf20Sopenharmony_ci *   @smc    smc socket
1858c2ecf20Sopenharmony_ci *   @timeo  pointer to max seconds to wait, pointer to value 0 for no timeout
1868c2ecf20Sopenharmony_ci *   @fcrit  add'l criterion to evaluate as function pointer
1878c2ecf20Sopenharmony_ci * Returns:
1888c2ecf20Sopenharmony_ci * 1 if at least 1 byte available in rcvbuf or if socket error/shutdown.
1898c2ecf20Sopenharmony_ci * 0 otherwise (nothing in rcvbuf nor timeout, e.g. interrupted).
1908c2ecf20Sopenharmony_ci */
1918c2ecf20Sopenharmony_ciint smc_rx_wait(struct smc_sock *smc, long *timeo,
1928c2ecf20Sopenharmony_ci		int (*fcrit)(struct smc_connection *conn))
1938c2ecf20Sopenharmony_ci{
1948c2ecf20Sopenharmony_ci	DEFINE_WAIT_FUNC(wait, woken_wake_function);
1958c2ecf20Sopenharmony_ci	struct smc_connection *conn = &smc->conn;
1968c2ecf20Sopenharmony_ci	struct smc_cdc_conn_state_flags *cflags =
1978c2ecf20Sopenharmony_ci					&conn->local_tx_ctrl.conn_state_flags;
1988c2ecf20Sopenharmony_ci	struct sock *sk = &smc->sk;
1998c2ecf20Sopenharmony_ci	int rc;
2008c2ecf20Sopenharmony_ci
2018c2ecf20Sopenharmony_ci	if (fcrit(conn))
2028c2ecf20Sopenharmony_ci		return 1;
2038c2ecf20Sopenharmony_ci	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2048c2ecf20Sopenharmony_ci	add_wait_queue(sk_sleep(sk), &wait);
2058c2ecf20Sopenharmony_ci	rc = sk_wait_event(sk, timeo,
2068c2ecf20Sopenharmony_ci			   READ_ONCE(sk->sk_err) ||
2078c2ecf20Sopenharmony_ci			   cflags->peer_conn_abort ||
2088c2ecf20Sopenharmony_ci			   READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN ||
2098c2ecf20Sopenharmony_ci			   conn->killed ||
2108c2ecf20Sopenharmony_ci			   fcrit(conn),
2118c2ecf20Sopenharmony_ci			   &wait);
2128c2ecf20Sopenharmony_ci	remove_wait_queue(sk_sleep(sk), &wait);
2138c2ecf20Sopenharmony_ci	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2148c2ecf20Sopenharmony_ci	return rc;
2158c2ecf20Sopenharmony_ci}
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_cistatic int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len,
2188c2ecf20Sopenharmony_ci			   int flags)
2198c2ecf20Sopenharmony_ci{
2208c2ecf20Sopenharmony_ci	struct smc_connection *conn = &smc->conn;
2218c2ecf20Sopenharmony_ci	union smc_host_cursor cons;
2228c2ecf20Sopenharmony_ci	struct sock *sk = &smc->sk;
2238c2ecf20Sopenharmony_ci	int rc = 0;
2248c2ecf20Sopenharmony_ci
2258c2ecf20Sopenharmony_ci	if (sock_flag(sk, SOCK_URGINLINE) ||
2268c2ecf20Sopenharmony_ci	    !(conn->urg_state == SMC_URG_VALID) ||
2278c2ecf20Sopenharmony_ci	    conn->urg_state == SMC_URG_READ)
2288c2ecf20Sopenharmony_ci		return -EINVAL;
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci	if (conn->urg_state == SMC_URG_VALID) {
2318c2ecf20Sopenharmony_ci		if (!(flags & MSG_PEEK))
2328c2ecf20Sopenharmony_ci			smc->conn.urg_state = SMC_URG_READ;
2338c2ecf20Sopenharmony_ci		msg->msg_flags |= MSG_OOB;
2348c2ecf20Sopenharmony_ci		if (len > 0) {
2358c2ecf20Sopenharmony_ci			if (!(flags & MSG_TRUNC))
2368c2ecf20Sopenharmony_ci				rc = memcpy_to_msg(msg, &conn->urg_rx_byte, 1);
2378c2ecf20Sopenharmony_ci			len = 1;
2388c2ecf20Sopenharmony_ci			smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
2398c2ecf20Sopenharmony_ci			if (smc_curs_diff(conn->rmb_desc->len, &cons,
2408c2ecf20Sopenharmony_ci					  &conn->urg_curs) > 1)
2418c2ecf20Sopenharmony_ci				conn->urg_rx_skip_pend = true;
2428c2ecf20Sopenharmony_ci			/* Urgent Byte was already accounted for, but trigger
2438c2ecf20Sopenharmony_ci			 * skipping the urgent byte in non-inline case
2448c2ecf20Sopenharmony_ci			 */
2458c2ecf20Sopenharmony_ci			if (!(flags & MSG_PEEK))
2468c2ecf20Sopenharmony_ci				smc_rx_update_consumer(smc, cons, 0);
2478c2ecf20Sopenharmony_ci		} else {
2488c2ecf20Sopenharmony_ci			msg->msg_flags |= MSG_TRUNC;
2498c2ecf20Sopenharmony_ci		}
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_ci		return rc ? -EFAULT : len;
2528c2ecf20Sopenharmony_ci	}
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_ci	if (sk->sk_state == SMC_CLOSED || sk->sk_shutdown & RCV_SHUTDOWN)
2558c2ecf20Sopenharmony_ci		return 0;
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci	return -EAGAIN;
2588c2ecf20Sopenharmony_ci}
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_cistatic bool smc_rx_recvmsg_data_available(struct smc_sock *smc)
2618c2ecf20Sopenharmony_ci{
2628c2ecf20Sopenharmony_ci	struct smc_connection *conn = &smc->conn;
2638c2ecf20Sopenharmony_ci
2648c2ecf20Sopenharmony_ci	if (smc_rx_data_available(conn))
2658c2ecf20Sopenharmony_ci		return true;
2668c2ecf20Sopenharmony_ci	else if (conn->urg_state == SMC_URG_VALID)
2678c2ecf20Sopenharmony_ci		/* we received a single urgent Byte - skip */
2688c2ecf20Sopenharmony_ci		smc_rx_update_cons(smc, 0);
2698c2ecf20Sopenharmony_ci	return false;
2708c2ecf20Sopenharmony_ci}
2718c2ecf20Sopenharmony_ci
/* smc_rx_recvmsg - receive data from RMBE
 * @smc:	smc socket to receive from
 * @msg:	copy data to receive buffer
 * @pipe:	copy data to pipe if set - indicates splice() call
 * @len:	maximum number of bytes to receive
 * @flags:	receive flags; MSG_ERRQUEUE, MSG_OOB, MSG_DONTWAIT,
 *		MSG_WAITALL, MSG_TRUNC and MSG_PEEK are evaluated here
 *
 * rcvbuf consumer: main API called by socket layer.
 * Called under sk lock.
 *
 * Returns the number of bytes received, or a negative value:
 * -EINVAL for MSG_ERRQUEUE, -ENOTCONN for a listening or never connected
 * socket, -EAGAIN if nothing was read and the timeout is 0, the value of
 * sock_error() or sock_intr_errno() if nothing was read and an error or
 * signal is pending, -EFAULT if the first copy/splice attempt fails.
 * For MSG_OOB the result of smc_rx_recv_urg() is returned.
 */
int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
		   struct pipe_inode_info *pipe, size_t len, int flags)
{
	/* NOTE: read_done is a size_t but also carries negative error codes;
	 * they reach the caller through the int return type
	 */
	size_t copylen, read_done = 0, read_remaining = len;
	size_t chunk_len, chunk_off, chunk_len_sum;
	struct smc_connection *conn = &smc->conn;
	int (*func)(struct smc_connection *conn);
	union smc_host_cursor cons;
	int readable, chunk;
	char *rcvbuf_base;
	struct sock *sk;
	int splbytes;
	long timeo;
	int target;		/* Read at least these many bytes */
	int rc;

	if (unlikely(flags & MSG_ERRQUEUE))
		return -EINVAL; /* future work for sk.sk_family == AF_SMC */

	sk = &smc->sk;
	if (sk->sk_state == SMC_LISTEN)
		return -ENOTCONN;
	/* urgent data is handled by a dedicated helper */
	if (flags & MSG_OOB)
		return smc_rx_recv_urg(smc, msg, len, flags);
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	/* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */
	rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr;

	/* Each pass either copies one batch of data or waits for more.
	 * Note that "continue" jumps to the while (read_remaining) check
	 * at the bottom before the next pass starts.
	 */
	do { /* while (read_remaining) */
		/* done once the low-water mark is reached; a splice() call
		 * returns after the first batch
		 */
		if (read_done >= target || (pipe && read_done))
			break;

		if (conn->killed)
			break;

		if (smc_rx_recvmsg_data_available(smc))
			goto copy;

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			/* smc_cdc_msg_recv_action() could have run after
			 * above smc_rx_recvmsg_data_available()
			 */
			if (smc_rx_recvmsg_data_available(smc))
				goto copy;
			break;
		}

		if (read_done) {
			/* some data was delivered already: return it instead
			 * of reporting the condition as an error
			 */
			if (sk->sk_err ||
			    sk->sk_state == SMC_CLOSED ||
			    !timeo ||
			    signal_pending(current))
				break;
		} else {
			/* nothing delivered yet: report why we stop */
			if (sk->sk_err) {
				read_done = sock_error(sk);
				break;
			}
			if (sk->sk_state == SMC_CLOSED) {
				if (!sock_flag(sk, SOCK_DONE)) {
					/* This occurs when user tries to read
					 * from never connected socket.
					 */
					read_done = -ENOTCONN;
					break;
				}
				/* closed with SOCK_DONE set: return 0 */
				break;
			}
			if (!timeo)
				return -EAGAIN;
			if (signal_pending(current)) {
				read_done = sock_intr_errno(timeo);
				break;
			}
		}

		if (!smc_rx_data_available(conn)) {
			smc_rx_wait(smc, &timeo, smc_rx_data_available);
			continue;
		}

copy:
		/* initialize variables for 1st iteration of subsequent loop */
		/* could be just 1 byte, even after waiting on data above */
		readable = atomic_read(&conn->bytes_to_rcv);
		splbytes = atomic_read(&conn->splice_pending);
		/* a recvmsg() caller (msg set) must wait until bytes that
		 * were spliced to a pipe earlier are no longer pending
		 */
		if (!readable || (msg && splbytes)) {
			if (splbytes)
				func = smc_rx_data_available_and_no_splice_pend;
			else
				func = smc_rx_data_available;
			smc_rx_wait(smc, &timeo, func);
			continue;
		}

		smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
		/* subsequent splice() calls pick up where previous left */
		if (splbytes)
			smc_curs_add(conn->rmb_desc->len, &cons, splbytes);
		if (conn->urg_state == SMC_URG_VALID &&
		    sock_flag(&smc->sk, SOCK_URGINLINE) &&
		    readable > 1)
			readable--;	/* always stop at urgent Byte */
		/* not more than what user space asked for */
		copylen = min_t(size_t, read_remaining, readable);
		/* determine chunks where to read from rcvbuf */
		/* either unwrapped case, or 1st chunk of wrapped case */
		chunk_len = min_t(size_t, copylen, conn->rmb_desc->len -
				  cons.count);
		chunk_len_sum = chunk_len;
		chunk_off = cons.count;
		smc_rmb_sync_sg_for_cpu(conn);
		/* at most two chunks: up to the end of the ring buffer, then
		 * the wrapped remainder from offset 0
		 */
		for (chunk = 0; chunk < 2; chunk++) {
			/* with MSG_TRUNC the data is only accounted for,
			 * not copied
			 */
			if (!(flags & MSG_TRUNC)) {
				if (msg) {
					rc = memcpy_to_msg(msg, rcvbuf_base +
							   chunk_off,
							   chunk_len);
				} else {
					rc = smc_rx_splice(pipe, rcvbuf_base +
							chunk_off, chunk_len,
							smc);
				}
				if (rc < 0) {
					/* any failure is reported as -EFAULT,
					 * and only if nothing was read before
					 */
					if (!read_done)
						read_done = -EFAULT;
					smc_rmb_sync_sg_for_device(conn);
					goto out;
				}
			}
			read_remaining -= chunk_len;
			read_done += chunk_len;

			if (chunk_len_sum == copylen)
				break; /* either on 1st or 2nd iteration */
			/* prepare next (== 2nd) iteration */
			chunk_len = copylen - chunk_len; /* remainder */
			chunk_len_sum += chunk_len;
			chunk_off = 0; /* modulo offset in recv ring buffer */
		}
		smc_rmb_sync_sg_for_device(conn);

		/* update cursors */
		if (!(flags & MSG_PEEK)) {
			/* increased in recv tasklet smc_cdc_msg_rcv() */
			smp_mb__before_atomic();
			atomic_sub(copylen, &conn->bytes_to_rcv);
			/* guarantee 0 <= bytes_to_rcv <= rmb_desc->len */
			smp_mb__after_atomic();
			/* only the recvmsg() path moves the consumer cursor
			 * here; for splice() this is done by
			 * smc_rx_pipe_buf_release() when the pipe buffer is
			 * released
			 */
			if (msg && smc_rx_update_consumer(smc, cons, copylen))
				goto out;
		}
	} while (read_remaining);
out:
	return read_done;
}
4378c2ecf20Sopenharmony_ci
4388c2ecf20Sopenharmony_ci/* Initialize receive properties on connection establishment. NB: not __init! */
4398c2ecf20Sopenharmony_civoid smc_rx_init(struct smc_sock *smc)
4408c2ecf20Sopenharmony_ci{
4418c2ecf20Sopenharmony_ci	smc->sk.sk_data_ready = smc_rx_wake_up;
4428c2ecf20Sopenharmony_ci	atomic_set(&smc->conn.splice_pending, 0);
4438c2ecf20Sopenharmony_ci	smc->conn.urg_state = SMC_URG_READ;
4448c2ecf20Sopenharmony_ci}
445