162306a36Sopenharmony_ci/*
262306a36Sopenharmony_ci * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
362306a36Sopenharmony_ci *
462306a36Sopenharmony_ci * This software is available to you under a choice of one of two
562306a36Sopenharmony_ci * licenses.  You may choose to be licensed under the terms of the GNU
662306a36Sopenharmony_ci * General Public License (GPL) Version 2, available from the file
762306a36Sopenharmony_ci * COPYING in the main directory of this source tree, or the
862306a36Sopenharmony_ci * OpenIB.org BSD license below:
962306a36Sopenharmony_ci *
1062306a36Sopenharmony_ci *     Redistribution and use in source and binary forms, with or
1162306a36Sopenharmony_ci *     without modification, are permitted provided that the following
1262306a36Sopenharmony_ci *     conditions are met:
1362306a36Sopenharmony_ci *
1462306a36Sopenharmony_ci *      - Redistributions of source code must retain the above
1562306a36Sopenharmony_ci *        copyright notice, this list of conditions and the following
1662306a36Sopenharmony_ci *        disclaimer.
1762306a36Sopenharmony_ci *
1862306a36Sopenharmony_ci *      - Redistributions in binary form must reproduce the above
1962306a36Sopenharmony_ci *        copyright notice, this list of conditions and the following
2062306a36Sopenharmony_ci *        disclaimer in the documentation and/or other materials
2162306a36Sopenharmony_ci *        provided with the distribution.
2262306a36Sopenharmony_ci *
2362306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
2462306a36Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
2562306a36Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
2662306a36Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
2762306a36Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
2862306a36Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
2962306a36Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3062306a36Sopenharmony_ci * SOFTWARE.
3162306a36Sopenharmony_ci *
3262306a36Sopenharmony_ci */
3362306a36Sopenharmony_ci#include <linux/kernel.h>
3462306a36Sopenharmony_ci#include <linux/in.h>
3562306a36Sopenharmony_ci#include <linux/device.h>
3662306a36Sopenharmony_ci#include <linux/dmapool.h>
3762306a36Sopenharmony_ci#include <linux/ratelimit.h>
3862306a36Sopenharmony_ci
3962306a36Sopenharmony_ci#include "rds_single_path.h"
4062306a36Sopenharmony_ci#include "rds.h"
4162306a36Sopenharmony_ci#include "ib.h"
4262306a36Sopenharmony_ci#include "ib_mr.h"
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci/*
4562306a36Sopenharmony_ci * Convert IB-specific error message to RDS error message and call core
4662306a36Sopenharmony_ci * completion handler.
4762306a36Sopenharmony_ci */
4862306a36Sopenharmony_cistatic void rds_ib_send_complete(struct rds_message *rm,
4962306a36Sopenharmony_ci				 int wc_status,
5062306a36Sopenharmony_ci				 void (*complete)(struct rds_message *rm, int status))
5162306a36Sopenharmony_ci{
5262306a36Sopenharmony_ci	int notify_status;
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ci	switch (wc_status) {
5562306a36Sopenharmony_ci	case IB_WC_WR_FLUSH_ERR:
5662306a36Sopenharmony_ci		return;
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci	case IB_WC_SUCCESS:
5962306a36Sopenharmony_ci		notify_status = RDS_RDMA_SUCCESS;
6062306a36Sopenharmony_ci		break;
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci	case IB_WC_REM_ACCESS_ERR:
6362306a36Sopenharmony_ci		notify_status = RDS_RDMA_REMOTE_ERROR;
6462306a36Sopenharmony_ci		break;
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci	default:
6762306a36Sopenharmony_ci		notify_status = RDS_RDMA_OTHER_ERROR;
6862306a36Sopenharmony_ci		break;
6962306a36Sopenharmony_ci	}
7062306a36Sopenharmony_ci	complete(rm, notify_status);
7162306a36Sopenharmony_ci}
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_cistatic void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
7462306a36Sopenharmony_ci				   struct rm_data_op *op,
7562306a36Sopenharmony_ci				   int wc_status)
7662306a36Sopenharmony_ci{
7762306a36Sopenharmony_ci	if (op->op_nents)
7862306a36Sopenharmony_ci		ib_dma_unmap_sg(ic->i_cm_id->device,
7962306a36Sopenharmony_ci				op->op_sg, op->op_nents,
8062306a36Sopenharmony_ci				DMA_TO_DEVICE);
8162306a36Sopenharmony_ci}
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_cistatic void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
8462306a36Sopenharmony_ci				   struct rm_rdma_op *op,
8562306a36Sopenharmony_ci				   int wc_status)
8662306a36Sopenharmony_ci{
8762306a36Sopenharmony_ci	if (op->op_mapped) {
8862306a36Sopenharmony_ci		ib_dma_unmap_sg(ic->i_cm_id->device,
8962306a36Sopenharmony_ci				op->op_sg, op->op_nents,
9062306a36Sopenharmony_ci				op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
9162306a36Sopenharmony_ci		op->op_mapped = 0;
9262306a36Sopenharmony_ci	}
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	/* If the user asked for a completion notification on this
9562306a36Sopenharmony_ci	 * message, we can implement three different semantics:
9662306a36Sopenharmony_ci	 *  1.	Notify when we received the ACK on the RDS message
9762306a36Sopenharmony_ci	 *	that was queued with the RDMA. This provides reliable
9862306a36Sopenharmony_ci	 *	notification of RDMA status at the expense of a one-way
9962306a36Sopenharmony_ci	 *	packet delay.
10062306a36Sopenharmony_ci	 *  2.	Notify when the IB stack gives us the completion event for
10162306a36Sopenharmony_ci	 *	the RDMA operation.
10262306a36Sopenharmony_ci	 *  3.	Notify when the IB stack gives us the completion event for
10362306a36Sopenharmony_ci	 *	the accompanying RDS messages.
10462306a36Sopenharmony_ci	 * Here, we implement approach #3. To implement approach #2,
10562306a36Sopenharmony_ci	 * we would need to take an event for the rdma WR. To implement #1,
10662306a36Sopenharmony_ci	 * don't call rds_rdma_send_complete at all, and fall back to the notify
10762306a36Sopenharmony_ci	 * handling in the ACK processing code.
10862306a36Sopenharmony_ci	 *
10962306a36Sopenharmony_ci	 * Note: There's no need to explicitly sync any RDMA buffers using
11062306a36Sopenharmony_ci	 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
11162306a36Sopenharmony_ci	 * operation itself unmapped the RDMA buffers, which takes care
11262306a36Sopenharmony_ci	 * of synching.
11362306a36Sopenharmony_ci	 */
11462306a36Sopenharmony_ci	rds_ib_send_complete(container_of(op, struct rds_message, rdma),
11562306a36Sopenharmony_ci			     wc_status, rds_rdma_send_complete);
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci	if (op->op_write)
11862306a36Sopenharmony_ci		rds_stats_add(s_send_rdma_bytes, op->op_bytes);
11962306a36Sopenharmony_ci	else
12062306a36Sopenharmony_ci		rds_stats_add(s_recv_rdma_bytes, op->op_bytes);
12162306a36Sopenharmony_ci}
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_cistatic void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic,
12462306a36Sopenharmony_ci				     struct rm_atomic_op *op,
12562306a36Sopenharmony_ci				     int wc_status)
12662306a36Sopenharmony_ci{
12762306a36Sopenharmony_ci	/* unmap atomic recvbuf */
12862306a36Sopenharmony_ci	if (op->op_mapped) {
12962306a36Sopenharmony_ci		ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1,
13062306a36Sopenharmony_ci				DMA_FROM_DEVICE);
13162306a36Sopenharmony_ci		op->op_mapped = 0;
13262306a36Sopenharmony_ci	}
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	rds_ib_send_complete(container_of(op, struct rds_message, atomic),
13562306a36Sopenharmony_ci			     wc_status, rds_atomic_send_complete);
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci	if (op->op_type == RDS_ATOMIC_TYPE_CSWP)
13862306a36Sopenharmony_ci		rds_ib_stats_inc(s_ib_atomic_cswp);
13962306a36Sopenharmony_ci	else
14062306a36Sopenharmony_ci		rds_ib_stats_inc(s_ib_atomic_fadd);
14162306a36Sopenharmony_ci}
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci/*
14462306a36Sopenharmony_ci * Unmap the resources associated with a struct send_work.
14562306a36Sopenharmony_ci *
14662306a36Sopenharmony_ci * Returns the rm for no good reason other than it is unobtainable
14762306a36Sopenharmony_ci * other than by switching on wr.opcode, currently, and the caller,
14862306a36Sopenharmony_ci * the event handler, needs it.
14962306a36Sopenharmony_ci */
15062306a36Sopenharmony_cistatic struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic,
15162306a36Sopenharmony_ci						struct rds_ib_send_work *send,
15262306a36Sopenharmony_ci						int wc_status)
15362306a36Sopenharmony_ci{
15462306a36Sopenharmony_ci	struct rds_message *rm = NULL;
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci	/* In the error case, wc.opcode sometimes contains garbage */
15762306a36Sopenharmony_ci	switch (send->s_wr.opcode) {
15862306a36Sopenharmony_ci	case IB_WR_SEND:
15962306a36Sopenharmony_ci		if (send->s_op) {
16062306a36Sopenharmony_ci			rm = container_of(send->s_op, struct rds_message, data);
16162306a36Sopenharmony_ci			rds_ib_send_unmap_data(ic, send->s_op, wc_status);
16262306a36Sopenharmony_ci		}
16362306a36Sopenharmony_ci		break;
16462306a36Sopenharmony_ci	case IB_WR_RDMA_WRITE:
16562306a36Sopenharmony_ci	case IB_WR_RDMA_READ:
16662306a36Sopenharmony_ci		if (send->s_op) {
16762306a36Sopenharmony_ci			rm = container_of(send->s_op, struct rds_message, rdma);
16862306a36Sopenharmony_ci			rds_ib_send_unmap_rdma(ic, send->s_op, wc_status);
16962306a36Sopenharmony_ci		}
17062306a36Sopenharmony_ci		break;
17162306a36Sopenharmony_ci	case IB_WR_ATOMIC_FETCH_AND_ADD:
17262306a36Sopenharmony_ci	case IB_WR_ATOMIC_CMP_AND_SWP:
17362306a36Sopenharmony_ci		if (send->s_op) {
17462306a36Sopenharmony_ci			rm = container_of(send->s_op, struct rds_message, atomic);
17562306a36Sopenharmony_ci			rds_ib_send_unmap_atomic(ic, send->s_op, wc_status);
17662306a36Sopenharmony_ci		}
17762306a36Sopenharmony_ci		break;
17862306a36Sopenharmony_ci	default:
17962306a36Sopenharmony_ci		printk_ratelimited(KERN_NOTICE
18062306a36Sopenharmony_ci			       "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
18162306a36Sopenharmony_ci			       __func__, send->s_wr.opcode);
18262306a36Sopenharmony_ci		break;
18362306a36Sopenharmony_ci	}
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci	send->s_wr.opcode = 0xdead;
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci	return rm;
18862306a36Sopenharmony_ci}
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_civoid rds_ib_send_init_ring(struct rds_ib_connection *ic)
19162306a36Sopenharmony_ci{
19262306a36Sopenharmony_ci	struct rds_ib_send_work *send;
19362306a36Sopenharmony_ci	u32 i;
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
19662306a36Sopenharmony_ci		struct ib_sge *sge;
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci		send->s_op = NULL;
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci		send->s_wr.wr_id = i;
20162306a36Sopenharmony_ci		send->s_wr.sg_list = send->s_sge;
20262306a36Sopenharmony_ci		send->s_wr.ex.imm_data = 0;
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci		sge = &send->s_sge[0];
20562306a36Sopenharmony_ci		sge->addr = ic->i_send_hdrs_dma[i];
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci		sge->length = sizeof(struct rds_header);
20862306a36Sopenharmony_ci		sge->lkey = ic->i_pd->local_dma_lkey;
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_ci		send->s_sge[1].lkey = ic->i_pd->local_dma_lkey;
21162306a36Sopenharmony_ci	}
21262306a36Sopenharmony_ci}
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_civoid rds_ib_send_clear_ring(struct rds_ib_connection *ic)
21562306a36Sopenharmony_ci{
21662306a36Sopenharmony_ci	struct rds_ib_send_work *send;
21762306a36Sopenharmony_ci	u32 i;
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
22062306a36Sopenharmony_ci		if (send->s_op && send->s_wr.opcode != 0xdead)
22162306a36Sopenharmony_ci			rds_ib_send_unmap_op(ic, send, IB_WC_WR_FLUSH_ERR);
22262306a36Sopenharmony_ci	}
22362306a36Sopenharmony_ci}
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci/*
22662306a36Sopenharmony_ci * The only fast path caller always has a non-zero nr, so we don't
22762306a36Sopenharmony_ci * bother testing nr before performing the atomic sub.
22862306a36Sopenharmony_ci */
22962306a36Sopenharmony_cistatic void rds_ib_sub_signaled(struct rds_ib_connection *ic, int nr)
23062306a36Sopenharmony_ci{
23162306a36Sopenharmony_ci	if ((atomic_sub_return(nr, &ic->i_signaled_sends) == 0) &&
23262306a36Sopenharmony_ci	    waitqueue_active(&rds_ib_ring_empty_wait))
23362306a36Sopenharmony_ci		wake_up(&rds_ib_ring_empty_wait);
23462306a36Sopenharmony_ci	BUG_ON(atomic_read(&ic->i_signaled_sends) < 0);
23562306a36Sopenharmony_ci}
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci/*
23862306a36Sopenharmony_ci * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
23962306a36Sopenharmony_ci * operations performed in the send path.  As the sender allocs and potentially
24062306a36Sopenharmony_ci * unallocs the next free entry in the ring it doesn't alter which is
24162306a36Sopenharmony_ci * the next to be freed, which is what this is concerned with.
24262306a36Sopenharmony_ci */
24362306a36Sopenharmony_civoid rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
24462306a36Sopenharmony_ci{
24562306a36Sopenharmony_ci	struct rds_message *rm = NULL;
24662306a36Sopenharmony_ci	struct rds_connection *conn = ic->conn;
24762306a36Sopenharmony_ci	struct rds_ib_send_work *send;
24862306a36Sopenharmony_ci	u32 completed;
24962306a36Sopenharmony_ci	u32 oldest;
25062306a36Sopenharmony_ci	u32 i = 0;
25162306a36Sopenharmony_ci	int nr_sig = 0;
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
25562306a36Sopenharmony_ci		 (unsigned long long)wc->wr_id, wc->status,
25662306a36Sopenharmony_ci		 ib_wc_status_msg(wc->status), wc->byte_len,
25762306a36Sopenharmony_ci		 be32_to_cpu(wc->ex.imm_data));
25862306a36Sopenharmony_ci	rds_ib_stats_inc(s_ib_tx_cq_event);
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci	if (wc->wr_id == RDS_IB_ACK_WR_ID) {
26162306a36Sopenharmony_ci		if (time_after(jiffies, ic->i_ack_queued + HZ / 2))
26262306a36Sopenharmony_ci			rds_ib_stats_inc(s_ib_tx_stalled);
26362306a36Sopenharmony_ci		rds_ib_ack_send_complete(ic);
26462306a36Sopenharmony_ci		return;
26562306a36Sopenharmony_ci	}
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci	oldest = rds_ib_ring_oldest(&ic->i_send_ring);
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	completed = rds_ib_ring_completed(&ic->i_send_ring, wc->wr_id, oldest);
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci	for (i = 0; i < completed; i++) {
27262306a36Sopenharmony_ci		send = &ic->i_sends[oldest];
27362306a36Sopenharmony_ci		if (send->s_wr.send_flags & IB_SEND_SIGNALED)
27462306a36Sopenharmony_ci			nr_sig++;
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_ci		rm = rds_ib_send_unmap_op(ic, send, wc->status);
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci		if (time_after(jiffies, send->s_queued + HZ / 2))
27962306a36Sopenharmony_ci			rds_ib_stats_inc(s_ib_tx_stalled);
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci		if (send->s_op) {
28262306a36Sopenharmony_ci			if (send->s_op == rm->m_final_op) {
28362306a36Sopenharmony_ci				/* If anyone waited for this message to get
28462306a36Sopenharmony_ci				 * flushed out, wake them up now
28562306a36Sopenharmony_ci				 */
28662306a36Sopenharmony_ci				rds_message_unmapped(rm);
28762306a36Sopenharmony_ci			}
28862306a36Sopenharmony_ci			rds_message_put(rm);
28962306a36Sopenharmony_ci			send->s_op = NULL;
29062306a36Sopenharmony_ci		}
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci		oldest = (oldest + 1) % ic->i_send_ring.w_nr;
29362306a36Sopenharmony_ci	}
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_ci	rds_ib_ring_free(&ic->i_send_ring, completed);
29662306a36Sopenharmony_ci	rds_ib_sub_signaled(ic, nr_sig);
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
29962306a36Sopenharmony_ci	    test_bit(0, &conn->c_map_queued))
30062306a36Sopenharmony_ci		queue_delayed_work(rds_wq, &conn->c_send_w, 0);
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci	/* We expect errors as the qp is drained during shutdown */
30362306a36Sopenharmony_ci	if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
30462306a36Sopenharmony_ci		rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n",
30562306a36Sopenharmony_ci				  &conn->c_laddr, &conn->c_faddr,
30662306a36Sopenharmony_ci				  conn->c_tos, wc->status,
30762306a36Sopenharmony_ci				  ib_wc_status_msg(wc->status), wc->vendor_err);
30862306a36Sopenharmony_ci	}
30962306a36Sopenharmony_ci}
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci/*
31262306a36Sopenharmony_ci * This is the main function for allocating credits when sending
31362306a36Sopenharmony_ci * messages.
31462306a36Sopenharmony_ci *
31562306a36Sopenharmony_ci * Conceptually, we have two counters:
31662306a36Sopenharmony_ci *  -	send credits: this tells us how many WRs we're allowed
 *	to submit without overrunning the receiver's queue. For
31862306a36Sopenharmony_ci *	each SEND WR we post, we decrement this by one.
31962306a36Sopenharmony_ci *
32062306a36Sopenharmony_ci *  -	posted credits: this tells us how many WRs we recently
32162306a36Sopenharmony_ci *	posted to the receive queue. This value is transferred
32262306a36Sopenharmony_ci *	to the peer as a "credit update" in a RDS header field.
32362306a36Sopenharmony_ci *	Every time we transmit credits to the peer, we subtract
32462306a36Sopenharmony_ci *	the amount of transferred credits from this counter.
32562306a36Sopenharmony_ci *
32662306a36Sopenharmony_ci * It is essential that we avoid situations where both sides have
32762306a36Sopenharmony_ci * exhausted their send credits, and are unable to send new credits
32862306a36Sopenharmony_ci * to the peer. We achieve this by requiring that we send at least
32962306a36Sopenharmony_ci * one credit update to the peer before exhausting our credits.
33062306a36Sopenharmony_ci * When new credits arrive, we subtract one credit that is withheld
33162306a36Sopenharmony_ci * until we've posted new buffers and are ready to transmit these
33262306a36Sopenharmony_ci * credits (see rds_ib_send_add_credits below).
33362306a36Sopenharmony_ci *
33462306a36Sopenharmony_ci * The RDS send code is essentially single-threaded; rds_send_xmit
33562306a36Sopenharmony_ci * sets RDS_IN_XMIT to ensure exclusive access to the send ring.
33662306a36Sopenharmony_ci * However, the ACK sending code is independent and can race with
33762306a36Sopenharmony_ci * message SENDs.
33862306a36Sopenharmony_ci *
33962306a36Sopenharmony_ci * In the send path, we need to update the counters for send credits
34062306a36Sopenharmony_ci * and the counter of posted buffers atomically - when we use the
34162306a36Sopenharmony_ci * last available credit, we cannot allow another thread to race us
34262306a36Sopenharmony_ci * and grab the posted credits counter.  Hence, we have to use a
34362306a36Sopenharmony_ci * spinlock to protect the credit counter, or use atomics.
34462306a36Sopenharmony_ci *
34562306a36Sopenharmony_ci * Spinlocks shared between the send and the receive path are bad,
34662306a36Sopenharmony_ci * because they create unnecessary delays. An early implementation
34762306a36Sopenharmony_ci * using a spinlock showed a 5% degradation in throughput at some
34862306a36Sopenharmony_ci * loads.
34962306a36Sopenharmony_ci *
35062306a36Sopenharmony_ci * This implementation avoids spinlocks completely, putting both
35162306a36Sopenharmony_ci * counters into a single atomic, and updating that atomic using
35262306a36Sopenharmony_ci * atomic_add (in the receive path, when receiving fresh credits),
35362306a36Sopenharmony_ci * and using atomic_cmpxchg when updating the two counters.
35462306a36Sopenharmony_ci */
35562306a36Sopenharmony_ciint rds_ib_send_grab_credits(struct rds_ib_connection *ic,
35662306a36Sopenharmony_ci			     u32 wanted, u32 *adv_credits, int need_posted, int max_posted)
35762306a36Sopenharmony_ci{
35862306a36Sopenharmony_ci	unsigned int avail, posted, got = 0, advertise;
35962306a36Sopenharmony_ci	long oldval, newval;
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci	*adv_credits = 0;
36262306a36Sopenharmony_ci	if (!ic->i_flowctl)
36362306a36Sopenharmony_ci		return wanted;
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_citry_again:
36662306a36Sopenharmony_ci	advertise = 0;
36762306a36Sopenharmony_ci	oldval = newval = atomic_read(&ic->i_credits);
36862306a36Sopenharmony_ci	posted = IB_GET_POST_CREDITS(oldval);
36962306a36Sopenharmony_ci	avail = IB_GET_SEND_CREDITS(oldval);
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_ci	rdsdebug("wanted=%u credits=%u posted=%u\n",
37262306a36Sopenharmony_ci			wanted, avail, posted);
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_ci	/* The last credit must be used to send a credit update. */
37562306a36Sopenharmony_ci	if (avail && !posted)
37662306a36Sopenharmony_ci		avail--;
37762306a36Sopenharmony_ci
37862306a36Sopenharmony_ci	if (avail < wanted) {
37962306a36Sopenharmony_ci		struct rds_connection *conn = ic->i_cm_id->context;
38062306a36Sopenharmony_ci
38162306a36Sopenharmony_ci		/* Oops, there aren't that many credits left! */
38262306a36Sopenharmony_ci		set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
38362306a36Sopenharmony_ci		got = avail;
38462306a36Sopenharmony_ci	} else {
38562306a36Sopenharmony_ci		/* Sometimes you get what you want, lalala. */
38662306a36Sopenharmony_ci		got = wanted;
38762306a36Sopenharmony_ci	}
38862306a36Sopenharmony_ci	newval -= IB_SET_SEND_CREDITS(got);
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci	/*
39162306a36Sopenharmony_ci	 * If need_posted is non-zero, then the caller wants
39262306a36Sopenharmony_ci	 * the posted regardless of whether any send credits are
39362306a36Sopenharmony_ci	 * available.
39462306a36Sopenharmony_ci	 */
39562306a36Sopenharmony_ci	if (posted && (got || need_posted)) {
39662306a36Sopenharmony_ci		advertise = min_t(unsigned int, posted, max_posted);
39762306a36Sopenharmony_ci		newval -= IB_SET_POST_CREDITS(advertise);
39862306a36Sopenharmony_ci	}
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci	/* Finally bill everything */
40162306a36Sopenharmony_ci	if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval)
40262306a36Sopenharmony_ci		goto try_again;
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci	*adv_credits = advertise;
40562306a36Sopenharmony_ci	return got;
40662306a36Sopenharmony_ci}
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_civoid rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits)
40962306a36Sopenharmony_ci{
41062306a36Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_ci	if (credits == 0)
41362306a36Sopenharmony_ci		return;
41462306a36Sopenharmony_ci
41562306a36Sopenharmony_ci	rdsdebug("credits=%u current=%u%s\n",
41662306a36Sopenharmony_ci			credits,
41762306a36Sopenharmony_ci			IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)),
41862306a36Sopenharmony_ci			test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : "");
41962306a36Sopenharmony_ci
42062306a36Sopenharmony_ci	atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits);
42162306a36Sopenharmony_ci	if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags))
42262306a36Sopenharmony_ci		queue_delayed_work(rds_wq, &conn->c_send_w, 0);
42362306a36Sopenharmony_ci
42462306a36Sopenharmony_ci	WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384);
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_ci	rds_ib_stats_inc(s_ib_rx_credit_updates);
42762306a36Sopenharmony_ci}
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_civoid rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted)
43062306a36Sopenharmony_ci{
43162306a36Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
43262306a36Sopenharmony_ci
43362306a36Sopenharmony_ci	if (posted == 0)
43462306a36Sopenharmony_ci		return;
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci	atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits);
43762306a36Sopenharmony_ci
43862306a36Sopenharmony_ci	/* Decide whether to send an update to the peer now.
43962306a36Sopenharmony_ci	 * If we would send a credit update for every single buffer we
44062306a36Sopenharmony_ci	 * post, we would end up with an ACK storm (ACK arrives,
44162306a36Sopenharmony_ci	 * consumes buffer, we refill the ring, send ACK to remote
44262306a36Sopenharmony_ci	 * advertising the newly posted buffer... ad inf)
44362306a36Sopenharmony_ci	 *
44462306a36Sopenharmony_ci	 * Performance pretty much depends on how often we send
44562306a36Sopenharmony_ci	 * credit updates - too frequent updates mean lots of ACKs.
44662306a36Sopenharmony_ci	 * Too infrequent updates, and the peer will run out of
44762306a36Sopenharmony_ci	 * credits and has to throttle.
44862306a36Sopenharmony_ci	 * For the time being, 16 seems to be a good compromise.
44962306a36Sopenharmony_ci	 */
45062306a36Sopenharmony_ci	if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16)
45162306a36Sopenharmony_ci		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
45262306a36Sopenharmony_ci}
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_cistatic inline int rds_ib_set_wr_signal_state(struct rds_ib_connection *ic,
45562306a36Sopenharmony_ci					     struct rds_ib_send_work *send,
45662306a36Sopenharmony_ci					     bool notify)
45762306a36Sopenharmony_ci{
45862306a36Sopenharmony_ci	/*
45962306a36Sopenharmony_ci	 * We want to delay signaling completions just enough to get
46062306a36Sopenharmony_ci	 * the batching benefits but not so much that we create dead time
46162306a36Sopenharmony_ci	 * on the wire.
46262306a36Sopenharmony_ci	 */
46362306a36Sopenharmony_ci	if (ic->i_unsignaled_wrs-- == 0 || notify) {
46462306a36Sopenharmony_ci		ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
46562306a36Sopenharmony_ci		send->s_wr.send_flags |= IB_SEND_SIGNALED;
46662306a36Sopenharmony_ci		return 1;
46762306a36Sopenharmony_ci	}
46862306a36Sopenharmony_ci	return 0;
46962306a36Sopenharmony_ci}
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_ci/*
47262306a36Sopenharmony_ci * This can be called multiple times for a given message.  The first time
47362306a36Sopenharmony_ci * we see a message we map its scatterlist into the IB device so that
47462306a36Sopenharmony_ci * we can provide that mapped address to the IB scatter gather entries
47562306a36Sopenharmony_ci * in the IB work requests.  We translate the scatterlist into a series
47662306a36Sopenharmony_ci * of work requests that fragment the message.  These work requests complete
47762306a36Sopenharmony_ci * in order so we pass ownership of the message to the completion handler
47862306a36Sopenharmony_ci * once we send the final fragment.
47962306a36Sopenharmony_ci *
48062306a36Sopenharmony_ci * The RDS core uses the c_send_lock to only enter this function once
48162306a36Sopenharmony_ci * per connection.  This makes sure that the tx ring alloc/unalloc pairs
48262306a36Sopenharmony_ci * don't get out of sync and confuse the ring.
48362306a36Sopenharmony_ci */
48462306a36Sopenharmony_ciint rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
48562306a36Sopenharmony_ci		unsigned int hdr_off, unsigned int sg, unsigned int off)
48662306a36Sopenharmony_ci{
48762306a36Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
48862306a36Sopenharmony_ci	struct ib_device *dev = ic->i_cm_id->device;
48962306a36Sopenharmony_ci	struct rds_ib_send_work *send = NULL;
49062306a36Sopenharmony_ci	struct rds_ib_send_work *first;
49162306a36Sopenharmony_ci	struct rds_ib_send_work *prev;
49262306a36Sopenharmony_ci	const struct ib_send_wr *failed_wr;
49362306a36Sopenharmony_ci	struct scatterlist *scat;
49462306a36Sopenharmony_ci	u32 pos;
49562306a36Sopenharmony_ci	u32 i;
49662306a36Sopenharmony_ci	u32 work_alloc;
49762306a36Sopenharmony_ci	u32 credit_alloc = 0;
49862306a36Sopenharmony_ci	u32 posted;
49962306a36Sopenharmony_ci	u32 adv_credits = 0;
50062306a36Sopenharmony_ci	int send_flags = 0;
50162306a36Sopenharmony_ci	int bytes_sent = 0;
50262306a36Sopenharmony_ci	int ret;
50362306a36Sopenharmony_ci	int flow_controlled = 0;
50462306a36Sopenharmony_ci	int nr_sig = 0;
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_ci	BUG_ON(off % RDS_FRAG_SIZE);
50762306a36Sopenharmony_ci	BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_ci	/* Do not send cong updates to IB loopback */
51062306a36Sopenharmony_ci	if (conn->c_loopback
51162306a36Sopenharmony_ci	    && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
51262306a36Sopenharmony_ci		rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
51362306a36Sopenharmony_ci		scat = &rm->data.op_sg[sg];
51462306a36Sopenharmony_ci		ret = max_t(int, RDS_CONG_MAP_BYTES, scat->length);
51562306a36Sopenharmony_ci		return sizeof(struct rds_header) + ret;
51662306a36Sopenharmony_ci	}
51762306a36Sopenharmony_ci
51862306a36Sopenharmony_ci	/* FIXME we may overallocate here */
51962306a36Sopenharmony_ci	if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
52062306a36Sopenharmony_ci		i = 1;
52162306a36Sopenharmony_ci	else
52262306a36Sopenharmony_ci		i = DIV_ROUND_UP(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci	work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
52562306a36Sopenharmony_ci	if (work_alloc == 0) {
52662306a36Sopenharmony_ci		set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
52762306a36Sopenharmony_ci		rds_ib_stats_inc(s_ib_tx_ring_full);
52862306a36Sopenharmony_ci		ret = -ENOMEM;
52962306a36Sopenharmony_ci		goto out;
53062306a36Sopenharmony_ci	}
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_ci	if (ic->i_flowctl) {
53362306a36Sopenharmony_ci		credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT);
53462306a36Sopenharmony_ci		adv_credits += posted;
53562306a36Sopenharmony_ci		if (credit_alloc < work_alloc) {
53662306a36Sopenharmony_ci			rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
53762306a36Sopenharmony_ci			work_alloc = credit_alloc;
53862306a36Sopenharmony_ci			flow_controlled = 1;
53962306a36Sopenharmony_ci		}
54062306a36Sopenharmony_ci		if (work_alloc == 0) {
54162306a36Sopenharmony_ci			set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
54262306a36Sopenharmony_ci			rds_ib_stats_inc(s_ib_tx_throttle);
54362306a36Sopenharmony_ci			ret = -ENOMEM;
54462306a36Sopenharmony_ci			goto out;
54562306a36Sopenharmony_ci		}
54662306a36Sopenharmony_ci	}
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_ci	/* map the message the first time we see it */
54962306a36Sopenharmony_ci	if (!ic->i_data_op) {
55062306a36Sopenharmony_ci		if (rm->data.op_nents) {
55162306a36Sopenharmony_ci			rm->data.op_count = ib_dma_map_sg(dev,
55262306a36Sopenharmony_ci							  rm->data.op_sg,
55362306a36Sopenharmony_ci							  rm->data.op_nents,
55462306a36Sopenharmony_ci							  DMA_TO_DEVICE);
55562306a36Sopenharmony_ci			rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
55662306a36Sopenharmony_ci			if (rm->data.op_count == 0) {
55762306a36Sopenharmony_ci				rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
55862306a36Sopenharmony_ci				rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
55962306a36Sopenharmony_ci				ret = -ENOMEM; /* XXX ? */
56062306a36Sopenharmony_ci				goto out;
56162306a36Sopenharmony_ci			}
56262306a36Sopenharmony_ci		} else {
56362306a36Sopenharmony_ci			rm->data.op_count = 0;
56462306a36Sopenharmony_ci		}
56562306a36Sopenharmony_ci
56662306a36Sopenharmony_ci		rds_message_addref(rm);
56762306a36Sopenharmony_ci		rm->data.op_dmasg = 0;
56862306a36Sopenharmony_ci		rm->data.op_dmaoff = 0;
56962306a36Sopenharmony_ci		ic->i_data_op = &rm->data;
57062306a36Sopenharmony_ci
57162306a36Sopenharmony_ci		/* Finalize the header */
57262306a36Sopenharmony_ci		if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
57362306a36Sopenharmony_ci			rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED;
57462306a36Sopenharmony_ci		if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
57562306a36Sopenharmony_ci			rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
57662306a36Sopenharmony_ci
57762306a36Sopenharmony_ci		/* If it has a RDMA op, tell the peer we did it. This is
57862306a36Sopenharmony_ci		 * used by the peer to release use-once RDMA MRs. */
57962306a36Sopenharmony_ci		if (rm->rdma.op_active) {
58062306a36Sopenharmony_ci			struct rds_ext_header_rdma ext_hdr;
58162306a36Sopenharmony_ci
58262306a36Sopenharmony_ci			ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
58362306a36Sopenharmony_ci			rds_message_add_extension(&rm->m_inc.i_hdr,
58462306a36Sopenharmony_ci					RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
58562306a36Sopenharmony_ci		}
58662306a36Sopenharmony_ci		if (rm->m_rdma_cookie) {
58762306a36Sopenharmony_ci			rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr,
58862306a36Sopenharmony_ci					rds_rdma_cookie_key(rm->m_rdma_cookie),
58962306a36Sopenharmony_ci					rds_rdma_cookie_offset(rm->m_rdma_cookie));
59062306a36Sopenharmony_ci		}
59162306a36Sopenharmony_ci
59262306a36Sopenharmony_ci		/* Note - rds_ib_piggyb_ack clears the ACK_REQUIRED bit, so
59362306a36Sopenharmony_ci		 * we should not do this unless we have a chance of at least
59462306a36Sopenharmony_ci		 * sticking the header into the send ring. Which is why we
59562306a36Sopenharmony_ci		 * should call rds_ib_ring_alloc first. */
59662306a36Sopenharmony_ci		rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_ib_piggyb_ack(ic));
59762306a36Sopenharmony_ci		rds_message_make_checksum(&rm->m_inc.i_hdr);
59862306a36Sopenharmony_ci
59962306a36Sopenharmony_ci		/*
60062306a36Sopenharmony_ci		 * Update adv_credits since we reset the ACK_REQUIRED bit.
60162306a36Sopenharmony_ci		 */
60262306a36Sopenharmony_ci		if (ic->i_flowctl) {
60362306a36Sopenharmony_ci			rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
60462306a36Sopenharmony_ci			adv_credits += posted;
60562306a36Sopenharmony_ci			BUG_ON(adv_credits > 255);
60662306a36Sopenharmony_ci		}
60762306a36Sopenharmony_ci	}
60862306a36Sopenharmony_ci
60962306a36Sopenharmony_ci	/* Sometimes you want to put a fence between an RDMA
61062306a36Sopenharmony_ci	 * READ and the following SEND.
61162306a36Sopenharmony_ci	 * We could either do this all the time
61262306a36Sopenharmony_ci	 * or when requested by the user. Right now, we let
61362306a36Sopenharmony_ci	 * the application choose.
61462306a36Sopenharmony_ci	 */
61562306a36Sopenharmony_ci	if (rm->rdma.op_active && rm->rdma.op_fence)
61662306a36Sopenharmony_ci		send_flags = IB_SEND_FENCE;
61762306a36Sopenharmony_ci
61862306a36Sopenharmony_ci	/* Each frag gets a header. Msgs may be 0 bytes */
61962306a36Sopenharmony_ci	send = &ic->i_sends[pos];
62062306a36Sopenharmony_ci	first = send;
62162306a36Sopenharmony_ci	prev = NULL;
62262306a36Sopenharmony_ci	scat = &ic->i_data_op->op_sg[rm->data.op_dmasg];
62362306a36Sopenharmony_ci	i = 0;
62462306a36Sopenharmony_ci	do {
62562306a36Sopenharmony_ci		unsigned int len = 0;
62662306a36Sopenharmony_ci
62762306a36Sopenharmony_ci		/* Set up the header */
62862306a36Sopenharmony_ci		send->s_wr.send_flags = send_flags;
62962306a36Sopenharmony_ci		send->s_wr.opcode = IB_WR_SEND;
63062306a36Sopenharmony_ci		send->s_wr.num_sge = 1;
63162306a36Sopenharmony_ci		send->s_wr.next = NULL;
63262306a36Sopenharmony_ci		send->s_queued = jiffies;
63362306a36Sopenharmony_ci		send->s_op = NULL;
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ci		send->s_sge[0].addr = ic->i_send_hdrs_dma[pos];
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ci		send->s_sge[0].length = sizeof(struct rds_header);
63862306a36Sopenharmony_ci		send->s_sge[0].lkey = ic->i_pd->local_dma_lkey;
63962306a36Sopenharmony_ci
64062306a36Sopenharmony_ci		ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev,
64162306a36Sopenharmony_ci					   ic->i_send_hdrs_dma[pos],
64262306a36Sopenharmony_ci					   sizeof(struct rds_header),
64362306a36Sopenharmony_ci					   DMA_TO_DEVICE);
64462306a36Sopenharmony_ci		memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr,
64562306a36Sopenharmony_ci		       sizeof(struct rds_header));
64662306a36Sopenharmony_ci
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci		/* Set up the data, if present */
64962306a36Sopenharmony_ci		if (i < work_alloc
65062306a36Sopenharmony_ci		    && scat != &rm->data.op_sg[rm->data.op_count]) {
65162306a36Sopenharmony_ci			len = min(RDS_FRAG_SIZE,
65262306a36Sopenharmony_ci				  sg_dma_len(scat) - rm->data.op_dmaoff);
65362306a36Sopenharmony_ci			send->s_wr.num_sge = 2;
65462306a36Sopenharmony_ci
65562306a36Sopenharmony_ci			send->s_sge[1].addr = sg_dma_address(scat);
65662306a36Sopenharmony_ci			send->s_sge[1].addr += rm->data.op_dmaoff;
65762306a36Sopenharmony_ci			send->s_sge[1].length = len;
65862306a36Sopenharmony_ci			send->s_sge[1].lkey = ic->i_pd->local_dma_lkey;
65962306a36Sopenharmony_ci
66062306a36Sopenharmony_ci			bytes_sent += len;
66162306a36Sopenharmony_ci			rm->data.op_dmaoff += len;
66262306a36Sopenharmony_ci			if (rm->data.op_dmaoff == sg_dma_len(scat)) {
66362306a36Sopenharmony_ci				scat++;
66462306a36Sopenharmony_ci				rm->data.op_dmasg++;
66562306a36Sopenharmony_ci				rm->data.op_dmaoff = 0;
66662306a36Sopenharmony_ci			}
66762306a36Sopenharmony_ci		}
66862306a36Sopenharmony_ci
66962306a36Sopenharmony_ci		rds_ib_set_wr_signal_state(ic, send, false);
67062306a36Sopenharmony_ci
67162306a36Sopenharmony_ci		/*
67262306a36Sopenharmony_ci		 * Always signal the last one if we're stopping due to flow control.
67362306a36Sopenharmony_ci		 */
67462306a36Sopenharmony_ci		if (ic->i_flowctl && flow_controlled && i == (work_alloc - 1)) {
67562306a36Sopenharmony_ci			rds_ib_set_wr_signal_state(ic, send, true);
67662306a36Sopenharmony_ci			send->s_wr.send_flags |= IB_SEND_SOLICITED;
67762306a36Sopenharmony_ci		}
67862306a36Sopenharmony_ci
67962306a36Sopenharmony_ci		if (send->s_wr.send_flags & IB_SEND_SIGNALED)
68062306a36Sopenharmony_ci			nr_sig++;
68162306a36Sopenharmony_ci
68262306a36Sopenharmony_ci		rdsdebug("send %p wr %p num_sge %u next %p\n", send,
68362306a36Sopenharmony_ci			 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
68462306a36Sopenharmony_ci
68562306a36Sopenharmony_ci		if (ic->i_flowctl && adv_credits) {
68662306a36Sopenharmony_ci			struct rds_header *hdr = ic->i_send_hdrs[pos];
68762306a36Sopenharmony_ci
68862306a36Sopenharmony_ci			/* add credit and redo the header checksum */
68962306a36Sopenharmony_ci			hdr->h_credit = adv_credits;
69062306a36Sopenharmony_ci			rds_message_make_checksum(hdr);
69162306a36Sopenharmony_ci			adv_credits = 0;
69262306a36Sopenharmony_ci			rds_ib_stats_inc(s_ib_tx_credit_updates);
69362306a36Sopenharmony_ci		}
69462306a36Sopenharmony_ci		ib_dma_sync_single_for_device(ic->rds_ibdev->dev,
69562306a36Sopenharmony_ci					      ic->i_send_hdrs_dma[pos],
69662306a36Sopenharmony_ci					      sizeof(struct rds_header),
69762306a36Sopenharmony_ci					      DMA_TO_DEVICE);
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ci		if (prev)
70062306a36Sopenharmony_ci			prev->s_wr.next = &send->s_wr;
70162306a36Sopenharmony_ci		prev = send;
70262306a36Sopenharmony_ci
70362306a36Sopenharmony_ci		pos = (pos + 1) % ic->i_send_ring.w_nr;
70462306a36Sopenharmony_ci		send = &ic->i_sends[pos];
70562306a36Sopenharmony_ci		i++;
70662306a36Sopenharmony_ci
70762306a36Sopenharmony_ci	} while (i < work_alloc
70862306a36Sopenharmony_ci		 && scat != &rm->data.op_sg[rm->data.op_count]);
70962306a36Sopenharmony_ci
71062306a36Sopenharmony_ci	/* Account the RDS header in the number of bytes we sent, but just once.
71162306a36Sopenharmony_ci	 * The caller has no concept of fragmentation. */
71262306a36Sopenharmony_ci	if (hdr_off == 0)
71362306a36Sopenharmony_ci		bytes_sent += sizeof(struct rds_header);
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci	/* if we finished the message then send completion owns it */
71662306a36Sopenharmony_ci	if (scat == &rm->data.op_sg[rm->data.op_count]) {
71762306a36Sopenharmony_ci		prev->s_op = ic->i_data_op;
71862306a36Sopenharmony_ci		prev->s_wr.send_flags |= IB_SEND_SOLICITED;
71962306a36Sopenharmony_ci		if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED))
72062306a36Sopenharmony_ci			nr_sig += rds_ib_set_wr_signal_state(ic, prev, true);
72162306a36Sopenharmony_ci		ic->i_data_op = NULL;
72262306a36Sopenharmony_ci	}
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci	/* Put back wrs & credits we didn't use */
72562306a36Sopenharmony_ci	if (i < work_alloc) {
72662306a36Sopenharmony_ci		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
72762306a36Sopenharmony_ci		work_alloc = i;
72862306a36Sopenharmony_ci	}
72962306a36Sopenharmony_ci	if (ic->i_flowctl && i < credit_alloc)
73062306a36Sopenharmony_ci		rds_ib_send_add_credits(conn, credit_alloc - i);
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci	if (nr_sig)
73362306a36Sopenharmony_ci		atomic_add(nr_sig, &ic->i_signaled_sends);
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_ci	/* XXX need to worry about failed_wr and partial sends. */
73662306a36Sopenharmony_ci	failed_wr = &first->s_wr;
73762306a36Sopenharmony_ci	ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
73862306a36Sopenharmony_ci	rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
73962306a36Sopenharmony_ci		 first, &first->s_wr, ret, failed_wr);
74062306a36Sopenharmony_ci	BUG_ON(failed_wr != &first->s_wr);
74162306a36Sopenharmony_ci	if (ret) {
74262306a36Sopenharmony_ci		printk(KERN_WARNING "RDS/IB: ib_post_send to %pI6c "
74362306a36Sopenharmony_ci		       "returned %d\n", &conn->c_faddr, ret);
74462306a36Sopenharmony_ci		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
74562306a36Sopenharmony_ci		rds_ib_sub_signaled(ic, nr_sig);
74662306a36Sopenharmony_ci		if (prev->s_op) {
74762306a36Sopenharmony_ci			ic->i_data_op = prev->s_op;
74862306a36Sopenharmony_ci			prev->s_op = NULL;
74962306a36Sopenharmony_ci		}
75062306a36Sopenharmony_ci
75162306a36Sopenharmony_ci		rds_ib_conn_error(ic->conn, "ib_post_send failed\n");
75262306a36Sopenharmony_ci		goto out;
75362306a36Sopenharmony_ci	}
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci	ret = bytes_sent;
75662306a36Sopenharmony_ciout:
75762306a36Sopenharmony_ci	BUG_ON(adv_credits);
75862306a36Sopenharmony_ci	return ret;
75962306a36Sopenharmony_ci}
76062306a36Sopenharmony_ci
76162306a36Sopenharmony_ci/*
76262306a36Sopenharmony_ci * Issue atomic operation.
76362306a36Sopenharmony_ci * A simplified version of the rdma case, we always map 1 SG, and
76462306a36Sopenharmony_ci * only 8 bytes, for the return value from the atomic operation.
76562306a36Sopenharmony_ci */
76662306a36Sopenharmony_ciint rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
76762306a36Sopenharmony_ci{
76862306a36Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
76962306a36Sopenharmony_ci	struct rds_ib_send_work *send = NULL;
77062306a36Sopenharmony_ci	const struct ib_send_wr *failed_wr;
77162306a36Sopenharmony_ci	u32 pos;
77262306a36Sopenharmony_ci	u32 work_alloc;
77362306a36Sopenharmony_ci	int ret;
77462306a36Sopenharmony_ci	int nr_sig = 0;
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_ci	work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos);
77762306a36Sopenharmony_ci	if (work_alloc != 1) {
77862306a36Sopenharmony_ci		rds_ib_stats_inc(s_ib_tx_ring_full);
77962306a36Sopenharmony_ci		ret = -ENOMEM;
78062306a36Sopenharmony_ci		goto out;
78162306a36Sopenharmony_ci	}
78262306a36Sopenharmony_ci
78362306a36Sopenharmony_ci	/* address of send request in ring */
78462306a36Sopenharmony_ci	send = &ic->i_sends[pos];
78562306a36Sopenharmony_ci	send->s_queued = jiffies;
78662306a36Sopenharmony_ci
78762306a36Sopenharmony_ci	if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
78862306a36Sopenharmony_ci		send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
78962306a36Sopenharmony_ci		send->s_atomic_wr.compare_add = op->op_m_cswp.compare;
79062306a36Sopenharmony_ci		send->s_atomic_wr.swap = op->op_m_cswp.swap;
79162306a36Sopenharmony_ci		send->s_atomic_wr.compare_add_mask = op->op_m_cswp.compare_mask;
79262306a36Sopenharmony_ci		send->s_atomic_wr.swap_mask = op->op_m_cswp.swap_mask;
79362306a36Sopenharmony_ci	} else { /* FADD */
79462306a36Sopenharmony_ci		send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
79562306a36Sopenharmony_ci		send->s_atomic_wr.compare_add = op->op_m_fadd.add;
79662306a36Sopenharmony_ci		send->s_atomic_wr.swap = 0;
79762306a36Sopenharmony_ci		send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
79862306a36Sopenharmony_ci		send->s_atomic_wr.swap_mask = 0;
79962306a36Sopenharmony_ci	}
80062306a36Sopenharmony_ci	send->s_wr.send_flags = 0;
80162306a36Sopenharmony_ci	nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
80262306a36Sopenharmony_ci	send->s_atomic_wr.wr.num_sge = 1;
80362306a36Sopenharmony_ci	send->s_atomic_wr.wr.next = NULL;
80462306a36Sopenharmony_ci	send->s_atomic_wr.remote_addr = op->op_remote_addr;
80562306a36Sopenharmony_ci	send->s_atomic_wr.rkey = op->op_rkey;
80662306a36Sopenharmony_ci	send->s_op = op;
80762306a36Sopenharmony_ci	rds_message_addref(container_of(send->s_op, struct rds_message, atomic));
80862306a36Sopenharmony_ci
80962306a36Sopenharmony_ci	/* map 8 byte retval buffer to the device */
81062306a36Sopenharmony_ci	ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE);
81162306a36Sopenharmony_ci	rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret);
81262306a36Sopenharmony_ci	if (ret != 1) {
81362306a36Sopenharmony_ci		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
81462306a36Sopenharmony_ci		rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
81562306a36Sopenharmony_ci		ret = -ENOMEM; /* XXX ? */
81662306a36Sopenharmony_ci		goto out;
81762306a36Sopenharmony_ci	}
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_ci	/* Convert our struct scatterlist to struct ib_sge */
82062306a36Sopenharmony_ci	send->s_sge[0].addr = sg_dma_address(op->op_sg);
82162306a36Sopenharmony_ci	send->s_sge[0].length = sg_dma_len(op->op_sg);
82262306a36Sopenharmony_ci	send->s_sge[0].lkey = ic->i_pd->local_dma_lkey;
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_ci	rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
82562306a36Sopenharmony_ci		 send->s_sge[0].addr, send->s_sge[0].length);
82662306a36Sopenharmony_ci
82762306a36Sopenharmony_ci	if (nr_sig)
82862306a36Sopenharmony_ci		atomic_add(nr_sig, &ic->i_signaled_sends);
82962306a36Sopenharmony_ci
83062306a36Sopenharmony_ci	failed_wr = &send->s_atomic_wr.wr;
83162306a36Sopenharmony_ci	ret = ib_post_send(ic->i_cm_id->qp, &send->s_atomic_wr.wr, &failed_wr);
83262306a36Sopenharmony_ci	rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
83362306a36Sopenharmony_ci		 send, &send->s_atomic_wr, ret, failed_wr);
83462306a36Sopenharmony_ci	BUG_ON(failed_wr != &send->s_atomic_wr.wr);
83562306a36Sopenharmony_ci	if (ret) {
83662306a36Sopenharmony_ci		printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI6c "
83762306a36Sopenharmony_ci		       "returned %d\n", &conn->c_faddr, ret);
83862306a36Sopenharmony_ci		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
83962306a36Sopenharmony_ci		rds_ib_sub_signaled(ic, nr_sig);
84062306a36Sopenharmony_ci		goto out;
84162306a36Sopenharmony_ci	}
84262306a36Sopenharmony_ci
84362306a36Sopenharmony_ci	if (unlikely(failed_wr != &send->s_atomic_wr.wr)) {
84462306a36Sopenharmony_ci		printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
84562306a36Sopenharmony_ci		BUG_ON(failed_wr != &send->s_atomic_wr.wr);
84662306a36Sopenharmony_ci	}
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ciout:
84962306a36Sopenharmony_ci	return ret;
85062306a36Sopenharmony_ci}
85162306a36Sopenharmony_ci
85262306a36Sopenharmony_ciint rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
85362306a36Sopenharmony_ci{
85462306a36Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
85562306a36Sopenharmony_ci	struct rds_ib_send_work *send = NULL;
85662306a36Sopenharmony_ci	struct rds_ib_send_work *first;
85762306a36Sopenharmony_ci	struct rds_ib_send_work *prev;
85862306a36Sopenharmony_ci	const struct ib_send_wr *failed_wr;
85962306a36Sopenharmony_ci	struct scatterlist *scat;
86062306a36Sopenharmony_ci	unsigned long len;
86162306a36Sopenharmony_ci	u64 remote_addr = op->op_remote_addr;
86262306a36Sopenharmony_ci	u32 max_sge = ic->rds_ibdev->max_sge;
86362306a36Sopenharmony_ci	u32 pos;
86462306a36Sopenharmony_ci	u32 work_alloc;
86562306a36Sopenharmony_ci	u32 i;
86662306a36Sopenharmony_ci	u32 j;
86762306a36Sopenharmony_ci	int sent;
86862306a36Sopenharmony_ci	int ret;
86962306a36Sopenharmony_ci	int num_sge;
87062306a36Sopenharmony_ci	int nr_sig = 0;
87162306a36Sopenharmony_ci	u64 odp_addr = op->op_odp_addr;
87262306a36Sopenharmony_ci	u32 odp_lkey = 0;
87362306a36Sopenharmony_ci
87462306a36Sopenharmony_ci	/* map the op the first time we see it */
87562306a36Sopenharmony_ci	if (!op->op_odp_mr) {
87662306a36Sopenharmony_ci		if (!op->op_mapped) {
87762306a36Sopenharmony_ci			op->op_count =
87862306a36Sopenharmony_ci				ib_dma_map_sg(ic->i_cm_id->device, op->op_sg,
87962306a36Sopenharmony_ci					      op->op_nents,
88062306a36Sopenharmony_ci					      (op->op_write) ? DMA_TO_DEVICE :
88162306a36Sopenharmony_ci							       DMA_FROM_DEVICE);
88262306a36Sopenharmony_ci			rdsdebug("ic %p mapping op %p: %d\n", ic, op,
88362306a36Sopenharmony_ci				 op->op_count);
88462306a36Sopenharmony_ci			if (op->op_count == 0) {
88562306a36Sopenharmony_ci				rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
88662306a36Sopenharmony_ci				ret = -ENOMEM; /* XXX ? */
88762306a36Sopenharmony_ci				goto out;
88862306a36Sopenharmony_ci			}
88962306a36Sopenharmony_ci			op->op_mapped = 1;
89062306a36Sopenharmony_ci		}
89162306a36Sopenharmony_ci	} else {
89262306a36Sopenharmony_ci		op->op_count = op->op_nents;
89362306a36Sopenharmony_ci		odp_lkey = rds_ib_get_lkey(op->op_odp_mr->r_trans_private);
89462306a36Sopenharmony_ci	}
89562306a36Sopenharmony_ci
89662306a36Sopenharmony_ci	/*
89762306a36Sopenharmony_ci	 * Instead of knowing how to return a partial rdma read/write we insist that there
89862306a36Sopenharmony_ci	 * be enough work requests to send the entire message.
89962306a36Sopenharmony_ci	 */
90062306a36Sopenharmony_ci	i = DIV_ROUND_UP(op->op_count, max_sge);
90162306a36Sopenharmony_ci
90262306a36Sopenharmony_ci	work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
90362306a36Sopenharmony_ci	if (work_alloc != i) {
90462306a36Sopenharmony_ci		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
90562306a36Sopenharmony_ci		rds_ib_stats_inc(s_ib_tx_ring_full);
90662306a36Sopenharmony_ci		ret = -ENOMEM;
90762306a36Sopenharmony_ci		goto out;
90862306a36Sopenharmony_ci	}
90962306a36Sopenharmony_ci
91062306a36Sopenharmony_ci	send = &ic->i_sends[pos];
91162306a36Sopenharmony_ci	first = send;
91262306a36Sopenharmony_ci	prev = NULL;
91362306a36Sopenharmony_ci	scat = &op->op_sg[0];
91462306a36Sopenharmony_ci	sent = 0;
91562306a36Sopenharmony_ci	num_sge = op->op_count;
91662306a36Sopenharmony_ci
91762306a36Sopenharmony_ci	for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
91862306a36Sopenharmony_ci		send->s_wr.send_flags = 0;
91962306a36Sopenharmony_ci		send->s_queued = jiffies;
92062306a36Sopenharmony_ci		send->s_op = NULL;
92162306a36Sopenharmony_ci
92262306a36Sopenharmony_ci		if (!op->op_notify)
92362306a36Sopenharmony_ci			nr_sig += rds_ib_set_wr_signal_state(ic, send,
92462306a36Sopenharmony_ci							     op->op_notify);
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci		send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
92762306a36Sopenharmony_ci		send->s_rdma_wr.remote_addr = remote_addr;
92862306a36Sopenharmony_ci		send->s_rdma_wr.rkey = op->op_rkey;
92962306a36Sopenharmony_ci
93062306a36Sopenharmony_ci		if (num_sge > max_sge) {
93162306a36Sopenharmony_ci			send->s_rdma_wr.wr.num_sge = max_sge;
93262306a36Sopenharmony_ci			num_sge -= max_sge;
93362306a36Sopenharmony_ci		} else {
93462306a36Sopenharmony_ci			send->s_rdma_wr.wr.num_sge = num_sge;
93562306a36Sopenharmony_ci		}
93662306a36Sopenharmony_ci
93762306a36Sopenharmony_ci		send->s_rdma_wr.wr.next = NULL;
93862306a36Sopenharmony_ci
93962306a36Sopenharmony_ci		if (prev)
94062306a36Sopenharmony_ci			prev->s_rdma_wr.wr.next = &send->s_rdma_wr.wr;
94162306a36Sopenharmony_ci
94262306a36Sopenharmony_ci		for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
94362306a36Sopenharmony_ci		     scat != &op->op_sg[op->op_count]; j++) {
94462306a36Sopenharmony_ci			len = sg_dma_len(scat);
94562306a36Sopenharmony_ci			if (!op->op_odp_mr) {
94662306a36Sopenharmony_ci				send->s_sge[j].addr = sg_dma_address(scat);
94762306a36Sopenharmony_ci				send->s_sge[j].lkey = ic->i_pd->local_dma_lkey;
94862306a36Sopenharmony_ci			} else {
94962306a36Sopenharmony_ci				send->s_sge[j].addr = odp_addr;
95062306a36Sopenharmony_ci				send->s_sge[j].lkey = odp_lkey;
95162306a36Sopenharmony_ci			}
95262306a36Sopenharmony_ci			send->s_sge[j].length = len;
95362306a36Sopenharmony_ci
95462306a36Sopenharmony_ci			sent += len;
95562306a36Sopenharmony_ci			rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr);
95662306a36Sopenharmony_ci
95762306a36Sopenharmony_ci			remote_addr += len;
95862306a36Sopenharmony_ci			odp_addr += len;
95962306a36Sopenharmony_ci			scat++;
96062306a36Sopenharmony_ci		}
96162306a36Sopenharmony_ci
96262306a36Sopenharmony_ci		rdsdebug("send %p wr %p num_sge %u next %p\n", send,
96362306a36Sopenharmony_ci			&send->s_rdma_wr.wr,
96462306a36Sopenharmony_ci			send->s_rdma_wr.wr.num_sge,
96562306a36Sopenharmony_ci			send->s_rdma_wr.wr.next);
96662306a36Sopenharmony_ci
96762306a36Sopenharmony_ci		prev = send;
96862306a36Sopenharmony_ci		if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
96962306a36Sopenharmony_ci			send = ic->i_sends;
97062306a36Sopenharmony_ci	}
97162306a36Sopenharmony_ci
97262306a36Sopenharmony_ci	/* give a reference to the last op */
97362306a36Sopenharmony_ci	if (scat == &op->op_sg[op->op_count]) {
97462306a36Sopenharmony_ci		prev->s_op = op;
97562306a36Sopenharmony_ci		rds_message_addref(container_of(op, struct rds_message, rdma));
97662306a36Sopenharmony_ci	}
97762306a36Sopenharmony_ci
97862306a36Sopenharmony_ci	if (i < work_alloc) {
97962306a36Sopenharmony_ci		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
98062306a36Sopenharmony_ci		work_alloc = i;
98162306a36Sopenharmony_ci	}
98262306a36Sopenharmony_ci
98362306a36Sopenharmony_ci	if (nr_sig)
98462306a36Sopenharmony_ci		atomic_add(nr_sig, &ic->i_signaled_sends);
98562306a36Sopenharmony_ci
98662306a36Sopenharmony_ci	failed_wr = &first->s_rdma_wr.wr;
98762306a36Sopenharmony_ci	ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
98862306a36Sopenharmony_ci	rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
98962306a36Sopenharmony_ci		 first, &first->s_rdma_wr.wr, ret, failed_wr);
99062306a36Sopenharmony_ci	BUG_ON(failed_wr != &first->s_rdma_wr.wr);
99162306a36Sopenharmony_ci	if (ret) {
99262306a36Sopenharmony_ci		printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI6c "
99362306a36Sopenharmony_ci		       "returned %d\n", &conn->c_faddr, ret);
99462306a36Sopenharmony_ci		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
99562306a36Sopenharmony_ci		rds_ib_sub_signaled(ic, nr_sig);
99662306a36Sopenharmony_ci		goto out;
99762306a36Sopenharmony_ci	}
99862306a36Sopenharmony_ci
99962306a36Sopenharmony_ci	if (unlikely(failed_wr != &first->s_rdma_wr.wr)) {
100062306a36Sopenharmony_ci		printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
100162306a36Sopenharmony_ci		BUG_ON(failed_wr != &first->s_rdma_wr.wr);
100262306a36Sopenharmony_ci	}
100362306a36Sopenharmony_ci
100462306a36Sopenharmony_ci
100562306a36Sopenharmony_ciout:
100662306a36Sopenharmony_ci	return ret;
100762306a36Sopenharmony_ci}
100862306a36Sopenharmony_ci
100962306a36Sopenharmony_civoid rds_ib_xmit_path_complete(struct rds_conn_path *cp)
101062306a36Sopenharmony_ci{
101162306a36Sopenharmony_ci	struct rds_connection *conn = cp->cp_conn;
101262306a36Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
101362306a36Sopenharmony_ci
101462306a36Sopenharmony_ci	/* We may have a pending ACK or window update we were unable
101562306a36Sopenharmony_ci	 * to send previously (due to flow control). Try again. */
101662306a36Sopenharmony_ci	rds_ib_attempt_ack(ic);
101762306a36Sopenharmony_ci}
1018