18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright(c) 2018 - 2020 Intel Corporation.
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci */
68c2ecf20Sopenharmony_ci
78c2ecf20Sopenharmony_ci#include "hfi.h"
88c2ecf20Sopenharmony_ci#include "qp.h"
98c2ecf20Sopenharmony_ci#include "rc.h"
108c2ecf20Sopenharmony_ci#include "verbs.h"
118c2ecf20Sopenharmony_ci#include "tid_rdma.h"
128c2ecf20Sopenharmony_ci#include "exp_rcv.h"
138c2ecf20Sopenharmony_ci#include "trace.h"
148c2ecf20Sopenharmony_ci
158c2ecf20Sopenharmony_ci/**
168c2ecf20Sopenharmony_ci * DOC: TID RDMA READ protocol
178c2ecf20Sopenharmony_ci *
188c2ecf20Sopenharmony_ci * This is an end-to-end protocol at the hfi1 level between two nodes that
198c2ecf20Sopenharmony_ci * improves performance by avoiding data copy on the requester side. It
208c2ecf20Sopenharmony_ci * converts a qualified RDMA READ request into a TID RDMA READ request on
218c2ecf20Sopenharmony_ci * the requester side and thereafter handles the request and response
228c2ecf20Sopenharmony_ci * differently. To be qualified, the RDMA READ request should meet the
238c2ecf20Sopenharmony_ci * following:
248c2ecf20Sopenharmony_ci * -- The total data length should be greater than 256K;
258c2ecf20Sopenharmony_ci * -- The total data length should be a multiple of 4K page size;
268c2ecf20Sopenharmony_ci * -- Each local scatter-gather entry should be 4K page aligned;
278c2ecf20Sopenharmony_ci * -- Each local scatter-gather entry should be a multiple of 4K page size;
288c2ecf20Sopenharmony_ci */
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_ci#define RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK BIT_ULL(32)
318c2ecf20Sopenharmony_ci#define RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK BIT_ULL(33)
328c2ecf20Sopenharmony_ci#define RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK BIT_ULL(34)
338c2ecf20Sopenharmony_ci#define RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK BIT_ULL(35)
348c2ecf20Sopenharmony_ci#define RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK BIT_ULL(37)
358c2ecf20Sopenharmony_ci#define RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK BIT_ULL(38)
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_ci/* Maximum number of packets within a flow generation. */
388c2ecf20Sopenharmony_ci#define MAX_TID_FLOW_PSN BIT(HFI1_KDETH_BTH_SEQ_SHIFT)
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ci#define GENERATION_MASK 0xFFFFF
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_cistatic u32 mask_generation(u32 a)
438c2ecf20Sopenharmony_ci{
448c2ecf20Sopenharmony_ci	return a & GENERATION_MASK;
458c2ecf20Sopenharmony_ci}
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci/* Reserved generation value to set to unused flows for kernel contexts */
488c2ecf20Sopenharmony_ci#define KERN_GENERATION_RESERVED mask_generation(U32_MAX)
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci/*
518c2ecf20Sopenharmony_ci * J_KEY for kernel contexts when TID RDMA is used.
528c2ecf20Sopenharmony_ci * See generate_jkey() in hfi.h for more information.
538c2ecf20Sopenharmony_ci */
548c2ecf20Sopenharmony_ci#define TID_RDMA_JKEY                   32
558c2ecf20Sopenharmony_ci#define HFI1_KERNEL_MIN_JKEY HFI1_ADMIN_JKEY_RANGE
568c2ecf20Sopenharmony_ci#define HFI1_KERNEL_MAX_JKEY (2 * HFI1_ADMIN_JKEY_RANGE - 1)
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_ci/* Maximum number of segments in flight per QP request. */
598c2ecf20Sopenharmony_ci#define TID_RDMA_MAX_READ_SEGS_PER_REQ  6
608c2ecf20Sopenharmony_ci#define TID_RDMA_MAX_WRITE_SEGS_PER_REQ 4
618c2ecf20Sopenharmony_ci#define MAX_REQ max_t(u16, TID_RDMA_MAX_READ_SEGS_PER_REQ, \
628c2ecf20Sopenharmony_ci			TID_RDMA_MAX_WRITE_SEGS_PER_REQ)
638c2ecf20Sopenharmony_ci#define MAX_FLOWS roundup_pow_of_two(MAX_REQ + 1)
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_ci#define MAX_EXPECTED_PAGES     (MAX_EXPECTED_BUFFER / PAGE_SIZE)
668c2ecf20Sopenharmony_ci
678c2ecf20Sopenharmony_ci#define TID_RDMA_DESTQP_FLOW_SHIFT      11
688c2ecf20Sopenharmony_ci#define TID_RDMA_DESTQP_FLOW_MASK       0x1f
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci#define TID_OPFN_QP_CTXT_MASK 0xff
718c2ecf20Sopenharmony_ci#define TID_OPFN_QP_CTXT_SHIFT 56
728c2ecf20Sopenharmony_ci#define TID_OPFN_QP_KDETH_MASK 0xff
738c2ecf20Sopenharmony_ci#define TID_OPFN_QP_KDETH_SHIFT 48
748c2ecf20Sopenharmony_ci#define TID_OPFN_MAX_LEN_MASK 0x7ff
758c2ecf20Sopenharmony_ci#define TID_OPFN_MAX_LEN_SHIFT 37
768c2ecf20Sopenharmony_ci#define TID_OPFN_TIMEOUT_MASK 0x1f
778c2ecf20Sopenharmony_ci#define TID_OPFN_TIMEOUT_SHIFT 32
788c2ecf20Sopenharmony_ci#define TID_OPFN_RESERVED_MASK 0x3f
798c2ecf20Sopenharmony_ci#define TID_OPFN_RESERVED_SHIFT 26
808c2ecf20Sopenharmony_ci#define TID_OPFN_URG_MASK 0x1
818c2ecf20Sopenharmony_ci#define TID_OPFN_URG_SHIFT 25
828c2ecf20Sopenharmony_ci#define TID_OPFN_VER_MASK 0x7
838c2ecf20Sopenharmony_ci#define TID_OPFN_VER_SHIFT 22
848c2ecf20Sopenharmony_ci#define TID_OPFN_JKEY_MASK 0x3f
858c2ecf20Sopenharmony_ci#define TID_OPFN_JKEY_SHIFT 16
868c2ecf20Sopenharmony_ci#define TID_OPFN_MAX_READ_MASK 0x3f
878c2ecf20Sopenharmony_ci#define TID_OPFN_MAX_READ_SHIFT 10
888c2ecf20Sopenharmony_ci#define TID_OPFN_MAX_WRITE_MASK 0x3f
898c2ecf20Sopenharmony_ci#define TID_OPFN_MAX_WRITE_SHIFT 4
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_ci/*
928c2ecf20Sopenharmony_ci * OPFN TID layout
938c2ecf20Sopenharmony_ci *
948c2ecf20Sopenharmony_ci * 63               47               31               15
958c2ecf20Sopenharmony_ci * NNNNNNNNKKKKKKKK MMMMMMMMMMMTTTTT DDDDDDUVVVJJJJJJ RRRRRRWWWWWWCCCC
968c2ecf20Sopenharmony_ci * 3210987654321098 7654321098765432 1098765432109876 5432109876543210
978c2ecf20Sopenharmony_ci * N - the context Number
988c2ecf20Sopenharmony_ci * K - the Kdeth_qp
998c2ecf20Sopenharmony_ci * M - Max_len
1008c2ecf20Sopenharmony_ci * T - Timeout
1018c2ecf20Sopenharmony_ci * D - reserveD
1028c2ecf20Sopenharmony_ci * V - version
1038c2ecf20Sopenharmony_ci * U - Urg capable
1048c2ecf20Sopenharmony_ci * J - Jkey
1058c2ecf20Sopenharmony_ci * R - max_Read
1068c2ecf20Sopenharmony_ci * W - max_Write
1078c2ecf20Sopenharmony_ci * C - Capcode
1088c2ecf20Sopenharmony_ci */
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_cistatic void tid_rdma_trigger_resume(struct work_struct *work);
1118c2ecf20Sopenharmony_cistatic void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
1128c2ecf20Sopenharmony_cistatic int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
1138c2ecf20Sopenharmony_ci					 gfp_t gfp);
1148c2ecf20Sopenharmony_cistatic void hfi1_init_trdma_req(struct rvt_qp *qp,
1158c2ecf20Sopenharmony_ci				struct tid_rdma_request *req);
1168c2ecf20Sopenharmony_cistatic void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx);
1178c2ecf20Sopenharmony_cistatic void hfi1_tid_timeout(struct timer_list *t);
1188c2ecf20Sopenharmony_cistatic void hfi1_add_tid_reap_timer(struct rvt_qp *qp);
1198c2ecf20Sopenharmony_cistatic void hfi1_mod_tid_reap_timer(struct rvt_qp *qp);
1208c2ecf20Sopenharmony_cistatic void hfi1_mod_tid_retry_timer(struct rvt_qp *qp);
1218c2ecf20Sopenharmony_cistatic int hfi1_stop_tid_retry_timer(struct rvt_qp *qp);
1228c2ecf20Sopenharmony_cistatic void hfi1_tid_retry_timeout(struct timer_list *t);
1238c2ecf20Sopenharmony_cistatic int make_tid_rdma_ack(struct rvt_qp *qp,
1248c2ecf20Sopenharmony_ci			     struct ib_other_headers *ohdr,
1258c2ecf20Sopenharmony_ci			     struct hfi1_pkt_state *ps);
1268c2ecf20Sopenharmony_cistatic void hfi1_do_tid_send(struct rvt_qp *qp);
1278c2ecf20Sopenharmony_cistatic u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx);
1288c2ecf20Sopenharmony_cistatic void tid_rdma_rcv_err(struct hfi1_packet *packet,
1298c2ecf20Sopenharmony_ci			     struct ib_other_headers *ohdr,
1308c2ecf20Sopenharmony_ci			     struct rvt_qp *qp, u32 psn, int diff, bool fecn);
1318c2ecf20Sopenharmony_cistatic void update_r_next_psn_fecn(struct hfi1_packet *packet,
1328c2ecf20Sopenharmony_ci				   struct hfi1_qp_priv *priv,
1338c2ecf20Sopenharmony_ci				   struct hfi1_ctxtdata *rcd,
1348c2ecf20Sopenharmony_ci				   struct tid_rdma_flow *flow,
1358c2ecf20Sopenharmony_ci				   bool fecn);
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_cistatic void validate_r_tid_ack(struct hfi1_qp_priv *priv)
1388c2ecf20Sopenharmony_ci{
1398c2ecf20Sopenharmony_ci	if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
1408c2ecf20Sopenharmony_ci		priv->r_tid_ack = priv->r_tid_tail;
1418c2ecf20Sopenharmony_ci}
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_cistatic void tid_rdma_schedule_ack(struct rvt_qp *qp)
1448c2ecf20Sopenharmony_ci{
1458c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci	priv->s_flags |= RVT_S_ACK_PENDING;
1488c2ecf20Sopenharmony_ci	hfi1_schedule_tid_send(qp);
1498c2ecf20Sopenharmony_ci}
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_cistatic void tid_rdma_trigger_ack(struct rvt_qp *qp)
1528c2ecf20Sopenharmony_ci{
1538c2ecf20Sopenharmony_ci	validate_r_tid_ack(qp->priv);
1548c2ecf20Sopenharmony_ci	tid_rdma_schedule_ack(qp);
1558c2ecf20Sopenharmony_ci}
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_cistatic u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
1588c2ecf20Sopenharmony_ci{
1598c2ecf20Sopenharmony_ci	return
1608c2ecf20Sopenharmony_ci		(((u64)p->qp & TID_OPFN_QP_CTXT_MASK) <<
1618c2ecf20Sopenharmony_ci			TID_OPFN_QP_CTXT_SHIFT) |
1628c2ecf20Sopenharmony_ci		((((u64)p->qp >> 16) & TID_OPFN_QP_KDETH_MASK) <<
1638c2ecf20Sopenharmony_ci			TID_OPFN_QP_KDETH_SHIFT) |
1648c2ecf20Sopenharmony_ci		(((u64)((p->max_len >> PAGE_SHIFT) - 1) &
1658c2ecf20Sopenharmony_ci			TID_OPFN_MAX_LEN_MASK) << TID_OPFN_MAX_LEN_SHIFT) |
1668c2ecf20Sopenharmony_ci		(((u64)p->timeout & TID_OPFN_TIMEOUT_MASK) <<
1678c2ecf20Sopenharmony_ci			TID_OPFN_TIMEOUT_SHIFT) |
1688c2ecf20Sopenharmony_ci		(((u64)p->urg & TID_OPFN_URG_MASK) << TID_OPFN_URG_SHIFT) |
1698c2ecf20Sopenharmony_ci		(((u64)p->jkey & TID_OPFN_JKEY_MASK) << TID_OPFN_JKEY_SHIFT) |
1708c2ecf20Sopenharmony_ci		(((u64)p->max_read & TID_OPFN_MAX_READ_MASK) <<
1718c2ecf20Sopenharmony_ci			TID_OPFN_MAX_READ_SHIFT) |
1728c2ecf20Sopenharmony_ci		(((u64)p->max_write & TID_OPFN_MAX_WRITE_MASK) <<
1738c2ecf20Sopenharmony_ci			TID_OPFN_MAX_WRITE_SHIFT);
1748c2ecf20Sopenharmony_ci}
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_cistatic void tid_rdma_opfn_decode(struct tid_rdma_params *p, u64 data)
1778c2ecf20Sopenharmony_ci{
1788c2ecf20Sopenharmony_ci	p->max_len = (((data >> TID_OPFN_MAX_LEN_SHIFT) &
1798c2ecf20Sopenharmony_ci		TID_OPFN_MAX_LEN_MASK) + 1) << PAGE_SHIFT;
1808c2ecf20Sopenharmony_ci	p->jkey = (data >> TID_OPFN_JKEY_SHIFT) & TID_OPFN_JKEY_MASK;
1818c2ecf20Sopenharmony_ci	p->max_write = (data >> TID_OPFN_MAX_WRITE_SHIFT) &
1828c2ecf20Sopenharmony_ci		TID_OPFN_MAX_WRITE_MASK;
1838c2ecf20Sopenharmony_ci	p->max_read = (data >> TID_OPFN_MAX_READ_SHIFT) &
1848c2ecf20Sopenharmony_ci		TID_OPFN_MAX_READ_MASK;
1858c2ecf20Sopenharmony_ci	p->qp =
1868c2ecf20Sopenharmony_ci		((((data >> TID_OPFN_QP_KDETH_SHIFT) & TID_OPFN_QP_KDETH_MASK)
1878c2ecf20Sopenharmony_ci			<< 16) |
1888c2ecf20Sopenharmony_ci		((data >> TID_OPFN_QP_CTXT_SHIFT) & TID_OPFN_QP_CTXT_MASK));
1898c2ecf20Sopenharmony_ci	p->urg = (data >> TID_OPFN_URG_SHIFT) & TID_OPFN_URG_MASK;
1908c2ecf20Sopenharmony_ci	p->timeout = (data >> TID_OPFN_TIMEOUT_SHIFT) & TID_OPFN_TIMEOUT_MASK;
1918c2ecf20Sopenharmony_ci}
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_civoid tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p)
1948c2ecf20Sopenharmony_ci{
1958c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_ci	p->qp = (RVT_KDETH_QP_PREFIX << 16) | priv->rcd->ctxt;
1988c2ecf20Sopenharmony_ci	p->max_len = TID_RDMA_MAX_SEGMENT_SIZE;
1998c2ecf20Sopenharmony_ci	p->jkey = priv->rcd->jkey;
2008c2ecf20Sopenharmony_ci	p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ;
2018c2ecf20Sopenharmony_ci	p->max_write = TID_RDMA_MAX_WRITE_SEGS_PER_REQ;
2028c2ecf20Sopenharmony_ci	p->timeout = qp->timeout;
2038c2ecf20Sopenharmony_ci	p->urg = is_urg_masked(priv->rcd);
2048c2ecf20Sopenharmony_ci}
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_cibool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data)
2078c2ecf20Sopenharmony_ci{
2088c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	*data = tid_rdma_opfn_encode(&priv->tid_rdma.local);
2118c2ecf20Sopenharmony_ci	return true;
2128c2ecf20Sopenharmony_ci}
2138c2ecf20Sopenharmony_ci
/*
 * Process the remote end's TID RDMA parameters carried in an OPFN reply
 * and publish them as priv->tid_rdma.remote (RCU-protected).
 *
 * Called with priv->opfn.lock held (see the rcu_dereference_protected
 * condition below).  Returns false only when the new parameter block
 * cannot be allocated; every other outcome returns true so the OPFN
 * negotiation stops.
 */
bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct tid_rdma_params *remote, *old;
	bool ret = true;

	old = rcu_dereference_protected(priv->tid_rdma.remote,
					lockdep_is_held(&priv->opfn.lock));
	/* Strip the low OPFN capcode bits before decoding. */
	data &= ~0xfULL;
	/*
	 * If data passed in is zero, return true so as not to continue the
	 * negotiation process
	 */
	if (!data || !HFI1_CAP_IS_KSET(TID_RDMA))
		goto null;
	/*
	 * If kzalloc fails, return false. This will result in:
	 * * at the requester a new OPFN request being generated to retry
	 *   the negotiation
	 * * at the responder, 0 being returned to the requester so as to
	 *   disable TID RDMA at both the requester and the responder
	 */
	remote = kzalloc(sizeof(*remote), GFP_ATOMIC);
	if (!remote) {
		ret = false;
		goto null;
	}

	tid_rdma_opfn_decode(remote, data);
	/*
	 * Timer period derived from the remote timeout exponent
	 * (4096 us << timeout); the "<< 3) * 7" factor scales it by 56 —
	 * presumably 7/8 of 64x; NOTE(review): confirm intended scaling.
	 */
	priv->tid_timer_timeout_jiffies =
		usecs_to_jiffies((((4096UL * (1UL << remote->timeout)) /
				   1000UL) << 3) * 7);
	trace_hfi1_opfn_param(qp, 0, &priv->tid_rdma.local);
	trace_hfi1_opfn_param(qp, 1, remote);
	/* Publish the new parameters for readers before freeing the old. */
	rcu_assign_pointer(priv->tid_rdma.remote, remote);
	/*
	 * A TID RDMA READ request's segment size is not equal to
	 * remote->max_len only when the request's data length is smaller
	 * than remote->max_len. In that case, there will be only one segment.
	 * Therefore, when priv->pkts_ps is used to calculate req->cur_seg
	 * during retry, it will lead to req->cur_seg = 0, which is exactly
	 * what is expected.
	 */
	priv->pkts_ps = (u16)rvt_div_mtu(qp, remote->max_len);
	priv->timeout_shift = ilog2(priv->pkts_ps - 1) + 1;
	goto free;
null:
	/* TID RDMA disabled: clear the remote params and timeout shift. */
	RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
	priv->timeout_shift = 0;
free:
	/* Old parameters, if any, are freed after an RCU grace period. */
	if (old)
		kfree_rcu(old, rcu_head);
	return ret;
}
2688c2ecf20Sopenharmony_ci
2698c2ecf20Sopenharmony_cibool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data)
2708c2ecf20Sopenharmony_ci{
2718c2ecf20Sopenharmony_ci	bool ret;
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_ci	ret = tid_rdma_conn_reply(qp, *data);
2748c2ecf20Sopenharmony_ci	*data = 0;
2758c2ecf20Sopenharmony_ci	/*
2768c2ecf20Sopenharmony_ci	 * If tid_rdma_conn_reply() returns error, set *data as 0 to indicate
2778c2ecf20Sopenharmony_ci	 * TID RDMA could not be enabled. This will result in TID RDMA being
2788c2ecf20Sopenharmony_ci	 * disabled at the requester too.
2798c2ecf20Sopenharmony_ci	 */
2808c2ecf20Sopenharmony_ci	if (ret)
2818c2ecf20Sopenharmony_ci		(void)tid_rdma_conn_req(qp, data);
2828c2ecf20Sopenharmony_ci	return ret;
2838c2ecf20Sopenharmony_ci}
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_civoid tid_rdma_conn_error(struct rvt_qp *qp)
2868c2ecf20Sopenharmony_ci{
2878c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
2888c2ecf20Sopenharmony_ci	struct tid_rdma_params *old;
2898c2ecf20Sopenharmony_ci
2908c2ecf20Sopenharmony_ci	old = rcu_dereference_protected(priv->tid_rdma.remote,
2918c2ecf20Sopenharmony_ci					lockdep_is_held(&priv->opfn.lock));
2928c2ecf20Sopenharmony_ci	RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
2938c2ecf20Sopenharmony_ci	if (old)
2948c2ecf20Sopenharmony_ci		kfree_rcu(old, rcu_head);
2958c2ecf20Sopenharmony_ci}
2968c2ecf20Sopenharmony_ci
2978c2ecf20Sopenharmony_ci/* This is called at context initialization time */
2988c2ecf20Sopenharmony_ciint hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
2998c2ecf20Sopenharmony_ci{
3008c2ecf20Sopenharmony_ci	if (reinit)
3018c2ecf20Sopenharmony_ci		return 0;
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ci	BUILD_BUG_ON(TID_RDMA_JKEY < HFI1_KERNEL_MIN_JKEY);
3048c2ecf20Sopenharmony_ci	BUILD_BUG_ON(TID_RDMA_JKEY > HFI1_KERNEL_MAX_JKEY);
3058c2ecf20Sopenharmony_ci	rcd->jkey = TID_RDMA_JKEY;
3068c2ecf20Sopenharmony_ci	hfi1_set_ctxt_jkey(rcd->dd, rcd, rcd->jkey);
3078c2ecf20Sopenharmony_ci	return hfi1_alloc_ctxt_rcv_groups(rcd);
3088c2ecf20Sopenharmony_ci}
3098c2ecf20Sopenharmony_ci
3108c2ecf20Sopenharmony_ci/**
3118c2ecf20Sopenharmony_ci * qp_to_rcd - determine the receive context used by a qp
3128c2ecf20Sopenharmony_ci * @qp - the qp
3138c2ecf20Sopenharmony_ci *
3148c2ecf20Sopenharmony_ci * This routine returns the receive context associated
3158c2ecf20Sopenharmony_ci * with a a qp's qpn.
3168c2ecf20Sopenharmony_ci *
3178c2ecf20Sopenharmony_ci * Returns the context.
3188c2ecf20Sopenharmony_ci */
3198c2ecf20Sopenharmony_cistatic struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
3208c2ecf20Sopenharmony_ci				       struct rvt_qp *qp)
3218c2ecf20Sopenharmony_ci{
3228c2ecf20Sopenharmony_ci	struct hfi1_ibdev *verbs_dev = container_of(rdi,
3238c2ecf20Sopenharmony_ci						    struct hfi1_ibdev,
3248c2ecf20Sopenharmony_ci						    rdi);
3258c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd = container_of(verbs_dev,
3268c2ecf20Sopenharmony_ci					       struct hfi1_devdata,
3278c2ecf20Sopenharmony_ci					       verbs_dev);
3288c2ecf20Sopenharmony_ci	unsigned int ctxt;
3298c2ecf20Sopenharmony_ci
3308c2ecf20Sopenharmony_ci	if (qp->ibqp.qp_num == 0)
3318c2ecf20Sopenharmony_ci		ctxt = 0;
3328c2ecf20Sopenharmony_ci	else
3338c2ecf20Sopenharmony_ci		ctxt = hfi1_get_qp_map(dd, qp->ibqp.qp_num >> dd->qos_shift);
3348c2ecf20Sopenharmony_ci	return dd->rcd[ctxt];
3358c2ecf20Sopenharmony_ci}
3368c2ecf20Sopenharmony_ci
/*
 * hfi1_qp_priv_init - initialize the TID RDMA private state of a qp
 * @rdi: the rdmavt device info
 * @qp: the qp
 * @init_attr: the qp creation attributes
 *
 * Sets up OPFN/TID RDMA bookkeeping for every qp.  For RC qps with the
 * TID_RDMA capability set it additionally allocates the page shadow
 * array, a per-SWQE tid_rdma_request, and a per-ack-queue-entry
 * tid_rdma_request with its flow array.
 *
 * Return: 0 on success, -ENOMEM (or the flow-allocation error) on
 * allocation failure.  NOTE(review): resources allocated before a
 * failure are presumably released by hfi1_qp_priv_tid_free() on the
 * caller's error path — confirm against rdmavt's qp creation flow.
 */
int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
		      struct ib_qp_init_attr *init_attr)
{
	struct hfi1_qp_priv *qpriv = qp->priv;
	int i, ret;

	qpriv->rcd = qp_to_rcd(rdi, qp);

	spin_lock_init(&qpriv->opfn.lock);
	INIT_WORK(&qpriv->opfn.opfn_work, opfn_send_conn_request);
	INIT_WORK(&qpriv->tid_rdma.trigger_work, tid_rdma_trigger_resume);
	/* No hardware flow is held yet: indices/generation are "invalid". */
	qpriv->flow_state.psn = 0;
	qpriv->flow_state.index = RXE_NUM_TID_FLOWS;
	qpriv->flow_state.last_index = RXE_NUM_TID_FLOWS;
	qpriv->flow_state.generation = KERN_GENERATION_RESERVED;
	qpriv->s_state = TID_OP(WRITE_RESP);
	/* All WQE trackers start out pointing at no WQE. */
	qpriv->s_tid_cur = HFI1_QP_WQE_INVALID;
	qpriv->s_tid_head = HFI1_QP_WQE_INVALID;
	qpriv->s_tid_tail = HFI1_QP_WQE_INVALID;
	qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
	qpriv->r_tid_head = HFI1_QP_WQE_INVALID;
	qpriv->r_tid_tail = HFI1_QP_WQE_INVALID;
	qpriv->r_tid_ack = HFI1_QP_WQE_INVALID;
	qpriv->r_tid_alloc = HFI1_QP_WQE_INVALID;
	atomic_set(&qpriv->n_requests, 0);
	atomic_set(&qpriv->n_tid_requests, 0);
	timer_setup(&qpriv->s_tid_timer, hfi1_tid_timeout, 0);
	timer_setup(&qpriv->s_tid_retry_timer, hfi1_tid_retry_timeout, 0);
	INIT_LIST_HEAD(&qpriv->tid_wait);

	if (init_attr->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
		struct hfi1_devdata *dd = qpriv->rcd->dd;

		/* Shadow array for pinned pages, NUMA-local to the device. */
		qpriv->pages = kzalloc_node(TID_RDMA_MAX_PAGES *
						sizeof(*qpriv->pages),
					    GFP_KERNEL, dd->node);
		if (!qpriv->pages)
			return -ENOMEM;
		/* One TID request tracker per send WQE. */
		for (i = 0; i < qp->s_size; i++) {
			struct hfi1_swqe_priv *priv;
			struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);

			priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
					    dd->node);
			if (!priv)
				return -ENOMEM;

			hfi1_init_trdma_req(qp, &priv->tid_req);
			priv->tid_req.e.swqe = wqe;
			wqe->priv = priv;
		}
		/* One TID request tracker (with flows) per ack queue entry. */
		for (i = 0; i < rvt_max_atomic(rdi); i++) {
			struct hfi1_ack_priv *priv;

			priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
					    dd->node);
			if (!priv)
				return -ENOMEM;

			hfi1_init_trdma_req(qp, &priv->tid_req);
			priv->tid_req.e.ack = &qp->s_ack_queue[i];

			ret = hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req,
							    GFP_KERNEL);
			if (ret) {
				kfree(priv);
				return ret;
			}
			qp->s_ack_queue[i].priv = priv;
		}
	}

	return 0;
}
4118c2ecf20Sopenharmony_ci
4128c2ecf20Sopenharmony_civoid hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
4138c2ecf20Sopenharmony_ci{
4148c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
4158c2ecf20Sopenharmony_ci	struct rvt_swqe *wqe;
4168c2ecf20Sopenharmony_ci	u32 i;
4178c2ecf20Sopenharmony_ci
4188c2ecf20Sopenharmony_ci	if (qp->ibqp.qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
4198c2ecf20Sopenharmony_ci		for (i = 0; i < qp->s_size; i++) {
4208c2ecf20Sopenharmony_ci			wqe = rvt_get_swqe_ptr(qp, i);
4218c2ecf20Sopenharmony_ci			kfree(wqe->priv);
4228c2ecf20Sopenharmony_ci			wqe->priv = NULL;
4238c2ecf20Sopenharmony_ci		}
4248c2ecf20Sopenharmony_ci		for (i = 0; i < rvt_max_atomic(rdi); i++) {
4258c2ecf20Sopenharmony_ci			struct hfi1_ack_priv *priv = qp->s_ack_queue[i].priv;
4268c2ecf20Sopenharmony_ci
4278c2ecf20Sopenharmony_ci			if (priv)
4288c2ecf20Sopenharmony_ci				hfi1_kern_exp_rcv_free_flows(&priv->tid_req);
4298c2ecf20Sopenharmony_ci			kfree(priv);
4308c2ecf20Sopenharmony_ci			qp->s_ack_queue[i].priv = NULL;
4318c2ecf20Sopenharmony_ci		}
4328c2ecf20Sopenharmony_ci		cancel_work_sync(&qpriv->opfn.opfn_work);
4338c2ecf20Sopenharmony_ci		kfree(qpriv->pages);
4348c2ecf20Sopenharmony_ci		qpriv->pages = NULL;
4358c2ecf20Sopenharmony_ci	}
4368c2ecf20Sopenharmony_ci}
4378c2ecf20Sopenharmony_ci
4388c2ecf20Sopenharmony_ci/* Flow and tid waiter functions */
4398c2ecf20Sopenharmony_ci/**
4408c2ecf20Sopenharmony_ci * DOC: lock ordering
4418c2ecf20Sopenharmony_ci *
4428c2ecf20Sopenharmony_ci * There are two locks involved with the queuing
4438c2ecf20Sopenharmony_ci * routines: the qp s_lock and the exp_lock.
4448c2ecf20Sopenharmony_ci *
4458c2ecf20Sopenharmony_ci * Since the tid space allocation is called from
4468c2ecf20Sopenharmony_ci * the send engine, the qp s_lock is already held.
4478c2ecf20Sopenharmony_ci *
4488c2ecf20Sopenharmony_ci * The allocation routines will get the exp_lock.
4498c2ecf20Sopenharmony_ci *
4508c2ecf20Sopenharmony_ci * The first_qp() call is provided to allow the head of
4518c2ecf20Sopenharmony_ci * the rcd wait queue to be fetched under the exp_lock and
4528c2ecf20Sopenharmony_ci * followed by a drop of the exp_lock.
4538c2ecf20Sopenharmony_ci *
4548c2ecf20Sopenharmony_ci * Any qp in the wait list will have the qp reference count held
4558c2ecf20Sopenharmony_ci * to hold the qp in memory.
4568c2ecf20Sopenharmony_ci */
4578c2ecf20Sopenharmony_ci
4588c2ecf20Sopenharmony_ci/*
4598c2ecf20Sopenharmony_ci * return head of rcd wait list
4608c2ecf20Sopenharmony_ci *
4618c2ecf20Sopenharmony_ci * Must hold the exp_lock.
4628c2ecf20Sopenharmony_ci *
4638c2ecf20Sopenharmony_ci * Get a reference to the QP to hold the QP in memory.
4648c2ecf20Sopenharmony_ci *
4658c2ecf20Sopenharmony_ci * The caller must release the reference when the local
4668c2ecf20Sopenharmony_ci * is no longer being used.
4678c2ecf20Sopenharmony_ci */
4688c2ecf20Sopenharmony_cistatic struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd,
4698c2ecf20Sopenharmony_ci			       struct tid_queue *queue)
4708c2ecf20Sopenharmony_ci	__must_hold(&rcd->exp_lock)
4718c2ecf20Sopenharmony_ci{
4728c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv;
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	lockdep_assert_held(&rcd->exp_lock);
4758c2ecf20Sopenharmony_ci	priv = list_first_entry_or_null(&queue->queue_head,
4768c2ecf20Sopenharmony_ci					struct hfi1_qp_priv,
4778c2ecf20Sopenharmony_ci					tid_wait);
4788c2ecf20Sopenharmony_ci	if (!priv)
4798c2ecf20Sopenharmony_ci		return NULL;
4808c2ecf20Sopenharmony_ci	rvt_get_qp(priv->owner);
4818c2ecf20Sopenharmony_ci	return priv->owner;
4828c2ecf20Sopenharmony_ci}
4838c2ecf20Sopenharmony_ci
4848c2ecf20Sopenharmony_ci/**
4858c2ecf20Sopenharmony_ci * kernel_tid_waiters - determine rcd wait
4868c2ecf20Sopenharmony_ci * @rcd: the receive context
4878c2ecf20Sopenharmony_ci * @qp: the head of the qp being processed
4888c2ecf20Sopenharmony_ci *
4898c2ecf20Sopenharmony_ci * This routine will return false IFF
4908c2ecf20Sopenharmony_ci * the list is NULL or the head of the
4918c2ecf20Sopenharmony_ci * list is the indicated qp.
4928c2ecf20Sopenharmony_ci *
4938c2ecf20Sopenharmony_ci * Must hold the qp s_lock and the exp_lock.
4948c2ecf20Sopenharmony_ci *
4958c2ecf20Sopenharmony_ci * Return:
4968c2ecf20Sopenharmony_ci * false if either of the conditions below are satisfied:
4978c2ecf20Sopenharmony_ci * 1. The list is empty or
4988c2ecf20Sopenharmony_ci * 2. The indicated qp is at the head of the list and the
4998c2ecf20Sopenharmony_ci *    HFI1_S_WAIT_TID_SPACE bit is set in qp->s_flags.
5008c2ecf20Sopenharmony_ci * true is returned otherwise.
5018c2ecf20Sopenharmony_ci */
5028c2ecf20Sopenharmony_cistatic bool kernel_tid_waiters(struct hfi1_ctxtdata *rcd,
5038c2ecf20Sopenharmony_ci			       struct tid_queue *queue, struct rvt_qp *qp)
5048c2ecf20Sopenharmony_ci	__must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
5058c2ecf20Sopenharmony_ci{
5068c2ecf20Sopenharmony_ci	struct rvt_qp *fqp;
5078c2ecf20Sopenharmony_ci	bool ret = true;
5088c2ecf20Sopenharmony_ci
5098c2ecf20Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
5108c2ecf20Sopenharmony_ci	lockdep_assert_held(&rcd->exp_lock);
5118c2ecf20Sopenharmony_ci	fqp = first_qp(rcd, queue);
5128c2ecf20Sopenharmony_ci	if (!fqp || (fqp == qp && (qp->s_flags & HFI1_S_WAIT_TID_SPACE)))
5138c2ecf20Sopenharmony_ci		ret = false;
5148c2ecf20Sopenharmony_ci	rvt_put_qp(fqp);
5158c2ecf20Sopenharmony_ci	return ret;
5168c2ecf20Sopenharmony_ci}
5178c2ecf20Sopenharmony_ci
5188c2ecf20Sopenharmony_ci/**
5198c2ecf20Sopenharmony_ci * dequeue_tid_waiter - dequeue the qp from the list
5208c2ecf20Sopenharmony_ci * @qp - the qp to remove the wait list
5218c2ecf20Sopenharmony_ci *
5228c2ecf20Sopenharmony_ci * This routine removes the indicated qp from the
5238c2ecf20Sopenharmony_ci * wait list if it is there.
5248c2ecf20Sopenharmony_ci *
5258c2ecf20Sopenharmony_ci * This should be done after the hardware flow and
5268c2ecf20Sopenharmony_ci * tid array resources have been allocated.
5278c2ecf20Sopenharmony_ci *
5288c2ecf20Sopenharmony_ci * Must hold the qp s_lock and the rcd exp_lock.
5298c2ecf20Sopenharmony_ci *
5308c2ecf20Sopenharmony_ci * It assumes the s_lock to protect the s_flags
5318c2ecf20Sopenharmony_ci * field and to reliably test the HFI1_S_WAIT_TID_SPACE flag.
5328c2ecf20Sopenharmony_ci */
5338c2ecf20Sopenharmony_cistatic void dequeue_tid_waiter(struct hfi1_ctxtdata *rcd,
5348c2ecf20Sopenharmony_ci			       struct tid_queue *queue, struct rvt_qp *qp)
5358c2ecf20Sopenharmony_ci	__must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
5368c2ecf20Sopenharmony_ci{
5378c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
5388c2ecf20Sopenharmony_ci
5398c2ecf20Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
5408c2ecf20Sopenharmony_ci	lockdep_assert_held(&rcd->exp_lock);
5418c2ecf20Sopenharmony_ci	if (list_empty(&priv->tid_wait))
5428c2ecf20Sopenharmony_ci		return;
5438c2ecf20Sopenharmony_ci	list_del_init(&priv->tid_wait);
5448c2ecf20Sopenharmony_ci	qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
5458c2ecf20Sopenharmony_ci	queue->dequeue++;
5468c2ecf20Sopenharmony_ci	rvt_put_qp(qp);
5478c2ecf20Sopenharmony_ci}
5488c2ecf20Sopenharmony_ci
5498c2ecf20Sopenharmony_ci/**
5508c2ecf20Sopenharmony_ci * queue_qp_for_tid_wait - suspend QP on tid space
5518c2ecf20Sopenharmony_ci * @rcd: the receive context
5528c2ecf20Sopenharmony_ci * @qp: the qp
5538c2ecf20Sopenharmony_ci *
5548c2ecf20Sopenharmony_ci * The qp is inserted at the tail of the rcd
5558c2ecf20Sopenharmony_ci * wait queue and the HFI1_S_WAIT_TID_SPACE s_flag is set.
5568c2ecf20Sopenharmony_ci *
5578c2ecf20Sopenharmony_ci * Must hold the qp s_lock and the exp_lock.
5588c2ecf20Sopenharmony_ci */
5598c2ecf20Sopenharmony_cistatic void queue_qp_for_tid_wait(struct hfi1_ctxtdata *rcd,
5608c2ecf20Sopenharmony_ci				  struct tid_queue *queue, struct rvt_qp *qp)
5618c2ecf20Sopenharmony_ci	__must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
5628c2ecf20Sopenharmony_ci{
5638c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
5648c2ecf20Sopenharmony_ci
5658c2ecf20Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
5668c2ecf20Sopenharmony_ci	lockdep_assert_held(&rcd->exp_lock);
5678c2ecf20Sopenharmony_ci	if (list_empty(&priv->tid_wait)) {
5688c2ecf20Sopenharmony_ci		qp->s_flags |= HFI1_S_WAIT_TID_SPACE;
5698c2ecf20Sopenharmony_ci		list_add_tail(&priv->tid_wait, &queue->queue_head);
5708c2ecf20Sopenharmony_ci		priv->tid_enqueue = ++queue->enqueue;
5718c2ecf20Sopenharmony_ci		rcd->dd->verbs_dev.n_tidwait++;
5728c2ecf20Sopenharmony_ci		trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TID_SPACE);
5738c2ecf20Sopenharmony_ci		rvt_get_qp(qp);
5748c2ecf20Sopenharmony_ci	}
5758c2ecf20Sopenharmony_ci}
5768c2ecf20Sopenharmony_ci
5778c2ecf20Sopenharmony_ci/**
5788c2ecf20Sopenharmony_ci * __trigger_tid_waiter - trigger tid waiter
5798c2ecf20Sopenharmony_ci * @qp: the qp
5808c2ecf20Sopenharmony_ci *
5818c2ecf20Sopenharmony_ci * This is a private entrance to schedule the qp
5828c2ecf20Sopenharmony_ci * assuming the caller is holding the qp->s_lock.
5838c2ecf20Sopenharmony_ci */
5848c2ecf20Sopenharmony_cistatic void __trigger_tid_waiter(struct rvt_qp *qp)
5858c2ecf20Sopenharmony_ci	__must_hold(&qp->s_lock)
5868c2ecf20Sopenharmony_ci{
5878c2ecf20Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
5888c2ecf20Sopenharmony_ci	if (!(qp->s_flags & HFI1_S_WAIT_TID_SPACE))
5898c2ecf20Sopenharmony_ci		return;
5908c2ecf20Sopenharmony_ci	trace_hfi1_qpwakeup(qp, HFI1_S_WAIT_TID_SPACE);
5918c2ecf20Sopenharmony_ci	hfi1_schedule_send(qp);
5928c2ecf20Sopenharmony_ci}
5938c2ecf20Sopenharmony_ci
5948c2ecf20Sopenharmony_ci/**
5958c2ecf20Sopenharmony_ci * tid_rdma_schedule_tid_wakeup - schedule wakeup for a qp
5968c2ecf20Sopenharmony_ci * @qp - the qp
5978c2ecf20Sopenharmony_ci *
5988c2ecf20Sopenharmony_ci * trigger a schedule or a waiting qp in a deadlock
5998c2ecf20Sopenharmony_ci * safe manner.  The qp reference is held prior
6008c2ecf20Sopenharmony_ci * to this call via first_qp().
6018c2ecf20Sopenharmony_ci *
6028c2ecf20Sopenharmony_ci * If the qp trigger was already scheduled (!rval)
6038c2ecf20Sopenharmony_ci * the the reference is dropped, otherwise the resume
6048c2ecf20Sopenharmony_ci * or the destroy cancel will dispatch the reference.
6058c2ecf20Sopenharmony_ci */
6068c2ecf20Sopenharmony_cistatic void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp)
6078c2ecf20Sopenharmony_ci{
6088c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv;
6098c2ecf20Sopenharmony_ci	struct hfi1_ibport *ibp;
6108c2ecf20Sopenharmony_ci	struct hfi1_pportdata *ppd;
6118c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd;
6128c2ecf20Sopenharmony_ci	bool rval;
6138c2ecf20Sopenharmony_ci
6148c2ecf20Sopenharmony_ci	if (!qp)
6158c2ecf20Sopenharmony_ci		return;
6168c2ecf20Sopenharmony_ci
6178c2ecf20Sopenharmony_ci	priv = qp->priv;
6188c2ecf20Sopenharmony_ci	ibp = to_iport(qp->ibqp.device, qp->port_num);
6198c2ecf20Sopenharmony_ci	ppd = ppd_from_ibp(ibp);
6208c2ecf20Sopenharmony_ci	dd = dd_from_ibdev(qp->ibqp.device);
6218c2ecf20Sopenharmony_ci
6228c2ecf20Sopenharmony_ci	rval = queue_work_on(priv->s_sde ?
6238c2ecf20Sopenharmony_ci			     priv->s_sde->cpu :
6248c2ecf20Sopenharmony_ci			     cpumask_first(cpumask_of_node(dd->node)),
6258c2ecf20Sopenharmony_ci			     ppd->hfi1_wq,
6268c2ecf20Sopenharmony_ci			     &priv->tid_rdma.trigger_work);
6278c2ecf20Sopenharmony_ci	if (!rval)
6288c2ecf20Sopenharmony_ci		rvt_put_qp(qp);
6298c2ecf20Sopenharmony_ci}
6308c2ecf20Sopenharmony_ci
6318c2ecf20Sopenharmony_ci/**
6328c2ecf20Sopenharmony_ci * tid_rdma_trigger_resume - field a trigger work request
6338c2ecf20Sopenharmony_ci * @work - the work item
6348c2ecf20Sopenharmony_ci *
6358c2ecf20Sopenharmony_ci * Complete the off qp trigger processing by directly
6368c2ecf20Sopenharmony_ci * calling the progress routine.
6378c2ecf20Sopenharmony_ci */
6388c2ecf20Sopenharmony_cistatic void tid_rdma_trigger_resume(struct work_struct *work)
6398c2ecf20Sopenharmony_ci{
6408c2ecf20Sopenharmony_ci	struct tid_rdma_qp_params *tr;
6418c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv;
6428c2ecf20Sopenharmony_ci	struct rvt_qp *qp;
6438c2ecf20Sopenharmony_ci
6448c2ecf20Sopenharmony_ci	tr = container_of(work, struct tid_rdma_qp_params, trigger_work);
6458c2ecf20Sopenharmony_ci	priv = container_of(tr, struct hfi1_qp_priv, tid_rdma);
6468c2ecf20Sopenharmony_ci	qp = priv->owner;
6478c2ecf20Sopenharmony_ci	spin_lock_irq(&qp->s_lock);
6488c2ecf20Sopenharmony_ci	if (qp->s_flags & HFI1_S_WAIT_TID_SPACE) {
6498c2ecf20Sopenharmony_ci		spin_unlock_irq(&qp->s_lock);
6508c2ecf20Sopenharmony_ci		hfi1_do_send(priv->owner, true);
6518c2ecf20Sopenharmony_ci	} else {
6528c2ecf20Sopenharmony_ci		spin_unlock_irq(&qp->s_lock);
6538c2ecf20Sopenharmony_ci	}
6548c2ecf20Sopenharmony_ci	rvt_put_qp(qp);
6558c2ecf20Sopenharmony_ci}
6568c2ecf20Sopenharmony_ci
6578c2ecf20Sopenharmony_ci/**
6588c2ecf20Sopenharmony_ci * tid_rdma_flush_wait - unwind any tid space wait
6598c2ecf20Sopenharmony_ci *
6608c2ecf20Sopenharmony_ci * This is called when resetting a qp to
6618c2ecf20Sopenharmony_ci * allow a destroy or reset to get rid
6628c2ecf20Sopenharmony_ci * of any tid space linkage and reference counts.
6638c2ecf20Sopenharmony_ci */
6648c2ecf20Sopenharmony_cistatic void _tid_rdma_flush_wait(struct rvt_qp *qp, struct tid_queue *queue)
6658c2ecf20Sopenharmony_ci	__must_hold(&qp->s_lock)
6668c2ecf20Sopenharmony_ci{
6678c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv;
6688c2ecf20Sopenharmony_ci
6698c2ecf20Sopenharmony_ci	if (!qp)
6708c2ecf20Sopenharmony_ci		return;
6718c2ecf20Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
6728c2ecf20Sopenharmony_ci	priv = qp->priv;
6738c2ecf20Sopenharmony_ci	qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
6748c2ecf20Sopenharmony_ci	spin_lock(&priv->rcd->exp_lock);
6758c2ecf20Sopenharmony_ci	if (!list_empty(&priv->tid_wait)) {
6768c2ecf20Sopenharmony_ci		list_del_init(&priv->tid_wait);
6778c2ecf20Sopenharmony_ci		qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
6788c2ecf20Sopenharmony_ci		queue->dequeue++;
6798c2ecf20Sopenharmony_ci		rvt_put_qp(qp);
6808c2ecf20Sopenharmony_ci	}
6818c2ecf20Sopenharmony_ci	spin_unlock(&priv->rcd->exp_lock);
6828c2ecf20Sopenharmony_ci}
6838c2ecf20Sopenharmony_ci
6848c2ecf20Sopenharmony_civoid hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
6858c2ecf20Sopenharmony_ci	__must_hold(&qp->s_lock)
6868c2ecf20Sopenharmony_ci{
6878c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
6888c2ecf20Sopenharmony_ci
6898c2ecf20Sopenharmony_ci	_tid_rdma_flush_wait(qp, &priv->rcd->flow_queue);
6908c2ecf20Sopenharmony_ci	_tid_rdma_flush_wait(qp, &priv->rcd->rarr_queue);
6918c2ecf20Sopenharmony_ci}
6928c2ecf20Sopenharmony_ci
6938c2ecf20Sopenharmony_ci/* Flow functions */
6948c2ecf20Sopenharmony_ci/**
6958c2ecf20Sopenharmony_ci * kern_reserve_flow - allocate a hardware flow
6968c2ecf20Sopenharmony_ci * @rcd - the context to use for allocation
6978c2ecf20Sopenharmony_ci * @last - the index of the preferred flow. Use RXE_NUM_TID_FLOWS to
6988c2ecf20Sopenharmony_ci *         signify "don't care".
6998c2ecf20Sopenharmony_ci *
7008c2ecf20Sopenharmony_ci * Use a bit mask based allocation to reserve a hardware
7018c2ecf20Sopenharmony_ci * flow for use in receiving KDETH data packets. If a preferred flow is
7028c2ecf20Sopenharmony_ci * specified the function will attempt to reserve that flow again, if
7038c2ecf20Sopenharmony_ci * available.
7048c2ecf20Sopenharmony_ci *
7058c2ecf20Sopenharmony_ci * The exp_lock must be held.
7068c2ecf20Sopenharmony_ci *
7078c2ecf20Sopenharmony_ci * Return:
7088c2ecf20Sopenharmony_ci * On success: a value postive value between 0 and RXE_NUM_TID_FLOWS - 1
7098c2ecf20Sopenharmony_ci * On failure: -EAGAIN
7108c2ecf20Sopenharmony_ci */
7118c2ecf20Sopenharmony_cistatic int kern_reserve_flow(struct hfi1_ctxtdata *rcd, int last)
7128c2ecf20Sopenharmony_ci	__must_hold(&rcd->exp_lock)
7138c2ecf20Sopenharmony_ci{
7148c2ecf20Sopenharmony_ci	int nr;
7158c2ecf20Sopenharmony_ci
7168c2ecf20Sopenharmony_ci	/* Attempt to reserve the preferred flow index */
7178c2ecf20Sopenharmony_ci	if (last >= 0 && last < RXE_NUM_TID_FLOWS &&
7188c2ecf20Sopenharmony_ci	    !test_and_set_bit(last, &rcd->flow_mask))
7198c2ecf20Sopenharmony_ci		return last;
7208c2ecf20Sopenharmony_ci
7218c2ecf20Sopenharmony_ci	nr = ffz(rcd->flow_mask);
7228c2ecf20Sopenharmony_ci	BUILD_BUG_ON(RXE_NUM_TID_FLOWS >=
7238c2ecf20Sopenharmony_ci		     (sizeof(rcd->flow_mask) * BITS_PER_BYTE));
7248c2ecf20Sopenharmony_ci	if (nr > (RXE_NUM_TID_FLOWS - 1))
7258c2ecf20Sopenharmony_ci		return -EAGAIN;
7268c2ecf20Sopenharmony_ci	set_bit(nr, &rcd->flow_mask);
7278c2ecf20Sopenharmony_ci	return nr;
7288c2ecf20Sopenharmony_ci}
7298c2ecf20Sopenharmony_ci
7308c2ecf20Sopenharmony_cistatic void kern_set_hw_flow(struct hfi1_ctxtdata *rcd, u32 generation,
7318c2ecf20Sopenharmony_ci			     u32 flow_idx)
7328c2ecf20Sopenharmony_ci{
7338c2ecf20Sopenharmony_ci	u64 reg;
7348c2ecf20Sopenharmony_ci
7358c2ecf20Sopenharmony_ci	reg = ((u64)generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
7368c2ecf20Sopenharmony_ci		RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK |
7378c2ecf20Sopenharmony_ci		RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK |
7388c2ecf20Sopenharmony_ci		RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK |
7398c2ecf20Sopenharmony_ci		RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK |
7408c2ecf20Sopenharmony_ci		RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK;
7418c2ecf20Sopenharmony_ci
7428c2ecf20Sopenharmony_ci	if (generation != KERN_GENERATION_RESERVED)
7438c2ecf20Sopenharmony_ci		reg |= RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK;
7448c2ecf20Sopenharmony_ci
7458c2ecf20Sopenharmony_ci	write_uctxt_csr(rcd->dd, rcd->ctxt,
7468c2ecf20Sopenharmony_ci			RCV_TID_FLOW_TABLE + 8 * flow_idx, reg);
7478c2ecf20Sopenharmony_ci}
7488c2ecf20Sopenharmony_ci
7498c2ecf20Sopenharmony_cistatic u32 kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
7508c2ecf20Sopenharmony_ci	__must_hold(&rcd->exp_lock)
7518c2ecf20Sopenharmony_ci{
7528c2ecf20Sopenharmony_ci	u32 generation = rcd->flows[flow_idx].generation;
7538c2ecf20Sopenharmony_ci
7548c2ecf20Sopenharmony_ci	kern_set_hw_flow(rcd, generation, flow_idx);
7558c2ecf20Sopenharmony_ci	return generation;
7568c2ecf20Sopenharmony_ci}
7578c2ecf20Sopenharmony_ci
7588c2ecf20Sopenharmony_cistatic u32 kern_flow_generation_next(u32 gen)
7598c2ecf20Sopenharmony_ci{
7608c2ecf20Sopenharmony_ci	u32 generation = mask_generation(gen + 1);
7618c2ecf20Sopenharmony_ci
7628c2ecf20Sopenharmony_ci	if (generation == KERN_GENERATION_RESERVED)
7638c2ecf20Sopenharmony_ci		generation = mask_generation(generation + 1);
7648c2ecf20Sopenharmony_ci	return generation;
7658c2ecf20Sopenharmony_ci}
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_cistatic void kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
7688c2ecf20Sopenharmony_ci	__must_hold(&rcd->exp_lock)
7698c2ecf20Sopenharmony_ci{
7708c2ecf20Sopenharmony_ci	rcd->flows[flow_idx].generation =
7718c2ecf20Sopenharmony_ci		kern_flow_generation_next(rcd->flows[flow_idx].generation);
7728c2ecf20Sopenharmony_ci	kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, flow_idx);
7738c2ecf20Sopenharmony_ci}
7748c2ecf20Sopenharmony_ci
7758c2ecf20Sopenharmony_ciint hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
7768c2ecf20Sopenharmony_ci{
7778c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
7788c2ecf20Sopenharmony_ci	struct tid_flow_state *fs = &qpriv->flow_state;
7798c2ecf20Sopenharmony_ci	struct rvt_qp *fqp;
7808c2ecf20Sopenharmony_ci	unsigned long flags;
7818c2ecf20Sopenharmony_ci	int ret = 0;
7828c2ecf20Sopenharmony_ci
7838c2ecf20Sopenharmony_ci	/* The QP already has an allocated flow */
7848c2ecf20Sopenharmony_ci	if (fs->index != RXE_NUM_TID_FLOWS)
7858c2ecf20Sopenharmony_ci		return ret;
7868c2ecf20Sopenharmony_ci
7878c2ecf20Sopenharmony_ci	spin_lock_irqsave(&rcd->exp_lock, flags);
7888c2ecf20Sopenharmony_ci	if (kernel_tid_waiters(rcd, &rcd->flow_queue, qp))
7898c2ecf20Sopenharmony_ci		goto queue;
7908c2ecf20Sopenharmony_ci
7918c2ecf20Sopenharmony_ci	ret = kern_reserve_flow(rcd, fs->last_index);
7928c2ecf20Sopenharmony_ci	if (ret < 0)
7938c2ecf20Sopenharmony_ci		goto queue;
7948c2ecf20Sopenharmony_ci	fs->index = ret;
7958c2ecf20Sopenharmony_ci	fs->last_index = fs->index;
7968c2ecf20Sopenharmony_ci
7978c2ecf20Sopenharmony_ci	/* Generation received in a RESYNC overrides default flow generation */
7988c2ecf20Sopenharmony_ci	if (fs->generation != KERN_GENERATION_RESERVED)
7998c2ecf20Sopenharmony_ci		rcd->flows[fs->index].generation = fs->generation;
8008c2ecf20Sopenharmony_ci	fs->generation = kern_setup_hw_flow(rcd, fs->index);
8018c2ecf20Sopenharmony_ci	fs->psn = 0;
8028c2ecf20Sopenharmony_ci	dequeue_tid_waiter(rcd, &rcd->flow_queue, qp);
8038c2ecf20Sopenharmony_ci	/* get head before dropping lock */
8048c2ecf20Sopenharmony_ci	fqp = first_qp(rcd, &rcd->flow_queue);
8058c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&rcd->exp_lock, flags);
8068c2ecf20Sopenharmony_ci
8078c2ecf20Sopenharmony_ci	tid_rdma_schedule_tid_wakeup(fqp);
8088c2ecf20Sopenharmony_ci	return 0;
8098c2ecf20Sopenharmony_ciqueue:
8108c2ecf20Sopenharmony_ci	queue_qp_for_tid_wait(rcd, &rcd->flow_queue, qp);
8118c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&rcd->exp_lock, flags);
8128c2ecf20Sopenharmony_ci	return -EAGAIN;
8138c2ecf20Sopenharmony_ci}
8148c2ecf20Sopenharmony_ci
8158c2ecf20Sopenharmony_civoid hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
8168c2ecf20Sopenharmony_ci{
8178c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
8188c2ecf20Sopenharmony_ci	struct tid_flow_state *fs = &qpriv->flow_state;
8198c2ecf20Sopenharmony_ci	struct rvt_qp *fqp;
8208c2ecf20Sopenharmony_ci	unsigned long flags;
8218c2ecf20Sopenharmony_ci
8228c2ecf20Sopenharmony_ci	if (fs->index >= RXE_NUM_TID_FLOWS)
8238c2ecf20Sopenharmony_ci		return;
8248c2ecf20Sopenharmony_ci	spin_lock_irqsave(&rcd->exp_lock, flags);
8258c2ecf20Sopenharmony_ci	kern_clear_hw_flow(rcd, fs->index);
8268c2ecf20Sopenharmony_ci	clear_bit(fs->index, &rcd->flow_mask);
8278c2ecf20Sopenharmony_ci	fs->index = RXE_NUM_TID_FLOWS;
8288c2ecf20Sopenharmony_ci	fs->psn = 0;
8298c2ecf20Sopenharmony_ci	fs->generation = KERN_GENERATION_RESERVED;
8308c2ecf20Sopenharmony_ci
8318c2ecf20Sopenharmony_ci	/* get head before dropping lock */
8328c2ecf20Sopenharmony_ci	fqp = first_qp(rcd, &rcd->flow_queue);
8338c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&rcd->exp_lock, flags);
8348c2ecf20Sopenharmony_ci
8358c2ecf20Sopenharmony_ci	if (fqp == qp) {
8368c2ecf20Sopenharmony_ci		__trigger_tid_waiter(fqp);
8378c2ecf20Sopenharmony_ci		rvt_put_qp(fqp);
8388c2ecf20Sopenharmony_ci	} else {
8398c2ecf20Sopenharmony_ci		tid_rdma_schedule_tid_wakeup(fqp);
8408c2ecf20Sopenharmony_ci	}
8418c2ecf20Sopenharmony_ci}
8428c2ecf20Sopenharmony_ci
8438c2ecf20Sopenharmony_civoid hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd)
8448c2ecf20Sopenharmony_ci{
8458c2ecf20Sopenharmony_ci	int i;
8468c2ecf20Sopenharmony_ci
8478c2ecf20Sopenharmony_ci	for (i = 0; i < RXE_NUM_TID_FLOWS; i++) {
8488c2ecf20Sopenharmony_ci		rcd->flows[i].generation = mask_generation(prandom_u32());
8498c2ecf20Sopenharmony_ci		kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, i);
8508c2ecf20Sopenharmony_ci	}
8518c2ecf20Sopenharmony_ci}
8528c2ecf20Sopenharmony_ci
8538c2ecf20Sopenharmony_ci/* TID allocation functions */
8548c2ecf20Sopenharmony_cistatic u8 trdma_pset_order(struct tid_rdma_pageset *s)
8558c2ecf20Sopenharmony_ci{
8568c2ecf20Sopenharmony_ci	u8 count = s->count;
8578c2ecf20Sopenharmony_ci
8588c2ecf20Sopenharmony_ci	return ilog2(count) + 1;
8598c2ecf20Sopenharmony_ci}
8608c2ecf20Sopenharmony_ci
8618c2ecf20Sopenharmony_ci/**
8628c2ecf20Sopenharmony_ci * tid_rdma_find_phys_blocks_4k - get groups base on mr info
8638c2ecf20Sopenharmony_ci * @npages - number of pages
8648c2ecf20Sopenharmony_ci * @pages - pointer to an array of page structs
8658c2ecf20Sopenharmony_ci * @list - page set array to return
8668c2ecf20Sopenharmony_ci *
8678c2ecf20Sopenharmony_ci * This routine returns the number of groups associated with
8688c2ecf20Sopenharmony_ci * the current sge information.  This implementation is based
8698c2ecf20Sopenharmony_ci * on the expected receive find_phys_blocks() adjusted to
8708c2ecf20Sopenharmony_ci * use the MR information vs. the pfn.
8718c2ecf20Sopenharmony_ci *
8728c2ecf20Sopenharmony_ci * Return:
8738c2ecf20Sopenharmony_ci * the number of RcvArray entries
8748c2ecf20Sopenharmony_ci */
8758c2ecf20Sopenharmony_cistatic u32 tid_rdma_find_phys_blocks_4k(struct tid_rdma_flow *flow,
8768c2ecf20Sopenharmony_ci					struct page **pages,
8778c2ecf20Sopenharmony_ci					u32 npages,
8788c2ecf20Sopenharmony_ci					struct tid_rdma_pageset *list)
8798c2ecf20Sopenharmony_ci{
8808c2ecf20Sopenharmony_ci	u32 pagecount, pageidx, setcount = 0, i;
8818c2ecf20Sopenharmony_ci	void *vaddr, *this_vaddr;
8828c2ecf20Sopenharmony_ci
8838c2ecf20Sopenharmony_ci	if (!npages)
8848c2ecf20Sopenharmony_ci		return 0;
8858c2ecf20Sopenharmony_ci
8868c2ecf20Sopenharmony_ci	/*
8878c2ecf20Sopenharmony_ci	 * Look for sets of physically contiguous pages in the user buffer.
8888c2ecf20Sopenharmony_ci	 * This will allow us to optimize Expected RcvArray entry usage by
8898c2ecf20Sopenharmony_ci	 * using the bigger supported sizes.
8908c2ecf20Sopenharmony_ci	 */
8918c2ecf20Sopenharmony_ci	vaddr = page_address(pages[0]);
8928c2ecf20Sopenharmony_ci	trace_hfi1_tid_flow_page(flow->req->qp, flow, 0, 0, 0, vaddr);
8938c2ecf20Sopenharmony_ci	for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
8948c2ecf20Sopenharmony_ci		this_vaddr = i < npages ? page_address(pages[i]) : NULL;
8958c2ecf20Sopenharmony_ci		trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 0, 0,
8968c2ecf20Sopenharmony_ci					 this_vaddr);
8978c2ecf20Sopenharmony_ci		/*
8988c2ecf20Sopenharmony_ci		 * If the vaddr's are not sequential, pages are not physically
8998c2ecf20Sopenharmony_ci		 * contiguous.
9008c2ecf20Sopenharmony_ci		 */
9018c2ecf20Sopenharmony_ci		if (this_vaddr != (vaddr + PAGE_SIZE)) {
9028c2ecf20Sopenharmony_ci			/*
9038c2ecf20Sopenharmony_ci			 * At this point we have to loop over the set of
9048c2ecf20Sopenharmony_ci			 * physically contiguous pages and break them down it
9058c2ecf20Sopenharmony_ci			 * sizes supported by the HW.
9068c2ecf20Sopenharmony_ci			 * There are two main constraints:
9078c2ecf20Sopenharmony_ci			 *     1. The max buffer size is MAX_EXPECTED_BUFFER.
9088c2ecf20Sopenharmony_ci			 *        If the total set size is bigger than that
9098c2ecf20Sopenharmony_ci			 *        program only a MAX_EXPECTED_BUFFER chunk.
9108c2ecf20Sopenharmony_ci			 *     2. The buffer size has to be a power of two. If
9118c2ecf20Sopenharmony_ci			 *        it is not, round down to the closes power of
9128c2ecf20Sopenharmony_ci			 *        2 and program that size.
9138c2ecf20Sopenharmony_ci			 */
9148c2ecf20Sopenharmony_ci			while (pagecount) {
9158c2ecf20Sopenharmony_ci				int maxpages = pagecount;
9168c2ecf20Sopenharmony_ci				u32 bufsize = pagecount * PAGE_SIZE;
9178c2ecf20Sopenharmony_ci
9188c2ecf20Sopenharmony_ci				if (bufsize > MAX_EXPECTED_BUFFER)
9198c2ecf20Sopenharmony_ci					maxpages =
9208c2ecf20Sopenharmony_ci						MAX_EXPECTED_BUFFER >>
9218c2ecf20Sopenharmony_ci						PAGE_SHIFT;
9228c2ecf20Sopenharmony_ci				else if (!is_power_of_2(bufsize))
9238c2ecf20Sopenharmony_ci					maxpages =
9248c2ecf20Sopenharmony_ci						rounddown_pow_of_two(bufsize) >>
9258c2ecf20Sopenharmony_ci						PAGE_SHIFT;
9268c2ecf20Sopenharmony_ci
9278c2ecf20Sopenharmony_ci				list[setcount].idx = pageidx;
9288c2ecf20Sopenharmony_ci				list[setcount].count = maxpages;
9298c2ecf20Sopenharmony_ci				trace_hfi1_tid_pageset(flow->req->qp, setcount,
9308c2ecf20Sopenharmony_ci						       list[setcount].idx,
9318c2ecf20Sopenharmony_ci						       list[setcount].count);
9328c2ecf20Sopenharmony_ci				pagecount -= maxpages;
9338c2ecf20Sopenharmony_ci				pageidx += maxpages;
9348c2ecf20Sopenharmony_ci				setcount++;
9358c2ecf20Sopenharmony_ci			}
9368c2ecf20Sopenharmony_ci			pageidx = i;
9378c2ecf20Sopenharmony_ci			pagecount = 1;
9388c2ecf20Sopenharmony_ci			vaddr = this_vaddr;
9398c2ecf20Sopenharmony_ci		} else {
9408c2ecf20Sopenharmony_ci			vaddr += PAGE_SIZE;
9418c2ecf20Sopenharmony_ci			pagecount++;
9428c2ecf20Sopenharmony_ci		}
9438c2ecf20Sopenharmony_ci	}
9448c2ecf20Sopenharmony_ci	/* insure we always return an even number of sets */
9458c2ecf20Sopenharmony_ci	if (setcount & 1)
9468c2ecf20Sopenharmony_ci		list[setcount++].count = 0;
9478c2ecf20Sopenharmony_ci	return setcount;
9488c2ecf20Sopenharmony_ci}
9498c2ecf20Sopenharmony_ci
9508c2ecf20Sopenharmony_ci/**
9518c2ecf20Sopenharmony_ci * tid_flush_pages - dump out pages into pagesets
9528c2ecf20Sopenharmony_ci * @list - list of pagesets
9538c2ecf20Sopenharmony_ci * @idx - pointer to current page index
9548c2ecf20Sopenharmony_ci * @pages - number of pages to dump
9558c2ecf20Sopenharmony_ci * @sets - current number of pagesset
9568c2ecf20Sopenharmony_ci *
9578c2ecf20Sopenharmony_ci * This routine flushes out accumuated pages.
9588c2ecf20Sopenharmony_ci *
9598c2ecf20Sopenharmony_ci * To insure an even number of sets the
9608c2ecf20Sopenharmony_ci * code may add a filler.
9618c2ecf20Sopenharmony_ci *
9628c2ecf20Sopenharmony_ci * This can happen with when pages is not
9638c2ecf20Sopenharmony_ci * a power of 2 or pages is a power of 2
9648c2ecf20Sopenharmony_ci * less than the maximum pages.
9658c2ecf20Sopenharmony_ci *
9668c2ecf20Sopenharmony_ci * Return:
9678c2ecf20Sopenharmony_ci * The new number of sets
9688c2ecf20Sopenharmony_ci */
9698c2ecf20Sopenharmony_ci
9708c2ecf20Sopenharmony_cistatic u32 tid_flush_pages(struct tid_rdma_pageset *list,
9718c2ecf20Sopenharmony_ci			   u32 *idx, u32 pages, u32 sets)
9728c2ecf20Sopenharmony_ci{
9738c2ecf20Sopenharmony_ci	while (pages) {
9748c2ecf20Sopenharmony_ci		u32 maxpages = pages;
9758c2ecf20Sopenharmony_ci
9768c2ecf20Sopenharmony_ci		if (maxpages > MAX_EXPECTED_PAGES)
9778c2ecf20Sopenharmony_ci			maxpages = MAX_EXPECTED_PAGES;
9788c2ecf20Sopenharmony_ci		else if (!is_power_of_2(maxpages))
9798c2ecf20Sopenharmony_ci			maxpages = rounddown_pow_of_two(maxpages);
9808c2ecf20Sopenharmony_ci		list[sets].idx = *idx;
9818c2ecf20Sopenharmony_ci		list[sets++].count = maxpages;
9828c2ecf20Sopenharmony_ci		*idx += maxpages;
9838c2ecf20Sopenharmony_ci		pages -= maxpages;
9848c2ecf20Sopenharmony_ci	}
9858c2ecf20Sopenharmony_ci	/* might need a filler */
9868c2ecf20Sopenharmony_ci	if (sets & 1)
9878c2ecf20Sopenharmony_ci		list[sets++].count = 0;
9888c2ecf20Sopenharmony_ci	return sets;
9898c2ecf20Sopenharmony_ci}
9908c2ecf20Sopenharmony_ci
9918c2ecf20Sopenharmony_ci/**
9928c2ecf20Sopenharmony_ci * tid_rdma_find_phys_blocks_8k - get groups base on mr info
9938c2ecf20Sopenharmony_ci * @pages - pointer to an array of page structs
9948c2ecf20Sopenharmony_ci * @npages - number of pages
9958c2ecf20Sopenharmony_ci * @list - page set array to return
9968c2ecf20Sopenharmony_ci *
9978c2ecf20Sopenharmony_ci * This routine parses an array of pages to compute pagesets
9988c2ecf20Sopenharmony_ci * in an 8k compatible way.
9998c2ecf20Sopenharmony_ci *
10008c2ecf20Sopenharmony_ci * pages are tested two at a time, i, i + 1 for contiguous
10018c2ecf20Sopenharmony_ci * pages and i - 1 and i contiguous pages.
10028c2ecf20Sopenharmony_ci *
10038c2ecf20Sopenharmony_ci * If any condition is false, any accumlated pages are flushed and
10048c2ecf20Sopenharmony_ci * v0,v1 are emitted as separate PAGE_SIZE pagesets
10058c2ecf20Sopenharmony_ci *
10068c2ecf20Sopenharmony_ci * Otherwise, the current 8k is totaled for a future flush.
10078c2ecf20Sopenharmony_ci *
10088c2ecf20Sopenharmony_ci * Return:
10098c2ecf20Sopenharmony_ci * The number of pagesets
10108c2ecf20Sopenharmony_ci * list set with the returned number of pagesets
10118c2ecf20Sopenharmony_ci *
10128c2ecf20Sopenharmony_ci */
10138c2ecf20Sopenharmony_cistatic u32 tid_rdma_find_phys_blocks_8k(struct tid_rdma_flow *flow,
10148c2ecf20Sopenharmony_ci					struct page **pages,
10158c2ecf20Sopenharmony_ci					u32 npages,
10168c2ecf20Sopenharmony_ci					struct tid_rdma_pageset *list)
10178c2ecf20Sopenharmony_ci{
10188c2ecf20Sopenharmony_ci	u32 idx, sets = 0, i;
10198c2ecf20Sopenharmony_ci	u32 pagecnt = 0;
10208c2ecf20Sopenharmony_ci	void *v0, *v1, *vm1;
10218c2ecf20Sopenharmony_ci
10228c2ecf20Sopenharmony_ci	if (!npages)
10238c2ecf20Sopenharmony_ci		return 0;
10248c2ecf20Sopenharmony_ci	for (idx = 0, i = 0, vm1 = NULL; i < npages; i += 2) {
10258c2ecf20Sopenharmony_ci		/* get a new v0 */
10268c2ecf20Sopenharmony_ci		v0 = page_address(pages[i]);
10278c2ecf20Sopenharmony_ci		trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 0, v0);
10288c2ecf20Sopenharmony_ci		v1 = i + 1 < npages ?
10298c2ecf20Sopenharmony_ci				page_address(pages[i + 1]) : NULL;
10308c2ecf20Sopenharmony_ci		trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 1, v1);
10318c2ecf20Sopenharmony_ci		/* compare i, i + 1 vaddr */
10328c2ecf20Sopenharmony_ci		if (v1 != (v0 + PAGE_SIZE)) {
10338c2ecf20Sopenharmony_ci			/* flush out pages */
10348c2ecf20Sopenharmony_ci			sets = tid_flush_pages(list, &idx, pagecnt, sets);
10358c2ecf20Sopenharmony_ci			/* output v0,v1 as two pagesets */
10368c2ecf20Sopenharmony_ci			list[sets].idx = idx++;
10378c2ecf20Sopenharmony_ci			list[sets++].count = 1;
10388c2ecf20Sopenharmony_ci			if (v1) {
10398c2ecf20Sopenharmony_ci				list[sets].count = 1;
10408c2ecf20Sopenharmony_ci				list[sets++].idx = idx++;
10418c2ecf20Sopenharmony_ci			} else {
10428c2ecf20Sopenharmony_ci				list[sets++].count = 0;
10438c2ecf20Sopenharmony_ci			}
10448c2ecf20Sopenharmony_ci			vm1 = NULL;
10458c2ecf20Sopenharmony_ci			pagecnt = 0;
10468c2ecf20Sopenharmony_ci			continue;
10478c2ecf20Sopenharmony_ci		}
10488c2ecf20Sopenharmony_ci		/* i,i+1 consecutive, look at i-1,i */
10498c2ecf20Sopenharmony_ci		if (vm1 && v0 != (vm1 + PAGE_SIZE)) {
10508c2ecf20Sopenharmony_ci			/* flush out pages */
10518c2ecf20Sopenharmony_ci			sets = tid_flush_pages(list, &idx, pagecnt, sets);
10528c2ecf20Sopenharmony_ci			pagecnt = 0;
10538c2ecf20Sopenharmony_ci		}
10548c2ecf20Sopenharmony_ci		/* pages will always be a multiple of 8k */
10558c2ecf20Sopenharmony_ci		pagecnt += 2;
10568c2ecf20Sopenharmony_ci		/* save i-1 */
10578c2ecf20Sopenharmony_ci		vm1 = v1;
10588c2ecf20Sopenharmony_ci		/* move to next pair */
10598c2ecf20Sopenharmony_ci	}
10608c2ecf20Sopenharmony_ci	/* dump residual pages at end */
10618c2ecf20Sopenharmony_ci	sets = tid_flush_pages(list, &idx, npages - idx, sets);
10628c2ecf20Sopenharmony_ci	/* by design cannot be odd sets */
10638c2ecf20Sopenharmony_ci	WARN_ON(sets & 1);
10648c2ecf20Sopenharmony_ci	return sets;
10658c2ecf20Sopenharmony_ci}
10668c2ecf20Sopenharmony_ci
10678c2ecf20Sopenharmony_ci/**
10688c2ecf20Sopenharmony_ci * Find pages for one segment of a sge array represented by @ss. The function
10698c2ecf20Sopenharmony_ci * does not check the sge, the sge must have been checked for alignment with a
10708c2ecf20Sopenharmony_ci * prior call to hfi1_kern_trdma_ok. Other sge checking is done as part of
10718c2ecf20Sopenharmony_ci * rvt_lkey_ok and rvt_rkey_ok. Also, the function only modifies the local sge
10728c2ecf20Sopenharmony_ci * copy maintained in @ss->sge, the original sge is not modified.
10738c2ecf20Sopenharmony_ci *
10748c2ecf20Sopenharmony_ci * Unlike IB RDMA WRITE, we can't decrement ss->num_sge here because we are not
10758c2ecf20Sopenharmony_ci * releasing the MR reference count at the same time. Otherwise, we'll "leak"
10768c2ecf20Sopenharmony_ci * references to the MR. This difference requires that we keep track of progress
10778c2ecf20Sopenharmony_ci * into the sg_list. This is done by the cur_seg cursor in the tid_rdma_request
10788c2ecf20Sopenharmony_ci * structure.
10798c2ecf20Sopenharmony_ci */
10808c2ecf20Sopenharmony_cistatic u32 kern_find_pages(struct tid_rdma_flow *flow,
10818c2ecf20Sopenharmony_ci			   struct page **pages,
10828c2ecf20Sopenharmony_ci			   struct rvt_sge_state *ss, bool *last)
10838c2ecf20Sopenharmony_ci{
10848c2ecf20Sopenharmony_ci	struct tid_rdma_request *req = flow->req;
10858c2ecf20Sopenharmony_ci	struct rvt_sge *sge = &ss->sge;
10868c2ecf20Sopenharmony_ci	u32 length = flow->req->seg_len;
10878c2ecf20Sopenharmony_ci	u32 len = PAGE_SIZE;
10888c2ecf20Sopenharmony_ci	u32 i = 0;
10898c2ecf20Sopenharmony_ci
10908c2ecf20Sopenharmony_ci	while (length && req->isge < ss->num_sge) {
10918c2ecf20Sopenharmony_ci		pages[i++] = virt_to_page(sge->vaddr);
10928c2ecf20Sopenharmony_ci
10938c2ecf20Sopenharmony_ci		sge->vaddr += len;
10948c2ecf20Sopenharmony_ci		sge->length -= len;
10958c2ecf20Sopenharmony_ci		sge->sge_length -= len;
10968c2ecf20Sopenharmony_ci		if (!sge->sge_length) {
10978c2ecf20Sopenharmony_ci			if (++req->isge < ss->num_sge)
10988c2ecf20Sopenharmony_ci				*sge = ss->sg_list[req->isge - 1];
10998c2ecf20Sopenharmony_ci		} else if (sge->length == 0 && sge->mr->lkey) {
11008c2ecf20Sopenharmony_ci			if (++sge->n >= RVT_SEGSZ) {
11018c2ecf20Sopenharmony_ci				++sge->m;
11028c2ecf20Sopenharmony_ci				sge->n = 0;
11038c2ecf20Sopenharmony_ci			}
11048c2ecf20Sopenharmony_ci			sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
11058c2ecf20Sopenharmony_ci			sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
11068c2ecf20Sopenharmony_ci		}
11078c2ecf20Sopenharmony_ci		length -= len;
11088c2ecf20Sopenharmony_ci	}
11098c2ecf20Sopenharmony_ci
11108c2ecf20Sopenharmony_ci	flow->length = flow->req->seg_len - length;
11118c2ecf20Sopenharmony_ci	*last = req->isge == ss->num_sge ? false : true;
11128c2ecf20Sopenharmony_ci	return i;
11138c2ecf20Sopenharmony_ci}
11148c2ecf20Sopenharmony_ci
11158c2ecf20Sopenharmony_cistatic void dma_unmap_flow(struct tid_rdma_flow *flow)
11168c2ecf20Sopenharmony_ci{
11178c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd;
11188c2ecf20Sopenharmony_ci	int i;
11198c2ecf20Sopenharmony_ci	struct tid_rdma_pageset *pset;
11208c2ecf20Sopenharmony_ci
11218c2ecf20Sopenharmony_ci	dd = flow->req->rcd->dd;
11228c2ecf20Sopenharmony_ci	for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
11238c2ecf20Sopenharmony_ci			i++, pset++) {
11248c2ecf20Sopenharmony_ci		if (pset->count && pset->addr) {
11258c2ecf20Sopenharmony_ci			dma_unmap_page(&dd->pcidev->dev,
11268c2ecf20Sopenharmony_ci				       pset->addr,
11278c2ecf20Sopenharmony_ci				       PAGE_SIZE * pset->count,
11288c2ecf20Sopenharmony_ci				       DMA_FROM_DEVICE);
11298c2ecf20Sopenharmony_ci			pset->mapped = 0;
11308c2ecf20Sopenharmony_ci		}
11318c2ecf20Sopenharmony_ci	}
11328c2ecf20Sopenharmony_ci}
11338c2ecf20Sopenharmony_ci
11348c2ecf20Sopenharmony_cistatic int dma_map_flow(struct tid_rdma_flow *flow, struct page **pages)
11358c2ecf20Sopenharmony_ci{
11368c2ecf20Sopenharmony_ci	int i;
11378c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd = flow->req->rcd->dd;
11388c2ecf20Sopenharmony_ci	struct tid_rdma_pageset *pset;
11398c2ecf20Sopenharmony_ci
11408c2ecf20Sopenharmony_ci	for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
11418c2ecf20Sopenharmony_ci			i++, pset++) {
11428c2ecf20Sopenharmony_ci		if (pset->count) {
11438c2ecf20Sopenharmony_ci			pset->addr = dma_map_page(&dd->pcidev->dev,
11448c2ecf20Sopenharmony_ci						  pages[pset->idx],
11458c2ecf20Sopenharmony_ci						  0,
11468c2ecf20Sopenharmony_ci						  PAGE_SIZE * pset->count,
11478c2ecf20Sopenharmony_ci						  DMA_FROM_DEVICE);
11488c2ecf20Sopenharmony_ci
11498c2ecf20Sopenharmony_ci			if (dma_mapping_error(&dd->pcidev->dev, pset->addr)) {
11508c2ecf20Sopenharmony_ci				dma_unmap_flow(flow);
11518c2ecf20Sopenharmony_ci				return -ENOMEM;
11528c2ecf20Sopenharmony_ci			}
11538c2ecf20Sopenharmony_ci			pset->mapped = 1;
11548c2ecf20Sopenharmony_ci		}
11558c2ecf20Sopenharmony_ci	}
11568c2ecf20Sopenharmony_ci	return 0;
11578c2ecf20Sopenharmony_ci}
11588c2ecf20Sopenharmony_ci
11598c2ecf20Sopenharmony_cistatic inline bool dma_mapped(struct tid_rdma_flow *flow)
11608c2ecf20Sopenharmony_ci{
11618c2ecf20Sopenharmony_ci	return !!flow->pagesets[0].mapped;
11628c2ecf20Sopenharmony_ci}
11638c2ecf20Sopenharmony_ci
11648c2ecf20Sopenharmony_ci/*
11658c2ecf20Sopenharmony_ci * Get pages pointers and identify contiguous physical memory chunks for a
11668c2ecf20Sopenharmony_ci * segment. All segments are of length flow->req->seg_len.
11678c2ecf20Sopenharmony_ci */
11688c2ecf20Sopenharmony_cistatic int kern_get_phys_blocks(struct tid_rdma_flow *flow,
11698c2ecf20Sopenharmony_ci				struct page **pages,
11708c2ecf20Sopenharmony_ci				struct rvt_sge_state *ss, bool *last)
11718c2ecf20Sopenharmony_ci{
11728c2ecf20Sopenharmony_ci	u8 npages;
11738c2ecf20Sopenharmony_ci
11748c2ecf20Sopenharmony_ci	/* Reuse previously computed pagesets, if any */
11758c2ecf20Sopenharmony_ci	if (flow->npagesets) {
11768c2ecf20Sopenharmony_ci		trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head,
11778c2ecf20Sopenharmony_ci					  flow);
11788c2ecf20Sopenharmony_ci		if (!dma_mapped(flow))
11798c2ecf20Sopenharmony_ci			return dma_map_flow(flow, pages);
11808c2ecf20Sopenharmony_ci		return 0;
11818c2ecf20Sopenharmony_ci	}
11828c2ecf20Sopenharmony_ci
11838c2ecf20Sopenharmony_ci	npages = kern_find_pages(flow, pages, ss, last);
11848c2ecf20Sopenharmony_ci
11858c2ecf20Sopenharmony_ci	if (flow->req->qp->pmtu == enum_to_mtu(OPA_MTU_4096))
11868c2ecf20Sopenharmony_ci		flow->npagesets =
11878c2ecf20Sopenharmony_ci			tid_rdma_find_phys_blocks_4k(flow, pages, npages,
11888c2ecf20Sopenharmony_ci						     flow->pagesets);
11898c2ecf20Sopenharmony_ci	else
11908c2ecf20Sopenharmony_ci		flow->npagesets =
11918c2ecf20Sopenharmony_ci			tid_rdma_find_phys_blocks_8k(flow, pages, npages,
11928c2ecf20Sopenharmony_ci						     flow->pagesets);
11938c2ecf20Sopenharmony_ci
11948c2ecf20Sopenharmony_ci	return dma_map_flow(flow, pages);
11958c2ecf20Sopenharmony_ci}
11968c2ecf20Sopenharmony_ci
/*
 * Record one TID group allocation (@grp, @cnt entries) in the flow's
 * tnode[] array for later rcvarray programming. @s is a label used only
 * for tracing. Note the group's map is snapshotted here, not modified.
 */
static inline void kern_add_tid_node(struct tid_rdma_flow *flow,
				     struct hfi1_ctxtdata *rcd, char *s,
				     struct tid_group *grp, u8 cnt)
{
	struct kern_tid_node *node = &flow->tnode[flow->tnode_cnt++];

	/* tnode[] is sized for one TID per page of the largest segment */
	WARN_ON_ONCE(flow->tnode_cnt >=
		     (TID_RDMA_MAX_SEGMENT_SIZE >> PAGE_SHIFT));
	/* Odd counts are unexpected (entries are consumed in pairs) */
	if (WARN_ON_ONCE(cnt & 1))
		dd_dev_err(rcd->dd,
			   "unexpected odd allocation cnt %u map 0x%x used %u",
			   cnt, grp->map, grp->used);

	node->grp = grp;
	node->map = grp->map;
	node->cnt = cnt;
	trace_hfi1_tid_node_add(flow->req->qp, s, flow->tnode_cnt - 1,
				grp->base, grp->map, grp->used, cnt);
}
12168c2ecf20Sopenharmony_ci
/*
 * Try to allocate pageset_count TID's from TID groups for a context
 *
 * This function allocates TID's without moving groups between lists or
 * modifying grp->map. This is done as follows, being cognizant of the lists
 * between which the TID groups will move:
 * 1. First allocate complete groups of 8 TID's since this is more efficient,
 *    these groups will move from group->full without affecting used
 * 2. If more TID's are needed allocate from used (will move from used->full or
 *    stay in used)
 * 3. If we still don't have the required number of TID's go back and look again
 *    at a complete group (will move from group->used)
 */
static int kern_alloc_tids(struct tid_rdma_flow *flow)
{
	struct hfi1_ctxtdata *rcd = flow->req->rcd;
	struct hfi1_devdata *dd = rcd->dd;
	u32 ngroups, pageidx = 0;
	struct tid_group *group = NULL, *used;
	u8 use;

	flow->tnode_cnt = 0;
	/* Number of whole groups we can consume outright */
	ngroups = flow->npagesets / dd->rcv_entries.group_size;
	if (!ngroups)
		goto used_list;

	/* First look at complete groups */
	list_for_each_entry(group,  &rcd->tid_group_list.list, list) {
		kern_add_tid_node(flow, rcd, "complete groups", group,
				  group->size);

		pageidx += group->size;
		if (!--ngroups)
			break;
	}

	if (pageidx >= flow->npagesets)
		goto ok;

used_list:
	/* Now look at partially used groups */
	list_for_each_entry(used, &rcd->tid_used_list.list, list) {
		use = min_t(u32, flow->npagesets - pageidx,
			    used->size - used->used);
		kern_add_tid_node(flow, rcd, "used groups", used, use);

		pageidx += use;
		if (pageidx >= flow->npagesets)
			goto ok;
	}

	/*
	 * Look again at a complete group, continuing from where we left.
	 * However, if we are at the head, we have reached the end of the
	 * complete groups list from the first loop above
	 */
	if (group && &group->list == &rcd->tid_group_list.list)
		goto bail_eagain;
	/* Re-anchor the cursor; a NULL group means start from the head */
	group = list_prepare_entry(group, &rcd->tid_group_list.list,
				   list);
	if (list_is_last(&group->list, &rcd->tid_group_list.list))
		goto bail_eagain;
	group = list_next_entry(group, list);
	use = min_t(u32, flow->npagesets - pageidx, group->size);
	kern_add_tid_node(flow, rcd, "complete continue", group, use);
	pageidx += use;
	if (pageidx >= flow->npagesets)
		goto ok;
bail_eagain:
	/* Not enough free TIDs right now; caller queues and retries later */
	trace_hfi1_msg_alloc_tids(flow->req->qp, " insufficient tids: needed ",
				  (u64)flow->npagesets);
	return -EAGAIN;
ok:
	return 0;
}
12928c2ecf20Sopenharmony_ci
/*
 * Program the hardware rcvarray entries for one allocated TID group
 * (flow->tnode[grp_num]): map each free entry to the next pageset, emit
 * formatted TID entries into flow->tid_entry[], accumulate flow->npkts,
 * and update the group's bookkeeping and free/used/full list membership.
 */
static void kern_program_rcv_group(struct tid_rdma_flow *flow, int grp_num,
				   u32 *pset_idx)
{
	struct hfi1_ctxtdata *rcd = flow->req->rcd;
	struct hfi1_devdata *dd = rcd->dd;
	struct kern_tid_node *node = &flow->tnode[grp_num];
	struct tid_group *grp = node->grp;
	struct tid_rdma_pageset *pset;
	u32 pmtu_pg = flow->req->qp->pmtu >> PAGE_SHIFT;
	u32 rcventry, npages = 0, pair = 0, tidctrl;
	u8 i, cnt = 0;

	for (i = 0; i < grp->size; i++) {
		rcventry = grp->base + i;

		/* Skip entries in use (per the snapshot) or beyond our count */
		if (node->map & BIT(i) || cnt >= node->cnt) {
			rcv_array_wc_fill(dd, rcventry);
			continue;
		}
		pset = &flow->pagesets[(*pset_idx)++];
		if (pset->count) {
			hfi1_put_tid(dd, rcventry, PT_EXPECTED,
				     pset->addr, trdma_pset_order(pset));
		} else {
			/* Empty pageset still consumes an rcvarray slot */
			hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
		}
		npages += pset->count;

		rcventry -= rcd->expected_base;
		tidctrl = pair ? 0x3 : rcventry & 0x1 ? 0x2 : 0x1;
		/*
		 * A single TID entry will be used to use a rcvarr pair (with
		 * tidctrl 0x3), if ALL these are true (a) the bit pos is even
		 * (b) the group map shows current and the next bits as free
		 * indicating two consecutive rcvarry entries are available (c)
		 * we actually need 2 more entries
		 */
		pair = !(i & 0x1) && !((node->map >> i) & 0x3) &&
			node->cnt >= cnt + 2;
		if (!pair) {
			if (!pset->count)
				tidctrl = 0x1;
			flow->tid_entry[flow->tidcnt++] =
				EXP_TID_SET(IDX, rcventry >> 1) |
				EXP_TID_SET(CTRL, tidctrl) |
				EXP_TID_SET(LEN, npages);
			trace_hfi1_tid_entry_alloc(/* entry */
			   flow->req->qp, flow->tidcnt - 1,
			   flow->tid_entry[flow->tidcnt - 1]);

			/* Efficient DIV_ROUND_UP(npages, pmtu_pg) */
			flow->npkts += (npages + pmtu_pg - 1) >> ilog2(pmtu_pg);
			npages = 0;
		}

		/* Move the group between lists as its entries fill up */
		if (grp->used == grp->size - 1)
			tid_group_move(grp, &rcd->tid_used_list,
				       &rcd->tid_full_list);
		else if (!grp->used)
			tid_group_move(grp, &rcd->tid_group_list,
				       &rcd->tid_used_list);

		grp->used++;
		grp->map |= BIT(i);
		cnt++;
	}
}
13608c2ecf20Sopenharmony_ci
13618c2ecf20Sopenharmony_cistatic void kern_unprogram_rcv_group(struct tid_rdma_flow *flow, int grp_num)
13628c2ecf20Sopenharmony_ci{
13638c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *rcd = flow->req->rcd;
13648c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd = rcd->dd;
13658c2ecf20Sopenharmony_ci	struct kern_tid_node *node = &flow->tnode[grp_num];
13668c2ecf20Sopenharmony_ci	struct tid_group *grp = node->grp;
13678c2ecf20Sopenharmony_ci	u32 rcventry;
13688c2ecf20Sopenharmony_ci	u8 i, cnt = 0;
13698c2ecf20Sopenharmony_ci
13708c2ecf20Sopenharmony_ci	for (i = 0; i < grp->size; i++) {
13718c2ecf20Sopenharmony_ci		rcventry = grp->base + i;
13728c2ecf20Sopenharmony_ci
13738c2ecf20Sopenharmony_ci		if (node->map & BIT(i) || cnt >= node->cnt) {
13748c2ecf20Sopenharmony_ci			rcv_array_wc_fill(dd, rcventry);
13758c2ecf20Sopenharmony_ci			continue;
13768c2ecf20Sopenharmony_ci		}
13778c2ecf20Sopenharmony_ci
13788c2ecf20Sopenharmony_ci		hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
13798c2ecf20Sopenharmony_ci
13808c2ecf20Sopenharmony_ci		grp->used--;
13818c2ecf20Sopenharmony_ci		grp->map &= ~BIT(i);
13828c2ecf20Sopenharmony_ci		cnt++;
13838c2ecf20Sopenharmony_ci
13848c2ecf20Sopenharmony_ci		if (grp->used == grp->size - 1)
13858c2ecf20Sopenharmony_ci			tid_group_move(grp, &rcd->tid_full_list,
13868c2ecf20Sopenharmony_ci				       &rcd->tid_used_list);
13878c2ecf20Sopenharmony_ci		else if (!grp->used)
13888c2ecf20Sopenharmony_ci			tid_group_move(grp, &rcd->tid_used_list,
13898c2ecf20Sopenharmony_ci				       &rcd->tid_group_list);
13908c2ecf20Sopenharmony_ci	}
13918c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(cnt & 1)) {
13928c2ecf20Sopenharmony_ci		struct hfi1_ctxtdata *rcd = flow->req->rcd;
13938c2ecf20Sopenharmony_ci		struct hfi1_devdata *dd = rcd->dd;
13948c2ecf20Sopenharmony_ci
13958c2ecf20Sopenharmony_ci		dd_dev_err(dd, "unexpected odd free cnt %u map 0x%x used %u",
13968c2ecf20Sopenharmony_ci			   cnt, grp->map, grp->used);
13978c2ecf20Sopenharmony_ci	}
13988c2ecf20Sopenharmony_ci}
13998c2ecf20Sopenharmony_ci
14008c2ecf20Sopenharmony_cistatic void kern_program_rcvarray(struct tid_rdma_flow *flow)
14018c2ecf20Sopenharmony_ci{
14028c2ecf20Sopenharmony_ci	u32 pset_idx = 0;
14038c2ecf20Sopenharmony_ci	int i;
14048c2ecf20Sopenharmony_ci
14058c2ecf20Sopenharmony_ci	flow->npkts = 0;
14068c2ecf20Sopenharmony_ci	flow->tidcnt = 0;
14078c2ecf20Sopenharmony_ci	for (i = 0; i < flow->tnode_cnt; i++)
14088c2ecf20Sopenharmony_ci		kern_program_rcv_group(flow, i, &pset_idx);
14098c2ecf20Sopenharmony_ci	trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head, flow);
14108c2ecf20Sopenharmony_ci}
14118c2ecf20Sopenharmony_ci
/**
 * hfi1_kern_exp_rcv_setup() - setup TID's and flow for one segment of a
 * TID RDMA request
 *
 * @req: TID RDMA request for which the segment/flow is being set up
 * @ss: sge state, maintains state across successive segments of a sge
 * @last: set to true after the last sge segment has been processed
 *
 * This function
 * (1) finds a free flow entry in the flow circular buffer
 * (2) finds pages and continuous physical chunks constituting one segment
 *     of an sge
 * (3) allocates TID group entries for those chunks
 * (4) programs rcvarray entries in the hardware corresponding to those
 *     TID's
 * (5) computes a tidarray with formatted TID entries which can be sent
 *     to the sender
 * (6) Reserves and programs HW flows.
 * (7) It also manages queuing the QP when TID/flow resources are not
 *     available.
 *
 * @req points to struct tid_rdma_request of which the segments are a part. The
 * function uses qp, rcd and seg_len members of @req. In the absence of errors,
 * req->flow_idx is the index of the flow which has been prepared in this
 * invocation of function call. With flow = &req->flows[req->flow_idx],
 * flow->tid_entry contains the TID array which the sender can use for TID RDMA
 * sends and flow->npkts contains number of packets required to send the
 * segment.
 *
 * hfi1_check_sge_align should be called prior to calling this function and if
 * it signals error TID RDMA cannot be used for this sge and this function
 * should not be called.
 *
 * For the queuing, caller must hold the flow->req->qp s_lock from the send
 * engine and the function will procure the exp_lock.
 *
 * Return:
 * The function returns -EAGAIN if sufficient number of TID/flow resources to
 * map the segment could not be allocated. In this case the function should be
 * called again with previous arguments to retry the TID allocation. There are
 * no other error returns. The function returns 0 on success.
 */
int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
			    struct rvt_sge_state *ss, bool *last)
	__must_hold(&req->qp->s_lock)
{
	struct tid_rdma_flow *flow = &req->flows[req->setup_head];
	struct hfi1_ctxtdata *rcd = req->rcd;
	struct hfi1_qp_priv *qpriv = req->qp->priv;
	unsigned long flags;
	struct rvt_qp *fqp;
	u16 clear_tail = req->clear_tail;

	lockdep_assert_held(&req->qp->s_lock);
	/*
	 * We return error if either (a) we don't have space in the flow
	 * circular buffer, or (b) we already have max entries in the buffer.
	 * Max entries depend on the type of request we are processing and the
	 * negotiated TID RDMA parameters.
	 */
	if (!CIRC_SPACE(req->setup_head, clear_tail, MAX_FLOWS) ||
	    CIRC_CNT(req->setup_head, clear_tail, MAX_FLOWS) >=
	    req->n_flows)
		return -EINVAL;

	/*
	 * Get pages, identify contiguous physical memory chunks for the segment
	 * If we can not determine a DMA address mapping we will treat it just
	 * like if we ran out of space above.
	 */
	if (kern_get_phys_blocks(flow, qpriv->pages, ss, last)) {
		hfi1_wait_kmem(flow->req->qp);
		return -ENOMEM;
	}

	spin_lock_irqsave(&rcd->exp_lock, flags);
	/* Preserve FIFO fairness: queue behind QPs already waiting for TIDs */
	if (kernel_tid_waiters(rcd, &rcd->rarr_queue, flow->req->qp))
		goto queue;

	/*
	 * At this point we know the number of pagesets and hence the number of
	 * TID's to map the segment. Allocate the TID's from the TID groups. If
	 * we cannot allocate the required number we exit and try again later
	 */
	if (kern_alloc_tids(flow))
		goto queue;
	/*
	 * Finally program the TID entries with the pagesets, compute the
	 * tidarray and enable the HW flow
	 */
	kern_program_rcvarray(flow);

	/*
	 * Setup the flow state with relevant information.
	 * This information is used for tracking the sequence of data packets
	 * for the segment.
	 * The flow is setup here as this is the most accurate time and place
	 * to do so. Doing at a later time runs the risk of the flow data in
	 * qpriv getting out of sync.
	 */
	memset(&flow->flow_state, 0x0, sizeof(flow->flow_state));
	flow->idx = qpriv->flow_state.index;
	flow->flow_state.generation = qpriv->flow_state.generation;
	flow->flow_state.spsn = qpriv->flow_state.psn;
	flow->flow_state.lpsn = flow->flow_state.spsn + flow->npkts - 1;
	flow->flow_state.r_next_psn =
		full_flow_psn(flow, flow->flow_state.spsn);
	/* Advance the per-QP flow PSN past this segment's packets */
	qpriv->flow_state.psn += flow->npkts;

	dequeue_tid_waiter(rcd, &rcd->rarr_queue, flow->req->qp);
	/* get head before dropping lock */
	fqp = first_qp(rcd, &rcd->rarr_queue);
	spin_unlock_irqrestore(&rcd->exp_lock, flags);
	tid_rdma_schedule_tid_wakeup(fqp);

	req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);
	return 0;
queue:
	queue_qp_for_tid_wait(rcd, &rcd->rarr_queue, flow->req->qp);
	spin_unlock_irqrestore(&rcd->exp_lock, flags);
	return -EAGAIN;
}
15348c2ecf20Sopenharmony_ci
/* Discard computed pagesets so the flow slot can be reused for a new segment */
static void hfi1_tid_rdma_reset_flow(struct tid_rdma_flow *flow)
{
	flow->npagesets = 0;
}
15398c2ecf20Sopenharmony_ci
/*
 * This function is called after one segment has been successfully sent to
 * release the flow and TID HW/SW resources for that segment. The segments for a
 * TID RDMA request are setup and cleared in FIFO order which is managed using a
 * circular buffer.
 *
 * Returns 0 on success, -EINVAL if there is no segment to clear.
 */
int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req)
	__must_hold(&req->qp->s_lock)
{
	struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
	struct hfi1_ctxtdata *rcd = req->rcd;
	unsigned long flags;
	int i;
	struct rvt_qp *fqp;

	lockdep_assert_held(&req->qp->s_lock);
	/* Exit if we have nothing in the flow circular buffer */
	if (!CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS))
		return -EINVAL;

	/* TID group bookkeeping is protected by the context's exp_lock */
	spin_lock_irqsave(&rcd->exp_lock, flags);

	for (i = 0; i < flow->tnode_cnt; i++)
		kern_unprogram_rcv_group(flow, i);
	/* To prevent double unprogramming */
	flow->tnode_cnt = 0;
	/* get head before dropping lock */
	fqp = first_qp(rcd, &rcd->rarr_queue);
	spin_unlock_irqrestore(&rcd->exp_lock, flags);

	/* DMA unmap does not need exp_lock */
	dma_unmap_flow(flow);

	hfi1_tid_rdma_reset_flow(flow);
	req->clear_tail = (req->clear_tail + 1) & (MAX_FLOWS - 1);

	/* Wake the next TID waiter; directly if it is this QP itself */
	if (fqp == req->qp) {
		__trigger_tid_waiter(fqp);
		rvt_put_qp(fqp);
	} else {
		tid_rdma_schedule_tid_wakeup(fqp);
	}

	return 0;
}
15848c2ecf20Sopenharmony_ci
15858c2ecf20Sopenharmony_ci/*
15868c2ecf20Sopenharmony_ci * This function is called to release all the tid entries for
15878c2ecf20Sopenharmony_ci * a request.
15888c2ecf20Sopenharmony_ci */
15898c2ecf20Sopenharmony_civoid hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req)
15908c2ecf20Sopenharmony_ci	__must_hold(&req->qp->s_lock)
15918c2ecf20Sopenharmony_ci{
15928c2ecf20Sopenharmony_ci	/* Use memory barrier for proper ordering */
15938c2ecf20Sopenharmony_ci	while (CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS)) {
15948c2ecf20Sopenharmony_ci		if (hfi1_kern_exp_rcv_clear(req))
15958c2ecf20Sopenharmony_ci			break;
15968c2ecf20Sopenharmony_ci	}
15978c2ecf20Sopenharmony_ci}
15988c2ecf20Sopenharmony_ci
/**
 * hfi1_kern_exp_rcv_free_flows - free previously allocated flow information
 * @req: the tid rdma request to be cleaned
 */
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req)
{
	kfree(req->flows);
	req->flows = NULL;
}
16088c2ecf20Sopenharmony_ci
/**
 * __trdma_clean_swqe - clean up for large sized QPs
 * @qp: the queue pair
 * @wqe: the send wqe
 *
 * Frees the flow array attached to the WQE's private TID RDMA request.
 */
void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
{
	struct hfi1_swqe_priv *p = wqe->priv;

	hfi1_kern_exp_rcv_free_flows(&p->tid_req);
}
16208c2ecf20Sopenharmony_ci
16218c2ecf20Sopenharmony_ci/*
16228c2ecf20Sopenharmony_ci * This can be called at QP create time or in the data path.
16238c2ecf20Sopenharmony_ci */
16248c2ecf20Sopenharmony_cistatic int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
16258c2ecf20Sopenharmony_ci					 gfp_t gfp)
16268c2ecf20Sopenharmony_ci{
16278c2ecf20Sopenharmony_ci	struct tid_rdma_flow *flows;
16288c2ecf20Sopenharmony_ci	int i;
16298c2ecf20Sopenharmony_ci
16308c2ecf20Sopenharmony_ci	if (likely(req->flows))
16318c2ecf20Sopenharmony_ci		return 0;
16328c2ecf20Sopenharmony_ci	flows = kmalloc_node(MAX_FLOWS * sizeof(*flows), gfp,
16338c2ecf20Sopenharmony_ci			     req->rcd->numa_id);
16348c2ecf20Sopenharmony_ci	if (!flows)
16358c2ecf20Sopenharmony_ci		return -ENOMEM;
16368c2ecf20Sopenharmony_ci	/* mini init */
16378c2ecf20Sopenharmony_ci	for (i = 0; i < MAX_FLOWS; i++) {
16388c2ecf20Sopenharmony_ci		flows[i].req = req;
16398c2ecf20Sopenharmony_ci		flows[i].npagesets = 0;
16408c2ecf20Sopenharmony_ci		flows[i].pagesets[0].mapped =  0;
16418c2ecf20Sopenharmony_ci		flows[i].resync_npkts = 0;
16428c2ecf20Sopenharmony_ci	}
16438c2ecf20Sopenharmony_ci	req->flows = flows;
16448c2ecf20Sopenharmony_ci	return 0;
16458c2ecf20Sopenharmony_ci}
16468c2ecf20Sopenharmony_ci
/* Pre-initialize the "static" fields of a per-WQE TID RDMA request. */
static void hfi1_init_trdma_req(struct rvt_qp *qp,
				struct tid_rdma_request *req)
{
	struct hfi1_qp_priv *qpriv = qp->priv;

	/*
	 * Initialize various TID RDMA request variables.
	 * These variables are "static", which is why they
	 * can be pre-initialized here before the WRs have
	 * even been submitted.
	 * However, non-NULL values for these variables do not
	 * imply that this WQE has been enabled for TID RDMA.
	 * Drivers should check the WQE's opcode to determine
	 * if a request is a TID RDMA one or not.
	 */
	req->qp = qp;
	req->rcd = qpriv->rcd;
}
16658c2ecf20Sopenharmony_ci
16668c2ecf20Sopenharmony_ciu64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
16678c2ecf20Sopenharmony_ci			    void *context, int vl, int mode, u64 data)
16688c2ecf20Sopenharmony_ci{
16698c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd = context;
16708c2ecf20Sopenharmony_ci
16718c2ecf20Sopenharmony_ci	return dd->verbs_dev.n_tidwait;
16728c2ecf20Sopenharmony_ci}
16738c2ecf20Sopenharmony_ci
16748c2ecf20Sopenharmony_cistatic struct tid_rdma_flow *find_flow_ib(struct tid_rdma_request *req,
16758c2ecf20Sopenharmony_ci					  u32 psn, u16 *fidx)
16768c2ecf20Sopenharmony_ci{
16778c2ecf20Sopenharmony_ci	u16 head, tail;
16788c2ecf20Sopenharmony_ci	struct tid_rdma_flow *flow;
16798c2ecf20Sopenharmony_ci
16808c2ecf20Sopenharmony_ci	head = req->setup_head;
16818c2ecf20Sopenharmony_ci	tail = req->clear_tail;
16828c2ecf20Sopenharmony_ci	for ( ; CIRC_CNT(head, tail, MAX_FLOWS);
16838c2ecf20Sopenharmony_ci	     tail = CIRC_NEXT(tail, MAX_FLOWS)) {
16848c2ecf20Sopenharmony_ci		flow = &req->flows[tail];
16858c2ecf20Sopenharmony_ci		if (cmp_psn(psn, flow->flow_state.ib_spsn) >= 0 &&
16868c2ecf20Sopenharmony_ci		    cmp_psn(psn, flow->flow_state.ib_lpsn) <= 0) {
16878c2ecf20Sopenharmony_ci			if (fidx)
16888c2ecf20Sopenharmony_ci				*fidx = tail;
16898c2ecf20Sopenharmony_ci			return flow;
16908c2ecf20Sopenharmony_ci		}
16918c2ecf20Sopenharmony_ci	}
16928c2ecf20Sopenharmony_ci	return NULL;
16938c2ecf20Sopenharmony_ci}
16948c2ecf20Sopenharmony_ci
16958c2ecf20Sopenharmony_ci/* TID RDMA READ functions */
16968c2ecf20Sopenharmony_ciu32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
16978c2ecf20Sopenharmony_ci				    struct ib_other_headers *ohdr, u32 *bth1,
16988c2ecf20Sopenharmony_ci				    u32 *bth2, u32 *len)
16998c2ecf20Sopenharmony_ci{
17008c2ecf20Sopenharmony_ci	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
17018c2ecf20Sopenharmony_ci	struct tid_rdma_flow *flow = &req->flows[req->flow_idx];
17028c2ecf20Sopenharmony_ci	struct rvt_qp *qp = req->qp;
17038c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
17048c2ecf20Sopenharmony_ci	struct hfi1_swqe_priv *wpriv = wqe->priv;
17058c2ecf20Sopenharmony_ci	struct tid_rdma_read_req *rreq = &ohdr->u.tid_rdma.r_req;
17068c2ecf20Sopenharmony_ci	struct tid_rdma_params *remote;
17078c2ecf20Sopenharmony_ci	u32 req_len = 0;
17088c2ecf20Sopenharmony_ci	void *req_addr = NULL;
17098c2ecf20Sopenharmony_ci
17108c2ecf20Sopenharmony_ci	/* This is the IB psn used to send the request */
17118c2ecf20Sopenharmony_ci	*bth2 = mask_psn(flow->flow_state.ib_spsn + flow->pkt);
17128c2ecf20Sopenharmony_ci	trace_hfi1_tid_flow_build_read_pkt(qp, req->flow_idx, flow);
17138c2ecf20Sopenharmony_ci
17148c2ecf20Sopenharmony_ci	/* TID Entries for TID RDMA READ payload */
17158c2ecf20Sopenharmony_ci	req_addr = &flow->tid_entry[flow->tid_idx];
17168c2ecf20Sopenharmony_ci	req_len = sizeof(*flow->tid_entry) *
17178c2ecf20Sopenharmony_ci			(flow->tidcnt - flow->tid_idx);
17188c2ecf20Sopenharmony_ci
17198c2ecf20Sopenharmony_ci	memset(&ohdr->u.tid_rdma.r_req, 0, sizeof(ohdr->u.tid_rdma.r_req));
17208c2ecf20Sopenharmony_ci	wpriv->ss.sge.vaddr = req_addr;
17218c2ecf20Sopenharmony_ci	wpriv->ss.sge.sge_length = req_len;
17228c2ecf20Sopenharmony_ci	wpriv->ss.sge.length = wpriv->ss.sge.sge_length;
17238c2ecf20Sopenharmony_ci	/*
17248c2ecf20Sopenharmony_ci	 * We can safely zero these out. Since the first SGE covers the
17258c2ecf20Sopenharmony_ci	 * entire packet, nothing else should even look at the MR.
17268c2ecf20Sopenharmony_ci	 */
17278c2ecf20Sopenharmony_ci	wpriv->ss.sge.mr = NULL;
17288c2ecf20Sopenharmony_ci	wpriv->ss.sge.m = 0;
17298c2ecf20Sopenharmony_ci	wpriv->ss.sge.n = 0;
17308c2ecf20Sopenharmony_ci
17318c2ecf20Sopenharmony_ci	wpriv->ss.sg_list = NULL;
17328c2ecf20Sopenharmony_ci	wpriv->ss.total_len = wpriv->ss.sge.sge_length;
17338c2ecf20Sopenharmony_ci	wpriv->ss.num_sge = 1;
17348c2ecf20Sopenharmony_ci
17358c2ecf20Sopenharmony_ci	/* Construct the TID RDMA READ REQ packet header */
17368c2ecf20Sopenharmony_ci	rcu_read_lock();
17378c2ecf20Sopenharmony_ci	remote = rcu_dereference(qpriv->tid_rdma.remote);
17388c2ecf20Sopenharmony_ci
17398c2ecf20Sopenharmony_ci	KDETH_RESET(rreq->kdeth0, KVER, 0x1);
17408c2ecf20Sopenharmony_ci	KDETH_RESET(rreq->kdeth1, JKEY, remote->jkey);
17418c2ecf20Sopenharmony_ci	rreq->reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr +
17428c2ecf20Sopenharmony_ci			   req->cur_seg * req->seg_len + flow->sent);
17438c2ecf20Sopenharmony_ci	rreq->reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey);
17448c2ecf20Sopenharmony_ci	rreq->reth.length = cpu_to_be32(*len);
17458c2ecf20Sopenharmony_ci	rreq->tid_flow_psn =
17468c2ecf20Sopenharmony_ci		cpu_to_be32((flow->flow_state.generation <<
17478c2ecf20Sopenharmony_ci			     HFI1_KDETH_BTH_SEQ_SHIFT) |
17488c2ecf20Sopenharmony_ci			    ((flow->flow_state.spsn + flow->pkt) &
17498c2ecf20Sopenharmony_ci			     HFI1_KDETH_BTH_SEQ_MASK));
17508c2ecf20Sopenharmony_ci	rreq->tid_flow_qp =
17518c2ecf20Sopenharmony_ci		cpu_to_be32(qpriv->tid_rdma.local.qp |
17528c2ecf20Sopenharmony_ci			    ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
17538c2ecf20Sopenharmony_ci			     TID_RDMA_DESTQP_FLOW_SHIFT) |
17548c2ecf20Sopenharmony_ci			    qpriv->rcd->ctxt);
17558c2ecf20Sopenharmony_ci	rreq->verbs_qp = cpu_to_be32(qp->remote_qpn);
17568c2ecf20Sopenharmony_ci	*bth1 &= ~RVT_QPN_MASK;
17578c2ecf20Sopenharmony_ci	*bth1 |= remote->qp;
17588c2ecf20Sopenharmony_ci	*bth2 |= IB_BTH_REQ_ACK;
17598c2ecf20Sopenharmony_ci	rcu_read_unlock();
17608c2ecf20Sopenharmony_ci
17618c2ecf20Sopenharmony_ci	/* We are done with this segment */
17628c2ecf20Sopenharmony_ci	flow->sent += *len;
17638c2ecf20Sopenharmony_ci	req->cur_seg++;
17648c2ecf20Sopenharmony_ci	qp->s_state = TID_OP(READ_REQ);
17658c2ecf20Sopenharmony_ci	req->ack_pending++;
17668c2ecf20Sopenharmony_ci	req->flow_idx = (req->flow_idx + 1) & (MAX_FLOWS - 1);
17678c2ecf20Sopenharmony_ci	qpriv->pending_tid_r_segs++;
17688c2ecf20Sopenharmony_ci	qp->s_num_rd_atomic++;
17698c2ecf20Sopenharmony_ci
17708c2ecf20Sopenharmony_ci	/* Set the TID RDMA READ request payload size */
17718c2ecf20Sopenharmony_ci	*len = req_len;
17728c2ecf20Sopenharmony_ci
17738c2ecf20Sopenharmony_ci	return sizeof(ohdr->u.tid_rdma.r_req) / sizeof(u32);
17748c2ecf20Sopenharmony_ci}
17758c2ecf20Sopenharmony_ci
17768c2ecf20Sopenharmony_ci/*
17778c2ecf20Sopenharmony_ci * @len: contains the data length to read upon entry and the read request
17788c2ecf20Sopenharmony_ci *       payload length upon exit.
17798c2ecf20Sopenharmony_ci */
17808c2ecf20Sopenharmony_ciu32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
17818c2ecf20Sopenharmony_ci				 struct ib_other_headers *ohdr, u32 *bth1,
17828c2ecf20Sopenharmony_ci				 u32 *bth2, u32 *len)
17838c2ecf20Sopenharmony_ci	__must_hold(&qp->s_lock)
17848c2ecf20Sopenharmony_ci{
17858c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
17868c2ecf20Sopenharmony_ci	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
17878c2ecf20Sopenharmony_ci	struct tid_rdma_flow *flow = NULL;
17888c2ecf20Sopenharmony_ci	u32 hdwords = 0;
17898c2ecf20Sopenharmony_ci	bool last;
17908c2ecf20Sopenharmony_ci	bool retry = true;
17918c2ecf20Sopenharmony_ci	u32 npkts = rvt_div_round_up_mtu(qp, *len);
17928c2ecf20Sopenharmony_ci
17938c2ecf20Sopenharmony_ci	trace_hfi1_tid_req_build_read_req(qp, 0, wqe->wr.opcode, wqe->psn,
17948c2ecf20Sopenharmony_ci					  wqe->lpsn, req);
17958c2ecf20Sopenharmony_ci	/*
17968c2ecf20Sopenharmony_ci	 * Check sync conditions. Make sure that there are no pending
17978c2ecf20Sopenharmony_ci	 * segments before freeing the flow.
17988c2ecf20Sopenharmony_ci	 */
17998c2ecf20Sopenharmony_cisync_check:
18008c2ecf20Sopenharmony_ci	if (req->state == TID_REQUEST_SYNC) {
18018c2ecf20Sopenharmony_ci		if (qpriv->pending_tid_r_segs)
18028c2ecf20Sopenharmony_ci			goto done;
18038c2ecf20Sopenharmony_ci
18048c2ecf20Sopenharmony_ci		hfi1_kern_clear_hw_flow(req->rcd, qp);
18058c2ecf20Sopenharmony_ci		qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
18068c2ecf20Sopenharmony_ci		req->state = TID_REQUEST_ACTIVE;
18078c2ecf20Sopenharmony_ci	}
18088c2ecf20Sopenharmony_ci
18098c2ecf20Sopenharmony_ci	/*
18108c2ecf20Sopenharmony_ci	 * If the request for this segment is resent, the tid resources should
18118c2ecf20Sopenharmony_ci	 * have been allocated before. In this case, req->flow_idx should
18128c2ecf20Sopenharmony_ci	 * fall behind req->setup_head.
18138c2ecf20Sopenharmony_ci	 */
18148c2ecf20Sopenharmony_ci	if (req->flow_idx == req->setup_head) {
18158c2ecf20Sopenharmony_ci		retry = false;
18168c2ecf20Sopenharmony_ci		if (req->state == TID_REQUEST_RESEND) {
18178c2ecf20Sopenharmony_ci			/*
18188c2ecf20Sopenharmony_ci			 * This is the first new segment for a request whose
18198c2ecf20Sopenharmony_ci			 * earlier segments have been re-sent. We need to
18208c2ecf20Sopenharmony_ci			 * set up the sge pointer correctly.
18218c2ecf20Sopenharmony_ci			 */
18228c2ecf20Sopenharmony_ci			restart_sge(&qp->s_sge, wqe, req->s_next_psn,
18238c2ecf20Sopenharmony_ci				    qp->pmtu);
18248c2ecf20Sopenharmony_ci			req->isge = 0;
18258c2ecf20Sopenharmony_ci			req->state = TID_REQUEST_ACTIVE;
18268c2ecf20Sopenharmony_ci		}
18278c2ecf20Sopenharmony_ci
18288c2ecf20Sopenharmony_ci		/*
18298c2ecf20Sopenharmony_ci		 * Check sync. The last PSN of each generation is reserved for
18308c2ecf20Sopenharmony_ci		 * RESYNC.
18318c2ecf20Sopenharmony_ci		 */
18328c2ecf20Sopenharmony_ci		if ((qpriv->flow_state.psn + npkts) > MAX_TID_FLOW_PSN - 1) {
18338c2ecf20Sopenharmony_ci			req->state = TID_REQUEST_SYNC;
18348c2ecf20Sopenharmony_ci			goto sync_check;
18358c2ecf20Sopenharmony_ci		}
18368c2ecf20Sopenharmony_ci
18378c2ecf20Sopenharmony_ci		/* Allocate the flow if not yet */
18388c2ecf20Sopenharmony_ci		if (hfi1_kern_setup_hw_flow(qpriv->rcd, qp))
18398c2ecf20Sopenharmony_ci			goto done;
18408c2ecf20Sopenharmony_ci
18418c2ecf20Sopenharmony_ci		/*
18428c2ecf20Sopenharmony_ci		 * The following call will advance req->setup_head after
18438c2ecf20Sopenharmony_ci		 * allocating the tid entries.
18448c2ecf20Sopenharmony_ci		 */
18458c2ecf20Sopenharmony_ci		if (hfi1_kern_exp_rcv_setup(req, &qp->s_sge, &last)) {
18468c2ecf20Sopenharmony_ci			req->state = TID_REQUEST_QUEUED;
18478c2ecf20Sopenharmony_ci
18488c2ecf20Sopenharmony_ci			/*
18498c2ecf20Sopenharmony_ci			 * We don't have resources for this segment. The QP has
18508c2ecf20Sopenharmony_ci			 * already been queued.
18518c2ecf20Sopenharmony_ci			 */
18528c2ecf20Sopenharmony_ci			goto done;
18538c2ecf20Sopenharmony_ci		}
18548c2ecf20Sopenharmony_ci	}
18558c2ecf20Sopenharmony_ci
18568c2ecf20Sopenharmony_ci	/* req->flow_idx should only be one slot behind req->setup_head */
18578c2ecf20Sopenharmony_ci	flow = &req->flows[req->flow_idx];
18588c2ecf20Sopenharmony_ci	flow->pkt = 0;
18598c2ecf20Sopenharmony_ci	flow->tid_idx = 0;
18608c2ecf20Sopenharmony_ci	flow->sent = 0;
18618c2ecf20Sopenharmony_ci	if (!retry) {
18628c2ecf20Sopenharmony_ci		/* Set the first and last IB PSN for the flow in use.*/
18638c2ecf20Sopenharmony_ci		flow->flow_state.ib_spsn = req->s_next_psn;
18648c2ecf20Sopenharmony_ci		flow->flow_state.ib_lpsn =
18658c2ecf20Sopenharmony_ci			flow->flow_state.ib_spsn + flow->npkts - 1;
18668c2ecf20Sopenharmony_ci	}
18678c2ecf20Sopenharmony_ci
18688c2ecf20Sopenharmony_ci	/* Calculate the next segment start psn.*/
18698c2ecf20Sopenharmony_ci	req->s_next_psn += flow->npkts;
18708c2ecf20Sopenharmony_ci
18718c2ecf20Sopenharmony_ci	/* Build the packet header */
18728c2ecf20Sopenharmony_ci	hdwords = hfi1_build_tid_rdma_read_packet(wqe, ohdr, bth1, bth2, len);
18738c2ecf20Sopenharmony_cidone:
18748c2ecf20Sopenharmony_ci	return hdwords;
18758c2ecf20Sopenharmony_ci}
18768c2ecf20Sopenharmony_ci
18778c2ecf20Sopenharmony_ci/*
18788c2ecf20Sopenharmony_ci * Validate and accept the TID RDMA READ request parameters.
18798c2ecf20Sopenharmony_ci * Return 0 if the request is accepted successfully;
18808c2ecf20Sopenharmony_ci * Return 1 otherwise.
18818c2ecf20Sopenharmony_ci */
18828c2ecf20Sopenharmony_cistatic int tid_rdma_rcv_read_request(struct rvt_qp *qp,
18838c2ecf20Sopenharmony_ci				     struct rvt_ack_entry *e,
18848c2ecf20Sopenharmony_ci				     struct hfi1_packet *packet,
18858c2ecf20Sopenharmony_ci				     struct ib_other_headers *ohdr,
18868c2ecf20Sopenharmony_ci				     u32 bth0, u32 psn, u64 vaddr, u32 len)
18878c2ecf20Sopenharmony_ci{
18888c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
18898c2ecf20Sopenharmony_ci	struct tid_rdma_request *req;
18908c2ecf20Sopenharmony_ci	struct tid_rdma_flow *flow;
18918c2ecf20Sopenharmony_ci	u32 flow_psn, i, tidlen = 0, pktlen, tlen;
18928c2ecf20Sopenharmony_ci
18938c2ecf20Sopenharmony_ci	req = ack_to_tid_req(e);
18948c2ecf20Sopenharmony_ci
18958c2ecf20Sopenharmony_ci	/* Validate the payload first */
18968c2ecf20Sopenharmony_ci	flow = &req->flows[req->setup_head];
18978c2ecf20Sopenharmony_ci
18988c2ecf20Sopenharmony_ci	/* payload length = packet length - (header length + ICRC length) */
18998c2ecf20Sopenharmony_ci	pktlen = packet->tlen - (packet->hlen + 4);
19008c2ecf20Sopenharmony_ci	if (pktlen > sizeof(flow->tid_entry))
19018c2ecf20Sopenharmony_ci		return 1;
19028c2ecf20Sopenharmony_ci	memcpy(flow->tid_entry, packet->ebuf, pktlen);
19038c2ecf20Sopenharmony_ci	flow->tidcnt = pktlen / sizeof(*flow->tid_entry);
19048c2ecf20Sopenharmony_ci
19058c2ecf20Sopenharmony_ci	/*
19068c2ecf20Sopenharmony_ci	 * Walk the TID_ENTRY list to make sure we have enough space for a
19078c2ecf20Sopenharmony_ci	 * complete segment. Also calculate the number of required packets.
19088c2ecf20Sopenharmony_ci	 */
19098c2ecf20Sopenharmony_ci	flow->npkts = rvt_div_round_up_mtu(qp, len);
19108c2ecf20Sopenharmony_ci	for (i = 0; i < flow->tidcnt; i++) {
19118c2ecf20Sopenharmony_ci		trace_hfi1_tid_entry_rcv_read_req(qp, i,
19128c2ecf20Sopenharmony_ci						  flow->tid_entry[i]);
19138c2ecf20Sopenharmony_ci		tlen = EXP_TID_GET(flow->tid_entry[i], LEN);
19148c2ecf20Sopenharmony_ci		if (!tlen)
19158c2ecf20Sopenharmony_ci			return 1;
19168c2ecf20Sopenharmony_ci
19178c2ecf20Sopenharmony_ci		/*
19188c2ecf20Sopenharmony_ci		 * For tid pair (tidctr == 3), the buffer size of the pair
19198c2ecf20Sopenharmony_ci		 * should be the sum of the buffer size described by each
19208c2ecf20Sopenharmony_ci		 * tid entry. However, only the first entry needs to be
19218c2ecf20Sopenharmony_ci		 * specified in the request (see WFR HAS Section 8.5.7.1).
19228c2ecf20Sopenharmony_ci		 */
19238c2ecf20Sopenharmony_ci		tidlen += tlen;
19248c2ecf20Sopenharmony_ci	}
19258c2ecf20Sopenharmony_ci	if (tidlen * PAGE_SIZE < len)
19268c2ecf20Sopenharmony_ci		return 1;
19278c2ecf20Sopenharmony_ci
19288c2ecf20Sopenharmony_ci	/* Empty the flow array */
19298c2ecf20Sopenharmony_ci	req->clear_tail = req->setup_head;
19308c2ecf20Sopenharmony_ci	flow->pkt = 0;
19318c2ecf20Sopenharmony_ci	flow->tid_idx = 0;
19328c2ecf20Sopenharmony_ci	flow->tid_offset = 0;
19338c2ecf20Sopenharmony_ci	flow->sent = 0;
19348c2ecf20Sopenharmony_ci	flow->tid_qpn = be32_to_cpu(ohdr->u.tid_rdma.r_req.tid_flow_qp);
19358c2ecf20Sopenharmony_ci	flow->idx = (flow->tid_qpn >> TID_RDMA_DESTQP_FLOW_SHIFT) &
19368c2ecf20Sopenharmony_ci		    TID_RDMA_DESTQP_FLOW_MASK;
19378c2ecf20Sopenharmony_ci	flow_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_req.tid_flow_psn));
19388c2ecf20Sopenharmony_ci	flow->flow_state.generation = flow_psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
19398c2ecf20Sopenharmony_ci	flow->flow_state.spsn = flow_psn & HFI1_KDETH_BTH_SEQ_MASK;
19408c2ecf20Sopenharmony_ci	flow->length = len;
19418c2ecf20Sopenharmony_ci
19428c2ecf20Sopenharmony_ci	flow->flow_state.lpsn = flow->flow_state.spsn +
19438c2ecf20Sopenharmony_ci		flow->npkts - 1;
19448c2ecf20Sopenharmony_ci	flow->flow_state.ib_spsn = psn;
19458c2ecf20Sopenharmony_ci	flow->flow_state.ib_lpsn = flow->flow_state.ib_spsn + flow->npkts - 1;
19468c2ecf20Sopenharmony_ci
19478c2ecf20Sopenharmony_ci	trace_hfi1_tid_flow_rcv_read_req(qp, req->setup_head, flow);
19488c2ecf20Sopenharmony_ci	/* Set the initial flow index to the current flow. */
19498c2ecf20Sopenharmony_ci	req->flow_idx = req->setup_head;
19508c2ecf20Sopenharmony_ci
19518c2ecf20Sopenharmony_ci	/* advance circular buffer head */
19528c2ecf20Sopenharmony_ci	req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);
19538c2ecf20Sopenharmony_ci
19548c2ecf20Sopenharmony_ci	/*
19558c2ecf20Sopenharmony_ci	 * Compute last PSN for request.
19568c2ecf20Sopenharmony_ci	 */
19578c2ecf20Sopenharmony_ci	e->opcode = (bth0 >> 24) & 0xff;
19588c2ecf20Sopenharmony_ci	e->psn = psn;
19598c2ecf20Sopenharmony_ci	e->lpsn = psn + flow->npkts - 1;
19608c2ecf20Sopenharmony_ci	e->sent = 0;
19618c2ecf20Sopenharmony_ci
19628c2ecf20Sopenharmony_ci	req->n_flows = qpriv->tid_rdma.local.max_read;
19638c2ecf20Sopenharmony_ci	req->state = TID_REQUEST_ACTIVE;
19648c2ecf20Sopenharmony_ci	req->cur_seg = 0;
19658c2ecf20Sopenharmony_ci	req->comp_seg = 0;
19668c2ecf20Sopenharmony_ci	req->ack_seg = 0;
19678c2ecf20Sopenharmony_ci	req->isge = 0;
19688c2ecf20Sopenharmony_ci	req->seg_len = qpriv->tid_rdma.local.max_len;
19698c2ecf20Sopenharmony_ci	req->total_len = len;
19708c2ecf20Sopenharmony_ci	req->total_segs = 1;
19718c2ecf20Sopenharmony_ci	req->r_flow_psn = e->psn;
19728c2ecf20Sopenharmony_ci
19738c2ecf20Sopenharmony_ci	trace_hfi1_tid_req_rcv_read_req(qp, 0, e->opcode, e->psn, e->lpsn,
19748c2ecf20Sopenharmony_ci					req);
19758c2ecf20Sopenharmony_ci	return 0;
19768c2ecf20Sopenharmony_ci}
19778c2ecf20Sopenharmony_ci
19788c2ecf20Sopenharmony_cistatic int tid_rdma_rcv_error(struct hfi1_packet *packet,
19798c2ecf20Sopenharmony_ci			      struct ib_other_headers *ohdr,
19808c2ecf20Sopenharmony_ci			      struct rvt_qp *qp, u32 psn, int diff)
19818c2ecf20Sopenharmony_ci{
19828c2ecf20Sopenharmony_ci	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
19838c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *rcd = ((struct hfi1_qp_priv *)qp->priv)->rcd;
19848c2ecf20Sopenharmony_ci	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
19858c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
19868c2ecf20Sopenharmony_ci	struct rvt_ack_entry *e;
19878c2ecf20Sopenharmony_ci	struct tid_rdma_request *req;
19888c2ecf20Sopenharmony_ci	unsigned long flags;
19898c2ecf20Sopenharmony_ci	u8 prev;
19908c2ecf20Sopenharmony_ci	bool old_req;
19918c2ecf20Sopenharmony_ci
19928c2ecf20Sopenharmony_ci	trace_hfi1_rsp_tid_rcv_error(qp, psn);
19938c2ecf20Sopenharmony_ci	trace_hfi1_tid_rdma_rcv_err(qp, 0, psn, diff);
19948c2ecf20Sopenharmony_ci	if (diff > 0) {
19958c2ecf20Sopenharmony_ci		/* sequence error */
19968c2ecf20Sopenharmony_ci		if (!qp->r_nak_state) {
19978c2ecf20Sopenharmony_ci			ibp->rvp.n_rc_seqnak++;
19988c2ecf20Sopenharmony_ci			qp->r_nak_state = IB_NAK_PSN_ERROR;
19998c2ecf20Sopenharmony_ci			qp->r_ack_psn = qp->r_psn;
20008c2ecf20Sopenharmony_ci			rc_defered_ack(rcd, qp);
20018c2ecf20Sopenharmony_ci		}
20028c2ecf20Sopenharmony_ci		goto done;
20038c2ecf20Sopenharmony_ci	}
20048c2ecf20Sopenharmony_ci
20058c2ecf20Sopenharmony_ci	ibp->rvp.n_rc_dupreq++;
20068c2ecf20Sopenharmony_ci
20078c2ecf20Sopenharmony_ci	spin_lock_irqsave(&qp->s_lock, flags);
20088c2ecf20Sopenharmony_ci	e = find_prev_entry(qp, psn, &prev, NULL, &old_req);
20098c2ecf20Sopenharmony_ci	if (!e || (e->opcode != TID_OP(READ_REQ) &&
20108c2ecf20Sopenharmony_ci		   e->opcode != TID_OP(WRITE_REQ)))
20118c2ecf20Sopenharmony_ci		goto unlock;
20128c2ecf20Sopenharmony_ci
20138c2ecf20Sopenharmony_ci	req = ack_to_tid_req(e);
20148c2ecf20Sopenharmony_ci	req->r_flow_psn = psn;
20158c2ecf20Sopenharmony_ci	trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn, e->lpsn, req);
20168c2ecf20Sopenharmony_ci	if (e->opcode == TID_OP(READ_REQ)) {
20178c2ecf20Sopenharmony_ci		struct ib_reth *reth;
20188c2ecf20Sopenharmony_ci		u32 len;
20198c2ecf20Sopenharmony_ci		u32 rkey;
20208c2ecf20Sopenharmony_ci		u64 vaddr;
20218c2ecf20Sopenharmony_ci		int ok;
20228c2ecf20Sopenharmony_ci		u32 bth0;
20238c2ecf20Sopenharmony_ci
20248c2ecf20Sopenharmony_ci		reth = &ohdr->u.tid_rdma.r_req.reth;
20258c2ecf20Sopenharmony_ci		/*
20268c2ecf20Sopenharmony_ci		 * The requester always restarts from the start of the original
20278c2ecf20Sopenharmony_ci		 * request.
20288c2ecf20Sopenharmony_ci		 */
20298c2ecf20Sopenharmony_ci		len = be32_to_cpu(reth->length);
20308c2ecf20Sopenharmony_ci		if (psn != e->psn || len != req->total_len)
20318c2ecf20Sopenharmony_ci			goto unlock;
20328c2ecf20Sopenharmony_ci
20338c2ecf20Sopenharmony_ci		release_rdma_sge_mr(e);
20348c2ecf20Sopenharmony_ci
20358c2ecf20Sopenharmony_ci		rkey = be32_to_cpu(reth->rkey);
20368c2ecf20Sopenharmony_ci		vaddr = get_ib_reth_vaddr(reth);
20378c2ecf20Sopenharmony_ci
20388c2ecf20Sopenharmony_ci		qp->r_len = len;
20398c2ecf20Sopenharmony_ci		ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
20408c2ecf20Sopenharmony_ci				 IB_ACCESS_REMOTE_READ);
20418c2ecf20Sopenharmony_ci		if (unlikely(!ok))
20428c2ecf20Sopenharmony_ci			goto unlock;
20438c2ecf20Sopenharmony_ci
20448c2ecf20Sopenharmony_ci		/*
20458c2ecf20Sopenharmony_ci		 * If all the response packets for the current request have
20468c2ecf20Sopenharmony_ci		 * been sent out and this request is complete (old_request
20478c2ecf20Sopenharmony_ci		 * == false) and the TID flow may be unusable (the
20488c2ecf20Sopenharmony_ci		 * req->clear_tail is advanced). However, when an earlier
20498c2ecf20Sopenharmony_ci		 * request is received, this request will not be complete any
20508c2ecf20Sopenharmony_ci		 * more (qp->s_tail_ack_queue is moved back, see below).
20518c2ecf20Sopenharmony_ci		 * Consequently, we need to update the TID flow info everytime
20528c2ecf20Sopenharmony_ci		 * a duplicate request is received.
20538c2ecf20Sopenharmony_ci		 */
20548c2ecf20Sopenharmony_ci		bth0 = be32_to_cpu(ohdr->bth[0]);
20558c2ecf20Sopenharmony_ci		if (tid_rdma_rcv_read_request(qp, e, packet, ohdr, bth0, psn,
20568c2ecf20Sopenharmony_ci					      vaddr, len))
20578c2ecf20Sopenharmony_ci			goto unlock;
20588c2ecf20Sopenharmony_ci
20598c2ecf20Sopenharmony_ci		/*
20608c2ecf20Sopenharmony_ci		 * True if the request is already scheduled (between
20618c2ecf20Sopenharmony_ci		 * qp->s_tail_ack_queue and qp->r_head_ack_queue);
20628c2ecf20Sopenharmony_ci		 */
20638c2ecf20Sopenharmony_ci		if (old_req)
20648c2ecf20Sopenharmony_ci			goto unlock;
20658c2ecf20Sopenharmony_ci	} else {
20668c2ecf20Sopenharmony_ci		struct flow_state *fstate;
20678c2ecf20Sopenharmony_ci		bool schedule = false;
20688c2ecf20Sopenharmony_ci		u8 i;
20698c2ecf20Sopenharmony_ci
20708c2ecf20Sopenharmony_ci		if (req->state == TID_REQUEST_RESEND) {
20718c2ecf20Sopenharmony_ci			req->state = TID_REQUEST_RESEND_ACTIVE;
20728c2ecf20Sopenharmony_ci		} else if (req->state == TID_REQUEST_INIT_RESEND) {
20738c2ecf20Sopenharmony_ci			req->state = TID_REQUEST_INIT;
20748c2ecf20Sopenharmony_ci			schedule = true;
20758c2ecf20Sopenharmony_ci		}
20768c2ecf20Sopenharmony_ci
20778c2ecf20Sopenharmony_ci		/*
20788c2ecf20Sopenharmony_ci		 * True if the request is already scheduled (between
20798c2ecf20Sopenharmony_ci		 * qp->s_tail_ack_queue and qp->r_head_ack_queue).
20808c2ecf20Sopenharmony_ci		 * Also, don't change requests, which are at the SYNC
20818c2ecf20Sopenharmony_ci		 * point and haven't generated any responses yet.
20828c2ecf20Sopenharmony_ci		 * There is nothing to retransmit for them yet.
20838c2ecf20Sopenharmony_ci		 */
20848c2ecf20Sopenharmony_ci		if (old_req || req->state == TID_REQUEST_INIT ||
20858c2ecf20Sopenharmony_ci		    (req->state == TID_REQUEST_SYNC && !req->cur_seg)) {
20868c2ecf20Sopenharmony_ci			for (i = prev + 1; ; i++) {
20878c2ecf20Sopenharmony_ci				if (i > rvt_size_atomic(&dev->rdi))
20888c2ecf20Sopenharmony_ci					i = 0;
20898c2ecf20Sopenharmony_ci				if (i == qp->r_head_ack_queue)
20908c2ecf20Sopenharmony_ci					break;
20918c2ecf20Sopenharmony_ci				e = &qp->s_ack_queue[i];
20928c2ecf20Sopenharmony_ci				req = ack_to_tid_req(e);
20938c2ecf20Sopenharmony_ci				if (e->opcode == TID_OP(WRITE_REQ) &&
20948c2ecf20Sopenharmony_ci				    req->state == TID_REQUEST_INIT)
20958c2ecf20Sopenharmony_ci					req->state = TID_REQUEST_INIT_RESEND;
20968c2ecf20Sopenharmony_ci			}
20978c2ecf20Sopenharmony_ci			/*
20988c2ecf20Sopenharmony_ci			 * If the state of the request has been changed,
20998c2ecf20Sopenharmony_ci			 * the first leg needs to get scheduled in order to
21008c2ecf20Sopenharmony_ci			 * pick up the change. Otherwise, normal response
21018c2ecf20Sopenharmony_ci			 * processing should take care of it.
21028c2ecf20Sopenharmony_ci			 */
21038c2ecf20Sopenharmony_ci			if (!schedule)
21048c2ecf20Sopenharmony_ci				goto unlock;
21058c2ecf20Sopenharmony_ci		}
21068c2ecf20Sopenharmony_ci
21078c2ecf20Sopenharmony_ci		/*
21088c2ecf20Sopenharmony_ci		 * If there is no more allocated segment, just schedule the qp
21098c2ecf20Sopenharmony_ci		 * without changing any state.
21108c2ecf20Sopenharmony_ci		 */
21118c2ecf20Sopenharmony_ci		if (req->clear_tail == req->setup_head)
21128c2ecf20Sopenharmony_ci			goto schedule;
21138c2ecf20Sopenharmony_ci		/*
21148c2ecf20Sopenharmony_ci		 * If this request has sent responses for segments, which have
21158c2ecf20Sopenharmony_ci		 * not received data yet (flow_idx != clear_tail), the flow_idx
21168c2ecf20Sopenharmony_ci		 * pointer needs to be adjusted so the same responses can be
21178c2ecf20Sopenharmony_ci		 * re-sent.
21188c2ecf20Sopenharmony_ci		 */
21198c2ecf20Sopenharmony_ci		if (CIRC_CNT(req->flow_idx, req->clear_tail, MAX_FLOWS)) {
21208c2ecf20Sopenharmony_ci			fstate = &req->flows[req->clear_tail].flow_state;
21218c2ecf20Sopenharmony_ci			qpriv->pending_tid_w_segs -=
21228c2ecf20Sopenharmony_ci				CIRC_CNT(req->flow_idx, req->clear_tail,
21238c2ecf20Sopenharmony_ci					 MAX_FLOWS);
21248c2ecf20Sopenharmony_ci			req->flow_idx =
21258c2ecf20Sopenharmony_ci				CIRC_ADD(req->clear_tail,
21268c2ecf20Sopenharmony_ci					 delta_psn(psn, fstate->resp_ib_psn),
21278c2ecf20Sopenharmony_ci					 MAX_FLOWS);
21288c2ecf20Sopenharmony_ci			qpriv->pending_tid_w_segs +=
21298c2ecf20Sopenharmony_ci				delta_psn(psn, fstate->resp_ib_psn);
21308c2ecf20Sopenharmony_ci			/*
21318c2ecf20Sopenharmony_ci			 * When flow_idx == setup_head, we've gotten a duplicate
21328c2ecf20Sopenharmony_ci			 * request for a segment, which has not been allocated
21338c2ecf20Sopenharmony_ci			 * yet. In that case, don't adjust this request.
21348c2ecf20Sopenharmony_ci			 * However, we still want to go through the loop below
21358c2ecf20Sopenharmony_ci			 * to adjust all subsequent requests.
21368c2ecf20Sopenharmony_ci			 */
21378c2ecf20Sopenharmony_ci			if (CIRC_CNT(req->setup_head, req->flow_idx,
21388c2ecf20Sopenharmony_ci				     MAX_FLOWS)) {
21398c2ecf20Sopenharmony_ci				req->cur_seg = delta_psn(psn, e->psn);
21408c2ecf20Sopenharmony_ci				req->state = TID_REQUEST_RESEND_ACTIVE;
21418c2ecf20Sopenharmony_ci			}
21428c2ecf20Sopenharmony_ci		}
21438c2ecf20Sopenharmony_ci
21448c2ecf20Sopenharmony_ci		for (i = prev + 1; ; i++) {
21458c2ecf20Sopenharmony_ci			/*
21468c2ecf20Sopenharmony_ci			 * Look at everything up to and including
21478c2ecf20Sopenharmony_ci			 * s_tail_ack_queue
21488c2ecf20Sopenharmony_ci			 */
21498c2ecf20Sopenharmony_ci			if (i > rvt_size_atomic(&dev->rdi))
21508c2ecf20Sopenharmony_ci				i = 0;
21518c2ecf20Sopenharmony_ci			if (i == qp->r_head_ack_queue)
21528c2ecf20Sopenharmony_ci				break;
21538c2ecf20Sopenharmony_ci			e = &qp->s_ack_queue[i];
21548c2ecf20Sopenharmony_ci			req = ack_to_tid_req(e);
21558c2ecf20Sopenharmony_ci			trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn,
21568c2ecf20Sopenharmony_ci						   e->lpsn, req);
21578c2ecf20Sopenharmony_ci			if (e->opcode != TID_OP(WRITE_REQ) ||
21588c2ecf20Sopenharmony_ci			    req->cur_seg == req->comp_seg ||
21598c2ecf20Sopenharmony_ci			    req->state == TID_REQUEST_INIT ||
21608c2ecf20Sopenharmony_ci			    req->state == TID_REQUEST_INIT_RESEND) {
21618c2ecf20Sopenharmony_ci				if (req->state == TID_REQUEST_INIT)
21628c2ecf20Sopenharmony_ci					req->state = TID_REQUEST_INIT_RESEND;
21638c2ecf20Sopenharmony_ci				continue;
21648c2ecf20Sopenharmony_ci			}
21658c2ecf20Sopenharmony_ci			qpriv->pending_tid_w_segs -=
21668c2ecf20Sopenharmony_ci				CIRC_CNT(req->flow_idx,
21678c2ecf20Sopenharmony_ci					 req->clear_tail,
21688c2ecf20Sopenharmony_ci					 MAX_FLOWS);
21698c2ecf20Sopenharmony_ci			req->flow_idx = req->clear_tail;
21708c2ecf20Sopenharmony_ci			req->state = TID_REQUEST_RESEND;
21718c2ecf20Sopenharmony_ci			req->cur_seg = req->comp_seg;
21728c2ecf20Sopenharmony_ci		}
21738c2ecf20Sopenharmony_ci		qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
21748c2ecf20Sopenharmony_ci	}
21758c2ecf20Sopenharmony_ci	/* Re-process old requests.*/
21768c2ecf20Sopenharmony_ci	if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
21778c2ecf20Sopenharmony_ci		qp->s_acked_ack_queue = prev;
21788c2ecf20Sopenharmony_ci	qp->s_tail_ack_queue = prev;
21798c2ecf20Sopenharmony_ci	/*
21808c2ecf20Sopenharmony_ci	 * Since the qp->s_tail_ack_queue is modified, the
21818c2ecf20Sopenharmony_ci	 * qp->s_ack_state must be changed to re-initialize
21828c2ecf20Sopenharmony_ci	 * qp->s_ack_rdma_sge; Otherwise, we will end up in
21838c2ecf20Sopenharmony_ci	 * wrong memory region.
21848c2ecf20Sopenharmony_ci	 */
21858c2ecf20Sopenharmony_ci	qp->s_ack_state = OP(ACKNOWLEDGE);
21868c2ecf20Sopenharmony_cischedule:
21878c2ecf20Sopenharmony_ci	/*
21888c2ecf20Sopenharmony_ci	 * It's possible to receive a retry psn that is earlier than an RNRNAK
21898c2ecf20Sopenharmony_ci	 * psn. In this case, the rnrnak state should be cleared.
21908c2ecf20Sopenharmony_ci	 */
21918c2ecf20Sopenharmony_ci	if (qpriv->rnr_nak_state) {
21928c2ecf20Sopenharmony_ci		qp->s_nak_state = 0;
21938c2ecf20Sopenharmony_ci		qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
21948c2ecf20Sopenharmony_ci		qp->r_psn = e->lpsn + 1;
21958c2ecf20Sopenharmony_ci		hfi1_tid_write_alloc_resources(qp, true);
21968c2ecf20Sopenharmony_ci	}
21978c2ecf20Sopenharmony_ci
21988c2ecf20Sopenharmony_ci	qp->r_state = e->opcode;
21998c2ecf20Sopenharmony_ci	qp->r_nak_state = 0;
22008c2ecf20Sopenharmony_ci	qp->s_flags |= RVT_S_RESP_PENDING;
22018c2ecf20Sopenharmony_ci	hfi1_schedule_send(qp);
22028c2ecf20Sopenharmony_ciunlock:
22038c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&qp->s_lock, flags);
22048c2ecf20Sopenharmony_cidone:
22058c2ecf20Sopenharmony_ci	return 1;
22068c2ecf20Sopenharmony_ci}
22078c2ecf20Sopenharmony_ci
22088c2ecf20Sopenharmony_civoid hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
22098c2ecf20Sopenharmony_ci{
22108c2ecf20Sopenharmony_ci	/* HANDLER FOR TID RDMA READ REQUEST packet (Responder side)*/
22118c2ecf20Sopenharmony_ci
22128c2ecf20Sopenharmony_ci	/*
22138c2ecf20Sopenharmony_ci	 * 1. Verify TID RDMA READ REQ as per IB_OPCODE_RC_RDMA_READ
22148c2ecf20Sopenharmony_ci	 *    (see hfi1_rc_rcv())
22158c2ecf20Sopenharmony_ci	 * 2. Put TID RDMA READ REQ into the response queueu (s_ack_queue)
22168c2ecf20Sopenharmony_ci	 *     - Setup struct tid_rdma_req with request info
22178c2ecf20Sopenharmony_ci	 *     - Initialize struct tid_rdma_flow info;
22188c2ecf20Sopenharmony_ci	 *     - Copy TID entries;
22198c2ecf20Sopenharmony_ci	 * 3. Set the qp->s_ack_state.
22208c2ecf20Sopenharmony_ci	 * 4. Set RVT_S_RESP_PENDING in s_flags.
22218c2ecf20Sopenharmony_ci	 * 5. Kick the send engine (hfi1_schedule_send())
22228c2ecf20Sopenharmony_ci	 */
22238c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *rcd = packet->rcd;
22248c2ecf20Sopenharmony_ci	struct rvt_qp *qp = packet->qp;
22258c2ecf20Sopenharmony_ci	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
22268c2ecf20Sopenharmony_ci	struct ib_other_headers *ohdr = packet->ohdr;
22278c2ecf20Sopenharmony_ci	struct rvt_ack_entry *e;
22288c2ecf20Sopenharmony_ci	unsigned long flags;
22298c2ecf20Sopenharmony_ci	struct ib_reth *reth;
22308c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
22318c2ecf20Sopenharmony_ci	u32 bth0, psn, len, rkey;
22328c2ecf20Sopenharmony_ci	bool fecn;
22338c2ecf20Sopenharmony_ci	u8 next;
22348c2ecf20Sopenharmony_ci	u64 vaddr;
22358c2ecf20Sopenharmony_ci	int diff;
22368c2ecf20Sopenharmony_ci	u8 nack_state = IB_NAK_INVALID_REQUEST;
22378c2ecf20Sopenharmony_ci
22388c2ecf20Sopenharmony_ci	bth0 = be32_to_cpu(ohdr->bth[0]);
22398c2ecf20Sopenharmony_ci	if (hfi1_ruc_check_hdr(ibp, packet))
22408c2ecf20Sopenharmony_ci		return;
22418c2ecf20Sopenharmony_ci
22428c2ecf20Sopenharmony_ci	fecn = process_ecn(qp, packet);
22438c2ecf20Sopenharmony_ci	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
22448c2ecf20Sopenharmony_ci	trace_hfi1_rsp_rcv_tid_read_req(qp, psn);
22458c2ecf20Sopenharmony_ci
22468c2ecf20Sopenharmony_ci	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
22478c2ecf20Sopenharmony_ci		rvt_comm_est(qp);
22488c2ecf20Sopenharmony_ci
22498c2ecf20Sopenharmony_ci	if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
22508c2ecf20Sopenharmony_ci		goto nack_inv;
22518c2ecf20Sopenharmony_ci
22528c2ecf20Sopenharmony_ci	reth = &ohdr->u.tid_rdma.r_req.reth;
22538c2ecf20Sopenharmony_ci	vaddr = be64_to_cpu(reth->vaddr);
22548c2ecf20Sopenharmony_ci	len = be32_to_cpu(reth->length);
22558c2ecf20Sopenharmony_ci	/* The length needs to be in multiples of PAGE_SIZE */
22568c2ecf20Sopenharmony_ci	if (!len || len & ~PAGE_MASK || len > qpriv->tid_rdma.local.max_len)
22578c2ecf20Sopenharmony_ci		goto nack_inv;
22588c2ecf20Sopenharmony_ci
22598c2ecf20Sopenharmony_ci	diff = delta_psn(psn, qp->r_psn);
22608c2ecf20Sopenharmony_ci	if (unlikely(diff)) {
22618c2ecf20Sopenharmony_ci		tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
22628c2ecf20Sopenharmony_ci		return;
22638c2ecf20Sopenharmony_ci	}
22648c2ecf20Sopenharmony_ci
22658c2ecf20Sopenharmony_ci	/* We've verified the request, insert it into the ack queue. */
22668c2ecf20Sopenharmony_ci	next = qp->r_head_ack_queue + 1;
22678c2ecf20Sopenharmony_ci	if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
22688c2ecf20Sopenharmony_ci		next = 0;
22698c2ecf20Sopenharmony_ci	spin_lock_irqsave(&qp->s_lock, flags);
22708c2ecf20Sopenharmony_ci	if (unlikely(next == qp->s_tail_ack_queue)) {
22718c2ecf20Sopenharmony_ci		if (!qp->s_ack_queue[next].sent) {
22728c2ecf20Sopenharmony_ci			nack_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
22738c2ecf20Sopenharmony_ci			goto nack_inv_unlock;
22748c2ecf20Sopenharmony_ci		}
22758c2ecf20Sopenharmony_ci		update_ack_queue(qp, next);
22768c2ecf20Sopenharmony_ci	}
22778c2ecf20Sopenharmony_ci	e = &qp->s_ack_queue[qp->r_head_ack_queue];
22788c2ecf20Sopenharmony_ci	release_rdma_sge_mr(e);
22798c2ecf20Sopenharmony_ci
22808c2ecf20Sopenharmony_ci	rkey = be32_to_cpu(reth->rkey);
22818c2ecf20Sopenharmony_ci	qp->r_len = len;
22828c2ecf20Sopenharmony_ci
22838c2ecf20Sopenharmony_ci	if (unlikely(!rvt_rkey_ok(qp, &e->rdma_sge, qp->r_len, vaddr,
22848c2ecf20Sopenharmony_ci				  rkey, IB_ACCESS_REMOTE_READ)))
22858c2ecf20Sopenharmony_ci		goto nack_acc;
22868c2ecf20Sopenharmony_ci
22878c2ecf20Sopenharmony_ci	/* Accept the request parameters */
22888c2ecf20Sopenharmony_ci	if (tid_rdma_rcv_read_request(qp, e, packet, ohdr, bth0, psn, vaddr,
22898c2ecf20Sopenharmony_ci				      len))
22908c2ecf20Sopenharmony_ci		goto nack_inv_unlock;
22918c2ecf20Sopenharmony_ci
22928c2ecf20Sopenharmony_ci	qp->r_state = e->opcode;
22938c2ecf20Sopenharmony_ci	qp->r_nak_state = 0;
22948c2ecf20Sopenharmony_ci	/*
22958c2ecf20Sopenharmony_ci	 * We need to increment the MSN here instead of when we
22968c2ecf20Sopenharmony_ci	 * finish sending the result since a duplicate request would
22978c2ecf20Sopenharmony_ci	 * increment it more than once.
22988c2ecf20Sopenharmony_ci	 */
22998c2ecf20Sopenharmony_ci	qp->r_msn++;
23008c2ecf20Sopenharmony_ci	qp->r_psn += e->lpsn - e->psn + 1;
23018c2ecf20Sopenharmony_ci
23028c2ecf20Sopenharmony_ci	qp->r_head_ack_queue = next;
23038c2ecf20Sopenharmony_ci
23048c2ecf20Sopenharmony_ci	/*
23058c2ecf20Sopenharmony_ci	 * For all requests other than TID WRITE which are added to the ack
23068c2ecf20Sopenharmony_ci	 * queue, qpriv->r_tid_alloc follows qp->r_head_ack_queue. It is ok to
23078c2ecf20Sopenharmony_ci	 * do this because of interlocks between these and TID WRITE
23088c2ecf20Sopenharmony_ci	 * requests. The same change has also been made in hfi1_rc_rcv().
23098c2ecf20Sopenharmony_ci	 */
23108c2ecf20Sopenharmony_ci	qpriv->r_tid_alloc = qp->r_head_ack_queue;
23118c2ecf20Sopenharmony_ci
23128c2ecf20Sopenharmony_ci	/* Schedule the send tasklet. */
23138c2ecf20Sopenharmony_ci	qp->s_flags |= RVT_S_RESP_PENDING;
23148c2ecf20Sopenharmony_ci	if (fecn)
23158c2ecf20Sopenharmony_ci		qp->s_flags |= RVT_S_ECN;
23168c2ecf20Sopenharmony_ci	hfi1_schedule_send(qp);
23178c2ecf20Sopenharmony_ci
23188c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&qp->s_lock, flags);
23198c2ecf20Sopenharmony_ci	return;
23208c2ecf20Sopenharmony_ci
23218c2ecf20Sopenharmony_cinack_inv_unlock:
23228c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&qp->s_lock, flags);
23238c2ecf20Sopenharmony_cinack_inv:
23248c2ecf20Sopenharmony_ci	rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
23258c2ecf20Sopenharmony_ci	qp->r_nak_state = nack_state;
23268c2ecf20Sopenharmony_ci	qp->r_ack_psn = qp->r_psn;
23278c2ecf20Sopenharmony_ci	/* Queue NAK for later */
23288c2ecf20Sopenharmony_ci	rc_defered_ack(rcd, qp);
23298c2ecf20Sopenharmony_ci	return;
23308c2ecf20Sopenharmony_cinack_acc:
23318c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&qp->s_lock, flags);
23328c2ecf20Sopenharmony_ci	rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
23338c2ecf20Sopenharmony_ci	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
23348c2ecf20Sopenharmony_ci	qp->r_ack_psn = qp->r_psn;
23358c2ecf20Sopenharmony_ci}
23368c2ecf20Sopenharmony_ci
23378c2ecf20Sopenharmony_ciu32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
23388c2ecf20Sopenharmony_ci				  struct ib_other_headers *ohdr, u32 *bth0,
23398c2ecf20Sopenharmony_ci				  u32 *bth1, u32 *bth2, u32 *len, bool *last)
23408c2ecf20Sopenharmony_ci{
23418c2ecf20Sopenharmony_ci	struct hfi1_ack_priv *epriv = e->priv;
23428c2ecf20Sopenharmony_ci	struct tid_rdma_request *req = &epriv->tid_req;
23438c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
23448c2ecf20Sopenharmony_ci	struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
23458c2ecf20Sopenharmony_ci	u32 tidentry = flow->tid_entry[flow->tid_idx];
23468c2ecf20Sopenharmony_ci	u32 tidlen = EXP_TID_GET(tidentry, LEN) << PAGE_SHIFT;
23478c2ecf20Sopenharmony_ci	struct tid_rdma_read_resp *resp = &ohdr->u.tid_rdma.r_rsp;
23488c2ecf20Sopenharmony_ci	u32 next_offset, om = KDETH_OM_LARGE;
23498c2ecf20Sopenharmony_ci	bool last_pkt;
23508c2ecf20Sopenharmony_ci	u32 hdwords = 0;
23518c2ecf20Sopenharmony_ci	struct tid_rdma_params *remote;
23528c2ecf20Sopenharmony_ci
23538c2ecf20Sopenharmony_ci	*len = min_t(u32, qp->pmtu, tidlen - flow->tid_offset);
23548c2ecf20Sopenharmony_ci	flow->sent += *len;
23558c2ecf20Sopenharmony_ci	next_offset = flow->tid_offset + *len;
23568c2ecf20Sopenharmony_ci	last_pkt = (flow->sent >= flow->length);
23578c2ecf20Sopenharmony_ci
23588c2ecf20Sopenharmony_ci	trace_hfi1_tid_entry_build_read_resp(qp, flow->tid_idx, tidentry);
23598c2ecf20Sopenharmony_ci	trace_hfi1_tid_flow_build_read_resp(qp, req->clear_tail, flow);
23608c2ecf20Sopenharmony_ci
23618c2ecf20Sopenharmony_ci	rcu_read_lock();
23628c2ecf20Sopenharmony_ci	remote = rcu_dereference(qpriv->tid_rdma.remote);
23638c2ecf20Sopenharmony_ci	if (!remote) {
23648c2ecf20Sopenharmony_ci		rcu_read_unlock();
23658c2ecf20Sopenharmony_ci		goto done;
23668c2ecf20Sopenharmony_ci	}
23678c2ecf20Sopenharmony_ci	KDETH_RESET(resp->kdeth0, KVER, 0x1);
23688c2ecf20Sopenharmony_ci	KDETH_SET(resp->kdeth0, SH, !last_pkt);
23698c2ecf20Sopenharmony_ci	KDETH_SET(resp->kdeth0, INTR, !!(!last_pkt && remote->urg));
23708c2ecf20Sopenharmony_ci	KDETH_SET(resp->kdeth0, TIDCTRL, EXP_TID_GET(tidentry, CTRL));
23718c2ecf20Sopenharmony_ci	KDETH_SET(resp->kdeth0, TID, EXP_TID_GET(tidentry, IDX));
23728c2ecf20Sopenharmony_ci	KDETH_SET(resp->kdeth0, OM, om == KDETH_OM_LARGE);
23738c2ecf20Sopenharmony_ci	KDETH_SET(resp->kdeth0, OFFSET, flow->tid_offset / om);
23748c2ecf20Sopenharmony_ci	KDETH_RESET(resp->kdeth1, JKEY, remote->jkey);
23758c2ecf20Sopenharmony_ci	resp->verbs_qp = cpu_to_be32(qp->remote_qpn);
23768c2ecf20Sopenharmony_ci	rcu_read_unlock();
23778c2ecf20Sopenharmony_ci
23788c2ecf20Sopenharmony_ci	resp->aeth = rvt_compute_aeth(qp);
23798c2ecf20Sopenharmony_ci	resp->verbs_psn = cpu_to_be32(mask_psn(flow->flow_state.ib_spsn +
23808c2ecf20Sopenharmony_ci					       flow->pkt));
23818c2ecf20Sopenharmony_ci
23828c2ecf20Sopenharmony_ci	*bth0 = TID_OP(READ_RESP) << 24;
23838c2ecf20Sopenharmony_ci	*bth1 = flow->tid_qpn;
23848c2ecf20Sopenharmony_ci	*bth2 = mask_psn(((flow->flow_state.spsn + flow->pkt++) &
23858c2ecf20Sopenharmony_ci			  HFI1_KDETH_BTH_SEQ_MASK) |
23868c2ecf20Sopenharmony_ci			 (flow->flow_state.generation <<
23878c2ecf20Sopenharmony_ci			  HFI1_KDETH_BTH_SEQ_SHIFT));
23888c2ecf20Sopenharmony_ci	*last = last_pkt;
23898c2ecf20Sopenharmony_ci	if (last_pkt)
23908c2ecf20Sopenharmony_ci		/* Advance to next flow */
23918c2ecf20Sopenharmony_ci		req->clear_tail = (req->clear_tail + 1) &
23928c2ecf20Sopenharmony_ci				  (MAX_FLOWS - 1);
23938c2ecf20Sopenharmony_ci
23948c2ecf20Sopenharmony_ci	if (next_offset >= tidlen) {
23958c2ecf20Sopenharmony_ci		flow->tid_offset = 0;
23968c2ecf20Sopenharmony_ci		flow->tid_idx++;
23978c2ecf20Sopenharmony_ci	} else {
23988c2ecf20Sopenharmony_ci		flow->tid_offset = next_offset;
23998c2ecf20Sopenharmony_ci	}
24008c2ecf20Sopenharmony_ci
24018c2ecf20Sopenharmony_ci	hdwords = sizeof(ohdr->u.tid_rdma.r_rsp) / sizeof(u32);
24028c2ecf20Sopenharmony_ci
24038c2ecf20Sopenharmony_cidone:
24048c2ecf20Sopenharmony_ci	return hdwords;
24058c2ecf20Sopenharmony_ci}
24068c2ecf20Sopenharmony_ci
24078c2ecf20Sopenharmony_cistatic inline struct tid_rdma_request *
24088c2ecf20Sopenharmony_cifind_tid_request(struct rvt_qp *qp, u32 psn, enum ib_wr_opcode opcode)
24098c2ecf20Sopenharmony_ci	__must_hold(&qp->s_lock)
24108c2ecf20Sopenharmony_ci{
24118c2ecf20Sopenharmony_ci	struct rvt_swqe *wqe;
24128c2ecf20Sopenharmony_ci	struct tid_rdma_request *req = NULL;
24138c2ecf20Sopenharmony_ci	u32 i, end;
24148c2ecf20Sopenharmony_ci
24158c2ecf20Sopenharmony_ci	end = qp->s_cur + 1;
24168c2ecf20Sopenharmony_ci	if (end == qp->s_size)
24178c2ecf20Sopenharmony_ci		end = 0;
24188c2ecf20Sopenharmony_ci	for (i = qp->s_acked; i != end;) {
24198c2ecf20Sopenharmony_ci		wqe = rvt_get_swqe_ptr(qp, i);
24208c2ecf20Sopenharmony_ci		if (cmp_psn(psn, wqe->psn) >= 0 &&
24218c2ecf20Sopenharmony_ci		    cmp_psn(psn, wqe->lpsn) <= 0) {
24228c2ecf20Sopenharmony_ci			if (wqe->wr.opcode == opcode)
24238c2ecf20Sopenharmony_ci				req = wqe_to_tid_req(wqe);
24248c2ecf20Sopenharmony_ci			break;
24258c2ecf20Sopenharmony_ci		}
24268c2ecf20Sopenharmony_ci		if (++i == qp->s_size)
24278c2ecf20Sopenharmony_ci			i = 0;
24288c2ecf20Sopenharmony_ci	}
24298c2ecf20Sopenharmony_ci
24308c2ecf20Sopenharmony_ci	return req;
24318c2ecf20Sopenharmony_ci}
24328c2ecf20Sopenharmony_ci
24338c2ecf20Sopenharmony_civoid hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
24348c2ecf20Sopenharmony_ci{
24358c2ecf20Sopenharmony_ci	/* HANDLER FOR TID RDMA READ RESPONSE packet (Requestor side */
24368c2ecf20Sopenharmony_ci
24378c2ecf20Sopenharmony_ci	/*
24388c2ecf20Sopenharmony_ci	 * 1. Find matching SWQE
24398c2ecf20Sopenharmony_ci	 * 2. Check that the entire segment has been read.
24408c2ecf20Sopenharmony_ci	 * 3. Remove HFI1_S_WAIT_TID_RESP from s_flags.
24418c2ecf20Sopenharmony_ci	 * 4. Free the TID flow resources.
24428c2ecf20Sopenharmony_ci	 * 5. Kick the send engine (hfi1_schedule_send())
24438c2ecf20Sopenharmony_ci	 */
24448c2ecf20Sopenharmony_ci	struct ib_other_headers *ohdr = packet->ohdr;
24458c2ecf20Sopenharmony_ci	struct rvt_qp *qp = packet->qp;
24468c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
24478c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *rcd = packet->rcd;
24488c2ecf20Sopenharmony_ci	struct tid_rdma_request *req;
24498c2ecf20Sopenharmony_ci	struct tid_rdma_flow *flow;
24508c2ecf20Sopenharmony_ci	u32 opcode, aeth;
24518c2ecf20Sopenharmony_ci	bool fecn;
24528c2ecf20Sopenharmony_ci	unsigned long flags;
24538c2ecf20Sopenharmony_ci	u32 kpsn, ipsn;
24548c2ecf20Sopenharmony_ci
24558c2ecf20Sopenharmony_ci	trace_hfi1_sender_rcv_tid_read_resp(qp);
24568c2ecf20Sopenharmony_ci	fecn = process_ecn(qp, packet);
24578c2ecf20Sopenharmony_ci	kpsn = mask_psn(be32_to_cpu(ohdr->bth[2]));
24588c2ecf20Sopenharmony_ci	aeth = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.aeth);
24598c2ecf20Sopenharmony_ci	opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
24608c2ecf20Sopenharmony_ci
24618c2ecf20Sopenharmony_ci	spin_lock_irqsave(&qp->s_lock, flags);
24628c2ecf20Sopenharmony_ci	ipsn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn));
24638c2ecf20Sopenharmony_ci	req = find_tid_request(qp, ipsn, IB_WR_TID_RDMA_READ);
24648c2ecf20Sopenharmony_ci	if (unlikely(!req))
24658c2ecf20Sopenharmony_ci		goto ack_op_err;
24668c2ecf20Sopenharmony_ci
24678c2ecf20Sopenharmony_ci	flow = &req->flows[req->clear_tail];
24688c2ecf20Sopenharmony_ci	/* When header suppression is disabled */
24698c2ecf20Sopenharmony_ci	if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) {
24708c2ecf20Sopenharmony_ci		update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
24718c2ecf20Sopenharmony_ci
24728c2ecf20Sopenharmony_ci		if (cmp_psn(kpsn, flow->flow_state.r_next_psn))
24738c2ecf20Sopenharmony_ci			goto ack_done;
24748c2ecf20Sopenharmony_ci		flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
24758c2ecf20Sopenharmony_ci		/*
24768c2ecf20Sopenharmony_ci		 * Copy the payload to destination buffer if this packet is
24778c2ecf20Sopenharmony_ci		 * delivered as an eager packet due to RSM rule and FECN.
24788c2ecf20Sopenharmony_ci		 * The RSM rule selects FECN bit in BTH and SH bit in
24798c2ecf20Sopenharmony_ci		 * KDETH header and therefore will not match the last
24808c2ecf20Sopenharmony_ci		 * packet of each segment that has SH bit cleared.
24818c2ecf20Sopenharmony_ci		 */
24828c2ecf20Sopenharmony_ci		if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
24838c2ecf20Sopenharmony_ci			struct rvt_sge_state ss;
24848c2ecf20Sopenharmony_ci			u32 len;
24858c2ecf20Sopenharmony_ci			u32 tlen = packet->tlen;
24868c2ecf20Sopenharmony_ci			u16 hdrsize = packet->hlen;
24878c2ecf20Sopenharmony_ci			u8 pad = packet->pad;
24888c2ecf20Sopenharmony_ci			u8 extra_bytes = pad + packet->extra_byte +
24898c2ecf20Sopenharmony_ci				(SIZE_OF_CRC << 2);
24908c2ecf20Sopenharmony_ci			u32 pmtu = qp->pmtu;
24918c2ecf20Sopenharmony_ci
24928c2ecf20Sopenharmony_ci			if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
24938c2ecf20Sopenharmony_ci				goto ack_op_err;
24948c2ecf20Sopenharmony_ci			len = restart_sge(&ss, req->e.swqe, ipsn, pmtu);
24958c2ecf20Sopenharmony_ci			if (unlikely(len < pmtu))
24968c2ecf20Sopenharmony_ci				goto ack_op_err;
24978c2ecf20Sopenharmony_ci			rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
24988c2ecf20Sopenharmony_ci				     false);
24998c2ecf20Sopenharmony_ci			/* Raise the sw sequence check flag for next packet */
25008c2ecf20Sopenharmony_ci			priv->s_flags |= HFI1_R_TID_SW_PSN;
25018c2ecf20Sopenharmony_ci		}
25028c2ecf20Sopenharmony_ci
25038c2ecf20Sopenharmony_ci		goto ack_done;
25048c2ecf20Sopenharmony_ci	}
25058c2ecf20Sopenharmony_ci	flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
25068c2ecf20Sopenharmony_ci	req->ack_pending--;
25078c2ecf20Sopenharmony_ci	priv->pending_tid_r_segs--;
25088c2ecf20Sopenharmony_ci	qp->s_num_rd_atomic--;
25098c2ecf20Sopenharmony_ci	if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
25108c2ecf20Sopenharmony_ci	    !qp->s_num_rd_atomic) {
25118c2ecf20Sopenharmony_ci		qp->s_flags &= ~(RVT_S_WAIT_FENCE |
25128c2ecf20Sopenharmony_ci				 RVT_S_WAIT_ACK);
25138c2ecf20Sopenharmony_ci		hfi1_schedule_send(qp);
25148c2ecf20Sopenharmony_ci	}
25158c2ecf20Sopenharmony_ci	if (qp->s_flags & RVT_S_WAIT_RDMAR) {
25168c2ecf20Sopenharmony_ci		qp->s_flags &= ~(RVT_S_WAIT_RDMAR | RVT_S_WAIT_ACK);
25178c2ecf20Sopenharmony_ci		hfi1_schedule_send(qp);
25188c2ecf20Sopenharmony_ci	}
25198c2ecf20Sopenharmony_ci
25208c2ecf20Sopenharmony_ci	trace_hfi1_ack(qp, ipsn);
25218c2ecf20Sopenharmony_ci	trace_hfi1_tid_req_rcv_read_resp(qp, 0, req->e.swqe->wr.opcode,
25228c2ecf20Sopenharmony_ci					 req->e.swqe->psn, req->e.swqe->lpsn,
25238c2ecf20Sopenharmony_ci					 req);
25248c2ecf20Sopenharmony_ci	trace_hfi1_tid_flow_rcv_read_resp(qp, req->clear_tail, flow);
25258c2ecf20Sopenharmony_ci
25268c2ecf20Sopenharmony_ci	/* Release the tid resources */
25278c2ecf20Sopenharmony_ci	hfi1_kern_exp_rcv_clear(req);
25288c2ecf20Sopenharmony_ci
25298c2ecf20Sopenharmony_ci	if (!do_rc_ack(qp, aeth, ipsn, opcode, 0, rcd))
25308c2ecf20Sopenharmony_ci		goto ack_done;
25318c2ecf20Sopenharmony_ci
25328c2ecf20Sopenharmony_ci	/* If not done yet, build next read request */
25338c2ecf20Sopenharmony_ci	if (++req->comp_seg >= req->total_segs) {
25348c2ecf20Sopenharmony_ci		priv->tid_r_comp++;
25358c2ecf20Sopenharmony_ci		req->state = TID_REQUEST_COMPLETE;
25368c2ecf20Sopenharmony_ci	}
25378c2ecf20Sopenharmony_ci
25388c2ecf20Sopenharmony_ci	/*
25398c2ecf20Sopenharmony_ci	 * Clear the hw flow under two conditions:
25408c2ecf20Sopenharmony_ci	 * 1. This request is a sync point and it is complete;
25418c2ecf20Sopenharmony_ci	 * 2. Current request is completed and there are no more requests.
25428c2ecf20Sopenharmony_ci	 */
25438c2ecf20Sopenharmony_ci	if ((req->state == TID_REQUEST_SYNC &&
25448c2ecf20Sopenharmony_ci	     req->comp_seg == req->cur_seg) ||
25458c2ecf20Sopenharmony_ci	    priv->tid_r_comp == priv->tid_r_reqs) {
25468c2ecf20Sopenharmony_ci		hfi1_kern_clear_hw_flow(priv->rcd, qp);
25478c2ecf20Sopenharmony_ci		priv->s_flags &= ~HFI1_R_TID_SW_PSN;
25488c2ecf20Sopenharmony_ci		if (req->state == TID_REQUEST_SYNC)
25498c2ecf20Sopenharmony_ci			req->state = TID_REQUEST_ACTIVE;
25508c2ecf20Sopenharmony_ci	}
25518c2ecf20Sopenharmony_ci
25528c2ecf20Sopenharmony_ci	hfi1_schedule_send(qp);
25538c2ecf20Sopenharmony_ci	goto ack_done;
25548c2ecf20Sopenharmony_ci
25558c2ecf20Sopenharmony_ciack_op_err:
25568c2ecf20Sopenharmony_ci	/*
25578c2ecf20Sopenharmony_ci	 * The test indicates that the send engine has finished its cleanup
25588c2ecf20Sopenharmony_ci	 * after sending the request and it's now safe to put the QP into error
25598c2ecf20Sopenharmony_ci	 * state. However, if the wqe queue is empty (qp->s_acked == qp->s_tail
25608c2ecf20Sopenharmony_ci	 * == qp->s_head), it would be unsafe to complete the wqe pointed by
25618c2ecf20Sopenharmony_ci	 * qp->s_acked here. Putting the qp into error state will safely flush
25628c2ecf20Sopenharmony_ci	 * all remaining requests.
25638c2ecf20Sopenharmony_ci	 */
25648c2ecf20Sopenharmony_ci	if (qp->s_last == qp->s_acked)
25658c2ecf20Sopenharmony_ci		rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
25668c2ecf20Sopenharmony_ci
25678c2ecf20Sopenharmony_ciack_done:
25688c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&qp->s_lock, flags);
25698c2ecf20Sopenharmony_ci}
25708c2ecf20Sopenharmony_ci
25718c2ecf20Sopenharmony_civoid hfi1_kern_read_tid_flow_free(struct rvt_qp *qp)
25728c2ecf20Sopenharmony_ci	__must_hold(&qp->s_lock)
25738c2ecf20Sopenharmony_ci{
25748c2ecf20Sopenharmony_ci	u32 n = qp->s_acked;
25758c2ecf20Sopenharmony_ci	struct rvt_swqe *wqe;
25768c2ecf20Sopenharmony_ci	struct tid_rdma_request *req;
25778c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
25788c2ecf20Sopenharmony_ci
25798c2ecf20Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
25808c2ecf20Sopenharmony_ci	/* Free any TID entries */
25818c2ecf20Sopenharmony_ci	while (n != qp->s_tail) {
25828c2ecf20Sopenharmony_ci		wqe = rvt_get_swqe_ptr(qp, n);
25838c2ecf20Sopenharmony_ci		if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
25848c2ecf20Sopenharmony_ci			req = wqe_to_tid_req(wqe);
25858c2ecf20Sopenharmony_ci			hfi1_kern_exp_rcv_clear_all(req);
25868c2ecf20Sopenharmony_ci		}
25878c2ecf20Sopenharmony_ci
25888c2ecf20Sopenharmony_ci		if (++n == qp->s_size)
25898c2ecf20Sopenharmony_ci			n = 0;
25908c2ecf20Sopenharmony_ci	}
25918c2ecf20Sopenharmony_ci	/* Free flow */
25928c2ecf20Sopenharmony_ci	hfi1_kern_clear_hw_flow(priv->rcd, qp);
25938c2ecf20Sopenharmony_ci}
25948c2ecf20Sopenharmony_ci
25958c2ecf20Sopenharmony_cistatic bool tid_rdma_tid_err(struct hfi1_packet *packet, u8 rcv_type)
25968c2ecf20Sopenharmony_ci{
25978c2ecf20Sopenharmony_ci	struct rvt_qp *qp = packet->qp;
25988c2ecf20Sopenharmony_ci
25998c2ecf20Sopenharmony_ci	if (rcv_type >= RHF_RCV_TYPE_IB)
26008c2ecf20Sopenharmony_ci		goto done;
26018c2ecf20Sopenharmony_ci
26028c2ecf20Sopenharmony_ci	spin_lock(&qp->s_lock);
26038c2ecf20Sopenharmony_ci
26048c2ecf20Sopenharmony_ci	/*
26058c2ecf20Sopenharmony_ci	 * We've ran out of space in the eager buffer.
26068c2ecf20Sopenharmony_ci	 * Eagerly received KDETH packets which require space in the
26078c2ecf20Sopenharmony_ci	 * Eager buffer (packet that have payload) are TID RDMA WRITE
26088c2ecf20Sopenharmony_ci	 * response packets. In this case, we have to re-transmit the
26098c2ecf20Sopenharmony_ci	 * TID RDMA WRITE request.
26108c2ecf20Sopenharmony_ci	 */
26118c2ecf20Sopenharmony_ci	if (rcv_type == RHF_RCV_TYPE_EAGER) {
26128c2ecf20Sopenharmony_ci		hfi1_restart_rc(qp, qp->s_last_psn + 1, 1);
26138c2ecf20Sopenharmony_ci		hfi1_schedule_send(qp);
26148c2ecf20Sopenharmony_ci	}
26158c2ecf20Sopenharmony_ci
26168c2ecf20Sopenharmony_ci	/* Since no payload is delivered, just drop the packet */
26178c2ecf20Sopenharmony_ci	spin_unlock(&qp->s_lock);
26188c2ecf20Sopenharmony_cidone:
26198c2ecf20Sopenharmony_ci	return true;
26208c2ecf20Sopenharmony_ci}
26218c2ecf20Sopenharmony_ci
26228c2ecf20Sopenharmony_cistatic void restart_tid_rdma_read_req(struct hfi1_ctxtdata *rcd,
26238c2ecf20Sopenharmony_ci				      struct rvt_qp *qp, struct rvt_swqe *wqe)
26248c2ecf20Sopenharmony_ci{
26258c2ecf20Sopenharmony_ci	struct tid_rdma_request *req;
26268c2ecf20Sopenharmony_ci	struct tid_rdma_flow *flow;
26278c2ecf20Sopenharmony_ci
26288c2ecf20Sopenharmony_ci	/* Start from the right segment */
26298c2ecf20Sopenharmony_ci	qp->r_flags |= RVT_R_RDMAR_SEQ;
26308c2ecf20Sopenharmony_ci	req = wqe_to_tid_req(wqe);
26318c2ecf20Sopenharmony_ci	flow = &req->flows[req->clear_tail];
26328c2ecf20Sopenharmony_ci	hfi1_restart_rc(qp, flow->flow_state.ib_spsn, 0);
26338c2ecf20Sopenharmony_ci	if (list_empty(&qp->rspwait)) {
26348c2ecf20Sopenharmony_ci		qp->r_flags |= RVT_R_RSP_SEND;
26358c2ecf20Sopenharmony_ci		rvt_get_qp(qp);
26368c2ecf20Sopenharmony_ci		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
26378c2ecf20Sopenharmony_ci	}
26388c2ecf20Sopenharmony_ci}
26398c2ecf20Sopenharmony_ci
26408c2ecf20Sopenharmony_ci/*
26418c2ecf20Sopenharmony_ci * Handle the KDETH eflags for TID RDMA READ response.
26428c2ecf20Sopenharmony_ci *
26438c2ecf20Sopenharmony_ci * Return true if the last packet for a segment has been received and it is
26448c2ecf20Sopenharmony_ci * time to process the response normally; otherwise, return true.
26458c2ecf20Sopenharmony_ci *
26468c2ecf20Sopenharmony_ci * The caller must hold the packet->qp->r_lock and the rcu_read_lock.
26478c2ecf20Sopenharmony_ci */
26488c2ecf20Sopenharmony_cistatic bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
26498c2ecf20Sopenharmony_ci				     struct hfi1_packet *packet, u8 rcv_type,
26508c2ecf20Sopenharmony_ci				     u8 rte, u32 psn, u32 ibpsn)
26518c2ecf20Sopenharmony_ci	__must_hold(&packet->qp->r_lock) __must_hold(RCU)
26528c2ecf20Sopenharmony_ci{
26538c2ecf20Sopenharmony_ci	struct hfi1_pportdata *ppd = rcd->ppd;
26548c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd = ppd->dd;
26558c2ecf20Sopenharmony_ci	struct hfi1_ibport *ibp;
26568c2ecf20Sopenharmony_ci	struct rvt_swqe *wqe;
26578c2ecf20Sopenharmony_ci	struct tid_rdma_request *req;
26588c2ecf20Sopenharmony_ci	struct tid_rdma_flow *flow;
26598c2ecf20Sopenharmony_ci	u32 ack_psn;
26608c2ecf20Sopenharmony_ci	struct rvt_qp *qp = packet->qp;
26618c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
26628c2ecf20Sopenharmony_ci	bool ret = true;
26638c2ecf20Sopenharmony_ci	int diff = 0;
26648c2ecf20Sopenharmony_ci	u32 fpsn;
26658c2ecf20Sopenharmony_ci
26668c2ecf20Sopenharmony_ci	lockdep_assert_held(&qp->r_lock);
26678c2ecf20Sopenharmony_ci	trace_hfi1_rsp_read_kdeth_eflags(qp, ibpsn);
26688c2ecf20Sopenharmony_ci	trace_hfi1_sender_read_kdeth_eflags(qp);
26698c2ecf20Sopenharmony_ci	trace_hfi1_tid_read_sender_kdeth_eflags(qp, 0);
26708c2ecf20Sopenharmony_ci	spin_lock(&qp->s_lock);
26718c2ecf20Sopenharmony_ci	/* If the psn is out of valid range, drop the packet */
26728c2ecf20Sopenharmony_ci	if (cmp_psn(ibpsn, qp->s_last_psn) < 0 ||
26738c2ecf20Sopenharmony_ci	    cmp_psn(ibpsn, qp->s_psn) > 0)
26748c2ecf20Sopenharmony_ci		goto s_unlock;
26758c2ecf20Sopenharmony_ci
26768c2ecf20Sopenharmony_ci	/*
26778c2ecf20Sopenharmony_ci	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
26788c2ecf20Sopenharmony_ci	 * requests and implicitly NAK RDMA read and atomic requests issued
26798c2ecf20Sopenharmony_ci	 * before the NAK'ed request.
26808c2ecf20Sopenharmony_ci	 */
26818c2ecf20Sopenharmony_ci	ack_psn = ibpsn - 1;
26828c2ecf20Sopenharmony_ci	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
26838c2ecf20Sopenharmony_ci	ibp = to_iport(qp->ibqp.device, qp->port_num);
26848c2ecf20Sopenharmony_ci
26858c2ecf20Sopenharmony_ci	/* Complete WQEs that the PSN finishes. */
26868c2ecf20Sopenharmony_ci	while ((int)delta_psn(ack_psn, wqe->lpsn) >= 0) {
26878c2ecf20Sopenharmony_ci		/*
26888c2ecf20Sopenharmony_ci		 * If this request is a RDMA read or atomic, and the NACK is
26898c2ecf20Sopenharmony_ci		 * for a later operation, this NACK NAKs the RDMA read or
26908c2ecf20Sopenharmony_ci		 * atomic.
26918c2ecf20Sopenharmony_ci		 */
26928c2ecf20Sopenharmony_ci		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
26938c2ecf20Sopenharmony_ci		    wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
26948c2ecf20Sopenharmony_ci		    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
26958c2ecf20Sopenharmony_ci		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
26968c2ecf20Sopenharmony_ci			/* Retry this request. */
26978c2ecf20Sopenharmony_ci			if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
26988c2ecf20Sopenharmony_ci				qp->r_flags |= RVT_R_RDMAR_SEQ;
26998c2ecf20Sopenharmony_ci				if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
27008c2ecf20Sopenharmony_ci					restart_tid_rdma_read_req(rcd, qp,
27018c2ecf20Sopenharmony_ci								  wqe);
27028c2ecf20Sopenharmony_ci				} else {
27038c2ecf20Sopenharmony_ci					hfi1_restart_rc(qp, qp->s_last_psn + 1,
27048c2ecf20Sopenharmony_ci							0);
27058c2ecf20Sopenharmony_ci					if (list_empty(&qp->rspwait)) {
27068c2ecf20Sopenharmony_ci						qp->r_flags |= RVT_R_RSP_SEND;
27078c2ecf20Sopenharmony_ci						rvt_get_qp(qp);
27088c2ecf20Sopenharmony_ci						list_add_tail(/* wait */
27098c2ecf20Sopenharmony_ci						   &qp->rspwait,
27108c2ecf20Sopenharmony_ci						   &rcd->qp_wait_list);
27118c2ecf20Sopenharmony_ci					}
27128c2ecf20Sopenharmony_ci				}
27138c2ecf20Sopenharmony_ci			}
27148c2ecf20Sopenharmony_ci			/*
27158c2ecf20Sopenharmony_ci			 * No need to process the NAK since we are
27168c2ecf20Sopenharmony_ci			 * restarting an earlier request.
27178c2ecf20Sopenharmony_ci			 */
27188c2ecf20Sopenharmony_ci			break;
27198c2ecf20Sopenharmony_ci		}
27208c2ecf20Sopenharmony_ci
27218c2ecf20Sopenharmony_ci		wqe = do_rc_completion(qp, wqe, ibp);
27228c2ecf20Sopenharmony_ci		if (qp->s_acked == qp->s_tail)
27238c2ecf20Sopenharmony_ci			goto s_unlock;
27248c2ecf20Sopenharmony_ci	}
27258c2ecf20Sopenharmony_ci
27268c2ecf20Sopenharmony_ci	if (qp->s_acked == qp->s_tail)
27278c2ecf20Sopenharmony_ci		goto s_unlock;
27288c2ecf20Sopenharmony_ci
27298c2ecf20Sopenharmony_ci	/* Handle the eflags for the request */
27308c2ecf20Sopenharmony_ci	if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
27318c2ecf20Sopenharmony_ci		goto s_unlock;
27328c2ecf20Sopenharmony_ci
27338c2ecf20Sopenharmony_ci	req = wqe_to_tid_req(wqe);
27348c2ecf20Sopenharmony_ci	trace_hfi1_tid_req_read_kdeth_eflags(qp, 0, wqe->wr.opcode, wqe->psn,
27358c2ecf20Sopenharmony_ci					     wqe->lpsn, req);
27368c2ecf20Sopenharmony_ci	switch (rcv_type) {
27378c2ecf20Sopenharmony_ci	case RHF_RCV_TYPE_EXPECTED:
27388c2ecf20Sopenharmony_ci		switch (rte) {
27398c2ecf20Sopenharmony_ci		case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
27408c2ecf20Sopenharmony_ci			/*
27418c2ecf20Sopenharmony_ci			 * On the first occurrence of a Flow Sequence error,
27428c2ecf20Sopenharmony_ci			 * the flag TID_FLOW_SW_PSN is set.
27438c2ecf20Sopenharmony_ci			 *
27448c2ecf20Sopenharmony_ci			 * After that, the flow is *not* reprogrammed and the
27458c2ecf20Sopenharmony_ci			 * protocol falls back to SW PSN checking. This is done
27468c2ecf20Sopenharmony_ci			 * to prevent continuous Flow Sequence errors for any
27478c2ecf20Sopenharmony_ci			 * packets that could be still in the fabric.
27488c2ecf20Sopenharmony_ci			 */
27498c2ecf20Sopenharmony_ci			flow = &req->flows[req->clear_tail];
27508c2ecf20Sopenharmony_ci			trace_hfi1_tid_flow_read_kdeth_eflags(qp,
27518c2ecf20Sopenharmony_ci							      req->clear_tail,
27528c2ecf20Sopenharmony_ci							      flow);
27538c2ecf20Sopenharmony_ci			if (priv->s_flags & HFI1_R_TID_SW_PSN) {
27548c2ecf20Sopenharmony_ci				diff = cmp_psn(psn,
27558c2ecf20Sopenharmony_ci					       flow->flow_state.r_next_psn);
27568c2ecf20Sopenharmony_ci				if (diff > 0) {
27578c2ecf20Sopenharmony_ci					/* Drop the packet.*/
27588c2ecf20Sopenharmony_ci					goto s_unlock;
27598c2ecf20Sopenharmony_ci				} else if (diff < 0) {
27608c2ecf20Sopenharmony_ci					/*
27618c2ecf20Sopenharmony_ci					 * If a response packet for a restarted
27628c2ecf20Sopenharmony_ci					 * request has come back, reset the
27638c2ecf20Sopenharmony_ci					 * restart flag.
27648c2ecf20Sopenharmony_ci					 */
27658c2ecf20Sopenharmony_ci					if (qp->r_flags & RVT_R_RDMAR_SEQ)
27668c2ecf20Sopenharmony_ci						qp->r_flags &=
27678c2ecf20Sopenharmony_ci							~RVT_R_RDMAR_SEQ;
27688c2ecf20Sopenharmony_ci
27698c2ecf20Sopenharmony_ci					/* Drop the packet.*/
27708c2ecf20Sopenharmony_ci					goto s_unlock;
27718c2ecf20Sopenharmony_ci				}
27728c2ecf20Sopenharmony_ci
27738c2ecf20Sopenharmony_ci				/*
27748c2ecf20Sopenharmony_ci				 * If SW PSN verification is successful and
27758c2ecf20Sopenharmony_ci				 * this is the last packet in the segment, tell
27768c2ecf20Sopenharmony_ci				 * the caller to process it as a normal packet.
27778c2ecf20Sopenharmony_ci				 */
27788c2ecf20Sopenharmony_ci				fpsn = full_flow_psn(flow,
27798c2ecf20Sopenharmony_ci						     flow->flow_state.lpsn);
27808c2ecf20Sopenharmony_ci				if (cmp_psn(fpsn, psn) == 0) {
27818c2ecf20Sopenharmony_ci					ret = false;
27828c2ecf20Sopenharmony_ci					if (qp->r_flags & RVT_R_RDMAR_SEQ)
27838c2ecf20Sopenharmony_ci						qp->r_flags &=
27848c2ecf20Sopenharmony_ci							~RVT_R_RDMAR_SEQ;
27858c2ecf20Sopenharmony_ci				}
27868c2ecf20Sopenharmony_ci				flow->flow_state.r_next_psn =
27878c2ecf20Sopenharmony_ci					mask_psn(psn + 1);
27888c2ecf20Sopenharmony_ci			} else {
27898c2ecf20Sopenharmony_ci				u32 last_psn;
27908c2ecf20Sopenharmony_ci
27918c2ecf20Sopenharmony_ci				last_psn = read_r_next_psn(dd, rcd->ctxt,
27928c2ecf20Sopenharmony_ci							   flow->idx);
27938c2ecf20Sopenharmony_ci				flow->flow_state.r_next_psn = last_psn;
27948c2ecf20Sopenharmony_ci				priv->s_flags |= HFI1_R_TID_SW_PSN;
27958c2ecf20Sopenharmony_ci				/*
27968c2ecf20Sopenharmony_ci				 * If no request has been restarted yet,
27978c2ecf20Sopenharmony_ci				 * restart the current one.
27988c2ecf20Sopenharmony_ci				 */
27998c2ecf20Sopenharmony_ci				if (!(qp->r_flags & RVT_R_RDMAR_SEQ))
28008c2ecf20Sopenharmony_ci					restart_tid_rdma_read_req(rcd, qp,
28018c2ecf20Sopenharmony_ci								  wqe);
28028c2ecf20Sopenharmony_ci			}
28038c2ecf20Sopenharmony_ci
28048c2ecf20Sopenharmony_ci			break;
28058c2ecf20Sopenharmony_ci
28068c2ecf20Sopenharmony_ci		case RHF_RTE_EXPECTED_FLOW_GEN_ERR:
28078c2ecf20Sopenharmony_ci			/*
28088c2ecf20Sopenharmony_ci			 * Since the TID flow is able to ride through
28098c2ecf20Sopenharmony_ci			 * generation mismatch, drop this stale packet.
28108c2ecf20Sopenharmony_ci			 */
28118c2ecf20Sopenharmony_ci			break;
28128c2ecf20Sopenharmony_ci
28138c2ecf20Sopenharmony_ci		default:
28148c2ecf20Sopenharmony_ci			break;
28158c2ecf20Sopenharmony_ci		}
28168c2ecf20Sopenharmony_ci		break;
28178c2ecf20Sopenharmony_ci
28188c2ecf20Sopenharmony_ci	case RHF_RCV_TYPE_ERROR:
28198c2ecf20Sopenharmony_ci		switch (rte) {
28208c2ecf20Sopenharmony_ci		case RHF_RTE_ERROR_OP_CODE_ERR:
28218c2ecf20Sopenharmony_ci		case RHF_RTE_ERROR_KHDR_MIN_LEN_ERR:
28228c2ecf20Sopenharmony_ci		case RHF_RTE_ERROR_KHDR_HCRC_ERR:
28238c2ecf20Sopenharmony_ci		case RHF_RTE_ERROR_KHDR_KVER_ERR:
28248c2ecf20Sopenharmony_ci		case RHF_RTE_ERROR_CONTEXT_ERR:
28258c2ecf20Sopenharmony_ci		case RHF_RTE_ERROR_KHDR_TID_ERR:
28268c2ecf20Sopenharmony_ci		default:
28278c2ecf20Sopenharmony_ci			break;
28288c2ecf20Sopenharmony_ci		}
28298c2ecf20Sopenharmony_ci	default:
28308c2ecf20Sopenharmony_ci		break;
28318c2ecf20Sopenharmony_ci	}
28328c2ecf20Sopenharmony_cis_unlock:
28338c2ecf20Sopenharmony_ci	spin_unlock(&qp->s_lock);
28348c2ecf20Sopenharmony_ci	return ret;
28358c2ecf20Sopenharmony_ci}
28368c2ecf20Sopenharmony_ci
28378c2ecf20Sopenharmony_cibool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
28388c2ecf20Sopenharmony_ci			      struct hfi1_pportdata *ppd,
28398c2ecf20Sopenharmony_ci			      struct hfi1_packet *packet)
28408c2ecf20Sopenharmony_ci{
28418c2ecf20Sopenharmony_ci	struct hfi1_ibport *ibp = &ppd->ibport_data;
28428c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd = ppd->dd;
28438c2ecf20Sopenharmony_ci	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
28448c2ecf20Sopenharmony_ci	u8 rcv_type = rhf_rcv_type(packet->rhf);
28458c2ecf20Sopenharmony_ci	u8 rte = rhf_rcv_type_err(packet->rhf);
28468c2ecf20Sopenharmony_ci	struct ib_header *hdr = packet->hdr;
28478c2ecf20Sopenharmony_ci	struct ib_other_headers *ohdr = NULL;
28488c2ecf20Sopenharmony_ci	int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
28498c2ecf20Sopenharmony_ci	u16 lid  = be16_to_cpu(hdr->lrh[1]);
28508c2ecf20Sopenharmony_ci	u8 opcode;
28518c2ecf20Sopenharmony_ci	u32 qp_num, psn, ibpsn;
28528c2ecf20Sopenharmony_ci	struct rvt_qp *qp;
28538c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv;
28548c2ecf20Sopenharmony_ci	unsigned long flags;
28558c2ecf20Sopenharmony_ci	bool ret = true;
28568c2ecf20Sopenharmony_ci	struct rvt_ack_entry *e;
28578c2ecf20Sopenharmony_ci	struct tid_rdma_request *req;
28588c2ecf20Sopenharmony_ci	struct tid_rdma_flow *flow;
28598c2ecf20Sopenharmony_ci	int diff = 0;
28608c2ecf20Sopenharmony_ci
28618c2ecf20Sopenharmony_ci	trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ",
28628c2ecf20Sopenharmony_ci					   packet->rhf);
28638c2ecf20Sopenharmony_ci	if (packet->rhf & RHF_ICRC_ERR)
28648c2ecf20Sopenharmony_ci		return ret;
28658c2ecf20Sopenharmony_ci
28668c2ecf20Sopenharmony_ci	packet->ohdr = &hdr->u.oth;
28678c2ecf20Sopenharmony_ci	ohdr = packet->ohdr;
28688c2ecf20Sopenharmony_ci	trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
28698c2ecf20Sopenharmony_ci
28708c2ecf20Sopenharmony_ci	/* Get the destination QP number. */
28718c2ecf20Sopenharmony_ci	qp_num = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_qp) &
28728c2ecf20Sopenharmony_ci		RVT_QPN_MASK;
28738c2ecf20Sopenharmony_ci	if (lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
28748c2ecf20Sopenharmony_ci		goto drop;
28758c2ecf20Sopenharmony_ci
28768c2ecf20Sopenharmony_ci	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
28778c2ecf20Sopenharmony_ci	opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
28788c2ecf20Sopenharmony_ci
28798c2ecf20Sopenharmony_ci	rcu_read_lock();
28808c2ecf20Sopenharmony_ci	qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
28818c2ecf20Sopenharmony_ci	if (!qp)
28828c2ecf20Sopenharmony_ci		goto rcu_unlock;
28838c2ecf20Sopenharmony_ci
28848c2ecf20Sopenharmony_ci	packet->qp = qp;
28858c2ecf20Sopenharmony_ci
28868c2ecf20Sopenharmony_ci	/* Check for valid receive state. */
28878c2ecf20Sopenharmony_ci	spin_lock_irqsave(&qp->r_lock, flags);
28888c2ecf20Sopenharmony_ci	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
28898c2ecf20Sopenharmony_ci		ibp->rvp.n_pkt_drops++;
28908c2ecf20Sopenharmony_ci		goto r_unlock;
28918c2ecf20Sopenharmony_ci	}
28928c2ecf20Sopenharmony_ci
28938c2ecf20Sopenharmony_ci	if (packet->rhf & RHF_TID_ERR) {
28948c2ecf20Sopenharmony_ci		/* For TIDERR and RC QPs preemptively schedule a NAK */
28958c2ecf20Sopenharmony_ci		u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
28968c2ecf20Sopenharmony_ci
28978c2ecf20Sopenharmony_ci		/* Sanity check packet */
28988c2ecf20Sopenharmony_ci		if (tlen < 24)
28998c2ecf20Sopenharmony_ci			goto r_unlock;
29008c2ecf20Sopenharmony_ci
29018c2ecf20Sopenharmony_ci		/*
29028c2ecf20Sopenharmony_ci		 * Check for GRH. We should never get packets with GRH in this
29038c2ecf20Sopenharmony_ci		 * path.
29048c2ecf20Sopenharmony_ci		 */
29058c2ecf20Sopenharmony_ci		if (lnh == HFI1_LRH_GRH)
29068c2ecf20Sopenharmony_ci			goto r_unlock;
29078c2ecf20Sopenharmony_ci
29088c2ecf20Sopenharmony_ci		if (tid_rdma_tid_err(packet, rcv_type))
29098c2ecf20Sopenharmony_ci			goto r_unlock;
29108c2ecf20Sopenharmony_ci	}
29118c2ecf20Sopenharmony_ci
29128c2ecf20Sopenharmony_ci	/* handle TID RDMA READ */
29138c2ecf20Sopenharmony_ci	if (opcode == TID_OP(READ_RESP)) {
29148c2ecf20Sopenharmony_ci		ibpsn = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn);
29158c2ecf20Sopenharmony_ci		ibpsn = mask_psn(ibpsn);
29168c2ecf20Sopenharmony_ci		ret = handle_read_kdeth_eflags(rcd, packet, rcv_type, rte, psn,
29178c2ecf20Sopenharmony_ci					       ibpsn);
29188c2ecf20Sopenharmony_ci		goto r_unlock;
29198c2ecf20Sopenharmony_ci	}
29208c2ecf20Sopenharmony_ci
29218c2ecf20Sopenharmony_ci	/*
29228c2ecf20Sopenharmony_ci	 * qp->s_tail_ack_queue points to the rvt_ack_entry currently being
29238c2ecf20Sopenharmony_ci	 * processed. These a completed sequentially so we can be sure that
29248c2ecf20Sopenharmony_ci	 * the pointer will not change until the entire request has completed.
29258c2ecf20Sopenharmony_ci	 */
29268c2ecf20Sopenharmony_ci	spin_lock(&qp->s_lock);
29278c2ecf20Sopenharmony_ci	qpriv = qp->priv;
29288c2ecf20Sopenharmony_ci	if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID ||
29298c2ecf20Sopenharmony_ci	    qpriv->r_tid_tail == qpriv->r_tid_head)
29308c2ecf20Sopenharmony_ci		goto unlock;
29318c2ecf20Sopenharmony_ci	e = &qp->s_ack_queue[qpriv->r_tid_tail];
29328c2ecf20Sopenharmony_ci	if (e->opcode != TID_OP(WRITE_REQ))
29338c2ecf20Sopenharmony_ci		goto unlock;
29348c2ecf20Sopenharmony_ci	req = ack_to_tid_req(e);
29358c2ecf20Sopenharmony_ci	if (req->comp_seg == req->cur_seg)
29368c2ecf20Sopenharmony_ci		goto unlock;
29378c2ecf20Sopenharmony_ci	flow = &req->flows[req->clear_tail];
29388c2ecf20Sopenharmony_ci	trace_hfi1_eflags_err_write(qp, rcv_type, rte, psn);
29398c2ecf20Sopenharmony_ci	trace_hfi1_rsp_handle_kdeth_eflags(qp, psn);
29408c2ecf20Sopenharmony_ci	trace_hfi1_tid_write_rsp_handle_kdeth_eflags(qp);
29418c2ecf20Sopenharmony_ci	trace_hfi1_tid_req_handle_kdeth_eflags(qp, 0, e->opcode, e->psn,
29428c2ecf20Sopenharmony_ci					       e->lpsn, req);
29438c2ecf20Sopenharmony_ci	trace_hfi1_tid_flow_handle_kdeth_eflags(qp, req->clear_tail, flow);
29448c2ecf20Sopenharmony_ci
29458c2ecf20Sopenharmony_ci	switch (rcv_type) {
29468c2ecf20Sopenharmony_ci	case RHF_RCV_TYPE_EXPECTED:
29478c2ecf20Sopenharmony_ci		switch (rte) {
29488c2ecf20Sopenharmony_ci		case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
29498c2ecf20Sopenharmony_ci			if (!(qpriv->s_flags & HFI1_R_TID_SW_PSN)) {
29508c2ecf20Sopenharmony_ci				qpriv->s_flags |= HFI1_R_TID_SW_PSN;
29518c2ecf20Sopenharmony_ci				flow->flow_state.r_next_psn =
29528c2ecf20Sopenharmony_ci					read_r_next_psn(dd, rcd->ctxt,
29538c2ecf20Sopenharmony_ci							flow->idx);
29548c2ecf20Sopenharmony_ci				qpriv->r_next_psn_kdeth =
29558c2ecf20Sopenharmony_ci					flow->flow_state.r_next_psn;
29568c2ecf20Sopenharmony_ci				goto nak_psn;
29578c2ecf20Sopenharmony_ci			} else {
29588c2ecf20Sopenharmony_ci				/*
29598c2ecf20Sopenharmony_ci				 * If the received PSN does not match the next
29608c2ecf20Sopenharmony_ci				 * expected PSN, NAK the packet.
29618c2ecf20Sopenharmony_ci				 * However, only do that if we know that the a
29628c2ecf20Sopenharmony_ci				 * NAK has already been sent. Otherwise, this
29638c2ecf20Sopenharmony_ci				 * mismatch could be due to packets that were
29648c2ecf20Sopenharmony_ci				 * already in flight.
29658c2ecf20Sopenharmony_ci				 */
29668c2ecf20Sopenharmony_ci				diff = cmp_psn(psn,
29678c2ecf20Sopenharmony_ci					       flow->flow_state.r_next_psn);
29688c2ecf20Sopenharmony_ci				if (diff > 0)
29698c2ecf20Sopenharmony_ci					goto nak_psn;
29708c2ecf20Sopenharmony_ci				else if (diff < 0)
29718c2ecf20Sopenharmony_ci					break;
29728c2ecf20Sopenharmony_ci
29738c2ecf20Sopenharmony_ci				qpriv->s_nak_state = 0;
29748c2ecf20Sopenharmony_ci				/*
29758c2ecf20Sopenharmony_ci				 * If SW PSN verification is successful and this
29768c2ecf20Sopenharmony_ci				 * is the last packet in the segment, tell the
29778c2ecf20Sopenharmony_ci				 * caller to process it as a normal packet.
29788c2ecf20Sopenharmony_ci				 */
29798c2ecf20Sopenharmony_ci				if (psn == full_flow_psn(flow,
29808c2ecf20Sopenharmony_ci							 flow->flow_state.lpsn))
29818c2ecf20Sopenharmony_ci					ret = false;
29828c2ecf20Sopenharmony_ci				flow->flow_state.r_next_psn =
29838c2ecf20Sopenharmony_ci					mask_psn(psn + 1);
29848c2ecf20Sopenharmony_ci				qpriv->r_next_psn_kdeth =
29858c2ecf20Sopenharmony_ci					flow->flow_state.r_next_psn;
29868c2ecf20Sopenharmony_ci			}
29878c2ecf20Sopenharmony_ci			break;
29888c2ecf20Sopenharmony_ci
29898c2ecf20Sopenharmony_ci		case RHF_RTE_EXPECTED_FLOW_GEN_ERR:
29908c2ecf20Sopenharmony_ci			goto nak_psn;
29918c2ecf20Sopenharmony_ci
29928c2ecf20Sopenharmony_ci		default:
29938c2ecf20Sopenharmony_ci			break;
29948c2ecf20Sopenharmony_ci		}
29958c2ecf20Sopenharmony_ci		break;
29968c2ecf20Sopenharmony_ci
29978c2ecf20Sopenharmony_ci	case RHF_RCV_TYPE_ERROR:
29988c2ecf20Sopenharmony_ci		switch (rte) {
29998c2ecf20Sopenharmony_ci		case RHF_RTE_ERROR_OP_CODE_ERR:
30008c2ecf20Sopenharmony_ci		case RHF_RTE_ERROR_KHDR_MIN_LEN_ERR:
30018c2ecf20Sopenharmony_ci		case RHF_RTE_ERROR_KHDR_HCRC_ERR:
30028c2ecf20Sopenharmony_ci		case RHF_RTE_ERROR_KHDR_KVER_ERR:
30038c2ecf20Sopenharmony_ci		case RHF_RTE_ERROR_CONTEXT_ERR:
30048c2ecf20Sopenharmony_ci		case RHF_RTE_ERROR_KHDR_TID_ERR:
30058c2ecf20Sopenharmony_ci		default:
30068c2ecf20Sopenharmony_ci			break;
30078c2ecf20Sopenharmony_ci		}
30088c2ecf20Sopenharmony_ci	default:
30098c2ecf20Sopenharmony_ci		break;
30108c2ecf20Sopenharmony_ci	}
30118c2ecf20Sopenharmony_ci
30128c2ecf20Sopenharmony_ciunlock:
30138c2ecf20Sopenharmony_ci	spin_unlock(&qp->s_lock);
30148c2ecf20Sopenharmony_cir_unlock:
30158c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&qp->r_lock, flags);
30168c2ecf20Sopenharmony_circu_unlock:
30178c2ecf20Sopenharmony_ci	rcu_read_unlock();
30188c2ecf20Sopenharmony_cidrop:
30198c2ecf20Sopenharmony_ci	return ret;
30208c2ecf20Sopenharmony_cinak_psn:
30218c2ecf20Sopenharmony_ci	ibp->rvp.n_rc_seqnak++;
30228c2ecf20Sopenharmony_ci	if (!qpriv->s_nak_state) {
30238c2ecf20Sopenharmony_ci		qpriv->s_nak_state = IB_NAK_PSN_ERROR;
30248c2ecf20Sopenharmony_ci		/* We are NAK'ing the next expected PSN */
30258c2ecf20Sopenharmony_ci		qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
30268c2ecf20Sopenharmony_ci		tid_rdma_trigger_ack(qp);
30278c2ecf20Sopenharmony_ci	}
30288c2ecf20Sopenharmony_ci	goto unlock;
30298c2ecf20Sopenharmony_ci}
30308c2ecf20Sopenharmony_ci
30318c2ecf20Sopenharmony_ci/*
30328c2ecf20Sopenharmony_ci * "Rewind" the TID request information.
30338c2ecf20Sopenharmony_ci * This means that we reset the state back to ACTIVE,
30348c2ecf20Sopenharmony_ci * find the proper flow, set the flow index to that flow,
30358c2ecf20Sopenharmony_ci * and reset the flow information.
30368c2ecf20Sopenharmony_ci */
void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
			       u32 *bth2)
{
	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
	struct tid_rdma_flow *flow;
	struct hfi1_qp_priv *qpriv = qp->priv;
	int diff, delta_pkts;
	u32 tididx = 0, i;
	u16 fidx;

	/*
	 * Locate the flow to resume from and the restart PSN (*bth2):
	 * READ restarts from qp->s_psn and looks the flow up by PSN;
	 * WRITE restarts from the last acked flow/PSN.
	 */
	if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
		*bth2 = mask_psn(qp->s_psn);
		flow = find_flow_ib(req, *bth2, &fidx);
		if (!flow) {
			/* No matching flow: trace and give up the restart. */
			trace_hfi1_msg_tid_restart_req(/* msg */
			   qp, "!!!!!! Could not find flow to restart: bth2 ",
			   (u64)*bth2);
			trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode,
						       wqe->psn, wqe->lpsn,
						       req);
			return;
		}
	} else {
		fidx = req->acked_tail;
		flow = &req->flows[fidx];
		*bth2 = mask_psn(req->r_ack_psn);
	}

	/*
	 * Number of packets already delivered within this flow:
	 * READ counts from the IB starting PSN, WRITE from the full
	 * (generation | sequence) flow starting PSN.
	 */
	if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
		delta_pkts = delta_psn(*bth2, flow->flow_state.ib_spsn);
	else
		delta_pkts = delta_psn(*bth2,
				       full_flow_psn(flow,
						     flow->flow_state.spsn));

	trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
	diff = delta_pkts + flow->resync_npkts;

	/*
	 * Reset flow progress, then walk the TID entries to re-advance
	 * pkt/sent/tid_offset past the 'diff' packets that do not need to
	 * be resent.
	 */
	flow->sent = 0;
	flow->pkt = 0;
	flow->tid_idx = 0;
	flow->tid_offset = 0;
	if (diff) {
		for (tididx = 0; tididx < flow->tidcnt; tididx++) {
			u32 tidentry = flow->tid_entry[tididx], tidlen,
				tidnpkts, npkts;

			flow->tid_offset = 0;
			tidlen = EXP_TID_GET(tidentry, LEN) * PAGE_SIZE;
			tidnpkts = rvt_div_round_up_mtu(qp, tidlen);
			/* Consume at most the rest of 'diff' from this TID. */
			npkts = min_t(u32, diff, tidnpkts);
			flow->pkt += npkts;
			flow->sent += (npkts == tidnpkts ? tidlen :
				       npkts * qp->pmtu);
			flow->tid_offset += npkts * qp->pmtu;
			diff -= npkts;
			if (!diff)
				break;
		}
	}
	if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
		/* Advance the SGE to the first byte that must be resent. */
		rvt_skip_sge(&qpriv->tid_ss, (req->cur_seg * req->seg_len) +
			     flow->sent, 0);
		/*
		 * Packet PSN is based on flow_state.spsn + flow->pkt. However,
		 * during a RESYNC, the generation is incremented and the
		 * sequence is reset to 0. Since we've adjusted the npkts in the
		 * flow and the SGE has been sufficiently advanced, we have to
		 * adjust flow->pkt in order to calculate the correct PSN.
		 */
		flow->pkt -= flow->resync_npkts;
	}

	/* If the current TID entry is exactly consumed, start on the next. */
	if (flow->tid_offset ==
	    EXP_TID_GET(flow->tid_entry[tididx], LEN) * PAGE_SIZE) {
		tididx++;
		flow->tid_offset = 0;
	}
	flow->tid_idx = tididx;
	if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
		/* Move flow_idx to correct index */
		req->flow_idx = fidx;
	else
		req->clear_tail = fidx;

	trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
	trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode, wqe->psn,
				       wqe->lpsn, req);
	req->state = TID_REQUEST_ACTIVE;
	if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
		/* Reset all the flows that we are going to resend */
		fidx = CIRC_NEXT(fidx, MAX_FLOWS);
		i = qpriv->s_tid_tail;
		do {
			/* Clear progress on every flow after the restart one. */
			for (; CIRC_CNT(req->setup_head, fidx, MAX_FLOWS);
			      fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
				req->flows[fidx].sent = 0;
				req->flows[fidx].pkt = 0;
				req->flows[fidx].tid_idx = 0;
				req->flows[fidx].tid_offset = 0;
				req->flows[fidx].resync_npkts = 0;
			}
			if (i == qpriv->s_tid_cur)
				break;
			/* Advance (with wrap) to the next TID WRITE wqe. */
			do {
				i = (++i == qp->s_size ? 0 : i);
				wqe = rvt_get_swqe_ptr(qp, i);
			} while (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE);
			req = wqe_to_tid_req(wqe);
			req->cur_seg = req->ack_seg;
			fidx = req->acked_tail;
			/* Pull req->clear_tail back */
			req->clear_tail = fidx;
		} while (1);
	}
}
31538c2ecf20Sopenharmony_ci
/*
 * Release all TID (expected receive) resources held by @qp: clear the
 * hardware flow, then free locally allocated TID entries for pending
 * TID RDMA READ wqes (requester side) and TID RDMA WRITE ack entries
 * (responder side).
 */
void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp)
{
	int i, ret;
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct tid_flow_state *fs;

	/* Only kernel RC QPs with the TID_RDMA capability hold TID state. */
	if (qp->ibqp.qp_type != IB_QPT_RC || !HFI1_CAP_IS_KSET(TID_RDMA))
		return;

	/*
	 * First, clear the flow to help prevent any delayed packets from
	 * being delivered.
	 */
	fs = &qpriv->flow_state;
	if (fs->index != RXE_NUM_TID_FLOWS)
		hfi1_kern_clear_hw_flow(qpriv->rcd, qp);

	/* Requester side: walk the send queue from s_acked to s_head. */
	for (i = qp->s_acked; i != qp->s_head;) {
		struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);

		/* Wrap the circular index. */
		if (++i == qp->s_size)
			i = 0;
		/* Free only locally allocated TID entries */
		if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
			continue;
		/* Loop until hfi1_kern_exp_rcv_clear() reports non-zero. */
		do {
			struct hfi1_swqe_priv *priv = wqe->priv;

			ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
		} while (!ret);
	}
	/* Responder side: walk the ack queue up to r_head_ack_queue. */
	for (i = qp->s_acked_ack_queue; i != qp->r_head_ack_queue;) {
		struct rvt_ack_entry *e = &qp->s_ack_queue[i];

		if (++i == rvt_max_atomic(ib_to_rvt(qp->ibqp.device)))
			i = 0;
		/* Free only locally allocated TID entries */
		if (e->opcode != TID_OP(WRITE_REQ))
			continue;
		do {
			struct hfi1_ack_priv *priv = e->priv;

			ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
		} while (!ret);
	}
}
32008c2ecf20Sopenharmony_ci
32018c2ecf20Sopenharmony_cibool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
32028c2ecf20Sopenharmony_ci{
32038c2ecf20Sopenharmony_ci	struct rvt_swqe *prev;
32048c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
32058c2ecf20Sopenharmony_ci	u32 s_prev;
32068c2ecf20Sopenharmony_ci	struct tid_rdma_request *req;
32078c2ecf20Sopenharmony_ci
32088c2ecf20Sopenharmony_ci	s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
32098c2ecf20Sopenharmony_ci	prev = rvt_get_swqe_ptr(qp, s_prev);
32108c2ecf20Sopenharmony_ci
32118c2ecf20Sopenharmony_ci	switch (wqe->wr.opcode) {
32128c2ecf20Sopenharmony_ci	case IB_WR_SEND:
32138c2ecf20Sopenharmony_ci	case IB_WR_SEND_WITH_IMM:
32148c2ecf20Sopenharmony_ci	case IB_WR_SEND_WITH_INV:
32158c2ecf20Sopenharmony_ci	case IB_WR_ATOMIC_CMP_AND_SWP:
32168c2ecf20Sopenharmony_ci	case IB_WR_ATOMIC_FETCH_AND_ADD:
32178c2ecf20Sopenharmony_ci	case IB_WR_RDMA_WRITE:
32188c2ecf20Sopenharmony_ci	case IB_WR_RDMA_WRITE_WITH_IMM:
32198c2ecf20Sopenharmony_ci		switch (prev->wr.opcode) {
32208c2ecf20Sopenharmony_ci		case IB_WR_TID_RDMA_WRITE:
32218c2ecf20Sopenharmony_ci			req = wqe_to_tid_req(prev);
32228c2ecf20Sopenharmony_ci			if (req->ack_seg != req->total_segs)
32238c2ecf20Sopenharmony_ci				goto interlock;
32248c2ecf20Sopenharmony_ci		default:
32258c2ecf20Sopenharmony_ci			break;
32268c2ecf20Sopenharmony_ci		}
32278c2ecf20Sopenharmony_ci		break;
32288c2ecf20Sopenharmony_ci	case IB_WR_RDMA_READ:
32298c2ecf20Sopenharmony_ci		if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
32308c2ecf20Sopenharmony_ci			break;
32318c2ecf20Sopenharmony_ci		fallthrough;
32328c2ecf20Sopenharmony_ci	case IB_WR_TID_RDMA_READ:
32338c2ecf20Sopenharmony_ci		switch (prev->wr.opcode) {
32348c2ecf20Sopenharmony_ci		case IB_WR_RDMA_READ:
32358c2ecf20Sopenharmony_ci			if (qp->s_acked != qp->s_cur)
32368c2ecf20Sopenharmony_ci				goto interlock;
32378c2ecf20Sopenharmony_ci			break;
32388c2ecf20Sopenharmony_ci		case IB_WR_TID_RDMA_WRITE:
32398c2ecf20Sopenharmony_ci			req = wqe_to_tid_req(prev);
32408c2ecf20Sopenharmony_ci			if (req->ack_seg != req->total_segs)
32418c2ecf20Sopenharmony_ci				goto interlock;
32428c2ecf20Sopenharmony_ci		default:
32438c2ecf20Sopenharmony_ci			break;
32448c2ecf20Sopenharmony_ci		}
32458c2ecf20Sopenharmony_ci	default:
32468c2ecf20Sopenharmony_ci		break;
32478c2ecf20Sopenharmony_ci	}
32488c2ecf20Sopenharmony_ci	return false;
32498c2ecf20Sopenharmony_ci
32508c2ecf20Sopenharmony_ciinterlock:
32518c2ecf20Sopenharmony_ci	priv->s_flags |= HFI1_S_TID_WAIT_INTERLCK;
32528c2ecf20Sopenharmony_ci	return true;
32538c2ecf20Sopenharmony_ci}
32548c2ecf20Sopenharmony_ci
32558c2ecf20Sopenharmony_ci/* Does @sge meet the alignment requirements for tid rdma? */
32568c2ecf20Sopenharmony_cistatic inline bool hfi1_check_sge_align(struct rvt_qp *qp,
32578c2ecf20Sopenharmony_ci					struct rvt_sge *sge, int num_sge)
32588c2ecf20Sopenharmony_ci{
32598c2ecf20Sopenharmony_ci	int i;
32608c2ecf20Sopenharmony_ci
32618c2ecf20Sopenharmony_ci	for (i = 0; i < num_sge; i++, sge++) {
32628c2ecf20Sopenharmony_ci		trace_hfi1_sge_check_align(qp, i, sge);
32638c2ecf20Sopenharmony_ci		if ((u64)sge->vaddr & ~PAGE_MASK ||
32648c2ecf20Sopenharmony_ci		    sge->sge_length & ~PAGE_MASK)
32658c2ecf20Sopenharmony_ci			return false;
32668c2ecf20Sopenharmony_ci	}
32678c2ecf20Sopenharmony_ci	return true;
32688c2ecf20Sopenharmony_ci}
32698c2ecf20Sopenharmony_ci
/*
 * Convert an eligible RDMA READ/WRITE wqe into its TID RDMA equivalent
 * and initialize the per-wqe TID request state (segment sizing, flow
 * allocation, last PSN). Leaves the wqe untouched if TID RDMA was not
 * negotiated or the request does not qualify.
 */
void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
{
	struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
	struct hfi1_swqe_priv *priv = wqe->priv;
	struct tid_rdma_params *remote;
	enum ib_wr_opcode new_opcode;
	bool do_tid_rdma = false;
	struct hfi1_pportdata *ppd = qpriv->rcd->ppd;

	/* Destination base LID equals our own port LID: skip TID RDMA. */
	if ((rdma_ah_get_dlid(&qp->remote_ah_attr) & ~((1 << ppd->lmc) - 1)) ==
				ppd->lid)
		return;
	/* Only 9B headers are handled here. */
	if (qpriv->hdr_type != HFI1_PKT_TYPE_9B)
		return;

	rcu_read_lock();
	remote = rcu_dereference(qpriv->tid_rdma.remote);
	/*
	 * If TID RDMA is disabled by the negotiation, don't
	 * use it.
	 */
	if (!remote)
		goto exit;

	if (wqe->wr.opcode == IB_WR_RDMA_READ) {
		/* READ qualifies when every SGE is page aligned/multiple. */
		if (hfi1_check_sge_align(qp, &wqe->sg_list[0],
					 wqe->wr.num_sge)) {
			new_opcode = IB_WR_TID_RDMA_READ;
			do_tid_rdma = true;
		}
	} else if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
		/*
		 * TID RDMA is enabled for this RDMA WRITE request iff:
		 *   1. The remote address is page-aligned,
		 *   2. The length is larger than the minimum segment size,
		 *   3. The length is page-multiple.
		 */
		if (!(wqe->rdma_wr.remote_addr & ~PAGE_MASK) &&
		    !(wqe->length & ~PAGE_MASK)) {
			new_opcode = IB_WR_TID_RDMA_WRITE;
			do_tid_rdma = true;
		}
	}

	if (do_tid_rdma) {
		/* Flow resources must be available before converting. */
		if (hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req, GFP_ATOMIC))
			goto exit;
		wqe->wr.opcode = new_opcode;
		/* Segment size is capped by the negotiated max_len. */
		priv->tid_req.seg_len =
			min_t(u32, remote->max_len, wqe->length);
		priv->tid_req.total_segs =
			DIV_ROUND_UP(wqe->length, priv->tid_req.seg_len);
		/* Compute the last PSN of the request */
		wqe->lpsn = wqe->psn;
		if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
			/* READ: one PSN per MTU-sized packet. */
			priv->tid_req.n_flows = remote->max_read;
			qpriv->tid_r_reqs++;
			wqe->lpsn += rvt_div_round_up_mtu(qp, wqe->length) - 1;
		} else {
			/* WRITE: one PSN per segment. */
			wqe->lpsn += priv->tid_req.total_segs - 1;
			atomic_inc(&qpriv->n_requests);
		}

		priv->tid_req.cur_seg = 0;
		priv->tid_req.comp_seg = 0;
		priv->tid_req.ack_seg = 0;
		priv->tid_req.state = TID_REQUEST_INACTIVE;
		/*
		 * Reset acked_tail.
		 * TID RDMA READ does not have ACKs so it does not
		 * update the pointer. We have to reset it so TID RDMA
		 * WRITE does not get confused.
		 */
		priv->tid_req.acked_tail = priv->tid_req.setup_head;
		trace_hfi1_tid_req_setup_tid_wqe(qp, 1, wqe->wr.opcode,
						 wqe->psn, wqe->lpsn,
						 &priv->tid_req);
	}
exit:
	rcu_read_unlock();
}
33518c2ecf20Sopenharmony_ci
33528c2ecf20Sopenharmony_ci/* TID RDMA WRITE functions */
33538c2ecf20Sopenharmony_ci
/*
 * Build the TID RDMA WRITE REQ packet headers (KDETH + RETH + verbs QP)
 * for @wqe into @ohdr and transition the QP send state to WRITE_REQ.
 *
 * Return: header size in 32-bit words.
 */
u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
				  struct ib_other_headers *ohdr,
				  u32 *bth1, u32 *bth2, u32 *len)
{
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
	struct tid_rdma_params *remote;

	rcu_read_lock();
	remote = rcu_dereference(qpriv->tid_rdma.remote);
	/*
	 * Set the number of flow to be used based on negotiated
	 * parameters.
	 */
	/*
	 * NOTE(review): remote is dereferenced without a NULL check —
	 * presumably a WRITE REQ is only built after parameter negotiation
	 * succeeded; confirm against the callers.
	 */
	req->n_flows = remote->max_write;
	req->state = TID_REQUEST_ACTIVE;

	KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth0, KVER, 0x1);
	KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth1, JKEY, remote->jkey);
	/* RETH vaddr points at the still-unsent tail of the wqe buffer. */
	ohdr->u.tid_rdma.w_req.reth.vaddr =
		cpu_to_be64(wqe->rdma_wr.remote_addr + (wqe->length - *len));
	ohdr->u.tid_rdma.w_req.reth.rkey =
		cpu_to_be32(wqe->rdma_wr.rkey);
	ohdr->u.tid_rdma.w_req.reth.length = cpu_to_be32(*len);
	ohdr->u.tid_rdma.w_req.verbs_qp = cpu_to_be32(qp->remote_qpn);
	/* Replace the destination QPN in bth1 with the negotiated TID QP. */
	*bth1 &= ~RVT_QPN_MASK;
	*bth1 |= remote->qp;
	qp->s_state = TID_OP(WRITE_REQ);
	qp->s_flags |= HFI1_S_WAIT_TID_RESP;
	*bth2 |= IB_BTH_REQ_ACK;
	/* The request itself carries no payload. */
	*len = 0;

	rcu_read_unlock();
	return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
}
33898c2ecf20Sopenharmony_ci
33908c2ecf20Sopenharmony_cistatic u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
33918c2ecf20Sopenharmony_ci{
33928c2ecf20Sopenharmony_ci	/*
33938c2ecf20Sopenharmony_ci	 * Heuristic for computing the RNR timeout when waiting on the flow
33948c2ecf20Sopenharmony_ci	 * queue. Rather than a computationaly expensive exact estimate of when
33958c2ecf20Sopenharmony_ci	 * a flow will be available, we assume that if a QP is at position N in
33968c2ecf20Sopenharmony_ci	 * the flow queue it has to wait approximately (N + 1) * (number of
33978c2ecf20Sopenharmony_ci	 * segments between two sync points). The rationale for this is that
33988c2ecf20Sopenharmony_ci	 * flows are released and recycled at each sync point.
33998c2ecf20Sopenharmony_ci	 */
34008c2ecf20Sopenharmony_ci	return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
34018c2ecf20Sopenharmony_ci}
34028c2ecf20Sopenharmony_ci
34038c2ecf20Sopenharmony_cistatic u32 position_in_queue(struct hfi1_qp_priv *qpriv,
34048c2ecf20Sopenharmony_ci			     struct tid_queue *queue)
34058c2ecf20Sopenharmony_ci{
34068c2ecf20Sopenharmony_ci	return qpriv->tid_enqueue - queue->dequeue;
34078c2ecf20Sopenharmony_ci}
34088c2ecf20Sopenharmony_ci
34098c2ecf20Sopenharmony_ci/*
34108c2ecf20Sopenharmony_ci * @qp: points to rvt_qp context.
34118c2ecf20Sopenharmony_ci * @to_seg: desired RNR timeout in segments.
34128c2ecf20Sopenharmony_ci * Return: index of the next highest timeout in the ib_hfi1_rnr_table[]
34138c2ecf20Sopenharmony_ci */
34148c2ecf20Sopenharmony_cistatic u32 hfi1_compute_tid_rnr_timeout(struct rvt_qp *qp, u32 to_seg)
34158c2ecf20Sopenharmony_ci{
34168c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
34178c2ecf20Sopenharmony_ci	u64 timeout;
34188c2ecf20Sopenharmony_ci	u32 bytes_per_us;
34198c2ecf20Sopenharmony_ci	u8 i;
34208c2ecf20Sopenharmony_ci
34218c2ecf20Sopenharmony_ci	bytes_per_us = active_egress_rate(qpriv->rcd->ppd) / 8;
34228c2ecf20Sopenharmony_ci	timeout = (to_seg * TID_RDMA_MAX_SEGMENT_SIZE) / bytes_per_us;
34238c2ecf20Sopenharmony_ci	/*
34248c2ecf20Sopenharmony_ci	 * Find the next highest value in the RNR table to the required
34258c2ecf20Sopenharmony_ci	 * timeout. This gives the responder some padding.
34268c2ecf20Sopenharmony_ci	 */
34278c2ecf20Sopenharmony_ci	for (i = 1; i <= IB_AETH_CREDIT_MASK; i++)
34288c2ecf20Sopenharmony_ci		if (rvt_rnr_tbl_to_usec(i) >= timeout)
34298c2ecf20Sopenharmony_ci			return i;
34308c2ecf20Sopenharmony_ci	return 0;
34318c2ecf20Sopenharmony_ci}
34328c2ecf20Sopenharmony_ci
/*
 * Central place for resource allocation at the TID write responder.
 * It is called from the write_req and write_data interrupt handlers as
 * well as from the send thread when a queued QP is scheduled for
 * resource allocation.
 *
 * Iterates over (a) segments of a request and then (b) queued requests
 * themselves to allocate resources for up to local->max_write
 * segments across multiple requests. Allocation stops when a sync
 * point is hit and resumes after the data packets at the sync point
 * have been received.
 *
 * Resource allocation and the sending of responses are decoupled. The
 * request/segment currently being allocated and the one being sent are
 * tracked as follows.
 * Resources are allocated for:
 *     [request: qpriv->r_tid_alloc, segment: req->alloc_seg]
 * The send thread sends:
 *     [request: qp->s_tail_ack_queue, segment: req->cur_seg]
 */
/*
 * hfi1_tid_write_alloc_resources - allocate HW flow and RcvArray resources
 * @qp: the RC QP acting as TID RDMA WRITE responder
 * @intr_ctx: true when called from the receive interrupt handlers (the only
 *	      context allowed to schedule an RNR NAK from here)
 *
 * Walks the ack queue from qpriv->r_tid_alloc toward qpriv->r_tid_head and
 * allocates, per segment, a hardware flow (if the QP does not already own
 * one) and RcvArray/TID entries.  Stops at a sync point, at the per-QP
 * segment limit, or when an allocation fails; on -EAGAIN from interrupt
 * context it schedules a deferred RNR NAK so the requester backs off.
 *
 * Caller must hold qp->s_lock; the send_rnr_nak path additionally asserts
 * qp->r_lock, so interrupt-context callers must hold both.
 */
static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
{
	struct tid_rdma_request *req;
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct hfi1_ctxtdata *rcd = qpriv->rcd;
	struct tid_rdma_params *local = &qpriv->tid_rdma.local;
	struct rvt_ack_entry *e;
	u32 npkts, to_seg;
	bool last;
	int ret = 0;

	lockdep_assert_held(&qp->s_lock);

	while (1) {
		trace_hfi1_rsp_tid_write_alloc_res(qp, 0);
		trace_hfi1_tid_write_rsp_alloc_res(qp);
		/*
		 * Don't allocate more segments if a RNR NAK has already been
		 * scheduled to avoid messing up qp->r_psn: the RNR NAK will
		 * be sent only when all allocated segments have been sent.
		 * However, if more segments are allocated before that, TID RDMA
		 * WRITE RESP packets will be sent out for these new segments
		 * before the RNR NAK packet. When the requester receives the
		 * RNR NAK packet, it will restart with qp->s_last_psn + 1,
		 * which does not match qp->r_psn and will be dropped.
		 * Consequently, the requester will exhaust its retries and
		 * put the qp into error state.
		 */
		if (qpriv->rnr_nak_state == TID_RNR_NAK_SEND)
			break;

		/* No requests left to process */
		if (qpriv->r_tid_alloc == qpriv->r_tid_head) {
			/* If all data has been received, clear the flow */
			if (qpriv->flow_state.index < RXE_NUM_TID_FLOWS &&
			    !qpriv->alloc_w_segs) {
				hfi1_kern_clear_hw_flow(rcd, qp);
				qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
			}
			break;
		}

		e = &qp->s_ack_queue[qpriv->r_tid_alloc];
		/* Only TID RDMA WRITE requests consume TID resources */
		if (e->opcode != TID_OP(WRITE_REQ))
			goto next_req;
		req = ack_to_tid_req(e);
		trace_hfi1_tid_req_write_alloc_res(qp, 0, e->opcode, e->psn,
						   e->lpsn, req);
		/* Finished allocating for all segments of this request */
		if (req->alloc_seg >= req->total_segs)
			goto next_req;

		/* Can allocate only a maximum of local->max_write for a QP */
		if (qpriv->alloc_w_segs >= local->max_write)
			break;

		/* Don't allocate at a sync point with data packets pending */
		if (qpriv->sync_pt && qpriv->alloc_w_segs)
			break;

		/* All data received at the sync point, continue */
		if (qpriv->sync_pt && !qpriv->alloc_w_segs) {
			hfi1_kern_clear_hw_flow(rcd, qp);
			qpriv->sync_pt = false;
			qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
		}

		/* Allocate flow if we don't have one */
		if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
			ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
			if (ret) {
				/*
				 * RNR timeout scaled by our position in the
				 * flow wait queue, so deeper-queued QPs back
				 * off longer.
				 */
				to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
					position_in_queue(qpriv,
							  &rcd->flow_queue);
				break;
			}
		}

		npkts = rvt_div_round_up_mtu(qp, req->seg_len);

		/*
		 * We are at a sync point if we run out of KDETH PSN space.
		 * Last PSN of every generation is reserved for RESYNC.
		 */
		if (qpriv->flow_state.psn + npkts > MAX_TID_FLOW_PSN - 1) {
			qpriv->sync_pt = true;
			break;
		}

		/*
		 * If overtaking req->acked_tail, send an RNR NAK. Because the
		 * QP is not queued in this case, and the issue can only be
		 * caused by a delay in scheduling the second leg which we
		 * cannot estimate, we use a rather arbitrary RNR timeout of
		 * (MAX_FLOWS / 2) segments
		 */
		if (!CIRC_SPACE(req->setup_head, req->acked_tail,
				MAX_FLOWS)) {
			ret = -EAGAIN;
			to_seg = MAX_FLOWS >> 1;
			tid_rdma_trigger_ack(qp);
			break;
		}

		/* Try to allocate rcv array / TID entries */
		ret = hfi1_kern_exp_rcv_setup(req, &req->ss, &last);
		if (ret == -EAGAIN)
			to_seg = position_in_queue(qpriv, &rcd->rarr_queue);
		if (ret)
			break;

		/* Segment fully resourced; account for it and keep going */
		qpriv->alloc_w_segs++;
		req->alloc_seg++;
		continue;
next_req:
		/* Begin processing the next request */
		if (++qpriv->r_tid_alloc >
		    rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
			qpriv->r_tid_alloc = 0;
	}

	/*
	 * Schedule an RNR NAK to be sent if (a) flow or rcv array allocation
	 * has failed (b) we are called from the rcv handler interrupt context
	 * (c) an RNR NAK has not already been scheduled
	 */
	if (ret == -EAGAIN && intr_ctx && !qp->r_nak_state)
		goto send_rnr_nak;

	return;

send_rnr_nak:
	lockdep_assert_held(&qp->r_lock);

	/* Set r_nak_state to prevent unrelated events from generating NAK's */
	qp->r_nak_state = hfi1_compute_tid_rnr_timeout(qp, to_seg) | IB_RNR_NAK;

	/* Pull back r_psn to the segment being RNR NAK'd */
	qp->r_psn = e->psn + req->alloc_seg;
	qp->r_ack_psn = qp->r_psn;
	/*
	 * Pull back r_head_ack_queue to the ack entry following the request
	 * being RNR NAK'd. This allows resources to be allocated to the request
	 * if the queued QP is scheduled.
	 */
	qp->r_head_ack_queue = qpriv->r_tid_alloc + 1;
	if (qp->r_head_ack_queue > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
		qp->r_head_ack_queue = 0;
	qpriv->r_tid_head = qp->r_head_ack_queue;
	/*
	 * These send side fields are used in make_rc_ack(). They are set in
	 * hfi1_send_rc_ack() but must be set here before dropping qp->s_lock
	 * for consistency
	 */
	qp->s_nak_state = qp->r_nak_state;
	qp->s_ack_psn = qp->r_ack_psn;
	/*
	 * Clear the ACK PENDING flag to prevent unwanted ACK because we
	 * have modified qp->s_ack_psn here.
	 */
	qp->s_flags &= ~(RVT_S_ACK_PENDING);

	trace_hfi1_rsp_tid_write_alloc_res(qp, qp->r_psn);
	/*
	 * qpriv->rnr_nak_state is used to determine when the scheduled RNR NAK
	 * has actually been sent. qp->s_flags RVT_S_ACK_PENDING bit cannot be
	 * used for this because qp->s_lock is dropped before calling
	 * hfi1_send_rc_ack() leading to inconsistency between the receive
	 * interrupt handlers and the send thread in make_rc_ack()
	 */
	qpriv->rnr_nak_state = TID_RNR_NAK_SEND;

	/*
	 * Schedule RNR NAK to be sent. RNR NAK's are scheduled from the receive
	 * interrupt handlers but will be sent from the send engine behind any
	 * previous responses that may have been scheduled
	 */
	rc_defered_ack(rcd, qp);
}
36318c2ecf20Sopenharmony_ci
/*
 * hfi1_rc_rcv_tid_rdma_write_req - responder-side handler for a TID RDMA
 * WRITE REQUEST packet
 * @packet: the received packet (headers already parsed by the caller)
 *
 * Validates the request against qp->r_psn and the RETH (non-zero,
 * PAGE_SIZE-multiple length; rkey check), inserts it into the ack queue
 * (s_ack_queue), initializes the per-request TID state, eagerly tries to
 * allocate TID resources, and kicks the send engine to emit the response.
 * A request resent after an RNR NAK is re-inserted over its original ack
 * queue slot instead of consuming a new one.
 */
void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
{
	/* HANDLER FOR TID RDMA WRITE REQUEST packet (Responder side)*/

	/*
	 * 1. Verify TID RDMA WRITE REQ as per IB_OPCODE_RC_RDMA_WRITE_FIRST
	 *    (see hfi1_rc_rcv())
	 *     - Don't allow 0-length requests.
	 * 2. Put TID RDMA WRITE REQ into the response queue (s_ack_queue)
	 *     - Setup struct tid_rdma_req with request info
	 *     - Prepare struct tid_rdma_flow array?
	 * 3. Set the qp->s_ack_state as state diagram in design doc.
	 * 4. Set RVT_S_RESP_PENDING in s_flags.
	 * 5. Kick the send engine (hfi1_schedule_send())
	 */
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct rvt_qp *qp = packet->qp;
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct ib_other_headers *ohdr = packet->ohdr;
	struct rvt_ack_entry *e;
	unsigned long flags;
	struct ib_reth *reth;
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct tid_rdma_request *req;
	u32 bth0, psn, len, rkey, num_segs;
	bool fecn;
	u8 next;
	u64 vaddr;
	int diff;

	bth0 = be32_to_cpu(ohdr->bth[0]);
	if (hfi1_ruc_check_hdr(ibp, packet))
		return;

	fecn = process_ecn(qp, packet);
	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
	trace_hfi1_rsp_rcv_tid_write_req(qp, psn);

	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
		rvt_comm_est(qp);

	if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
		goto nack_inv;

	reth = &ohdr->u.tid_rdma.w_req.reth;
	vaddr = be64_to_cpu(reth->vaddr);
	len = be32_to_cpu(reth->length);

	num_segs = DIV_ROUND_UP(len, qpriv->tid_rdma.local.max_len);
	diff = delta_psn(psn, qp->r_psn);
	/* Out-of-sequence PSN: hand off to the duplicate/error path */
	if (unlikely(diff)) {
		tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
		return;
	}

	/*
	 * The resent request which was previously RNR NAK'd is inserted at the
	 * location of the original request, which is one entry behind
	 * r_head_ack_queue
	 */
	if (qpriv->rnr_nak_state)
		qp->r_head_ack_queue = qp->r_head_ack_queue ?
			qp->r_head_ack_queue - 1 :
			rvt_size_atomic(ib_to_rvt(qp->ibqp.device));

	/* We've verified the request, insert it into the ack queue. */
	next = qp->r_head_ack_queue + 1;
	if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
		next = 0;
	spin_lock_irqsave(&qp->s_lock, flags);
	/* Queue full: only overwrite the oldest entry if it was sent */
	if (unlikely(next == qp->s_acked_ack_queue)) {
		if (!qp->s_ack_queue[next].sent)
			goto nack_inv_unlock;
		update_ack_queue(qp, next);
	}
	e = &qp->s_ack_queue[qp->r_head_ack_queue];
	req = ack_to_tid_req(e);

	/* Bring previously RNR NAK'd request back to life */
	if (qpriv->rnr_nak_state) {
		qp->r_nak_state = 0;
		qp->s_nak_state = 0;
		qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
		qp->r_psn = e->lpsn + 1;
		req->state = TID_REQUEST_INIT;
		goto update_head;
	}

	release_rdma_sge_mr(e);

	/* The length needs to be in multiples of PAGE_SIZE */
	if (!len || len & ~PAGE_MASK)
		goto nack_inv_unlock;

	rkey = be32_to_cpu(reth->rkey);
	qp->r_len = len;

	/*
	 * Reusing a slot whose previous TID WRITE request still has flows
	 * in flight would corrupt the circular flow indices; reject it.
	 */
	if (e->opcode == TID_OP(WRITE_REQ) &&
	    (req->setup_head != req->clear_tail ||
	     req->clear_tail != req->acked_tail))
		goto nack_inv_unlock;

	if (unlikely(!rvt_rkey_ok(qp, &e->rdma_sge, qp->r_len, vaddr,
				  rkey, IB_ACCESS_REMOTE_WRITE)))
		goto nack_acc;

	/* The request spans num_segs PSNs; advance to its last PSN */
	qp->r_psn += num_segs - 1;

	e->opcode = (bth0 >> 24) & 0xff;
	e->psn = psn;
	e->lpsn = qp->r_psn;
	e->sent = 0;

	req->n_flows = min_t(u16, num_segs, qpriv->tid_rdma.local.max_write);
	req->state = TID_REQUEST_INIT;
	req->cur_seg = 0;
	req->comp_seg = 0;
	req->ack_seg = 0;
	req->alloc_seg = 0;
	req->isge = 0;
	req->seg_len = qpriv->tid_rdma.local.max_len;
	req->total_len = len;
	req->total_segs = num_segs;
	req->r_flow_psn = e->psn;
	req->ss.sge = e->rdma_sge;
	req->ss.num_sge = 1;

	/* Reset the circular flow indices to a common starting point */
	req->flow_idx = req->setup_head;
	req->clear_tail = req->setup_head;
	req->acked_tail = req->setup_head;

	qp->r_state = e->opcode;
	qp->r_nak_state = 0;
	/*
	 * We need to increment the MSN here instead of when we
	 * finish sending the result since a duplicate request would
	 * increment it more than once.
	 */
	qp->r_msn++;
	qp->r_psn++;

	trace_hfi1_tid_req_rcv_write_req(qp, 0, e->opcode, e->psn, e->lpsn,
					 req);

	if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID) {
		qpriv->r_tid_tail = qp->r_head_ack_queue;
	} else if (qpriv->r_tid_tail == qpriv->r_tid_head) {
		struct tid_rdma_request *ptr;

		e = &qp->s_ack_queue[qpriv->r_tid_tail];
		ptr = ack_to_tid_req(e);

		/* Advance tail/ack markers past fully-completed entries */
		if (e->opcode != TID_OP(WRITE_REQ) ||
		    ptr->comp_seg == ptr->total_segs) {
			if (qpriv->r_tid_tail == qpriv->r_tid_ack)
				qpriv->r_tid_ack = qp->r_head_ack_queue;
			qpriv->r_tid_tail = qp->r_head_ack_queue;
		}
	}
update_head:
	qp->r_head_ack_queue = next;
	qpriv->r_tid_head = qp->r_head_ack_queue;

	/* Eagerly grab TID resources; intr_ctx=true allows RNR NAK on fail */
	hfi1_tid_write_alloc_resources(qp, true);
	trace_hfi1_tid_write_rsp_rcv_req(qp);

	/* Schedule the send tasklet. */
	qp->s_flags |= RVT_S_RESP_PENDING;
	if (fecn)
		qp->s_flags |= RVT_S_ECN;
	hfi1_schedule_send(qp);

	spin_unlock_irqrestore(&qp->s_lock, flags);
	return;

nack_inv_unlock:
	spin_unlock_irqrestore(&qp->s_lock, flags);
nack_inv:
	rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
	qp->r_nak_state = IB_NAK_INVALID_REQUEST;
	qp->r_ack_psn = qp->r_psn;
	/* Queue NAK for later */
	rc_defered_ack(rcd, qp);
	return;
nack_acc:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
	qp->r_ack_psn = qp->r_psn;
}
38228c2ecf20Sopenharmony_ci
/*
 * hfi1_build_tid_rdma_write_resp - build a TID RDMA WRITE RESP packet
 * @qp: the responder QP
 * @e: ack queue entry for the TID WRITE request being answered
 * @ohdr: other-headers area to fill in with the response header
 * @bth1: on return, carries the remote verbs QP number for BTH word 1
 * @bth2: the IB PSN being placed in BTH word 2 (recorded in the flow)
 * @len: on return, the payload length (the TID entry array for this flow)
 * @ss: on return, points at the SGE state describing that payload
 *
 * Called from the send engine with qp->s_lock held (it may re-enter
 * resource allocation).  Returns the number of 32-bit header words added,
 * or 0 (via "done") when no response can be sent yet — either no segment
 * has resources allocated or an RNR NAK is outstanding.
 */
u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
				   struct ib_other_headers *ohdr, u32 *bth1,
				   u32 bth2, u32 *len,
				   struct rvt_sge_state **ss)
{
	struct hfi1_ack_priv *epriv = e->priv;
	struct tid_rdma_request *req = &epriv->tid_req;
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct tid_rdma_flow *flow = NULL;
	u32 resp_len = 0, hdwords = 0;
	void *resp_addr = NULL;
	struct tid_rdma_params *remote;

	trace_hfi1_tid_req_build_write_resp(qp, 0, e->opcode, e->psn, e->lpsn,
					    req);
	trace_hfi1_tid_write_rsp_build_resp(qp);
	trace_hfi1_rsp_build_tid_write_resp(qp, bth2);
	flow = &req->flows[req->flow_idx];
	switch (req->state) {
	default:
		/*
		 * Try to allocate resources here in case QP was queued and was
		 * later scheduled when resources became available
		 */
		hfi1_tid_write_alloc_resources(qp, false);

		/* We've already sent everything which is ready */
		if (req->cur_seg >= req->alloc_seg)
			goto done;

		/*
		 * Resources can be assigned but responses cannot be sent in
		 * rnr_nak state, till the resent request is received
		 */
		if (qpriv->rnr_nak_state == TID_RNR_NAK_SENT)
			goto done;

		req->state = TID_REQUEST_ACTIVE;
		trace_hfi1_tid_flow_build_write_resp(qp, req->flow_idx, flow);
		req->flow_idx = CIRC_NEXT(req->flow_idx, MAX_FLOWS);
		hfi1_add_tid_reap_timer(qp);
		break;

	case TID_REQUEST_RESEND_ACTIVE:
	case TID_REQUEST_RESEND:
		trace_hfi1_tid_flow_build_write_resp(qp, req->flow_idx, flow);
		req->flow_idx = CIRC_NEXT(req->flow_idx, MAX_FLOWS);
		/* All previously-allocated flows re-sent; resume normal flow */
		if (!CIRC_CNT(req->setup_head, req->flow_idx, MAX_FLOWS))
			req->state = TID_REQUEST_ACTIVE;

		hfi1_mod_tid_reap_timer(qp);
		break;
	}
	flow->flow_state.resp_ib_psn = bth2;
	/* Payload is the TID entry array the requester will write into */
	resp_addr = (void *)flow->tid_entry;
	resp_len = sizeof(*flow->tid_entry) * flow->tidcnt;
	req->cur_seg++;

	memset(&ohdr->u.tid_rdma.w_rsp, 0, sizeof(ohdr->u.tid_rdma.w_rsp));
	epriv->ss.sge.vaddr = resp_addr;
	epriv->ss.sge.sge_length = resp_len;
	epriv->ss.sge.length = epriv->ss.sge.sge_length;
	/*
	 * We can safely zero these out. Since the first SGE covers the
	 * entire packet, nothing else should even look at the MR.
	 */
	epriv->ss.sge.mr = NULL;
	epriv->ss.sge.m = 0;
	epriv->ss.sge.n = 0;

	epriv->ss.sg_list = NULL;
	epriv->ss.total_len = epriv->ss.sge.sge_length;
	epriv->ss.num_sge = 1;

	*ss = &epriv->ss;
	*len = epriv->ss.total_len;

	/* Construct the TID RDMA WRITE RESP packet header */
	rcu_read_lock();
	remote = rcu_dereference(qpriv->tid_rdma.remote);

	KDETH_RESET(ohdr->u.tid_rdma.w_rsp.kdeth0, KVER, 0x1);
	KDETH_RESET(ohdr->u.tid_rdma.w_rsp.kdeth1, JKEY, remote->jkey);
	ohdr->u.tid_rdma.w_rsp.aeth = rvt_compute_aeth(qp);
	ohdr->u.tid_rdma.w_rsp.tid_flow_psn =
		cpu_to_be32((flow->flow_state.generation <<
			     HFI1_KDETH_BTH_SEQ_SHIFT) |
			    (flow->flow_state.spsn &
			     HFI1_KDETH_BTH_SEQ_MASK));
	ohdr->u.tid_rdma.w_rsp.tid_flow_qp =
		cpu_to_be32(qpriv->tid_rdma.local.qp |
			    ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
			     TID_RDMA_DESTQP_FLOW_SHIFT) |
			    qpriv->rcd->ctxt);
	ohdr->u.tid_rdma.w_rsp.verbs_qp = cpu_to_be32(qp->remote_qpn);
	*bth1 = remote->qp;
	rcu_read_unlock();
	hdwords = sizeof(ohdr->u.tid_rdma.w_rsp) / sizeof(u32);
	qpriv->pending_tid_w_segs++;
done:
	return hdwords;
}
39258c2ecf20Sopenharmony_ci
39268c2ecf20Sopenharmony_cistatic void hfi1_add_tid_reap_timer(struct rvt_qp *qp)
39278c2ecf20Sopenharmony_ci{
39288c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
39298c2ecf20Sopenharmony_ci
39308c2ecf20Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
39318c2ecf20Sopenharmony_ci	if (!(qpriv->s_flags & HFI1_R_TID_RSC_TIMER)) {
39328c2ecf20Sopenharmony_ci		qpriv->s_flags |= HFI1_R_TID_RSC_TIMER;
39338c2ecf20Sopenharmony_ci		qpriv->s_tid_timer.expires = jiffies +
39348c2ecf20Sopenharmony_ci			qpriv->tid_timer_timeout_jiffies;
39358c2ecf20Sopenharmony_ci		add_timer(&qpriv->s_tid_timer);
39368c2ecf20Sopenharmony_ci	}
39378c2ecf20Sopenharmony_ci}
39388c2ecf20Sopenharmony_ci
39398c2ecf20Sopenharmony_cistatic void hfi1_mod_tid_reap_timer(struct rvt_qp *qp)
39408c2ecf20Sopenharmony_ci{
39418c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
39428c2ecf20Sopenharmony_ci
39438c2ecf20Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
39448c2ecf20Sopenharmony_ci	qpriv->s_flags |= HFI1_R_TID_RSC_TIMER;
39458c2ecf20Sopenharmony_ci	mod_timer(&qpriv->s_tid_timer, jiffies +
39468c2ecf20Sopenharmony_ci		  qpriv->tid_timer_timeout_jiffies);
39478c2ecf20Sopenharmony_ci}
39488c2ecf20Sopenharmony_ci
39498c2ecf20Sopenharmony_cistatic int hfi1_stop_tid_reap_timer(struct rvt_qp *qp)
39508c2ecf20Sopenharmony_ci{
39518c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
39528c2ecf20Sopenharmony_ci	int rval = 0;
39538c2ecf20Sopenharmony_ci
39548c2ecf20Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
39558c2ecf20Sopenharmony_ci	if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
39568c2ecf20Sopenharmony_ci		rval = del_timer(&qpriv->s_tid_timer);
39578c2ecf20Sopenharmony_ci		qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
39588c2ecf20Sopenharmony_ci	}
39598c2ecf20Sopenharmony_ci	return rval;
39608c2ecf20Sopenharmony_ci}
39618c2ecf20Sopenharmony_ci
39628c2ecf20Sopenharmony_civoid hfi1_del_tid_reap_timer(struct rvt_qp *qp)
39638c2ecf20Sopenharmony_ci{
39648c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
39658c2ecf20Sopenharmony_ci
39668c2ecf20Sopenharmony_ci	del_timer_sync(&qpriv->s_tid_timer);
39678c2ecf20Sopenharmony_ci	qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
39688c2ecf20Sopenharmony_ci}
39698c2ecf20Sopenharmony_ci
/*
 * hfi1_tid_timeout - TID resource-reap timer expiry handler
 * @t: the expired timer, embedded in struct hfi1_qp_priv
 *
 * Fires when the requester has failed to consume allocated TID WRITE
 * resources in time.  If the timer is still armed, releases the QP's
 * HW flow and all RcvArray resources across the entire ack queue,
 * raises IB_EVENT_QP_FATAL to the consumer, and moves the QP to the
 * error state.  Takes qp->r_lock then qp->s_lock (same order as the
 * receive path).
 */
static void hfi1_tid_timeout(struct timer_list *t)
{
	struct hfi1_qp_priv *qpriv = from_timer(qpriv, t, s_tid_timer);
	struct rvt_qp *qp = qpriv->owner;
	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
	unsigned long flags;
	u32 i;

	spin_lock_irqsave(&qp->r_lock, flags);
	spin_lock(&qp->s_lock);
	/* If the flag is clear, the timer was stopped after it fired; bail */
	if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
		dd_dev_warn(dd_from_ibdev(qp->ibqp.device), "[QP%u] %s %d\n",
			    qp->ibqp.qp_num, __func__, __LINE__);
		trace_hfi1_msg_tid_timeout(/* msg */
			qp, "resource timeout = ",
			(u64)qpriv->tid_timer_timeout_jiffies);
		hfi1_stop_tid_reap_timer(qp);
		/*
		 * Go though the entire ack queue and clear any outstanding
		 * HW flow and RcvArray resources.
		 */
		hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
		for (i = 0; i < rvt_max_atomic(rdi); i++) {
			struct tid_rdma_request *req =
				ack_to_tid_req(&qp->s_ack_queue[i]);

			hfi1_kern_exp_rcv_clear_all(req);
		}
		/* s_lock must be dropped before calling the event handler */
		spin_unlock(&qp->s_lock);
		if (qp->ibqp.event_handler) {
			struct ib_event ev;

			ev.device = qp->ibqp.device;
			ev.element.qp = &qp->ibqp;
			ev.event = IB_EVENT_QP_FATAL;
			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
		}
		rvt_rc_error(qp, IB_WC_RESP_TIMEOUT_ERR);
		goto unlock_r_lock;
	}
	spin_unlock(&qp->s_lock);
unlock_r_lock:
	spin_unlock_irqrestore(&qp->r_lock, flags);
}
40148c2ecf20Sopenharmony_ci
40158c2ecf20Sopenharmony_civoid hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet)
40168c2ecf20Sopenharmony_ci{
40178c2ecf20Sopenharmony_ci	/* HANDLER FOR TID RDMA WRITE RESPONSE packet (Requestor side */
40188c2ecf20Sopenharmony_ci
40198c2ecf20Sopenharmony_ci	/*
40208c2ecf20Sopenharmony_ci	 * 1. Find matching SWQE
40218c2ecf20Sopenharmony_ci	 * 2. Check that TIDENTRY array has enough space for a complete
40228c2ecf20Sopenharmony_ci	 *    segment. If not, put QP in error state.
40238c2ecf20Sopenharmony_ci	 * 3. Save response data in struct tid_rdma_req and struct tid_rdma_flow
40248c2ecf20Sopenharmony_ci	 * 4. Remove HFI1_S_WAIT_TID_RESP from s_flags.
40258c2ecf20Sopenharmony_ci	 * 5. Set qp->s_state
40268c2ecf20Sopenharmony_ci	 * 6. Kick the send engine (hfi1_schedule_send())
40278c2ecf20Sopenharmony_ci	 */
40288c2ecf20Sopenharmony_ci	struct ib_other_headers *ohdr = packet->ohdr;
40298c2ecf20Sopenharmony_ci	struct rvt_qp *qp = packet->qp;
40308c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
40318c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *rcd = packet->rcd;
40328c2ecf20Sopenharmony_ci	struct rvt_swqe *wqe;
40338c2ecf20Sopenharmony_ci	struct tid_rdma_request *req;
40348c2ecf20Sopenharmony_ci	struct tid_rdma_flow *flow;
40358c2ecf20Sopenharmony_ci	enum ib_wc_status status;
40368c2ecf20Sopenharmony_ci	u32 opcode, aeth, psn, flow_psn, i, tidlen = 0, pktlen;
40378c2ecf20Sopenharmony_ci	bool fecn;
40388c2ecf20Sopenharmony_ci	unsigned long flags;
40398c2ecf20Sopenharmony_ci
40408c2ecf20Sopenharmony_ci	fecn = process_ecn(qp, packet);
40418c2ecf20Sopenharmony_ci	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
40428c2ecf20Sopenharmony_ci	aeth = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.aeth);
40438c2ecf20Sopenharmony_ci	opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
40448c2ecf20Sopenharmony_ci
40458c2ecf20Sopenharmony_ci	spin_lock_irqsave(&qp->s_lock, flags);
40468c2ecf20Sopenharmony_ci
40478c2ecf20Sopenharmony_ci	/* Ignore invalid responses */
40488c2ecf20Sopenharmony_ci	if (cmp_psn(psn, qp->s_next_psn) >= 0)
40498c2ecf20Sopenharmony_ci		goto ack_done;
40508c2ecf20Sopenharmony_ci
40518c2ecf20Sopenharmony_ci	/* Ignore duplicate responses. */
40528c2ecf20Sopenharmony_ci	if (unlikely(cmp_psn(psn, qp->s_last_psn) <= 0))
40538c2ecf20Sopenharmony_ci		goto ack_done;
40548c2ecf20Sopenharmony_ci
40558c2ecf20Sopenharmony_ci	if (unlikely(qp->s_acked == qp->s_tail))
40568c2ecf20Sopenharmony_ci		goto ack_done;
40578c2ecf20Sopenharmony_ci
40588c2ecf20Sopenharmony_ci	/*
40598c2ecf20Sopenharmony_ci	 * If we are waiting for a particular packet sequence number
40608c2ecf20Sopenharmony_ci	 * due to a request being resent, check for it. Otherwise,
40618c2ecf20Sopenharmony_ci	 * ensure that we haven't missed anything.
40628c2ecf20Sopenharmony_ci	 */
40638c2ecf20Sopenharmony_ci	if (qp->r_flags & RVT_R_RDMAR_SEQ) {
40648c2ecf20Sopenharmony_ci		if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
40658c2ecf20Sopenharmony_ci			goto ack_done;
40668c2ecf20Sopenharmony_ci		qp->r_flags &= ~RVT_R_RDMAR_SEQ;
40678c2ecf20Sopenharmony_ci	}
40688c2ecf20Sopenharmony_ci
40698c2ecf20Sopenharmony_ci	wqe = rvt_get_swqe_ptr(qp, qpriv->s_tid_cur);
40708c2ecf20Sopenharmony_ci	if (unlikely(wqe->wr.opcode != IB_WR_TID_RDMA_WRITE))
40718c2ecf20Sopenharmony_ci		goto ack_op_err;
40728c2ecf20Sopenharmony_ci
40738c2ecf20Sopenharmony_ci	req = wqe_to_tid_req(wqe);
40748c2ecf20Sopenharmony_ci	/*
40758c2ecf20Sopenharmony_ci	 * If we've lost ACKs and our acked_tail pointer is too far
40768c2ecf20Sopenharmony_ci	 * behind, don't overwrite segments. Just drop the packet and
40778c2ecf20Sopenharmony_ci	 * let the reliability protocol take care of it.
40788c2ecf20Sopenharmony_ci	 */
40798c2ecf20Sopenharmony_ci	if (!CIRC_SPACE(req->setup_head, req->acked_tail, MAX_FLOWS))
40808c2ecf20Sopenharmony_ci		goto ack_done;
40818c2ecf20Sopenharmony_ci
40828c2ecf20Sopenharmony_ci	/*
40838c2ecf20Sopenharmony_ci	 * The call to do_rc_ack() should be last in the chain of
40848c2ecf20Sopenharmony_ci	 * packet checks because it will end up updating the QP state.
40858c2ecf20Sopenharmony_ci	 * Therefore, anything that would prevent the packet from
40868c2ecf20Sopenharmony_ci	 * being accepted as a successful response should be prior
40878c2ecf20Sopenharmony_ci	 * to it.
40888c2ecf20Sopenharmony_ci	 */
40898c2ecf20Sopenharmony_ci	if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
40908c2ecf20Sopenharmony_ci		goto ack_done;
40918c2ecf20Sopenharmony_ci
40928c2ecf20Sopenharmony_ci	trace_hfi1_ack(qp, psn);
40938c2ecf20Sopenharmony_ci
40948c2ecf20Sopenharmony_ci	flow = &req->flows[req->setup_head];
40958c2ecf20Sopenharmony_ci	flow->pkt = 0;
40968c2ecf20Sopenharmony_ci	flow->tid_idx = 0;
40978c2ecf20Sopenharmony_ci	flow->tid_offset = 0;
40988c2ecf20Sopenharmony_ci	flow->sent = 0;
40998c2ecf20Sopenharmony_ci	flow->resync_npkts = 0;
41008c2ecf20Sopenharmony_ci	flow->tid_qpn = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.tid_flow_qp);
41018c2ecf20Sopenharmony_ci	flow->idx = (flow->tid_qpn >> TID_RDMA_DESTQP_FLOW_SHIFT) &
41028c2ecf20Sopenharmony_ci		TID_RDMA_DESTQP_FLOW_MASK;
41038c2ecf20Sopenharmony_ci	flow_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.w_rsp.tid_flow_psn));
41048c2ecf20Sopenharmony_ci	flow->flow_state.generation = flow_psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
41058c2ecf20Sopenharmony_ci	flow->flow_state.spsn = flow_psn & HFI1_KDETH_BTH_SEQ_MASK;
41068c2ecf20Sopenharmony_ci	flow->flow_state.resp_ib_psn = psn;
41078c2ecf20Sopenharmony_ci	flow->length = min_t(u32, req->seg_len,
41088c2ecf20Sopenharmony_ci			     (wqe->length - (req->comp_seg * req->seg_len)));
41098c2ecf20Sopenharmony_ci
41108c2ecf20Sopenharmony_ci	flow->npkts = rvt_div_round_up_mtu(qp, flow->length);
41118c2ecf20Sopenharmony_ci	flow->flow_state.lpsn = flow->flow_state.spsn +
41128c2ecf20Sopenharmony_ci		flow->npkts - 1;
41138c2ecf20Sopenharmony_ci	/* payload length = packet length - (header length + ICRC length) */
41148c2ecf20Sopenharmony_ci	pktlen = packet->tlen - (packet->hlen + 4);
41158c2ecf20Sopenharmony_ci	if (pktlen > sizeof(flow->tid_entry)) {
41168c2ecf20Sopenharmony_ci		status = IB_WC_LOC_LEN_ERR;
41178c2ecf20Sopenharmony_ci		goto ack_err;
41188c2ecf20Sopenharmony_ci	}
41198c2ecf20Sopenharmony_ci	memcpy(flow->tid_entry, packet->ebuf, pktlen);
41208c2ecf20Sopenharmony_ci	flow->tidcnt = pktlen / sizeof(*flow->tid_entry);
41218c2ecf20Sopenharmony_ci	trace_hfi1_tid_flow_rcv_write_resp(qp, req->setup_head, flow);
41228c2ecf20Sopenharmony_ci
41238c2ecf20Sopenharmony_ci	req->comp_seg++;
41248c2ecf20Sopenharmony_ci	trace_hfi1_tid_write_sender_rcv_resp(qp, 0);
41258c2ecf20Sopenharmony_ci	/*
41268c2ecf20Sopenharmony_ci	 * Walk the TID_ENTRY list to make sure we have enough space for a
41278c2ecf20Sopenharmony_ci	 * complete segment.
41288c2ecf20Sopenharmony_ci	 */
41298c2ecf20Sopenharmony_ci	for (i = 0; i < flow->tidcnt; i++) {
41308c2ecf20Sopenharmony_ci		trace_hfi1_tid_entry_rcv_write_resp(/* entry */
41318c2ecf20Sopenharmony_ci			qp, i, flow->tid_entry[i]);
41328c2ecf20Sopenharmony_ci		if (!EXP_TID_GET(flow->tid_entry[i], LEN)) {
41338c2ecf20Sopenharmony_ci			status = IB_WC_LOC_LEN_ERR;
41348c2ecf20Sopenharmony_ci			goto ack_err;
41358c2ecf20Sopenharmony_ci		}
41368c2ecf20Sopenharmony_ci		tidlen += EXP_TID_GET(flow->tid_entry[i], LEN);
41378c2ecf20Sopenharmony_ci	}
41388c2ecf20Sopenharmony_ci	if (tidlen * PAGE_SIZE < flow->length) {
41398c2ecf20Sopenharmony_ci		status = IB_WC_LOC_LEN_ERR;
41408c2ecf20Sopenharmony_ci		goto ack_err;
41418c2ecf20Sopenharmony_ci	}
41428c2ecf20Sopenharmony_ci
41438c2ecf20Sopenharmony_ci	trace_hfi1_tid_req_rcv_write_resp(qp, 0, wqe->wr.opcode, wqe->psn,
41448c2ecf20Sopenharmony_ci					  wqe->lpsn, req);
41458c2ecf20Sopenharmony_ci	/*
41468c2ecf20Sopenharmony_ci	 * If this is the first response for this request, set the initial
41478c2ecf20Sopenharmony_ci	 * flow index to the current flow.
41488c2ecf20Sopenharmony_ci	 */
41498c2ecf20Sopenharmony_ci	if (!cmp_psn(psn, wqe->psn)) {
41508c2ecf20Sopenharmony_ci		req->r_last_acked = mask_psn(wqe->psn - 1);
41518c2ecf20Sopenharmony_ci		/* Set acked flow index to head index */
41528c2ecf20Sopenharmony_ci		req->acked_tail = req->setup_head;
41538c2ecf20Sopenharmony_ci	}
41548c2ecf20Sopenharmony_ci
41558c2ecf20Sopenharmony_ci	/* advance circular buffer head */
41568c2ecf20Sopenharmony_ci	req->setup_head = CIRC_NEXT(req->setup_head, MAX_FLOWS);
41578c2ecf20Sopenharmony_ci	req->state = TID_REQUEST_ACTIVE;
41588c2ecf20Sopenharmony_ci
41598c2ecf20Sopenharmony_ci	/*
41608c2ecf20Sopenharmony_ci	 * If all responses for this TID RDMA WRITE request have been received
41618c2ecf20Sopenharmony_ci	 * advance the pointer to the next one.
41628c2ecf20Sopenharmony_ci	 * Since TID RDMA requests could be mixed in with regular IB requests,
41638c2ecf20Sopenharmony_ci	 * they might not appear sequentially in the queue. Therefore, the
41648c2ecf20Sopenharmony_ci	 * next request needs to be "found".
41658c2ecf20Sopenharmony_ci	 */
41668c2ecf20Sopenharmony_ci	if (qpriv->s_tid_cur != qpriv->s_tid_head &&
41678c2ecf20Sopenharmony_ci	    req->comp_seg == req->total_segs) {
41688c2ecf20Sopenharmony_ci		for (i = qpriv->s_tid_cur + 1; ; i++) {
41698c2ecf20Sopenharmony_ci			if (i == qp->s_size)
41708c2ecf20Sopenharmony_ci				i = 0;
41718c2ecf20Sopenharmony_ci			wqe = rvt_get_swqe_ptr(qp, i);
41728c2ecf20Sopenharmony_ci			if (i == qpriv->s_tid_head)
41738c2ecf20Sopenharmony_ci				break;
41748c2ecf20Sopenharmony_ci			if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
41758c2ecf20Sopenharmony_ci				break;
41768c2ecf20Sopenharmony_ci		}
41778c2ecf20Sopenharmony_ci		qpriv->s_tid_cur = i;
41788c2ecf20Sopenharmony_ci	}
41798c2ecf20Sopenharmony_ci	qp->s_flags &= ~HFI1_S_WAIT_TID_RESP;
41808c2ecf20Sopenharmony_ci	hfi1_schedule_tid_send(qp);
41818c2ecf20Sopenharmony_ci	goto ack_done;
41828c2ecf20Sopenharmony_ci
41838c2ecf20Sopenharmony_ciack_op_err:
41848c2ecf20Sopenharmony_ci	status = IB_WC_LOC_QP_OP_ERR;
41858c2ecf20Sopenharmony_ciack_err:
41868c2ecf20Sopenharmony_ci	rvt_error_qp(qp, status);
41878c2ecf20Sopenharmony_ciack_done:
41888c2ecf20Sopenharmony_ci	if (fecn)
41898c2ecf20Sopenharmony_ci		qp->s_flags |= RVT_S_ECN;
41908c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&qp->s_lock, flags);
41918c2ecf20Sopenharmony_ci}
41928c2ecf20Sopenharmony_ci
41938c2ecf20Sopenharmony_cibool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
41948c2ecf20Sopenharmony_ci				struct ib_other_headers *ohdr,
41958c2ecf20Sopenharmony_ci				u32 *bth1, u32 *bth2, u32 *len)
41968c2ecf20Sopenharmony_ci{
41978c2ecf20Sopenharmony_ci	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
41988c2ecf20Sopenharmony_ci	struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
41998c2ecf20Sopenharmony_ci	struct tid_rdma_params *remote;
42008c2ecf20Sopenharmony_ci	struct rvt_qp *qp = req->qp;
42018c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
42028c2ecf20Sopenharmony_ci	u32 tidentry = flow->tid_entry[flow->tid_idx];
42038c2ecf20Sopenharmony_ci	u32 tidlen = EXP_TID_GET(tidentry, LEN) << PAGE_SHIFT;
42048c2ecf20Sopenharmony_ci	struct tid_rdma_write_data *wd = &ohdr->u.tid_rdma.w_data;
42058c2ecf20Sopenharmony_ci	u32 next_offset, om = KDETH_OM_LARGE;
42068c2ecf20Sopenharmony_ci	bool last_pkt;
42078c2ecf20Sopenharmony_ci
42088c2ecf20Sopenharmony_ci	if (!tidlen) {
42098c2ecf20Sopenharmony_ci		hfi1_trdma_send_complete(qp, wqe, IB_WC_REM_INV_RD_REQ_ERR);
42108c2ecf20Sopenharmony_ci		rvt_error_qp(qp, IB_WC_REM_INV_RD_REQ_ERR);
42118c2ecf20Sopenharmony_ci	}
42128c2ecf20Sopenharmony_ci
42138c2ecf20Sopenharmony_ci	*len = min_t(u32, qp->pmtu, tidlen - flow->tid_offset);
42148c2ecf20Sopenharmony_ci	flow->sent += *len;
42158c2ecf20Sopenharmony_ci	next_offset = flow->tid_offset + *len;
42168c2ecf20Sopenharmony_ci	last_pkt = (flow->tid_idx == (flow->tidcnt - 1) &&
42178c2ecf20Sopenharmony_ci		    next_offset >= tidlen) || (flow->sent >= flow->length);
42188c2ecf20Sopenharmony_ci	trace_hfi1_tid_entry_build_write_data(qp, flow->tid_idx, tidentry);
42198c2ecf20Sopenharmony_ci	trace_hfi1_tid_flow_build_write_data(qp, req->clear_tail, flow);
42208c2ecf20Sopenharmony_ci
42218c2ecf20Sopenharmony_ci	rcu_read_lock();
42228c2ecf20Sopenharmony_ci	remote = rcu_dereference(qpriv->tid_rdma.remote);
42238c2ecf20Sopenharmony_ci	KDETH_RESET(wd->kdeth0, KVER, 0x1);
42248c2ecf20Sopenharmony_ci	KDETH_SET(wd->kdeth0, SH, !last_pkt);
42258c2ecf20Sopenharmony_ci	KDETH_SET(wd->kdeth0, INTR, !!(!last_pkt && remote->urg));
42268c2ecf20Sopenharmony_ci	KDETH_SET(wd->kdeth0, TIDCTRL, EXP_TID_GET(tidentry, CTRL));
42278c2ecf20Sopenharmony_ci	KDETH_SET(wd->kdeth0, TID, EXP_TID_GET(tidentry, IDX));
42288c2ecf20Sopenharmony_ci	KDETH_SET(wd->kdeth0, OM, om == KDETH_OM_LARGE);
42298c2ecf20Sopenharmony_ci	KDETH_SET(wd->kdeth0, OFFSET, flow->tid_offset / om);
42308c2ecf20Sopenharmony_ci	KDETH_RESET(wd->kdeth1, JKEY, remote->jkey);
42318c2ecf20Sopenharmony_ci	wd->verbs_qp = cpu_to_be32(qp->remote_qpn);
42328c2ecf20Sopenharmony_ci	rcu_read_unlock();
42338c2ecf20Sopenharmony_ci
42348c2ecf20Sopenharmony_ci	*bth1 = flow->tid_qpn;
42358c2ecf20Sopenharmony_ci	*bth2 = mask_psn(((flow->flow_state.spsn + flow->pkt++) &
42368c2ecf20Sopenharmony_ci			 HFI1_KDETH_BTH_SEQ_MASK) |
42378c2ecf20Sopenharmony_ci			 (flow->flow_state.generation <<
42388c2ecf20Sopenharmony_ci			  HFI1_KDETH_BTH_SEQ_SHIFT));
42398c2ecf20Sopenharmony_ci	if (last_pkt) {
42408c2ecf20Sopenharmony_ci		/* PSNs are zero-based, so +1 to count number of packets */
42418c2ecf20Sopenharmony_ci		if (flow->flow_state.lpsn + 1 +
42428c2ecf20Sopenharmony_ci		    rvt_div_round_up_mtu(qp, req->seg_len) >
42438c2ecf20Sopenharmony_ci		    MAX_TID_FLOW_PSN)
42448c2ecf20Sopenharmony_ci			req->state = TID_REQUEST_SYNC;
42458c2ecf20Sopenharmony_ci		*bth2 |= IB_BTH_REQ_ACK;
42468c2ecf20Sopenharmony_ci	}
42478c2ecf20Sopenharmony_ci
42488c2ecf20Sopenharmony_ci	if (next_offset >= tidlen) {
42498c2ecf20Sopenharmony_ci		flow->tid_offset = 0;
42508c2ecf20Sopenharmony_ci		flow->tid_idx++;
42518c2ecf20Sopenharmony_ci	} else {
42528c2ecf20Sopenharmony_ci		flow->tid_offset = next_offset;
42538c2ecf20Sopenharmony_ci	}
42548c2ecf20Sopenharmony_ci	return last_pkt;
42558c2ecf20Sopenharmony_ci}
42568c2ecf20Sopenharmony_ci
42578c2ecf20Sopenharmony_civoid hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
42588c2ecf20Sopenharmony_ci{
42598c2ecf20Sopenharmony_ci	struct rvt_qp *qp = packet->qp;
42608c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
42618c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *rcd = priv->rcd;
42628c2ecf20Sopenharmony_ci	struct ib_other_headers *ohdr = packet->ohdr;
42638c2ecf20Sopenharmony_ci	struct rvt_ack_entry *e;
42648c2ecf20Sopenharmony_ci	struct tid_rdma_request *req;
42658c2ecf20Sopenharmony_ci	struct tid_rdma_flow *flow;
42668c2ecf20Sopenharmony_ci	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
42678c2ecf20Sopenharmony_ci	unsigned long flags;
42688c2ecf20Sopenharmony_ci	u32 psn, next;
42698c2ecf20Sopenharmony_ci	u8 opcode;
42708c2ecf20Sopenharmony_ci	bool fecn;
42718c2ecf20Sopenharmony_ci
42728c2ecf20Sopenharmony_ci	fecn = process_ecn(qp, packet);
42738c2ecf20Sopenharmony_ci	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
42748c2ecf20Sopenharmony_ci	opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
42758c2ecf20Sopenharmony_ci
42768c2ecf20Sopenharmony_ci	/*
42778c2ecf20Sopenharmony_ci	 * All error handling should be done by now. If we are here, the packet
42788c2ecf20Sopenharmony_ci	 * is either good or been accepted by the error handler.
42798c2ecf20Sopenharmony_ci	 */
42808c2ecf20Sopenharmony_ci	spin_lock_irqsave(&qp->s_lock, flags);
42818c2ecf20Sopenharmony_ci	e = &qp->s_ack_queue[priv->r_tid_tail];
42828c2ecf20Sopenharmony_ci	req = ack_to_tid_req(e);
42838c2ecf20Sopenharmony_ci	flow = &req->flows[req->clear_tail];
42848c2ecf20Sopenharmony_ci	if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) {
42858c2ecf20Sopenharmony_ci		update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
42868c2ecf20Sopenharmony_ci
42878c2ecf20Sopenharmony_ci		if (cmp_psn(psn, flow->flow_state.r_next_psn))
42888c2ecf20Sopenharmony_ci			goto send_nak;
42898c2ecf20Sopenharmony_ci
42908c2ecf20Sopenharmony_ci		flow->flow_state.r_next_psn = mask_psn(psn + 1);
42918c2ecf20Sopenharmony_ci		/*
42928c2ecf20Sopenharmony_ci		 * Copy the payload to destination buffer if this packet is
42938c2ecf20Sopenharmony_ci		 * delivered as an eager packet due to RSM rule and FECN.
42948c2ecf20Sopenharmony_ci		 * The RSM rule selects FECN bit in BTH and SH bit in
42958c2ecf20Sopenharmony_ci		 * KDETH header and therefore will not match the last
42968c2ecf20Sopenharmony_ci		 * packet of each segment that has SH bit cleared.
42978c2ecf20Sopenharmony_ci		 */
42988c2ecf20Sopenharmony_ci		if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
42998c2ecf20Sopenharmony_ci			struct rvt_sge_state ss;
43008c2ecf20Sopenharmony_ci			u32 len;
43018c2ecf20Sopenharmony_ci			u32 tlen = packet->tlen;
43028c2ecf20Sopenharmony_ci			u16 hdrsize = packet->hlen;
43038c2ecf20Sopenharmony_ci			u8 pad = packet->pad;
43048c2ecf20Sopenharmony_ci			u8 extra_bytes = pad + packet->extra_byte +
43058c2ecf20Sopenharmony_ci				(SIZE_OF_CRC << 2);
43068c2ecf20Sopenharmony_ci			u32 pmtu = qp->pmtu;
43078c2ecf20Sopenharmony_ci
43088c2ecf20Sopenharmony_ci			if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
43098c2ecf20Sopenharmony_ci				goto send_nak;
43108c2ecf20Sopenharmony_ci			len = req->comp_seg * req->seg_len;
43118c2ecf20Sopenharmony_ci			len += delta_psn(psn,
43128c2ecf20Sopenharmony_ci				full_flow_psn(flow, flow->flow_state.spsn)) *
43138c2ecf20Sopenharmony_ci				pmtu;
43148c2ecf20Sopenharmony_ci			if (unlikely(req->total_len - len < pmtu))
43158c2ecf20Sopenharmony_ci				goto send_nak;
43168c2ecf20Sopenharmony_ci
43178c2ecf20Sopenharmony_ci			/*
43188c2ecf20Sopenharmony_ci			 * The e->rdma_sge field is set when TID RDMA WRITE REQ
43198c2ecf20Sopenharmony_ci			 * is first received and is never modified thereafter.
43208c2ecf20Sopenharmony_ci			 */
43218c2ecf20Sopenharmony_ci			ss.sge = e->rdma_sge;
43228c2ecf20Sopenharmony_ci			ss.sg_list = NULL;
43238c2ecf20Sopenharmony_ci			ss.num_sge = 1;
43248c2ecf20Sopenharmony_ci			ss.total_len = req->total_len;
43258c2ecf20Sopenharmony_ci			rvt_skip_sge(&ss, len, false);
43268c2ecf20Sopenharmony_ci			rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
43278c2ecf20Sopenharmony_ci				     false);
43288c2ecf20Sopenharmony_ci			/* Raise the sw sequence check flag for next packet */
43298c2ecf20Sopenharmony_ci			priv->r_next_psn_kdeth = mask_psn(psn + 1);
43308c2ecf20Sopenharmony_ci			priv->s_flags |= HFI1_R_TID_SW_PSN;
43318c2ecf20Sopenharmony_ci		}
43328c2ecf20Sopenharmony_ci		goto exit;
43338c2ecf20Sopenharmony_ci	}
43348c2ecf20Sopenharmony_ci	flow->flow_state.r_next_psn = mask_psn(psn + 1);
43358c2ecf20Sopenharmony_ci	hfi1_kern_exp_rcv_clear(req);
43368c2ecf20Sopenharmony_ci	priv->alloc_w_segs--;
43378c2ecf20Sopenharmony_ci	rcd->flows[flow->idx].psn = psn & HFI1_KDETH_BTH_SEQ_MASK;
43388c2ecf20Sopenharmony_ci	req->comp_seg++;
43398c2ecf20Sopenharmony_ci	priv->s_nak_state = 0;
43408c2ecf20Sopenharmony_ci
43418c2ecf20Sopenharmony_ci	/*
43428c2ecf20Sopenharmony_ci	 * Release the flow if one of the following conditions has been met:
43438c2ecf20Sopenharmony_ci	 *  - The request has reached a sync point AND all outstanding
43448c2ecf20Sopenharmony_ci	 *    segments have been completed, or
43458c2ecf20Sopenharmony_ci	 *  - The entire request is complete and there are no more requests
43468c2ecf20Sopenharmony_ci	 *    (of any kind) in the queue.
43478c2ecf20Sopenharmony_ci	 */
43488c2ecf20Sopenharmony_ci	trace_hfi1_rsp_rcv_tid_write_data(qp, psn);
43498c2ecf20Sopenharmony_ci	trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
43508c2ecf20Sopenharmony_ci					  req);
43518c2ecf20Sopenharmony_ci	trace_hfi1_tid_write_rsp_rcv_data(qp);
43528c2ecf20Sopenharmony_ci	validate_r_tid_ack(priv);
43538c2ecf20Sopenharmony_ci
43548c2ecf20Sopenharmony_ci	if (opcode == TID_OP(WRITE_DATA_LAST)) {
43558c2ecf20Sopenharmony_ci		release_rdma_sge_mr(e);
43568c2ecf20Sopenharmony_ci		for (next = priv->r_tid_tail + 1; ; next++) {
43578c2ecf20Sopenharmony_ci			if (next > rvt_size_atomic(&dev->rdi))
43588c2ecf20Sopenharmony_ci				next = 0;
43598c2ecf20Sopenharmony_ci			if (next == priv->r_tid_head)
43608c2ecf20Sopenharmony_ci				break;
43618c2ecf20Sopenharmony_ci			e = &qp->s_ack_queue[next];
43628c2ecf20Sopenharmony_ci			if (e->opcode == TID_OP(WRITE_REQ))
43638c2ecf20Sopenharmony_ci				break;
43648c2ecf20Sopenharmony_ci		}
43658c2ecf20Sopenharmony_ci		priv->r_tid_tail = next;
43668c2ecf20Sopenharmony_ci		if (++qp->s_acked_ack_queue > rvt_size_atomic(&dev->rdi))
43678c2ecf20Sopenharmony_ci			qp->s_acked_ack_queue = 0;
43688c2ecf20Sopenharmony_ci	}
43698c2ecf20Sopenharmony_ci
43708c2ecf20Sopenharmony_ci	hfi1_tid_write_alloc_resources(qp, true);
43718c2ecf20Sopenharmony_ci
43728c2ecf20Sopenharmony_ci	/*
43738c2ecf20Sopenharmony_ci	 * If we need to generate more responses, schedule the
43748c2ecf20Sopenharmony_ci	 * send engine.
43758c2ecf20Sopenharmony_ci	 */
43768c2ecf20Sopenharmony_ci	if (req->cur_seg < req->total_segs ||
43778c2ecf20Sopenharmony_ci	    qp->s_tail_ack_queue != qp->r_head_ack_queue) {
43788c2ecf20Sopenharmony_ci		qp->s_flags |= RVT_S_RESP_PENDING;
43798c2ecf20Sopenharmony_ci		hfi1_schedule_send(qp);
43808c2ecf20Sopenharmony_ci	}
43818c2ecf20Sopenharmony_ci
43828c2ecf20Sopenharmony_ci	priv->pending_tid_w_segs--;
43838c2ecf20Sopenharmony_ci	if (priv->s_flags & HFI1_R_TID_RSC_TIMER) {
43848c2ecf20Sopenharmony_ci		if (priv->pending_tid_w_segs)
43858c2ecf20Sopenharmony_ci			hfi1_mod_tid_reap_timer(req->qp);
43868c2ecf20Sopenharmony_ci		else
43878c2ecf20Sopenharmony_ci			hfi1_stop_tid_reap_timer(req->qp);
43888c2ecf20Sopenharmony_ci	}
43898c2ecf20Sopenharmony_ci
43908c2ecf20Sopenharmony_cidone:
43918c2ecf20Sopenharmony_ci	tid_rdma_schedule_ack(qp);
43928c2ecf20Sopenharmony_ciexit:
43938c2ecf20Sopenharmony_ci	priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
43948c2ecf20Sopenharmony_ci	if (fecn)
43958c2ecf20Sopenharmony_ci		qp->s_flags |= RVT_S_ECN;
43968c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&qp->s_lock, flags);
43978c2ecf20Sopenharmony_ci	return;
43988c2ecf20Sopenharmony_ci
43998c2ecf20Sopenharmony_cisend_nak:
44008c2ecf20Sopenharmony_ci	if (!priv->s_nak_state) {
44018c2ecf20Sopenharmony_ci		priv->s_nak_state = IB_NAK_PSN_ERROR;
44028c2ecf20Sopenharmony_ci		priv->s_nak_psn = flow->flow_state.r_next_psn;
44038c2ecf20Sopenharmony_ci		tid_rdma_trigger_ack(qp);
44048c2ecf20Sopenharmony_ci	}
44058c2ecf20Sopenharmony_ci	goto done;
44068c2ecf20Sopenharmony_ci}
44078c2ecf20Sopenharmony_ci
44088c2ecf20Sopenharmony_cistatic bool hfi1_tid_rdma_is_resync_psn(u32 psn)
44098c2ecf20Sopenharmony_ci{
44108c2ecf20Sopenharmony_ci	return (bool)((psn & HFI1_KDETH_BTH_SEQ_MASK) ==
44118c2ecf20Sopenharmony_ci		      HFI1_KDETH_BTH_SEQ_MASK);
44128c2ecf20Sopenharmony_ci}
44138c2ecf20Sopenharmony_ci
44148c2ecf20Sopenharmony_ciu32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
44158c2ecf20Sopenharmony_ci				  struct ib_other_headers *ohdr, u16 iflow,
44168c2ecf20Sopenharmony_ci				  u32 *bth1, u32 *bth2)
44178c2ecf20Sopenharmony_ci{
44188c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
44198c2ecf20Sopenharmony_ci	struct tid_flow_state *fs = &qpriv->flow_state;
44208c2ecf20Sopenharmony_ci	struct tid_rdma_request *req = ack_to_tid_req(e);
44218c2ecf20Sopenharmony_ci	struct tid_rdma_flow *flow = &req->flows[iflow];
44228c2ecf20Sopenharmony_ci	struct tid_rdma_params *remote;
44238c2ecf20Sopenharmony_ci
44248c2ecf20Sopenharmony_ci	rcu_read_lock();
44258c2ecf20Sopenharmony_ci	remote = rcu_dereference(qpriv->tid_rdma.remote);
44268c2ecf20Sopenharmony_ci	KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth1, JKEY, remote->jkey);
44278c2ecf20Sopenharmony_ci	ohdr->u.tid_rdma.ack.verbs_qp = cpu_to_be32(qp->remote_qpn);
44288c2ecf20Sopenharmony_ci	*bth1 = remote->qp;
44298c2ecf20Sopenharmony_ci	rcu_read_unlock();
44308c2ecf20Sopenharmony_ci
44318c2ecf20Sopenharmony_ci	if (qpriv->resync) {
44328c2ecf20Sopenharmony_ci		*bth2 = mask_psn((fs->generation <<
44338c2ecf20Sopenharmony_ci				  HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
44348c2ecf20Sopenharmony_ci		ohdr->u.tid_rdma.ack.aeth = rvt_compute_aeth(qp);
44358c2ecf20Sopenharmony_ci	} else if (qpriv->s_nak_state) {
44368c2ecf20Sopenharmony_ci		*bth2 = mask_psn(qpriv->s_nak_psn);
44378c2ecf20Sopenharmony_ci		ohdr->u.tid_rdma.ack.aeth =
44388c2ecf20Sopenharmony_ci			cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
44398c2ecf20Sopenharmony_ci				    (qpriv->s_nak_state <<
44408c2ecf20Sopenharmony_ci				     IB_AETH_CREDIT_SHIFT));
44418c2ecf20Sopenharmony_ci	} else {
44428c2ecf20Sopenharmony_ci		*bth2 = full_flow_psn(flow, flow->flow_state.lpsn);
44438c2ecf20Sopenharmony_ci		ohdr->u.tid_rdma.ack.aeth = rvt_compute_aeth(qp);
44448c2ecf20Sopenharmony_ci	}
44458c2ecf20Sopenharmony_ci	KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth0, KVER, 0x1);
44468c2ecf20Sopenharmony_ci	ohdr->u.tid_rdma.ack.tid_flow_qp =
44478c2ecf20Sopenharmony_ci		cpu_to_be32(qpriv->tid_rdma.local.qp |
44488c2ecf20Sopenharmony_ci			    ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
44498c2ecf20Sopenharmony_ci			     TID_RDMA_DESTQP_FLOW_SHIFT) |
44508c2ecf20Sopenharmony_ci			    qpriv->rcd->ctxt);
44518c2ecf20Sopenharmony_ci
44528c2ecf20Sopenharmony_ci	ohdr->u.tid_rdma.ack.tid_flow_psn = 0;
44538c2ecf20Sopenharmony_ci	ohdr->u.tid_rdma.ack.verbs_psn =
44548c2ecf20Sopenharmony_ci		cpu_to_be32(flow->flow_state.resp_ib_psn);
44558c2ecf20Sopenharmony_ci
44568c2ecf20Sopenharmony_ci	if (qpriv->resync) {
44578c2ecf20Sopenharmony_ci		/*
44588c2ecf20Sopenharmony_ci		 * If the PSN before the current expect KDETH PSN is the
44598c2ecf20Sopenharmony_ci		 * RESYNC PSN, then we never received a good TID RDMA WRITE
44608c2ecf20Sopenharmony_ci		 * DATA packet after a previous RESYNC.
44618c2ecf20Sopenharmony_ci		 * In this case, the next expected KDETH PSN stays the same.
44628c2ecf20Sopenharmony_ci		 */
44638c2ecf20Sopenharmony_ci		if (hfi1_tid_rdma_is_resync_psn(qpriv->r_next_psn_kdeth - 1)) {
44648c2ecf20Sopenharmony_ci			ohdr->u.tid_rdma.ack.tid_flow_psn =
44658c2ecf20Sopenharmony_ci				cpu_to_be32(qpriv->r_next_psn_kdeth_save);
44668c2ecf20Sopenharmony_ci		} else {
44678c2ecf20Sopenharmony_ci			/*
44688c2ecf20Sopenharmony_ci			 * Because the KDETH PSNs jump during a RESYNC, it's
44698c2ecf20Sopenharmony_ci			 * not possible to infer (or compute) the previous value
44708c2ecf20Sopenharmony_ci			 * of r_next_psn_kdeth in the case of back-to-back
44718c2ecf20Sopenharmony_ci			 * RESYNC packets. Therefore, we save it.
44728c2ecf20Sopenharmony_ci			 */
44738c2ecf20Sopenharmony_ci			qpriv->r_next_psn_kdeth_save =
44748c2ecf20Sopenharmony_ci				qpriv->r_next_psn_kdeth - 1;
44758c2ecf20Sopenharmony_ci			ohdr->u.tid_rdma.ack.tid_flow_psn =
44768c2ecf20Sopenharmony_ci				cpu_to_be32(qpriv->r_next_psn_kdeth_save);
44778c2ecf20Sopenharmony_ci			qpriv->r_next_psn_kdeth = mask_psn(*bth2 + 1);
44788c2ecf20Sopenharmony_ci		}
44798c2ecf20Sopenharmony_ci		qpriv->resync = false;
44808c2ecf20Sopenharmony_ci	}
44818c2ecf20Sopenharmony_ci
44828c2ecf20Sopenharmony_ci	return sizeof(ohdr->u.tid_rdma.ack) / sizeof(u32);
44838c2ecf20Sopenharmony_ci}
44848c2ecf20Sopenharmony_ci
44858c2ecf20Sopenharmony_civoid hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
44868c2ecf20Sopenharmony_ci{
44878c2ecf20Sopenharmony_ci	struct ib_other_headers *ohdr = packet->ohdr;
44888c2ecf20Sopenharmony_ci	struct rvt_qp *qp = packet->qp;
44898c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
44908c2ecf20Sopenharmony_ci	struct rvt_swqe *wqe;
44918c2ecf20Sopenharmony_ci	struct tid_rdma_request *req;
44928c2ecf20Sopenharmony_ci	struct tid_rdma_flow *flow;
44938c2ecf20Sopenharmony_ci	u32 aeth, psn, req_psn, ack_psn, flpsn, resync_psn, ack_kpsn;
44948c2ecf20Sopenharmony_ci	unsigned long flags;
44958c2ecf20Sopenharmony_ci	u16 fidx;
44968c2ecf20Sopenharmony_ci
44978c2ecf20Sopenharmony_ci	trace_hfi1_tid_write_sender_rcv_tid_ack(qp, 0);
44988c2ecf20Sopenharmony_ci	process_ecn(qp, packet);
44998c2ecf20Sopenharmony_ci	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
45008c2ecf20Sopenharmony_ci	aeth = be32_to_cpu(ohdr->u.tid_rdma.ack.aeth);
45018c2ecf20Sopenharmony_ci	req_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.verbs_psn));
45028c2ecf20Sopenharmony_ci	resync_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.tid_flow_psn));
45038c2ecf20Sopenharmony_ci
45048c2ecf20Sopenharmony_ci	spin_lock_irqsave(&qp->s_lock, flags);
45058c2ecf20Sopenharmony_ci	trace_hfi1_rcv_tid_ack(qp, aeth, psn, req_psn, resync_psn);
45068c2ecf20Sopenharmony_ci
45078c2ecf20Sopenharmony_ci	/* If we are waiting for an ACK to RESYNC, drop any other packets */
45088c2ecf20Sopenharmony_ci	if ((qp->s_flags & HFI1_S_WAIT_HALT) &&
45098c2ecf20Sopenharmony_ci	    cmp_psn(psn, qpriv->s_resync_psn))
45108c2ecf20Sopenharmony_ci		goto ack_op_err;
45118c2ecf20Sopenharmony_ci
45128c2ecf20Sopenharmony_ci	ack_psn = req_psn;
45138c2ecf20Sopenharmony_ci	if (hfi1_tid_rdma_is_resync_psn(psn))
45148c2ecf20Sopenharmony_ci		ack_kpsn = resync_psn;
45158c2ecf20Sopenharmony_ci	else
45168c2ecf20Sopenharmony_ci		ack_kpsn = psn;
45178c2ecf20Sopenharmony_ci	if (aeth >> 29) {
45188c2ecf20Sopenharmony_ci		ack_psn--;
45198c2ecf20Sopenharmony_ci		ack_kpsn--;
45208c2ecf20Sopenharmony_ci	}
45218c2ecf20Sopenharmony_ci
45228c2ecf20Sopenharmony_ci	if (unlikely(qp->s_acked == qp->s_tail))
45238c2ecf20Sopenharmony_ci		goto ack_op_err;
45248c2ecf20Sopenharmony_ci
45258c2ecf20Sopenharmony_ci	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
45268c2ecf20Sopenharmony_ci
45278c2ecf20Sopenharmony_ci	if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
45288c2ecf20Sopenharmony_ci		goto ack_op_err;
45298c2ecf20Sopenharmony_ci
45308c2ecf20Sopenharmony_ci	req = wqe_to_tid_req(wqe);
45318c2ecf20Sopenharmony_ci	trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
45328c2ecf20Sopenharmony_ci				       wqe->lpsn, req);
45338c2ecf20Sopenharmony_ci	flow = &req->flows[req->acked_tail];
45348c2ecf20Sopenharmony_ci	trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
45358c2ecf20Sopenharmony_ci
45368c2ecf20Sopenharmony_ci	/* Drop stale ACK/NAK */
45378c2ecf20Sopenharmony_ci	if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0 ||
45388c2ecf20Sopenharmony_ci	    cmp_psn(req_psn, flow->flow_state.resp_ib_psn) < 0)
45398c2ecf20Sopenharmony_ci		goto ack_op_err;
45408c2ecf20Sopenharmony_ci
45418c2ecf20Sopenharmony_ci	while (cmp_psn(ack_kpsn,
45428c2ecf20Sopenharmony_ci		       full_flow_psn(flow, flow->flow_state.lpsn)) >= 0 &&
45438c2ecf20Sopenharmony_ci	       req->ack_seg < req->cur_seg) {
45448c2ecf20Sopenharmony_ci		req->ack_seg++;
45458c2ecf20Sopenharmony_ci		/* advance acked segment pointer */
45468c2ecf20Sopenharmony_ci		req->acked_tail = CIRC_NEXT(req->acked_tail, MAX_FLOWS);
45478c2ecf20Sopenharmony_ci		req->r_last_acked = flow->flow_state.resp_ib_psn;
45488c2ecf20Sopenharmony_ci		trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
45498c2ecf20Sopenharmony_ci					       wqe->lpsn, req);
45508c2ecf20Sopenharmony_ci		if (req->ack_seg == req->total_segs) {
45518c2ecf20Sopenharmony_ci			req->state = TID_REQUEST_COMPLETE;
45528c2ecf20Sopenharmony_ci			wqe = do_rc_completion(qp, wqe,
45538c2ecf20Sopenharmony_ci					       to_iport(qp->ibqp.device,
45548c2ecf20Sopenharmony_ci							qp->port_num));
45558c2ecf20Sopenharmony_ci			trace_hfi1_sender_rcv_tid_ack(qp);
45568c2ecf20Sopenharmony_ci			atomic_dec(&qpriv->n_tid_requests);
45578c2ecf20Sopenharmony_ci			if (qp->s_acked == qp->s_tail)
45588c2ecf20Sopenharmony_ci				break;
45598c2ecf20Sopenharmony_ci			if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
45608c2ecf20Sopenharmony_ci				break;
45618c2ecf20Sopenharmony_ci			req = wqe_to_tid_req(wqe);
45628c2ecf20Sopenharmony_ci		}
45638c2ecf20Sopenharmony_ci		flow = &req->flows[req->acked_tail];
45648c2ecf20Sopenharmony_ci		trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
45658c2ecf20Sopenharmony_ci	}
45668c2ecf20Sopenharmony_ci
45678c2ecf20Sopenharmony_ci	trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
45688c2ecf20Sopenharmony_ci				       wqe->lpsn, req);
45698c2ecf20Sopenharmony_ci	switch (aeth >> 29) {
45708c2ecf20Sopenharmony_ci	case 0:         /* ACK */
45718c2ecf20Sopenharmony_ci		if (qpriv->s_flags & RVT_S_WAIT_ACK)
45728c2ecf20Sopenharmony_ci			qpriv->s_flags &= ~RVT_S_WAIT_ACK;
45738c2ecf20Sopenharmony_ci		if (!hfi1_tid_rdma_is_resync_psn(psn)) {
45748c2ecf20Sopenharmony_ci			/* Check if there is any pending TID ACK */
45758c2ecf20Sopenharmony_ci			if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
45768c2ecf20Sopenharmony_ci			    req->ack_seg < req->cur_seg)
45778c2ecf20Sopenharmony_ci				hfi1_mod_tid_retry_timer(qp);
45788c2ecf20Sopenharmony_ci			else
45798c2ecf20Sopenharmony_ci				hfi1_stop_tid_retry_timer(qp);
45808c2ecf20Sopenharmony_ci			hfi1_schedule_send(qp);
45818c2ecf20Sopenharmony_ci		} else {
45828c2ecf20Sopenharmony_ci			u32 spsn, fpsn, last_acked, generation;
45838c2ecf20Sopenharmony_ci			struct tid_rdma_request *rptr;
45848c2ecf20Sopenharmony_ci
45858c2ecf20Sopenharmony_ci			/* ACK(RESYNC) */
45868c2ecf20Sopenharmony_ci			hfi1_stop_tid_retry_timer(qp);
45878c2ecf20Sopenharmony_ci			/* Allow new requests (see hfi1_make_tid_rdma_pkt) */
45888c2ecf20Sopenharmony_ci			qp->s_flags &= ~HFI1_S_WAIT_HALT;
45898c2ecf20Sopenharmony_ci			/*
45908c2ecf20Sopenharmony_ci			 * Clear RVT_S_SEND_ONE flag in case that the TID RDMA
45918c2ecf20Sopenharmony_ci			 * ACK is received after the TID retry timer is fired
45928c2ecf20Sopenharmony_ci			 * again. In this case, do not send any more TID
45938c2ecf20Sopenharmony_ci			 * RESYNC request or wait for any more TID ACK packet.
45948c2ecf20Sopenharmony_ci			 */
45958c2ecf20Sopenharmony_ci			qpriv->s_flags &= ~RVT_S_SEND_ONE;
45968c2ecf20Sopenharmony_ci			hfi1_schedule_send(qp);
45978c2ecf20Sopenharmony_ci
45988c2ecf20Sopenharmony_ci			if ((qp->s_acked == qpriv->s_tid_tail &&
45998c2ecf20Sopenharmony_ci			     req->ack_seg == req->total_segs) ||
46008c2ecf20Sopenharmony_ci			    qp->s_acked == qp->s_tail) {
46018c2ecf20Sopenharmony_ci				qpriv->s_state = TID_OP(WRITE_DATA_LAST);
46028c2ecf20Sopenharmony_ci				goto done;
46038c2ecf20Sopenharmony_ci			}
46048c2ecf20Sopenharmony_ci
46058c2ecf20Sopenharmony_ci			if (req->ack_seg == req->comp_seg) {
46068c2ecf20Sopenharmony_ci				qpriv->s_state = TID_OP(WRITE_DATA);
46078c2ecf20Sopenharmony_ci				goto done;
46088c2ecf20Sopenharmony_ci			}
46098c2ecf20Sopenharmony_ci
46108c2ecf20Sopenharmony_ci			/*
46118c2ecf20Sopenharmony_ci			 * The PSN to start with is the next PSN after the
46128c2ecf20Sopenharmony_ci			 * RESYNC PSN.
46138c2ecf20Sopenharmony_ci			 */
46148c2ecf20Sopenharmony_ci			psn = mask_psn(psn + 1);
46158c2ecf20Sopenharmony_ci			generation = psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
46168c2ecf20Sopenharmony_ci			spsn = 0;
46178c2ecf20Sopenharmony_ci
46188c2ecf20Sopenharmony_ci			/*
46198c2ecf20Sopenharmony_ci			 * Update to the correct WQE when we get an ACK(RESYNC)
46208c2ecf20Sopenharmony_ci			 * in the middle of a request.
46218c2ecf20Sopenharmony_ci			 */
46228c2ecf20Sopenharmony_ci			if (delta_psn(ack_psn, wqe->lpsn))
46238c2ecf20Sopenharmony_ci				wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
46248c2ecf20Sopenharmony_ci			req = wqe_to_tid_req(wqe);
46258c2ecf20Sopenharmony_ci			flow = &req->flows[req->acked_tail];
46268c2ecf20Sopenharmony_ci			/*
46278c2ecf20Sopenharmony_ci			 * RESYNC re-numbers the PSN ranges of all remaining
46288c2ecf20Sopenharmony_ci			 * segments. Also, PSN's start from 0 in the middle of a
46298c2ecf20Sopenharmony_ci			 * segment and the first segment size is less than the
46308c2ecf20Sopenharmony_ci			 * default number of packets. flow->resync_npkts is used
46318c2ecf20Sopenharmony_ci			 * to track the number of packets from the start of the
46328c2ecf20Sopenharmony_ci			 * real segment to the point of 0 PSN after the RESYNC
46338c2ecf20Sopenharmony_ci			 * in order to later correctly rewind the SGE.
46348c2ecf20Sopenharmony_ci			 */
46358c2ecf20Sopenharmony_ci			fpsn = full_flow_psn(flow, flow->flow_state.spsn);
46368c2ecf20Sopenharmony_ci			req->r_ack_psn = psn;
46378c2ecf20Sopenharmony_ci			/*
46388c2ecf20Sopenharmony_ci			 * If resync_psn points to the last flow PSN for a
46398c2ecf20Sopenharmony_ci			 * segment and the new segment (likely from a new
46408c2ecf20Sopenharmony_ci			 * request) starts with a new generation number, we
46418c2ecf20Sopenharmony_ci			 * need to adjust resync_psn accordingly.
46428c2ecf20Sopenharmony_ci			 */
46438c2ecf20Sopenharmony_ci			if (flow->flow_state.generation !=
46448c2ecf20Sopenharmony_ci			    (resync_psn >> HFI1_KDETH_BTH_SEQ_SHIFT))
46458c2ecf20Sopenharmony_ci				resync_psn = mask_psn(fpsn - 1);
46468c2ecf20Sopenharmony_ci			flow->resync_npkts +=
46478c2ecf20Sopenharmony_ci				delta_psn(mask_psn(resync_psn + 1), fpsn);
46488c2ecf20Sopenharmony_ci			/*
46498c2ecf20Sopenharmony_ci			 * Renumber all packet sequence number ranges
46508c2ecf20Sopenharmony_ci			 * based on the new generation.
46518c2ecf20Sopenharmony_ci			 */
46528c2ecf20Sopenharmony_ci			last_acked = qp->s_acked;
46538c2ecf20Sopenharmony_ci			rptr = req;
46548c2ecf20Sopenharmony_ci			while (1) {
46558c2ecf20Sopenharmony_ci				/* start from last acked segment */
46568c2ecf20Sopenharmony_ci				for (fidx = rptr->acked_tail;
46578c2ecf20Sopenharmony_ci				     CIRC_CNT(rptr->setup_head, fidx,
46588c2ecf20Sopenharmony_ci					      MAX_FLOWS);
46598c2ecf20Sopenharmony_ci				     fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
46608c2ecf20Sopenharmony_ci					u32 lpsn;
46618c2ecf20Sopenharmony_ci					u32 gen;
46628c2ecf20Sopenharmony_ci
46638c2ecf20Sopenharmony_ci					flow = &rptr->flows[fidx];
46648c2ecf20Sopenharmony_ci					gen = flow->flow_state.generation;
46658c2ecf20Sopenharmony_ci					if (WARN_ON(gen == generation &&
46668c2ecf20Sopenharmony_ci						    flow->flow_state.spsn !=
46678c2ecf20Sopenharmony_ci						     spsn))
46688c2ecf20Sopenharmony_ci						continue;
46698c2ecf20Sopenharmony_ci					lpsn = flow->flow_state.lpsn;
46708c2ecf20Sopenharmony_ci					lpsn = full_flow_psn(flow, lpsn);
46718c2ecf20Sopenharmony_ci					flow->npkts =
46728c2ecf20Sopenharmony_ci						delta_psn(lpsn,
46738c2ecf20Sopenharmony_ci							  mask_psn(resync_psn)
46748c2ecf20Sopenharmony_ci							  );
46758c2ecf20Sopenharmony_ci					flow->flow_state.generation =
46768c2ecf20Sopenharmony_ci						generation;
46778c2ecf20Sopenharmony_ci					flow->flow_state.spsn = spsn;
46788c2ecf20Sopenharmony_ci					flow->flow_state.lpsn =
46798c2ecf20Sopenharmony_ci						flow->flow_state.spsn +
46808c2ecf20Sopenharmony_ci						flow->npkts - 1;
46818c2ecf20Sopenharmony_ci					flow->pkt = 0;
46828c2ecf20Sopenharmony_ci					spsn += flow->npkts;
46838c2ecf20Sopenharmony_ci					resync_psn += flow->npkts;
46848c2ecf20Sopenharmony_ci					trace_hfi1_tid_flow_rcv_tid_ack(qp,
46858c2ecf20Sopenharmony_ci									fidx,
46868c2ecf20Sopenharmony_ci									flow);
46878c2ecf20Sopenharmony_ci				}
46888c2ecf20Sopenharmony_ci				if (++last_acked == qpriv->s_tid_cur + 1)
46898c2ecf20Sopenharmony_ci					break;
46908c2ecf20Sopenharmony_ci				if (last_acked == qp->s_size)
46918c2ecf20Sopenharmony_ci					last_acked = 0;
46928c2ecf20Sopenharmony_ci				wqe = rvt_get_swqe_ptr(qp, last_acked);
46938c2ecf20Sopenharmony_ci				rptr = wqe_to_tid_req(wqe);
46948c2ecf20Sopenharmony_ci			}
46958c2ecf20Sopenharmony_ci			req->cur_seg = req->ack_seg;
46968c2ecf20Sopenharmony_ci			qpriv->s_tid_tail = qp->s_acked;
46978c2ecf20Sopenharmony_ci			qpriv->s_state = TID_OP(WRITE_REQ);
46988c2ecf20Sopenharmony_ci			hfi1_schedule_tid_send(qp);
46998c2ecf20Sopenharmony_ci		}
47008c2ecf20Sopenharmony_cidone:
47018c2ecf20Sopenharmony_ci		qpriv->s_retry = qp->s_retry_cnt;
47028c2ecf20Sopenharmony_ci		break;
47038c2ecf20Sopenharmony_ci
47048c2ecf20Sopenharmony_ci	case 3:         /* NAK */
47058c2ecf20Sopenharmony_ci		hfi1_stop_tid_retry_timer(qp);
47068c2ecf20Sopenharmony_ci		switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
47078c2ecf20Sopenharmony_ci			IB_AETH_CREDIT_MASK) {
47088c2ecf20Sopenharmony_ci		case 0: /* PSN sequence error */
47098c2ecf20Sopenharmony_ci			if (!req->flows)
47108c2ecf20Sopenharmony_ci				break;
47118c2ecf20Sopenharmony_ci			flow = &req->flows[req->acked_tail];
47128c2ecf20Sopenharmony_ci			flpsn = full_flow_psn(flow, flow->flow_state.lpsn);
47138c2ecf20Sopenharmony_ci			if (cmp_psn(psn, flpsn) > 0)
47148c2ecf20Sopenharmony_ci				break;
47158c2ecf20Sopenharmony_ci			trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail,
47168c2ecf20Sopenharmony_ci							flow);
47178c2ecf20Sopenharmony_ci			req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
47188c2ecf20Sopenharmony_ci			req->cur_seg = req->ack_seg;
47198c2ecf20Sopenharmony_ci			qpriv->s_tid_tail = qp->s_acked;
47208c2ecf20Sopenharmony_ci			qpriv->s_state = TID_OP(WRITE_REQ);
47218c2ecf20Sopenharmony_ci			qpriv->s_retry = qp->s_retry_cnt;
47228c2ecf20Sopenharmony_ci			hfi1_schedule_tid_send(qp);
47238c2ecf20Sopenharmony_ci			break;
47248c2ecf20Sopenharmony_ci
47258c2ecf20Sopenharmony_ci		default:
47268c2ecf20Sopenharmony_ci			break;
47278c2ecf20Sopenharmony_ci		}
47288c2ecf20Sopenharmony_ci		break;
47298c2ecf20Sopenharmony_ci
47308c2ecf20Sopenharmony_ci	default:
47318c2ecf20Sopenharmony_ci		break;
47328c2ecf20Sopenharmony_ci	}
47338c2ecf20Sopenharmony_ci
47348c2ecf20Sopenharmony_ciack_op_err:
47358c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&qp->s_lock, flags);
47368c2ecf20Sopenharmony_ci}
47378c2ecf20Sopenharmony_ci
47388c2ecf20Sopenharmony_civoid hfi1_add_tid_retry_timer(struct rvt_qp *qp)
47398c2ecf20Sopenharmony_ci{
47408c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
47418c2ecf20Sopenharmony_ci	struct ib_qp *ibqp = &qp->ibqp;
47428c2ecf20Sopenharmony_ci	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
47438c2ecf20Sopenharmony_ci
47448c2ecf20Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
47458c2ecf20Sopenharmony_ci	if (!(priv->s_flags & HFI1_S_TID_RETRY_TIMER)) {
47468c2ecf20Sopenharmony_ci		priv->s_flags |= HFI1_S_TID_RETRY_TIMER;
47478c2ecf20Sopenharmony_ci		priv->s_tid_retry_timer.expires = jiffies +
47488c2ecf20Sopenharmony_ci			priv->tid_retry_timeout_jiffies + rdi->busy_jiffies;
47498c2ecf20Sopenharmony_ci		add_timer(&priv->s_tid_retry_timer);
47508c2ecf20Sopenharmony_ci	}
47518c2ecf20Sopenharmony_ci}
47528c2ecf20Sopenharmony_ci
47538c2ecf20Sopenharmony_cistatic void hfi1_mod_tid_retry_timer(struct rvt_qp *qp)
47548c2ecf20Sopenharmony_ci{
47558c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
47568c2ecf20Sopenharmony_ci	struct ib_qp *ibqp = &qp->ibqp;
47578c2ecf20Sopenharmony_ci	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
47588c2ecf20Sopenharmony_ci
47598c2ecf20Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
47608c2ecf20Sopenharmony_ci	priv->s_flags |= HFI1_S_TID_RETRY_TIMER;
47618c2ecf20Sopenharmony_ci	mod_timer(&priv->s_tid_retry_timer, jiffies +
47628c2ecf20Sopenharmony_ci		  priv->tid_retry_timeout_jiffies + rdi->busy_jiffies);
47638c2ecf20Sopenharmony_ci}
47648c2ecf20Sopenharmony_ci
47658c2ecf20Sopenharmony_cistatic int hfi1_stop_tid_retry_timer(struct rvt_qp *qp)
47668c2ecf20Sopenharmony_ci{
47678c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
47688c2ecf20Sopenharmony_ci	int rval = 0;
47698c2ecf20Sopenharmony_ci
47708c2ecf20Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
47718c2ecf20Sopenharmony_ci	if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
47728c2ecf20Sopenharmony_ci		rval = del_timer(&priv->s_tid_retry_timer);
47738c2ecf20Sopenharmony_ci		priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
47748c2ecf20Sopenharmony_ci	}
47758c2ecf20Sopenharmony_ci	return rval;
47768c2ecf20Sopenharmony_ci}
47778c2ecf20Sopenharmony_ci
47788c2ecf20Sopenharmony_civoid hfi1_del_tid_retry_timer(struct rvt_qp *qp)
47798c2ecf20Sopenharmony_ci{
47808c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
47818c2ecf20Sopenharmony_ci
47828c2ecf20Sopenharmony_ci	del_timer_sync(&priv->s_tid_retry_timer);
47838c2ecf20Sopenharmony_ci	priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
47848c2ecf20Sopenharmony_ci}
47858c2ecf20Sopenharmony_ci
/*
 * TID RDMA retry timer expiry handler.
 *
 * Fires when a TID RDMA WRITE requester has waited too long for a TID
 * ACK.  If retries remain, it sets up the QP to send a single RESYNC
 * packet and halts new requests until the RESYNC completes; once
 * priv->s_retry is exhausted, the WQE is completed with
 * IB_WC_RETRY_EXC_ERR and the QP is moved to the error state.
 *
 * Locking: takes qp->r_lock then qp->s_lock, matching the lock order
 * used elsewhere in this driver for QP state transitions.
 */
static void hfi1_tid_retry_timeout(struct timer_list *t)
{
	struct hfi1_qp_priv *priv = from_timer(priv, t, s_tid_retry_timer);
	struct rvt_qp *qp = priv->owner;
	struct rvt_swqe *wqe;
	unsigned long flags;
	struct tid_rdma_request *req;

	spin_lock_irqsave(&qp->r_lock, flags);
	spin_lock(&qp->s_lock);
	trace_hfi1_tid_write_sender_retry_timeout(qp, 0);
	/*
	 * Only act if the timer is still logically armed; a concurrent
	 * ACK path may have stopped it between expiry and lock acquisition.
	 */
	if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
		hfi1_stop_tid_retry_timer(qp);
		if (!priv->s_retry) {
			/* Retries exhausted: fail the WQE and the QP. */
			trace_hfi1_msg_tid_retry_timeout(/* msg */
				qp,
				"Exhausted retries. Tid retry timeout = ",
				(u64)priv->tid_retry_timeout_jiffies);

			wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
			hfi1_trdma_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
			rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
		} else {
			wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
			req = wqe_to_tid_req(wqe);
			trace_hfi1_tid_req_tid_retry_timeout(/* req */
			   qp, 0, wqe->wr.opcode, wqe->psn, wqe->lpsn, req);

			priv->s_flags &= ~RVT_S_WAIT_ACK;
			/* Only send one packet (the RESYNC) */
			priv->s_flags |= RVT_S_SEND_ONE;
			/*
			 * No additional request shall be made by this QP until
			 * the RESYNC has been complete.
			 */
			qp->s_flags |= HFI1_S_WAIT_HALT;
			priv->s_state = TID_OP(RESYNC);
			priv->s_retry--;
			hfi1_schedule_tid_send(qp);
		}
	}
	spin_unlock(&qp->s_lock);
	spin_unlock_irqrestore(&qp->r_lock, flags);
}
48308c2ecf20Sopenharmony_ci
/*
 * Build the header for a TID RDMA RESYNC packet.
 *
 * @qp:   the sending QP
 * @wqe:  WQE whose TID request supplies the flow to resync from
 * @ohdr: other-headers area of the outgoing packet to fill in
 * @bth1: out - BTH word 1 (remote TID RDMA QP value)
 * @bth2: out - BTH word 2 (RESYNC PSN with IB_BTH_REQ_ACK set)
 * @fidx: index of the flow in req->flows to derive the generation from
 *
 * The RESYNC PSN is the last PSN of the *next* generation
 * ((generation << HFI1_KDETH_BTH_SEQ_SHIFT) - 1); it is also cached in
 * qpriv->s_resync_psn for the ACK-processing path.
 *
 * NOTE(review): the KDETH/verbs_qp fields are written through the
 * "ack" member of the tid_rdma union while the returned header size is
 * sizeof(...resync) — presumably the two formats overlay the same
 * words in the union; confirm against the tid_rdma header layouts.
 *
 * Return: number of 32-bit header words added.
 */
u32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe,
			       struct ib_other_headers *ohdr, u32 *bth1,
			       u32 *bth2, u16 fidx)
{
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct tid_rdma_params *remote;
	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
	struct tid_rdma_flow *flow = &req->flows[fidx];
	u32 generation;

	/* remote params are RCU-protected; only read under rcu_read_lock */
	rcu_read_lock();
	remote = rcu_dereference(qpriv->tid_rdma.remote);
	KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth1, JKEY, remote->jkey);
	ohdr->u.tid_rdma.ack.verbs_qp = cpu_to_be32(qp->remote_qpn);
	*bth1 = remote->qp;
	rcu_read_unlock();

	generation = kern_flow_generation_next(flow->flow_state.generation);
	*bth2 = mask_psn((generation << HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
	qpriv->s_resync_psn = *bth2;
	*bth2 |= IB_BTH_REQ_ACK;
	KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth0, KVER, 0x1);

	return sizeof(ohdr->u.tid_rdma.resync) / sizeof(u32);
}
48568c2ecf20Sopenharmony_ci
/*
 * Responder-side receive handler for a TID RDMA RESYNC packet.
 *
 * Validates that the generation carried in the RESYNC (encoded as
 * (psn + 1) >> HFI1_KDETH_BTH_SEQ_SHIFT) is either the responder's
 * next expected generation or the one before it, then reprograms the
 * hardware flow (or stashes the generation if no flow is allocated)
 * and renumbers the PSN ranges of every not-yet-received segment in
 * the ack queue starting from r_tid_tail.  Finally marks the QP as
 * having a resync in progress and triggers a TID RDMA ACK, which a
 * RESYNC always receives.
 *
 * Locking: qp->s_lock (irqsave) around everything; rcd->exp_lock
 * nested inside while the receive-context flow table is touched.
 */
void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
{
	struct ib_other_headers *ohdr = packet->ohdr;
	struct rvt_qp *qp = packet->qp;
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct hfi1_ctxtdata *rcd = qpriv->rcd;
	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
	struct rvt_ack_entry *e;
	struct tid_rdma_request *req;
	struct tid_rdma_flow *flow;
	struct tid_flow_state *fs = &qpriv->flow_state;
	u32 psn, generation, idx, gen_next;
	bool fecn;
	unsigned long flags;

	fecn = process_ecn(qp, packet);
	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));

	/* The RESYNC PSN is the last PSN of the previous generation. */
	generation = mask_psn(psn + 1) >> HFI1_KDETH_BTH_SEQ_SHIFT;
	spin_lock_irqsave(&qp->s_lock, flags);

	gen_next = (fs->generation == KERN_GENERATION_RESERVED) ?
		generation : kern_flow_generation_next(fs->generation);
	/*
	 * RESYNC packet contains the "next" generation and can only be
	 * from the current or previous generations
	 */
	if (generation != mask_generation(gen_next - 1) &&
	    generation != gen_next)
		goto bail;
	/* Already processing a resync */
	if (qpriv->resync)
		goto bail;

	spin_lock(&rcd->exp_lock);
	if (fs->index >= RXE_NUM_TID_FLOWS) {
		/*
		 * If we don't have a flow, save the generation so it can be
		 * applied when a new flow is allocated
		 */
		fs->generation = generation;
	} else {
		/* Reprogram the QP flow with new generation */
		rcd->flows[fs->index].generation = generation;
		fs->generation = kern_setup_hw_flow(rcd, fs->index);
	}
	/* PSN numbering restarts from 0 for the new generation. */
	fs->psn = 0;
	/*
	 * Disable SW PSN checking since a RESYNC is equivalent to a
	 * sync point and the flow has/will be reprogrammed
	 */
	qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
	trace_hfi1_tid_write_rsp_rcv_resync(qp);

	/*
	 * Reset all TID flow information with the new generation.
	 * This is done for all requests and segments after the
	 * last received segment
	 */
	for (idx = qpriv->r_tid_tail; ; idx++) {
		u16 flow_idx;

		/* Wrap around the ack queue. */
		if (idx > rvt_size_atomic(&dev->rdi))
			idx = 0;
		e = &qp->s_ack_queue[idx];
		if (e->opcode == TID_OP(WRITE_REQ)) {
			req = ack_to_tid_req(e);
			trace_hfi1_tid_req_rcv_resync(qp, 0, e->opcode, e->psn,
						      e->lpsn, req);

			/* start from last unacked segment */
			for (flow_idx = req->clear_tail;
			     CIRC_CNT(req->setup_head, flow_idx,
				      MAX_FLOWS);
			     flow_idx = CIRC_NEXT(flow_idx, MAX_FLOWS)) {
				u32 lpsn;
				u32 next;

				flow = &req->flows[flow_idx];
				lpsn = full_flow_psn(flow,
						     flow->flow_state.lpsn);
				/*
				 * Only the not-yet-received portion of the
				 * segment (from r_next_psn to lpsn) is
				 * renumbered under the new generation.
				 */
				next = flow->flow_state.r_next_psn;
				flow->npkts = delta_psn(lpsn, next - 1);
				flow->flow_state.generation = fs->generation;
				flow->flow_state.spsn = fs->psn;
				flow->flow_state.lpsn =
					flow->flow_state.spsn + flow->npkts - 1;
				flow->flow_state.r_next_psn =
					full_flow_psn(flow,
						      flow->flow_state.spsn);
				fs->psn += flow->npkts;
				trace_hfi1_tid_flow_rcv_resync(qp, flow_idx,
							       flow);
			}
		}
		if (idx == qp->s_tail_ack_queue)
			break;
	}

	spin_unlock(&rcd->exp_lock);
	qpriv->resync = true;
	/* RESYNC request always gets a TID RDMA ACK. */
	qpriv->s_nak_state = 0;
	tid_rdma_trigger_ack(qp);
bail:
	if (fecn)
		qp->s_flags |= RVT_S_ECN;
	spin_unlock_irqrestore(&qp->s_lock, flags);
}
49668c2ecf20Sopenharmony_ci
49678c2ecf20Sopenharmony_ci/*
49688c2ecf20Sopenharmony_ci * Call this function when the last TID RDMA WRITE DATA packet for a request
49698c2ecf20Sopenharmony_ci * is built.
49708c2ecf20Sopenharmony_ci */
49718c2ecf20Sopenharmony_cistatic void update_tid_tail(struct rvt_qp *qp)
49728c2ecf20Sopenharmony_ci	__must_hold(&qp->s_lock)
49738c2ecf20Sopenharmony_ci{
49748c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
49758c2ecf20Sopenharmony_ci	u32 i;
49768c2ecf20Sopenharmony_ci	struct rvt_swqe *wqe;
49778c2ecf20Sopenharmony_ci
49788c2ecf20Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
49798c2ecf20Sopenharmony_ci	/* Can't move beyond s_tid_cur */
49808c2ecf20Sopenharmony_ci	if (priv->s_tid_tail == priv->s_tid_cur)
49818c2ecf20Sopenharmony_ci		return;
49828c2ecf20Sopenharmony_ci	for (i = priv->s_tid_tail + 1; ; i++) {
49838c2ecf20Sopenharmony_ci		if (i == qp->s_size)
49848c2ecf20Sopenharmony_ci			i = 0;
49858c2ecf20Sopenharmony_ci
49868c2ecf20Sopenharmony_ci		if (i == priv->s_tid_cur)
49878c2ecf20Sopenharmony_ci			break;
49888c2ecf20Sopenharmony_ci		wqe = rvt_get_swqe_ptr(qp, i);
49898c2ecf20Sopenharmony_ci		if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
49908c2ecf20Sopenharmony_ci			break;
49918c2ecf20Sopenharmony_ci	}
49928c2ecf20Sopenharmony_ci	priv->s_tid_tail = i;
49938c2ecf20Sopenharmony_ci	priv->s_state = TID_OP(WRITE_RESP);
49948c2ecf20Sopenharmony_ci}
49958c2ecf20Sopenharmony_ci
/*
 * Build the next TID RDMA packet (WRITE DATA, WRITE DATA LAST, or
 * RESYNC) for this QP, or defer to the regular RC request path when
 * requests/responses take priority over data.
 *
 * Return: 1 if a packet was queued into ps->s_txreq, 0 if there is
 * nothing to send (ps->s_txreq is NULL and the iowait TID-pending
 * flag is set so the QP is rescheduled correctly).
 * Caller must hold qp->s_lock.
 */
int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
	__must_hold(&qp->s_lock)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct rvt_swqe *wqe;
	u32 bth1 = 0, bth2 = 0, hwords = 5, len, middle = 0;
	struct ib_other_headers *ohdr;
	struct rvt_sge_state *ss = &qp->s_sge;
	struct rvt_ack_entry *e = &qp->s_ack_queue[qp->s_tail_ack_queue];
	struct tid_rdma_request *req = ack_to_tid_req(e);
	bool last = false;
	u8 opcode = TID_OP(WRITE_DATA);

	lockdep_assert_held(&qp->s_lock);
	trace_hfi1_tid_write_sender_make_tid_pkt(qp, 0);
	/*
	 * Prioritize the sending of the requests and responses over the
	 * sending of the TID RDMA data packets.
	 */
	if (((atomic_read(&priv->n_tid_requests) < HFI1_TID_RDMA_WRITE_CNT) &&
	     atomic_read(&priv->n_requests) &&
	     !(qp->s_flags & (RVT_S_BUSY | RVT_S_WAIT_ACK |
			     HFI1_S_ANY_WAIT_IO))) ||
	    (e->opcode == TID_OP(WRITE_REQ) && req->cur_seg < req->alloc_seg &&
	     !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)))) {
		struct iowait_work *iowork;

		iowork = iowait_get_ib_work(&priv->s_iowait);
		ps->s_txreq = get_waiting_verbs_txreq(iowork);
		if (ps->s_txreq || hfi1_make_rc_req(qp, ps)) {
			priv->s_flags |= HFI1_S_TID_BUSY_SET;
			return 1;
		}
	}

	ps->s_txreq = get_txreq(ps->dev, qp);
	if (!ps->s_txreq)
		goto bail_no_tx;

	ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;

	/* A pending TID RDMA ACK takes precedence over data packets. */
	if ((priv->s_flags & RVT_S_ACK_PENDING) &&
	    make_tid_rdma_ack(qp, ohdr, ps))
		return 1;

	/*
	 * Bail out if we can't send data.
	 * Be reminded that this check must be done after the call to
	 * make_tid_rdma_ack() because the responding QP could be in
	 * RTR state where it can send TID RDMA ACK, not TID RDMA WRITE DATA.
	 */
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK))
		goto bail;

	if (priv->s_flags & RVT_S_WAIT_ACK)
		goto bail;

	/* Check whether there is anything to do. */
	if (priv->s_tid_tail == HFI1_QP_WQE_INVALID)
		goto bail;
	wqe = rvt_get_swqe_ptr(qp, priv->s_tid_tail);
	req = wqe_to_tid_req(wqe);
	trace_hfi1_tid_req_make_tid_pkt(qp, 0, wqe->wr.opcode, wqe->psn,
					wqe->lpsn, req);
	switch (priv->s_state) {
	case TID_OP(WRITE_REQ):
	case TID_OP(WRITE_RESP):
		/* (Re)initialize the TID SGE state from the WQE. */
		priv->tid_ss.sge = wqe->sg_list[0];
		priv->tid_ss.sg_list = wqe->sg_list + 1;
		priv->tid_ss.num_sge = wqe->wr.num_sge;
		priv->tid_ss.total_len = wqe->length;

		if (priv->s_state == TID_OP(WRITE_REQ))
			hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
		priv->s_state = TID_OP(WRITE_DATA);
		fallthrough;

	case TID_OP(WRITE_DATA):
		/*
		 * 1. Check whether TID RDMA WRITE RESP available.
		 * 2. If no:
		 *    2.1 If have more segments and no TID RDMA WRITE RESP,
		 *        set HFI1_S_WAIT_TID_RESP
		 *    2.2 Return indicating no progress made.
		 * 3. If yes:
		 *    3.1 Build TID RDMA WRITE DATA packet.
		 *    3.2 If last packet in segment:
		 *        3.2.1 Change KDETH header bits
		 *        3.2.2 Advance RESP pointers.
		 *    3.3 Return indicating progress made.
		 */
		trace_hfi1_sender_make_tid_pkt(qp);
		trace_hfi1_tid_write_sender_make_tid_pkt(qp, 0);
		wqe = rvt_get_swqe_ptr(qp, priv->s_tid_tail);
		req = wqe_to_tid_req(wqe);
		len = wqe->length;

		if (!req->comp_seg || req->cur_seg == req->comp_seg)
			goto bail;

		trace_hfi1_tid_req_make_tid_pkt(qp, 0, wqe->wr.opcode,
						wqe->psn, wqe->lpsn, req);
		last = hfi1_build_tid_rdma_packet(wqe, ohdr, &bth1, &bth2,
						  &len);

		if (last) {
			/* move pointer to next flow */
			req->clear_tail = CIRC_NEXT(req->clear_tail,
						    MAX_FLOWS);
			if (++req->cur_seg < req->total_segs) {
				/* Wait for a response if none is queued. */
				if (!CIRC_CNT(req->setup_head, req->clear_tail,
					      MAX_FLOWS))
					qp->s_flags |= HFI1_S_WAIT_TID_RESP;
			} else {
				priv->s_state = TID_OP(WRITE_DATA_LAST);
				opcode = TID_OP(WRITE_DATA_LAST);

				/* Advance the s_tid_tail now */
				update_tid_tail(qp);
			}
		}
		hwords += sizeof(ohdr->u.tid_rdma.w_data) / sizeof(u32);
		ss = &priv->tid_ss;
		break;

	case TID_OP(RESYNC):
		trace_hfi1_sender_make_tid_pkt(qp);
		/* Use generation from the most recently received response */
		wqe = rvt_get_swqe_ptr(qp, priv->s_tid_cur);
		req = wqe_to_tid_req(wqe);
		/* If no responses for this WQE look at the previous one */
		if (!req->comp_seg) {
			wqe = rvt_get_swqe_ptr(qp,
					       (!priv->s_tid_cur ? qp->s_size :
						priv->s_tid_cur) - 1);
			req = wqe_to_tid_req(wqe);
		}
		hwords += hfi1_build_tid_rdma_resync(qp, wqe, ohdr, &bth1,
						     &bth2,
						     CIRC_PREV(req->setup_head,
							       MAX_FLOWS));
		/* RESYNC carries no payload. */
		ss = NULL;
		len = 0;
		opcode = TID_OP(RESYNC);
		break;

	default:
		goto bail;
	}
	/* Honor the "send exactly one packet" mode set by the retry path. */
	if (priv->s_flags & RVT_S_SEND_ONE) {
		priv->s_flags &= ~RVT_S_SEND_ONE;
		priv->s_flags |= RVT_S_WAIT_ACK;
		bth2 |= IB_BTH_REQ_ACK;
	}
	qp->s_len -= len;
	ps->s_txreq->hdr_dwords = hwords;
	ps->s_txreq->sde = priv->s_sde;
	ps->s_txreq->ss = ss;
	ps->s_txreq->s_cur_size = len;
	hfi1_make_ruc_header(qp, ohdr, (opcode << 24), bth1, bth2,
			     middle, ps);
	return 1;
bail:
	hfi1_put_txreq(ps->s_txreq);
bail_no_tx:
	ps->s_txreq = NULL;
	priv->s_flags &= ~RVT_S_BUSY;
	/*
	 * If we didn't get a txreq, the QP will be woken up later to try
	 * again; set the iowait flag so the wakeup knows which work item
	 * to run.
	 * (A better algorithm should be found to do this and generalize the
	 * sleep/wakeup flags.)
	 */
	iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
	return 0;
}
51738c2ecf20Sopenharmony_ci
51748c2ecf20Sopenharmony_cistatic int make_tid_rdma_ack(struct rvt_qp *qp,
51758c2ecf20Sopenharmony_ci			     struct ib_other_headers *ohdr,
51768c2ecf20Sopenharmony_ci			     struct hfi1_pkt_state *ps)
51778c2ecf20Sopenharmony_ci{
51788c2ecf20Sopenharmony_ci	struct rvt_ack_entry *e;
51798c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
51808c2ecf20Sopenharmony_ci	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
51818c2ecf20Sopenharmony_ci	u32 hwords, next;
51828c2ecf20Sopenharmony_ci	u32 len = 0;
51838c2ecf20Sopenharmony_ci	u32 bth1 = 0, bth2 = 0;
51848c2ecf20Sopenharmony_ci	int middle = 0;
51858c2ecf20Sopenharmony_ci	u16 flow;
51868c2ecf20Sopenharmony_ci	struct tid_rdma_request *req, *nreq;
51878c2ecf20Sopenharmony_ci
51888c2ecf20Sopenharmony_ci	trace_hfi1_tid_write_rsp_make_tid_ack(qp);
51898c2ecf20Sopenharmony_ci	/* Don't send an ACK if we aren't supposed to. */
51908c2ecf20Sopenharmony_ci	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
51918c2ecf20Sopenharmony_ci		goto bail;
51928c2ecf20Sopenharmony_ci
51938c2ecf20Sopenharmony_ci	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
51948c2ecf20Sopenharmony_ci	hwords = 5;
51958c2ecf20Sopenharmony_ci
51968c2ecf20Sopenharmony_ci	e = &qp->s_ack_queue[qpriv->r_tid_ack];
51978c2ecf20Sopenharmony_ci	req = ack_to_tid_req(e);
51988c2ecf20Sopenharmony_ci	/*
51998c2ecf20Sopenharmony_ci	 * In the RESYNC case, we are exactly one segment past the
52008c2ecf20Sopenharmony_ci	 * previously sent ack or at the previously sent NAK. So to send
52018c2ecf20Sopenharmony_ci	 * the resync ack, we go back one segment (which might be part of
52028c2ecf20Sopenharmony_ci	 * the previous request) and let the do-while loop execute again.
52038c2ecf20Sopenharmony_ci	 * The advantage of executing the do-while loop is that any data
52048c2ecf20Sopenharmony_ci	 * received after the previous ack is automatically acked in the
52058c2ecf20Sopenharmony_ci	 * RESYNC ack. It turns out that for the do-while loop we only need
52068c2ecf20Sopenharmony_ci	 * to pull back qpriv->r_tid_ack, not the segment
52078c2ecf20Sopenharmony_ci	 * indices/counters. The scheme works even if the previous request
52088c2ecf20Sopenharmony_ci	 * was not a TID WRITE request.
52098c2ecf20Sopenharmony_ci	 */
52108c2ecf20Sopenharmony_ci	if (qpriv->resync) {
52118c2ecf20Sopenharmony_ci		if (!req->ack_seg || req->ack_seg == req->total_segs)
52128c2ecf20Sopenharmony_ci			qpriv->r_tid_ack = !qpriv->r_tid_ack ?
52138c2ecf20Sopenharmony_ci				rvt_size_atomic(&dev->rdi) :
52148c2ecf20Sopenharmony_ci				qpriv->r_tid_ack - 1;
52158c2ecf20Sopenharmony_ci		e = &qp->s_ack_queue[qpriv->r_tid_ack];
52168c2ecf20Sopenharmony_ci		req = ack_to_tid_req(e);
52178c2ecf20Sopenharmony_ci	}
52188c2ecf20Sopenharmony_ci
52198c2ecf20Sopenharmony_ci	trace_hfi1_rsp_make_tid_ack(qp, e->psn);
52208c2ecf20Sopenharmony_ci	trace_hfi1_tid_req_make_tid_ack(qp, 0, e->opcode, e->psn, e->lpsn,
52218c2ecf20Sopenharmony_ci					req);
52228c2ecf20Sopenharmony_ci	/*
52238c2ecf20Sopenharmony_ci	 * If we've sent all the ACKs that we can, we are done
52248c2ecf20Sopenharmony_ci	 * until we get more segments...
52258c2ecf20Sopenharmony_ci	 */
52268c2ecf20Sopenharmony_ci	if (!qpriv->s_nak_state && !qpriv->resync &&
52278c2ecf20Sopenharmony_ci	    req->ack_seg == req->comp_seg)
52288c2ecf20Sopenharmony_ci		goto bail;
52298c2ecf20Sopenharmony_ci
52308c2ecf20Sopenharmony_ci	do {
52318c2ecf20Sopenharmony_ci		/*
52328c2ecf20Sopenharmony_ci		 * To deal with coalesced ACKs, the acked_tail pointer
52338c2ecf20Sopenharmony_ci		 * into the flow array is used. The distance between it
52348c2ecf20Sopenharmony_ci		 * and the clear_tail is the number of flows that are
52358c2ecf20Sopenharmony_ci		 * being ACK'ed.
52368c2ecf20Sopenharmony_ci		 */
52378c2ecf20Sopenharmony_ci		req->ack_seg +=
52388c2ecf20Sopenharmony_ci			/* Get up-to-date value */
52398c2ecf20Sopenharmony_ci			CIRC_CNT(req->clear_tail, req->acked_tail,
52408c2ecf20Sopenharmony_ci				 MAX_FLOWS);
52418c2ecf20Sopenharmony_ci		/* Advance acked index */
52428c2ecf20Sopenharmony_ci		req->acked_tail = req->clear_tail;
52438c2ecf20Sopenharmony_ci
52448c2ecf20Sopenharmony_ci		/*
52458c2ecf20Sopenharmony_ci		 * req->clear_tail points to the segment currently being
52468c2ecf20Sopenharmony_ci		 * received. So, when sending an ACK, the previous
52478c2ecf20Sopenharmony_ci		 * segment is being ACK'ed.
52488c2ecf20Sopenharmony_ci		 */
52498c2ecf20Sopenharmony_ci		flow = CIRC_PREV(req->acked_tail, MAX_FLOWS);
52508c2ecf20Sopenharmony_ci		if (req->ack_seg != req->total_segs)
52518c2ecf20Sopenharmony_ci			break;
52528c2ecf20Sopenharmony_ci		req->state = TID_REQUEST_COMPLETE;
52538c2ecf20Sopenharmony_ci
52548c2ecf20Sopenharmony_ci		next = qpriv->r_tid_ack + 1;
52558c2ecf20Sopenharmony_ci		if (next > rvt_size_atomic(&dev->rdi))
52568c2ecf20Sopenharmony_ci			next = 0;
52578c2ecf20Sopenharmony_ci		qpriv->r_tid_ack = next;
52588c2ecf20Sopenharmony_ci		if (qp->s_ack_queue[next].opcode != TID_OP(WRITE_REQ))
52598c2ecf20Sopenharmony_ci			break;
52608c2ecf20Sopenharmony_ci		nreq = ack_to_tid_req(&qp->s_ack_queue[next]);
52618c2ecf20Sopenharmony_ci		if (!nreq->comp_seg || nreq->ack_seg == nreq->comp_seg)
52628c2ecf20Sopenharmony_ci			break;
52638c2ecf20Sopenharmony_ci
52648c2ecf20Sopenharmony_ci		/* Move to the next ack entry now */
52658c2ecf20Sopenharmony_ci		e = &qp->s_ack_queue[qpriv->r_tid_ack];
52668c2ecf20Sopenharmony_ci		req = ack_to_tid_req(e);
52678c2ecf20Sopenharmony_ci	} while (1);
52688c2ecf20Sopenharmony_ci
52698c2ecf20Sopenharmony_ci	/*
52708c2ecf20Sopenharmony_ci	 * At this point qpriv->r_tid_ack == qpriv->r_tid_tail but e and
52718c2ecf20Sopenharmony_ci	 * req could be pointing at the previous ack queue entry
52728c2ecf20Sopenharmony_ci	 */
52738c2ecf20Sopenharmony_ci	if (qpriv->s_nak_state ||
52748c2ecf20Sopenharmony_ci	    (qpriv->resync &&
52758c2ecf20Sopenharmony_ci	     !hfi1_tid_rdma_is_resync_psn(qpriv->r_next_psn_kdeth - 1) &&
52768c2ecf20Sopenharmony_ci	     (cmp_psn(qpriv->r_next_psn_kdeth - 1,
52778c2ecf20Sopenharmony_ci		      full_flow_psn(&req->flows[flow],
52788c2ecf20Sopenharmony_ci				    req->flows[flow].flow_state.lpsn)) > 0))) {
52798c2ecf20Sopenharmony_ci		/*
52808c2ecf20Sopenharmony_ci		 * A NAK will implicitly acknowledge all previous TID RDMA
52818c2ecf20Sopenharmony_ci		 * requests. Therefore, we NAK with the req->acked_tail
52828c2ecf20Sopenharmony_ci		 * segment for the request at qpriv->r_tid_ack (same at
52838c2ecf20Sopenharmony_ci		 * this point as the req->clear_tail segment for the
52848c2ecf20Sopenharmony_ci		 * qpriv->r_tid_tail request)
52858c2ecf20Sopenharmony_ci		 */
52868c2ecf20Sopenharmony_ci		e = &qp->s_ack_queue[qpriv->r_tid_ack];
52878c2ecf20Sopenharmony_ci		req = ack_to_tid_req(e);
52888c2ecf20Sopenharmony_ci		flow = req->acked_tail;
52898c2ecf20Sopenharmony_ci	} else if (req->ack_seg == req->total_segs &&
52908c2ecf20Sopenharmony_ci		   qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK)
52918c2ecf20Sopenharmony_ci		qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
52928c2ecf20Sopenharmony_ci
52938c2ecf20Sopenharmony_ci	trace_hfi1_tid_write_rsp_make_tid_ack(qp);
52948c2ecf20Sopenharmony_ci	trace_hfi1_tid_req_make_tid_ack(qp, 0, e->opcode, e->psn, e->lpsn,
52958c2ecf20Sopenharmony_ci					req);
52968c2ecf20Sopenharmony_ci	hwords += hfi1_build_tid_rdma_write_ack(qp, e, ohdr, flow, &bth1,
52978c2ecf20Sopenharmony_ci						&bth2);
52988c2ecf20Sopenharmony_ci	len = 0;
52998c2ecf20Sopenharmony_ci	qpriv->s_flags &= ~RVT_S_ACK_PENDING;
53008c2ecf20Sopenharmony_ci	ps->s_txreq->hdr_dwords = hwords;
53018c2ecf20Sopenharmony_ci	ps->s_txreq->sde = qpriv->s_sde;
53028c2ecf20Sopenharmony_ci	ps->s_txreq->s_cur_size = len;
53038c2ecf20Sopenharmony_ci	ps->s_txreq->ss = NULL;
53048c2ecf20Sopenharmony_ci	hfi1_make_ruc_header(qp, ohdr, (TID_OP(ACK) << 24), bth1, bth2, middle,
53058c2ecf20Sopenharmony_ci			     ps);
53068c2ecf20Sopenharmony_ci	ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
53078c2ecf20Sopenharmony_ci	return 1;
53088c2ecf20Sopenharmony_cibail:
53098c2ecf20Sopenharmony_ci	/*
53108c2ecf20Sopenharmony_ci	 * Ensure s_rdma_ack_cnt changes are committed prior to resetting
53118c2ecf20Sopenharmony_ci	 * RVT_S_RESP_PENDING
53128c2ecf20Sopenharmony_ci	 */
53138c2ecf20Sopenharmony_ci	smp_wmb();
53148c2ecf20Sopenharmony_ci	qpriv->s_flags &= ~RVT_S_ACK_PENDING;
53158c2ecf20Sopenharmony_ci	return 0;
53168c2ecf20Sopenharmony_ci}
53178c2ecf20Sopenharmony_ci
53188c2ecf20Sopenharmony_cistatic int hfi1_send_tid_ok(struct rvt_qp *qp)
53198c2ecf20Sopenharmony_ci{
53208c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
53218c2ecf20Sopenharmony_ci
53228c2ecf20Sopenharmony_ci	return !(priv->s_flags & RVT_S_BUSY ||
53238c2ecf20Sopenharmony_ci		 qp->s_flags & HFI1_S_ANY_WAIT_IO) &&
53248c2ecf20Sopenharmony_ci		(verbs_txreq_queued(iowait_get_tid_work(&priv->s_iowait)) ||
53258c2ecf20Sopenharmony_ci		 (priv->s_flags & RVT_S_RESP_PENDING) ||
53268c2ecf20Sopenharmony_ci		 !(qp->s_flags & HFI1_S_ANY_TID_WAIT_SEND));
53278c2ecf20Sopenharmony_ci}
53288c2ecf20Sopenharmony_ci
53298c2ecf20Sopenharmony_civoid _hfi1_do_tid_send(struct work_struct *work)
53308c2ecf20Sopenharmony_ci{
53318c2ecf20Sopenharmony_ci	struct iowait_work *w = container_of(work, struct iowait_work, iowork);
53328c2ecf20Sopenharmony_ci	struct rvt_qp *qp = iowait_to_qp(w->iow);
53338c2ecf20Sopenharmony_ci
53348c2ecf20Sopenharmony_ci	hfi1_do_tid_send(qp);
53358c2ecf20Sopenharmony_ci}
53368c2ecf20Sopenharmony_ci
53378c2ecf20Sopenharmony_cistatic void hfi1_do_tid_send(struct rvt_qp *qp)
53388c2ecf20Sopenharmony_ci{
53398c2ecf20Sopenharmony_ci	struct hfi1_pkt_state ps;
53408c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
53418c2ecf20Sopenharmony_ci
53428c2ecf20Sopenharmony_ci	ps.dev = to_idev(qp->ibqp.device);
53438c2ecf20Sopenharmony_ci	ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
53448c2ecf20Sopenharmony_ci	ps.ppd = ppd_from_ibp(ps.ibp);
53458c2ecf20Sopenharmony_ci	ps.wait = iowait_get_tid_work(&priv->s_iowait);
53468c2ecf20Sopenharmony_ci	ps.in_thread = false;
53478c2ecf20Sopenharmony_ci	ps.timeout_int = qp->timeout_jiffies / 8;
53488c2ecf20Sopenharmony_ci
53498c2ecf20Sopenharmony_ci	trace_hfi1_rc_do_tid_send(qp, false);
53508c2ecf20Sopenharmony_ci	spin_lock_irqsave(&qp->s_lock, ps.flags);
53518c2ecf20Sopenharmony_ci
53528c2ecf20Sopenharmony_ci	/* Return if we are already busy processing a work request. */
53538c2ecf20Sopenharmony_ci	if (!hfi1_send_tid_ok(qp)) {
53548c2ecf20Sopenharmony_ci		if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
53558c2ecf20Sopenharmony_ci			iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
53568c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&qp->s_lock, ps.flags);
53578c2ecf20Sopenharmony_ci		return;
53588c2ecf20Sopenharmony_ci	}
53598c2ecf20Sopenharmony_ci
53608c2ecf20Sopenharmony_ci	priv->s_flags |= RVT_S_BUSY;
53618c2ecf20Sopenharmony_ci
53628c2ecf20Sopenharmony_ci	ps.timeout = jiffies + ps.timeout_int;
53638c2ecf20Sopenharmony_ci	ps.cpu = priv->s_sde ? priv->s_sde->cpu :
53648c2ecf20Sopenharmony_ci		cpumask_first(cpumask_of_node(ps.ppd->dd->node));
53658c2ecf20Sopenharmony_ci	ps.pkts_sent = false;
53668c2ecf20Sopenharmony_ci
53678c2ecf20Sopenharmony_ci	/* insure a pre-built packet is handled  */
53688c2ecf20Sopenharmony_ci	ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
53698c2ecf20Sopenharmony_ci	do {
53708c2ecf20Sopenharmony_ci		/* Check for a constructed packet to be sent. */
53718c2ecf20Sopenharmony_ci		if (ps.s_txreq) {
53728c2ecf20Sopenharmony_ci			if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
53738c2ecf20Sopenharmony_ci				qp->s_flags |= RVT_S_BUSY;
53748c2ecf20Sopenharmony_ci				ps.wait = iowait_get_ib_work(&priv->s_iowait);
53758c2ecf20Sopenharmony_ci			}
53768c2ecf20Sopenharmony_ci			spin_unlock_irqrestore(&qp->s_lock, ps.flags);
53778c2ecf20Sopenharmony_ci
53788c2ecf20Sopenharmony_ci			/*
53798c2ecf20Sopenharmony_ci			 * If the packet cannot be sent now, return and
53808c2ecf20Sopenharmony_ci			 * the send tasklet will be woken up later.
53818c2ecf20Sopenharmony_ci			 */
53828c2ecf20Sopenharmony_ci			if (hfi1_verbs_send(qp, &ps))
53838c2ecf20Sopenharmony_ci				return;
53848c2ecf20Sopenharmony_ci
53858c2ecf20Sopenharmony_ci			/* allow other tasks to run */
53868c2ecf20Sopenharmony_ci			if (hfi1_schedule_send_yield(qp, &ps, true))
53878c2ecf20Sopenharmony_ci				return;
53888c2ecf20Sopenharmony_ci
53898c2ecf20Sopenharmony_ci			spin_lock_irqsave(&qp->s_lock, ps.flags);
53908c2ecf20Sopenharmony_ci			if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
53918c2ecf20Sopenharmony_ci				qp->s_flags &= ~RVT_S_BUSY;
53928c2ecf20Sopenharmony_ci				priv->s_flags &= ~HFI1_S_TID_BUSY_SET;
53938c2ecf20Sopenharmony_ci				ps.wait = iowait_get_tid_work(&priv->s_iowait);
53948c2ecf20Sopenharmony_ci				if (iowait_flag_set(&priv->s_iowait,
53958c2ecf20Sopenharmony_ci						    IOWAIT_PENDING_IB))
53968c2ecf20Sopenharmony_ci					hfi1_schedule_send(qp);
53978c2ecf20Sopenharmony_ci			}
53988c2ecf20Sopenharmony_ci		}
53998c2ecf20Sopenharmony_ci	} while (hfi1_make_tid_rdma_pkt(qp, &ps));
54008c2ecf20Sopenharmony_ci	iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
54018c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&qp->s_lock, ps.flags);
54028c2ecf20Sopenharmony_ci}
54038c2ecf20Sopenharmony_ci
54048c2ecf20Sopenharmony_cistatic bool _hfi1_schedule_tid_send(struct rvt_qp *qp)
54058c2ecf20Sopenharmony_ci{
54068c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
54078c2ecf20Sopenharmony_ci	struct hfi1_ibport *ibp =
54088c2ecf20Sopenharmony_ci		to_iport(qp->ibqp.device, qp->port_num);
54098c2ecf20Sopenharmony_ci	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
54108c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd = ppd->dd;
54118c2ecf20Sopenharmony_ci
54128c2ecf20Sopenharmony_ci	if ((dd->flags & HFI1_SHUTDOWN))
54138c2ecf20Sopenharmony_ci		return true;
54148c2ecf20Sopenharmony_ci
54158c2ecf20Sopenharmony_ci	return iowait_tid_schedule(&priv->s_iowait, ppd->hfi1_wq,
54168c2ecf20Sopenharmony_ci				   priv->s_sde ?
54178c2ecf20Sopenharmony_ci				   priv->s_sde->cpu :
54188c2ecf20Sopenharmony_ci				   cpumask_first(cpumask_of_node(dd->node)));
54198c2ecf20Sopenharmony_ci}
54208c2ecf20Sopenharmony_ci
54218c2ecf20Sopenharmony_ci/**
54228c2ecf20Sopenharmony_ci * hfi1_schedule_tid_send - schedule progress on TID RDMA state machine
54238c2ecf20Sopenharmony_ci * @qp: the QP
54248c2ecf20Sopenharmony_ci *
54258c2ecf20Sopenharmony_ci * This schedules qp progress on the TID RDMA state machine. Caller
54268c2ecf20Sopenharmony_ci * should hold the s_lock.
54278c2ecf20Sopenharmony_ci * Unlike hfi1_schedule_send(), this cannot use hfi1_send_ok() because
54288c2ecf20Sopenharmony_ci * the two state machines can step on each other with respect to the
54298c2ecf20Sopenharmony_ci * RVT_S_BUSY flag.
54308c2ecf20Sopenharmony_ci * Therefore, a modified test is used.
54318c2ecf20Sopenharmony_ci * @return true if the second leg is scheduled;
54328c2ecf20Sopenharmony_ci *  false if the second leg is not scheduled.
54338c2ecf20Sopenharmony_ci */
54348c2ecf20Sopenharmony_cibool hfi1_schedule_tid_send(struct rvt_qp *qp)
54358c2ecf20Sopenharmony_ci{
54368c2ecf20Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
54378c2ecf20Sopenharmony_ci	if (hfi1_send_tid_ok(qp)) {
54388c2ecf20Sopenharmony_ci		/*
54398c2ecf20Sopenharmony_ci		 * The following call returns true if the qp is not on the
54408c2ecf20Sopenharmony_ci		 * queue and false if the qp is already on the queue before
54418c2ecf20Sopenharmony_ci		 * this call. Either way, the qp will be on the queue when the
54428c2ecf20Sopenharmony_ci		 * call returns.
54438c2ecf20Sopenharmony_ci		 */
54448c2ecf20Sopenharmony_ci		_hfi1_schedule_tid_send(qp);
54458c2ecf20Sopenharmony_ci		return true;
54468c2ecf20Sopenharmony_ci	}
54478c2ecf20Sopenharmony_ci	if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
54488c2ecf20Sopenharmony_ci		iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
54498c2ecf20Sopenharmony_ci				IOWAIT_PENDING_TID);
54508c2ecf20Sopenharmony_ci	return false;
54518c2ecf20Sopenharmony_ci}
54528c2ecf20Sopenharmony_ci
54538c2ecf20Sopenharmony_cibool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e)
54548c2ecf20Sopenharmony_ci{
54558c2ecf20Sopenharmony_ci	struct rvt_ack_entry *prev;
54568c2ecf20Sopenharmony_ci	struct tid_rdma_request *req;
54578c2ecf20Sopenharmony_ci	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
54588c2ecf20Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
54598c2ecf20Sopenharmony_ci	u32 s_prev;
54608c2ecf20Sopenharmony_ci
54618c2ecf20Sopenharmony_ci	s_prev = qp->s_tail_ack_queue == 0 ? rvt_size_atomic(&dev->rdi) :
54628c2ecf20Sopenharmony_ci		(qp->s_tail_ack_queue - 1);
54638c2ecf20Sopenharmony_ci	prev = &qp->s_ack_queue[s_prev];
54648c2ecf20Sopenharmony_ci
54658c2ecf20Sopenharmony_ci	if ((e->opcode == TID_OP(READ_REQ) ||
54668c2ecf20Sopenharmony_ci	     e->opcode == OP(RDMA_READ_REQUEST)) &&
54678c2ecf20Sopenharmony_ci	    prev->opcode == TID_OP(WRITE_REQ)) {
54688c2ecf20Sopenharmony_ci		req = ack_to_tid_req(prev);
54698c2ecf20Sopenharmony_ci		if (req->ack_seg != req->total_segs) {
54708c2ecf20Sopenharmony_ci			priv->s_flags |= HFI1_R_TID_WAIT_INTERLCK;
54718c2ecf20Sopenharmony_ci			return true;
54728c2ecf20Sopenharmony_ci		}
54738c2ecf20Sopenharmony_ci	}
54748c2ecf20Sopenharmony_ci	return false;
54758c2ecf20Sopenharmony_ci}
54768c2ecf20Sopenharmony_ci
54778c2ecf20Sopenharmony_cistatic u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx)
54788c2ecf20Sopenharmony_ci{
54798c2ecf20Sopenharmony_ci	u64 reg;
54808c2ecf20Sopenharmony_ci
54818c2ecf20Sopenharmony_ci	/*
54828c2ecf20Sopenharmony_ci	 * The only sane way to get the amount of
54838c2ecf20Sopenharmony_ci	 * progress is to read the HW flow state.
54848c2ecf20Sopenharmony_ci	 */
54858c2ecf20Sopenharmony_ci	reg = read_uctxt_csr(dd, ctxt, RCV_TID_FLOW_TABLE + (8 * fidx));
54868c2ecf20Sopenharmony_ci	return mask_psn(reg);
54878c2ecf20Sopenharmony_ci}
54888c2ecf20Sopenharmony_ci
54898c2ecf20Sopenharmony_cistatic void tid_rdma_rcv_err(struct hfi1_packet *packet,
54908c2ecf20Sopenharmony_ci			     struct ib_other_headers *ohdr,
54918c2ecf20Sopenharmony_ci			     struct rvt_qp *qp, u32 psn, int diff, bool fecn)
54928c2ecf20Sopenharmony_ci{
54938c2ecf20Sopenharmony_ci	unsigned long flags;
54948c2ecf20Sopenharmony_ci
54958c2ecf20Sopenharmony_ci	tid_rdma_rcv_error(packet, ohdr, qp, psn, diff);
54968c2ecf20Sopenharmony_ci	if (fecn) {
54978c2ecf20Sopenharmony_ci		spin_lock_irqsave(&qp->s_lock, flags);
54988c2ecf20Sopenharmony_ci		qp->s_flags |= RVT_S_ECN;
54998c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&qp->s_lock, flags);
55008c2ecf20Sopenharmony_ci	}
55018c2ecf20Sopenharmony_ci}
55028c2ecf20Sopenharmony_ci
55038c2ecf20Sopenharmony_cistatic void update_r_next_psn_fecn(struct hfi1_packet *packet,
55048c2ecf20Sopenharmony_ci				   struct hfi1_qp_priv *priv,
55058c2ecf20Sopenharmony_ci				   struct hfi1_ctxtdata *rcd,
55068c2ecf20Sopenharmony_ci				   struct tid_rdma_flow *flow,
55078c2ecf20Sopenharmony_ci				   bool fecn)
55088c2ecf20Sopenharmony_ci{
55098c2ecf20Sopenharmony_ci	/*
55108c2ecf20Sopenharmony_ci	 * If a start/middle packet is delivered here due to
55118c2ecf20Sopenharmony_ci	 * RSM rule and FECN, we need to update the r_next_psn.
55128c2ecf20Sopenharmony_ci	 */
55138c2ecf20Sopenharmony_ci	if (fecn && packet->etype == RHF_RCV_TYPE_EAGER &&
55148c2ecf20Sopenharmony_ci	    !(priv->s_flags & HFI1_R_TID_SW_PSN)) {
55158c2ecf20Sopenharmony_ci		struct hfi1_devdata *dd = rcd->dd;
55168c2ecf20Sopenharmony_ci
55178c2ecf20Sopenharmony_ci		flow->flow_state.r_next_psn =
55188c2ecf20Sopenharmony_ci			read_r_next_psn(dd, rcd->ctxt, flow->idx);
55198c2ecf20Sopenharmony_ci	}
55208c2ecf20Sopenharmony_ci}
5521