// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Copyright(c) 2018 - 2020 Intel Corporation.
 *
 */

#include "hfi.h"
#include "qp.h"
#include "rc.h"
#include "verbs.h"
#include "tid_rdma.h"
#include "exp_rcv.h"
#include "trace.h"

/**
 * DOC: TID RDMA READ protocol
 *
 * This is an end-to-end protocol at the hfi1 level between two nodes that
 * improves performance by avoiding data copy on the requester side. It
 * converts a qualified RDMA READ request into a TID RDMA READ request on
 * the requester side and thereafter handles the request and response
 * differently. To be qualified, the RDMA READ request should meet the
 * following:
 * -- The total data length should be greater than 256K;
 * -- The total data length should be a multiple of 4K page size;
 * -- Each local scatter-gather entry should be 4K page aligned;
 * -- Each local scatter-gather entry should be a multiple of 4K page size;
 */
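
/*
 * A minimal sketch (not part of this driver) of the qualification check
 * described above; the helper name, the flat sges[] array and its field
 * names are assumptions for illustration only:
 *
 *	static bool example_tid_read_qualifies(u32 total_len,
 *					       struct rvt_sge *sges, u8 nsge)
 *	{
 *		u8 i;
 *
 *		if (total_len <= SZ_256K || !IS_ALIGNED(total_len, SZ_4K))
 *			return false;
 *		for (i = 0; i < nsge; i++)
 *			if (!IS_ALIGNED((uintptr_t)sges[i].vaddr, SZ_4K) ||
 *			    !IS_ALIGNED(sges[i].sge_length, SZ_4K))
 *				return false;
 *		return true;
 *	}
 */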

#define RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK BIT_ULL(32)
#define RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK BIT_ULL(33)
#define RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK BIT_ULL(34)
#define RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK BIT_ULL(35)
#define RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK BIT_ULL(37)
#define RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK BIT_ULL(38)

/* Maximum number of packets within a flow generation. */
#define MAX_TID_FLOW_PSN BIT(HFI1_KDETH_BTH_SEQ_SHIFT)

#define GENERATION_MASK 0xFFFFF

static u32 mask_generation(u32 a)
{
	return a & GENERATION_MASK;
}

/* Reserved generation value to set to unused flows for kernel contexts */
#define KERN_GENERATION_RESERVED mask_generation(U32_MAX)

/*
 * J_KEY for kernel contexts when TID RDMA is used.
 * See generate_jkey() in hfi.h for more information.
 */
#define TID_RDMA_JKEY                   32
#define HFI1_KERNEL_MIN_JKEY HFI1_ADMIN_JKEY_RANGE
#define HFI1_KERNEL_MAX_JKEY (2 * HFI1_ADMIN_JKEY_RANGE - 1)

/* Maximum number of segments in flight per QP request. */
#define TID_RDMA_MAX_READ_SEGS_PER_REQ  6
#define TID_RDMA_MAX_WRITE_SEGS_PER_REQ 4
#define MAX_REQ max_t(u16, TID_RDMA_MAX_READ_SEGS_PER_REQ, \
			TID_RDMA_MAX_WRITE_SEGS_PER_REQ)
#define MAX_FLOWS roundup_pow_of_two(MAX_REQ + 1)
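
/*
 * With the values above, MAX_REQ = max(6, 4) = 6 and MAX_FLOWS =
 * roundup_pow_of_two(6 + 1) = 8: one flow for each segment that can be
 * in flight plus one, rounded up to a power of two.
 */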

#define MAX_EXPECTED_PAGES     (MAX_EXPECTED_BUFFER / PAGE_SIZE)

#define TID_RDMA_DESTQP_FLOW_SHIFT      11
#define TID_RDMA_DESTQP_FLOW_MASK       0x1f

#define TID_OPFN_QP_CTXT_MASK 0xff
#define TID_OPFN_QP_CTXT_SHIFT 56
#define TID_OPFN_QP_KDETH_MASK 0xff
#define TID_OPFN_QP_KDETH_SHIFT 48
#define TID_OPFN_MAX_LEN_MASK 0x7ff
#define TID_OPFN_MAX_LEN_SHIFT 37
#define TID_OPFN_TIMEOUT_MASK 0x1f
#define TID_OPFN_TIMEOUT_SHIFT 32
#define TID_OPFN_RESERVED_MASK 0x3f
#define TID_OPFN_RESERVED_SHIFT 26
#define TID_OPFN_URG_MASK 0x1
#define TID_OPFN_URG_SHIFT 25
#define TID_OPFN_VER_MASK 0x7
#define TID_OPFN_VER_SHIFT 22
#define TID_OPFN_JKEY_MASK 0x3f
#define TID_OPFN_JKEY_SHIFT 16
#define TID_OPFN_MAX_READ_MASK 0x3f
#define TID_OPFN_MAX_READ_SHIFT 10
#define TID_OPFN_MAX_WRITE_MASK 0x3f
#define TID_OPFN_MAX_WRITE_SHIFT 4

/*
 * OPFN TID layout
 *
 * 63               47               31               15
 * NNNNNNNNKKKKKKKK MMMMMMMMMMMTTTTT DDDDDDUVVVJJJJJJ RRRRRRWWWWWWCCCC
 * 3210987654321098 7654321098765432 1098765432109876 5432109876543210
 * N - the context Number
 * K - the Kdeth_qp
 * M - Max_len
 * T - Timeout
 * D - reserveD
 * V - version
 * U - Urg capable
 * J - Jkey
 * R - max_Read
 * W - max_Write
 * C - Capcode
 */
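
/*
 * Worked example of the layout above: with 4KB pages, a max_len of 256KB
 * is stored as (256K >> PAGE_SHIFT) - 1 = 63 in the 11-bit M field at
 * bits 47:37, and tid_rdma_opfn_decode() recovers it as
 * (63 + 1) << PAGE_SHIFT = 256KB. Every other field round-trips the same
 * way through its TID_OPFN_*_MASK/_SHIFT pair.
 */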

static void tid_rdma_trigger_resume(struct work_struct *work);
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
					 gfp_t gfp);
static void hfi1_init_trdma_req(struct rvt_qp *qp,
				struct tid_rdma_request *req);
static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx);
static void hfi1_tid_timeout(struct timer_list *t);
static void hfi1_add_tid_reap_timer(struct rvt_qp *qp);
static void hfi1_mod_tid_reap_timer(struct rvt_qp *qp);
static void hfi1_mod_tid_retry_timer(struct rvt_qp *qp);
static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp);
static void hfi1_tid_retry_timeout(struct timer_list *t);
static int make_tid_rdma_ack(struct rvt_qp *qp,
			     struct ib_other_headers *ohdr,
			     struct hfi1_pkt_state *ps);
static void hfi1_do_tid_send(struct rvt_qp *qp);
static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx);
static void tid_rdma_rcv_err(struct hfi1_packet *packet,
			     struct ib_other_headers *ohdr,
			     struct rvt_qp *qp, u32 psn, int diff, bool fecn);
static void update_r_next_psn_fecn(struct hfi1_packet *packet,
				   struct hfi1_qp_priv *priv,
				   struct hfi1_ctxtdata *rcd,
				   struct tid_rdma_flow *flow,
				   bool fecn);

static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
{
	if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
		priv->r_tid_ack = priv->r_tid_tail;
}

static void tid_rdma_schedule_ack(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	priv->s_flags |= RVT_S_ACK_PENDING;
	hfi1_schedule_tid_send(qp);
}

static void tid_rdma_trigger_ack(struct rvt_qp *qp)
{
	validate_r_tid_ack(qp->priv);
	tid_rdma_schedule_ack(qp);
}

static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
{
	return
		(((u64)p->qp & TID_OPFN_QP_CTXT_MASK) <<
			TID_OPFN_QP_CTXT_SHIFT) |
		((((u64)p->qp >> 16) & TID_OPFN_QP_KDETH_MASK) <<
			TID_OPFN_QP_KDETH_SHIFT) |
		(((u64)((p->max_len >> PAGE_SHIFT) - 1) &
			TID_OPFN_MAX_LEN_MASK) << TID_OPFN_MAX_LEN_SHIFT) |
		(((u64)p->timeout & TID_OPFN_TIMEOUT_MASK) <<
			TID_OPFN_TIMEOUT_SHIFT) |
		(((u64)p->urg & TID_OPFN_URG_MASK) << TID_OPFN_URG_SHIFT) |
		(((u64)p->jkey & TID_OPFN_JKEY_MASK) << TID_OPFN_JKEY_SHIFT) |
		(((u64)p->max_read & TID_OPFN_MAX_READ_MASK) <<
			TID_OPFN_MAX_READ_SHIFT) |
		(((u64)p->max_write & TID_OPFN_MAX_WRITE_MASK) <<
			TID_OPFN_MAX_WRITE_SHIFT);
}

static void tid_rdma_opfn_decode(struct tid_rdma_params *p, u64 data)
{
	p->max_len = (((data >> TID_OPFN_MAX_LEN_SHIFT) &
		TID_OPFN_MAX_LEN_MASK) + 1) << PAGE_SHIFT;
	p->jkey = (data >> TID_OPFN_JKEY_SHIFT) & TID_OPFN_JKEY_MASK;
	p->max_write = (data >> TID_OPFN_MAX_WRITE_SHIFT) &
		TID_OPFN_MAX_WRITE_MASK;
	p->max_read = (data >> TID_OPFN_MAX_READ_SHIFT) &
		TID_OPFN_MAX_READ_MASK;
	p->qp =
		((((data >> TID_OPFN_QP_KDETH_SHIFT) & TID_OPFN_QP_KDETH_MASK)
			<< 16) |
		((data >> TID_OPFN_QP_CTXT_SHIFT) & TID_OPFN_QP_CTXT_MASK));
	p->urg = (data >> TID_OPFN_URG_SHIFT) & TID_OPFN_URG_MASK;
	p->timeout = (data >> TID_OPFN_TIMEOUT_SHIFT) & TID_OPFN_TIMEOUT_MASK;
}

void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p)
{
	struct hfi1_qp_priv *priv = qp->priv;

	p->qp = (RVT_KDETH_QP_PREFIX << 16) | priv->rcd->ctxt;
	p->max_len = TID_RDMA_MAX_SEGMENT_SIZE;
	p->jkey = priv->rcd->jkey;
	p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ;
	p->max_write = TID_RDMA_MAX_WRITE_SEGS_PER_REQ;
	p->timeout = qp->timeout;
	p->urg = is_urg_masked(priv->rcd);
}

bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data)
{
	struct hfi1_qp_priv *priv = qp->priv;

	*data = tid_rdma_opfn_encode(&priv->tid_rdma.local);
	return true;
}

bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct tid_rdma_params *remote, *old;
	bool ret = true;

	old = rcu_dereference_protected(priv->tid_rdma.remote,
					lockdep_is_held(&priv->opfn.lock));
	data &= ~0xfULL;
	/*
	 * If data passed in is zero, return true so as not to continue the
	 * negotiation process
	 */
	if (!data || !HFI1_CAP_IS_KSET(TID_RDMA))
		goto null;
	/*
	 * If kzalloc fails, return false. This will result in:
	 * * at the requester a new OPFN request being generated to retry
	 *   the negotiation
	 * * at the responder, 0 being returned to the requester so as to
	 *   disable TID RDMA at both the requester and the responder
	 */
	remote = kzalloc(sizeof(*remote), GFP_ATOMIC);
	if (!remote) {
		ret = false;
		goto null;
	}

	tid_rdma_opfn_decode(remote, data);
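	/*
	 * remote->timeout carries an IB-style exponent: the base timeout is
	 * 4096ns << timeout. The expression below converts that to usecs
	 * (/ 1000) and scales it by 8 * 7 = 56 base timeouts before
	 * converting to jiffies.
	 */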
	priv->tid_timer_timeout_jiffies =
		usecs_to_jiffies((((4096UL * (1UL << remote->timeout)) /
				   1000UL) << 3) * 7);
	trace_hfi1_opfn_param(qp, 0, &priv->tid_rdma.local);
	trace_hfi1_opfn_param(qp, 1, remote);
	rcu_assign_pointer(priv->tid_rdma.remote, remote);
	/*
	 * A TID RDMA READ request's segment size is not equal to
	 * remote->max_len only when the request's data length is smaller
	 * than remote->max_len. In that case, there will be only one segment.
	 * Therefore, when priv->pkts_ps is used to calculate req->cur_seg
	 * during retry, it will lead to req->cur_seg = 0, which is exactly
	 * what is expected.
	 */
	priv->pkts_ps = (u16)rvt_div_mtu(qp, remote->max_len);
	priv->timeout_shift = ilog2(priv->pkts_ps - 1) + 1;
	goto free;
null:
	RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
	priv->timeout_shift = 0;
free:
	if (old)
		kfree_rcu(old, rcu_head);
	return ret;
}

bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data)
{
	bool ret;

	ret = tid_rdma_conn_reply(qp, *data);
	*data = 0;
	/*
	 * If tid_rdma_conn_reply() returns error, set *data as 0 to indicate
	 * TID RDMA could not be enabled. This will result in TID RDMA being
	 * disabled at the requester too.
	 */
	if (ret)
		(void)tid_rdma_conn_req(qp, data);
	return ret;
}

void tid_rdma_conn_error(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct tid_rdma_params *old;

	old = rcu_dereference_protected(priv->tid_rdma.remote,
					lockdep_is_held(&priv->opfn.lock));
	RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
	if (old)
		kfree_rcu(old, rcu_head);
}

/* This is called at context initialization time */
int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
{
	if (reinit)
		return 0;

	BUILD_BUG_ON(TID_RDMA_JKEY < HFI1_KERNEL_MIN_JKEY);
	BUILD_BUG_ON(TID_RDMA_JKEY > HFI1_KERNEL_MAX_JKEY);
	rcd->jkey = TID_RDMA_JKEY;
	hfi1_set_ctxt_jkey(rcd->dd, rcd, rcd->jkey);
	return hfi1_alloc_ctxt_rcv_groups(rcd);
}

/**
 * qp_to_rcd - determine the receive context used by a qp
 * @rdi: rvt dev struct
 * @qp: the qp
 *
 * This routine returns the receive context associated
 * with a qp's qpn.
 *
 * Returns the context.
 */
static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
				       struct rvt_qp *qp)
{
	struct hfi1_ibdev *verbs_dev = container_of(rdi,
						    struct hfi1_ibdev,
						    rdi);
	struct hfi1_devdata *dd = container_of(verbs_dev,
					       struct hfi1_devdata,
					       verbs_dev);
	unsigned int ctxt;

	if (qp->ibqp.qp_num == 0)
		ctxt = 0;
	else
		ctxt = hfi1_get_qp_map(dd, qp->ibqp.qp_num >> dd->qos_shift);
	return dd->rcd[ctxt];
}

int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
		      struct ib_qp_init_attr *init_attr)
{
	struct hfi1_qp_priv *qpriv = qp->priv;
	int i, ret;

	qpriv->rcd = qp_to_rcd(rdi, qp);

	spin_lock_init(&qpriv->opfn.lock);
	INIT_WORK(&qpriv->opfn.opfn_work, opfn_send_conn_request);
	INIT_WORK(&qpriv->tid_rdma.trigger_work, tid_rdma_trigger_resume);
	qpriv->flow_state.psn = 0;
	qpriv->flow_state.index = RXE_NUM_TID_FLOWS;
	qpriv->flow_state.last_index = RXE_NUM_TID_FLOWS;
	qpriv->flow_state.generation = KERN_GENERATION_RESERVED;
	qpriv->s_state = TID_OP(WRITE_RESP);
	qpriv->s_tid_cur = HFI1_QP_WQE_INVALID;
	qpriv->s_tid_head = HFI1_QP_WQE_INVALID;
	qpriv->s_tid_tail = HFI1_QP_WQE_INVALID;
	qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
	qpriv->r_tid_head = HFI1_QP_WQE_INVALID;
	qpriv->r_tid_tail = HFI1_QP_WQE_INVALID;
	qpriv->r_tid_ack = HFI1_QP_WQE_INVALID;
	qpriv->r_tid_alloc = HFI1_QP_WQE_INVALID;
	atomic_set(&qpriv->n_requests, 0);
	atomic_set(&qpriv->n_tid_requests, 0);
	timer_setup(&qpriv->s_tid_timer, hfi1_tid_timeout, 0);
	timer_setup(&qpriv->s_tid_retry_timer, hfi1_tid_retry_timeout, 0);
	INIT_LIST_HEAD(&qpriv->tid_wait);

	if (init_attr->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
		struct hfi1_devdata *dd = qpriv->rcd->dd;

		qpriv->pages = kzalloc_node(TID_RDMA_MAX_PAGES *
						sizeof(*qpriv->pages),
					    GFP_KERNEL, dd->node);
		if (!qpriv->pages)
			return -ENOMEM;
		for (i = 0; i < qp->s_size; i++) {
			struct hfi1_swqe_priv *priv;
			struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);

			priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
					    dd->node);
			if (!priv)
				return -ENOMEM;

			hfi1_init_trdma_req(qp, &priv->tid_req);
			priv->tid_req.e.swqe = wqe;
			wqe->priv = priv;
		}
		for (i = 0; i < rvt_max_atomic(rdi); i++) {
			struct hfi1_ack_priv *priv;

			priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
					    dd->node);
			if (!priv)
				return -ENOMEM;

			hfi1_init_trdma_req(qp, &priv->tid_req);
			priv->tid_req.e.ack = &qp->s_ack_queue[i];

			ret = hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req,
							    GFP_KERNEL);
			if (ret) {
				kfree(priv);
				return ret;
			}
			qp->s_ack_queue[i].priv = priv;
		}
	}

	return 0;
}

void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct rvt_swqe *wqe;
	u32 i;

	if (qp->ibqp.qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
		for (i = 0; i < qp->s_size; i++) {
			wqe = rvt_get_swqe_ptr(qp, i);
			kfree(wqe->priv);
			wqe->priv = NULL;
		}
		for (i = 0; i < rvt_max_atomic(rdi); i++) {
			struct hfi1_ack_priv *priv = qp->s_ack_queue[i].priv;

			if (priv)
				hfi1_kern_exp_rcv_free_flows(&priv->tid_req);
			kfree(priv);
			qp->s_ack_queue[i].priv = NULL;
		}
		cancel_work_sync(&qpriv->opfn.opfn_work);
		kfree(qpriv->pages);
		qpriv->pages = NULL;
	}
}

/* Flow and tid waiter functions */
/**
 * DOC: lock ordering
 *
 * There are two locks involved with the queuing
 * routines: the qp s_lock and the exp_lock.
 *
 * Since the tid space allocation is called from
 * the send engine, the qp s_lock is already held.
 *
 * The allocation routines will get the exp_lock.
 *
 * The first_qp() call is provided to allow the head of
 * the rcd wait queue to be fetched under the exp_lock and
 * followed by a drop of the exp_lock.
 *
 * Any qp in the wait list will have the qp reference count held
 * to hold the qp in memory.
 */
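
/*
 * A sketch of the pattern the DOC above describes; the concrete versions
 * appear in hfi1_kern_setup_hw_flow() and hfi1_kern_clear_hw_flow()
 * below:
 *
 *	lockdep_assert_held(&qp->s_lock);
 *	spin_lock_irqsave(&rcd->exp_lock, flags);
 *	... allocate flow/tid resources, dequeue this qp ...
 *	fqp = first_qp(rcd, queue);		// takes a qp reference
 *	spin_unlock_irqrestore(&rcd->exp_lock, flags);
 *	tid_rdma_schedule_tid_wakeup(fqp);	// disposes of the reference
 */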

/*
 * return head of rcd wait list
 *
 * Must hold the exp_lock.
 *
 * Get a reference to the QP to hold the QP in memory.
 *
 * The caller must release the reference when the local
 * pointer is no longer being used.
 */
static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd,
			       struct tid_queue *queue)
	__must_hold(&rcd->exp_lock)
{
	struct hfi1_qp_priv *priv;

	lockdep_assert_held(&rcd->exp_lock);
	priv = list_first_entry_or_null(&queue->queue_head,
					struct hfi1_qp_priv,
					tid_wait);
	if (!priv)
		return NULL;
	rvt_get_qp(priv->owner);
	return priv->owner;
}

/**
 * kernel_tid_waiters - determine rcd wait
 * @rcd: the receive context
 * @queue: the queue to operate on
 * @qp: the head of the qp being processed
 *
 * This routine will return false IFF
 * the list is empty or the head of the
 * list is the indicated qp.
 *
 * Must hold the qp s_lock and the exp_lock.
 *
 * Return:
 * false if either of the conditions below are satisfied:
 * 1. The list is empty or
 * 2. The indicated qp is at the head of the list and the
 *    HFI1_S_WAIT_TID_SPACE bit is set in qp->s_flags.
 * true is returned otherwise.
 */
static bool kernel_tid_waiters(struct hfi1_ctxtdata *rcd,
			       struct tid_queue *queue, struct rvt_qp *qp)
	__must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
{
	struct rvt_qp *fqp;
	bool ret = true;

	lockdep_assert_held(&qp->s_lock);
	lockdep_assert_held(&rcd->exp_lock);
	fqp = first_qp(rcd, queue);
	if (!fqp || (fqp == qp && (qp->s_flags & HFI1_S_WAIT_TID_SPACE)))
		ret = false;
	rvt_put_qp(fqp);
	return ret;
}

/**
 * dequeue_tid_waiter - dequeue the qp from the list
 * @rcd: the receive context
 * @queue: the queue to operate on
 * @qp: the qp to remove from the wait list
 *
 * This routine removes the indicated qp from the
 * wait list if it is there.
 *
 * This should be done after the hardware flow and
 * tid array resources have been allocated.
 *
 * Must hold the qp s_lock and the rcd exp_lock.
 *
 * It assumes the s_lock to protect the s_flags
 * field and to reliably test the HFI1_S_WAIT_TID_SPACE flag.
 */
static void dequeue_tid_waiter(struct hfi1_ctxtdata *rcd,
			       struct tid_queue *queue, struct rvt_qp *qp)
	__must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
{
	struct hfi1_qp_priv *priv = qp->priv;

	lockdep_assert_held(&qp->s_lock);
	lockdep_assert_held(&rcd->exp_lock);
	if (list_empty(&priv->tid_wait))
		return;
	list_del_init(&priv->tid_wait);
	qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
	queue->dequeue++;
	rvt_put_qp(qp);
}

/**
 * queue_qp_for_tid_wait - suspend QP on tid space
 * @rcd: the receive context
 * @queue: the queue to operate on
 * @qp: the qp
 *
 * The qp is inserted at the tail of the rcd
 * wait queue and the HFI1_S_WAIT_TID_SPACE s_flag is set.
 *
 * Must hold the qp s_lock and the exp_lock.
 */
static void queue_qp_for_tid_wait(struct hfi1_ctxtdata *rcd,
				  struct tid_queue *queue, struct rvt_qp *qp)
	__must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
{
	struct hfi1_qp_priv *priv = qp->priv;

	lockdep_assert_held(&qp->s_lock);
	lockdep_assert_held(&rcd->exp_lock);
	if (list_empty(&priv->tid_wait)) {
		qp->s_flags |= HFI1_S_WAIT_TID_SPACE;
		list_add_tail(&priv->tid_wait, &queue->queue_head);
		priv->tid_enqueue = ++queue->enqueue;
		rcd->dd->verbs_dev.n_tidwait++;
		trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TID_SPACE);
		rvt_get_qp(qp);
	}
}

/**
 * __trigger_tid_waiter - trigger tid waiter
 * @qp: the qp
 *
 * This is a private entrance to schedule the qp
 * assuming the caller is holding the qp->s_lock.
 */
static void __trigger_tid_waiter(struct rvt_qp *qp)
	__must_hold(&qp->s_lock)
{
	lockdep_assert_held(&qp->s_lock);
	if (!(qp->s_flags & HFI1_S_WAIT_TID_SPACE))
		return;
	trace_hfi1_qpwakeup(qp, HFI1_S_WAIT_TID_SPACE);
	hfi1_schedule_send(qp);
}

/**
 * tid_rdma_schedule_tid_wakeup - schedule wakeup for a qp
 * @qp: the qp
 *
 * Trigger a schedule for a waiting qp in a deadlock
 * safe manner.  The qp reference is held prior
 * to this call via first_qp().
 *
 * If the qp trigger was already scheduled (!rval)
 * the reference is dropped, otherwise the resume
 * or the destroy cancel will dispatch the reference.
 */
static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv;
	struct hfi1_ibport *ibp;
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;
	bool rval;

	if (!qp)
		return;

	priv = qp->priv;
	ibp = to_iport(qp->ibqp.device, qp->port_num);
	ppd = ppd_from_ibp(ibp);
	dd = dd_from_ibdev(qp->ibqp.device);

	rval = queue_work_on(priv->s_sde ?
			     priv->s_sde->cpu :
			     cpumask_first(cpumask_of_node(dd->node)),
			     ppd->hfi1_wq,
			     &priv->tid_rdma.trigger_work);
	if (!rval)
		rvt_put_qp(qp);
}

/**
 * tid_rdma_trigger_resume - field a trigger work request
 * @work: the work item
 *
 * Complete the off qp trigger processing by directly
 * calling the progress routine.
 */
static void tid_rdma_trigger_resume(struct work_struct *work)
{
	struct tid_rdma_qp_params *tr;
	struct hfi1_qp_priv *priv;
	struct rvt_qp *qp;

	tr = container_of(work, struct tid_rdma_qp_params, trigger_work);
	priv = container_of(tr, struct hfi1_qp_priv, tid_rdma);
	qp = priv->owner;
	spin_lock_irq(&qp->s_lock);
	if (qp->s_flags & HFI1_S_WAIT_TID_SPACE) {
		spin_unlock_irq(&qp->s_lock);
		hfi1_do_send(priv->owner, true);
	} else {
		spin_unlock_irq(&qp->s_lock);
	}
	rvt_put_qp(qp);
}

/*
 * _tid_rdma_flush_wait - unwind any tid space wait
 *
 * This is called when resetting a qp to
 * allow a destroy or reset to get rid
 * of any tid space linkage and reference counts.
 */
static void _tid_rdma_flush_wait(struct rvt_qp *qp, struct tid_queue *queue)
	__must_hold(&qp->s_lock)
{
	struct hfi1_qp_priv *priv;

	if (!qp)
		return;
	lockdep_assert_held(&qp->s_lock);
	priv = qp->priv;
	qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
	spin_lock(&priv->rcd->exp_lock);
	if (!list_empty(&priv->tid_wait)) {
		list_del_init(&priv->tid_wait);
		qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
		queue->dequeue++;
		rvt_put_qp(qp);
	}
	spin_unlock(&priv->rcd->exp_lock);
}

void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
	__must_hold(&qp->s_lock)
{
	struct hfi1_qp_priv *priv = qp->priv;

	_tid_rdma_flush_wait(qp, &priv->rcd->flow_queue);
	_tid_rdma_flush_wait(qp, &priv->rcd->rarr_queue);
}

/* Flow functions */
/**
 * kern_reserve_flow - allocate a hardware flow
 * @rcd: the context to use for allocation
 * @last: the index of the preferred flow. Use RXE_NUM_TID_FLOWS to
 *         signify "don't care".
 *
 * Use a bit mask based allocation to reserve a hardware
 * flow for use in receiving KDETH data packets. If a preferred flow is
 * specified the function will attempt to reserve that flow again, if
 * available.
 *
 * The exp_lock must be held.
 *
 * Return:
 * On success: a flow index between 0 and RXE_NUM_TID_FLOWS - 1
 * On failure: -EAGAIN
 */
static int kern_reserve_flow(struct hfi1_ctxtdata *rcd, int last)
	__must_hold(&rcd->exp_lock)
{
	int nr;

	/* Attempt to reserve the preferred flow index */
	if (last >= 0 && last < RXE_NUM_TID_FLOWS &&
	    !test_and_set_bit(last, &rcd->flow_mask))
		return last;

	nr = ffz(rcd->flow_mask);
	BUILD_BUG_ON(RXE_NUM_TID_FLOWS >=
		     (sizeof(rcd->flow_mask) * BITS_PER_BYTE));
	if (nr > (RXE_NUM_TID_FLOWS - 1))
		return -EAGAIN;
	set_bit(nr, &rcd->flow_mask);
	return nr;
}

static void kern_set_hw_flow(struct hfi1_ctxtdata *rcd, u32 generation,
			     u32 flow_idx)
{
	u64 reg;

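	/*
	 * Register layout, per the masks at the top of this file: the flow
	 * sequence number occupies the low HFI1_KDETH_BTH_SEQ_SHIFT bits,
	 * the generation sits immediately above it, and the control/status
	 * bits start at bit 32.
	 */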
	reg = ((u64)generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
		RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK |
		RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK |
		RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK |
		RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK |
		RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK;

	if (generation != KERN_GENERATION_RESERVED)
		reg |= RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK;

	write_uctxt_csr(rcd->dd, rcd->ctxt,
			RCV_TID_FLOW_TABLE + 8 * flow_idx, reg);
}

static u32 kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
	__must_hold(&rcd->exp_lock)
{
	u32 generation = rcd->flows[flow_idx].generation;

	kern_set_hw_flow(rcd, generation, flow_idx);
	return generation;
}

static u32 kern_flow_generation_next(u32 gen)
{
	u32 generation = mask_generation(gen + 1);

	if (generation == KERN_GENERATION_RESERVED)
		generation = mask_generation(generation + 1);
	return generation;
}

static void kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
	__must_hold(&rcd->exp_lock)
{
	rcd->flows[flow_idx].generation =
		kern_flow_generation_next(rcd->flows[flow_idx].generation);
	kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, flow_idx);
}

int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
{
	struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
	struct tid_flow_state *fs = &qpriv->flow_state;
	struct rvt_qp *fqp;
	unsigned long flags;
	int ret = 0;

	/* The QP already has an allocated flow */
	if (fs->index != RXE_NUM_TID_FLOWS)
		return ret;

	spin_lock_irqsave(&rcd->exp_lock, flags);
	if (kernel_tid_waiters(rcd, &rcd->flow_queue, qp))
		goto queue;

	ret = kern_reserve_flow(rcd, fs->last_index);
	if (ret < 0)
		goto queue;
	fs->index = ret;
	fs->last_index = fs->index;

	/* Generation received in a RESYNC overrides default flow generation */
	if (fs->generation != KERN_GENERATION_RESERVED)
		rcd->flows[fs->index].generation = fs->generation;
	fs->generation = kern_setup_hw_flow(rcd, fs->index);
	fs->psn = 0;
	dequeue_tid_waiter(rcd, &rcd->flow_queue, qp);
	/* get head before dropping lock */
	fqp = first_qp(rcd, &rcd->flow_queue);
	spin_unlock_irqrestore(&rcd->exp_lock, flags);

	tid_rdma_schedule_tid_wakeup(fqp);
	return 0;
queue:
	queue_qp_for_tid_wait(rcd, &rcd->flow_queue, qp);
	spin_unlock_irqrestore(&rcd->exp_lock, flags);
	return -EAGAIN;
}

void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
{
	struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
	struct tid_flow_state *fs = &qpriv->flow_state;
	struct rvt_qp *fqp;
	unsigned long flags;

	if (fs->index >= RXE_NUM_TID_FLOWS)
		return;
	spin_lock_irqsave(&rcd->exp_lock, flags);
	kern_clear_hw_flow(rcd, fs->index);
	clear_bit(fs->index, &rcd->flow_mask);
	fs->index = RXE_NUM_TID_FLOWS;
	fs->psn = 0;
	fs->generation = KERN_GENERATION_RESERVED;

	/* get head before dropping lock */
	fqp = first_qp(rcd, &rcd->flow_queue);
	spin_unlock_irqrestore(&rcd->exp_lock, flags);

	if (fqp == qp) {
		__trigger_tid_waiter(fqp);
		rvt_put_qp(fqp);
	} else {
		tid_rdma_schedule_tid_wakeup(fqp);
	}
}

void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd)
{
	int i;

	for (i = 0; i < RXE_NUM_TID_FLOWS; i++) {
		rcd->flows[i].generation = mask_generation(get_random_u32());
		kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, i);
	}
}

/* TID allocation functions */
static u8 trdma_pset_order(struct tid_rdma_pageset *s)
{
	u8 count = s->count;

	return ilog2(count) + 1;
}

/**
 * tid_rdma_find_phys_blocks_4k - get groups based on MR info
 * @flow: overall info for a TID RDMA segment
 * @pages: pointer to an array of page structs
 * @npages: number of pages
 * @list: page set array to return
 *
 * This routine returns the number of groups associated with
 * the current sge information.  This implementation is based
 * on the expected receive find_phys_blocks() adjusted to
 * use the MR information vs. the pfn.
 *
 * Return:
 * the number of RcvArray entries
 */
static u32 tid_rdma_find_phys_blocks_4k(struct tid_rdma_flow *flow,
					struct page **pages,
					u32 npages,
					struct tid_rdma_pageset *list)
{
	u32 pagecount, pageidx, setcount = 0, i;
	void *vaddr, *this_vaddr;

	if (!npages)
		return 0;

	/*
	 * Look for sets of physically contiguous pages in the user buffer.
	 * This will allow us to optimize Expected RcvArray entry usage by
	 * using the bigger supported sizes.
	 */
	vaddr = page_address(pages[0]);
	trace_hfi1_tid_flow_page(flow->req->qp, flow, 0, 0, 0, vaddr);
	for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
		this_vaddr = i < npages ? page_address(pages[i]) : NULL;
		trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 0, 0,
					 this_vaddr);
		/*
		 * If the vaddr's are not sequential, pages are not physically
		 * contiguous.
		 */
		if (this_vaddr != (vaddr + PAGE_SIZE)) {
			/*
			 * At this point we have to loop over the set of
			 * physically contiguous pages and break them down
			 * into sizes supported by the HW.
			 * There are two main constraints:
			 *     1. The max buffer size is MAX_EXPECTED_BUFFER.
			 *        If the total set size is bigger than that
			 *        program only a MAX_EXPECTED_BUFFER chunk.
			 *     2. The buffer size has to be a power of two. If
			 *        it is not, round down to the closest power of
			 *        2 and program that size.
			 */
			while (pagecount) {
				int maxpages = pagecount;
				u32 bufsize = pagecount * PAGE_SIZE;

				if (bufsize > MAX_EXPECTED_BUFFER)
					maxpages =
						MAX_EXPECTED_BUFFER >>
						PAGE_SHIFT;
				else if (!is_power_of_2(bufsize))
					maxpages =
						rounddown_pow_of_two(bufsize) >>
						PAGE_SHIFT;

				list[setcount].idx = pageidx;
				list[setcount].count = maxpages;
				trace_hfi1_tid_pageset(flow->req->qp, setcount,
						       list[setcount].idx,
						       list[setcount].count);
				pagecount -= maxpages;
				pageidx += maxpages;
				setcount++;
			}
			pageidx = i;
			pagecount = 1;
			vaddr = this_vaddr;
		} else {
			vaddr += PAGE_SIZE;
			pagecount++;
		}
	}
	/* ensure we always return an even number of sets */
	if (setcount & 1)
		list[setcount++].count = 0;
	return setcount;
}

/**
 * tid_flush_pages - dump out pages into pagesets
 * @list: list of pagesets
 * @idx: pointer to current page index
 * @pages: number of pages to dump
 * @sets: current number of pagesets
 *
 * This routine flushes out accumulated pages.
 *
 * To ensure an even number of sets the
 * code may add a filler.
 *
 * This can happen when pages is not
 * a power of 2 or pages is a power of 2
 * less than the maximum pages.
 *
 * Return:
 * The new number of sets
 */

static u32 tid_flush_pages(struct tid_rdma_pageset *list,
			   u32 *idx, u32 pages, u32 sets)
{
	while (pages) {
		u32 maxpages = pages;

		if (maxpages > MAX_EXPECTED_PAGES)
			maxpages = MAX_EXPECTED_PAGES;
		else if (!is_power_of_2(maxpages))
			maxpages = rounddown_pow_of_two(maxpages);
		list[sets].idx = *idx;
		list[sets++].count = maxpages;
		*idx += maxpages;
		pages -= maxpages;
	}
	/* might need a filler */
	if (sets & 1)
		list[sets++].count = 0;
	return sets;
}
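
/*
 * Worked example for tid_flush_pages(): 7 accumulated pages are emitted
 * as power-of-two sets of 4, 2 and 1 pages (subject to the
 * MAX_EXPECTED_PAGES cap), and since 3 sets is odd a zero-count filler
 * is appended, so 4 sets are returned.
 */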

/**
 * tid_rdma_find_phys_blocks_8k - get groups based on MR info
 * @flow: overall info for a TID RDMA segment
 * @pages: pointer to an array of page structs
 * @npages: number of pages
 * @list: page set array to return
 *
 * This routine parses an array of pages to compute pagesets
 * in an 8k compatible way.
 *
 * pages are tested two at a time, i, i + 1 for contiguous
 * pages and i - 1 and i contiguous pages.
 *
 * If any condition is false, any accumulated pages are flushed and
101162306a36Sopenharmony_ci * v0,v1 are emitted as separate PAGE_SIZE pagesets
101262306a36Sopenharmony_ci *
101362306a36Sopenharmony_ci * Otherwise, the current 8k is totaled for a future flush.
101462306a36Sopenharmony_ci *
101562306a36Sopenharmony_ci * Return:
101662306a36Sopenharmony_ci * The number of pagesets
101762306a36Sopenharmony_ci * list set with the returned number of pagesets
101862306a36Sopenharmony_ci *
101962306a36Sopenharmony_ci */
static u32 tid_rdma_find_phys_blocks_8k(struct tid_rdma_flow *flow,
					struct page **pages,
					u32 npages,
					struct tid_rdma_pageset *list)
{
	u32 idx, sets = 0, i;
	u32 pagecnt = 0;
	void *v0, *v1, *vm1;

	if (!npages)
		return 0;
	for (idx = 0, i = 0, vm1 = NULL; i < npages; i += 2) {
		/* get a new v0 */
		v0 = page_address(pages[i]);
		trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 0, v0);
		v1 = i + 1 < npages ?
				page_address(pages[i + 1]) : NULL;
		trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 1, v1);
		/* compare i, i + 1 vaddr */
		if (v1 != (v0 + PAGE_SIZE)) {
			/* flush out pages */
			sets = tid_flush_pages(list, &idx, pagecnt, sets);
			/* output v0,v1 as two pagesets */
			list[sets].idx = idx++;
			list[sets++].count = 1;
			if (v1) {
				list[sets].count = 1;
				list[sets++].idx = idx++;
			} else {
				list[sets++].count = 0;
			}
			vm1 = NULL;
			pagecnt = 0;
			continue;
		}
		/* i,i+1 consecutive, look at i-1,i */
		if (vm1 && v0 != (vm1 + PAGE_SIZE)) {
			/* flush out pages */
			sets = tid_flush_pages(list, &idx, pagecnt, sets);
			pagecnt = 0;
		}
		/* pages will always be a multiple of 8k */
		pagecnt += 2;
		/* save i-1 */
		vm1 = v1;
		/* move to next pair */
	}
	/* dump residual pages at end */
	sets = tid_flush_pages(list, &idx, npages - idx, sets);
	/* by design cannot be odd sets */
	WARN_ON(sets & 1);
	return sets;
}
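
/*
 * Worked example (editorial addition, illustrative only): four pages
 * whose kernel virtual addresses are A, A + 4k, A + 8k and B, with B
 * discontiguous. The first pair (i = 0) is contiguous and vm1 is NULL,
 * so pagecnt becomes 2. For the second pair (i = 2), v1 = B breaks the
 * run: the two accumulated pages are flushed as one pageset plus a
 * zero-count filler (tid_flush_pages() keeps the count even), and then
 * A + 8k and B are emitted as two single-page pagesets, for four sets
 * in total.
 */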

/*
 * Find pages for one segment of a sge array represented by @ss. The function
 * does not check the sge; the sge must have been checked for alignment with a
 * prior call to hfi1_kern_trdma_ok. Other sge checking is done as part of
 * rvt_lkey_ok and rvt_rkey_ok. Also, the function only modifies the local sge
 * copy maintained in @ss->sge; the original sge is not modified.
 *
 * Unlike IB RDMA WRITE, we can't decrement ss->num_sge here because we are not
 * releasing the MR reference count at the same time. Otherwise, we'll "leak"
 * references to the MR. This difference requires that we keep track of progress
 * into the sg_list. This is done by the cur_seg cursor in the tid_rdma_request
 * structure.
 */
static u32 kern_find_pages(struct tid_rdma_flow *flow,
			   struct page **pages,
			   struct rvt_sge_state *ss, bool *last)
{
	struct tid_rdma_request *req = flow->req;
	struct rvt_sge *sge = &ss->sge;
	u32 length = flow->req->seg_len;
	u32 len = PAGE_SIZE;
	u32 i = 0;

	while (length && req->isge < ss->num_sge) {
		pages[i++] = virt_to_page(sge->vaddr);

		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (!sge->sge_length) {
			if (++req->isge < ss->num_sge)
				*sge = ss->sg_list[req->isge - 1];
		} else if (sge->length == 0 && sge->mr->lkey) {
			if (++sge->n >= RVT_SEGSZ) {
				++sge->m;
				sge->n = 0;
			}
			sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
		}
		length -= len;
	}

	flow->length = flow->req->seg_len - length;
	*last = req->isge != ss->num_sge;
	return i;
}
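
/*
 * Worked example (editorial addition, illustrative only): with 4KB
 * pages, req->seg_len = 16KB spread over two page-aligned 8KB sges.
 * The loop collects four page pointers, advancing only the local copy
 * in ss->sge in PAGE_SIZE steps; when the first sge drains, req->isge
 * becomes 1 and ss->sge is reloaded from ss->sg_list[0]. On return,
 * flow->length = 16KB and req->isge = 2, leaving the cursor in place
 * for the next segment without touching ss->num_sge or the MR
 * references.
 */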

static void dma_unmap_flow(struct tid_rdma_flow *flow)
{
	struct hfi1_devdata *dd;
	int i;
	struct tid_rdma_pageset *pset;

	dd = flow->req->rcd->dd;
	for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
			i++, pset++) {
		if (pset->count && pset->addr) {
			dma_unmap_page(&dd->pcidev->dev,
				       pset->addr,
				       PAGE_SIZE * pset->count,
				       DMA_FROM_DEVICE);
			pset->mapped = 0;
		}
	}
}

static int dma_map_flow(struct tid_rdma_flow *flow, struct page **pages)
{
	int i;
	struct hfi1_devdata *dd = flow->req->rcd->dd;
	struct tid_rdma_pageset *pset;

	for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
			i++, pset++) {
		if (pset->count) {
			pset->addr = dma_map_page(&dd->pcidev->dev,
						  pages[pset->idx],
						  0,
						  PAGE_SIZE * pset->count,
						  DMA_FROM_DEVICE);

			if (dma_mapping_error(&dd->pcidev->dev, pset->addr)) {
				dma_unmap_flow(flow);
				return -ENOMEM;
			}
			pset->mapped = 1;
		}
	}
	return 0;
}

static inline bool dma_mapped(struct tid_rdma_flow *flow)
{
	return !!flow->pagesets[0].mapped;
}

/*
 * Get page pointers and identify contiguous physical memory chunks for a
 * segment. All segments are of length flow->req->seg_len.
 */
static int kern_get_phys_blocks(struct tid_rdma_flow *flow,
				struct page **pages,
				struct rvt_sge_state *ss, bool *last)
{
	u8 npages;

	/* Reuse previously computed pagesets, if any */
	if (flow->npagesets) {
		trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head,
					  flow);
		if (!dma_mapped(flow))
			return dma_map_flow(flow, pages);
		return 0;
	}

	npages = kern_find_pages(flow, pages, ss, last);

	if (flow->req->qp->pmtu == enum_to_mtu(OPA_MTU_4096))
		flow->npagesets =
			tid_rdma_find_phys_blocks_4k(flow, pages, npages,
						     flow->pagesets);
	else
		flow->npagesets =
			tid_rdma_find_phys_blocks_8k(flow, pages, npages,
						     flow->pagesets);

	return dma_map_flow(flow, pages);
}

static inline void kern_add_tid_node(struct tid_rdma_flow *flow,
				     struct hfi1_ctxtdata *rcd, char *s,
				     struct tid_group *grp, u8 cnt)
{
	struct kern_tid_node *node = &flow->tnode[flow->tnode_cnt++];

	WARN_ON_ONCE(flow->tnode_cnt >=
		     (TID_RDMA_MAX_SEGMENT_SIZE >> PAGE_SHIFT));
	if (WARN_ON_ONCE(cnt & 1))
		dd_dev_err(rcd->dd,
			   "unexpected odd allocation cnt %u map 0x%x used %u",
			   cnt, grp->map, grp->used);

	node->grp = grp;
	node->map = grp->map;
	node->cnt = cnt;
	trace_hfi1_tid_node_add(flow->req->qp, s, flow->tnode_cnt - 1,
				grp->base, grp->map, grp->used, cnt);
}

/*
 * Try to allocate pageset_count TID's from TID groups for a context
 *
 * This function allocates TID's without moving groups between lists or
 * modifying grp->map. This is done as follows, being cognizant of the lists
 * between which the TID groups will move:
 * 1. First allocate complete groups of 8 TID's since this is more efficient;
 *    these groups will move from group->full without affecting used.
 * 2. If more TID's are needed, allocate from used (will move from used->full
 *    or stay in used).
 * 3. If we still don't have the required number of TID's, go back and look
 *    again at a complete group (will move from group->used).
 */
static int kern_alloc_tids(struct tid_rdma_flow *flow)
{
	struct hfi1_ctxtdata *rcd = flow->req->rcd;
	struct hfi1_devdata *dd = rcd->dd;
	u32 ngroups, pageidx = 0;
	struct tid_group *group = NULL, *used;
	u8 use;

	flow->tnode_cnt = 0;
	ngroups = flow->npagesets / dd->rcv_entries.group_size;
	if (!ngroups)
		goto used_list;

	/* First look at complete groups */
	list_for_each_entry(group, &rcd->tid_group_list.list, list) {
		kern_add_tid_node(flow, rcd, "complete groups", group,
				  group->size);

		pageidx += group->size;
		if (!--ngroups)
			break;
	}

	if (pageidx >= flow->npagesets)
		goto ok;

used_list:
	/* Now look at partially used groups */
	list_for_each_entry(used, &rcd->tid_used_list.list, list) {
		use = min_t(u32, flow->npagesets - pageidx,
			    used->size - used->used);
		kern_add_tid_node(flow, rcd, "used groups", used, use);

		pageidx += use;
		if (pageidx >= flow->npagesets)
			goto ok;
	}

	/*
	 * Look again at a complete group, continuing from where we left off.
	 * However, if we are at the head, we have reached the end of the
	 * complete groups list from the first loop above.
	 */
	if (group && &group->list == &rcd->tid_group_list.list)
		goto bail_eagain;
	group = list_prepare_entry(group, &rcd->tid_group_list.list,
				   list);
	if (list_is_last(&group->list, &rcd->tid_group_list.list))
		goto bail_eagain;
	group = list_next_entry(group, list);
	use = min_t(u32, flow->npagesets - pageidx, group->size);
	kern_add_tid_node(flow, rcd, "complete continue", group, use);
	pageidx += use;
	if (pageidx >= flow->npagesets)
		goto ok;
bail_eagain:
	trace_hfi1_msg_alloc_tids(flow->req->qp, " insufficient tids: needed ",
				  (u64)flow->npagesets);
	return -EAGAIN;
ok:
	return 0;
}
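
/*
 * Worked example (editorial addition, illustrative only): with a group
 * size of 8 and flow->npagesets = 18, ngroups = 2. Step 1 takes two
 * complete groups (16 TID's). Step 2 covers the remaining two from a
 * partially used group; e.g. a group with size 8 and used 5 has three
 * free entries, so use = min(18 - 16, 3) = 2. Only if the used list
 * cannot cover the remainder does step 3 dip into one more complete
 * group, and if even that falls short, -EAGAIN queues the QP for a
 * later retry.
 */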

static void kern_program_rcv_group(struct tid_rdma_flow *flow, int grp_num,
				   u32 *pset_idx)
{
	struct hfi1_ctxtdata *rcd = flow->req->rcd;
	struct hfi1_devdata *dd = rcd->dd;
	struct kern_tid_node *node = &flow->tnode[grp_num];
	struct tid_group *grp = node->grp;
	struct tid_rdma_pageset *pset;
	u32 pmtu_pg = flow->req->qp->pmtu >> PAGE_SHIFT;
	u32 rcventry, npages = 0, pair = 0, tidctrl;
	u8 i, cnt = 0;

	for (i = 0; i < grp->size; i++) {
		rcventry = grp->base + i;

		if (node->map & BIT(i) || cnt >= node->cnt) {
			rcv_array_wc_fill(dd, rcventry);
			continue;
		}
		pset = &flow->pagesets[(*pset_idx)++];
		if (pset->count) {
			hfi1_put_tid(dd, rcventry, PT_EXPECTED,
				     pset->addr, trdma_pset_order(pset));
		} else {
			hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
		}
		npages += pset->count;

		rcventry -= rcd->expected_base;
		tidctrl = pair ? 0x3 : rcventry & 0x1 ? 0x2 : 0x1;
		/*
		 * A single TID entry will be used for a rcvarray pair (with
		 * tidctrl 0x3) if ALL of these are true: (a) the bit position
		 * is even, (b) the group map shows the current and the next
		 * bits as free, indicating two consecutive rcvarray entries
		 * are available, and (c) we actually need 2 more entries.
		 */
		pair = !(i & 0x1) && !((node->map >> i) & 0x3) &&
			node->cnt >= cnt + 2;
		if (!pair) {
			if (!pset->count)
				tidctrl = 0x1;
			flow->tid_entry[flow->tidcnt++] =
				EXP_TID_SET(IDX, rcventry >> 1) |
				EXP_TID_SET(CTRL, tidctrl) |
				EXP_TID_SET(LEN, npages);
			trace_hfi1_tid_entry_alloc(/* entry */
			   flow->req->qp, flow->tidcnt - 1,
			   flow->tid_entry[flow->tidcnt - 1]);

			/* Efficient DIV_ROUND_UP(npages, pmtu_pg) */
			flow->npkts += (npages + pmtu_pg - 1) >> ilog2(pmtu_pg);
			npages = 0;
		}

		if (grp->used == grp->size - 1)
			tid_group_move(grp, &rcd->tid_used_list,
				       &rcd->tid_full_list);
		else if (!grp->used)
			tid_group_move(grp, &rcd->tid_group_list,
				       &rcd->tid_used_list);

		grp->used++;
		grp->map |= BIT(i);
		cnt++;
	}
}
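
/*
 * Worked example (editorial addition, illustrative only) of the TID
 * entry encoding above: a pair at relative rcventry 6 (even bit
 * position, both map bits free, at least two more entries needed) is
 * emitted with tidctrl = 0x3 and IDX = 6 >> 1 = 3, covering both
 * rcvarray entries with a single TID. A lone entry at rcventry 7 would
 * instead be emitted with tidctrl = 0x2 and the same IDX of 3. LEN
 * carries the total page count accumulated since the last emitted TID
 * entry.
 */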

static void kern_unprogram_rcv_group(struct tid_rdma_flow *flow, int grp_num)
{
	struct hfi1_ctxtdata *rcd = flow->req->rcd;
	struct hfi1_devdata *dd = rcd->dd;
	struct kern_tid_node *node = &flow->tnode[grp_num];
	struct tid_group *grp = node->grp;
	u32 rcventry;
	u8 i, cnt = 0;

	for (i = 0; i < grp->size; i++) {
		rcventry = grp->base + i;

		if (node->map & BIT(i) || cnt >= node->cnt) {
			rcv_array_wc_fill(dd, rcventry);
			continue;
		}

		hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);

		grp->used--;
		grp->map &= ~BIT(i);
		cnt++;

		if (grp->used == grp->size - 1)
			tid_group_move(grp, &rcd->tid_full_list,
				       &rcd->tid_used_list);
		else if (!grp->used)
			tid_group_move(grp, &rcd->tid_used_list,
				       &rcd->tid_group_list);
	}
	if (WARN_ON_ONCE(cnt & 1)) {
		struct hfi1_ctxtdata *rcd = flow->req->rcd;
		struct hfi1_devdata *dd = rcd->dd;

		dd_dev_err(dd, "unexpected odd free cnt %u map 0x%x used %u",
			   cnt, grp->map, grp->used);
	}
}

static void kern_program_rcvarray(struct tid_rdma_flow *flow)
{
	u32 pset_idx = 0;
	int i;

	flow->npkts = 0;
	flow->tidcnt = 0;
	for (i = 0; i < flow->tnode_cnt; i++)
		kern_program_rcv_group(flow, i, &pset_idx);
	trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head, flow);
}

/**
 * hfi1_kern_exp_rcv_setup() - setup TID's and flow for one segment of a
 * TID RDMA request
 *
 * @req: TID RDMA request for which the segment/flow is being set up
 * @ss: sge state, maintains state across successive segments of a sge
 * @last: set to true after the last sge segment has been processed
 *
 * This function
 * (1) finds a free flow entry in the flow circular buffer
 * (2) finds pages and contiguous physical chunks constituting one segment
 *     of an sge
 * (3) allocates TID group entries for those chunks
 * (4) programs rcvarray entries in the hardware corresponding to those
 *     TID's
 * (5) computes a tidarray with formatted TID entries which can be sent
 *     to the sender
 * (6) reserves and programs HW flows
 * (7) manages queuing the QP when TID/flow resources are not available
 *
 * @req points to struct tid_rdma_request of which the segments are a part. The
 * function uses qp, rcd and seg_len members of @req. In the absence of errors,
 * req->flow_idx is the index of the flow which has been prepared in this
 * invocation of the function. With flow = &req->flows[req->flow_idx],
 * flow->tid_entry contains the TID array which the sender can use for TID RDMA
 * sends and flow->npkts contains the number of packets required to send the
 * segment.
 *
 * hfi1_check_sge_align should be called prior to calling this function and if
 * it signals error TID RDMA cannot be used for this sge and this function
 * should not be called.
 *
 * For the queuing, the caller must hold the flow->req->qp s_lock from the send
 * engine and the function will acquire the exp_lock.
 *
 * Return:
 * -EAGAIN if sufficient TID/flow resources to map the segment could not be
 * allocated; in this case the QP has been queued and the function should be
 * called again with the same arguments to retry the TID allocation. -ENOMEM
 * if the pages could not be DMA-mapped. -EINVAL if there is no space in the
 * flow circular buffer. 0 on success.
 */
int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
			    struct rvt_sge_state *ss, bool *last)
	__must_hold(&req->qp->s_lock)
{
	struct tid_rdma_flow *flow = &req->flows[req->setup_head];
	struct hfi1_ctxtdata *rcd = req->rcd;
	struct hfi1_qp_priv *qpriv = req->qp->priv;
	unsigned long flags;
	struct rvt_qp *fqp;
	u16 clear_tail = req->clear_tail;

	lockdep_assert_held(&req->qp->s_lock);
	/*
	 * We return an error if either (a) we don't have space in the flow
	 * circular buffer, or (b) we already have the max entries in the
	 * buffer. Max entries depend on the type of request we are processing
	 * and the negotiated TID RDMA parameters.
	 */
	if (!CIRC_SPACE(req->setup_head, clear_tail, MAX_FLOWS) ||
	    CIRC_CNT(req->setup_head, clear_tail, MAX_FLOWS) >=
	    req->n_flows)
		return -EINVAL;

	/*
	 * Get pages and identify contiguous physical memory chunks for the
	 * segment. If we cannot determine a DMA address mapping, we will
	 * treat it just as if we had run out of space above.
	 */
	if (kern_get_phys_blocks(flow, qpriv->pages, ss, last)) {
		hfi1_wait_kmem(flow->req->qp);
		return -ENOMEM;
	}

	spin_lock_irqsave(&rcd->exp_lock, flags);
	if (kernel_tid_waiters(rcd, &rcd->rarr_queue, flow->req->qp))
		goto queue;

	/*
	 * At this point we know the number of pagesets and hence the number of
	 * TID's to map the segment. Allocate the TID's from the TID groups. If
	 * we cannot allocate the required number, we exit and try again later.
	 */
	if (kern_alloc_tids(flow))
		goto queue;
	/*
	 * Finally program the TID entries with the pagesets, compute the
	 * tidarray and enable the HW flow.
	 */
	kern_program_rcvarray(flow);

	/*
	 * Setup the flow state with relevant information.
	 * This information is used for tracking the sequence of data packets
	 * for the segment.
	 * The flow is set up here as this is the most accurate time and place
	 * to do so. Doing so at a later time runs the risk of the flow data in
	 * qpriv getting out of sync.
	 */
	memset(&flow->flow_state, 0x0, sizeof(flow->flow_state));
	flow->idx = qpriv->flow_state.index;
	flow->flow_state.generation = qpriv->flow_state.generation;
	flow->flow_state.spsn = qpriv->flow_state.psn;
	flow->flow_state.lpsn = flow->flow_state.spsn + flow->npkts - 1;
	flow->flow_state.r_next_psn =
		full_flow_psn(flow, flow->flow_state.spsn);
	qpriv->flow_state.psn += flow->npkts;

	dequeue_tid_waiter(rcd, &rcd->rarr_queue, flow->req->qp);
	/* get head before dropping lock */
	fqp = first_qp(rcd, &rcd->rarr_queue);
	spin_unlock_irqrestore(&rcd->exp_lock, flags);
	tid_rdma_schedule_tid_wakeup(fqp);

	req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);
	return 0;
queue:
	queue_qp_for_tid_wait(rcd, &rcd->rarr_queue, flow->req->qp);
	spin_unlock_irqrestore(&rcd->exp_lock, flags);
	return -EAGAIN;
}
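
/*
 * Usage sketch (editorial addition; simplified and hypothetical, not a
 * verbatim call site): the send engine retries the setup with the same
 * arguments until resources become available, under qp->s_lock:
 *
 *	bool last;
 *
 *	if (hfi1_kern_exp_rcv_setup(req, &qp->s_sge, &last)) {
 *		// QP was queued on rcd->rarr_queue or is waiting for
 *		// kernel memory; bail out and let the wakeup reschedule.
 *		return;
 *	}
 *	// req->setup_head has advanced; the flow just programmed holds
 *	// the tid_entry array and npkts for this segment.
 */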

static void hfi1_tid_rdma_reset_flow(struct tid_rdma_flow *flow)
{
	flow->npagesets = 0;
}

/*
 * This function is called after one segment has been successfully sent to
 * release the flow and TID HW/SW resources for that segment. The segments for
 * a TID RDMA request are set up and cleared in FIFO order, which is managed
 * using a circular buffer.
 */
int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req)
	__must_hold(&req->qp->s_lock)
{
	struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
	struct hfi1_ctxtdata *rcd = req->rcd;
	unsigned long flags;
	int i;
	struct rvt_qp *fqp;

	lockdep_assert_held(&req->qp->s_lock);
	/* Exit if we have nothing in the flow circular buffer */
	if (!CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS))
		return -EINVAL;

	spin_lock_irqsave(&rcd->exp_lock, flags);

	for (i = 0; i < flow->tnode_cnt; i++)
		kern_unprogram_rcv_group(flow, i);
	/* To prevent double unprogramming */
	flow->tnode_cnt = 0;
	/* get head before dropping lock */
	fqp = first_qp(rcd, &rcd->rarr_queue);
	spin_unlock_irqrestore(&rcd->exp_lock, flags);

	dma_unmap_flow(flow);

	hfi1_tid_rdma_reset_flow(flow);
	req->clear_tail = (req->clear_tail + 1) & (MAX_FLOWS - 1);

	if (fqp == req->qp) {
		__trigger_tid_waiter(fqp);
		rvt_put_qp(fqp);
	} else {
		tid_rdma_schedule_tid_wakeup(fqp);
	}

	return 0;
}
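
/*
 * Worked example (editorial addition, illustrative only; assumes
 * MAX_FLOWS is 4): three hfi1_kern_exp_rcv_setup() calls advance
 * setup_head 0 -> 3 while clear_tail stays 0, so
 * CIRC_CNT(3, 0, 4) = 3 segments are outstanding. Each completed
 * segment then calls hfi1_kern_exp_rcv_clear(), advancing clear_tail
 * 0 -> 3 in the same FIFO order, until the buffer is empty and further
 * clears return -EINVAL.
 */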

/*
 * This function is called to release all the tid entries for
 * a request.
 */
void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req)
	__must_hold(&req->qp->s_lock)
{
	/* Use memory barrier for proper ordering */
	while (CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS)) {
		if (hfi1_kern_exp_rcv_clear(req))
			break;
	}
}

/**
 * hfi1_kern_exp_rcv_free_flows - free previously allocated flow information
 * @req: the tid rdma request to be cleaned
 */
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req)
{
	kfree(req->flows);
	req->flows = NULL;
}

/**
 * __trdma_clean_swqe - clean up for large sized QPs
 * @qp: the queue pair
 * @wqe: the send wqe
 */
void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
{
	struct hfi1_swqe_priv *p = wqe->priv;

	hfi1_kern_exp_rcv_free_flows(&p->tid_req);
}

/*
 * This can be called at QP create time or in the data path.
 */
static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
					 gfp_t gfp)
{
	struct tid_rdma_flow *flows;
	int i;

	if (likely(req->flows))
		return 0;
	flows = kmalloc_node(MAX_FLOWS * sizeof(*flows), gfp,
			     req->rcd->numa_id);
	if (!flows)
		return -ENOMEM;
	/* mini init */
	for (i = 0; i < MAX_FLOWS; i++) {
		flows[i].req = req;
		flows[i].npagesets = 0;
		flows[i].pagesets[0].mapped = 0;
		flows[i].resync_npkts = 0;
	}
	req->flows = flows;
	return 0;
}

static void hfi1_init_trdma_req(struct rvt_qp *qp,
				struct tid_rdma_request *req)
{
	struct hfi1_qp_priv *qpriv = qp->priv;

	/*
	 * Initialize various TID RDMA request variables.
	 * These variables are "static", which is why they
	 * can be pre-initialized here before the WRs have
	 * even been submitted.
	 * However, non-NULL values for these variables do not
	 * imply that this WQE has been enabled for TID RDMA.
	 * Drivers should check the WQE's opcode to determine
	 * if a request is a TID RDMA one or not.
	 */
	req->qp = qp;
	req->rcd = qpriv->rcd;
}

u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
			    void *context, int vl, int mode, u64 data)
{
	struct hfi1_devdata *dd = context;

	return dd->verbs_dev.n_tidwait;
}

static struct tid_rdma_flow *find_flow_ib(struct tid_rdma_request *req,
					  u32 psn, u16 *fidx)
{
	u16 head, tail;
	struct tid_rdma_flow *flow;

	head = req->setup_head;
	tail = req->clear_tail;
	for ( ; CIRC_CNT(head, tail, MAX_FLOWS);
	     tail = CIRC_NEXT(tail, MAX_FLOWS)) {
		flow = &req->flows[tail];
		if (cmp_psn(psn, flow->flow_state.ib_spsn) >= 0 &&
		    cmp_psn(psn, flow->flow_state.ib_lpsn) <= 0) {
			if (fidx)
				*fidx = tail;
			return flow;
		}
	}
	return NULL;
}
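
/*
 * Worked example (editorial addition, illustrative only): with two
 * outstanding flows whose IB PSN ranges are [100, 103] and [104, 107],
 * find_flow_ib(req, 105, &fidx) walks from clear_tail, rejects the
 * first flow (105 > 103) and returns the second with *fidx set to its
 * slot. A PSN of 99 or 108 falls in neither range and yields NULL.
 */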

/* TID RDMA READ functions */
u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
				    struct ib_other_headers *ohdr, u32 *bth1,
				    u32 *bth2, u32 *len)
{
	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
	struct tid_rdma_flow *flow = &req->flows[req->flow_idx];
	struct rvt_qp *qp = req->qp;
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct hfi1_swqe_priv *wpriv = wqe->priv;
	struct tid_rdma_read_req *rreq = &ohdr->u.tid_rdma.r_req;
	struct tid_rdma_params *remote;
	u32 req_len = 0;
	void *req_addr = NULL;

	/* This is the IB psn used to send the request */
	*bth2 = mask_psn(flow->flow_state.ib_spsn + flow->pkt);
	trace_hfi1_tid_flow_build_read_pkt(qp, req->flow_idx, flow);

	/* TID Entries for TID RDMA READ payload */
	req_addr = &flow->tid_entry[flow->tid_idx];
	req_len = sizeof(*flow->tid_entry) *
			(flow->tidcnt - flow->tid_idx);

	memset(&ohdr->u.tid_rdma.r_req, 0, sizeof(ohdr->u.tid_rdma.r_req));
	wpriv->ss.sge.vaddr = req_addr;
	wpriv->ss.sge.sge_length = req_len;
	wpriv->ss.sge.length = wpriv->ss.sge.sge_length;
	/*
	 * We can safely zero these out. Since the first SGE covers the
	 * entire packet, nothing else should even look at the MR.
	 */
	wpriv->ss.sge.mr = NULL;
	wpriv->ss.sge.m = 0;
	wpriv->ss.sge.n = 0;

	wpriv->ss.sg_list = NULL;
	wpriv->ss.total_len = wpriv->ss.sge.sge_length;
	wpriv->ss.num_sge = 1;

	/* Construct the TID RDMA READ REQ packet header */
	rcu_read_lock();
	remote = rcu_dereference(qpriv->tid_rdma.remote);

	KDETH_RESET(rreq->kdeth0, KVER, 0x1);
	KDETH_RESET(rreq->kdeth1, JKEY, remote->jkey);
	rreq->reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr +
			   req->cur_seg * req->seg_len + flow->sent);
	rreq->reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey);
	rreq->reth.length = cpu_to_be32(*len);
	rreq->tid_flow_psn =
		cpu_to_be32((flow->flow_state.generation <<
			     HFI1_KDETH_BTH_SEQ_SHIFT) |
			    ((flow->flow_state.spsn + flow->pkt) &
			     HFI1_KDETH_BTH_SEQ_MASK));
	rreq->tid_flow_qp =
		cpu_to_be32(qpriv->tid_rdma.local.qp |
			    ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
			     TID_RDMA_DESTQP_FLOW_SHIFT) |
			    qpriv->rcd->ctxt);
	rreq->verbs_qp = cpu_to_be32(qp->remote_qpn);
	*bth1 &= ~RVT_QPN_MASK;
	*bth1 |= remote->qp;
	*bth2 |= IB_BTH_REQ_ACK;
	rcu_read_unlock();

	/* We are done with this segment */
	flow->sent += *len;
	req->cur_seg++;
	qp->s_state = TID_OP(READ_REQ);
	req->ack_pending++;
	req->flow_idx = (req->flow_idx + 1) & (MAX_FLOWS - 1);
	qpriv->pending_tid_r_segs++;
	qp->s_num_rd_atomic++;

	/* Set the TID RDMA READ request payload size */
	*len = req_len;

	return sizeof(ohdr->u.tid_rdma.r_req) / sizeof(u32);
}
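
/*
 * Worked example (editorial addition, illustrative only; takes
 * HFI1_KDETH_BTH_SEQ_SHIFT as 11): generation 5 and spsn + pkt = 103
 * pack into tid_flow_psn as (5 << 11) | 103 = 0x2867. The responder
 * recovers the generation as 0x2867 >> 11 = 5 and the sequence as
 * 0x2867 & 0x7ff = 103, mirroring the unpacking in
 * tid_rdma_rcv_read_request() below.
 */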

/*
 * @len: contains the data length to read upon entry and the read request
 *       payload length upon exit.
 */
u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
				 struct ib_other_headers *ohdr, u32 *bth1,
				 u32 *bth2, u32 *len)
	__must_hold(&qp->s_lock)
{
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
	struct tid_rdma_flow *flow = NULL;
	u32 hdwords = 0;
	bool last;
	bool retry = true;
	u32 npkts = rvt_div_round_up_mtu(qp, *len);

	trace_hfi1_tid_req_build_read_req(qp, 0, wqe->wr.opcode, wqe->psn,
					  wqe->lpsn, req);
	/*
	 * Check sync conditions. Make sure that there are no pending
	 * segments before freeing the flow.
	 */
sync_check:
	if (req->state == TID_REQUEST_SYNC) {
		if (qpriv->pending_tid_r_segs)
			goto done;

		hfi1_kern_clear_hw_flow(req->rcd, qp);
		qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
		req->state = TID_REQUEST_ACTIVE;
	}

	/*
	 * If the request for this segment is resent, the tid resources should
	 * have been allocated before. In this case, req->flow_idx should
	 * fall behind req->setup_head.
	 */
	if (req->flow_idx == req->setup_head) {
		retry = false;
		if (req->state == TID_REQUEST_RESEND) {
			/*
			 * This is the first new segment for a request whose
			 * earlier segments have been re-sent. We need to
			 * set up the sge pointer correctly.
			 */
			restart_sge(&qp->s_sge, wqe, req->s_next_psn,
				    qp->pmtu);
			req->isge = 0;
			req->state = TID_REQUEST_ACTIVE;
		}

		/*
		 * Check sync. The last PSN of each generation is reserved for
		 * RESYNC.
		 */
		if ((qpriv->flow_state.psn + npkts) > MAX_TID_FLOW_PSN - 1) {
			req->state = TID_REQUEST_SYNC;
			goto sync_check;
		}

		/* Allocate the HW flow if it has not been allocated yet */
		if (hfi1_kern_setup_hw_flow(qpriv->rcd, qp))
			goto done;

		/*
		 * The following call will advance req->setup_head after
		 * allocating the tid entries.
		 */
		if (hfi1_kern_exp_rcv_setup(req, &qp->s_sge, &last)) {
			req->state = TID_REQUEST_QUEUED;

			/*
			 * We don't have resources for this segment. The QP has
			 * already been queued.
			 */
			goto done;
		}
	}

	/* req->flow_idx should only be one slot behind req->setup_head */
	flow = &req->flows[req->flow_idx];
	flow->pkt = 0;
	flow->tid_idx = 0;
	flow->sent = 0;
	if (!retry) {
		/* Set the first and last IB PSN for the flow in use. */
		flow->flow_state.ib_spsn = req->s_next_psn;
		flow->flow_state.ib_lpsn =
			flow->flow_state.ib_spsn + flow->npkts - 1;
	}

	/* Calculate the next segment start psn. */
	req->s_next_psn += flow->npkts;

	/* Build the packet header */
	hdwords = hfi1_build_tid_rdma_read_packet(wqe, ohdr, bth1, bth2, len);
done:
	return hdwords;
}
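
/*
 * Worked example (editorial addition, illustrative only; assumes
 * MAX_TID_FLOW_PSN is 2048): with qpriv->flow_state.psn = 2040 and a
 * segment needing npkts = 10, 2040 + 10 > 2047 would cross the
 * reserved last PSN of the generation, so the request enters
 * TID_REQUEST_SYNC; once all pending segments drain, the HW flow is
 * cleared and a fresh generation is used.
 */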

/*
 * Validate and accept the TID RDMA READ request parameters.
 * Return 0 if the request is accepted successfully;
 * Return 1 otherwise.
 */
static int tid_rdma_rcv_read_request(struct rvt_qp *qp,
				     struct rvt_ack_entry *e,
				     struct hfi1_packet *packet,
				     struct ib_other_headers *ohdr,
				     u32 bth0, u32 psn, u64 vaddr, u32 len)
{
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct tid_rdma_request *req;
	struct tid_rdma_flow *flow;
	u32 flow_psn, i, tidlen = 0, pktlen, tlen;

	req = ack_to_tid_req(e);

	/* Validate the payload first */
	flow = &req->flows[req->setup_head];

	/* payload length = packet length - (header length + ICRC length) */
	pktlen = packet->tlen - (packet->hlen + 4);
	if (pktlen > sizeof(flow->tid_entry))
		return 1;
	memcpy(flow->tid_entry, packet->ebuf, pktlen);
	flow->tidcnt = pktlen / sizeof(*flow->tid_entry);

	/*
	 * Walk the TID_ENTRY list to make sure we have enough space for a
	 * complete segment. Also calculate the number of required packets.
	 */
	flow->npkts = rvt_div_round_up_mtu(qp, len);
	for (i = 0; i < flow->tidcnt; i++) {
		trace_hfi1_tid_entry_rcv_read_req(qp, i,
						  flow->tid_entry[i]);
		tlen = EXP_TID_GET(flow->tid_entry[i], LEN);
		if (!tlen)
			return 1;

		/*
		 * For a tid pair (tidctrl == 3), the buffer size of the pair
		 * should be the sum of the buffer sizes described by each
		 * tid entry. However, only the first entry needs to be
		 * specified in the request (see WFR HAS Section 8.5.7.1).
		 */
		tidlen += tlen;
	}
	if (tidlen * PAGE_SIZE < len)
		return 1;

	/* Empty the flow array */
	req->clear_tail = req->setup_head;
	flow->pkt = 0;
	flow->tid_idx = 0;
	flow->tid_offset = 0;
	flow->sent = 0;
	flow->tid_qpn = be32_to_cpu(ohdr->u.tid_rdma.r_req.tid_flow_qp);
	flow->idx = (flow->tid_qpn >> TID_RDMA_DESTQP_FLOW_SHIFT) &
		    TID_RDMA_DESTQP_FLOW_MASK;
	flow_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_req.tid_flow_psn));
	flow->flow_state.generation = flow_psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
	flow->flow_state.spsn = flow_psn & HFI1_KDETH_BTH_SEQ_MASK;
	flow->length = len;

	flow->flow_state.lpsn = flow->flow_state.spsn +
		flow->npkts - 1;
	flow->flow_state.ib_spsn = psn;
	flow->flow_state.ib_lpsn = flow->flow_state.ib_spsn + flow->npkts - 1;

	trace_hfi1_tid_flow_rcv_read_req(qp, req->setup_head, flow);
	/* Set the initial flow index to the current flow. */
	req->flow_idx = req->setup_head;

	/* advance circular buffer head */
	req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);

	/*
	 * Compute last PSN for request.
	 */
	e->opcode = (bth0 >> 24) & 0xff;
	e->psn = psn;
	e->lpsn = psn + flow->npkts - 1;
	e->sent = 0;

	req->n_flows = qpriv->tid_rdma.local.max_read;
	req->state = TID_REQUEST_ACTIVE;
	req->cur_seg = 0;
	req->comp_seg = 0;
	req->ack_seg = 0;
	req->isge = 0;
	req->seg_len = qpriv->tid_rdma.local.max_len;
	req->total_len = len;
	req->total_segs = 1;
	req->r_flow_psn = e->psn;

	trace_hfi1_tid_req_rcv_read_req(qp, 0, e->opcode, e->psn, e->lpsn,
					req);
	return 0;
}
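
/*
 * Worked example (editorial addition, illustrative only; 4KB pages and
 * a 4096-byte pMTU): a 32KB request gives flow->npkts = 8. Two TID
 * entries with LEN = 4 pages each give tidlen = 8, and
 * 8 * PAGE_SIZE = 32768 >= len, so the request is accepted; had the
 * entries described only seven pages, the function would return 1 and
 * the request would be rejected.
 */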

static int tid_rdma_rcv_error(struct hfi1_packet *packet,
			      struct ib_other_headers *ohdr,
			      struct rvt_qp *qp, u32 psn, int diff)
{
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct hfi1_ctxtdata *rcd = ((struct hfi1_qp_priv *)qp->priv)->rcd;
	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct rvt_ack_entry *e;
	struct tid_rdma_request *req;
	unsigned long flags;
	u8 prev;
	bool old_req;

	trace_hfi1_rsp_tid_rcv_error(qp, psn);
	trace_hfi1_tid_rdma_rcv_err(qp, 0, psn, diff);
	if (diff > 0) {
		/* sequence error */
		if (!qp->r_nak_state) {
			ibp->rvp.n_rc_seqnak++;
			qp->r_nak_state = IB_NAK_PSN_ERROR;
			qp->r_ack_psn = qp->r_psn;
			rc_defered_ack(rcd, qp);
		}
		goto done;
	}

	ibp->rvp.n_rc_dupreq++;

	spin_lock_irqsave(&qp->s_lock, flags);
	e = find_prev_entry(qp, psn, &prev, NULL, &old_req);
	if (!e || (e->opcode != TID_OP(READ_REQ) &&
		   e->opcode != TID_OP(WRITE_REQ)))
		goto unlock;

	req = ack_to_tid_req(e);
	req->r_flow_psn = psn;
	trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn, e->lpsn, req);
	if (e->opcode == TID_OP(READ_REQ)) {
		struct ib_reth *reth;
		u32 len;
		u32 rkey;
		u64 vaddr;
		int ok;
		u32 bth0;

		reth = &ohdr->u.tid_rdma.r_req.reth;
		/*
		 * The requester always restarts from the start of the original
		 * request.
		 */
		len = be32_to_cpu(reth->length);
		if (psn != e->psn || len != req->total_len)
			goto unlock;

		release_rdma_sge_mr(e);

		rkey = be32_to_cpu(reth->rkey);
		vaddr = get_ib_reth_vaddr(reth);

		qp->r_len = len;
		ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
				 IB_ACCESS_REMOTE_READ);
		if (unlikely(!ok))
			goto unlock;

		/*
		 * If all the response packets for the current request have
		 * been sent out, this request is complete (old_req == false)
		 * and the TID flow may be unusable (req->clear_tail has been
		 * advanced). However, when an earlier request is received,
		 * this request will no longer be complete
		 * (qp->s_tail_ack_queue is moved back; see below).
		 * Consequently, we need to update the TID flow info every
		 * time a duplicate request is received.
		 */
206162306a36Sopenharmony_ci		bth0 = be32_to_cpu(ohdr->bth[0]);
206262306a36Sopenharmony_ci		if (tid_rdma_rcv_read_request(qp, e, packet, ohdr, bth0, psn,
206362306a36Sopenharmony_ci					      vaddr, len))
206462306a36Sopenharmony_ci			goto unlock;
206562306a36Sopenharmony_ci
206662306a36Sopenharmony_ci		/*
206762306a36Sopenharmony_ci		 * True if the request is already scheduled (between
206862306a36Sopenharmony_ci		 * qp->s_tail_ack_queue and qp->r_head_ack_queue);
206962306a36Sopenharmony_ci		 */
207062306a36Sopenharmony_ci		if (old_req)
207162306a36Sopenharmony_ci			goto unlock;
207262306a36Sopenharmony_ci	} else {
207362306a36Sopenharmony_ci		struct flow_state *fstate;
207462306a36Sopenharmony_ci		bool schedule = false;
207562306a36Sopenharmony_ci		u8 i;
207662306a36Sopenharmony_ci
207762306a36Sopenharmony_ci		if (req->state == TID_REQUEST_RESEND) {
207862306a36Sopenharmony_ci			req->state = TID_REQUEST_RESEND_ACTIVE;
207962306a36Sopenharmony_ci		} else if (req->state == TID_REQUEST_INIT_RESEND) {
208062306a36Sopenharmony_ci			req->state = TID_REQUEST_INIT;
208162306a36Sopenharmony_ci			schedule = true;
208262306a36Sopenharmony_ci		}
208362306a36Sopenharmony_ci
208462306a36Sopenharmony_ci		/*
208562306a36Sopenharmony_ci		 * True if the request is already scheduled (between
208662306a36Sopenharmony_ci		 * qp->s_tail_ack_queue and qp->r_head_ack_queue).
208762306a36Sopenharmony_ci		 * Also, don't change requests, which are at the SYNC
208862306a36Sopenharmony_ci		 * point and haven't generated any responses yet.
208962306a36Sopenharmony_ci		 * There is nothing to retransmit for them yet.
209062306a36Sopenharmony_ci		 */
209162306a36Sopenharmony_ci		if (old_req || req->state == TID_REQUEST_INIT ||
209262306a36Sopenharmony_ci		    (req->state == TID_REQUEST_SYNC && !req->cur_seg)) {
209362306a36Sopenharmony_ci			for (i = prev + 1; ; i++) {
209462306a36Sopenharmony_ci				if (i > rvt_size_atomic(&dev->rdi))
209562306a36Sopenharmony_ci					i = 0;
209662306a36Sopenharmony_ci				if (i == qp->r_head_ack_queue)
209762306a36Sopenharmony_ci					break;
209862306a36Sopenharmony_ci				e = &qp->s_ack_queue[i];
209962306a36Sopenharmony_ci				req = ack_to_tid_req(e);
210062306a36Sopenharmony_ci				if (e->opcode == TID_OP(WRITE_REQ) &&
210162306a36Sopenharmony_ci				    req->state == TID_REQUEST_INIT)
210262306a36Sopenharmony_ci					req->state = TID_REQUEST_INIT_RESEND;
210362306a36Sopenharmony_ci			}
210462306a36Sopenharmony_ci			/*
210562306a36Sopenharmony_ci			 * If the state of the request has been changed,
210662306a36Sopenharmony_ci			 * the first leg needs to get scheduled in order to
210762306a36Sopenharmony_ci			 * pick up the change. Otherwise, normal response
210862306a36Sopenharmony_ci			 * processing should take care of it.
210962306a36Sopenharmony_ci			 */
211062306a36Sopenharmony_ci			if (!schedule)
211162306a36Sopenharmony_ci				goto unlock;
211262306a36Sopenharmony_ci		}
211362306a36Sopenharmony_ci
211462306a36Sopenharmony_ci		/*
211562306a36Sopenharmony_ci		 * If there is no more allocated segment, just schedule the qp
211662306a36Sopenharmony_ci		 * without changing any state.
211762306a36Sopenharmony_ci		 */
211862306a36Sopenharmony_ci		if (req->clear_tail == req->setup_head)
211962306a36Sopenharmony_ci			goto schedule;
212062306a36Sopenharmony_ci		/*
212162306a36Sopenharmony_ci		 * If this request has sent responses for segments, which have
212262306a36Sopenharmony_ci		 * not received data yet (flow_idx != clear_tail), the flow_idx
212362306a36Sopenharmony_ci		 * pointer needs to be adjusted so the same responses can be
212462306a36Sopenharmony_ci		 * re-sent.
212562306a36Sopenharmony_ci		 */
212662306a36Sopenharmony_ci		if (CIRC_CNT(req->flow_idx, req->clear_tail, MAX_FLOWS)) {
212762306a36Sopenharmony_ci			fstate = &req->flows[req->clear_tail].flow_state;
212862306a36Sopenharmony_ci			qpriv->pending_tid_w_segs -=
212962306a36Sopenharmony_ci				CIRC_CNT(req->flow_idx, req->clear_tail,
213062306a36Sopenharmony_ci					 MAX_FLOWS);
213162306a36Sopenharmony_ci			req->flow_idx =
213262306a36Sopenharmony_ci				CIRC_ADD(req->clear_tail,
213362306a36Sopenharmony_ci					 delta_psn(psn, fstate->resp_ib_psn),
213462306a36Sopenharmony_ci					 MAX_FLOWS);
213562306a36Sopenharmony_ci			qpriv->pending_tid_w_segs +=
213662306a36Sopenharmony_ci				delta_psn(psn, fstate->resp_ib_psn);
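			/*
			 * Illustrative example: if the response for the
			 * segment at clear_tail started at resp_ib_psn ==
			 * 100 and the duplicate request asks for PSN 102,
			 * flow_idx is moved two slots past clear_tail so
			 * that response (re)generation resumes at the
			 * segment the requester actually asked for.
			 */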
			/*
			 * When flow_idx == setup_head, we've gotten a duplicate
			 * request for a segment, which has not been allocated
			 * yet. In that case, don't adjust this request.
			 * However, we still want to go through the loop below
			 * to adjust all subsequent requests.
			 */
			if (CIRC_CNT(req->setup_head, req->flow_idx,
				     MAX_FLOWS)) {
				req->cur_seg = delta_psn(psn, e->psn);
				req->state = TID_REQUEST_RESEND_ACTIVE;
			}
		}

		for (i = prev + 1; ; i++) {
			/*
			 * Look at everything up to and including
			 * s_tail_ack_queue
			 */
			if (i > rvt_size_atomic(&dev->rdi))
				i = 0;
			if (i == qp->r_head_ack_queue)
				break;
			e = &qp->s_ack_queue[i];
			req = ack_to_tid_req(e);
			trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn,
						   e->lpsn, req);
			if (e->opcode != TID_OP(WRITE_REQ) ||
			    req->cur_seg == req->comp_seg ||
			    req->state == TID_REQUEST_INIT ||
			    req->state == TID_REQUEST_INIT_RESEND) {
				if (req->state == TID_REQUEST_INIT)
					req->state = TID_REQUEST_INIT_RESEND;
				continue;
			}
			qpriv->pending_tid_w_segs -=
				CIRC_CNT(req->flow_idx,
					 req->clear_tail,
					 MAX_FLOWS);
			req->flow_idx = req->clear_tail;
			req->state = TID_REQUEST_RESEND;
			req->cur_seg = req->comp_seg;
		}
		qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
	}
	/* Re-process old requests. */
	if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
		qp->s_acked_ack_queue = prev;
	qp->s_tail_ack_queue = prev;
	/*
	 * Since qp->s_tail_ack_queue is modified, qp->s_ack_state must be
	 * changed to re-initialize qp->s_ack_rdma_sge; otherwise, we will
	 * end up in the wrong memory region.
	 */
	qp->s_ack_state = OP(ACKNOWLEDGE);
schedule:
	/*
	 * It's possible to receive a retry PSN that is earlier than an RNR
	 * NAK PSN. In this case, the RNR NAK state should be cleared.
	 */
	if (qpriv->rnr_nak_state) {
		qp->s_nak_state = 0;
		qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
		qp->r_psn = e->lpsn + 1;
		hfi1_tid_write_alloc_resources(qp, true);
	}

	qp->r_state = e->opcode;
	qp->r_nak_state = 0;
	qp->s_flags |= RVT_S_RESP_PENDING;
	hfi1_schedule_send(qp);
unlock:
	spin_unlock_irqrestore(&qp->s_lock, flags);
done:
	return 1;
}

void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
{
	/* HANDLER FOR TID RDMA READ REQUEST packet (Responder side) */

	/*
	 * 1. Verify TID RDMA READ REQ as per IB_OPCODE_RC_RDMA_READ
	 *    (see hfi1_rc_rcv())
	 * 2. Put TID RDMA READ REQ into the response queue (s_ack_queue)
	 *     - Setup struct tid_rdma_req with request info
	 *     - Initialize struct tid_rdma_flow info;
	 *     - Copy TID entries;
	 * 3. Set the qp->s_ack_state.
	 * 4. Set RVT_S_RESP_PENDING in s_flags.
	 * 5. Kick the send engine (hfi1_schedule_send())
	 */
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct rvt_qp *qp = packet->qp;
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct ib_other_headers *ohdr = packet->ohdr;
	struct rvt_ack_entry *e;
	unsigned long flags;
	struct ib_reth *reth;
	struct hfi1_qp_priv *qpriv = qp->priv;
	u32 bth0, psn, len, rkey;
	bool fecn;
	u8 next;
	u64 vaddr;
	int diff;
	u8 nack_state = IB_NAK_INVALID_REQUEST;

	bth0 = be32_to_cpu(ohdr->bth[0]);
	if (hfi1_ruc_check_hdr(ibp, packet))
		return;

	fecn = process_ecn(qp, packet);
	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
	trace_hfi1_rsp_rcv_tid_read_req(qp, psn);

	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
		rvt_comm_est(qp);

	if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
		goto nack_inv;

	reth = &ohdr->u.tid_rdma.r_req.reth;
	vaddr = be64_to_cpu(reth->vaddr);
	len = be32_to_cpu(reth->length);
	/* The length needs to be a multiple of PAGE_SIZE */
	if (!len || len & ~PAGE_MASK || len > qpriv->tid_rdma.local.max_len)
		goto nack_inv;
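	/*
	 * For illustration (assuming 4 KiB pages), the check above accepts
	 * len == 0x42000 (264 KiB) but rejects len == 0x41800, which fails
	 * len & ~PAGE_MASK.
	 */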
	diff = delta_psn(psn, qp->r_psn);
	if (unlikely(diff)) {
		tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
		return;
	}

	/* We've verified the request, insert it into the ack queue. */
	next = qp->r_head_ack_queue + 1;
	if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
		next = 0;
	spin_lock_irqsave(&qp->s_lock, flags);
	if (unlikely(next == qp->s_tail_ack_queue)) {
		if (!qp->s_ack_queue[next].sent) {
			nack_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
			goto nack_inv_unlock;
		}
		update_ack_queue(qp, next);
	}
	e = &qp->s_ack_queue[qp->r_head_ack_queue];
	release_rdma_sge_mr(e);

	rkey = be32_to_cpu(reth->rkey);
	qp->r_len = len;

	if (unlikely(!rvt_rkey_ok(qp, &e->rdma_sge, qp->r_len, vaddr,
				  rkey, IB_ACCESS_REMOTE_READ)))
		goto nack_acc;

	/* Accept the request parameters */
	if (tid_rdma_rcv_read_request(qp, e, packet, ohdr, bth0, psn, vaddr,
				      len))
		goto nack_inv_unlock;

	qp->r_state = e->opcode;
	qp->r_nak_state = 0;
	/*
	 * We need to increment the MSN here instead of when we
	 * finish sending the result since a duplicate request would
	 * increment it more than once.
	 */
	qp->r_msn++;
	qp->r_psn += e->lpsn - e->psn + 1;
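	/*
	 * r_psn now points one past the last PSN consumed by this request,
	 * i.e. where the next new request must start.
	 */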
	qp->r_head_ack_queue = next;

	/*
	 * For all requests other than TID WRITE which are added to the ack
	 * queue, qpriv->r_tid_alloc follows qp->r_head_ack_queue. It is ok to
	 * do this because of interlocks between these and TID WRITE
	 * requests. The same change has also been made in hfi1_rc_rcv().
	 */
	qpriv->r_tid_alloc = qp->r_head_ack_queue;

	/* Schedule the send tasklet. */
	qp->s_flags |= RVT_S_RESP_PENDING;
	if (fecn)
		qp->s_flags |= RVT_S_ECN;
	hfi1_schedule_send(qp);

	spin_unlock_irqrestore(&qp->s_lock, flags);
	return;

nack_inv_unlock:
	spin_unlock_irqrestore(&qp->s_lock, flags);
nack_inv:
	rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
	qp->r_nak_state = nack_state;
	qp->r_ack_psn = qp->r_psn;
	/* Queue NAK for later */
	rc_defered_ack(rcd, qp);
	return;
nack_acc:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
	qp->r_ack_psn = qp->r_psn;
}

u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
				  struct ib_other_headers *ohdr, u32 *bth0,
				  u32 *bth1, u32 *bth2, u32 *len, bool *last)
{
	struct hfi1_ack_priv *epriv = e->priv;
	struct tid_rdma_request *req = &epriv->tid_req;
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
	u32 tidentry = flow->tid_entry[flow->tid_idx];
	u32 tidlen = EXP_TID_GET(tidentry, LEN) << PAGE_SHIFT;
	struct tid_rdma_read_resp *resp = &ohdr->u.tid_rdma.r_rsp;
	u32 next_offset, om = KDETH_OM_LARGE;
	bool last_pkt;
	u32 hdwords = 0;
	struct tid_rdma_params *remote;

	*len = min_t(u32, qp->pmtu, tidlen - flow->tid_offset);
	flow->sent += *len;
	next_offset = flow->tid_offset + *len;
	last_pkt = (flow->sent >= flow->length);

	trace_hfi1_tid_entry_build_read_resp(qp, flow->tid_idx, tidentry);
	trace_hfi1_tid_flow_build_read_resp(qp, req->clear_tail, flow);

	rcu_read_lock();
	remote = rcu_dereference(qpriv->tid_rdma.remote);
	if (!remote) {
		rcu_read_unlock();
		goto done;
	}
	KDETH_RESET(resp->kdeth0, KVER, 0x1);
	KDETH_SET(resp->kdeth0, SH, !last_pkt);
	KDETH_SET(resp->kdeth0, INTR, !!(!last_pkt && remote->urg));
	KDETH_SET(resp->kdeth0, TIDCTRL, EXP_TID_GET(tidentry, CTRL));
	KDETH_SET(resp->kdeth0, TID, EXP_TID_GET(tidentry, IDX));
	KDETH_SET(resp->kdeth0, OM, om == KDETH_OM_LARGE);
	KDETH_SET(resp->kdeth0, OFFSET, flow->tid_offset / om);
	KDETH_RESET(resp->kdeth1, JKEY, remote->jkey);
	resp->verbs_qp = cpu_to_be32(qp->remote_qpn);
	rcu_read_unlock();

	resp->aeth = rvt_compute_aeth(qp);
	resp->verbs_psn = cpu_to_be32(mask_psn(flow->flow_state.ib_spsn +
					       flow->pkt));

	*bth0 = TID_OP(READ_RESP) << 24;
	*bth1 = flow->tid_qpn;
	*bth2 = mask_psn(((flow->flow_state.spsn + flow->pkt++) &
			  HFI1_KDETH_BTH_SEQ_MASK) |
			 (flow->flow_state.generation <<
			  HFI1_KDETH_BTH_SEQ_SHIFT));
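	/*
	 * For illustration, assuming an 11-bit KDETH sequence field
	 * (HFI1_KDETH_BTH_SEQ_SHIFT == 11): generation 0x5 with sequence
	 * 0x7f yields bth2 == (0x5 << 11) | 0x7f == 0x287f.
	 */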
	*last = last_pkt;
	if (last_pkt)
		/* Advance to next flow */
		req->clear_tail = (req->clear_tail + 1) &
				  (MAX_FLOWS - 1);

	if (next_offset >= tidlen) {
		flow->tid_offset = 0;
		flow->tid_idx++;
	} else {
		flow->tid_offset = next_offset;
	}

	hdwords = sizeof(ohdr->u.tid_rdma.r_rsp) / sizeof(u32);

done:
	return hdwords;
}

static inline struct tid_rdma_request *
find_tid_request(struct rvt_qp *qp, u32 psn, enum ib_wr_opcode opcode)
	__must_hold(&qp->s_lock)
{
	struct rvt_swqe *wqe;
	struct tid_rdma_request *req = NULL;
	u32 i, end;

	end = qp->s_cur + 1;
	if (end == qp->s_size)
		end = 0;
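	/*
	 * Scan the window of in-flight WQEs, from the oldest un-ACKed
	 * entry (s_acked) up to and including the one currently being
	 * processed (s_cur).
	 */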
	for (i = qp->s_acked; i != end;) {
		wqe = rvt_get_swqe_ptr(qp, i);
		if (cmp_psn(psn, wqe->psn) >= 0 &&
		    cmp_psn(psn, wqe->lpsn) <= 0) {
			if (wqe->wr.opcode == opcode)
				req = wqe_to_tid_req(wqe);
			break;
		}
		if (++i == qp->s_size)
			i = 0;
	}

	return req;
}

void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
{
	/* HANDLER FOR TID RDMA READ RESPONSE packet (Requester side) */

	/*
	 * 1. Find matching SWQE
	 * 2. Check that the entire segment has been read.
	 * 3. Remove HFI1_S_WAIT_TID_RESP from s_flags.
	 * 4. Free the TID flow resources.
	 * 5. Kick the send engine (hfi1_schedule_send())
	 */
	struct ib_other_headers *ohdr = packet->ohdr;
	struct rvt_qp *qp = packet->qp;
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct tid_rdma_request *req;
	struct tid_rdma_flow *flow;
	u32 opcode, aeth;
	bool fecn;
	unsigned long flags;
	u32 kpsn, ipsn;

	trace_hfi1_sender_rcv_tid_read_resp(qp);
	fecn = process_ecn(qp, packet);
	kpsn = mask_psn(be32_to_cpu(ohdr->bth[2]));
	aeth = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.aeth);
	opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;

	spin_lock_irqsave(&qp->s_lock, flags);
	ipsn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn));
	req = find_tid_request(qp, ipsn, IB_WR_TID_RDMA_READ);
	if (unlikely(!req))
		goto ack_op_err;

	flow = &req->flows[req->clear_tail];
	/* When header suppression is disabled */
	if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) {
		update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);

		if (cmp_psn(kpsn, flow->flow_state.r_next_psn))
			goto ack_done;
		flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
		/*
		 * Copy the payload to the destination buffer if this packet
		 * was delivered as an eager packet due to the RSM rule and
		 * FECN. The RSM rule selects the FECN bit in the BTH and the
		 * SH bit in the KDETH header and therefore will not match
		 * the last packet of each segment, which has the SH bit
		 * cleared.
		 */
		if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
			struct rvt_sge_state ss;
			u32 len;
			u32 tlen = packet->tlen;
			u16 hdrsize = packet->hlen;
			u8 pad = packet->pad;
			u8 extra_bytes = pad + packet->extra_byte +
				(SIZE_OF_CRC << 2);
			u32 pmtu = qp->pmtu;
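
			/*
			 * extra_bytes above accounts for the payload pad,
			 * any extra bytes reported by the hardware, and the
			 * ICRC (SIZE_OF_CRC is in 32-bit words, hence the
			 * << 2).
			 */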
			if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
				goto ack_op_err;
			len = restart_sge(&ss, req->e.swqe, ipsn, pmtu);
			if (unlikely(len < pmtu))
				goto ack_op_err;
			rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
				     false);
			/* Raise the sw sequence check flag for next packet */
			priv->s_flags |= HFI1_R_TID_SW_PSN;
		}

		goto ack_done;
	}
	flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
	req->ack_pending--;
	priv->pending_tid_r_segs--;
	qp->s_num_rd_atomic--;
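	/*
	 * A TID RDMA READ consumes an rd_atomic slot just like an RDMA
	 * READ; releasing one may unblock a sender waiting on a fence or
	 * on the outstanding read/atomic limit.
	 */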
	if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
	    !qp->s_num_rd_atomic) {
		qp->s_flags &= ~(RVT_S_WAIT_FENCE |
				 RVT_S_WAIT_ACK);
		hfi1_schedule_send(qp);
	}
	if (qp->s_flags & RVT_S_WAIT_RDMAR) {
		qp->s_flags &= ~(RVT_S_WAIT_RDMAR | RVT_S_WAIT_ACK);
		hfi1_schedule_send(qp);
	}

	trace_hfi1_ack(qp, ipsn);
	trace_hfi1_tid_req_rcv_read_resp(qp, 0, req->e.swqe->wr.opcode,
					 req->e.swqe->psn, req->e.swqe->lpsn,
					 req);
	trace_hfi1_tid_flow_rcv_read_resp(qp, req->clear_tail, flow);

	/* Release the tid resources */
	hfi1_kern_exp_rcv_clear(req);

	if (!do_rc_ack(qp, aeth, ipsn, opcode, 0, rcd))
		goto ack_done;

	/* If not done yet, build next read request */
	if (++req->comp_seg >= req->total_segs) {
		priv->tid_r_comp++;
		req->state = TID_REQUEST_COMPLETE;
	}

	/*
	 * Clear the hw flow under two conditions:
	 * 1. This request is a sync point and it is complete;
	 * 2. Current request is completed and there are no more requests.
	 */
	if ((req->state == TID_REQUEST_SYNC &&
	     req->comp_seg == req->cur_seg) ||
	    priv->tid_r_comp == priv->tid_r_reqs) {
		hfi1_kern_clear_hw_flow(priv->rcd, qp);
		priv->s_flags &= ~HFI1_R_TID_SW_PSN;
		if (req->state == TID_REQUEST_SYNC)
			req->state = TID_REQUEST_ACTIVE;
	}

	hfi1_schedule_send(qp);
	goto ack_done;

ack_op_err:
	/*
	 * The test indicates that the send engine has finished its cleanup
	 * after sending the request and it's now safe to put the QP into error
	 * state. However, if the wqe queue is empty (qp->s_acked == qp->s_tail
	 * == qp->s_head), it would be unsafe to complete the wqe pointed to by
	 * qp->s_acked here. Putting the qp into error state will safely flush
	 * all remaining requests.
	 */
	if (qp->s_last == qp->s_acked)
		rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);

ack_done:
	spin_unlock_irqrestore(&qp->s_lock, flags);
}

void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp)
	__must_hold(&qp->s_lock)
{
	u32 n = qp->s_acked;
	struct rvt_swqe *wqe;
	struct tid_rdma_request *req;
	struct hfi1_qp_priv *priv = qp->priv;

	lockdep_assert_held(&qp->s_lock);
	/* Free any TID entries */
	while (n != qp->s_tail) {
		wqe = rvt_get_swqe_ptr(qp, n);
		if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
			req = wqe_to_tid_req(wqe);
			hfi1_kern_exp_rcv_clear_all(req);
		}

		if (++n == qp->s_size)
			n = 0;
	}
	/* Free flow */
	hfi1_kern_clear_hw_flow(priv->rcd, qp);
}

static bool tid_rdma_tid_err(struct hfi1_packet *packet, u8 rcv_type)
{
	struct rvt_qp *qp = packet->qp;

	if (rcv_type >= RHF_RCV_TYPE_IB)
		goto done;

	spin_lock(&qp->s_lock);

	/*
	 * We've run out of space in the eager buffer.
	 * Eagerly received KDETH packets that require space in the
	 * eager buffer (packets that have a payload) are TID RDMA WRITE
	 * response packets. In this case, we have to re-transmit the
	 * TID RDMA WRITE request.
	 */
	if (rcv_type == RHF_RCV_TYPE_EAGER) {
		hfi1_restart_rc(qp, qp->s_last_psn + 1, 1);
		hfi1_schedule_send(qp);
	}

	/* Since no payload is delivered, just drop the packet */
	spin_unlock(&qp->s_lock);
done:
	return true;
}

static void restart_tid_rdma_read_req(struct hfi1_ctxtdata *rcd,
				      struct rvt_qp *qp, struct rvt_swqe *wqe)
{
	struct tid_rdma_request *req;
	struct tid_rdma_flow *flow;

	/* Start from the right segment */
	qp->r_flags |= RVT_R_RDMAR_SEQ;
	req = wqe_to_tid_req(wqe);
	flow = &req->flows[req->clear_tail];
	hfi1_restart_rc(qp, flow->flow_state.ib_spsn, 0);
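	/*
	 * ib_spsn is the IB-level PSN of the first response packet of the
	 * current segment, so the retry replays the segment from its start.
	 */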
	if (list_empty(&qp->rspwait)) {
		qp->r_flags |= RVT_R_RSP_SEND;
		rvt_get_qp(qp);
		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
	}
}

/*
 * Handle the KDETH eflags for TID RDMA READ response.
 *
 * Return false if the last packet for a segment has been received and it is
 * time to process the response normally; otherwise, return true.
 *
 * The caller must hold the packet->qp->r_lock and the rcu_read_lock.
 */
static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
				     struct hfi1_packet *packet, u8 rcv_type,
				     u8 rte, u32 psn, u32 ibpsn)
	__must_hold(&packet->qp->r_lock) __must_hold(RCU)
{
	struct hfi1_pportdata *ppd = rcd->ppd;
	struct hfi1_devdata *dd = ppd->dd;
	struct hfi1_ibport *ibp;
	struct rvt_swqe *wqe;
	struct tid_rdma_request *req;
	struct tid_rdma_flow *flow;
	u32 ack_psn;
	struct rvt_qp *qp = packet->qp;
	struct hfi1_qp_priv *priv = qp->priv;
	bool ret = true;
	int diff = 0;
	u32 fpsn;

	lockdep_assert_held(&qp->r_lock);
	trace_hfi1_rsp_read_kdeth_eflags(qp, ibpsn);
	trace_hfi1_sender_read_kdeth_eflags(qp);
	trace_hfi1_tid_read_sender_kdeth_eflags(qp, 0);
	spin_lock(&qp->s_lock);
	/* If the psn is out of valid range, drop the packet */
	if (cmp_psn(ibpsn, qp->s_last_psn) < 0 ||
	    cmp_psn(ibpsn, qp->s_psn) > 0)
		goto s_unlock;

	/*
	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
	 * requests and implicitly NAK RDMA read and atomic requests issued
	 * before the NAK'ed request.
	 */
	ack_psn = ibpsn - 1;
	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
	ibp = to_iport(qp->ibqp.device, qp->port_num);

	/* Complete WQEs that the PSN finishes. */
	while ((int)delta_psn(ack_psn, wqe->lpsn) >= 0) {
		/*
		 * If this request is an RDMA read or atomic, and the NACK is
		 * for a later operation, this NACK NAKs the RDMA read or
		 * atomic.
		 */
		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
		    wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
		    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
			/* Retry this request. */
			if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
				qp->r_flags |= RVT_R_RDMAR_SEQ;
				if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
					restart_tid_rdma_read_req(rcd, qp,
								  wqe);
				} else {
					hfi1_restart_rc(qp, qp->s_last_psn + 1,
							0);
					if (list_empty(&qp->rspwait)) {
						qp->r_flags |= RVT_R_RSP_SEND;
						rvt_get_qp(qp);
						list_add_tail(/* wait */
						   &qp->rspwait,
						   &rcd->qp_wait_list);
					}
				}
			}
			/*
			 * No need to process the NAK since we are
			 * restarting an earlier request.
			 */
			break;
		}

		wqe = do_rc_completion(qp, wqe, ibp);
		if (qp->s_acked == qp->s_tail)
			goto s_unlock;
	}

	if (qp->s_acked == qp->s_tail)
		goto s_unlock;

	/* Handle the eflags for the request */
	if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
		goto s_unlock;

	req = wqe_to_tid_req(wqe);
	trace_hfi1_tid_req_read_kdeth_eflags(qp, 0, wqe->wr.opcode, wqe->psn,
					     wqe->lpsn, req);
	switch (rcv_type) {
	case RHF_RCV_TYPE_EXPECTED:
		switch (rte) {
		case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
			/*
			 * On the first occurrence of a Flow Sequence error,
			 * the HFI1_R_TID_SW_PSN flag is set.
			 *
			 * After that, the flow is *not* reprogrammed and the
			 * protocol falls back to SW PSN checking. This is done
			 * to prevent continuous Flow Sequence errors for any
			 * packets that could be still in the fabric.
			 */
			flow = &req->flows[req->clear_tail];
			trace_hfi1_tid_flow_read_kdeth_eflags(qp,
							      req->clear_tail,
							      flow);
			if (priv->s_flags & HFI1_R_TID_SW_PSN) {
				diff = cmp_psn(psn,
					       flow->flow_state.r_next_psn);
				if (diff > 0) {
					/* Drop the packet. */
					goto s_unlock;
				} else if (diff < 0) {
					/*
					 * If a response packet for a restarted
					 * request has come back, reset the
					 * restart flag.
					 */
					if (qp->r_flags & RVT_R_RDMAR_SEQ)
						qp->r_flags &=
							~RVT_R_RDMAR_SEQ;

					/* Drop the packet. */
					goto s_unlock;
				}

				/*
				 * If SW PSN verification is successful and
				 * this is the last packet in the segment, tell
				 * the caller to process it as a normal packet.
				 */
				fpsn = full_flow_psn(flow,
						     flow->flow_state.lpsn);
				if (cmp_psn(fpsn, psn) == 0) {
					ret = false;
					if (qp->r_flags & RVT_R_RDMAR_SEQ)
						qp->r_flags &=
							~RVT_R_RDMAR_SEQ;
				}
				flow->flow_state.r_next_psn =
					mask_psn(psn + 1);
			} else {
				u32 last_psn;

				last_psn = read_r_next_psn(dd, rcd->ctxt,
							   flow->idx);
				flow->flow_state.r_next_psn = last_psn;
				priv->s_flags |= HFI1_R_TID_SW_PSN;
				/*
				 * If no request has been restarted yet,
				 * restart the current one.
				 */
				if (!(qp->r_flags & RVT_R_RDMAR_SEQ))
					restart_tid_rdma_read_req(rcd, qp,
								  wqe);
			}

			break;

		case RHF_RTE_EXPECTED_FLOW_GEN_ERR:
			/*
			 * Since the TID flow is able to ride through
			 * generation mismatch, drop this stale packet.
			 */
			break;

		default:
			break;
		}
		break;

	case RHF_RCV_TYPE_ERROR:
		switch (rte) {
		case RHF_RTE_ERROR_OP_CODE_ERR:
		case RHF_RTE_ERROR_KHDR_MIN_LEN_ERR:
		case RHF_RTE_ERROR_KHDR_HCRC_ERR:
		case RHF_RTE_ERROR_KHDR_KVER_ERR:
		case RHF_RTE_ERROR_CONTEXT_ERR:
		case RHF_RTE_ERROR_KHDR_TID_ERR:
		default:
			break;
		}
		break;
	default:
		break;
	}
s_unlock:
	spin_unlock(&qp->s_lock);
	return ret;
}

bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
			      struct hfi1_pportdata *ppd,
			      struct hfi1_packet *packet)
{
	struct hfi1_ibport *ibp = &ppd->ibport_data;
	struct hfi1_devdata *dd = ppd->dd;
	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
	u8 rcv_type = rhf_rcv_type(packet->rhf);
	u8 rte = rhf_rcv_type_err(packet->rhf);
	struct ib_header *hdr = packet->hdr;
	struct ib_other_headers *ohdr = NULL;
	int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
	u16 lid  = be16_to_cpu(hdr->lrh[1]);
	u8 opcode;
	u32 qp_num, psn, ibpsn;
	struct rvt_qp *qp;
	struct hfi1_qp_priv *qpriv;
	unsigned long flags;
	bool ret = true;
	struct rvt_ack_entry *e;
	struct tid_rdma_request *req;
	struct tid_rdma_flow *flow;
	int diff = 0;

	trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ",
					   packet->rhf);
	if (packet->rhf & RHF_ICRC_ERR)
		return ret;

	packet->ohdr = &hdr->u.oth;
	ohdr = packet->ohdr;
	trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));

	/* Get the destination QP number. */
	qp_num = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_qp) &
		RVT_QPN_MASK;
	if (lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
		goto drop;

	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
	opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;

	rcu_read_lock();
	qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
	if (!qp)
		goto rcu_unlock;

	packet->qp = qp;

	/* Check for valid receive state. */
	spin_lock_irqsave(&qp->r_lock, flags);
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
		ibp->rvp.n_pkt_drops++;
		goto r_unlock;
	}

	if (packet->rhf & RHF_TID_ERR) {
		/* For TIDERR and RC QPs, preemptively schedule a NAK */
		u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */

		/* Sanity check packet */
		if (tlen < 24)
			goto r_unlock;

		/*
		 * Check for GRH. We should never get packets with GRH in this
		 * path.
		 */
		if (lnh == HFI1_LRH_GRH)
			goto r_unlock;

		if (tid_rdma_tid_err(packet, rcv_type))
			goto r_unlock;
	}

	/* handle TID RDMA READ */
	if (opcode == TID_OP(READ_RESP)) {
		ibpsn = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn);
		ibpsn = mask_psn(ibpsn);
		ret = handle_read_kdeth_eflags(rcd, packet, rcv_type, rte, psn,
					       ibpsn);
		goto r_unlock;
	}

	/*
	 * qp->s_tail_ack_queue points to the rvt_ack_entry currently being
	 * processed. These are completed sequentially, so we can be sure that
	 * the pointer will not change until the entire request has completed.
	 */
	spin_lock(&qp->s_lock);
	qpriv = qp->priv;
	if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID ||
	    qpriv->r_tid_tail == qpriv->r_tid_head)
		goto unlock;
	e = &qp->s_ack_queue[qpriv->r_tid_tail];
	if (e->opcode != TID_OP(WRITE_REQ))
		goto unlock;
	req = ack_to_tid_req(e);
	if (req->comp_seg == req->cur_seg)
		goto unlock;
	flow = &req->flows[req->clear_tail];
	trace_hfi1_eflags_err_write(qp, rcv_type, rte, psn);
	trace_hfi1_rsp_handle_kdeth_eflags(qp, psn);
	trace_hfi1_tid_write_rsp_handle_kdeth_eflags(qp);
	trace_hfi1_tid_req_handle_kdeth_eflags(qp, 0, e->opcode, e->psn,
					       e->lpsn, req);
	trace_hfi1_tid_flow_handle_kdeth_eflags(qp, req->clear_tail, flow);

	switch (rcv_type) {
	case RHF_RCV_TYPE_EXPECTED:
		switch (rte) {
		case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
			if (!(qpriv->s_flags & HFI1_R_TID_SW_PSN)) {
				qpriv->s_flags |= HFI1_R_TID_SW_PSN;
				flow->flow_state.r_next_psn =
					read_r_next_psn(dd, rcd->ctxt,
							flow->idx);
				qpriv->r_next_psn_kdeth =
					flow->flow_state.r_next_psn;
				goto nak_psn;
			} else {
				/*
				 * If the received PSN does not match the next
				 * expected PSN, NAK the packet.
				 * However, only do that if we know that a
				 * NAK has already been sent. Otherwise, this
				 * mismatch could be due to packets that were
				 * already in flight.
				 */
				diff = cmp_psn(psn,
					       flow->flow_state.r_next_psn);
				if (diff > 0)
					goto nak_psn;
				else if (diff < 0)
					break;

				qpriv->s_nak_state = 0;
				/*
				 * If SW PSN verification is successful and this
				 * is the last packet in the segment, tell the
				 * caller to process it as a normal packet.
				 */
				if (psn == full_flow_psn(flow,
							 flow->flow_state.lpsn))
					ret = false;
				flow->flow_state.r_next_psn =
					mask_psn(psn + 1);
				qpriv->r_next_psn_kdeth =
					flow->flow_state.r_next_psn;
			}
			break;

		case RHF_RTE_EXPECTED_FLOW_GEN_ERR:
			goto nak_psn;

		default:
			break;
		}
		break;

	case RHF_RCV_TYPE_ERROR:
		switch (rte) {
		case RHF_RTE_ERROR_OP_CODE_ERR:
		case RHF_RTE_ERROR_KHDR_MIN_LEN_ERR:
		case RHF_RTE_ERROR_KHDR_HCRC_ERR:
		case RHF_RTE_ERROR_KHDR_KVER_ERR:
		case RHF_RTE_ERROR_CONTEXT_ERR:
		case RHF_RTE_ERROR_KHDR_TID_ERR:
		default:
			break;
		}
		break;
	default:
		break;
	}

unlock:
	spin_unlock(&qp->s_lock);
r_unlock:
	spin_unlock_irqrestore(&qp->r_lock, flags);
rcu_unlock:
	rcu_read_unlock();
drop:
	return ret;
nak_psn:
	ibp->rvp.n_rc_seqnak++;
	if (!qpriv->s_nak_state) {
		qpriv->s_nak_state = IB_NAK_PSN_ERROR;
		/* We are NAK'ing the next expected PSN */
		qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
		tid_rdma_trigger_ack(qp);
	}
	goto unlock;
}

/*
 * "Rewind" the TID request information.
 * This means that we reset the state back to ACTIVE,
 * find the proper flow, set the flow index to that flow,
 * and reset the flow information.
 */
void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
			       u32 *bth2)
{
	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
	struct tid_rdma_flow *flow;
	struct hfi1_qp_priv *qpriv = qp->priv;
	int diff, delta_pkts;
	u32 tididx = 0, i;
	u16 fidx;

	if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
		*bth2 = mask_psn(qp->s_psn);
		flow = find_flow_ib(req, *bth2, &fidx);
		if (!flow) {
			trace_hfi1_msg_tid_restart_req(/* msg */
			   qp, "!!!!!! Could not find flow to restart: bth2 ",
			   (u64)*bth2);
			trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode,
						       wqe->psn, wqe->lpsn,
						       req);
			return;
		}
	} else {
		fidx = req->acked_tail;
		flow = &req->flows[fidx];
		*bth2 = mask_psn(req->r_ack_psn);
	}

	if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
		delta_pkts = delta_psn(*bth2, flow->flow_state.ib_spsn);
	else
		delta_pkts = delta_psn(*bth2,
				       full_flow_psn(flow,
						     flow->flow_state.spsn));

	trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
	diff = delta_pkts + flow->resync_npkts;
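	/*
	 * diff is the number of packets to skip over in the TID-entry walk
	 * below: packets from the start of the flow up to the restart
	 * point, plus any packets already covered by a RESYNC.
	 */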

	flow->sent = 0;
	flow->pkt = 0;
	flow->tid_idx = 0;
	flow->tid_offset = 0;
	if (diff) {
		for (tididx = 0; tididx < flow->tidcnt; tididx++) {
			u32 tidentry = flow->tid_entry[tididx], tidlen,
				tidnpkts, npkts;

			flow->tid_offset = 0;
			tidlen = EXP_TID_GET(tidentry, LEN) * PAGE_SIZE;
			tidnpkts = rvt_div_round_up_mtu(qp, tidlen);
			npkts = min_t(u32, diff, tidnpkts);
			flow->pkt += npkts;
			flow->sent += (npkts == tidnpkts ? tidlen :
				       npkts * qp->pmtu);
			flow->tid_offset += npkts * qp->pmtu;
			diff -= npkts;
			if (!diff)
				break;
		}
	}
310662306a36Sopenharmony_ci	if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
310762306a36Sopenharmony_ci		rvt_skip_sge(&qpriv->tid_ss, (req->cur_seg * req->seg_len) +
310862306a36Sopenharmony_ci			     flow->sent, 0);
310962306a36Sopenharmony_ci		/*
311062306a36Sopenharmony_ci		 * Packet PSN is based on flow_state.spsn + flow->pkt. However,
311162306a36Sopenharmony_ci		 * during a RESYNC, the generation is incremented and the
311262306a36Sopenharmony_ci		 * sequence is reset to 0. Since we've adjusted the npkts in the
311362306a36Sopenharmony_ci		 * flow and the SGE has been sufficiently advanced, we have to
311462306a36Sopenharmony_ci		 * adjust flow->pkt in order to calculate the correct PSN.
311562306a36Sopenharmony_ci		 */
311662306a36Sopenharmony_ci		flow->pkt -= flow->resync_npkts;
311762306a36Sopenharmony_ci	}
311862306a36Sopenharmony_ci
311962306a36Sopenharmony_ci	if (flow->tid_offset ==
312062306a36Sopenharmony_ci	    EXP_TID_GET(flow->tid_entry[tididx], LEN) * PAGE_SIZE) {
312162306a36Sopenharmony_ci		tididx++;
312262306a36Sopenharmony_ci		flow->tid_offset = 0;
312362306a36Sopenharmony_ci	}
312462306a36Sopenharmony_ci	flow->tid_idx = tididx;
312562306a36Sopenharmony_ci	if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
312662306a36Sopenharmony_ci		/* Move flow_idx to correct index */
312762306a36Sopenharmony_ci		req->flow_idx = fidx;
312862306a36Sopenharmony_ci	else
312962306a36Sopenharmony_ci		req->clear_tail = fidx;
313062306a36Sopenharmony_ci
313162306a36Sopenharmony_ci	trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
313262306a36Sopenharmony_ci	trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode, wqe->psn,
313362306a36Sopenharmony_ci				       wqe->lpsn, req);
313462306a36Sopenharmony_ci	req->state = TID_REQUEST_ACTIVE;
313562306a36Sopenharmony_ci	if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
313662306a36Sopenharmony_ci		/* Reset all the flows that we are going to resend */
313762306a36Sopenharmony_ci		fidx = CIRC_NEXT(fidx, MAX_FLOWS);
313862306a36Sopenharmony_ci		i = qpriv->s_tid_tail;
313962306a36Sopenharmony_ci		do {
314062306a36Sopenharmony_ci			for (; CIRC_CNT(req->setup_head, fidx, MAX_FLOWS);
314162306a36Sopenharmony_ci			      fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
314262306a36Sopenharmony_ci				req->flows[fidx].sent = 0;
314362306a36Sopenharmony_ci				req->flows[fidx].pkt = 0;
314462306a36Sopenharmony_ci				req->flows[fidx].tid_idx = 0;
314562306a36Sopenharmony_ci				req->flows[fidx].tid_offset = 0;
314662306a36Sopenharmony_ci				req->flows[fidx].resync_npkts = 0;
314762306a36Sopenharmony_ci			}
314862306a36Sopenharmony_ci			if (i == qpriv->s_tid_cur)
314962306a36Sopenharmony_ci				break;
315062306a36Sopenharmony_ci			do {
315162306a36Sopenharmony_ci				i = (++i == qp->s_size ? 0 : i);
315262306a36Sopenharmony_ci				wqe = rvt_get_swqe_ptr(qp, i);
315362306a36Sopenharmony_ci			} while (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE);
315462306a36Sopenharmony_ci			req = wqe_to_tid_req(wqe);
315562306a36Sopenharmony_ci			req->cur_seg = req->ack_seg;
315662306a36Sopenharmony_ci			fidx = req->acked_tail;
315762306a36Sopenharmony_ci			/* Pull req->clear_tail back */
315862306a36Sopenharmony_ci			req->clear_tail = fidx;
315962306a36Sopenharmony_ci		} while (1);
316062306a36Sopenharmony_ci	}
316162306a36Sopenharmony_ci}
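
/*
 * Worked example of the rewind arithmetic above (illustrative only, not
 * driver code; the numbers are hypothetical). Assume a 4 KB PMTU and a
 * flow with two 16 KB TID entries, i.e. tidnpkts = 4 packets per entry.
 * If diff = 5 packets have already been delivered:
 *
 *	// entry 0: tidnpkts = 4, npkts = min(5, 4) = 4
 *	//   flow->pkt = 4, flow->sent = 16384 (whole entry), diff = 1
 *	// entry 1: tidnpkts = 4, npkts = min(1, 4) = 1
 *	//   flow->pkt = 5, flow->sent = 20480, flow->tid_offset = 4096
 *
 * so retransmission resumes one packet into the second TID entry
 * (flow->tid_idx = 1, flow->tid_offset = 4096).
 */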

void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp)
{
	int i, ret;
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct tid_flow_state *fs;

	if (qp->ibqp.qp_type != IB_QPT_RC || !HFI1_CAP_IS_KSET(TID_RDMA))
		return;

	/*
	 * First, clear the flow to help prevent any delayed packets from
	 * being delivered.
	 */
	fs = &qpriv->flow_state;
	if (fs->index != RXE_NUM_TID_FLOWS)
		hfi1_kern_clear_hw_flow(qpriv->rcd, qp);

	for (i = qp->s_acked; i != qp->s_head;) {
		struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);

		if (++i == qp->s_size)
			i = 0;
		/* Free only locally allocated TID entries */
		if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
			continue;
		do {
			struct hfi1_swqe_priv *priv = wqe->priv;

			ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
		} while (!ret);
	}
	for (i = qp->s_acked_ack_queue; i != qp->r_head_ack_queue;) {
		struct rvt_ack_entry *e = &qp->s_ack_queue[i];

		if (++i == rvt_max_atomic(ib_to_rvt(qp->ibqp.device)))
			i = 0;
		/* Free only locally allocated TID entries */
		if (e->opcode != TID_OP(WRITE_REQ))
			continue;
		do {
			struct hfi1_ack_priv *priv = e->priv;

			ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
		} while (!ret);
	}
}

bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
{
	struct rvt_swqe *prev;
	struct hfi1_qp_priv *priv = qp->priv;
	u32 s_prev;
	struct tid_rdma_request *req;

	s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
	prev = rvt_get_swqe_ptr(qp, s_prev);

	switch (wqe->wr.opcode) {
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
	case IB_WR_SEND_WITH_INV:
	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		switch (prev->wr.opcode) {
		case IB_WR_TID_RDMA_WRITE:
			req = wqe_to_tid_req(prev);
			if (req->ack_seg != req->total_segs)
				goto interlock;
			break;
		default:
			break;
		}
		break;
	case IB_WR_RDMA_READ:
		if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
			break;
		fallthrough;
	case IB_WR_TID_RDMA_READ:
		switch (prev->wr.opcode) {
		case IB_WR_RDMA_READ:
			if (qp->s_acked != qp->s_cur)
				goto interlock;
			break;
		case IB_WR_TID_RDMA_WRITE:
			req = wqe_to_tid_req(prev);
			if (req->ack_seg != req->total_segs)
				goto interlock;
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
	return false;

interlock:
	priv->s_flags |= HFI1_S_TID_WAIT_INTERLCK;
	return true;
}
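
/*
 * Sketch of how a caller might consume the interlock flag above
 * (illustrative only; the surrounding labels are hypothetical). If the
 * previous WQE is a TID RDMA WRITE whose segments are not all ACK'd
 * (ack_seg != total_segs), a newly posted request must not be started
 * yet, so the send-side state machine backs off and is rescheduled once
 * the write completes:
 *
 *	if (hfi1_tid_rdma_wqe_interlock(qp, wqe))
 *		goto bail;	// HFI1_S_TID_WAIT_INTERLCK set; retry later
 */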

/* Does @sge meet the alignment requirements for tid rdma? */
static inline bool hfi1_check_sge_align(struct rvt_qp *qp,
					struct rvt_sge *sge, int num_sge)
{
	int i;

	for (i = 0; i < num_sge; i++, sge++) {
		trace_hfi1_sge_check_align(qp, i, sge);
		if ((u64)sge->vaddr & ~PAGE_MASK ||
		    sge->sge_length & ~PAGE_MASK)
			return false;
	}
	return true;
}
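
/*
 * Illustrative behavior of the alignment predicate above (not driver
 * code). With 4 KB pages, ~PAGE_MASK is 0xfff, so the check rejects any
 * SGE whose virtual address or length is not a 4 KB multiple:
 *
 *	vaddr = 0x10000, len = 0x8000  ->  ok
 *	vaddr = 0x10800, len = 0x8000  ->  rejected (vaddr & 0xfff != 0)
 *	vaddr = 0x10000, len = 0x8200  ->  rejected (len & 0xfff != 0)
 *
 * This enforces the qualification rules listed in the TID RDMA READ
 * protocol description at the top of this file.
 */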

void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
{
	struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
	struct hfi1_swqe_priv *priv = wqe->priv;
	struct tid_rdma_params *remote;
	enum ib_wr_opcode new_opcode;
	bool do_tid_rdma = false;
	struct hfi1_pportdata *ppd = qpriv->rcd->ppd;

	if ((rdma_ah_get_dlid(&qp->remote_ah_attr) & ~((1 << ppd->lmc) - 1)) ==
				ppd->lid)
		return;
	if (qpriv->hdr_type != HFI1_PKT_TYPE_9B)
		return;

	rcu_read_lock();
	remote = rcu_dereference(qpriv->tid_rdma.remote);
	/*
	 * If TID RDMA is disabled by the negotiation, don't
	 * use it.
	 */
	if (!remote)
		goto exit;

	if (wqe->wr.opcode == IB_WR_RDMA_READ) {
		if (hfi1_check_sge_align(qp, &wqe->sg_list[0],
					 wqe->wr.num_sge)) {
			new_opcode = IB_WR_TID_RDMA_READ;
			do_tid_rdma = true;
		}
	} else if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
		/*
		 * TID RDMA is enabled for this RDMA WRITE request iff:
		 *   1. The remote address is page-aligned,
		 *   2. The length is larger than the minimum segment size,
		 *   3. The length is page-multiple.
		 */
		if (!(wqe->rdma_wr.remote_addr & ~PAGE_MASK) &&
		    !(wqe->length & ~PAGE_MASK)) {
			new_opcode = IB_WR_TID_RDMA_WRITE;
			do_tid_rdma = true;
		}
	}

	if (do_tid_rdma) {
		if (hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req, GFP_ATOMIC))
			goto exit;
		wqe->wr.opcode = new_opcode;
		priv->tid_req.seg_len =
			min_t(u32, remote->max_len, wqe->length);
		priv->tid_req.total_segs =
			DIV_ROUND_UP(wqe->length, priv->tid_req.seg_len);
		/* Compute the last PSN of the request */
		wqe->lpsn = wqe->psn;
		if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
			priv->tid_req.n_flows = remote->max_read;
			qpriv->tid_r_reqs++;
			wqe->lpsn += rvt_div_round_up_mtu(qp, wqe->length) - 1;
		} else {
			wqe->lpsn += priv->tid_req.total_segs - 1;
			atomic_inc(&qpriv->n_requests);
		}

		priv->tid_req.cur_seg = 0;
		priv->tid_req.comp_seg = 0;
		priv->tid_req.ack_seg = 0;
		priv->tid_req.state = TID_REQUEST_INACTIVE;
		/*
		 * Reset acked_tail.
		 * TID RDMA READ does not have ACKs so it does not
		 * update the pointer. We have to reset it so TID RDMA
		 * WRITE does not get confused.
		 */
		priv->tid_req.acked_tail = priv->tid_req.setup_head;
		trace_hfi1_tid_req_setup_tid_wqe(qp, 1, wqe->wr.opcode,
						 wqe->psn, wqe->lpsn,
						 &priv->tid_req);
	}
exit:
	rcu_read_unlock();
}
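
/*
 * Worked example of the segmentation above (illustrative only; the
 * negotiated values are hypothetical). With remote->max_len = 256 KB
 * and a 1 MB TID RDMA WRITE starting at wqe->psn = 100:
 *
 *	seg_len    = min(256K, 1M)          = 256K
 *	total_segs = DIV_ROUND_UP(1M, 256K) = 4
 *	wqe->lpsn  = 100 + 4 - 1            = 103
 *
 * i.e. a TID RDMA WRITE consumes one IB PSN per segment, whereas a TID
 * RDMA READ consumes one IB PSN per PMTU-sized packet.
 */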

/* TID RDMA WRITE functions */

u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
				  struct ib_other_headers *ohdr,
				  u32 *bth1, u32 *bth2, u32 *len)
{
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
	struct tid_rdma_params *remote;

	rcu_read_lock();
	remote = rcu_dereference(qpriv->tid_rdma.remote);
	/*
	 * Set the number of flows to be used based on negotiated
	 * parameters.
	 */
	req->n_flows = remote->max_write;
	req->state = TID_REQUEST_ACTIVE;

	KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth0, KVER, 0x1);
	KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth1, JKEY, remote->jkey);
	ohdr->u.tid_rdma.w_req.reth.vaddr =
		cpu_to_be64(wqe->rdma_wr.remote_addr + (wqe->length - *len));
	ohdr->u.tid_rdma.w_req.reth.rkey =
		cpu_to_be32(wqe->rdma_wr.rkey);
	ohdr->u.tid_rdma.w_req.reth.length = cpu_to_be32(*len);
	ohdr->u.tid_rdma.w_req.verbs_qp = cpu_to_be32(qp->remote_qpn);
	*bth1 &= ~RVT_QPN_MASK;
	*bth1 |= remote->qp;
	qp->s_state = TID_OP(WRITE_REQ);
	qp->s_flags |= HFI1_S_WAIT_TID_RESP;
	*bth2 |= IB_BTH_REQ_ACK;
	*len = 0;

	rcu_read_unlock();
	return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
}

static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
{
	/*
	 * Heuristic for computing the RNR timeout when waiting on the flow
	 * queue. Rather than a computationally expensive exact estimate of
	 * when a flow will be available, we assume that if a QP is at
	 * position N in the flow queue it has to wait approximately (N + 1) *
	 * (number of segments between two sync points). The rationale for
	 * this is that flows are released and recycled at each sync point.
	 */
	return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
}
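
/*
 * Worked example of the flow-wait heuristic above (illustrative only;
 * assumes an 11-bit KDETH sequence space, so MAX_TID_FLOW_PSN = 2048, a
 * 4 KB PMTU, and 256 KB segments, i.e. TID_RDMA_SEGMENT_SHIFT = 18):
 *
 *	(2048 * 4096) >> 18 = 2^23 >> 18 = 32
 *
 * i.e. roughly 32 segments fit in one flow generation, so a QP at
 * position N in the flow queue waits about (N + 1) * 32 segments; the
 * caller multiplies this return value by position_in_queue() below.
 */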

static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
			     struct tid_queue *queue)
{
	return qpriv->tid_enqueue - queue->dequeue;
}

/*
 * @qp: points to rvt_qp context.
 * @to_seg: desired RNR timeout in segments.
 * Return: index of the next highest timeout in the ib_hfi1_rnr_table[]
 */
static u32 hfi1_compute_tid_rnr_timeout(struct rvt_qp *qp, u32 to_seg)
{
	struct hfi1_qp_priv *qpriv = qp->priv;
	u64 timeout;
	u32 bytes_per_us;
	u8 i;

	bytes_per_us = active_egress_rate(qpriv->rcd->ppd) / 8;
	timeout = (to_seg * TID_RDMA_MAX_SEGMENT_SIZE) / bytes_per_us;
	/*
	 * Find the next highest value in the RNR table to the required
	 * timeout. This gives the responder some padding.
	 */
	for (i = 1; i <= IB_AETH_CREDIT_MASK; i++)
		if (rvt_rnr_tbl_to_usec(i) >= timeout)
			return i;
	return 0;
}
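
/*
 * Worked example of the RNR timeout computation above (illustrative
 * only; assumes the egress rate is reported in Mbit/s and segments are
 * 256 KB). On a 100 Gb/s port, bytes_per_us = 100000 / 8 = 12500.
 * Waiting for to_seg = 16 segments:
 *
 *	timeout = (16 * 262144) / 12500 ~= 336 us
 *
 * and the loop returns the smallest RNR table index whose encoded
 * timeout is >= 336 us, giving the responder some slack.
 */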

/*
 * Central place for resource allocation at the TID write responder.
 * It is called from the write_req and write_data interrupt handlers as
 * well as from the send thread when a queued QP is scheduled for
 * resource allocation.
 *
 * Iterates over (a) segments of a request and then (b) queued requests
 * themselves to allocate resources for up to local->max_write
 * segments across multiple requests. Allocation stops when we
 * hit a sync point and resumes after the data packets at the
 * sync point have been received.
 *
 * Resource allocation and sending of responses are decoupled. The
 * request/segment which are being allocated and sent are as follows.
 * Resources are allocated for:
 *     [request: qpriv->r_tid_alloc, segment: req->alloc_seg]
 * The send thread sends:
 *     [request: qp->s_tail_ack_queue, segment: req->cur_seg]
 */
static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
{
	struct tid_rdma_request *req;
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct hfi1_ctxtdata *rcd = qpriv->rcd;
	struct tid_rdma_params *local = &qpriv->tid_rdma.local;
	struct rvt_ack_entry *e;
	u32 npkts, to_seg;
	bool last;
	int ret = 0;

	lockdep_assert_held(&qp->s_lock);

	while (1) {
		trace_hfi1_rsp_tid_write_alloc_res(qp, 0);
		trace_hfi1_tid_write_rsp_alloc_res(qp);
		/*
		 * Don't allocate more segments if a RNR NAK has already been
		 * scheduled to avoid messing up qp->r_psn: the RNR NAK will
		 * be sent only when all allocated segments have been sent.
		 * However, if more segments are allocated before that, TID RDMA
		 * WRITE RESP packets will be sent out for these new segments
		 * before the RNR NAK packet. When the requester receives the
		 * RNR NAK packet, it will restart with qp->s_last_psn + 1,
		 * which does not match qp->r_psn and will be dropped.
		 * Consequently, the requester will exhaust its retries and
		 * put the qp into error state.
		 */
		if (qpriv->rnr_nak_state == TID_RNR_NAK_SEND)
			break;

		/* No requests left to process */
		if (qpriv->r_tid_alloc == qpriv->r_tid_head) {
			/* If all data has been received, clear the flow */
			if (qpriv->flow_state.index < RXE_NUM_TID_FLOWS &&
			    !qpriv->alloc_w_segs) {
				hfi1_kern_clear_hw_flow(rcd, qp);
				qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
			}
			break;
		}

		e = &qp->s_ack_queue[qpriv->r_tid_alloc];
		if (e->opcode != TID_OP(WRITE_REQ))
			goto next_req;
		req = ack_to_tid_req(e);
		trace_hfi1_tid_req_write_alloc_res(qp, 0, e->opcode, e->psn,
						   e->lpsn, req);
		/* Finished allocating for all segments of this request */
		if (req->alloc_seg >= req->total_segs)
			goto next_req;

		/* Can allocate only a maximum of local->max_write for a QP */
		if (qpriv->alloc_w_segs >= local->max_write)
			break;

		/* Don't allocate at a sync point with data packets pending */
		if (qpriv->sync_pt && qpriv->alloc_w_segs)
			break;

		/* All data received at the sync point, continue */
		if (qpriv->sync_pt && !qpriv->alloc_w_segs) {
			hfi1_kern_clear_hw_flow(rcd, qp);
			qpriv->sync_pt = false;
			qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
		}

		/* Allocate flow if we don't have one */
		if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
			ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
			if (ret) {
				to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
					position_in_queue(qpriv,
							  &rcd->flow_queue);
				break;
			}
		}

		npkts = rvt_div_round_up_mtu(qp, req->seg_len);

		/*
		 * We are at a sync point if we run out of KDETH PSN space.
		 * Last PSN of every generation is reserved for RESYNC.
		 */
		if (qpriv->flow_state.psn + npkts > MAX_TID_FLOW_PSN - 1) {
			qpriv->sync_pt = true;
			break;
		}

		/*
		 * If overtaking req->acked_tail, send an RNR NAK. Because the
		 * QP is not queued in this case, and the issue can only be
		 * caused by a delay in scheduling the second leg which we
		 * cannot estimate, we use a rather arbitrary RNR timeout of
		 * (MAX_FLOWS / 2) segments.
		 */
		if (!CIRC_SPACE(req->setup_head, req->acked_tail,
				MAX_FLOWS)) {
			ret = -EAGAIN;
			to_seg = MAX_FLOWS >> 1;
			tid_rdma_trigger_ack(qp);
			break;
		}

		/* Try to allocate rcv array / TID entries */
		ret = hfi1_kern_exp_rcv_setup(req, &req->ss, &last);
		if (ret == -EAGAIN)
			to_seg = position_in_queue(qpriv, &rcd->rarr_queue);
		if (ret)
			break;

		qpriv->alloc_w_segs++;
		req->alloc_seg++;
		continue;
next_req:
		/* Begin processing the next request */
		if (++qpriv->r_tid_alloc >
		    rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
			qpriv->r_tid_alloc = 0;
	}

	/*
	 * Schedule an RNR NAK to be sent if (a) flow or rcv array allocation
	 * has failed, (b) we are called from the rcv handler interrupt
	 * context, and (c) an RNR NAK has not already been scheduled.
	 */
	if (ret == -EAGAIN && intr_ctx && !qp->r_nak_state)
		goto send_rnr_nak;

	return;

send_rnr_nak:
	lockdep_assert_held(&qp->r_lock);

	/* Set r_nak_state to prevent unrelated events from generating NAK's */
	qp->r_nak_state = hfi1_compute_tid_rnr_timeout(qp, to_seg) | IB_RNR_NAK;

	/* Pull back r_psn to the segment being RNR NAK'd */
	qp->r_psn = e->psn + req->alloc_seg;
	qp->r_ack_psn = qp->r_psn;
	/*
	 * Pull back r_head_ack_queue to the ack entry following the request
	 * being RNR NAK'd. This allows resources to be allocated to the request
	 * if the queued QP is scheduled.
	 */
	qp->r_head_ack_queue = qpriv->r_tid_alloc + 1;
	if (qp->r_head_ack_queue > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
		qp->r_head_ack_queue = 0;
	qpriv->r_tid_head = qp->r_head_ack_queue;
	/*
	 * These send side fields are used in make_rc_ack(). They are set in
	 * hfi1_send_rc_ack() but must be set here before dropping qp->s_lock
	 * for consistency
	 */
	qp->s_nak_state = qp->r_nak_state;
	qp->s_ack_psn = qp->r_ack_psn;
	/*
	 * Clear the ACK PENDING flag to prevent unwanted ACK because we
	 * have modified qp->s_ack_psn here.
	 */
	qp->s_flags &= ~(RVT_S_ACK_PENDING);

	trace_hfi1_rsp_tid_write_alloc_res(qp, qp->r_psn);
	/*
	 * qpriv->rnr_nak_state is used to determine when the scheduled RNR NAK
	 * has actually been sent. qp->s_flags RVT_S_ACK_PENDING bit cannot be
	 * used for this because qp->s_lock is dropped before calling
	 * hfi1_send_rc_ack() leading to inconsistency between the receive
	 * interrupt handlers and the send thread in make_rc_ack()
	 */
	qpriv->rnr_nak_state = TID_RNR_NAK_SEND;

	/*
	 * Schedule RNR NAK to be sent. RNR NAK's are scheduled from the receive
	 * interrupt handlers but will be sent from the send engine behind any
	 * previous responses that may have been scheduled
	 */
	rc_defered_ack(rcd, qp);
}
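
/*
 * Illustrative sketch of the CIRC_SPACE() overtake check above (not
 * driver code). With MAX_FLOWS = 16, setup_head = 9 and acked_tail = 10:
 *
 *	CIRC_SPACE(9, 10, 16) = (10 - (9 + 1)) & 15 = 0
 *
 * so allocating another segment would overwrite an un-ACK'd flow; the
 * loop instead backs off with an RNR timeout of MAX_FLOWS / 2 = 8
 * segments and triggers an ACK so the requester can catch up.
 */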

void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
{
	/* HANDLER FOR TID RDMA WRITE REQUEST packet (Responder side) */

	/*
	 * 1. Verify TID RDMA WRITE REQ as per IB_OPCODE_RC_RDMA_WRITE_FIRST
	 *    (see hfi1_rc_rcv())
	 *     - Don't allow 0-length requests.
	 * 2. Put TID RDMA WRITE REQ into the response queue (s_ack_queue)
	 *     - Setup struct tid_rdma_req with request info
	 *     - Prepare struct tid_rdma_flow array?
	 * 3. Set the qp->s_ack_state as per the state diagram in the design
	 *    doc.
	 * 4. Set RVT_S_RESP_PENDING in s_flags.
	 * 5. Kick the send engine (hfi1_schedule_send())
	 */
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct rvt_qp *qp = packet->qp;
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct ib_other_headers *ohdr = packet->ohdr;
	struct rvt_ack_entry *e;
	unsigned long flags;
	struct ib_reth *reth;
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct tid_rdma_request *req;
	u32 bth0, psn, len, rkey, num_segs;
	bool fecn;
	u8 next;
	u64 vaddr;
	int diff;

	bth0 = be32_to_cpu(ohdr->bth[0]);
	if (hfi1_ruc_check_hdr(ibp, packet))
		return;

	fecn = process_ecn(qp, packet);
	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
	trace_hfi1_rsp_rcv_tid_write_req(qp, psn);

	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
		rvt_comm_est(qp);

	if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
		goto nack_inv;

	reth = &ohdr->u.tid_rdma.w_req.reth;
	vaddr = be64_to_cpu(reth->vaddr);
	len = be32_to_cpu(reth->length);

	num_segs = DIV_ROUND_UP(len, qpriv->tid_rdma.local.max_len);
	diff = delta_psn(psn, qp->r_psn);
	if (unlikely(diff)) {
		tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
		return;
	}

	/*
	 * The resent request which was previously RNR NAK'd is inserted at the
	 * location of the original request, which is one entry behind
	 * r_head_ack_queue
	 */
	if (qpriv->rnr_nak_state)
		qp->r_head_ack_queue = qp->r_head_ack_queue ?
			qp->r_head_ack_queue - 1 :
			rvt_size_atomic(ib_to_rvt(qp->ibqp.device));

	/* We've verified the request, insert it into the ack queue. */
	next = qp->r_head_ack_queue + 1;
	if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
		next = 0;
	spin_lock_irqsave(&qp->s_lock, flags);
	if (unlikely(next == qp->s_acked_ack_queue)) {
		if (!qp->s_ack_queue[next].sent)
			goto nack_inv_unlock;
		update_ack_queue(qp, next);
	}
	e = &qp->s_ack_queue[qp->r_head_ack_queue];
	req = ack_to_tid_req(e);

	/* Bring previously RNR NAK'd request back to life */
	if (qpriv->rnr_nak_state) {
		qp->r_nak_state = 0;
		qp->s_nak_state = 0;
		qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
		qp->r_psn = e->lpsn + 1;
		req->state = TID_REQUEST_INIT;
		goto update_head;
	}

	release_rdma_sge_mr(e);

	/* The length needs to be in multiples of PAGE_SIZE */
	if (!len || len & ~PAGE_MASK)
		goto nack_inv_unlock;

	rkey = be32_to_cpu(reth->rkey);
	qp->r_len = len;

	if (e->opcode == TID_OP(WRITE_REQ) &&
	    (req->setup_head != req->clear_tail ||
	     req->clear_tail != req->acked_tail))
		goto nack_inv_unlock;

	if (unlikely(!rvt_rkey_ok(qp, &e->rdma_sge, qp->r_len, vaddr,
				  rkey, IB_ACCESS_REMOTE_WRITE)))
		goto nack_acc;

	qp->r_psn += num_segs - 1;

	e->opcode = (bth0 >> 24) & 0xff;
	e->psn = psn;
	e->lpsn = qp->r_psn;
	e->sent = 0;

	req->n_flows = min_t(u16, num_segs, qpriv->tid_rdma.local.max_write);
	req->state = TID_REQUEST_INIT;
	req->cur_seg = 0;
	req->comp_seg = 0;
	req->ack_seg = 0;
	req->alloc_seg = 0;
	req->isge = 0;
	req->seg_len = qpriv->tid_rdma.local.max_len;
	req->total_len = len;
	req->total_segs = num_segs;
	req->r_flow_psn = e->psn;
	req->ss.sge = e->rdma_sge;
	req->ss.num_sge = 1;

	req->flow_idx = req->setup_head;
	req->clear_tail = req->setup_head;
	req->acked_tail = req->setup_head;

	qp->r_state = e->opcode;
	qp->r_nak_state = 0;
	/*
	 * We need to increment the MSN here instead of when we
	 * finish sending the result since a duplicate request would
	 * increment it more than once.
	 */
	qp->r_msn++;
	qp->r_psn++;

	trace_hfi1_tid_req_rcv_write_req(qp, 0, e->opcode, e->psn, e->lpsn,
					 req);

	if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID) {
		qpriv->r_tid_tail = qp->r_head_ack_queue;
	} else if (qpriv->r_tid_tail == qpriv->r_tid_head) {
		struct tid_rdma_request *ptr;

		e = &qp->s_ack_queue[qpriv->r_tid_tail];
		ptr = ack_to_tid_req(e);

		if (e->opcode != TID_OP(WRITE_REQ) ||
		    ptr->comp_seg == ptr->total_segs) {
			if (qpriv->r_tid_tail == qpriv->r_tid_ack)
				qpriv->r_tid_ack = qp->r_head_ack_queue;
			qpriv->r_tid_tail = qp->r_head_ack_queue;
		}
	}
update_head:
	qp->r_head_ack_queue = next;
	qpriv->r_tid_head = qp->r_head_ack_queue;

	hfi1_tid_write_alloc_resources(qp, true);
	trace_hfi1_tid_write_rsp_rcv_req(qp);

	/* Schedule the send tasklet. */
	qp->s_flags |= RVT_S_RESP_PENDING;
	if (fecn)
		qp->s_flags |= RVT_S_ECN;
	hfi1_schedule_send(qp);

	spin_unlock_irqrestore(&qp->s_lock, flags);
	return;

nack_inv_unlock:
	spin_unlock_irqrestore(&qp->s_lock, flags);
nack_inv:
	rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
	qp->r_nak_state = IB_NAK_INVALID_REQUEST;
	qp->r_ack_psn = qp->r_psn;
	/* Queue NAK for later */
	rc_defered_ack(rcd, qp);
	return;
nack_acc:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
	qp->r_ack_psn = qp->r_psn;
}
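
/*
 * Worked example of the PSN accounting above (illustrative only). A
 * 1 MB WRITE REQ arriving with psn = 200 on a responder whose
 * local.max_len is 256 KB gives num_segs = 4, so:
 *
 *	e->psn = 200
 *	qp->r_psn += num_segs - 1   ->  203  (one PSN per segment)
 *	e->lpsn = 203
 *	qp->r_psn++                 ->  204  (next expected request PSN)
 *
 * i.e. the request occupies IB PSNs 200..203 even though the data
 * itself moves on the KDETH flow PSN space.
 */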

u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
				   struct ib_other_headers *ohdr, u32 *bth1,
				   u32 bth2, u32 *len,
				   struct rvt_sge_state **ss)
{
	struct hfi1_ack_priv *epriv = e->priv;
	struct tid_rdma_request *req = &epriv->tid_req;
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct tid_rdma_flow *flow = NULL;
	u32 resp_len = 0, hdwords = 0;
	void *resp_addr = NULL;
	struct tid_rdma_params *remote;

	trace_hfi1_tid_req_build_write_resp(qp, 0, e->opcode, e->psn, e->lpsn,
					    req);
	trace_hfi1_tid_write_rsp_build_resp(qp);
	trace_hfi1_rsp_build_tid_write_resp(qp, bth2);
	flow = &req->flows[req->flow_idx];
	switch (req->state) {
	default:
		/*
		 * Try to allocate resources here in case QP was queued and was
		 * later scheduled when resources became available
		 */
		hfi1_tid_write_alloc_resources(qp, false);

		/* We've already sent everything which is ready */
		if (req->cur_seg >= req->alloc_seg)
			goto done;

		/*
		 * Resources can be assigned but responses cannot be sent in
		 * rnr_nak state, until the resent request is received
		 */
		if (qpriv->rnr_nak_state == TID_RNR_NAK_SENT)
			goto done;

		req->state = TID_REQUEST_ACTIVE;
		trace_hfi1_tid_flow_build_write_resp(qp, req->flow_idx, flow);
		req->flow_idx = CIRC_NEXT(req->flow_idx, MAX_FLOWS);
		hfi1_add_tid_reap_timer(qp);
		break;

	case TID_REQUEST_RESEND_ACTIVE:
	case TID_REQUEST_RESEND:
		trace_hfi1_tid_flow_build_write_resp(qp, req->flow_idx, flow);
		req->flow_idx = CIRC_NEXT(req->flow_idx, MAX_FLOWS);
		if (!CIRC_CNT(req->setup_head, req->flow_idx, MAX_FLOWS))
			req->state = TID_REQUEST_ACTIVE;

		hfi1_mod_tid_reap_timer(qp);
		break;
	}
	flow->flow_state.resp_ib_psn = bth2;
	resp_addr = (void *)flow->tid_entry;
	resp_len = sizeof(*flow->tid_entry) * flow->tidcnt;
	req->cur_seg++;

	memset(&ohdr->u.tid_rdma.w_rsp, 0, sizeof(ohdr->u.tid_rdma.w_rsp));
	epriv->ss.sge.vaddr = resp_addr;
	epriv->ss.sge.sge_length = resp_len;
	epriv->ss.sge.length = epriv->ss.sge.sge_length;
	/*
	 * We can safely zero these out. Since the first SGE covers the
	 * entire packet, nothing else should even look at the MR.
	 */
	epriv->ss.sge.mr = NULL;
	epriv->ss.sge.m = 0;
	epriv->ss.sge.n = 0;

	epriv->ss.sg_list = NULL;
	epriv->ss.total_len = epriv->ss.sge.sge_length;
	epriv->ss.num_sge = 1;

	*ss = &epriv->ss;
	*len = epriv->ss.total_len;

	/* Construct the TID RDMA WRITE RESP packet header */
	rcu_read_lock();
	remote = rcu_dereference(qpriv->tid_rdma.remote);

	KDETH_RESET(ohdr->u.tid_rdma.w_rsp.kdeth0, KVER, 0x1);
	KDETH_RESET(ohdr->u.tid_rdma.w_rsp.kdeth1, JKEY, remote->jkey);
	ohdr->u.tid_rdma.w_rsp.aeth = rvt_compute_aeth(qp);
	ohdr->u.tid_rdma.w_rsp.tid_flow_psn =
		cpu_to_be32((flow->flow_state.generation <<
			     HFI1_KDETH_BTH_SEQ_SHIFT) |
			    (flow->flow_state.spsn &
			     HFI1_KDETH_BTH_SEQ_MASK));
	ohdr->u.tid_rdma.w_rsp.tid_flow_qp =
		cpu_to_be32(qpriv->tid_rdma.local.qp |
			    ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
			     TID_RDMA_DESTQP_FLOW_SHIFT) |
			    qpriv->rcd->ctxt);
	ohdr->u.tid_rdma.w_rsp.verbs_qp = cpu_to_be32(qp->remote_qpn);
	*bth1 = remote->qp;
	rcu_read_unlock();
	hdwords = sizeof(ohdr->u.tid_rdma.w_rsp) / sizeof(u32);
	qpriv->pending_tid_w_segs++;
done:
	return hdwords;
}
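
/*
 * Illustrative sketch of the tid_flow_psn packing above (not driver
 * code). The 32-bit field carries the generation in the upper bits and
 * the starting sequence in the low HFI1_KDETH_BTH_SEQ_SHIFT (11) bits:
 *
 *	u32 encode(u32 gen, u32 spsn)
 *	{
 *		return (gen << 11) | (spsn & 0x7ff);
 *	}
 *
 * so generation = 5, spsn = 9 encodes to 0x2809. The requester recovers
 * both values with the inverse shift/mask when the response arrives
 * (see the decode in hfi1_rc_rcv_tid_rdma_write_resp() below).
 */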

static void hfi1_add_tid_reap_timer(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *qpriv = qp->priv;

	lockdep_assert_held(&qp->s_lock);
	if (!(qpriv->s_flags & HFI1_R_TID_RSC_TIMER)) {
		qpriv->s_flags |= HFI1_R_TID_RSC_TIMER;
		qpriv->s_tid_timer.expires = jiffies +
			qpriv->tid_timer_timeout_jiffies;
		add_timer(&qpriv->s_tid_timer);
	}
}

static void hfi1_mod_tid_reap_timer(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *qpriv = qp->priv;

	lockdep_assert_held(&qp->s_lock);
	qpriv->s_flags |= HFI1_R_TID_RSC_TIMER;
	mod_timer(&qpriv->s_tid_timer, jiffies +
		  qpriv->tid_timer_timeout_jiffies);
}

static int hfi1_stop_tid_reap_timer(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *qpriv = qp->priv;
	int rval = 0;

	lockdep_assert_held(&qp->s_lock);
	if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
		rval = del_timer(&qpriv->s_tid_timer);
		qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
	}
	return rval;
}

void hfi1_del_tid_reap_timer(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *qpriv = qp->priv;

	del_timer_sync(&qpriv->s_tid_timer);
	qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
}

static void hfi1_tid_timeout(struct timer_list *t)
{
	struct hfi1_qp_priv *qpriv = from_timer(qpriv, t, s_tid_timer);
	struct rvt_qp *qp = qpriv->owner;
	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
	unsigned long flags;
	u32 i;

	spin_lock_irqsave(&qp->r_lock, flags);
	spin_lock(&qp->s_lock);
	if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
		dd_dev_warn(dd_from_ibdev(qp->ibqp.device), "[QP%u] %s %d\n",
			    qp->ibqp.qp_num, __func__, __LINE__);
		trace_hfi1_msg_tid_timeout(/* msg */
			qp, "resource timeout = ",
			(u64)qpriv->tid_timer_timeout_jiffies);
		hfi1_stop_tid_reap_timer(qp);
		/*
		 * Go through the entire ack queue and clear any outstanding
		 * HW flow and RcvArray resources.
		 */
		hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
		for (i = 0; i < rvt_max_atomic(rdi); i++) {
			struct tid_rdma_request *req =
				ack_to_tid_req(&qp->s_ack_queue[i]);

			hfi1_kern_exp_rcv_clear_all(req);
		}
		spin_unlock(&qp->s_lock);
		if (qp->ibqp.event_handler) {
			struct ib_event ev;

			ev.device = qp->ibqp.device;
			ev.element.qp = &qp->ibqp;
			ev.event = IB_EVENT_QP_FATAL;
			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
		}
		rvt_rc_error(qp, IB_WC_RESP_TIMEOUT_ERR);
		goto unlock_r_lock;
	}
	spin_unlock(&qp->s_lock);
unlock_r_lock:
	spin_unlock_irqrestore(&qp->r_lock, flags);
}
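
/*
 * Illustrative lifecycle of the reap timer above (not driver code):
 *
 *	hfi1_add_tid_reap_timer(qp);   // armed when the first WRITE RESP
 *	                               // carrying TID entries is built
 *	hfi1_mod_tid_reap_timer(qp);   // pushed out on each resent response
 *	hfi1_stop_tid_reap_timer(qp);  // stopped elsewhere once the data
 *	                               // for the allocated segments arrives
 *
 * If the timer fires first, the requester is presumed to have gone
 * silent: the responder reclaims its HW flow and RcvArray resources and
 * moves the QP to the error state (IB_EVENT_QP_FATAL).
 */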
402662306a36Sopenharmony_ci
402762306a36Sopenharmony_civoid hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet)
402862306a36Sopenharmony_ci{
402962306a36Sopenharmony_ci	/* HANDLER FOR TID RDMA WRITE RESPONSE packet (Requestor side */
403062306a36Sopenharmony_ci
403162306a36Sopenharmony_ci	/*
403262306a36Sopenharmony_ci	 * 1. Find matching SWQE
403362306a36Sopenharmony_ci	 * 2. Check that TIDENTRY array has enough space for a complete
403462306a36Sopenharmony_ci	 *    segment. If not, put QP in error state.
403562306a36Sopenharmony_ci	 * 3. Save response data in struct tid_rdma_req and struct tid_rdma_flow
403662306a36Sopenharmony_ci	 * 4. Remove HFI1_S_WAIT_TID_RESP from s_flags.
403762306a36Sopenharmony_ci	 * 5. Set qp->s_state
403862306a36Sopenharmony_ci	 * 6. Kick the send engine (hfi1_schedule_send())
403962306a36Sopenharmony_ci	 */
404062306a36Sopenharmony_ci	struct ib_other_headers *ohdr = packet->ohdr;
404162306a36Sopenharmony_ci	struct rvt_qp *qp = packet->qp;
404262306a36Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
404362306a36Sopenharmony_ci	struct hfi1_ctxtdata *rcd = packet->rcd;
404462306a36Sopenharmony_ci	struct rvt_swqe *wqe;
404562306a36Sopenharmony_ci	struct tid_rdma_request *req;
404662306a36Sopenharmony_ci	struct tid_rdma_flow *flow;
404762306a36Sopenharmony_ci	enum ib_wc_status status;
404862306a36Sopenharmony_ci	u32 opcode, aeth, psn, flow_psn, i, tidlen = 0, pktlen;
404962306a36Sopenharmony_ci	bool fecn;
405062306a36Sopenharmony_ci	unsigned long flags;
405162306a36Sopenharmony_ci
405262306a36Sopenharmony_ci	fecn = process_ecn(qp, packet);
405362306a36Sopenharmony_ci	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
405462306a36Sopenharmony_ci	aeth = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.aeth);
405562306a36Sopenharmony_ci	opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
405662306a36Sopenharmony_ci
405762306a36Sopenharmony_ci	spin_lock_irqsave(&qp->s_lock, flags);
405862306a36Sopenharmony_ci
405962306a36Sopenharmony_ci	/* Ignore invalid responses */
406062306a36Sopenharmony_ci	if (cmp_psn(psn, qp->s_next_psn) >= 0)
406162306a36Sopenharmony_ci		goto ack_done;
406262306a36Sopenharmony_ci
406362306a36Sopenharmony_ci	/* Ignore duplicate responses. */
406462306a36Sopenharmony_ci	if (unlikely(cmp_psn(psn, qp->s_last_psn) <= 0))
406562306a36Sopenharmony_ci		goto ack_done;
406662306a36Sopenharmony_ci
406762306a36Sopenharmony_ci	if (unlikely(qp->s_acked == qp->s_tail))
406862306a36Sopenharmony_ci		goto ack_done;
406962306a36Sopenharmony_ci
407062306a36Sopenharmony_ci	/*
407162306a36Sopenharmony_ci	 * If we are waiting for a particular packet sequence number
407262306a36Sopenharmony_ci	 * due to a request being resent, check for it. Otherwise,
407362306a36Sopenharmony_ci	 * ensure that we haven't missed anything.
407462306a36Sopenharmony_ci	 */
407562306a36Sopenharmony_ci	if (qp->r_flags & RVT_R_RDMAR_SEQ) {
407662306a36Sopenharmony_ci		if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
407762306a36Sopenharmony_ci			goto ack_done;
407862306a36Sopenharmony_ci		qp->r_flags &= ~RVT_R_RDMAR_SEQ;
407962306a36Sopenharmony_ci	}
408062306a36Sopenharmony_ci
408162306a36Sopenharmony_ci	wqe = rvt_get_swqe_ptr(qp, qpriv->s_tid_cur);
408262306a36Sopenharmony_ci	if (unlikely(wqe->wr.opcode != IB_WR_TID_RDMA_WRITE))
408362306a36Sopenharmony_ci		goto ack_op_err;
408462306a36Sopenharmony_ci
408562306a36Sopenharmony_ci	req = wqe_to_tid_req(wqe);
408662306a36Sopenharmony_ci	/*
408762306a36Sopenharmony_ci	 * If we've lost ACKs and our acked_tail pointer is too far
408862306a36Sopenharmony_ci	 * behind, don't overwrite segments. Just drop the packet and
408962306a36Sopenharmony_ci	 * let the reliability protocol take care of it.
409062306a36Sopenharmony_ci	 */
409162306a36Sopenharmony_ci	if (!CIRC_SPACE(req->setup_head, req->acked_tail, MAX_FLOWS))
409262306a36Sopenharmony_ci		goto ack_done;
409362306a36Sopenharmony_ci
409462306a36Sopenharmony_ci	/*
409562306a36Sopenharmony_ci	 * The call to do_rc_ack() should be last in the chain of
409662306a36Sopenharmony_ci	 * packet checks because it will end up updating the QP state.
409762306a36Sopenharmony_ci	 * Therefore, anything that would prevent the packet from
409862306a36Sopenharmony_ci	 * being accepted as a successful response should be prior
409962306a36Sopenharmony_ci	 * to it.
410062306a36Sopenharmony_ci	 */
410162306a36Sopenharmony_ci	if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
410262306a36Sopenharmony_ci		goto ack_done;
410362306a36Sopenharmony_ci
410462306a36Sopenharmony_ci	trace_hfi1_ack(qp, psn);
410562306a36Sopenharmony_ci
410662306a36Sopenharmony_ci	flow = &req->flows[req->setup_head];
410762306a36Sopenharmony_ci	flow->pkt = 0;
410862306a36Sopenharmony_ci	flow->tid_idx = 0;
410962306a36Sopenharmony_ci	flow->tid_offset = 0;
411062306a36Sopenharmony_ci	flow->sent = 0;
411162306a36Sopenharmony_ci	flow->resync_npkts = 0;
411262306a36Sopenharmony_ci	flow->tid_qpn = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.tid_flow_qp);
411362306a36Sopenharmony_ci	flow->idx = (flow->tid_qpn >> TID_RDMA_DESTQP_FLOW_SHIFT) &
411462306a36Sopenharmony_ci		TID_RDMA_DESTQP_FLOW_MASK;
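411462306a36Sopenharmony_ci	/*
411462306a36Sopenharmony_ci	 * tid_flow_psn packs the flow generation above the KDETH sequence
411462306a36Sopenharmony_ci	 * bits. Worked example, assuming HFI1_KDETH_BTH_SEQ_SHIFT == 11 as
411462306a36Sopenharmony_ci	 * in hfi.h: flow_psn 0x12345 decodes to generation 0x12345 >> 11 =
411462306a36Sopenharmony_ci	 * 0x24 and spsn 0x12345 & 0x7ff = 0x345.
411462306a36Sopenharmony_ci	 */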
411562306a36Sopenharmony_ci	flow_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.w_rsp.tid_flow_psn));
411662306a36Sopenharmony_ci	flow->flow_state.generation = flow_psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
411762306a36Sopenharmony_ci	flow->flow_state.spsn = flow_psn & HFI1_KDETH_BTH_SEQ_MASK;
411862306a36Sopenharmony_ci	flow->flow_state.resp_ib_psn = psn;
411962306a36Sopenharmony_ci	flow->length = min_t(u32, req->seg_len,
412062306a36Sopenharmony_ci			     (wqe->length - (req->comp_seg * req->seg_len)));
412162306a36Sopenharmony_ci
412262306a36Sopenharmony_ci	flow->npkts = rvt_div_round_up_mtu(qp, flow->length);
412362306a36Sopenharmony_ci	flow->flow_state.lpsn = flow->flow_state.spsn +
412462306a36Sopenharmony_ci		flow->npkts - 1;
412562306a36Sopenharmony_ci	/* payload length = packet length - (header length + ICRC length) */
412662306a36Sopenharmony_ci	pktlen = packet->tlen - (packet->hlen + 4);
412762306a36Sopenharmony_ci	if (pktlen > sizeof(flow->tid_entry)) {
412862306a36Sopenharmony_ci		status = IB_WC_LOC_LEN_ERR;
412962306a36Sopenharmony_ci		goto ack_err;
413062306a36Sopenharmony_ci	}
413162306a36Sopenharmony_ci	memcpy(flow->tid_entry, packet->ebuf, pktlen);
413262306a36Sopenharmony_ci	flow->tidcnt = pktlen / sizeof(*flow->tid_entry);
413362306a36Sopenharmony_ci	trace_hfi1_tid_flow_rcv_write_resp(qp, req->setup_head, flow);
413462306a36Sopenharmony_ci
413562306a36Sopenharmony_ci	req->comp_seg++;
413662306a36Sopenharmony_ci	trace_hfi1_tid_write_sender_rcv_resp(qp, 0);
413762306a36Sopenharmony_ci	/*
413862306a36Sopenharmony_ci	 * Walk the TID_ENTRY list to make sure we have enough space for a
413962306a36Sopenharmony_ci	 * complete segment.
414062306a36Sopenharmony_ci	 */
414162306a36Sopenharmony_ci	for (i = 0; i < flow->tidcnt; i++) {
414262306a36Sopenharmony_ci		trace_hfi1_tid_entry_rcv_write_resp(/* entry */
414362306a36Sopenharmony_ci			qp, i, flow->tid_entry[i]);
414462306a36Sopenharmony_ci		if (!EXP_TID_GET(flow->tid_entry[i], LEN)) {
414562306a36Sopenharmony_ci			status = IB_WC_LOC_LEN_ERR;
414662306a36Sopenharmony_ci			goto ack_err;
414762306a36Sopenharmony_ci		}
414862306a36Sopenharmony_ci		tidlen += EXP_TID_GET(flow->tid_entry[i], LEN);
414962306a36Sopenharmony_ci	}
415062306a36Sopenharmony_ci	if (tidlen * PAGE_SIZE < flow->length) {
415162306a36Sopenharmony_ci		status = IB_WC_LOC_LEN_ERR;
415262306a36Sopenharmony_ci		goto ack_err;
415362306a36Sopenharmony_ci	}
415462306a36Sopenharmony_ci
415562306a36Sopenharmony_ci	trace_hfi1_tid_req_rcv_write_resp(qp, 0, wqe->wr.opcode, wqe->psn,
415662306a36Sopenharmony_ci					  wqe->lpsn, req);
415762306a36Sopenharmony_ci	/*
415862306a36Sopenharmony_ci	 * If this is the first response for this request, set the initial
415962306a36Sopenharmony_ci	 * flow index to the current flow.
416062306a36Sopenharmony_ci	 */
416162306a36Sopenharmony_ci	if (!cmp_psn(psn, wqe->psn)) {
416262306a36Sopenharmony_ci		req->r_last_acked = mask_psn(wqe->psn - 1);
416362306a36Sopenharmony_ci		/* Set acked flow index to head index */
416462306a36Sopenharmony_ci		req->acked_tail = req->setup_head;
416562306a36Sopenharmony_ci	}
416662306a36Sopenharmony_ci
416762306a36Sopenharmony_ci	/* advance circular buffer head */
416862306a36Sopenharmony_ci	req->setup_head = CIRC_NEXT(req->setup_head, MAX_FLOWS);
416962306a36Sopenharmony_ci	req->state = TID_REQUEST_ACTIVE;
417062306a36Sopenharmony_ci
417162306a36Sopenharmony_ci	/*
417262306a36Sopenharmony_ci	 * If all responses for this TID RDMA WRITE request have been received,
417362306a36Sopenharmony_ci	 * advance the pointer to the next one.
417462306a36Sopenharmony_ci	 * Since TID RDMA requests could be mixed in with regular IB requests,
417562306a36Sopenharmony_ci	 * they might not appear sequentially in the queue. Therefore, the
417662306a36Sopenharmony_ci	 * next request needs to be "found".
417762306a36Sopenharmony_ci	 */
417862306a36Sopenharmony_ci	if (qpriv->s_tid_cur != qpriv->s_tid_head &&
417962306a36Sopenharmony_ci	    req->comp_seg == req->total_segs) {
418062306a36Sopenharmony_ci		for (i = qpriv->s_tid_cur + 1; ; i++) {
418162306a36Sopenharmony_ci			if (i == qp->s_size)
418262306a36Sopenharmony_ci				i = 0;
418362306a36Sopenharmony_ci			wqe = rvt_get_swqe_ptr(qp, i);
418462306a36Sopenharmony_ci			if (i == qpriv->s_tid_head)
418562306a36Sopenharmony_ci				break;
418662306a36Sopenharmony_ci			if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
418762306a36Sopenharmony_ci				break;
418862306a36Sopenharmony_ci		}
418962306a36Sopenharmony_ci		qpriv->s_tid_cur = i;
419062306a36Sopenharmony_ci	}
419162306a36Sopenharmony_ci	qp->s_flags &= ~HFI1_S_WAIT_TID_RESP;
419262306a36Sopenharmony_ci	hfi1_schedule_tid_send(qp);
419362306a36Sopenharmony_ci	goto ack_done;
419462306a36Sopenharmony_ci
419562306a36Sopenharmony_ciack_op_err:
419662306a36Sopenharmony_ci	status = IB_WC_LOC_QP_OP_ERR;
419762306a36Sopenharmony_ciack_err:
419862306a36Sopenharmony_ci	rvt_error_qp(qp, status);
419962306a36Sopenharmony_ciack_done:
420062306a36Sopenharmony_ci	if (fecn)
420162306a36Sopenharmony_ci		qp->s_flags |= RVT_S_ECN;
420262306a36Sopenharmony_ci	spin_unlock_irqrestore(&qp->s_lock, flags);
420362306a36Sopenharmony_ci}
420462306a36Sopenharmony_ci
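420462306a36Sopenharmony_ci/*
420462306a36Sopenharmony_ci * In brief: build one TID RDMA WRITE DATA packet. Up to one MTU is taken
420462306a36Sopenharmony_ci * from the TID entry indexed by flow->tid_idx, the KDETH header is
420462306a36Sopenharmony_ci * filled in (JKEY, TID index, offset in OM units), and true is returned
420462306a36Sopenharmony_ci * for the last packet of the segment so the caller can advance
420462306a36Sopenharmony_ci * req->clear_tail.
420462306a36Sopenharmony_ci */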
420562306a36Sopenharmony_cibool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
420662306a36Sopenharmony_ci				struct ib_other_headers *ohdr,
420762306a36Sopenharmony_ci				u32 *bth1, u32 *bth2, u32 *len)
420862306a36Sopenharmony_ci{
420962306a36Sopenharmony_ci	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
421062306a36Sopenharmony_ci	struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
421162306a36Sopenharmony_ci	struct tid_rdma_params *remote;
421262306a36Sopenharmony_ci	struct rvt_qp *qp = req->qp;
421362306a36Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
421462306a36Sopenharmony_ci	u32 tidentry = flow->tid_entry[flow->tid_idx];
421562306a36Sopenharmony_ci	u32 tidlen = EXP_TID_GET(tidentry, LEN) << PAGE_SHIFT;
421662306a36Sopenharmony_ci	struct tid_rdma_write_data *wd = &ohdr->u.tid_rdma.w_data;
421762306a36Sopenharmony_ci	u32 next_offset, om = KDETH_OM_LARGE;
421862306a36Sopenharmony_ci	bool last_pkt;
421962306a36Sopenharmony_ci
422062306a36Sopenharmony_ci	if (!tidlen) {
422162306a36Sopenharmony_ci		hfi1_trdma_send_complete(qp, wqe, IB_WC_REM_INV_RD_REQ_ERR);
422262306a36Sopenharmony_ci		rvt_error_qp(qp, IB_WC_REM_INV_RD_REQ_ERR);
422362306a36Sopenharmony_ci	}
422462306a36Sopenharmony_ci
422562306a36Sopenharmony_ci	*len = min_t(u32, qp->pmtu, tidlen - flow->tid_offset);
422662306a36Sopenharmony_ci	flow->sent += *len;
422762306a36Sopenharmony_ci	next_offset = flow->tid_offset + *len;
422862306a36Sopenharmony_ci	last_pkt = (flow->tid_idx == (flow->tidcnt - 1) &&
422962306a36Sopenharmony_ci		    next_offset >= tidlen) || (flow->sent >= flow->length);
423062306a36Sopenharmony_ci	trace_hfi1_tid_entry_build_write_data(qp, flow->tid_idx, tidentry);
423162306a36Sopenharmony_ci	trace_hfi1_tid_flow_build_write_data(qp, req->clear_tail, flow);
423262306a36Sopenharmony_ci
423362306a36Sopenharmony_ci	rcu_read_lock();
423462306a36Sopenharmony_ci	remote = rcu_dereference(qpriv->tid_rdma.remote);
423562306a36Sopenharmony_ci	KDETH_RESET(wd->kdeth0, KVER, 0x1);
423662306a36Sopenharmony_ci	KDETH_SET(wd->kdeth0, SH, !last_pkt);
423762306a36Sopenharmony_ci	KDETH_SET(wd->kdeth0, INTR, !!(!last_pkt && remote->urg));
423862306a36Sopenharmony_ci	KDETH_SET(wd->kdeth0, TIDCTRL, EXP_TID_GET(tidentry, CTRL));
423962306a36Sopenharmony_ci	KDETH_SET(wd->kdeth0, TID, EXP_TID_GET(tidentry, IDX));
424062306a36Sopenharmony_ci	KDETH_SET(wd->kdeth0, OM, om == KDETH_OM_LARGE);
424162306a36Sopenharmony_ci	KDETH_SET(wd->kdeth0, OFFSET, flow->tid_offset / om);
424262306a36Sopenharmony_ci	KDETH_RESET(wd->kdeth1, JKEY, remote->jkey);
424362306a36Sopenharmony_ci	wd->verbs_qp = cpu_to_be32(qp->remote_qpn);
424462306a36Sopenharmony_ci	rcu_read_unlock();
424562306a36Sopenharmony_ci
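424562306a36Sopenharmony_ci	/*
424562306a36Sopenharmony_ci	 * bth2 carries the per-flow sequence number in the low KDETH
424562306a36Sopenharmony_ci	 * sequence bits with the generation above them. Illustrative
424562306a36Sopenharmony_ci	 * example (11-bit sequence field assumed): generation 0x24 and
424562306a36Sopenharmony_ci	 * next sequence 0x10 yield (0x24 << 11) | 0x10 = 0x12010.
424562306a36Sopenharmony_ci	 */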
424662306a36Sopenharmony_ci	*bth1 = flow->tid_qpn;
424762306a36Sopenharmony_ci	*bth2 = mask_psn(((flow->flow_state.spsn + flow->pkt++) &
424862306a36Sopenharmony_ci			 HFI1_KDETH_BTH_SEQ_MASK) |
424962306a36Sopenharmony_ci			 (flow->flow_state.generation <<
425062306a36Sopenharmony_ci			  HFI1_KDETH_BTH_SEQ_SHIFT));
425162306a36Sopenharmony_ci	if (last_pkt) {
425262306a36Sopenharmony_ci		/* PSNs are zero-based, so +1 to count number of packets */
425362306a36Sopenharmony_ci		if (flow->flow_state.lpsn + 1 +
425462306a36Sopenharmony_ci		    rvt_div_round_up_mtu(qp, req->seg_len) >
425562306a36Sopenharmony_ci		    MAX_TID_FLOW_PSN)
425662306a36Sopenharmony_ci			req->state = TID_REQUEST_SYNC;
425762306a36Sopenharmony_ci		*bth2 |= IB_BTH_REQ_ACK;
425862306a36Sopenharmony_ci	}
425962306a36Sopenharmony_ci
426062306a36Sopenharmony_ci	if (next_offset >= tidlen) {
426162306a36Sopenharmony_ci		flow->tid_offset = 0;
426262306a36Sopenharmony_ci		flow->tid_idx++;
426362306a36Sopenharmony_ci	} else {
426462306a36Sopenharmony_ci		flow->tid_offset = next_offset;
426562306a36Sopenharmony_ci	}
426662306a36Sopenharmony_ci	return last_pkt;
426762306a36Sopenharmony_ci}
426862306a36Sopenharmony_ci
426962306a36Sopenharmony_civoid hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
427062306a36Sopenharmony_ci{
427162306a36Sopenharmony_ci	struct rvt_qp *qp = packet->qp;
427262306a36Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
427362306a36Sopenharmony_ci	struct hfi1_ctxtdata *rcd = priv->rcd;
427462306a36Sopenharmony_ci	struct ib_other_headers *ohdr = packet->ohdr;
427562306a36Sopenharmony_ci	struct rvt_ack_entry *e;
427662306a36Sopenharmony_ci	struct tid_rdma_request *req;
427762306a36Sopenharmony_ci	struct tid_rdma_flow *flow;
427862306a36Sopenharmony_ci	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
427962306a36Sopenharmony_ci	unsigned long flags;
428062306a36Sopenharmony_ci	u32 psn, next;
428162306a36Sopenharmony_ci	u8 opcode;
428262306a36Sopenharmony_ci	bool fecn;
428362306a36Sopenharmony_ci
428462306a36Sopenharmony_ci	fecn = process_ecn(qp, packet);
428562306a36Sopenharmony_ci	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
428662306a36Sopenharmony_ci	opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
428762306a36Sopenharmony_ci
428862306a36Sopenharmony_ci	/*
428962306a36Sopenharmony_ci	 * All error handling should be done by now. If we are here, the packet
429062306a36Sopenharmony_ci	 * is either good or has been accepted by the error handler.
429162306a36Sopenharmony_ci	 */
429262306a36Sopenharmony_ci	spin_lock_irqsave(&qp->s_lock, flags);
429362306a36Sopenharmony_ci	e = &qp->s_ack_queue[priv->r_tid_tail];
429462306a36Sopenharmony_ci	req = ack_to_tid_req(e);
429562306a36Sopenharmony_ci	flow = &req->flows[req->clear_tail];
429662306a36Sopenharmony_ci	if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) {
429762306a36Sopenharmony_ci		update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
429862306a36Sopenharmony_ci
429962306a36Sopenharmony_ci		if (cmp_psn(psn, flow->flow_state.r_next_psn))
430062306a36Sopenharmony_ci			goto send_nak;
430162306a36Sopenharmony_ci
430262306a36Sopenharmony_ci		flow->flow_state.r_next_psn = mask_psn(psn + 1);
430362306a36Sopenharmony_ci		/*
430462306a36Sopenharmony_ci		 * Copy the payload to the destination buffer if this packet
430562306a36Sopenharmony_ci		 * is delivered as an eager packet due to an RSM rule and FECN.
430662306a36Sopenharmony_ci		 * The RSM rule matches on the FECN bit in the BTH and the SH
430762306a36Sopenharmony_ci		 * bit in the KDETH header and therefore will not match the
430862306a36Sopenharmony_ci		 * last packet of each segment, which has the SH bit cleared.
430962306a36Sopenharmony_ci		 */
431062306a36Sopenharmony_ci		if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
431162306a36Sopenharmony_ci			struct rvt_sge_state ss;
431262306a36Sopenharmony_ci			u32 len;
431362306a36Sopenharmony_ci			u32 tlen = packet->tlen;
431462306a36Sopenharmony_ci			u16 hdrsize = packet->hlen;
431562306a36Sopenharmony_ci			u8 pad = packet->pad;
431662306a36Sopenharmony_ci			u8 extra_bytes = pad + packet->extra_byte +
431762306a36Sopenharmony_ci				(SIZE_OF_CRC << 2);
431862306a36Sopenharmony_ci			u32 pmtu = qp->pmtu;
431962306a36Sopenharmony_ci
432062306a36Sopenharmony_ci			if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
432162306a36Sopenharmony_ci				goto send_nak;
432262306a36Sopenharmony_ci			len = req->comp_seg * req->seg_len;
432362306a36Sopenharmony_ci			len += delta_psn(psn,
432462306a36Sopenharmony_ci				full_flow_psn(flow, flow->flow_state.spsn)) *
432562306a36Sopenharmony_ci				pmtu;
432662306a36Sopenharmony_ci			if (unlikely(req->total_len - len < pmtu))
432762306a36Sopenharmony_ci				goto send_nak;
432862306a36Sopenharmony_ci
432962306a36Sopenharmony_ci			/*
433062306a36Sopenharmony_ci			 * The e->rdma_sge field is set when TID RDMA WRITE REQ
433162306a36Sopenharmony_ci			 * is first received and is never modified thereafter.
433262306a36Sopenharmony_ci			 */
433362306a36Sopenharmony_ci			ss.sge = e->rdma_sge;
433462306a36Sopenharmony_ci			ss.sg_list = NULL;
433562306a36Sopenharmony_ci			ss.num_sge = 1;
433662306a36Sopenharmony_ci			ss.total_len = req->total_len;
433762306a36Sopenharmony_ci			rvt_skip_sge(&ss, len, false);
433862306a36Sopenharmony_ci			rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
433962306a36Sopenharmony_ci				     false);
434062306a36Sopenharmony_ci			/* Raise the sw sequence check flag for next packet */
434162306a36Sopenharmony_ci			priv->r_next_psn_kdeth = mask_psn(psn + 1);
434262306a36Sopenharmony_ci			priv->s_flags |= HFI1_R_TID_SW_PSN;
434362306a36Sopenharmony_ci		}
434462306a36Sopenharmony_ci		goto exit;
434562306a36Sopenharmony_ci	}
434662306a36Sopenharmony_ci	flow->flow_state.r_next_psn = mask_psn(psn + 1);
434762306a36Sopenharmony_ci	hfi1_kern_exp_rcv_clear(req);
434862306a36Sopenharmony_ci	priv->alloc_w_segs--;
434962306a36Sopenharmony_ci	rcd->flows[flow->idx].psn = psn & HFI1_KDETH_BTH_SEQ_MASK;
435062306a36Sopenharmony_ci	req->comp_seg++;
435162306a36Sopenharmony_ci	priv->s_nak_state = 0;
435262306a36Sopenharmony_ci
435362306a36Sopenharmony_ci	/*
435462306a36Sopenharmony_ci	 * Release the flow if one of the following conditions has been met:
435562306a36Sopenharmony_ci	 *  - The request has reached a sync point AND all outstanding
435662306a36Sopenharmony_ci	 *    segments have been completed, or
435762306a36Sopenharmony_ci	 *  - The entire request is complete and there are no more requests
435862306a36Sopenharmony_ci	 *    (of any kind) in the queue.
435962306a36Sopenharmony_ci	 */
436062306a36Sopenharmony_ci	trace_hfi1_rsp_rcv_tid_write_data(qp, psn);
436162306a36Sopenharmony_ci	trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
436262306a36Sopenharmony_ci					  req);
436362306a36Sopenharmony_ci	trace_hfi1_tid_write_rsp_rcv_data(qp);
436462306a36Sopenharmony_ci	validate_r_tid_ack(priv);
436562306a36Sopenharmony_ci
436662306a36Sopenharmony_ci	if (opcode == TID_OP(WRITE_DATA_LAST)) {
436762306a36Sopenharmony_ci		release_rdma_sge_mr(e);
436862306a36Sopenharmony_ci		for (next = priv->r_tid_tail + 1; ; next++) {
436962306a36Sopenharmony_ci			if (next > rvt_size_atomic(&dev->rdi))
437062306a36Sopenharmony_ci				next = 0;
437162306a36Sopenharmony_ci			if (next == priv->r_tid_head)
437262306a36Sopenharmony_ci				break;
437362306a36Sopenharmony_ci			e = &qp->s_ack_queue[next];
437462306a36Sopenharmony_ci			if (e->opcode == TID_OP(WRITE_REQ))
437562306a36Sopenharmony_ci				break;
437662306a36Sopenharmony_ci		}
437762306a36Sopenharmony_ci		priv->r_tid_tail = next;
437862306a36Sopenharmony_ci		if (++qp->s_acked_ack_queue > rvt_size_atomic(&dev->rdi))
437962306a36Sopenharmony_ci			qp->s_acked_ack_queue = 0;
438062306a36Sopenharmony_ci	}
438162306a36Sopenharmony_ci
438262306a36Sopenharmony_ci	hfi1_tid_write_alloc_resources(qp, true);
438362306a36Sopenharmony_ci
438462306a36Sopenharmony_ci	/*
438562306a36Sopenharmony_ci	 * If we need to generate more responses, schedule the
438662306a36Sopenharmony_ci	 * send engine.
438762306a36Sopenharmony_ci	 */
438862306a36Sopenharmony_ci	if (req->cur_seg < req->total_segs ||
438962306a36Sopenharmony_ci	    qp->s_tail_ack_queue != qp->r_head_ack_queue) {
439062306a36Sopenharmony_ci		qp->s_flags |= RVT_S_RESP_PENDING;
439162306a36Sopenharmony_ci		hfi1_schedule_send(qp);
439262306a36Sopenharmony_ci	}
439362306a36Sopenharmony_ci
439462306a36Sopenharmony_ci	priv->pending_tid_w_segs--;
439562306a36Sopenharmony_ci	if (priv->s_flags & HFI1_R_TID_RSC_TIMER) {
439662306a36Sopenharmony_ci		if (priv->pending_tid_w_segs)
439762306a36Sopenharmony_ci			hfi1_mod_tid_reap_timer(req->qp);
439862306a36Sopenharmony_ci		else
439962306a36Sopenharmony_ci			hfi1_stop_tid_reap_timer(req->qp);
440062306a36Sopenharmony_ci	}
440162306a36Sopenharmony_ci
440262306a36Sopenharmony_cidone:
440362306a36Sopenharmony_ci	tid_rdma_schedule_ack(qp);
440462306a36Sopenharmony_ciexit:
440562306a36Sopenharmony_ci	priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
440662306a36Sopenharmony_ci	if (fecn)
440762306a36Sopenharmony_ci		qp->s_flags |= RVT_S_ECN;
440862306a36Sopenharmony_ci	spin_unlock_irqrestore(&qp->s_lock, flags);
440962306a36Sopenharmony_ci	return;
441062306a36Sopenharmony_ci
441162306a36Sopenharmony_cisend_nak:
441262306a36Sopenharmony_ci	if (!priv->s_nak_state) {
441362306a36Sopenharmony_ci		priv->s_nak_state = IB_NAK_PSN_ERROR;
441462306a36Sopenharmony_ci		priv->s_nak_psn = flow->flow_state.r_next_psn;
441562306a36Sopenharmony_ci		tid_rdma_trigger_ack(qp);
441662306a36Sopenharmony_ci	}
441762306a36Sopenharmony_ci	goto done;
441862306a36Sopenharmony_ci}
441962306a36Sopenharmony_ci
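441962306a36Sopenharmony_ci/*
441962306a36Sopenharmony_ci * A RESYNC PSN has all KDETH sequence bits set, i.e. it is the last PSN
441962306a36Sopenharmony_ci * of a generation. Example, assuming the 11-bit sequence field from
441962306a36Sopenharmony_ci * hfi.h: 0x127ff & 0x7ff == 0x7ff, so 0x127ff is a RESYNC PSN, while
441962306a36Sopenharmony_ci * 0x12010 is not.
441962306a36Sopenharmony_ci */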
442062306a36Sopenharmony_cistatic bool hfi1_tid_rdma_is_resync_psn(u32 psn)
442162306a36Sopenharmony_ci{
442262306a36Sopenharmony_ci	return (bool)((psn & HFI1_KDETH_BTH_SEQ_MASK) ==
442362306a36Sopenharmony_ci		      HFI1_KDETH_BTH_SEQ_MASK);
442462306a36Sopenharmony_ci}
442562306a36Sopenharmony_ci
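442562306a36Sopenharmony_ci/*
442562306a36Sopenharmony_ci * In brief: fill in a TID RDMA ACK header. bth2 carries a RESYNC PSN
442562306a36Sopenharmony_ci * (all sequence bits of the current generation set), the NAK'ed PSN, or
442562306a36Sopenharmony_ci * the last PSN of the flow being acknowledged, and the AETH encodes the
442562306a36Sopenharmony_ci * ACK/NAK state as for regular RC ACKs. Returns the header size in
442562306a36Sopenharmony_ci * 32-bit words.
442562306a36Sopenharmony_ci */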
442662306a36Sopenharmony_ciu32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
442762306a36Sopenharmony_ci				  struct ib_other_headers *ohdr, u16 iflow,
442862306a36Sopenharmony_ci				  u32 *bth1, u32 *bth2)
442962306a36Sopenharmony_ci{
443062306a36Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
443162306a36Sopenharmony_ci	struct tid_flow_state *fs = &qpriv->flow_state;
443262306a36Sopenharmony_ci	struct tid_rdma_request *req = ack_to_tid_req(e);
443362306a36Sopenharmony_ci	struct tid_rdma_flow *flow = &req->flows[iflow];
443462306a36Sopenharmony_ci	struct tid_rdma_params *remote;
443562306a36Sopenharmony_ci
443662306a36Sopenharmony_ci	rcu_read_lock();
443762306a36Sopenharmony_ci	remote = rcu_dereference(qpriv->tid_rdma.remote);
443862306a36Sopenharmony_ci	KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth1, JKEY, remote->jkey);
443962306a36Sopenharmony_ci	ohdr->u.tid_rdma.ack.verbs_qp = cpu_to_be32(qp->remote_qpn);
444062306a36Sopenharmony_ci	*bth1 = remote->qp;
444162306a36Sopenharmony_ci	rcu_read_unlock();
444262306a36Sopenharmony_ci
444362306a36Sopenharmony_ci	if (qpriv->resync) {
444462306a36Sopenharmony_ci		*bth2 = mask_psn((fs->generation <<
444562306a36Sopenharmony_ci				  HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
444662306a36Sopenharmony_ci		ohdr->u.tid_rdma.ack.aeth = rvt_compute_aeth(qp);
444762306a36Sopenharmony_ci	} else if (qpriv->s_nak_state) {
444862306a36Sopenharmony_ci		*bth2 = mask_psn(qpriv->s_nak_psn);
444962306a36Sopenharmony_ci		ohdr->u.tid_rdma.ack.aeth =
445062306a36Sopenharmony_ci			cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
445162306a36Sopenharmony_ci				    (qpriv->s_nak_state <<
445262306a36Sopenharmony_ci				     IB_AETH_CREDIT_SHIFT));
445362306a36Sopenharmony_ci	} else {
445462306a36Sopenharmony_ci		*bth2 = full_flow_psn(flow, flow->flow_state.lpsn);
445562306a36Sopenharmony_ci		ohdr->u.tid_rdma.ack.aeth = rvt_compute_aeth(qp);
445662306a36Sopenharmony_ci	}
445762306a36Sopenharmony_ci	KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth0, KVER, 0x1);
445862306a36Sopenharmony_ci	ohdr->u.tid_rdma.ack.tid_flow_qp =
445962306a36Sopenharmony_ci		cpu_to_be32(qpriv->tid_rdma.local.qp |
446062306a36Sopenharmony_ci			    ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
446162306a36Sopenharmony_ci			     TID_RDMA_DESTQP_FLOW_SHIFT) |
446262306a36Sopenharmony_ci			    qpriv->rcd->ctxt);
446362306a36Sopenharmony_ci
446462306a36Sopenharmony_ci	ohdr->u.tid_rdma.ack.tid_flow_psn = 0;
446562306a36Sopenharmony_ci	ohdr->u.tid_rdma.ack.verbs_psn =
446662306a36Sopenharmony_ci		cpu_to_be32(flow->flow_state.resp_ib_psn);
446762306a36Sopenharmony_ci
446862306a36Sopenharmony_ci	if (qpriv->resync) {
446962306a36Sopenharmony_ci		/*
447062306a36Sopenharmony_ci		 * If the PSN before the currently expected KDETH PSN is the
447162306a36Sopenharmony_ci		 * RESYNC PSN, then we never received a good TID RDMA WRITE
447262306a36Sopenharmony_ci		 * DATA packet after a previous RESYNC.
447362306a36Sopenharmony_ci		 * In this case, the next expected KDETH PSN stays the same.
447462306a36Sopenharmony_ci		 */
447562306a36Sopenharmony_ci		if (hfi1_tid_rdma_is_resync_psn(qpriv->r_next_psn_kdeth - 1)) {
447662306a36Sopenharmony_ci			ohdr->u.tid_rdma.ack.tid_flow_psn =
447762306a36Sopenharmony_ci				cpu_to_be32(qpriv->r_next_psn_kdeth_save);
447862306a36Sopenharmony_ci		} else {
447962306a36Sopenharmony_ci			/*
448062306a36Sopenharmony_ci			 * Because the KDETH PSNs jump during a RESYNC, it's
448162306a36Sopenharmony_ci			 * not possible to infer (or compute) the previous value
448262306a36Sopenharmony_ci			 * of r_next_psn_kdeth in the case of back-to-back
448362306a36Sopenharmony_ci			 * RESYNC packets. Therefore, we save it.
448462306a36Sopenharmony_ci			 */
448562306a36Sopenharmony_ci			qpriv->r_next_psn_kdeth_save =
448662306a36Sopenharmony_ci				qpriv->r_next_psn_kdeth - 1;
448762306a36Sopenharmony_ci			ohdr->u.tid_rdma.ack.tid_flow_psn =
448862306a36Sopenharmony_ci				cpu_to_be32(qpriv->r_next_psn_kdeth_save);
448962306a36Sopenharmony_ci			qpriv->r_next_psn_kdeth = mask_psn(*bth2 + 1);
449062306a36Sopenharmony_ci		}
449162306a36Sopenharmony_ci		qpriv->resync = false;
449262306a36Sopenharmony_ci	}
449362306a36Sopenharmony_ci
449462306a36Sopenharmony_ci	return sizeof(ohdr->u.tid_rdma.ack) / sizeof(u32);
449562306a36Sopenharmony_ci}
449662306a36Sopenharmony_ci
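449662306a36Sopenharmony_ci/*
449662306a36Sopenharmony_ci * In brief: process an incoming TID RDMA ACK. acked_tail is advanced
449662306a36Sopenharmony_ci * over every segment fully covered by the acked KDETH PSN, the WQE is
449662306a36Sopenharmony_ci * completed once all of its segments are acked, and an ACK of a RESYNC
449662306a36Sopenharmony_ci * renumbers the PSN ranges of all unacked segments with the new
449662306a36Sopenharmony_ci * generation.
449662306a36Sopenharmony_ci */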
449762306a36Sopenharmony_civoid hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
449862306a36Sopenharmony_ci{
449962306a36Sopenharmony_ci	struct ib_other_headers *ohdr = packet->ohdr;
450062306a36Sopenharmony_ci	struct rvt_qp *qp = packet->qp;
450162306a36Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
450262306a36Sopenharmony_ci	struct rvt_swqe *wqe;
450362306a36Sopenharmony_ci	struct tid_rdma_request *req;
450462306a36Sopenharmony_ci	struct tid_rdma_flow *flow;
450562306a36Sopenharmony_ci	u32 aeth, psn, req_psn, ack_psn, flpsn, resync_psn, ack_kpsn;
450662306a36Sopenharmony_ci	unsigned long flags;
450762306a36Sopenharmony_ci	u16 fidx;
450862306a36Sopenharmony_ci
450962306a36Sopenharmony_ci	trace_hfi1_tid_write_sender_rcv_tid_ack(qp, 0);
451062306a36Sopenharmony_ci	process_ecn(qp, packet);
451162306a36Sopenharmony_ci	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
451262306a36Sopenharmony_ci	aeth = be32_to_cpu(ohdr->u.tid_rdma.ack.aeth);
451362306a36Sopenharmony_ci	req_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.verbs_psn));
451462306a36Sopenharmony_ci	resync_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.tid_flow_psn));
451562306a36Sopenharmony_ci
451662306a36Sopenharmony_ci	spin_lock_irqsave(&qp->s_lock, flags);
451762306a36Sopenharmony_ci	trace_hfi1_rcv_tid_ack(qp, aeth, psn, req_psn, resync_psn);
451862306a36Sopenharmony_ci
451962306a36Sopenharmony_ci	/* If we are waiting for an ACK to RESYNC, drop any other packets */
452062306a36Sopenharmony_ci	if ((qp->s_flags & HFI1_S_WAIT_HALT) &&
452162306a36Sopenharmony_ci	    cmp_psn(psn, qpriv->s_resync_psn))
452262306a36Sopenharmony_ci		goto ack_op_err;
452362306a36Sopenharmony_ci
452462306a36Sopenharmony_ci	ack_psn = req_psn;
452562306a36Sopenharmony_ci	if (hfi1_tid_rdma_is_resync_psn(psn))
452662306a36Sopenharmony_ci		ack_kpsn = resync_psn;
452762306a36Sopenharmony_ci	else
452862306a36Sopenharmony_ci		ack_kpsn = psn;
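452862306a36Sopenharmony_ci	/*
452862306a36Sopenharmony_ci	 * The top three AETH bits distinguish ACK (0) from NAK (3), as in
452862306a36Sopenharmony_ci	 * the switch below. For a NAK the carried PSN is the first unacked
452862306a36Sopenharmony_ci	 * packet, so step back to the last packet actually acked.
452862306a36Sopenharmony_ci	 */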
452962306a36Sopenharmony_ci	if (aeth >> 29) {
453062306a36Sopenharmony_ci		ack_psn--;
453162306a36Sopenharmony_ci		ack_kpsn--;
453262306a36Sopenharmony_ci	}
453362306a36Sopenharmony_ci
453462306a36Sopenharmony_ci	if (unlikely(qp->s_acked == qp->s_tail))
453562306a36Sopenharmony_ci		goto ack_op_err;
453662306a36Sopenharmony_ci
453762306a36Sopenharmony_ci	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
453862306a36Sopenharmony_ci
453962306a36Sopenharmony_ci	if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
454062306a36Sopenharmony_ci		goto ack_op_err;
454162306a36Sopenharmony_ci
454262306a36Sopenharmony_ci	req = wqe_to_tid_req(wqe);
454362306a36Sopenharmony_ci	trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
454462306a36Sopenharmony_ci				       wqe->lpsn, req);
454562306a36Sopenharmony_ci	flow = &req->flows[req->acked_tail];
454662306a36Sopenharmony_ci	trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
454762306a36Sopenharmony_ci
454862306a36Sopenharmony_ci	/* Drop stale ACK/NAK */
454962306a36Sopenharmony_ci	if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0 ||
455062306a36Sopenharmony_ci	    cmp_psn(req_psn, flow->flow_state.resp_ib_psn) < 0)
455162306a36Sopenharmony_ci		goto ack_op_err;
455262306a36Sopenharmony_ci
455362306a36Sopenharmony_ci	while (cmp_psn(ack_kpsn,
455462306a36Sopenharmony_ci		       full_flow_psn(flow, flow->flow_state.lpsn)) >= 0 &&
455562306a36Sopenharmony_ci	       req->ack_seg < req->cur_seg) {
455662306a36Sopenharmony_ci		req->ack_seg++;
455762306a36Sopenharmony_ci		/* advance acked segment pointer */
455862306a36Sopenharmony_ci		req->acked_tail = CIRC_NEXT(req->acked_tail, MAX_FLOWS);
455962306a36Sopenharmony_ci		req->r_last_acked = flow->flow_state.resp_ib_psn;
456062306a36Sopenharmony_ci		trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
456162306a36Sopenharmony_ci					       wqe->lpsn, req);
456262306a36Sopenharmony_ci		if (req->ack_seg == req->total_segs) {
456362306a36Sopenharmony_ci			req->state = TID_REQUEST_COMPLETE;
456462306a36Sopenharmony_ci			wqe = do_rc_completion(qp, wqe,
456562306a36Sopenharmony_ci					       to_iport(qp->ibqp.device,
456662306a36Sopenharmony_ci							qp->port_num));
456762306a36Sopenharmony_ci			trace_hfi1_sender_rcv_tid_ack(qp);
456862306a36Sopenharmony_ci			atomic_dec(&qpriv->n_tid_requests);
456962306a36Sopenharmony_ci			if (qp->s_acked == qp->s_tail)
457062306a36Sopenharmony_ci				break;
457162306a36Sopenharmony_ci			if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
457262306a36Sopenharmony_ci				break;
457362306a36Sopenharmony_ci			req = wqe_to_tid_req(wqe);
457462306a36Sopenharmony_ci		}
457562306a36Sopenharmony_ci		flow = &req->flows[req->acked_tail];
457662306a36Sopenharmony_ci		trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
457762306a36Sopenharmony_ci	}
457862306a36Sopenharmony_ci
457962306a36Sopenharmony_ci	trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
458062306a36Sopenharmony_ci				       wqe->lpsn, req);
458162306a36Sopenharmony_ci	switch (aeth >> 29) {
458262306a36Sopenharmony_ci	case 0:         /* ACK */
458362306a36Sopenharmony_ci		if (qpriv->s_flags & RVT_S_WAIT_ACK)
458462306a36Sopenharmony_ci			qpriv->s_flags &= ~RVT_S_WAIT_ACK;
458562306a36Sopenharmony_ci		if (!hfi1_tid_rdma_is_resync_psn(psn)) {
458662306a36Sopenharmony_ci			/* Check if there is any pending TID ACK */
458762306a36Sopenharmony_ci			if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
458862306a36Sopenharmony_ci			    req->ack_seg < req->cur_seg)
458962306a36Sopenharmony_ci				hfi1_mod_tid_retry_timer(qp);
459062306a36Sopenharmony_ci			else
459162306a36Sopenharmony_ci				hfi1_stop_tid_retry_timer(qp);
459262306a36Sopenharmony_ci			hfi1_schedule_send(qp);
459362306a36Sopenharmony_ci		} else {
459462306a36Sopenharmony_ci			u32 spsn, fpsn, last_acked, generation;
459562306a36Sopenharmony_ci			struct tid_rdma_request *rptr;
459662306a36Sopenharmony_ci
459762306a36Sopenharmony_ci			/* ACK(RESYNC) */
459862306a36Sopenharmony_ci			hfi1_stop_tid_retry_timer(qp);
459962306a36Sopenharmony_ci			/* Allow new requests (see hfi1_make_tid_rdma_pkt) */
460062306a36Sopenharmony_ci			qp->s_flags &= ~HFI1_S_WAIT_HALT;
460162306a36Sopenharmony_ci			/*
460262306a36Sopenharmony_ci			 * Clear the RVT_S_SEND_ONE flag in case the TID RDMA
460362306a36Sopenharmony_ci			 * ACK is received after the TID retry timer has fired
460462306a36Sopenharmony_ci			 * again. In that case, do not send any more TID
460562306a36Sopenharmony_ci			 * RESYNC requests or wait for any more TID ACK packets.
460662306a36Sopenharmony_ci			 */
460762306a36Sopenharmony_ci			qpriv->s_flags &= ~RVT_S_SEND_ONE;
460862306a36Sopenharmony_ci			hfi1_schedule_send(qp);
460962306a36Sopenharmony_ci
461062306a36Sopenharmony_ci			if ((qp->s_acked == qpriv->s_tid_tail &&
461162306a36Sopenharmony_ci			     req->ack_seg == req->total_segs) ||
461262306a36Sopenharmony_ci			    qp->s_acked == qp->s_tail) {
461362306a36Sopenharmony_ci				qpriv->s_state = TID_OP(WRITE_DATA_LAST);
461462306a36Sopenharmony_ci				goto done;
461562306a36Sopenharmony_ci			}
461662306a36Sopenharmony_ci
461762306a36Sopenharmony_ci			if (req->ack_seg == req->comp_seg) {
461862306a36Sopenharmony_ci				qpriv->s_state = TID_OP(WRITE_DATA);
461962306a36Sopenharmony_ci				goto done;
462062306a36Sopenharmony_ci			}
462162306a36Sopenharmony_ci
462262306a36Sopenharmony_ci			/*
462362306a36Sopenharmony_ci			 * The PSN to start with is the next PSN after the
462462306a36Sopenharmony_ci			 * RESYNC PSN.
462562306a36Sopenharmony_ci			 */
462662306a36Sopenharmony_ci			psn = mask_psn(psn + 1);
462762306a36Sopenharmony_ci			generation = psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
462862306a36Sopenharmony_ci			spsn = 0;
462962306a36Sopenharmony_ci
463062306a36Sopenharmony_ci			/*
463162306a36Sopenharmony_ci			 * Update to the correct WQE when we get an ACK(RESYNC)
463262306a36Sopenharmony_ci			 * in the middle of a request.
463362306a36Sopenharmony_ci			 */
463462306a36Sopenharmony_ci			if (delta_psn(ack_psn, wqe->lpsn))
463562306a36Sopenharmony_ci				wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
463662306a36Sopenharmony_ci			req = wqe_to_tid_req(wqe);
463762306a36Sopenharmony_ci			flow = &req->flows[req->acked_tail];
463862306a36Sopenharmony_ci			/*
463962306a36Sopenharmony_ci			 * RESYNC re-numbers the PSN ranges of all remaining
464062306a36Sopenharmony_ci			 * segments. Also, PSNs restart from 0 in the middle of
464162306a36Sopenharmony_ci			 * a segment, so the first segment size is less than the
464262306a36Sopenharmony_ci			 * default number of packets. flow->resync_npkts is used
464362306a36Sopenharmony_ci			 * to track the number of packets from the start of the
464462306a36Sopenharmony_ci			 * real segment to the point of 0 PSN after the RESYNC
464562306a36Sopenharmony_ci			 * in order to later correctly rewind the SGE.
464662306a36Sopenharmony_ci			 */
464762306a36Sopenharmony_ci			fpsn = full_flow_psn(flow, flow->flow_state.spsn);
464862306a36Sopenharmony_ci			req->r_ack_psn = psn;
464962306a36Sopenharmony_ci			/*
465062306a36Sopenharmony_ci			 * If resync_psn points to the last flow PSN for a
465162306a36Sopenharmony_ci			 * segment and the new segment (likely from a new
465262306a36Sopenharmony_ci			 * request) starts with a new generation number, we
465362306a36Sopenharmony_ci			 * need to adjust resync_psn accordingly.
465462306a36Sopenharmony_ci			 */
465562306a36Sopenharmony_ci			if (flow->flow_state.generation !=
465662306a36Sopenharmony_ci			    (resync_psn >> HFI1_KDETH_BTH_SEQ_SHIFT))
465762306a36Sopenharmony_ci				resync_psn = mask_psn(fpsn - 1);
465862306a36Sopenharmony_ci			flow->resync_npkts +=
465962306a36Sopenharmony_ci				delta_psn(mask_psn(resync_psn + 1), fpsn);
466062306a36Sopenharmony_ci			/*
466162306a36Sopenharmony_ci			 * Renumber all packet sequence number ranges
466262306a36Sopenharmony_ci			 * based on the new generation.
466362306a36Sopenharmony_ci			 */
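466362306a36Sopenharmony_ci			/*
466362306a36Sopenharmony_ci			 * Rough sketch of the loop below: the remaining
466362306a36Sopenharmony_ci			 * packets of each unacked flow are renumbered
466362306a36Sopenharmony_ci			 * consecutively from sequence 0 under the new
466362306a36Sopenharmony_ci			 * generation, e.g. flows with 3 and 5 packets
466362306a36Sopenharmony_ci			 * left become spsn 0..2 and 3..7 (illustrative
466362306a36Sopenharmony_ci			 * numbers only).
466362306a36Sopenharmony_ci			 */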
466462306a36Sopenharmony_ci			last_acked = qp->s_acked;
466562306a36Sopenharmony_ci			rptr = req;
466662306a36Sopenharmony_ci			while (1) {
466762306a36Sopenharmony_ci				/* start from last acked segment */
466862306a36Sopenharmony_ci				for (fidx = rptr->acked_tail;
466962306a36Sopenharmony_ci				     CIRC_CNT(rptr->setup_head, fidx,
467062306a36Sopenharmony_ci					      MAX_FLOWS);
467162306a36Sopenharmony_ci				     fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
467262306a36Sopenharmony_ci					u32 lpsn;
467362306a36Sopenharmony_ci					u32 gen;
467462306a36Sopenharmony_ci
467562306a36Sopenharmony_ci					flow = &rptr->flows[fidx];
467662306a36Sopenharmony_ci					gen = flow->flow_state.generation;
467762306a36Sopenharmony_ci					if (WARN_ON(gen == generation &&
467862306a36Sopenharmony_ci						    flow->flow_state.spsn !=
467962306a36Sopenharmony_ci						     spsn))
468062306a36Sopenharmony_ci						continue;
468162306a36Sopenharmony_ci					lpsn = flow->flow_state.lpsn;
468262306a36Sopenharmony_ci					lpsn = full_flow_psn(flow, lpsn);
468362306a36Sopenharmony_ci					flow->npkts =
468462306a36Sopenharmony_ci						delta_psn(lpsn,
468562306a36Sopenharmony_ci							  mask_psn(resync_psn)
468662306a36Sopenharmony_ci							  );
468762306a36Sopenharmony_ci					flow->flow_state.generation =
468862306a36Sopenharmony_ci						generation;
468962306a36Sopenharmony_ci					flow->flow_state.spsn = spsn;
469062306a36Sopenharmony_ci					flow->flow_state.lpsn =
469162306a36Sopenharmony_ci						flow->flow_state.spsn +
469262306a36Sopenharmony_ci						flow->npkts - 1;
469362306a36Sopenharmony_ci					flow->pkt = 0;
469462306a36Sopenharmony_ci					spsn += flow->npkts;
469562306a36Sopenharmony_ci					resync_psn += flow->npkts;
469662306a36Sopenharmony_ci					trace_hfi1_tid_flow_rcv_tid_ack(qp,
469762306a36Sopenharmony_ci									fidx,
469862306a36Sopenharmony_ci									flow);
469962306a36Sopenharmony_ci				}
470062306a36Sopenharmony_ci				if (++last_acked == qpriv->s_tid_cur + 1)
470162306a36Sopenharmony_ci					break;
470262306a36Sopenharmony_ci				if (last_acked == qp->s_size)
470362306a36Sopenharmony_ci					last_acked = 0;
470462306a36Sopenharmony_ci				wqe = rvt_get_swqe_ptr(qp, last_acked);
470562306a36Sopenharmony_ci				rptr = wqe_to_tid_req(wqe);
470662306a36Sopenharmony_ci			}
470762306a36Sopenharmony_ci			req->cur_seg = req->ack_seg;
470862306a36Sopenharmony_ci			qpriv->s_tid_tail = qp->s_acked;
470962306a36Sopenharmony_ci			qpriv->s_state = TID_OP(WRITE_REQ);
471062306a36Sopenharmony_ci			hfi1_schedule_tid_send(qp);
471162306a36Sopenharmony_ci		}
471262306a36Sopenharmony_cidone:
471362306a36Sopenharmony_ci		qpriv->s_retry = qp->s_retry_cnt;
471462306a36Sopenharmony_ci		break;
471562306a36Sopenharmony_ci
471662306a36Sopenharmony_ci	case 3:         /* NAK */
471762306a36Sopenharmony_ci		hfi1_stop_tid_retry_timer(qp);
471862306a36Sopenharmony_ci		switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
471962306a36Sopenharmony_ci			IB_AETH_CREDIT_MASK) {
472062306a36Sopenharmony_ci		case 0: /* PSN sequence error */
472162306a36Sopenharmony_ci			if (!req->flows)
472262306a36Sopenharmony_ci				break;
472362306a36Sopenharmony_ci			flow = &req->flows[req->acked_tail];
472462306a36Sopenharmony_ci			flpsn = full_flow_psn(flow, flow->flow_state.lpsn);
472562306a36Sopenharmony_ci			if (cmp_psn(psn, flpsn) > 0)
472662306a36Sopenharmony_ci				break;
472762306a36Sopenharmony_ci			trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail,
472862306a36Sopenharmony_ci							flow);
472962306a36Sopenharmony_ci			req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
473062306a36Sopenharmony_ci			req->cur_seg = req->ack_seg;
473162306a36Sopenharmony_ci			qpriv->s_tid_tail = qp->s_acked;
473262306a36Sopenharmony_ci			qpriv->s_state = TID_OP(WRITE_REQ);
473362306a36Sopenharmony_ci			qpriv->s_retry = qp->s_retry_cnt;
473462306a36Sopenharmony_ci			hfi1_schedule_tid_send(qp);
473562306a36Sopenharmony_ci			break;
473662306a36Sopenharmony_ci
473762306a36Sopenharmony_ci		default:
473862306a36Sopenharmony_ci			break;
473962306a36Sopenharmony_ci		}
474062306a36Sopenharmony_ci		break;
474162306a36Sopenharmony_ci
474262306a36Sopenharmony_ci	default:
474362306a36Sopenharmony_ci		break;
474462306a36Sopenharmony_ci	}
474562306a36Sopenharmony_ci
474662306a36Sopenharmony_ciack_op_err:
474762306a36Sopenharmony_ci	spin_unlock_irqrestore(&qp->s_lock, flags);
474862306a36Sopenharmony_ci}
474962306a36Sopenharmony_ci
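474962306a36Sopenharmony_ci/*
474962306a36Sopenharmony_ci * TID retry timer helpers. In brief: add/mod arm the timer under
474962306a36Sopenharmony_ci * qp->s_lock, stop disarms it and reports whether it was pending, and
474962306a36Sopenharmony_ci * del is the teardown path that waits for a running handler to finish.
474962306a36Sopenharmony_ci */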
475062306a36Sopenharmony_civoid hfi1_add_tid_retry_timer(struct rvt_qp *qp)
475162306a36Sopenharmony_ci{
475262306a36Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
475362306a36Sopenharmony_ci	struct ib_qp *ibqp = &qp->ibqp;
475462306a36Sopenharmony_ci	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
475562306a36Sopenharmony_ci
475662306a36Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
475762306a36Sopenharmony_ci	if (!(priv->s_flags & HFI1_S_TID_RETRY_TIMER)) {
475862306a36Sopenharmony_ci		priv->s_flags |= HFI1_S_TID_RETRY_TIMER;
475962306a36Sopenharmony_ci		priv->s_tid_retry_timer.expires = jiffies +
476062306a36Sopenharmony_ci			priv->tid_retry_timeout_jiffies + rdi->busy_jiffies;
476162306a36Sopenharmony_ci		add_timer(&priv->s_tid_retry_timer);
476262306a36Sopenharmony_ci	}
476362306a36Sopenharmony_ci}
476462306a36Sopenharmony_ci
476562306a36Sopenharmony_cistatic void hfi1_mod_tid_retry_timer(struct rvt_qp *qp)
476662306a36Sopenharmony_ci{
476762306a36Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
476862306a36Sopenharmony_ci	struct ib_qp *ibqp = &qp->ibqp;
476962306a36Sopenharmony_ci	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
477062306a36Sopenharmony_ci
477162306a36Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
477262306a36Sopenharmony_ci	priv->s_flags |= HFI1_S_TID_RETRY_TIMER;
477362306a36Sopenharmony_ci	mod_timer(&priv->s_tid_retry_timer, jiffies +
477462306a36Sopenharmony_ci		  priv->tid_retry_timeout_jiffies + rdi->busy_jiffies);
477562306a36Sopenharmony_ci}
477662306a36Sopenharmony_ci
477762306a36Sopenharmony_cistatic int hfi1_stop_tid_retry_timer(struct rvt_qp *qp)
477862306a36Sopenharmony_ci{
477962306a36Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
478062306a36Sopenharmony_ci	int rval = 0;
478162306a36Sopenharmony_ci
478262306a36Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
478362306a36Sopenharmony_ci	if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
478462306a36Sopenharmony_ci		rval = del_timer(&priv->s_tid_retry_timer);
478562306a36Sopenharmony_ci		priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
478662306a36Sopenharmony_ci	}
478762306a36Sopenharmony_ci	return rval;
478862306a36Sopenharmony_ci}
478962306a36Sopenharmony_ci
479062306a36Sopenharmony_civoid hfi1_del_tid_retry_timer(struct rvt_qp *qp)
479162306a36Sopenharmony_ci{
479262306a36Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
479362306a36Sopenharmony_ci
479462306a36Sopenharmony_ci	del_timer_sync(&priv->s_tid_retry_timer);
479562306a36Sopenharmony_ci	priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
479662306a36Sopenharmony_ci}
479762306a36Sopenharmony_ci
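479762306a36Sopenharmony_ci/*
479762306a36Sopenharmony_ci * In brief: on TID retry timeout, if retries remain, send a single
479762306a36Sopenharmony_ci * RESYNC (RVT_S_SEND_ONE) and halt new requests until it is acked
479762306a36Sopenharmony_ci * (HFI1_S_WAIT_HALT); otherwise complete the WQE with
479762306a36Sopenharmony_ci * IB_WC_RETRY_EXC_ERR and move the QP to the error state.
479762306a36Sopenharmony_ci */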
479862306a36Sopenharmony_cistatic void hfi1_tid_retry_timeout(struct timer_list *t)
479962306a36Sopenharmony_ci{
480062306a36Sopenharmony_ci	struct hfi1_qp_priv *priv = from_timer(priv, t, s_tid_retry_timer);
480162306a36Sopenharmony_ci	struct rvt_qp *qp = priv->owner;
480262306a36Sopenharmony_ci	struct rvt_swqe *wqe;
480362306a36Sopenharmony_ci	unsigned long flags;
480462306a36Sopenharmony_ci	struct tid_rdma_request *req;
480562306a36Sopenharmony_ci
480662306a36Sopenharmony_ci	spin_lock_irqsave(&qp->r_lock, flags);
480762306a36Sopenharmony_ci	spin_lock(&qp->s_lock);
480862306a36Sopenharmony_ci	trace_hfi1_tid_write_sender_retry_timeout(qp, 0);
480962306a36Sopenharmony_ci	if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
481062306a36Sopenharmony_ci		hfi1_stop_tid_retry_timer(qp);
481162306a36Sopenharmony_ci		if (!priv->s_retry) {
481262306a36Sopenharmony_ci			trace_hfi1_msg_tid_retry_timeout(/* msg */
481362306a36Sopenharmony_ci				qp,
481462306a36Sopenharmony_ci				"Exhausted retries. Tid retry timeout = ",
481562306a36Sopenharmony_ci				(u64)priv->tid_retry_timeout_jiffies);
481662306a36Sopenharmony_ci
481762306a36Sopenharmony_ci			wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
481862306a36Sopenharmony_ci			hfi1_trdma_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
481962306a36Sopenharmony_ci			rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
482062306a36Sopenharmony_ci		} else {
482162306a36Sopenharmony_ci			wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
482262306a36Sopenharmony_ci			req = wqe_to_tid_req(wqe);
482362306a36Sopenharmony_ci			trace_hfi1_tid_req_tid_retry_timeout(/* req */
482462306a36Sopenharmony_ci			   qp, 0, wqe->wr.opcode, wqe->psn, wqe->lpsn, req);
482562306a36Sopenharmony_ci
482662306a36Sopenharmony_ci			priv->s_flags &= ~RVT_S_WAIT_ACK;
482762306a36Sopenharmony_ci			/* Only send one packet (the RESYNC) */
482862306a36Sopenharmony_ci			priv->s_flags |= RVT_S_SEND_ONE;
482962306a36Sopenharmony_ci			/*
483062306a36Sopenharmony_ci			 * No additional request shall be made by this QP until
483162306a36Sopenharmony_ci			 * the RESYNC has completed.
483262306a36Sopenharmony_ci			 */
483362306a36Sopenharmony_ci			qp->s_flags |= HFI1_S_WAIT_HALT;
483462306a36Sopenharmony_ci			priv->s_state = TID_OP(RESYNC);
483562306a36Sopenharmony_ci			priv->s_retry--;
483662306a36Sopenharmony_ci			hfi1_schedule_tid_send(qp);
483762306a36Sopenharmony_ci		}
483862306a36Sopenharmony_ci	}
483962306a36Sopenharmony_ci	spin_unlock(&qp->s_lock);
484062306a36Sopenharmony_ci	spin_unlock_irqrestore(&qp->r_lock, flags);
484162306a36Sopenharmony_ci}
484262306a36Sopenharmony_ci
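484262306a36Sopenharmony_ci/*
484262306a36Sopenharmony_ci * In brief: build a TID RDMA RESYNC header. bth2 is the last PSN of the
484262306a36Sopenharmony_ci * next generation, i.e. all KDETH sequence bits set, which is exactly
484262306a36Sopenharmony_ci * what hfi1_tid_rdma_is_resync_psn() tests for on the receive side.
484262306a36Sopenharmony_ci * Example, assuming the 11-bit sequence field: next generation 0x25
484262306a36Sopenharmony_ci * gives bth2 = (0x25 << 11) - 1 = 0x127ff.
484262306a36Sopenharmony_ci */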
484362306a36Sopenharmony_ciu32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe,
484462306a36Sopenharmony_ci			       struct ib_other_headers *ohdr, u32 *bth1,
484562306a36Sopenharmony_ci			       u32 *bth2, u16 fidx)
484662306a36Sopenharmony_ci{
484762306a36Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
484862306a36Sopenharmony_ci	struct tid_rdma_params *remote;
484962306a36Sopenharmony_ci	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
485062306a36Sopenharmony_ci	struct tid_rdma_flow *flow = &req->flows[fidx];
485162306a36Sopenharmony_ci	u32 generation;
485262306a36Sopenharmony_ci
485362306a36Sopenharmony_ci	rcu_read_lock();
485462306a36Sopenharmony_ci	remote = rcu_dereference(qpriv->tid_rdma.remote);
485562306a36Sopenharmony_ci	KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth1, JKEY, remote->jkey);
485662306a36Sopenharmony_ci	ohdr->u.tid_rdma.ack.verbs_qp = cpu_to_be32(qp->remote_qpn);
485762306a36Sopenharmony_ci	*bth1 = remote->qp;
485862306a36Sopenharmony_ci	rcu_read_unlock();
485962306a36Sopenharmony_ci
486062306a36Sopenharmony_ci	generation = kern_flow_generation_next(flow->flow_state.generation);
486162306a36Sopenharmony_ci	*bth2 = mask_psn((generation << HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
486262306a36Sopenharmony_ci	qpriv->s_resync_psn = *bth2;
486362306a36Sopenharmony_ci	*bth2 |= IB_BTH_REQ_ACK;
486462306a36Sopenharmony_ci	KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth0, KVER, 0x1);
486562306a36Sopenharmony_ci
486662306a36Sopenharmony_ci	return sizeof(ohdr->u.tid_rdma.resync) / sizeof(u32);
486762306a36Sopenharmony_ci}
486862306a36Sopenharmony_ci
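486862306a36Sopenharmony_ci/*
486862306a36Sopenharmony_ci * In brief: handle an incoming RESYNC. The generation is validated (a
486862306a36Sopenharmony_ci * RESYNC may only carry the current or the next one), the hardware flow
486862306a36Sopenharmony_ci * is reprogrammed with the new generation, every segment after the last
486862306a36Sopenharmony_ci * received one is renumbered, and the TID ACK that a RESYNC always gets
486862306a36Sopenharmony_ci * is scheduled.
486862306a36Sopenharmony_ci */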
486962306a36Sopenharmony_civoid hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
487062306a36Sopenharmony_ci{
487162306a36Sopenharmony_ci	struct ib_other_headers *ohdr = packet->ohdr;
487262306a36Sopenharmony_ci	struct rvt_qp *qp = packet->qp;
487362306a36Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
487462306a36Sopenharmony_ci	struct hfi1_ctxtdata *rcd = qpriv->rcd;
487562306a36Sopenharmony_ci	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
487662306a36Sopenharmony_ci	struct rvt_ack_entry *e;
487762306a36Sopenharmony_ci	struct tid_rdma_request *req;
487862306a36Sopenharmony_ci	struct tid_rdma_flow *flow;
487962306a36Sopenharmony_ci	struct tid_flow_state *fs = &qpriv->flow_state;
488062306a36Sopenharmony_ci	u32 psn, generation, idx, gen_next;
488162306a36Sopenharmony_ci	bool fecn;
488262306a36Sopenharmony_ci	unsigned long flags;
488362306a36Sopenharmony_ci
488462306a36Sopenharmony_ci	fecn = process_ecn(qp, packet);
488562306a36Sopenharmony_ci	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
488662306a36Sopenharmony_ci
488762306a36Sopenharmony_ci	generation = mask_psn(psn + 1) >> HFI1_KDETH_BTH_SEQ_SHIFT;
488862306a36Sopenharmony_ci	spin_lock_irqsave(&qp->s_lock, flags);
488962306a36Sopenharmony_ci
489062306a36Sopenharmony_ci	gen_next = (fs->generation == KERN_GENERATION_RESERVED) ?
489162306a36Sopenharmony_ci		generation : kern_flow_generation_next(fs->generation);
489262306a36Sopenharmony_ci	/*
489362306a36Sopenharmony_ci	 * A RESYNC packet contains the "next" generation and can only come
489462306a36Sopenharmony_ci	 * from the current or previous generation.
489562306a36Sopenharmony_ci	 */
489662306a36Sopenharmony_ci	if (generation != mask_generation(gen_next - 1) &&
489762306a36Sopenharmony_ci	    generation != gen_next)
489862306a36Sopenharmony_ci		goto bail;
489962306a36Sopenharmony_ci	/* Already processing a resync */
490062306a36Sopenharmony_ci	if (qpriv->resync)
490162306a36Sopenharmony_ci		goto bail;
490262306a36Sopenharmony_ci
490362306a36Sopenharmony_ci	spin_lock(&rcd->exp_lock);
490462306a36Sopenharmony_ci	if (fs->index >= RXE_NUM_TID_FLOWS) {
490562306a36Sopenharmony_ci		/*
490662306a36Sopenharmony_ci		 * If we don't have a flow, save the generation so it can be
490762306a36Sopenharmony_ci		 * applied when a new flow is allocated
490862306a36Sopenharmony_ci		 */
490962306a36Sopenharmony_ci		fs->generation = generation;
491062306a36Sopenharmony_ci	} else {
491162306a36Sopenharmony_ci		/* Reprogram the QP flow with new generation */
491262306a36Sopenharmony_ci		rcd->flows[fs->index].generation = generation;
491362306a36Sopenharmony_ci		fs->generation = kern_setup_hw_flow(rcd, fs->index);
491462306a36Sopenharmony_ci	}
491562306a36Sopenharmony_ci	fs->psn = 0;
491662306a36Sopenharmony_ci	/*
491762306a36Sopenharmony_ci	 * Disable SW PSN checking since a RESYNC is equivalent to a
491862306a36Sopenharmony_ci	 * sync point and the flow has been/will be reprogrammed.
491962306a36Sopenharmony_ci	 */
492062306a36Sopenharmony_ci	qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
492162306a36Sopenharmony_ci	trace_hfi1_tid_write_rsp_rcv_resync(qp);
492262306a36Sopenharmony_ci
492362306a36Sopenharmony_ci	/*
492462306a36Sopenharmony_ci	 * Reset all TID flow information with the new generation.
492562306a36Sopenharmony_ci	 * This is done for all requests and segments after the
492662306a36Sopenharmony_ci	 * last received segment.
492762306a36Sopenharmony_ci	 */
492862306a36Sopenharmony_ci	for (idx = qpriv->r_tid_tail; ; idx++) {
492962306a36Sopenharmony_ci		u16 flow_idx;
493062306a36Sopenharmony_ci
493162306a36Sopenharmony_ci		if (idx > rvt_size_atomic(&dev->rdi))
493262306a36Sopenharmony_ci			idx = 0;
493362306a36Sopenharmony_ci		e = &qp->s_ack_queue[idx];
493462306a36Sopenharmony_ci		if (e->opcode == TID_OP(WRITE_REQ)) {
493562306a36Sopenharmony_ci			req = ack_to_tid_req(e);
493662306a36Sopenharmony_ci			trace_hfi1_tid_req_rcv_resync(qp, 0, e->opcode, e->psn,
493762306a36Sopenharmony_ci						      e->lpsn, req);
493862306a36Sopenharmony_ci
493962306a36Sopenharmony_ci			/* start from last unacked segment */
494062306a36Sopenharmony_ci			for (flow_idx = req->clear_tail;
494162306a36Sopenharmony_ci			     CIRC_CNT(req->setup_head, flow_idx,
494262306a36Sopenharmony_ci				      MAX_FLOWS);
494362306a36Sopenharmony_ci			     flow_idx = CIRC_NEXT(flow_idx, MAX_FLOWS)) {
494462306a36Sopenharmony_ci				u32 lpsn;
494562306a36Sopenharmony_ci				u32 next;
494662306a36Sopenharmony_ci
494762306a36Sopenharmony_ci				flow = &req->flows[flow_idx];
494862306a36Sopenharmony_ci				lpsn = full_flow_psn(flow,
494962306a36Sopenharmony_ci						     flow->flow_state.lpsn);
495062306a36Sopenharmony_ci				next = flow->flow_state.r_next_psn;
495162306a36Sopenharmony_ci				flow->npkts = delta_psn(lpsn, next - 1);
495262306a36Sopenharmony_ci				flow->flow_state.generation = fs->generation;
495362306a36Sopenharmony_ci				flow->flow_state.spsn = fs->psn;
495462306a36Sopenharmony_ci				flow->flow_state.lpsn =
495562306a36Sopenharmony_ci					flow->flow_state.spsn + flow->npkts - 1;
495662306a36Sopenharmony_ci				flow->flow_state.r_next_psn =
495762306a36Sopenharmony_ci					full_flow_psn(flow,
495862306a36Sopenharmony_ci						      flow->flow_state.spsn);
495962306a36Sopenharmony_ci				fs->psn += flow->npkts;
496062306a36Sopenharmony_ci				trace_hfi1_tid_flow_rcv_resync(qp, flow_idx,
496162306a36Sopenharmony_ci							       flow);
496262306a36Sopenharmony_ci			}
496362306a36Sopenharmony_ci		}
496462306a36Sopenharmony_ci		if (idx == qp->s_tail_ack_queue)
496562306a36Sopenharmony_ci			break;
496662306a36Sopenharmony_ci	}
496762306a36Sopenharmony_ci
496862306a36Sopenharmony_ci	spin_unlock(&rcd->exp_lock);
496962306a36Sopenharmony_ci	qpriv->resync = true;
497062306a36Sopenharmony_ci	/* A RESYNC request always gets a TID RDMA ACK. */
497162306a36Sopenharmony_ci	qpriv->s_nak_state = 0;
497262306a36Sopenharmony_ci	tid_rdma_trigger_ack(qp);
497362306a36Sopenharmony_cibail:
497462306a36Sopenharmony_ci	if (fecn)
497562306a36Sopenharmony_ci		qp->s_flags |= RVT_S_ECN;
497662306a36Sopenharmony_ci	spin_unlock_irqrestore(&qp->s_lock, flags);
497762306a36Sopenharmony_ci}
497862306a36Sopenharmony_ci
497962306a36Sopenharmony_ci/*
498062306a36Sopenharmony_ci * Call this function when the last TID RDMA WRITE DATA packet for a request
498162306a36Sopenharmony_ci * is built.
498262306a36Sopenharmony_ci */
498362306a36Sopenharmony_cistatic void update_tid_tail(struct rvt_qp *qp)
498462306a36Sopenharmony_ci	__must_hold(&qp->s_lock)
498562306a36Sopenharmony_ci{
498662306a36Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
498762306a36Sopenharmony_ci	u32 i;
498862306a36Sopenharmony_ci	struct rvt_swqe *wqe;
498962306a36Sopenharmony_ci
499062306a36Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
499162306a36Sopenharmony_ci	/* Can't move beyond s_tid_cur */
499262306a36Sopenharmony_ci	if (priv->s_tid_tail == priv->s_tid_cur)
499362306a36Sopenharmony_ci		return;
499462306a36Sopenharmony_ci	for (i = priv->s_tid_tail + 1; ; i++) {
499562306a36Sopenharmony_ci		if (i == qp->s_size)
499662306a36Sopenharmony_ci			i = 0;
499762306a36Sopenharmony_ci
499862306a36Sopenharmony_ci		if (i == priv->s_tid_cur)
499962306a36Sopenharmony_ci			break;
500062306a36Sopenharmony_ci		wqe = rvt_get_swqe_ptr(qp, i);
500162306a36Sopenharmony_ci		if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
500262306a36Sopenharmony_ci			break;
500362306a36Sopenharmony_ci	}
500462306a36Sopenharmony_ci	priv->s_tid_tail = i;
500562306a36Sopenharmony_ci	priv->s_state = TID_OP(WRITE_RESP);
500662306a36Sopenharmony_ci}
500762306a36Sopenharmony_ci
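500762306a36Sopenharmony_ci/*
500762306a36Sopenharmony_ci * In brief: top-level TID RDMA send-side dispatcher. Requests and
500762306a36Sopenharmony_ci * responses take priority over data packets; otherwise one TID RDMA
500762306a36Sopenharmony_ci * WRITE DATA packet is built per call (advancing clear_tail at segment
500762306a36Sopenharmony_ci * boundaries), or a RESYNC header is generated in the RESYNC state.
500762306a36Sopenharmony_ci */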
500862306a36Sopenharmony_ciint hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
500962306a36Sopenharmony_ci	__must_hold(&qp->s_lock)
501062306a36Sopenharmony_ci{
501162306a36Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
501262306a36Sopenharmony_ci	struct rvt_swqe *wqe;
501362306a36Sopenharmony_ci	u32 bth1 = 0, bth2 = 0, hwords = 5, len, middle = 0;
501462306a36Sopenharmony_ci	struct ib_other_headers *ohdr;
501562306a36Sopenharmony_ci	struct rvt_sge_state *ss = &qp->s_sge;
501662306a36Sopenharmony_ci	struct rvt_ack_entry *e = &qp->s_ack_queue[qp->s_tail_ack_queue];
501762306a36Sopenharmony_ci	struct tid_rdma_request *req = ack_to_tid_req(e);
501862306a36Sopenharmony_ci	bool last = false;
501962306a36Sopenharmony_ci	u8 opcode = TID_OP(WRITE_DATA);
502062306a36Sopenharmony_ci
502162306a36Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
502262306a36Sopenharmony_ci	trace_hfi1_tid_write_sender_make_tid_pkt(qp, 0);
502362306a36Sopenharmony_ci	/*
502462306a36Sopenharmony_ci	 * Prioritize the sending of the requests and responses over the
502562306a36Sopenharmony_ci	 * sending of the TID RDMA data packets.
502662306a36Sopenharmony_ci	 */
502762306a36Sopenharmony_ci	if (((atomic_read(&priv->n_tid_requests) < HFI1_TID_RDMA_WRITE_CNT) &&
502862306a36Sopenharmony_ci	     atomic_read(&priv->n_requests) &&
502962306a36Sopenharmony_ci	     !(qp->s_flags & (RVT_S_BUSY | RVT_S_WAIT_ACK |
503062306a36Sopenharmony_ci			     HFI1_S_ANY_WAIT_IO))) ||
503162306a36Sopenharmony_ci	    (e->opcode == TID_OP(WRITE_REQ) && req->cur_seg < req->alloc_seg &&
503262306a36Sopenharmony_ci	     !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)))) {
503362306a36Sopenharmony_ci		struct iowait_work *iowork;
503462306a36Sopenharmony_ci
503562306a36Sopenharmony_ci		iowork = iowait_get_ib_work(&priv->s_iowait);
503662306a36Sopenharmony_ci		ps->s_txreq = get_waiting_verbs_txreq(iowork);
503762306a36Sopenharmony_ci		if (ps->s_txreq || hfi1_make_rc_req(qp, ps)) {
503862306a36Sopenharmony_ci			priv->s_flags |= HFI1_S_TID_BUSY_SET;
503962306a36Sopenharmony_ci			return 1;
504062306a36Sopenharmony_ci		}
504162306a36Sopenharmony_ci	}
504262306a36Sopenharmony_ci
504362306a36Sopenharmony_ci	ps->s_txreq = get_txreq(ps->dev, qp);
504462306a36Sopenharmony_ci	if (!ps->s_txreq)
504562306a36Sopenharmony_ci		goto bail_no_tx;
504662306a36Sopenharmony_ci
504762306a36Sopenharmony_ci	ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
504862306a36Sopenharmony_ci
504962306a36Sopenharmony_ci	if ((priv->s_flags & RVT_S_ACK_PENDING) &&
505062306a36Sopenharmony_ci	    make_tid_rdma_ack(qp, ohdr, ps))
505162306a36Sopenharmony_ci		return 1;
505262306a36Sopenharmony_ci
505362306a36Sopenharmony_ci	/*
505462306a36Sopenharmony_ci	 * Bail out if we can't send data.
505562306a36Sopenharmony_ci	 * Note that this check must be done after the call to
505662306a36Sopenharmony_ci	 * make_tid_rdma_ack() because the responding QP could be in the RTR
505762306a36Sopenharmony_ci	 * state, where it can send TID RDMA ACK but not TID RDMA WRITE DATA.
505862306a36Sopenharmony_ci	 */
505962306a36Sopenharmony_ci	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK))
506062306a36Sopenharmony_ci		goto bail;
506162306a36Sopenharmony_ci
506262306a36Sopenharmony_ci	if (priv->s_flags & RVT_S_WAIT_ACK)
506362306a36Sopenharmony_ci		goto bail;
506462306a36Sopenharmony_ci
506562306a36Sopenharmony_ci	/* Check whether there is anything to do. */
506662306a36Sopenharmony_ci	if (priv->s_tid_tail == HFI1_QP_WQE_INVALID)
506762306a36Sopenharmony_ci		goto bail;
506862306a36Sopenharmony_ci	wqe = rvt_get_swqe_ptr(qp, priv->s_tid_tail);
506962306a36Sopenharmony_ci	req = wqe_to_tid_req(wqe);
507062306a36Sopenharmony_ci	trace_hfi1_tid_req_make_tid_pkt(qp, 0, wqe->wr.opcode, wqe->psn,
507162306a36Sopenharmony_ci					wqe->lpsn, req);
507262306a36Sopenharmony_ci	switch (priv->s_state) {
507362306a36Sopenharmony_ci	case TID_OP(WRITE_REQ):
507462306a36Sopenharmony_ci	case TID_OP(WRITE_RESP):
507562306a36Sopenharmony_ci		priv->tid_ss.sge = wqe->sg_list[0];
507662306a36Sopenharmony_ci		priv->tid_ss.sg_list = wqe->sg_list + 1;
507762306a36Sopenharmony_ci		priv->tid_ss.num_sge = wqe->wr.num_sge;
507862306a36Sopenharmony_ci		priv->tid_ss.total_len = wqe->length;
507962306a36Sopenharmony_ci
508062306a36Sopenharmony_ci		if (priv->s_state == TID_OP(WRITE_REQ))
508162306a36Sopenharmony_ci			hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
508262306a36Sopenharmony_ci		priv->s_state = TID_OP(WRITE_DATA);
508362306a36Sopenharmony_ci		fallthrough;
508462306a36Sopenharmony_ci
508562306a36Sopenharmony_ci	case TID_OP(WRITE_DATA):
508662306a36Sopenharmony_ci		/*
508762306a36Sopenharmony_ci		 * 1. Check whether a TID RDMA WRITE RESP is available.
508862306a36Sopenharmony_ci		 * 2. If no:
508962306a36Sopenharmony_ci		 *    2.1 If more segments remain and no TID RDMA WRITE RESP
509062306a36Sopenharmony_ci		 *        is available, set HFI1_S_WAIT_TID_RESP
509162306a36Sopenharmony_ci		 *    2.2 Return indicating no progress made.
509262306a36Sopenharmony_ci		 * 3. If yes:
509362306a36Sopenharmony_ci		 *    3.1 Build TID RDMA WRITE DATA packet.
509462306a36Sopenharmony_ci		 *    3.2 If last packet in segment:
509562306a36Sopenharmony_ci		 *        3.2.1 Change KDETH header bits
509662306a36Sopenharmony_ci		 *        3.2.2 Advance RESP pointers.
509762306a36Sopenharmony_ci		 *    3.3 Return indicating progress made.
509862306a36Sopenharmony_ci		 */
509962306a36Sopenharmony_ci		trace_hfi1_sender_make_tid_pkt(qp);
510062306a36Sopenharmony_ci		trace_hfi1_tid_write_sender_make_tid_pkt(qp, 0);
510162306a36Sopenharmony_ci		wqe = rvt_get_swqe_ptr(qp, priv->s_tid_tail);
510262306a36Sopenharmony_ci		req = wqe_to_tid_req(wqe);
510362306a36Sopenharmony_ci		len = wqe->length;
510462306a36Sopenharmony_ci
510562306a36Sopenharmony_ci		if (!req->comp_seg || req->cur_seg == req->comp_seg)
510662306a36Sopenharmony_ci			goto bail;
510762306a36Sopenharmony_ci
510862306a36Sopenharmony_ci		trace_hfi1_tid_req_make_tid_pkt(qp, 0, wqe->wr.opcode,
510962306a36Sopenharmony_ci						wqe->psn, wqe->lpsn, req);
511062306a36Sopenharmony_ci		last = hfi1_build_tid_rdma_packet(wqe, ohdr, &bth1, &bth2,
511162306a36Sopenharmony_ci						  &len);
511262306a36Sopenharmony_ci
511362306a36Sopenharmony_ci		if (last) {
511462306a36Sopenharmony_ci			/* move pointer to next flow */
511562306a36Sopenharmony_ci			req->clear_tail = CIRC_NEXT(req->clear_tail,
511662306a36Sopenharmony_ci						    MAX_FLOWS);
511762306a36Sopenharmony_ci			if (++req->cur_seg < req->total_segs) {
511862306a36Sopenharmony_ci				if (!CIRC_CNT(req->setup_head, req->clear_tail,
511962306a36Sopenharmony_ci					      MAX_FLOWS))
512062306a36Sopenharmony_ci					qp->s_flags |= HFI1_S_WAIT_TID_RESP;
512162306a36Sopenharmony_ci			} else {
512262306a36Sopenharmony_ci				priv->s_state = TID_OP(WRITE_DATA_LAST);
512362306a36Sopenharmony_ci				opcode = TID_OP(WRITE_DATA_LAST);
512462306a36Sopenharmony_ci
512562306a36Sopenharmony_ci				/* Advance the s_tid_tail now */
512662306a36Sopenharmony_ci				update_tid_tail(qp);
512762306a36Sopenharmony_ci			}
512862306a36Sopenharmony_ci		}
512962306a36Sopenharmony_ci		hwords += sizeof(ohdr->u.tid_rdma.w_data) / sizeof(u32);
513062306a36Sopenharmony_ci		ss = &priv->tid_ss;
513162306a36Sopenharmony_ci		break;
513262306a36Sopenharmony_ci
513362306a36Sopenharmony_ci	case TID_OP(RESYNC):
513462306a36Sopenharmony_ci		trace_hfi1_sender_make_tid_pkt(qp);
513562306a36Sopenharmony_ci		/* Use generation from the most recently received response */
513662306a36Sopenharmony_ci		wqe = rvt_get_swqe_ptr(qp, priv->s_tid_cur);
513762306a36Sopenharmony_ci		req = wqe_to_tid_req(wqe);
513862306a36Sopenharmony_ci		/* If there are no responses for this WQE, look at the previous one */
513962306a36Sopenharmony_ci		if (!req->comp_seg) {
514062306a36Sopenharmony_ci			wqe = rvt_get_swqe_ptr(qp,
514162306a36Sopenharmony_ci					       (!priv->s_tid_cur ? qp->s_size :
514262306a36Sopenharmony_ci						priv->s_tid_cur) - 1);
514362306a36Sopenharmony_ci			req = wqe_to_tid_req(wqe);
514462306a36Sopenharmony_ci		}
514562306a36Sopenharmony_ci		hwords += hfi1_build_tid_rdma_resync(qp, wqe, ohdr, &bth1,
514662306a36Sopenharmony_ci						     &bth2,
514762306a36Sopenharmony_ci						     CIRC_PREV(req->setup_head,
514862306a36Sopenharmony_ci							       MAX_FLOWS));
514962306a36Sopenharmony_ci		ss = NULL;
515062306a36Sopenharmony_ci		len = 0;
515162306a36Sopenharmony_ci		opcode = TID_OP(RESYNC);
515262306a36Sopenharmony_ci		break;
515362306a36Sopenharmony_ci
515462306a36Sopenharmony_ci	default:
515562306a36Sopenharmony_ci		goto bail;
515662306a36Sopenharmony_ci	}
515762306a36Sopenharmony_ci	if (priv->s_flags & RVT_S_SEND_ONE) {
515862306a36Sopenharmony_ci		priv->s_flags &= ~RVT_S_SEND_ONE;
515962306a36Sopenharmony_ci		priv->s_flags |= RVT_S_WAIT_ACK;
516062306a36Sopenharmony_ci		bth2 |= IB_BTH_REQ_ACK;
516162306a36Sopenharmony_ci	}
516262306a36Sopenharmony_ci	qp->s_len -= len;
516362306a36Sopenharmony_ci	ps->s_txreq->hdr_dwords = hwords;
516462306a36Sopenharmony_ci	ps->s_txreq->sde = priv->s_sde;
516562306a36Sopenharmony_ci	ps->s_txreq->ss = ss;
516662306a36Sopenharmony_ci	ps->s_txreq->s_cur_size = len;
516762306a36Sopenharmony_ci	hfi1_make_ruc_header(qp, ohdr, (opcode << 24), bth1, bth2,
516862306a36Sopenharmony_ci			     middle, ps);
516962306a36Sopenharmony_ci	return 1;
517062306a36Sopenharmony_cibail:
517162306a36Sopenharmony_ci	hfi1_put_txreq(ps->s_txreq);
517262306a36Sopenharmony_cibail_no_tx:
517362306a36Sopenharmony_ci	ps->s_txreq = NULL;
517462306a36Sopenharmony_ci	priv->s_flags &= ~RVT_S_BUSY;
517562306a36Sopenharmony_ci	/*
517662306a36Sopenharmony_ci	 * If we didn't get a txreq, the QP will be woken up later to
517762306a36Sopenharmony_ci	 * try again. Set the iowait flag so that the wakeup knows
517862306a36Sopenharmony_ci	 * which work item to run.
517962306a36Sopenharmony_ci	 * (A better algorithm should be found to do this and to
518062306a36Sopenharmony_ci	 * generalize the sleep/wakeup flags.)
518162306a36Sopenharmony_ci	 */
518262306a36Sopenharmony_ci	iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
518362306a36Sopenharmony_ci	return 0;
518462306a36Sopenharmony_ci}
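
/*
 * A minimal sketch of the flow-ring accounting used in the WRITE_DATA
 * case above, assuming only the CIRC_NEXT()/CIRC_CNT() conventions this
 * file already uses; tid_flow_ring_sketch() is a hypothetical helper,
 * not part of the driver.
 */
static inline bool tid_flow_ring_sketch(u16 setup_head, u16 *clear_tail)
{
	/* Retire the segment just sent by advancing clear_tail */
	*clear_tail = CIRC_NEXT(*clear_tail, MAX_FLOWS);

	/*
	 * When this returns false, no prepared segment remains and the
	 * sender must set HFI1_S_WAIT_TID_RESP until the next TID RDMA
	 * WRITE RESP arrives.
	 */
	return !!CIRC_CNT(setup_head, *clear_tail, MAX_FLOWS);
}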
518562306a36Sopenharmony_ci
518662306a36Sopenharmony_cistatic int make_tid_rdma_ack(struct rvt_qp *qp,
518762306a36Sopenharmony_ci			     struct ib_other_headers *ohdr,
518862306a36Sopenharmony_ci			     struct hfi1_pkt_state *ps)
518962306a36Sopenharmony_ci{
519062306a36Sopenharmony_ci	struct rvt_ack_entry *e;
519162306a36Sopenharmony_ci	struct hfi1_qp_priv *qpriv = qp->priv;
519262306a36Sopenharmony_ci	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
519362306a36Sopenharmony_ci	u32 hwords, next;
519462306a36Sopenharmony_ci	u32 len = 0;
519562306a36Sopenharmony_ci	u32 bth1 = 0, bth2 = 0;
519662306a36Sopenharmony_ci	int middle = 0;
519762306a36Sopenharmony_ci	u16 flow;
519862306a36Sopenharmony_ci	struct tid_rdma_request *req, *nreq;
519962306a36Sopenharmony_ci
520062306a36Sopenharmony_ci	trace_hfi1_tid_write_rsp_make_tid_ack(qp);
520162306a36Sopenharmony_ci	/* Don't send an ACK if we aren't supposed to. */
520262306a36Sopenharmony_ci	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
520362306a36Sopenharmony_ci		goto bail;
520462306a36Sopenharmony_ci
520562306a36Sopenharmony_ci	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
520662306a36Sopenharmony_ci	hwords = 5;
520762306a36Sopenharmony_ci
520862306a36Sopenharmony_ci	e = &qp->s_ack_queue[qpriv->r_tid_ack];
520962306a36Sopenharmony_ci	req = ack_to_tid_req(e);
521062306a36Sopenharmony_ci	/*
521162306a36Sopenharmony_ci	 * In the RESYNC case, we are exactly one segment past the
521262306a36Sopenharmony_ci	 * previously sent ack or at the previously sent NAK. So to send
521362306a36Sopenharmony_ci	 * the resync ack, we go back one segment (which might be part of
521462306a36Sopenharmony_ci	 * the previous request) and let the do-while loop execute again.
521562306a36Sopenharmony_ci	 * The advantage of executing the do-while loop is that any data
521662306a36Sopenharmony_ci	 * received after the previous ack is automatically acked in the
521762306a36Sopenharmony_ci	 * RESYNC ack. It turns out that for the do-while loop we only need
521862306a36Sopenharmony_ci	 * to pull back qpriv->r_tid_ack, not the segment
521962306a36Sopenharmony_ci	 * indices/counters. The scheme works even if the previous request
522062306a36Sopenharmony_ci	 * was not a TID WRITE request.
522162306a36Sopenharmony_ci	 */
522262306a36Sopenharmony_ci	if (qpriv->resync) {
522362306a36Sopenharmony_ci		if (!req->ack_seg || req->ack_seg == req->total_segs)
522462306a36Sopenharmony_ci			qpriv->r_tid_ack = !qpriv->r_tid_ack ?
522562306a36Sopenharmony_ci				rvt_size_atomic(&dev->rdi) :
522662306a36Sopenharmony_ci				qpriv->r_tid_ack - 1;
522762306a36Sopenharmony_ci		e = &qp->s_ack_queue[qpriv->r_tid_ack];
522862306a36Sopenharmony_ci		req = ack_to_tid_req(e);
522962306a36Sopenharmony_ci	}
523062306a36Sopenharmony_ci
523162306a36Sopenharmony_ci	trace_hfi1_rsp_make_tid_ack(qp, e->psn);
523262306a36Sopenharmony_ci	trace_hfi1_tid_req_make_tid_ack(qp, 0, e->opcode, e->psn, e->lpsn,
523362306a36Sopenharmony_ci					req);
523462306a36Sopenharmony_ci	/*
523562306a36Sopenharmony_ci	 * If we've sent all the ACKs that we can, we are done
523662306a36Sopenharmony_ci	 * until we get more segments...
523762306a36Sopenharmony_ci	 */
523862306a36Sopenharmony_ci	if (!qpriv->s_nak_state && !qpriv->resync &&
523962306a36Sopenharmony_ci	    req->ack_seg == req->comp_seg)
524062306a36Sopenharmony_ci		goto bail;
524162306a36Sopenharmony_ci
524262306a36Sopenharmony_ci	do {
524362306a36Sopenharmony_ci		/*
524462306a36Sopenharmony_ci		 * To deal with coalesced ACKs, the acked_tail pointer
524562306a36Sopenharmony_ci		 * into the flow array is used. The distance between it
524662306a36Sopenharmony_ci		 * and clear_tail is the number of flows being ACK'ed
524762306a36Sopenharmony_ci		 * (see the sketch after this function).
524862306a36Sopenharmony_ci		 */
524962306a36Sopenharmony_ci		req->ack_seg +=
525062306a36Sopenharmony_ci			/* Get up-to-date value */
525162306a36Sopenharmony_ci			CIRC_CNT(req->clear_tail, req->acked_tail,
525262306a36Sopenharmony_ci				 MAX_FLOWS);
525362306a36Sopenharmony_ci		/* Advance acked index */
525462306a36Sopenharmony_ci		req->acked_tail = req->clear_tail;
525562306a36Sopenharmony_ci
525662306a36Sopenharmony_ci		/*
525762306a36Sopenharmony_ci		 * req->clear_tail points to the segment currently being
525862306a36Sopenharmony_ci		 * received. So, when sending an ACK, the previous
525962306a36Sopenharmony_ci		 * segment is being ACK'ed.
526062306a36Sopenharmony_ci		 */
526162306a36Sopenharmony_ci		flow = CIRC_PREV(req->acked_tail, MAX_FLOWS);
526262306a36Sopenharmony_ci		if (req->ack_seg != req->total_segs)
526362306a36Sopenharmony_ci			break;
526462306a36Sopenharmony_ci		req->state = TID_REQUEST_COMPLETE;
526562306a36Sopenharmony_ci
526662306a36Sopenharmony_ci		next = qpriv->r_tid_ack + 1;
526762306a36Sopenharmony_ci		if (next > rvt_size_atomic(&dev->rdi))
526862306a36Sopenharmony_ci			next = 0;
526962306a36Sopenharmony_ci		qpriv->r_tid_ack = next;
527062306a36Sopenharmony_ci		if (qp->s_ack_queue[next].opcode != TID_OP(WRITE_REQ))
527162306a36Sopenharmony_ci			break;
527262306a36Sopenharmony_ci		nreq = ack_to_tid_req(&qp->s_ack_queue[next]);
527362306a36Sopenharmony_ci		if (!nreq->comp_seg || nreq->ack_seg == nreq->comp_seg)
527462306a36Sopenharmony_ci			break;
527562306a36Sopenharmony_ci
527662306a36Sopenharmony_ci		/* Move to the next ack entry now */
527762306a36Sopenharmony_ci		e = &qp->s_ack_queue[qpriv->r_tid_ack];
527862306a36Sopenharmony_ci		req = ack_to_tid_req(e);
527962306a36Sopenharmony_ci	} while (1);
528062306a36Sopenharmony_ci
528162306a36Sopenharmony_ci	/*
528262306a36Sopenharmony_ci	 * At this point qpriv->r_tid_ack == qpriv->r_tid_tail but e and
528362306a36Sopenharmony_ci	 * req could be pointing at the previous ack queue entry
528462306a36Sopenharmony_ci	 */
528562306a36Sopenharmony_ci	if (qpriv->s_nak_state ||
528662306a36Sopenharmony_ci	    (qpriv->resync &&
528762306a36Sopenharmony_ci	     !hfi1_tid_rdma_is_resync_psn(qpriv->r_next_psn_kdeth - 1) &&
528862306a36Sopenharmony_ci	     (cmp_psn(qpriv->r_next_psn_kdeth - 1,
528962306a36Sopenharmony_ci		      full_flow_psn(&req->flows[flow],
529062306a36Sopenharmony_ci				    req->flows[flow].flow_state.lpsn)) > 0))) {
529162306a36Sopenharmony_ci		/*
529262306a36Sopenharmony_ci		 * A NAK will implicitly acknowledge all previous TID RDMA
529362306a36Sopenharmony_ci		 * requests. Therefore, we NAK with the req->acked_tail
529462306a36Sopenharmony_ci		 * segment for the request at qpriv->r_tid_ack (same at
529562306a36Sopenharmony_ci		 * this point as the req->clear_tail segment for the
529662306a36Sopenharmony_ci		 * qpriv->r_tid_tail request)
529762306a36Sopenharmony_ci		 */
529862306a36Sopenharmony_ci		e = &qp->s_ack_queue[qpriv->r_tid_ack];
529962306a36Sopenharmony_ci		req = ack_to_tid_req(e);
530062306a36Sopenharmony_ci		flow = req->acked_tail;
530162306a36Sopenharmony_ci	} else if (req->ack_seg == req->total_segs &&
530262306a36Sopenharmony_ci		   qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK)
530362306a36Sopenharmony_ci		qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
530462306a36Sopenharmony_ci
530562306a36Sopenharmony_ci	trace_hfi1_tid_write_rsp_make_tid_ack(qp);
530662306a36Sopenharmony_ci	trace_hfi1_tid_req_make_tid_ack(qp, 0, e->opcode, e->psn, e->lpsn,
530762306a36Sopenharmony_ci					req);
530862306a36Sopenharmony_ci	hwords += hfi1_build_tid_rdma_write_ack(qp, e, ohdr, flow, &bth1,
530962306a36Sopenharmony_ci						&bth2);
531062306a36Sopenharmony_ci	len = 0;
531162306a36Sopenharmony_ci	qpriv->s_flags &= ~RVT_S_ACK_PENDING;
531262306a36Sopenharmony_ci	ps->s_txreq->hdr_dwords = hwords;
531362306a36Sopenharmony_ci	ps->s_txreq->sde = qpriv->s_sde;
531462306a36Sopenharmony_ci	ps->s_txreq->s_cur_size = len;
531562306a36Sopenharmony_ci	ps->s_txreq->ss = NULL;
531662306a36Sopenharmony_ci	hfi1_make_ruc_header(qp, ohdr, (TID_OP(ACK) << 24), bth1, bth2, middle,
531762306a36Sopenharmony_ci			     ps);
531862306a36Sopenharmony_ci	ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
531962306a36Sopenharmony_ci	return 1;
532062306a36Sopenharmony_cibail:
532162306a36Sopenharmony_ci	/*
532262306a36Sopenharmony_ci	 * Ensure s_rdma_ack_cnt changes are committed prior to clearing
532362306a36Sopenharmony_ci	 * RVT_S_ACK_PENDING
532462306a36Sopenharmony_ci	 */
532562306a36Sopenharmony_ci	smp_wmb();
532662306a36Sopenharmony_ci	qpriv->s_flags &= ~RVT_S_ACK_PENDING;
532762306a36Sopenharmony_ci	return 0;
532862306a36Sopenharmony_ci}
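
/*
 * An illustrative sketch of how make_tid_rdma_ack() above folds
 * coalesced ACKs into ack_seg by measuring the ring distance between
 * clear_tail and acked_tail, assuming the tid_rdma_request field
 * semantics used above; tid_coalesced_acks_sketch() is hypothetical.
 */
static inline u32 tid_coalesced_acks_sketch(struct tid_rdma_request *req)
{
	/* Segments completed since the last ACK was built */
	u32 acked = CIRC_CNT(req->clear_tail, req->acked_tail, MAX_FLOWS);

	/* Fold them into the running total and catch acked_tail up */
	req->ack_seg += acked;
	req->acked_tail = req->clear_tail;
	return acked;
}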
532962306a36Sopenharmony_ci
533062306a36Sopenharmony_cistatic int hfi1_send_tid_ok(struct rvt_qp *qp)
533162306a36Sopenharmony_ci{
533262306a36Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
533362306a36Sopenharmony_ci
533462306a36Sopenharmony_ci	return !(priv->s_flags & RVT_S_BUSY ||
533562306a36Sopenharmony_ci		 qp->s_flags & HFI1_S_ANY_WAIT_IO) &&
533662306a36Sopenharmony_ci		(verbs_txreq_queued(iowait_get_tid_work(&priv->s_iowait)) ||
533762306a36Sopenharmony_ci		 (priv->s_flags & RVT_S_RESP_PENDING) ||
533862306a36Sopenharmony_ci		 !(qp->s_flags & HFI1_S_ANY_TID_WAIT_SEND));
533962306a36Sopenharmony_ci}
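
/*
 * hfi1_send_tid_ok() above packs its test into a single expression. An
 * equivalent unrolled form is sketched here purely to illustrate the
 * flag interplay; send_tid_ok_sketch() is hypothetical and mirrors the
 * function above.
 */
static inline bool send_tid_ok_sketch(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	/* Never run the TID leg while it is busy or waiting on I/O */
	if ((priv->s_flags & RVT_S_BUSY) ||
	    (qp->s_flags & HFI1_S_ANY_WAIT_IO))
		return false;

	/*
	 * Run it if a pre-built packet is queued, a response is
	 * pending, or nothing is blocking a TID send.
	 */
	return verbs_txreq_queued(iowait_get_tid_work(&priv->s_iowait)) ||
	       (priv->s_flags & RVT_S_RESP_PENDING) ||
	       !(qp->s_flags & HFI1_S_ANY_TID_WAIT_SEND);
}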
534062306a36Sopenharmony_ci
534162306a36Sopenharmony_civoid _hfi1_do_tid_send(struct work_struct *work)
534262306a36Sopenharmony_ci{
534362306a36Sopenharmony_ci	struct iowait_work *w = container_of(work, struct iowait_work, iowork);
534462306a36Sopenharmony_ci	struct rvt_qp *qp = iowait_to_qp(w->iow);
534562306a36Sopenharmony_ci
534662306a36Sopenharmony_ci	hfi1_do_tid_send(qp);
534762306a36Sopenharmony_ci}
534862306a36Sopenharmony_ci
534962306a36Sopenharmony_cistatic void hfi1_do_tid_send(struct rvt_qp *qp)
535062306a36Sopenharmony_ci{
535162306a36Sopenharmony_ci	struct hfi1_pkt_state ps;
535262306a36Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
535362306a36Sopenharmony_ci
535462306a36Sopenharmony_ci	ps.dev = to_idev(qp->ibqp.device);
535562306a36Sopenharmony_ci	ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
535662306a36Sopenharmony_ci	ps.ppd = ppd_from_ibp(ps.ibp);
535762306a36Sopenharmony_ci	ps.wait = iowait_get_tid_work(&priv->s_iowait);
535862306a36Sopenharmony_ci	ps.in_thread = false;
535962306a36Sopenharmony_ci	ps.timeout_int = qp->timeout_jiffies / 8;
536062306a36Sopenharmony_ci
536162306a36Sopenharmony_ci	trace_hfi1_rc_do_tid_send(qp, false);
536262306a36Sopenharmony_ci	spin_lock_irqsave(&qp->s_lock, ps.flags);
536362306a36Sopenharmony_ci
536462306a36Sopenharmony_ci	/* Return if we are already busy processing a work request. */
536562306a36Sopenharmony_ci	if (!hfi1_send_tid_ok(qp)) {
536662306a36Sopenharmony_ci		if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
536762306a36Sopenharmony_ci			iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
536862306a36Sopenharmony_ci		spin_unlock_irqrestore(&qp->s_lock, ps.flags);
536962306a36Sopenharmony_ci		return;
537062306a36Sopenharmony_ci	}
537162306a36Sopenharmony_ci
537262306a36Sopenharmony_ci	priv->s_flags |= RVT_S_BUSY;
537362306a36Sopenharmony_ci
537462306a36Sopenharmony_ci	ps.timeout = jiffies + ps.timeout_int;
537562306a36Sopenharmony_ci	ps.cpu = priv->s_sde ? priv->s_sde->cpu :
537662306a36Sopenharmony_ci		cpumask_first(cpumask_of_node(ps.ppd->dd->node));
537762306a36Sopenharmony_ci	ps.pkts_sent = false;
537862306a36Sopenharmony_ci
537962306a36Sopenharmony_ci	/* ensure a pre-built packet is handled */
538062306a36Sopenharmony_ci	ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
538162306a36Sopenharmony_ci	do {
538262306a36Sopenharmony_ci		/* Check for a constructed packet to be sent. */
538362306a36Sopenharmony_ci		if (ps.s_txreq) {
538462306a36Sopenharmony_ci			if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
538562306a36Sopenharmony_ci				qp->s_flags |= RVT_S_BUSY;
538662306a36Sopenharmony_ci				ps.wait = iowait_get_ib_work(&priv->s_iowait);
538762306a36Sopenharmony_ci			}
538862306a36Sopenharmony_ci			spin_unlock_irqrestore(&qp->s_lock, ps.flags);
538962306a36Sopenharmony_ci
539062306a36Sopenharmony_ci			/*
539162306a36Sopenharmony_ci			 * If the packet cannot be sent now, return and
539262306a36Sopenharmony_ci			 * the send tasklet will be woken up later.
539362306a36Sopenharmony_ci			 */
539462306a36Sopenharmony_ci			if (hfi1_verbs_send(qp, &ps))
539562306a36Sopenharmony_ci				return;
539662306a36Sopenharmony_ci
539762306a36Sopenharmony_ci			/* allow other tasks to run */
539862306a36Sopenharmony_ci			if (hfi1_schedule_send_yield(qp, &ps, true))
539962306a36Sopenharmony_ci				return;
540062306a36Sopenharmony_ci
540162306a36Sopenharmony_ci			spin_lock_irqsave(&qp->s_lock, ps.flags);
540262306a36Sopenharmony_ci			if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
540362306a36Sopenharmony_ci				qp->s_flags &= ~RVT_S_BUSY;
540462306a36Sopenharmony_ci				priv->s_flags &= ~HFI1_S_TID_BUSY_SET;
540562306a36Sopenharmony_ci				ps.wait = iowait_get_tid_work(&priv->s_iowait);
540662306a36Sopenharmony_ci				if (iowait_flag_set(&priv->s_iowait,
540762306a36Sopenharmony_ci						    IOWAIT_PENDING_IB))
540862306a36Sopenharmony_ci					hfi1_schedule_send(qp);
540962306a36Sopenharmony_ci			}
541062306a36Sopenharmony_ci		}
541162306a36Sopenharmony_ci	} while (hfi1_make_tid_rdma_pkt(qp, &ps));
541262306a36Sopenharmony_ci	iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
541362306a36Sopenharmony_ci	spin_unlock_irqrestore(&qp->s_lock, ps.flags);
541462306a36Sopenharmony_ci}
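
/*
 * The loop above temporarily borrows the IB leg's RVT_S_BUSY when
 * HFI1_S_TID_BUSY_SET is set, then hands it back. A condensed model of
 * that handback, assuming the same flag meanings;
 * tid_busy_handback_sketch() is hypothetical.
 */
static inline void tid_busy_handback_sketch(struct rvt_qp *qp,
					    struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;

	/* Return RVT_S_BUSY to the IB leg and switch wait queues back */
	qp->s_flags &= ~RVT_S_BUSY;
	priv->s_flags &= ~HFI1_S_TID_BUSY_SET;
	ps->wait = iowait_get_tid_work(&priv->s_iowait);

	/* If the IB leg went pending while borrowed, reschedule it */
	if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB))
		hfi1_schedule_send(qp);
}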
541562306a36Sopenharmony_ci
541662306a36Sopenharmony_cistatic bool _hfi1_schedule_tid_send(struct rvt_qp *qp)
541762306a36Sopenharmony_ci{
541862306a36Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
541962306a36Sopenharmony_ci	struct hfi1_ibport *ibp =
542062306a36Sopenharmony_ci		to_iport(qp->ibqp.device, qp->port_num);
542162306a36Sopenharmony_ci	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
542262306a36Sopenharmony_ci	struct hfi1_devdata *dd = ppd->dd;
542362306a36Sopenharmony_ci
542462306a36Sopenharmony_ci	if ((dd->flags & HFI1_SHUTDOWN))
542562306a36Sopenharmony_ci		return true;
542662306a36Sopenharmony_ci
542762306a36Sopenharmony_ci	return iowait_tid_schedule(&priv->s_iowait, ppd->hfi1_wq,
542862306a36Sopenharmony_ci				   priv->s_sde ?
542962306a36Sopenharmony_ci				   priv->s_sde->cpu :
543062306a36Sopenharmony_ci				   cpumask_first(cpumask_of_node(dd->node)));
543162306a36Sopenharmony_ci}
543262306a36Sopenharmony_ci
543362306a36Sopenharmony_ci/**
543462306a36Sopenharmony_ci * hfi1_schedule_tid_send - schedule progress on TID RDMA state machine
543562306a36Sopenharmony_ci * @qp: the QP
543662306a36Sopenharmony_ci *
543762306a36Sopenharmony_ci * This schedules QP progress on the TID RDMA state machine. The
543862306a36Sopenharmony_ci * caller should hold the s_lock.
543962306a36Sopenharmony_ci * Unlike hfi1_schedule_send(), this cannot use hfi1_send_ok()
544062306a36Sopenharmony_ci * because the two state machines can step on each other with
544162306a36Sopenharmony_ci * respect to the RVT_S_BUSY flag; therefore, a modified test is
544262306a36Sopenharmony_ci * used.
544362306a36Sopenharmony_ci * Return: true if the second leg is scheduled;
544462306a36Sopenharmony_ci * false if the second leg is not scheduled.
544562306a36Sopenharmony_ci */
544662306a36Sopenharmony_cibool hfi1_schedule_tid_send(struct rvt_qp *qp)
544762306a36Sopenharmony_ci{
544862306a36Sopenharmony_ci	lockdep_assert_held(&qp->s_lock);
544962306a36Sopenharmony_ci	if (hfi1_send_tid_ok(qp)) {
545062306a36Sopenharmony_ci		/*
545162306a36Sopenharmony_ci		 * The following call returns true if the qp is not on the
545262306a36Sopenharmony_ci		 * queue and false if the qp is already on the queue before
545362306a36Sopenharmony_ci		 * this call. Either way, the qp will be on the queue when the
545462306a36Sopenharmony_ci		 * call returns.
545562306a36Sopenharmony_ci		 */
545662306a36Sopenharmony_ci		_hfi1_schedule_tid_send(qp);
545762306a36Sopenharmony_ci		return true;
545862306a36Sopenharmony_ci	}
545962306a36Sopenharmony_ci	if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
546062306a36Sopenharmony_ci		iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
546162306a36Sopenharmony_ci				IOWAIT_PENDING_TID);
546262306a36Sopenharmony_ci	return false;
546362306a36Sopenharmony_ci}
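
/*
 * A sketch of a typical call site for hfi1_schedule_tid_send(),
 * following the documented rule that the s_lock must be held across
 * the call; schedule_tid_sketch() is hypothetical.
 */
static inline void schedule_tid_sketch(struct rvt_qp *qp)
{
	unsigned long flags;

	spin_lock_irqsave(&qp->s_lock, flags);
	/* Either the second leg is queued or IOWAIT_PENDING_TID is set */
	hfi1_schedule_tid_send(qp);
	spin_unlock_irqrestore(&qp->s_lock, flags);
}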
546462306a36Sopenharmony_ci
546562306a36Sopenharmony_cibool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e)
546662306a36Sopenharmony_ci{
546762306a36Sopenharmony_ci	struct rvt_ack_entry *prev;
546862306a36Sopenharmony_ci	struct tid_rdma_request *req;
546962306a36Sopenharmony_ci	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
547062306a36Sopenharmony_ci	struct hfi1_qp_priv *priv = qp->priv;
547162306a36Sopenharmony_ci	u32 s_prev;
547262306a36Sopenharmony_ci
547362306a36Sopenharmony_ci	s_prev = qp->s_tail_ack_queue == 0 ? rvt_size_atomic(&dev->rdi) :
547462306a36Sopenharmony_ci		(qp->s_tail_ack_queue - 1);
547562306a36Sopenharmony_ci	prev = &qp->s_ack_queue[s_prev];
547662306a36Sopenharmony_ci
547762306a36Sopenharmony_ci	if ((e->opcode == TID_OP(READ_REQ) ||
547862306a36Sopenharmony_ci	     e->opcode == OP(RDMA_READ_REQUEST)) &&
547962306a36Sopenharmony_ci	    prev->opcode == TID_OP(WRITE_REQ)) {
548062306a36Sopenharmony_ci		req = ack_to_tid_req(prev);
548162306a36Sopenharmony_ci		if (req->ack_seg != req->total_segs) {
548262306a36Sopenharmony_ci			priv->s_flags |= HFI1_R_TID_WAIT_INTERLCK;
548362306a36Sopenharmony_ci			return true;
548462306a36Sopenharmony_ci		}
548562306a36Sopenharmony_ci	}
548662306a36Sopenharmony_ci	return false;
548762306a36Sopenharmony_ci}
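
/*
 * hfi1_tid_rdma_ack_interlock() above steps backwards through an ack
 * queue of rvt_size_atomic() + 1 slots. The wrap-around decrement in
 * isolation; ack_queue_prev_sketch() is hypothetical.
 */
static inline u32 ack_queue_prev_sketch(struct rvt_qp *qp, u32 idx)
{
	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);

	/* Index 0 wraps back to the last slot, rvt_size_atomic() */
	return idx ? idx - 1 : rvt_size_atomic(&dev->rdi);
}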
548862306a36Sopenharmony_ci
548962306a36Sopenharmony_cistatic u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx)
549062306a36Sopenharmony_ci{
549162306a36Sopenharmony_ci	u64 reg;
549262306a36Sopenharmony_ci
549362306a36Sopenharmony_ci	/*
549462306a36Sopenharmony_ci	 * The only sane way to get the amount of
549562306a36Sopenharmony_ci	 * progress is to read the HW flow state.
549662306a36Sopenharmony_ci	 */
549762306a36Sopenharmony_ci	reg = read_uctxt_csr(dd, ctxt, RCV_TID_FLOW_TABLE + (8 * fidx));
549862306a36Sopenharmony_ci	return mask_psn(reg);
549962306a36Sopenharmony_ci}
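
/*
 * The PSN recovered above carries a flow generation in its upper bits
 * and a per-generation sequence below HFI1_KDETH_BTH_SEQ_SHIFT. A
 * minimal split of the two fields, assuming that layout;
 * kdeth_psn_split_sketch() is hypothetical.
 */
static inline void kdeth_psn_split_sketch(u32 psn, u32 *gen, u32 *seq)
{
	/* Low bits: packet sequence within the current generation */
	*seq = psn & (MAX_TID_FLOW_PSN - 1);

	/* Upper bits: the masked flow generation */
	*gen = mask_generation(psn >> HFI1_KDETH_BTH_SEQ_SHIFT);
}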
550062306a36Sopenharmony_ci
550162306a36Sopenharmony_cistatic void tid_rdma_rcv_err(struct hfi1_packet *packet,
550262306a36Sopenharmony_ci			     struct ib_other_headers *ohdr,
550362306a36Sopenharmony_ci			     struct rvt_qp *qp, u32 psn, int diff, bool fecn)
550462306a36Sopenharmony_ci{
550562306a36Sopenharmony_ci	unsigned long flags;
550662306a36Sopenharmony_ci
550762306a36Sopenharmony_ci	tid_rdma_rcv_error(packet, ohdr, qp, psn, diff);
550862306a36Sopenharmony_ci	if (fecn) {
550962306a36Sopenharmony_ci		spin_lock_irqsave(&qp->s_lock, flags);
551062306a36Sopenharmony_ci		qp->s_flags |= RVT_S_ECN;
551162306a36Sopenharmony_ci		spin_unlock_irqrestore(&qp->s_lock, flags);
551262306a36Sopenharmony_ci	}
551362306a36Sopenharmony_ci}
551462306a36Sopenharmony_ci
551562306a36Sopenharmony_cistatic void update_r_next_psn_fecn(struct hfi1_packet *packet,
551662306a36Sopenharmony_ci				   struct hfi1_qp_priv *priv,
551762306a36Sopenharmony_ci				   struct hfi1_ctxtdata *rcd,
551862306a36Sopenharmony_ci				   struct tid_rdma_flow *flow,
551962306a36Sopenharmony_ci				   bool fecn)
552062306a36Sopenharmony_ci{
552162306a36Sopenharmony_ci	/*
552262306a36Sopenharmony_ci	 * If a start/middle packet is delivered here due to
552362306a36Sopenharmony_ci	 * an RSM rule and FECN, we need to update r_next_psn.
552462306a36Sopenharmony_ci	 */
552562306a36Sopenharmony_ci	if (fecn && packet->etype == RHF_RCV_TYPE_EAGER &&
552662306a36Sopenharmony_ci	    !(priv->s_flags & HFI1_R_TID_SW_PSN)) {
552762306a36Sopenharmony_ci		struct hfi1_devdata *dd = rcd->dd;
552862306a36Sopenharmony_ci
552962306a36Sopenharmony_ci		flow->flow_state.r_next_psn =
553062306a36Sopenharmony_ci			read_r_next_psn(dd, rcd->ctxt, flow->idx);
553162306a36Sopenharmony_ci	}
553262306a36Sopenharmony_ci}
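
/*
 * update_r_next_psn_fecn() above consults the hardware flow table only
 * when software PSN tracking is off. The choice in isolation;
 * next_psn_sketch() is hypothetical.
 */
static inline u32 next_psn_sketch(struct hfi1_qp_priv *priv,
				  struct hfi1_ctxtdata *rcd,
				  struct tid_rdma_flow *flow)
{
	/* Software tracking: the driver's accumulated PSN is current */
	if (priv->s_flags & HFI1_R_TID_SW_PSN)
		return flow->flow_state.r_next_psn;

	/* Otherwise the flow table CSR holds the authoritative value */
	return read_r_next_psn(rcd->dd, rcd->ctxt, flow->idx);
}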