// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/skbuff.h>
#include <crypto/hash.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       u32 opcode);

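/* Re-walk the first (partially acknowledged) send/write WQE on a retry:
 * advance the DMA state, and for RDMA writes the target iova, past the
 * npsn packets the responder has already acknowledged so that
 * retransmission resumes at the correct offset.
 */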
static inline void retry_first_write_send(struct rxe_qp *qp,
					  struct rxe_send_wqe *wqe,
					  unsigned int mask, int npsn)
{
	int i;

	for (i = 0; i < npsn; i++) {
		int to_send = (wqe->dma.resid > qp->mtu) ?
				qp->mtu : wqe->dma.resid;

		qp->req.opcode = next_opcode(qp, wqe,
					     wqe->wr.opcode);

		if (wqe->wr.send_flags & IB_SEND_INLINE) {
			wqe->dma.resid -= to_send;
			wqe->dma.sge_offset += to_send;
		} else {
			advance_dma_data(&wqe->dma, to_send);
		}
		if (mask & WR_WRITE_MASK)
			wqe->iova += qp->mtu;
	}
}

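/* Rewind the send queue to the oldest WQE the completer has not retired and
 * reset the request PSN so those WQEs are resent. Only the first WQE can be
 * partially acknowledged; its DMA state and iova are adjusted so the resend
 * starts at the first unacknowledged packet.
 */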
static void req_retry(struct rxe_qp *qp)
{
	struct rxe_send_wqe *wqe;
	unsigned int wqe_index;
	unsigned int mask;
	int npsn;
	int first = 1;

	qp->req.wqe_index	= consumer_index(qp->sq.queue);
	qp->req.psn		= qp->comp.psn;
	qp->req.opcode		= -1;

	for (wqe_index = consumer_index(qp->sq.queue);
		wqe_index != producer_index(qp->sq.queue);
		wqe_index = next_index(qp->sq.queue, wqe_index)) {
		wqe = addr_from_index(qp->sq.queue, wqe_index);
		mask = wr_opcode_mask(wqe->wr.opcode, qp);

		if (wqe->state == wqe_state_posted)
			break;

		if (wqe->state == wqe_state_done)
			continue;

		wqe->iova = (mask & WR_ATOMIC_MASK) ?
			     wqe->wr.wr.atomic.remote_addr :
			     (mask & WR_READ_OR_WRITE_MASK) ?
			     wqe->wr.wr.rdma.remote_addr :
			     0;

		if (!first || (mask & WR_READ_MASK) == 0) {
			wqe->dma.resid = wqe->dma.length;
			wqe->dma.cur_sge = 0;
			wqe->dma.sge_offset = 0;
		}

		if (first) {
			first = 0;

			if (mask & WR_WRITE_OR_SEND_MASK) {
				npsn = (qp->comp.psn - wqe->first_psn) &
					BTH_PSN_MASK;
				retry_first_write_send(qp, wqe, mask, npsn);
			}

			if (mask & WR_READ_MASK) {
				npsn = (wqe->dma.length - wqe->dma.resid) /
					qp->mtu;
				wqe->iova += npsn * qp->mtu;
			}
		}

		wqe->state = wqe_state_posted;
	}
}

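/* The RNR NAK back-off period has expired; kick the requester task so it
 * retries the send queue.
 */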
void rnr_nak_timer(struct timer_list *t)
{
	struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);

	pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp));
	rxe_run_task(&qp->req.task, 1);
}

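/* Return the next send WQE for the requester to work on, or NULL if the send
 * queue is empty, the QP is draining/drained, or a fenced WQE must wait for
 * earlier WQEs to complete. Also raises IB_EVENT_SQ_DRAINED once a draining
 * send queue has fully drained.
 */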
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
	struct rxe_send_wqe *wqe = queue_head(qp->sq.queue);
	unsigned long flags;

	if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
		/* check to see if we are drained;
		 * state_lock used by requester and completer
		 */
		spin_lock_irqsave(&qp->state_lock, flags);
		do {
			if (qp->req.state != QP_STATE_DRAIN) {
				/* comp just finished */
				spin_unlock_irqrestore(&qp->state_lock,
						       flags);
				break;
			}

			if (wqe && ((qp->req.wqe_index !=
				consumer_index(qp->sq.queue)) ||
				(wqe->state != wqe_state_posted))) {
				/* comp not done yet */
				spin_unlock_irqrestore(&qp->state_lock,
						       flags);
				break;
			}

			qp->req.state = QP_STATE_DRAINED;
			spin_unlock_irqrestore(&qp->state_lock, flags);

			if (qp->ibqp.event_handler) {
				struct ib_event ev;

				ev.device = qp->ibqp.device;
				ev.element.qp = &qp->ibqp;
				ev.event = IB_EVENT_SQ_DRAINED;
				qp->ibqp.event_handler(&ev,
					qp->ibqp.qp_context);
			}
		} while (0);
	}

	if (qp->req.wqe_index == producer_index(qp->sq.queue))
		return NULL;

	wqe = addr_from_index(qp->sq.queue, qp->req.wqe_index);

	if (unlikely((qp->req.state == QP_STATE_DRAIN ||
		      qp->req.state == QP_STATE_DRAINED) &&
		     (wqe->state != wqe_state_processing)))
		return NULL;

	if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) &&
		     (qp->req.wqe_index != consumer_index(qp->sq.queue)))) {
		qp->req.wait_fence = 1;
		return NULL;
	}

	wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
	return wqe;
}

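/* Choose the RC wire opcode for the next packet of a work request, based on
 * the WR opcode, whether the previous packet of this WQE was a FIRST/MIDDLE
 * fragment, and whether the remaining payload fits in one packet.
 */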
static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
{
	switch (opcode) {
	case IB_WR_RDMA_WRITE:
		if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_LAST :
				IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_ONLY :
				IB_OPCODE_RC_RDMA_WRITE_FIRST;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
				IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_RC_RDMA_WRITE_FIRST;

	case IB_WR_SEND:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_RC_SEND_LAST :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_SEND_ONLY :
				IB_OPCODE_RC_SEND_FIRST;

	case IB_WR_SEND_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_RC_SEND_FIRST;

	case IB_WR_RDMA_READ:
		return IB_OPCODE_RC_RDMA_READ_REQUEST;

	case IB_WR_ATOMIC_CMP_AND_SWP:
		return IB_OPCODE_RC_COMPARE_SWAP;

	case IB_WR_ATOMIC_FETCH_AND_ADD:
		return IB_OPCODE_RC_FETCH_ADD;

	case IB_WR_SEND_WITH_INV:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ? IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
				IB_OPCODE_RC_SEND_FIRST;
	case IB_WR_REG_MR:
	case IB_WR_LOCAL_INV:
		return opcode;
	}

	return -EINVAL;
}

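/* Same as next_opcode_rc() but for the smaller UC opcode set (sends and
 * RDMA writes only).
 */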
static int next_opcode_uc(struct rxe_qp *qp, u32 opcode, int fits)
{
	switch (opcode) {
	case IB_WR_RDMA_WRITE:
		if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_LAST :
				IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_ONLY :
				IB_OPCODE_UC_RDMA_WRITE_FIRST;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
				IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_UC_RDMA_WRITE_FIRST;

	case IB_WR_SEND:
		if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_UC_SEND_LAST :
				IB_OPCODE_UC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_SEND_ONLY :
				IB_OPCODE_UC_SEND_FIRST;

	case IB_WR_SEND_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE :
				IB_OPCODE_UC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_UC_SEND_FIRST;
	}

	return -EINVAL;
}

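/* Map the work request opcode to the wire opcode of the next packet for this
 * WQE, dispatching on the QP type. UD/SMI/GSI sends are always emitted as
 * single (*_ONLY) packets.
 */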
static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       u32 opcode)
{
	int fits = (wqe->dma.resid <= qp->mtu);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		return next_opcode_rc(qp, opcode, fits);

	case IB_QPT_UC:
		return next_opcode_uc(qp, opcode, fits);

	case IB_QPT_SMI:
	case IB_QPT_UD:
	case IB_QPT_GSI:
		switch (opcode) {
		case IB_WR_SEND:
			return IB_OPCODE_UD_SEND_ONLY;

		case IB_WR_SEND_WITH_IMM:
			return IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
		}
		break;

	default:
		break;
	}

	return -EINVAL;
}

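/* Reserve one of the QP's outstanding read/atomic slots for this WQE.
 * Returns -EAGAIN, leaving need_rd_atomic set, if the initiator depth is
 * currently exhausted.
 */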
static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	int depth;

	if (wqe->has_rd_atomic)
		return 0;

	qp->req.need_rd_atomic = 1;
	depth = atomic_dec_return(&qp->req.rd_atomic);

	if (depth >= 0) {
		qp->req.need_rd_atomic = 0;
		wqe->has_rd_atomic = 1;
		return 0;
	}

	atomic_inc(&qp->req.rd_atomic);
	return -EAGAIN;
}

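/* Connected QPs (RC/UC) are bound by the path MTU stored in the QP; datagram
 * QPs are limited only by the port MTU.
 */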
static inline int get_mtu(struct rxe_qp *qp)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))
		return qp->mtu;

	return rxe->port.mtu_cap;
}

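/* Allocate the skb for the next request packet and build its headers (BTH
 * plus whichever of RETH/IMMDT/IETH/ATMETH/DETH the opcode requires) from
 * the WQE and the current QP state.
 */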
static struct sk_buff *init_req_packet(struct rxe_qp *qp,
				       struct rxe_send_wqe *wqe,
				       int opcode, int payload,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_dev		*rxe = to_rdev(qp->ibqp.device);
	struct sk_buff		*skb;
	struct rxe_send_wr	*ibwr = &wqe->wr;
	struct rxe_av		*av;
	int			pad = (-payload) & 0x3;
	int			paylen;
	int			solicited;
	u16			pkey;
	u32			qp_num;
	int			ack_req;

	/* length from start of bth to end of icrc */
	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;

	/* pkt->hdr, rxe, port_num and mask are initialized in ifc
	 * layer
	 */
	pkt->opcode	= opcode;
	pkt->qp		= qp;
	pkt->psn	= qp->req.psn;
	pkt->mask	= rxe_opcode[opcode].mask;
	pkt->paylen	= paylen;
	pkt->offset	= 0;
	pkt->wqe	= wqe;

	/* init skb */
	av = rxe_get_av(pkt);
	skb = rxe_init_packet(rxe, av, paylen, pkt);
	if (unlikely(!skb))
		return NULL;

	/* init bth */
	solicited = (ibwr->send_flags & IB_SEND_SOLICITED) &&
			(pkt->mask & RXE_END_MASK) &&
			((pkt->mask & (RXE_SEND_MASK)) ||
			(pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
			(RXE_WRITE_MASK | RXE_IMMDT_MASK));

	pkey = IB_DEFAULT_PKEY_FULL;

	qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
					 qp->attr.dest_qp_num;

	ack_req = ((pkt->mask & RXE_END_MASK) ||
		(qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK));
	if (ack_req)
		qp->req.noack_pkts = 0;

	bth_init(pkt, pkt->opcode, solicited, 0, pad, pkey, qp_num,
		 ack_req, pkt->psn);

	/* init optional headers */
	if (pkt->mask & RXE_RETH_MASK) {
		reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
		reth_set_va(pkt, wqe->iova);
		reth_set_len(pkt, wqe->dma.resid);
	}

	if (pkt->mask & RXE_IMMDT_MASK)
		immdt_set_imm(pkt, ibwr->ex.imm_data);

	if (pkt->mask & RXE_IETH_MASK)
		ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey);

	if (pkt->mask & RXE_ATMETH_MASK) {
		atmeth_set_va(pkt, wqe->iova);
		if (opcode == IB_OPCODE_RC_COMPARE_SWAP ||
		    opcode == IB_OPCODE_RD_COMPARE_SWAP) {
			atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
			atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
		} else {
			atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add);
		}
		atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey);
	}

	if (pkt->mask & RXE_DETH_MASK) {
		if (qp->ibqp.qp_num == 1)
			deth_set_qkey(pkt, GSI_QKEY);
		else
			deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey);
		deth_set_sqp(pkt, qp->ibqp.qp_num);
	}

	return skb;
}

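/* Copy this packet's payload out of the WQE (inline data or via copy_data()),
 * zero any pad bytes and append the ICRC covering headers, payload and pad.
 */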
static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       struct rxe_pkt_info *pkt, struct sk_buff *skb,
		       int paylen)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	u32 crc = 0;
	u32 *p;
	int err;

	err = rxe_prepare(pkt, skb, &crc);
	if (err)
		return err;

	if (pkt->mask & RXE_WRITE_OR_SEND) {
		if (wqe->wr.send_flags & IB_SEND_INLINE) {
			u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];

			crc = rxe_crc32(rxe, crc, tmp, paylen);
			memcpy(payload_addr(pkt), tmp, paylen);

			wqe->dma.resid -= paylen;
			wqe->dma.sge_offset += paylen;
		} else {
			err = copy_data(qp->pd, 0, &wqe->dma,
					payload_addr(pkt), paylen,
					from_mem_obj,
					&crc);
			if (err)
				return err;
		}
		if (bth_pad(pkt)) {
			u8 *pad = payload_addr(pkt) + paylen;

			memset(pad, 0, bth_pad(pkt));
			crc = rxe_crc32(rxe, crc, pad, bth_pad(pkt));
		}
	}
	p = payload_addr(pkt) + paylen + bth_pad(pkt);

	*p = ~crc;

	return 0;
}

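/* The last packet of an RC request leaves the WQE "pending" (it still needs
 * an ACK before it can complete); any non-final packet leaves it
 * "processing".
 */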
static void update_wqe_state(struct rxe_qp *qp,
		struct rxe_send_wqe *wqe,
		struct rxe_pkt_info *pkt)
{
	if (pkt->mask & RXE_END_MASK) {
		if (qp_type(qp) == IB_QPT_RC)
			wqe->state = wqe_state_pending;
	} else {
		wqe->state = wqe_state_processing;
	}
}

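/* Record the PSN range covered by this WQE when its first packet goes out,
 * then advance the request PSN: by one for most packets, or past the entire
 * expected response for an RDMA read request.
 */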
static void update_wqe_psn(struct rxe_qp *qp,
			   struct rxe_send_wqe *wqe,
			   struct rxe_pkt_info *pkt,
			   int payload)
{
	/* number of packets left to send including current one */
	int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;

	/* handle zero length packet case */
	if (num_pkt == 0)
		num_pkt = 1;

	if (pkt->mask & RXE_START_MASK) {
		wqe->first_psn = qp->req.psn;
		wqe->last_psn = (qp->req.psn + num_pkt - 1) & BTH_PSN_MASK;
	}

	if (pkt->mask & RXE_READ_MASK)
		qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK;
	else
		qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
}

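/* Snapshot the WQE state and request PSN so they can be rolled back if the
 * transmit fails.
 */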
static void save_state(struct rxe_send_wqe *wqe,
		       struct rxe_qp *qp,
		       struct rxe_send_wqe *rollback_wqe,
		       u32 *rollback_psn)
{
	rollback_wqe->state     = wqe->state;
	rollback_wqe->first_psn = wqe->first_psn;
	rollback_wqe->last_psn  = wqe->last_psn;
	*rollback_psn		= qp->req.psn;
}

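/* Undo the WQE/PSN updates made ahead of a transmit that failed. */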
static void rollback_state(struct rxe_send_wqe *wqe,
			   struct rxe_qp *qp,
			   struct rxe_send_wqe *rollback_wqe,
			   u32 rollback_psn)
{
	wqe->state     = rollback_wqe->state;
	wqe->first_psn = rollback_wqe->first_psn;
	wqe->last_psn  = rollback_wqe->last_psn;
	qp->req.psn    = rollback_psn;
}

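/* Commit per-QP state after a successful transmit: remember the opcode just
 * sent, move to the next WQE if this packet completed the current one, and
 * arm the retransmit timer if it is not already running.
 */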
static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
			 struct rxe_pkt_info *pkt, int payload)
{
	qp->req.opcode = pkt->opcode;

	if (pkt->mask & RXE_END_MASK)
		qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);

	qp->need_req_skb = 0;

	if (qp->qp_timeout_jiffies && !timer_pending(&qp->retrans_timer))
		mod_timer(&qp->retrans_timer,
			  jiffies + qp->qp_timeout_jiffies);
}

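/* Requester task: pull WQEs off the send queue, build and transmit request
 * packets for each one and update the QP/WQE bookkeeping. Returns nonzero
 * once there is nothing more it can usefully do (queue empty, waiting on
 * the PSN window, a fence, skb credits or RNR back-off) so the task stops
 * until it is kicked again.
 */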
int rxe_requester(void *arg)
{
	struct rxe_qp *qp = (struct rxe_qp *)arg;
	struct rxe_pkt_info pkt;
	struct sk_buff *skb;
	struct rxe_send_wqe *wqe;
	enum rxe_hdr_mask mask;
	int payload;
	int mtu;
	int opcode;
	int ret;
	struct rxe_send_wqe rollback_wqe;
	u32 rollback_psn;

	rxe_add_ref(qp);

next_wqe:
	if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
		goto exit;

	if (unlikely(qp->req.state == QP_STATE_RESET)) {
		qp->req.wqe_index = consumer_index(qp->sq.queue);
		qp->req.opcode = -1;
		qp->req.need_rd_atomic = 0;
		qp->req.wait_psn = 0;
		qp->req.need_retry = 0;
		goto exit;
	}

	if (unlikely(qp->req.need_retry)) {
		req_retry(qp);
		qp->req.need_retry = 0;
	}

	wqe = req_next_wqe(qp);
	if (unlikely(!wqe))
		goto exit;

	if (wqe->mask & WR_REG_MASK) {
		if (wqe->wr.opcode == IB_WR_LOCAL_INV) {
			struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
			struct rxe_mem *rmr;

			rmr = rxe_pool_get_index(&rxe->mr_pool,
						 wqe->wr.ex.invalidate_rkey >> 8);
			if (!rmr) {
				pr_err("No mr for key %#x\n",
				       wqe->wr.ex.invalidate_rkey);
				wqe->state = wqe_state_error;
				wqe->status = IB_WC_MW_BIND_ERR;
				goto exit;
			}
			rmr->state = RXE_MEM_STATE_FREE;
			rxe_drop_ref(rmr);
			wqe->state = wqe_state_done;
			wqe->status = IB_WC_SUCCESS;
		} else if (wqe->wr.opcode == IB_WR_REG_MR) {
			struct rxe_mem *rmr = to_rmr(wqe->wr.wr.reg.mr);

			rmr->state = RXE_MEM_STATE_VALID;
			rmr->access = wqe->wr.wr.reg.access;
			rmr->ibmr.lkey = wqe->wr.wr.reg.key;
			rmr->ibmr.rkey = wqe->wr.wr.reg.key;
			rmr->iova = wqe->wr.wr.reg.mr->iova;
			wqe->state = wqe_state_done;
			wqe->status = IB_WC_SUCCESS;
		} else {
			goto exit;
		}
		if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
		    qp->sq_sig_type == IB_SIGNAL_ALL_WR)
			rxe_run_task(&qp->comp.task, 1);
		qp->req.wqe_index = next_index(qp->sq.queue,
						qp->req.wqe_index);
		goto next_wqe;
	}

	if (unlikely(qp_type(qp) == IB_QPT_RC &&
		psn_compare(qp->req.psn, (qp->comp.psn +
				RXE_MAX_UNACKED_PSNS)) > 0)) {
		qp->req.wait_psn = 1;
		goto exit;
	}

	/* Limit the number of inflight SKBs per QP */
	if (unlikely(atomic_read(&qp->skb_out) >
		     RXE_INFLIGHT_SKBS_PER_QP_HIGH)) {
		qp->need_req_skb = 1;
		goto exit;
	}

	opcode = next_opcode(qp, wqe, wqe->wr.opcode);
	if (unlikely(opcode < 0)) {
		wqe->status = IB_WC_LOC_QP_OP_ERR;
		goto err;
	}

	mask = rxe_opcode[opcode].mask;
	if (unlikely(mask & RXE_READ_OR_ATOMIC)) {
		if (check_init_depth(qp, wqe))
			goto exit;
	}

	mtu = get_mtu(qp);
	payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0;
	if (payload > mtu) {
		if (qp_type(qp) == IB_QPT_UD) {
			/* C10-93.1.1: If the total sum of all the buffer lengths specified for a
			 * UD message exceeds the MTU of the port as returned by QueryHCA, the CI
			 * shall not emit any packets for this message. Further, the CI shall not
			 * generate an error due to this condition.
			 */

			/* fake a successful UD send */
			wqe->first_psn = qp->req.psn;
			wqe->last_psn = qp->req.psn;
			qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
			qp->req.opcode = IB_OPCODE_UD_SEND_ONLY;
			qp->req.wqe_index = next_index(qp->sq.queue,
						       qp->req.wqe_index);
			wqe->state = wqe_state_done;
			wqe->status = IB_WC_SUCCESS;
			__rxe_do_task(&qp->comp.task);
			rxe_drop_ref(qp);
			return 0;
		}
		payload = mtu;
	}

	skb = init_req_packet(qp, wqe, opcode, payload, &pkt);
	if (unlikely(!skb)) {
		pr_err("qp#%d Failed allocating skb\n", qp_num(qp));
		wqe->status = IB_WC_LOC_PROT_ERR;
		goto err;
	}

	if (fill_packet(qp, wqe, &pkt, skb, payload)) {
		pr_debug("qp#%d Error during fill packet\n", qp_num(qp));
		wqe->status = IB_WC_LOC_PROT_ERR;
		kfree_skb(skb);
		goto err;
	}

	/*
	 * To prevent a race on wqe access between requester and completer,
	 * wqe members state and psn need to be set before calling
	 * rxe_xmit_packet().
	 * Otherwise, completer might initiate an unjustified retry flow.
	 */
	save_state(wqe, qp, &rollback_wqe, &rollback_psn);
	update_wqe_state(qp, wqe, &pkt);
	update_wqe_psn(qp, wqe, &pkt, payload);
	ret = rxe_xmit_packet(qp, &pkt, skb);
	if (ret) {
		qp->need_req_skb = 1;

		rollback_state(wqe, qp, &rollback_wqe, rollback_psn);

		if (ret == -EAGAIN) {
			rxe_run_task(&qp->req.task, 1);
			goto exit;
		}

		wqe->status = IB_WC_LOC_PROT_ERR;
		goto err;
	}

	update_state(qp, wqe, &pkt, payload);

	goto next_wqe;

err:
	/* wqe->status is set at each goto err site so that a more specific
	 * status (e.g. IB_WC_LOC_QP_OP_ERR) is not clobbered here
	 */
	wqe->state = wqe_state_error;
	__rxe_do_task(&qp->comp.task);

exit:
	rxe_drop_ref(qp);
	return -EAGAIN;
}