1/*
2 * Copyright(c) 2015 - 2018 Intel Corporation.
3 *
4 * This file is provided under a dual BSD/GPLv2 license.  When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * General Public License for more details.
17 *
18 * BSD LICENSE
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions
22 * are met:
23 *
24 *  - Redistributions of source code must retain the above copyright
25 *    notice, this list of conditions and the following disclaimer.
26 *  - Redistributions in binary form must reproduce the above copyright
27 *    notice, this list of conditions and the following disclaimer in
28 *    the documentation and/or other materials provided with the
29 *    distribution.
30 *  - Neither the name of Intel Corporation nor the names of its
31 *    contributors may be used to endorse or promote products derived
32 *    from this software without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 *
46 */
47
48#include "hfi.h"
49#include "verbs_txreq.h"
50#include "qp.h"
51
/*
 * Cut down ridiculously long IB macro names: within this file OP(x)
 * is shorthand for UC_OP(x).
 */
#define OP(x) UC_OP(x)
54
55/**
56 * hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
57 * @qp: a pointer to the QP
58 *
59 * Assume s_lock is held.
60 *
61 * Return 1 if constructed; otherwise, return 0.
62 */
63int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
64{
65	struct hfi1_qp_priv *priv = qp->priv;
66	struct ib_other_headers *ohdr;
67	struct rvt_swqe *wqe;
68	u32 hwords;
69	u32 bth0 = 0;
70	u32 len;
71	u32 pmtu = qp->pmtu;
72	int middle = 0;
73
74	ps->s_txreq = get_txreq(ps->dev, qp);
75	if (!ps->s_txreq)
76		goto bail_no_tx;
77
78	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
79		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
80			goto bail;
81		/* We are in the error state, flush the work request. */
82		if (qp->s_last == READ_ONCE(qp->s_head))
83			goto bail;
84		/* If DMAs are in progress, we can't flush immediately. */
85		if (iowait_sdma_pending(&priv->s_iowait)) {
86			qp->s_flags |= RVT_S_WAIT_DMA;
87			goto bail;
88		}
89		clear_ahg(qp);
90		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
91		rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
92		goto done_free_tx;
93	}
94
95	if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
96		/* header size in 32-bit words LRH+BTH = (8+12)/4. */
97		hwords = 5;
98		if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
99			ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
100		else
101			ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
102	} else {
103		/* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
104		hwords = 7;
105		if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
106		    (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))))
107			ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
108		else
109			ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
110	}
111
112	/* Get the next send request. */
113	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
114	qp->s_wqe = NULL;
115	switch (qp->s_state) {
116	default:
117		if (!(ib_rvt_state_ops[qp->state] &
118		    RVT_PROCESS_NEXT_SEND_OK))
119			goto bail;
120		/* Check if send work queue is empty. */
121		if (qp->s_cur == READ_ONCE(qp->s_head)) {
122			clear_ahg(qp);
123			goto bail;
124		}
125		/*
126		 * Local operations are processed immediately
127		 * after all prior requests have completed.
128		 */
129		if (wqe->wr.opcode == IB_WR_REG_MR ||
130		    wqe->wr.opcode == IB_WR_LOCAL_INV) {
131			int local_ops = 0;
132			int err = 0;
133
134			if (qp->s_last != qp->s_cur)
135				goto bail;
136			if (++qp->s_cur == qp->s_size)
137				qp->s_cur = 0;
138			if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
139				err = rvt_invalidate_rkey(
140					qp, wqe->wr.ex.invalidate_rkey);
141				local_ops = 1;
142			}
143			rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
144							: IB_WC_SUCCESS);
145			if (local_ops)
146				atomic_dec(&qp->local_ops_pending);
147			goto done_free_tx;
148		}
149		/*
150		 * Start a new request.
151		 */
152		qp->s_psn = wqe->psn;
153		qp->s_sge.sge = wqe->sg_list[0];
154		qp->s_sge.sg_list = wqe->sg_list + 1;
155		qp->s_sge.num_sge = wqe->wr.num_sge;
156		qp->s_sge.total_len = wqe->length;
157		len = wqe->length;
158		qp->s_len = len;
159		switch (wqe->wr.opcode) {
160		case IB_WR_SEND:
161		case IB_WR_SEND_WITH_IMM:
162			if (len > pmtu) {
163				qp->s_state = OP(SEND_FIRST);
164				len = pmtu;
165				break;
166			}
167			if (wqe->wr.opcode == IB_WR_SEND) {
168				qp->s_state = OP(SEND_ONLY);
169			} else {
170				qp->s_state =
171					OP(SEND_ONLY_WITH_IMMEDIATE);
172				/* Immediate data comes after the BTH */
173				ohdr->u.imm_data = wqe->wr.ex.imm_data;
174				hwords += 1;
175			}
176			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
177				bth0 |= IB_BTH_SOLICITED;
178			qp->s_wqe = wqe;
179			if (++qp->s_cur >= qp->s_size)
180				qp->s_cur = 0;
181			break;
182
183		case IB_WR_RDMA_WRITE:
184		case IB_WR_RDMA_WRITE_WITH_IMM:
185			ohdr->u.rc.reth.vaddr =
186				cpu_to_be64(wqe->rdma_wr.remote_addr);
187			ohdr->u.rc.reth.rkey =
188				cpu_to_be32(wqe->rdma_wr.rkey);
189			ohdr->u.rc.reth.length = cpu_to_be32(len);
190			hwords += sizeof(struct ib_reth) / 4;
191			if (len > pmtu) {
192				qp->s_state = OP(RDMA_WRITE_FIRST);
193				len = pmtu;
194				break;
195			}
196			if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
197				qp->s_state = OP(RDMA_WRITE_ONLY);
198			} else {
199				qp->s_state =
200					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
201				/* Immediate data comes after the RETH */
202				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
203				hwords += 1;
204				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
205					bth0 |= IB_BTH_SOLICITED;
206			}
207			qp->s_wqe = wqe;
208			if (++qp->s_cur >= qp->s_size)
209				qp->s_cur = 0;
210			break;
211
212		default:
213			goto bail;
214		}
215		break;
216
217	case OP(SEND_FIRST):
218		qp->s_state = OP(SEND_MIDDLE);
219		fallthrough;
220	case OP(SEND_MIDDLE):
221		len = qp->s_len;
222		if (len > pmtu) {
223			len = pmtu;
224			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
225			break;
226		}
227		if (wqe->wr.opcode == IB_WR_SEND) {
228			qp->s_state = OP(SEND_LAST);
229		} else {
230			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
231			/* Immediate data comes after the BTH */
232			ohdr->u.imm_data = wqe->wr.ex.imm_data;
233			hwords += 1;
234		}
235		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
236			bth0 |= IB_BTH_SOLICITED;
237		qp->s_wqe = wqe;
238		if (++qp->s_cur >= qp->s_size)
239			qp->s_cur = 0;
240		break;
241
242	case OP(RDMA_WRITE_FIRST):
243		qp->s_state = OP(RDMA_WRITE_MIDDLE);
244		fallthrough;
245	case OP(RDMA_WRITE_MIDDLE):
246		len = qp->s_len;
247		if (len > pmtu) {
248			len = pmtu;
249			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
250			break;
251		}
252		if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
253			qp->s_state = OP(RDMA_WRITE_LAST);
254		} else {
255			qp->s_state =
256				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
257			/* Immediate data comes after the BTH */
258			ohdr->u.imm_data = wqe->wr.ex.imm_data;
259			hwords += 1;
260			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
261				bth0 |= IB_BTH_SOLICITED;
262		}
263		qp->s_wqe = wqe;
264		if (++qp->s_cur >= qp->s_size)
265			qp->s_cur = 0;
266		break;
267	}
268	qp->s_len -= len;
269	ps->s_txreq->hdr_dwords = hwords;
270	ps->s_txreq->sde = priv->s_sde;
271	ps->s_txreq->ss = &qp->s_sge;
272	ps->s_txreq->s_cur_size = len;
273	hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
274			     qp->remote_qpn, mask_psn(qp->s_psn++),
275			     middle, ps);
276	return 1;
277
278done_free_tx:
279	hfi1_put_txreq(ps->s_txreq);
280	ps->s_txreq = NULL;
281	return 1;
282
283bail:
284	hfi1_put_txreq(ps->s_txreq);
285
286bail_no_tx:
287	ps->s_txreq = NULL;
288	qp->s_flags &= ~RVT_S_BUSY;
289	return 0;
290}
291
292/**
293 * hfi1_uc_rcv - handle an incoming UC packet
294 * @ibp: the port the packet came in on
295 * @hdr: the header of the packet
296 * @rcv_flags: flags relevant to rcv processing
297 * @data: the packet data
298 * @tlen: the length of the packet
299 * @qp: the QP for this packet.
300 *
301 * This is called from qp_rcv() to process an incoming UC packet
302 * for the given QP.
303 * Called at interrupt level.
304 */
305void hfi1_uc_rcv(struct hfi1_packet *packet)
306{
307	struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
308	void *data = packet->payload;
309	u32 tlen = packet->tlen;
310	struct rvt_qp *qp = packet->qp;
311	struct ib_other_headers *ohdr = packet->ohdr;
312	u32 opcode = packet->opcode;
313	u32 hdrsize = packet->hlen;
314	u32 psn;
315	u32 pad = packet->pad;
316	struct ib_wc wc;
317	u32 pmtu = qp->pmtu;
318	struct ib_reth *reth;
319	int ret;
320	u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
321
322	if (hfi1_ruc_check_hdr(ibp, packet))
323		return;
324
325	process_ecn(qp, packet);
326
327	psn = ib_bth_get_psn(ohdr);
328	/* Compare the PSN verses the expected PSN. */
329	if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
330		/*
331		 * Handle a sequence error.
332		 * Silently drop any current message.
333		 */
334		qp->r_psn = psn;
335inv:
336		if (qp->r_state == OP(SEND_FIRST) ||
337		    qp->r_state == OP(SEND_MIDDLE)) {
338			set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
339			qp->r_sge.num_sge = 0;
340		} else {
341			rvt_put_ss(&qp->r_sge);
342		}
343		qp->r_state = OP(SEND_LAST);
344		switch (opcode) {
345		case OP(SEND_FIRST):
346		case OP(SEND_ONLY):
347		case OP(SEND_ONLY_WITH_IMMEDIATE):
348			goto send_first;
349
350		case OP(RDMA_WRITE_FIRST):
351		case OP(RDMA_WRITE_ONLY):
352		case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
353			goto rdma_first;
354
355		default:
356			goto drop;
357		}
358	}
359
360	/* Check for opcode sequence errors. */
361	switch (qp->r_state) {
362	case OP(SEND_FIRST):
363	case OP(SEND_MIDDLE):
364		if (opcode == OP(SEND_MIDDLE) ||
365		    opcode == OP(SEND_LAST) ||
366		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
367			break;
368		goto inv;
369
370	case OP(RDMA_WRITE_FIRST):
371	case OP(RDMA_WRITE_MIDDLE):
372		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
373		    opcode == OP(RDMA_WRITE_LAST) ||
374		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
375			break;
376		goto inv;
377
378	default:
379		if (opcode == OP(SEND_FIRST) ||
380		    opcode == OP(SEND_ONLY) ||
381		    opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
382		    opcode == OP(RDMA_WRITE_FIRST) ||
383		    opcode == OP(RDMA_WRITE_ONLY) ||
384		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
385			break;
386		goto inv;
387	}
388
389	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
390		rvt_comm_est(qp);
391
392	/* OK, process the packet. */
393	switch (opcode) {
394	case OP(SEND_FIRST):
395	case OP(SEND_ONLY):
396	case OP(SEND_ONLY_WITH_IMMEDIATE):
397send_first:
398		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
399			qp->r_sge = qp->s_rdma_read_sge;
400		} else {
401			ret = rvt_get_rwqe(qp, false);
402			if (ret < 0)
403				goto op_err;
404			if (!ret)
405				goto drop;
406			/*
407			 * qp->s_rdma_read_sge will be the owner
408			 * of the mr references.
409			 */
410			qp->s_rdma_read_sge = qp->r_sge;
411		}
412		qp->r_rcv_len = 0;
413		if (opcode == OP(SEND_ONLY))
414			goto no_immediate_data;
415		else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
416			goto send_last_imm;
417		fallthrough;
418	case OP(SEND_MIDDLE):
419		/* Check for invalid length PMTU or posted rwqe len. */
420		/*
421		 * There will be no padding for 9B packet but 16B packets
422		 * will come in with some padding since we always add
423		 * CRC and LT bytes which will need to be flit aligned
424		 */
425		if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
426			goto rewind;
427		qp->r_rcv_len += pmtu;
428		if (unlikely(qp->r_rcv_len > qp->r_len))
429			goto rewind;
430		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
431		break;
432
433	case OP(SEND_LAST_WITH_IMMEDIATE):
434send_last_imm:
435		wc.ex.imm_data = ohdr->u.imm_data;
436		wc.wc_flags = IB_WC_WITH_IMM;
437		goto send_last;
438	case OP(SEND_LAST):
439no_immediate_data:
440		wc.ex.imm_data = 0;
441		wc.wc_flags = 0;
442send_last:
443		/* Check for invalid length. */
444		/* LAST len should be >= 1 */
445		if (unlikely(tlen < (hdrsize + extra_bytes)))
446			goto rewind;
447		/* Don't count the CRC. */
448		tlen -= (hdrsize + extra_bytes);
449		wc.byte_len = tlen + qp->r_rcv_len;
450		if (unlikely(wc.byte_len > qp->r_len))
451			goto rewind;
452		wc.opcode = IB_WC_RECV;
453		rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
454		rvt_put_ss(&qp->s_rdma_read_sge);
455last_imm:
456		wc.wr_id = qp->r_wr_id;
457		wc.status = IB_WC_SUCCESS;
458		wc.qp = &qp->ibqp;
459		wc.src_qp = qp->remote_qpn;
460		wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
461		/*
462		 * It seems that IB mandates the presence of an SL in a
463		 * work completion only for the UD transport (see section
464		 * 11.4.2 of IBTA Vol. 1).
465		 *
466		 * However, the way the SL is chosen below is consistent
467		 * with the way that IB/qib works and is trying avoid
468		 * introducing incompatibilities.
469		 *
470		 * See also OPA Vol. 1, section 9.7.6, and table 9-17.
471		 */
472		wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
473		/* zero fields that are N/A */
474		wc.vendor_err = 0;
475		wc.pkey_index = 0;
476		wc.dlid_path_bits = 0;
477		wc.port_num = 0;
478		/* Signal completion event if the solicited bit is set. */
479		rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
480		break;
481
482	case OP(RDMA_WRITE_FIRST):
483	case OP(RDMA_WRITE_ONLY):
484	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
485rdma_first:
486		if (unlikely(!(qp->qp_access_flags &
487			       IB_ACCESS_REMOTE_WRITE))) {
488			goto drop;
489		}
490		reth = &ohdr->u.rc.reth;
491		qp->r_len = be32_to_cpu(reth->length);
492		qp->r_rcv_len = 0;
493		qp->r_sge.sg_list = NULL;
494		if (qp->r_len != 0) {
495			u32 rkey = be32_to_cpu(reth->rkey);
496			u64 vaddr = be64_to_cpu(reth->vaddr);
497			int ok;
498
499			/* Check rkey */
500			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
501					 vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
502			if (unlikely(!ok))
503				goto drop;
504			qp->r_sge.num_sge = 1;
505		} else {
506			qp->r_sge.num_sge = 0;
507			qp->r_sge.sge.mr = NULL;
508			qp->r_sge.sge.vaddr = NULL;
509			qp->r_sge.sge.length = 0;
510			qp->r_sge.sge.sge_length = 0;
511		}
512		if (opcode == OP(RDMA_WRITE_ONLY)) {
513			goto rdma_last;
514		} else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
515			wc.ex.imm_data = ohdr->u.rc.imm_data;
516			goto rdma_last_imm;
517		}
518		fallthrough;
519	case OP(RDMA_WRITE_MIDDLE):
520		/* Check for invalid length PMTU or posted rwqe len. */
521		if (unlikely(tlen != (hdrsize + pmtu + 4)))
522			goto drop;
523		qp->r_rcv_len += pmtu;
524		if (unlikely(qp->r_rcv_len > qp->r_len))
525			goto drop;
526		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
527		break;
528
529	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
530		wc.ex.imm_data = ohdr->u.imm_data;
531rdma_last_imm:
532		wc.wc_flags = IB_WC_WITH_IMM;
533
534		/* Check for invalid length. */
535		/* LAST len should be >= 1 */
536		if (unlikely(tlen < (hdrsize + pad + 4)))
537			goto drop;
538		/* Don't count the CRC. */
539		tlen -= (hdrsize + extra_bytes);
540		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
541			goto drop;
542		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
543			rvt_put_ss(&qp->s_rdma_read_sge);
544		} else {
545			ret = rvt_get_rwqe(qp, true);
546			if (ret < 0)
547				goto op_err;
548			if (!ret)
549				goto drop;
550		}
551		wc.byte_len = qp->r_len;
552		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
553		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
554		rvt_put_ss(&qp->r_sge);
555		goto last_imm;
556
557	case OP(RDMA_WRITE_LAST):
558rdma_last:
559		/* Check for invalid length. */
560		/* LAST len should be >= 1 */
561		if (unlikely(tlen < (hdrsize + pad + 4)))
562			goto drop;
563		/* Don't count the CRC. */
564		tlen -= (hdrsize + extra_bytes);
565		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
566			goto drop;
567		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
568		rvt_put_ss(&qp->r_sge);
569		break;
570
571	default:
572		/* Drop packet for unknown opcodes. */
573		goto drop;
574	}
575	qp->r_psn++;
576	qp->r_state = opcode;
577	return;
578
579rewind:
580	set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
581	qp->r_sge.num_sge = 0;
582drop:
583	ibp->rvp.n_pkt_drops++;
584	return;
585
586op_err:
587	rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
588}
589