162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright(c) 2020 - 2023 Cornelis Networks, Inc.
462306a36Sopenharmony_ci * Copyright(c) 2015 - 2018 Intel Corporation.
562306a36Sopenharmony_ci */
662306a36Sopenharmony_ci
762306a36Sopenharmony_ci#include <linux/mm.h>
862306a36Sopenharmony_ci#include <linux/types.h>
962306a36Sopenharmony_ci#include <linux/device.h>
1062306a36Sopenharmony_ci#include <linux/dmapool.h>
1162306a36Sopenharmony_ci#include <linux/slab.h>
1262306a36Sopenharmony_ci#include <linux/list.h>
1362306a36Sopenharmony_ci#include <linux/highmem.h>
1462306a36Sopenharmony_ci#include <linux/io.h>
1562306a36Sopenharmony_ci#include <linux/uio.h>
1662306a36Sopenharmony_ci#include <linux/rbtree.h>
1762306a36Sopenharmony_ci#include <linux/spinlock.h>
1862306a36Sopenharmony_ci#include <linux/delay.h>
1962306a36Sopenharmony_ci#include <linux/kthread.h>
2062306a36Sopenharmony_ci#include <linux/mmu_context.h>
2162306a36Sopenharmony_ci#include <linux/module.h>
2262306a36Sopenharmony_ci#include <linux/vmalloc.h>
2362306a36Sopenharmony_ci#include <linux/string.h>
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_ci#include "hfi.h"
2662306a36Sopenharmony_ci#include "sdma.h"
2762306a36Sopenharmony_ci#include "user_sdma.h"
2862306a36Sopenharmony_ci#include "verbs.h"  /* for the headers */
2962306a36Sopenharmony_ci#include "common.h" /* for struct hfi1_tid_info */
3062306a36Sopenharmony_ci#include "trace.h"
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_cistatic uint hfi1_sdma_comp_ring_size = 128;
3362306a36Sopenharmony_cimodule_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
3462306a36Sopenharmony_ciMODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128");
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_cistatic unsigned initial_pkt_count = 8;
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_cistatic int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
3962306a36Sopenharmony_cistatic void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
4062306a36Sopenharmony_cistatic inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
4162306a36Sopenharmony_cistatic void user_sdma_free_request(struct user_sdma_request *req);
4262306a36Sopenharmony_cistatic int check_header_template(struct user_sdma_request *req,
4362306a36Sopenharmony_ci				 struct hfi1_pkt_header *hdr, u32 lrhlen,
4462306a36Sopenharmony_ci				 u32 datalen);
4562306a36Sopenharmony_cistatic int set_txreq_header(struct user_sdma_request *req,
4662306a36Sopenharmony_ci			    struct user_sdma_txreq *tx, u32 datalen);
4762306a36Sopenharmony_cistatic int set_txreq_header_ahg(struct user_sdma_request *req,
4862306a36Sopenharmony_ci				struct user_sdma_txreq *tx, u32 len);
4962306a36Sopenharmony_cistatic inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
5062306a36Sopenharmony_ci				  struct hfi1_user_sdma_comp_q *cq,
5162306a36Sopenharmony_ci				  u16 idx, enum hfi1_sdma_comp_state state,
5262306a36Sopenharmony_ci				  int ret);
5362306a36Sopenharmony_cistatic inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags);
5462306a36Sopenharmony_cistatic inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_cistatic int defer_packet_queue(
5762306a36Sopenharmony_ci	struct sdma_engine *sde,
5862306a36Sopenharmony_ci	struct iowait_work *wait,
5962306a36Sopenharmony_ci	struct sdma_txreq *txreq,
6062306a36Sopenharmony_ci	uint seq,
6162306a36Sopenharmony_ci	bool pkts_sent);
6262306a36Sopenharmony_cistatic void activate_packet_queue(struct iowait *wait, int reason);
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_cistatic int defer_packet_queue(
6562306a36Sopenharmony_ci	struct sdma_engine *sde,
6662306a36Sopenharmony_ci	struct iowait_work *wait,
6762306a36Sopenharmony_ci	struct sdma_txreq *txreq,
6862306a36Sopenharmony_ci	uint seq,
6962306a36Sopenharmony_ci	bool pkts_sent)
7062306a36Sopenharmony_ci{
7162306a36Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq =
7262306a36Sopenharmony_ci		container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci	write_seqlock(&sde->waitlock);
7562306a36Sopenharmony_ci	trace_hfi1_usdma_defer(pq, sde, &pq->busy);
7662306a36Sopenharmony_ci	if (sdma_progress(sde, seq, txreq))
7762306a36Sopenharmony_ci		goto eagain;
7862306a36Sopenharmony_ci	/*
7962306a36Sopenharmony_ci	 * We are assuming that if the list is enqueued somewhere, it
8062306a36Sopenharmony_ci	 * is to the dmawait list since that is the only place where
8162306a36Sopenharmony_ci	 * it is supposed to be enqueued.
8262306a36Sopenharmony_ci	 */
8362306a36Sopenharmony_ci	xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
8462306a36Sopenharmony_ci	if (list_empty(&pq->busy.list)) {
8562306a36Sopenharmony_ci		pq->busy.lock = &sde->waitlock;
8662306a36Sopenharmony_ci		iowait_get_priority(&pq->busy);
8762306a36Sopenharmony_ci		iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
8862306a36Sopenharmony_ci	}
8962306a36Sopenharmony_ci	write_sequnlock(&sde->waitlock);
9062306a36Sopenharmony_ci	return -EBUSY;
9162306a36Sopenharmony_cieagain:
9262306a36Sopenharmony_ci	write_sequnlock(&sde->waitlock);
9362306a36Sopenharmony_ci	return -EAGAIN;
9462306a36Sopenharmony_ci}
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_cistatic void activate_packet_queue(struct iowait *wait, int reason)
9762306a36Sopenharmony_ci{
9862306a36Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq =
9962306a36Sopenharmony_ci		container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	trace_hfi1_usdma_activate(pq, wait, reason);
10262306a36Sopenharmony_ci	xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
10362306a36Sopenharmony_ci	wake_up(&wait->wait_dma);
10462306a36Sopenharmony_ci};
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ciint hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
10762306a36Sopenharmony_ci				struct hfi1_filedata *fd)
10862306a36Sopenharmony_ci{
10962306a36Sopenharmony_ci	int ret = -ENOMEM;
11062306a36Sopenharmony_ci	char buf[64];
11162306a36Sopenharmony_ci	struct hfi1_devdata *dd;
11262306a36Sopenharmony_ci	struct hfi1_user_sdma_comp_q *cq;
11362306a36Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq;
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	if (!uctxt || !fd)
11662306a36Sopenharmony_ci		return -EBADF;
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci	if (!hfi1_sdma_comp_ring_size)
11962306a36Sopenharmony_ci		return -EINVAL;
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_ci	dd = uctxt->dd;
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	pq = kzalloc(sizeof(*pq), GFP_KERNEL);
12462306a36Sopenharmony_ci	if (!pq)
12562306a36Sopenharmony_ci		return -ENOMEM;
12662306a36Sopenharmony_ci	pq->dd = dd;
12762306a36Sopenharmony_ci	pq->ctxt = uctxt->ctxt;
12862306a36Sopenharmony_ci	pq->subctxt = fd->subctxt;
12962306a36Sopenharmony_ci	pq->n_max_reqs = hfi1_sdma_comp_ring_size;
13062306a36Sopenharmony_ci	atomic_set(&pq->n_reqs, 0);
13162306a36Sopenharmony_ci	init_waitqueue_head(&pq->wait);
13262306a36Sopenharmony_ci	atomic_set(&pq->n_locked, 0);
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
13562306a36Sopenharmony_ci		    activate_packet_queue, NULL, NULL);
13662306a36Sopenharmony_ci	pq->reqidx = 0;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
13962306a36Sopenharmony_ci			   sizeof(*pq->reqs),
14062306a36Sopenharmony_ci			   GFP_KERNEL);
14162306a36Sopenharmony_ci	if (!pq->reqs)
14262306a36Sopenharmony_ci		goto pq_reqs_nomem;
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	pq->req_in_use = bitmap_zalloc(hfi1_sdma_comp_ring_size, GFP_KERNEL);
14562306a36Sopenharmony_ci	if (!pq->req_in_use)
14662306a36Sopenharmony_ci		goto pq_reqs_no_in_use;
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
14962306a36Sopenharmony_ci		 fd->subctxt);
15062306a36Sopenharmony_ci	pq->txreq_cache = kmem_cache_create(buf,
15162306a36Sopenharmony_ci					    sizeof(struct user_sdma_txreq),
15262306a36Sopenharmony_ci					    L1_CACHE_BYTES,
15362306a36Sopenharmony_ci					    SLAB_HWCACHE_ALIGN,
15462306a36Sopenharmony_ci					    NULL);
15562306a36Sopenharmony_ci	if (!pq->txreq_cache) {
15662306a36Sopenharmony_ci		dd_dev_err(dd, "[%u] Failed to allocate TxReq cache\n",
15762306a36Sopenharmony_ci			   uctxt->ctxt);
15862306a36Sopenharmony_ci		goto pq_txreq_nomem;
15962306a36Sopenharmony_ci	}
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
16262306a36Sopenharmony_ci	if (!cq)
16362306a36Sopenharmony_ci		goto cq_nomem;
16462306a36Sopenharmony_ci
16562306a36Sopenharmony_ci	cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps)
16662306a36Sopenharmony_ci				 * hfi1_sdma_comp_ring_size));
16762306a36Sopenharmony_ci	if (!cq->comps)
16862306a36Sopenharmony_ci		goto cq_comps_nomem;
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	cq->nentries = hfi1_sdma_comp_ring_size;
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci	ret = hfi1_init_system_pinning(pq);
17362306a36Sopenharmony_ci	if (ret)
17462306a36Sopenharmony_ci		goto pq_mmu_fail;
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	rcu_assign_pointer(fd->pq, pq);
17762306a36Sopenharmony_ci	fd->cq = cq;
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci	return 0;
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_cipq_mmu_fail:
18262306a36Sopenharmony_ci	vfree(cq->comps);
18362306a36Sopenharmony_cicq_comps_nomem:
18462306a36Sopenharmony_ci	kfree(cq);
18562306a36Sopenharmony_cicq_nomem:
18662306a36Sopenharmony_ci	kmem_cache_destroy(pq->txreq_cache);
18762306a36Sopenharmony_cipq_txreq_nomem:
18862306a36Sopenharmony_ci	bitmap_free(pq->req_in_use);
18962306a36Sopenharmony_cipq_reqs_no_in_use:
19062306a36Sopenharmony_ci	kfree(pq->reqs);
19162306a36Sopenharmony_cipq_reqs_nomem:
19262306a36Sopenharmony_ci	kfree(pq);
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci	return ret;
19562306a36Sopenharmony_ci}
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_cistatic void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq)
19862306a36Sopenharmony_ci{
19962306a36Sopenharmony_ci	unsigned long flags;
20062306a36Sopenharmony_ci	seqlock_t *lock = pq->busy.lock;
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci	if (!lock)
20362306a36Sopenharmony_ci		return;
20462306a36Sopenharmony_ci	write_seqlock_irqsave(lock, flags);
20562306a36Sopenharmony_ci	if (!list_empty(&pq->busy.list)) {
20662306a36Sopenharmony_ci		list_del_init(&pq->busy.list);
20762306a36Sopenharmony_ci		pq->busy.lock = NULL;
20862306a36Sopenharmony_ci	}
20962306a36Sopenharmony_ci	write_sequnlock_irqrestore(lock, flags);
21062306a36Sopenharmony_ci}
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ciint hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
21362306a36Sopenharmony_ci			       struct hfi1_ctxtdata *uctxt)
21462306a36Sopenharmony_ci{
21562306a36Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq;
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci	trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	spin_lock(&fd->pq_rcu_lock);
22062306a36Sopenharmony_ci	pq = srcu_dereference_check(fd->pq, &fd->pq_srcu,
22162306a36Sopenharmony_ci				    lockdep_is_held(&fd->pq_rcu_lock));
22262306a36Sopenharmony_ci	if (pq) {
22362306a36Sopenharmony_ci		rcu_assign_pointer(fd->pq, NULL);
22462306a36Sopenharmony_ci		spin_unlock(&fd->pq_rcu_lock);
22562306a36Sopenharmony_ci		synchronize_srcu(&fd->pq_srcu);
22662306a36Sopenharmony_ci		/* at this point there can be no more new requests */
22762306a36Sopenharmony_ci		iowait_sdma_drain(&pq->busy);
22862306a36Sopenharmony_ci		/* Wait until all requests have been freed. */
22962306a36Sopenharmony_ci		wait_event_interruptible(
23062306a36Sopenharmony_ci			pq->wait,
23162306a36Sopenharmony_ci			!atomic_read(&pq->n_reqs));
23262306a36Sopenharmony_ci		kfree(pq->reqs);
23362306a36Sopenharmony_ci		hfi1_free_system_pinning(pq);
23462306a36Sopenharmony_ci		bitmap_free(pq->req_in_use);
23562306a36Sopenharmony_ci		kmem_cache_destroy(pq->txreq_cache);
23662306a36Sopenharmony_ci		flush_pq_iowait(pq);
23762306a36Sopenharmony_ci		kfree(pq);
23862306a36Sopenharmony_ci	} else {
23962306a36Sopenharmony_ci		spin_unlock(&fd->pq_rcu_lock);
24062306a36Sopenharmony_ci	}
24162306a36Sopenharmony_ci	if (fd->cq) {
24262306a36Sopenharmony_ci		vfree(fd->cq->comps);
24362306a36Sopenharmony_ci		kfree(fd->cq);
24462306a36Sopenharmony_ci		fd->cq = NULL;
24562306a36Sopenharmony_ci	}
24662306a36Sopenharmony_ci	return 0;
24762306a36Sopenharmony_ci}
24862306a36Sopenharmony_ci
/*
 * dlid_to_selector() - map a destination LID to a small selector value
 * @dlid: destination LID
 *
 * The LID is folded to an 8-bit hash (high byte XOR low byte).  The first
 * time a hash is seen it is assigned the next selector from a counter that
 * wraps within 0..0x7F; later calls with the same hash return the same
 * selector.  0xFF marks an unassigned table entry.
 *
 * The table and counter are function-local statics and are updated
 * without any locking here.
 *
 * Return: the selector assigned to the hash of @dlid.
 */
static u8 dlid_to_selector(u16 dlid)
{
	static u8 table[256];
	static int table_ready;
	static u8 next_sel;
	u8 *slot;

	/* Lazily mark every entry unassigned on the first call. */
	if (!table_ready) {
		memset(table, 0xFF, sizeof(table));
		table_ready = 1;
	}

	slot = &table[((dlid >> 8) ^ dlid) & 0xFF];
	if (*slot == 0xFF) {
		*slot = next_sel;
		next_sel = (next_sel + 1) & 0x7F;
	}

	return *slot;
}
26962306a36Sopenharmony_ci
27062306a36Sopenharmony_ci/**
27162306a36Sopenharmony_ci * hfi1_user_sdma_process_request() - Process and start a user sdma request
27262306a36Sopenharmony_ci * @fd: valid file descriptor
27362306a36Sopenharmony_ci * @iovec: array of io vectors to process
27462306a36Sopenharmony_ci * @dim: overall iovec array size
27562306a36Sopenharmony_ci * @count: number of io vector array entries processed
27662306a36Sopenharmony_ci */
27762306a36Sopenharmony_ciint hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
27862306a36Sopenharmony_ci				   struct iovec *iovec, unsigned long dim,
27962306a36Sopenharmony_ci				   unsigned long *count)
28062306a36Sopenharmony_ci{
28162306a36Sopenharmony_ci	int ret = 0, i;
28262306a36Sopenharmony_ci	struct hfi1_ctxtdata *uctxt = fd->uctxt;
28362306a36Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq =
28462306a36Sopenharmony_ci		srcu_dereference(fd->pq, &fd->pq_srcu);
28562306a36Sopenharmony_ci	struct hfi1_user_sdma_comp_q *cq = fd->cq;
28662306a36Sopenharmony_ci	struct hfi1_devdata *dd = pq->dd;
28762306a36Sopenharmony_ci	unsigned long idx = 0;
28862306a36Sopenharmony_ci	u8 pcount = initial_pkt_count;
28962306a36Sopenharmony_ci	struct sdma_req_info info;
29062306a36Sopenharmony_ci	struct user_sdma_request *req;
29162306a36Sopenharmony_ci	u8 opcode, sc, vl;
29262306a36Sopenharmony_ci	u16 pkey;
29362306a36Sopenharmony_ci	u32 slid;
29462306a36Sopenharmony_ci	u16 dlid;
29562306a36Sopenharmony_ci	u32 selector;
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci	if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
29862306a36Sopenharmony_ci		hfi1_cdbg(
29962306a36Sopenharmony_ci		   SDMA,
30062306a36Sopenharmony_ci		   "[%u:%u:%u] First vector not big enough for header %lu/%lu",
30162306a36Sopenharmony_ci		   dd->unit, uctxt->ctxt, fd->subctxt,
30262306a36Sopenharmony_ci		   iovec[idx].iov_len, sizeof(info) + sizeof(req->hdr));
30362306a36Sopenharmony_ci		return -EINVAL;
30462306a36Sopenharmony_ci	}
30562306a36Sopenharmony_ci	ret = copy_from_user(&info, iovec[idx].iov_base, sizeof(info));
30662306a36Sopenharmony_ci	if (ret) {
30762306a36Sopenharmony_ci		hfi1_cdbg(SDMA, "[%u:%u:%u] Failed to copy info QW (%d)",
30862306a36Sopenharmony_ci			  dd->unit, uctxt->ctxt, fd->subctxt, ret);
30962306a36Sopenharmony_ci		return -EFAULT;
31062306a36Sopenharmony_ci	}
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_ci	trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
31362306a36Sopenharmony_ci				     (u16 *)&info);
31462306a36Sopenharmony_ci	if (info.comp_idx >= hfi1_sdma_comp_ring_size) {
31562306a36Sopenharmony_ci		hfi1_cdbg(SDMA,
31662306a36Sopenharmony_ci			  "[%u:%u:%u:%u] Invalid comp index",
31762306a36Sopenharmony_ci			  dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
31862306a36Sopenharmony_ci		return -EINVAL;
31962306a36Sopenharmony_ci	}
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci	/*
32262306a36Sopenharmony_ci	 * Sanity check the header io vector count.  Need at least 1 vector
32362306a36Sopenharmony_ci	 * (header) and cannot be larger than the actual io vector count.
32462306a36Sopenharmony_ci	 */
32562306a36Sopenharmony_ci	if (req_iovcnt(info.ctrl) < 1 || req_iovcnt(info.ctrl) > dim) {
32662306a36Sopenharmony_ci		hfi1_cdbg(SDMA,
32762306a36Sopenharmony_ci			  "[%u:%u:%u:%u] Invalid iov count %d, dim %ld",
32862306a36Sopenharmony_ci			  dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx,
32962306a36Sopenharmony_ci			  req_iovcnt(info.ctrl), dim);
33062306a36Sopenharmony_ci		return -EINVAL;
33162306a36Sopenharmony_ci	}
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_ci	if (!info.fragsize) {
33462306a36Sopenharmony_ci		hfi1_cdbg(SDMA,
33562306a36Sopenharmony_ci			  "[%u:%u:%u:%u] Request does not specify fragsize",
33662306a36Sopenharmony_ci			  dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
33762306a36Sopenharmony_ci		return -EINVAL;
33862306a36Sopenharmony_ci	}
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci	/* Try to claim the request. */
34162306a36Sopenharmony_ci	if (test_and_set_bit(info.comp_idx, pq->req_in_use)) {
34262306a36Sopenharmony_ci		hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in use",
34362306a36Sopenharmony_ci			  dd->unit, uctxt->ctxt, fd->subctxt,
34462306a36Sopenharmony_ci			  info.comp_idx);
34562306a36Sopenharmony_ci		return -EBADSLT;
34662306a36Sopenharmony_ci	}
34762306a36Sopenharmony_ci	/*
34862306a36Sopenharmony_ci	 * All safety checks have been done and this request has been claimed.
34962306a36Sopenharmony_ci	 */
35062306a36Sopenharmony_ci	trace_hfi1_sdma_user_process_request(dd, uctxt->ctxt, fd->subctxt,
35162306a36Sopenharmony_ci					     info.comp_idx);
35262306a36Sopenharmony_ci	req = pq->reqs + info.comp_idx;
35362306a36Sopenharmony_ci	req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
35462306a36Sopenharmony_ci	req->data_len  = 0;
35562306a36Sopenharmony_ci	req->pq = pq;
35662306a36Sopenharmony_ci	req->cq = cq;
35762306a36Sopenharmony_ci	req->ahg_idx = -1;
35862306a36Sopenharmony_ci	req->iov_idx = 0;
35962306a36Sopenharmony_ci	req->sent = 0;
36062306a36Sopenharmony_ci	req->seqnum = 0;
36162306a36Sopenharmony_ci	req->seqcomp = 0;
36262306a36Sopenharmony_ci	req->seqsubmitted = 0;
36362306a36Sopenharmony_ci	req->tids = NULL;
36462306a36Sopenharmony_ci	req->has_error = 0;
36562306a36Sopenharmony_ci	INIT_LIST_HEAD(&req->txps);
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci	memcpy(&req->info, &info, sizeof(info));
36862306a36Sopenharmony_ci
36962306a36Sopenharmony_ci	/* The request is initialized, count it */
37062306a36Sopenharmony_ci	atomic_inc(&pq->n_reqs);
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_ci	if (req_opcode(info.ctrl) == EXPECTED) {
37362306a36Sopenharmony_ci		/* expected must have a TID info and at least one data vector */
37462306a36Sopenharmony_ci		if (req->data_iovs < 2) {
37562306a36Sopenharmony_ci			SDMA_DBG(req,
37662306a36Sopenharmony_ci				 "Not enough vectors for expected request");
37762306a36Sopenharmony_ci			ret = -EINVAL;
37862306a36Sopenharmony_ci			goto free_req;
37962306a36Sopenharmony_ci		}
38062306a36Sopenharmony_ci		req->data_iovs--;
38162306a36Sopenharmony_ci	}
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_ci	if (!info.npkts || req->data_iovs > MAX_VECTORS_PER_REQ) {
38462306a36Sopenharmony_ci		SDMA_DBG(req, "Too many vectors (%u/%u)", req->data_iovs,
38562306a36Sopenharmony_ci			 MAX_VECTORS_PER_REQ);
38662306a36Sopenharmony_ci		ret = -EINVAL;
38762306a36Sopenharmony_ci		goto free_req;
38862306a36Sopenharmony_ci	}
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci	/* Copy the header from the user buffer */
39162306a36Sopenharmony_ci	ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info),
39262306a36Sopenharmony_ci			     sizeof(req->hdr));
39362306a36Sopenharmony_ci	if (ret) {
39462306a36Sopenharmony_ci		SDMA_DBG(req, "Failed to copy header template (%d)", ret);
39562306a36Sopenharmony_ci		ret = -EFAULT;
39662306a36Sopenharmony_ci		goto free_req;
39762306a36Sopenharmony_ci	}
39862306a36Sopenharmony_ci
39962306a36Sopenharmony_ci	/* If Static rate control is not enabled, sanitize the header. */
40062306a36Sopenharmony_ci	if (!HFI1_CAP_IS_USET(STATIC_RATE_CTRL))
40162306a36Sopenharmony_ci		req->hdr.pbc[2] = 0;
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci	/* Validate the opcode. Do not trust packets from user space blindly. */
40462306a36Sopenharmony_ci	opcode = (be32_to_cpu(req->hdr.bth[0]) >> 24) & 0xff;
40562306a36Sopenharmony_ci	if ((opcode & USER_OPCODE_CHECK_MASK) !=
40662306a36Sopenharmony_ci	     USER_OPCODE_CHECK_VAL) {
40762306a36Sopenharmony_ci		SDMA_DBG(req, "Invalid opcode (%d)", opcode);
40862306a36Sopenharmony_ci		ret = -EINVAL;
40962306a36Sopenharmony_ci		goto free_req;
41062306a36Sopenharmony_ci	}
41162306a36Sopenharmony_ci	/*
41262306a36Sopenharmony_ci	 * Validate the vl. Do not trust packets from user space blindly.
41362306a36Sopenharmony_ci	 * VL comes from PBC, SC comes from LRH, and the VL needs to
41462306a36Sopenharmony_ci	 * match the SC look up.
41562306a36Sopenharmony_ci	 */
41662306a36Sopenharmony_ci	vl = (le16_to_cpu(req->hdr.pbc[0]) >> 12) & 0xF;
41762306a36Sopenharmony_ci	sc = (((be16_to_cpu(req->hdr.lrh[0]) >> 12) & 0xF) |
41862306a36Sopenharmony_ci	      (((le16_to_cpu(req->hdr.pbc[1]) >> 14) & 0x1) << 4));
41962306a36Sopenharmony_ci	if (vl >= dd->pport->vls_operational ||
42062306a36Sopenharmony_ci	    vl != sc_to_vlt(dd, sc)) {
42162306a36Sopenharmony_ci		SDMA_DBG(req, "Invalid SC(%u)/VL(%u)", sc, vl);
42262306a36Sopenharmony_ci		ret = -EINVAL;
42362306a36Sopenharmony_ci		goto free_req;
42462306a36Sopenharmony_ci	}
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_ci	/* Checking P_KEY for requests from user-space */
42762306a36Sopenharmony_ci	pkey = (u16)be32_to_cpu(req->hdr.bth[0]);
42862306a36Sopenharmony_ci	slid = be16_to_cpu(req->hdr.lrh[3]);
42962306a36Sopenharmony_ci	if (egress_pkey_check(dd->pport, slid, pkey, sc, PKEY_CHECK_INVALID)) {
43062306a36Sopenharmony_ci		ret = -EINVAL;
43162306a36Sopenharmony_ci		goto free_req;
43262306a36Sopenharmony_ci	}
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_ci	/*
43562306a36Sopenharmony_ci	 * Also should check the BTH.lnh. If it says the next header is GRH then
43662306a36Sopenharmony_ci	 * the RXE parsing will be off and will land in the middle of the KDETH
43762306a36Sopenharmony_ci	 * or miss it entirely.
43862306a36Sopenharmony_ci	 */
43962306a36Sopenharmony_ci	if ((be16_to_cpu(req->hdr.lrh[0]) & 0x3) == HFI1_LRH_GRH) {
44062306a36Sopenharmony_ci		SDMA_DBG(req, "User tried to pass in a GRH");
44162306a36Sopenharmony_ci		ret = -EINVAL;
44262306a36Sopenharmony_ci		goto free_req;
44362306a36Sopenharmony_ci	}
44462306a36Sopenharmony_ci
44562306a36Sopenharmony_ci	req->koffset = le32_to_cpu(req->hdr.kdeth.swdata[6]);
44662306a36Sopenharmony_ci	/*
44762306a36Sopenharmony_ci	 * Calculate the initial TID offset based on the values of
44862306a36Sopenharmony_ci	 * KDETH.OFFSET and KDETH.OM that are passed in.
44962306a36Sopenharmony_ci	 */
45062306a36Sopenharmony_ci	req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) *
45162306a36Sopenharmony_ci		(KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
45262306a36Sopenharmony_ci		 KDETH_OM_LARGE : KDETH_OM_SMALL);
45362306a36Sopenharmony_ci	trace_hfi1_sdma_user_initial_tidoffset(dd, uctxt->ctxt, fd->subctxt,
45462306a36Sopenharmony_ci					       info.comp_idx, req->tidoffset);
45562306a36Sopenharmony_ci	idx++;
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_ci	/* Save all the IO vector structures */
45862306a36Sopenharmony_ci	for (i = 0; i < req->data_iovs; i++) {
45962306a36Sopenharmony_ci		req->iovs[i].offset = 0;
46062306a36Sopenharmony_ci		INIT_LIST_HEAD(&req->iovs[i].list);
46162306a36Sopenharmony_ci		memcpy(&req->iovs[i].iov,
46262306a36Sopenharmony_ci		       iovec + idx++,
46362306a36Sopenharmony_ci		       sizeof(req->iovs[i].iov));
46462306a36Sopenharmony_ci		if (req->iovs[i].iov.iov_len == 0) {
46562306a36Sopenharmony_ci			ret = -EINVAL;
46662306a36Sopenharmony_ci			goto free_req;
46762306a36Sopenharmony_ci		}
46862306a36Sopenharmony_ci		req->data_len += req->iovs[i].iov.iov_len;
46962306a36Sopenharmony_ci	}
47062306a36Sopenharmony_ci	trace_hfi1_sdma_user_data_length(dd, uctxt->ctxt, fd->subctxt,
47162306a36Sopenharmony_ci					 info.comp_idx, req->data_len);
47262306a36Sopenharmony_ci	if (pcount > req->info.npkts)
47362306a36Sopenharmony_ci		pcount = req->info.npkts;
47462306a36Sopenharmony_ci	/*
47562306a36Sopenharmony_ci	 * Copy any TID info
47662306a36Sopenharmony_ci	 * User space will provide the TID info only when the
47762306a36Sopenharmony_ci	 * request type is EXPECTED. This is true even if there is
47862306a36Sopenharmony_ci	 * only one packet in the request and the header is already
47962306a36Sopenharmony_ci	 * setup. The reason for the singular TID case is that the
48062306a36Sopenharmony_ci	 * driver needs to perform safety checks.
48162306a36Sopenharmony_ci	 */
48262306a36Sopenharmony_ci	if (req_opcode(req->info.ctrl) == EXPECTED) {
48362306a36Sopenharmony_ci		u16 ntids = iovec[idx].iov_len / sizeof(*req->tids);
48462306a36Sopenharmony_ci		u32 *tmp;
48562306a36Sopenharmony_ci
48662306a36Sopenharmony_ci		if (!ntids || ntids > MAX_TID_PAIR_ENTRIES) {
48762306a36Sopenharmony_ci			ret = -EINVAL;
48862306a36Sopenharmony_ci			goto free_req;
48962306a36Sopenharmony_ci		}
49062306a36Sopenharmony_ci
49162306a36Sopenharmony_ci		/*
49262306a36Sopenharmony_ci		 * We have to copy all of the tids because they may vary
49362306a36Sopenharmony_ci		 * in size and, therefore, the TID count might not be
49462306a36Sopenharmony_ci		 * equal to the pkt count. However, there is no way to
49562306a36Sopenharmony_ci		 * tell at this point.
49662306a36Sopenharmony_ci		 */
49762306a36Sopenharmony_ci		tmp = memdup_user(iovec[idx].iov_base,
49862306a36Sopenharmony_ci				  ntids * sizeof(*req->tids));
49962306a36Sopenharmony_ci		if (IS_ERR(tmp)) {
50062306a36Sopenharmony_ci			ret = PTR_ERR(tmp);
50162306a36Sopenharmony_ci			SDMA_DBG(req, "Failed to copy %d TIDs (%d)",
50262306a36Sopenharmony_ci				 ntids, ret);
50362306a36Sopenharmony_ci			goto free_req;
50462306a36Sopenharmony_ci		}
50562306a36Sopenharmony_ci		req->tids = tmp;
50662306a36Sopenharmony_ci		req->n_tids = ntids;
50762306a36Sopenharmony_ci		req->tididx = 0;
50862306a36Sopenharmony_ci		idx++;
50962306a36Sopenharmony_ci	}
51062306a36Sopenharmony_ci
51162306a36Sopenharmony_ci	dlid = be16_to_cpu(req->hdr.lrh[1]);
51262306a36Sopenharmony_ci	selector = dlid_to_selector(dlid);
51362306a36Sopenharmony_ci	selector += uctxt->ctxt + fd->subctxt;
51462306a36Sopenharmony_ci	req->sde = sdma_select_user_engine(dd, selector, vl);
51562306a36Sopenharmony_ci
51662306a36Sopenharmony_ci	if (!req->sde || !sdma_running(req->sde)) {
51762306a36Sopenharmony_ci		ret = -ECOMM;
51862306a36Sopenharmony_ci		goto free_req;
51962306a36Sopenharmony_ci	}
52062306a36Sopenharmony_ci
52162306a36Sopenharmony_ci	/* We don't need an AHG entry if the request contains only one packet */
52262306a36Sopenharmony_ci	if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG))
52362306a36Sopenharmony_ci		req->ahg_idx = sdma_ahg_alloc(req->sde);
52462306a36Sopenharmony_ci
52562306a36Sopenharmony_ci	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
52662306a36Sopenharmony_ci	pq->state = SDMA_PKT_Q_ACTIVE;
52762306a36Sopenharmony_ci
52862306a36Sopenharmony_ci	/*
52962306a36Sopenharmony_ci	 * This is a somewhat blocking send implementation.
53062306a36Sopenharmony_ci	 * The driver will block the caller until all packets of the
53162306a36Sopenharmony_ci	 * request have been submitted to the SDMA engine. However, it
53262306a36Sopenharmony_ci	 * will not wait for send completions.
53362306a36Sopenharmony_ci	 */
53462306a36Sopenharmony_ci	while (req->seqsubmitted != req->info.npkts) {
53562306a36Sopenharmony_ci		ret = user_sdma_send_pkts(req, pcount);
53662306a36Sopenharmony_ci		if (ret < 0) {
53762306a36Sopenharmony_ci			int we_ret;
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_ci			if (ret != -EBUSY)
54062306a36Sopenharmony_ci				goto free_req;
54162306a36Sopenharmony_ci			we_ret = wait_event_interruptible_timeout(
54262306a36Sopenharmony_ci				pq->busy.wait_dma,
54362306a36Sopenharmony_ci				pq->state == SDMA_PKT_Q_ACTIVE,
54462306a36Sopenharmony_ci				msecs_to_jiffies(
54562306a36Sopenharmony_ci					SDMA_IOWAIT_TIMEOUT));
54662306a36Sopenharmony_ci			trace_hfi1_usdma_we(pq, we_ret);
54762306a36Sopenharmony_ci			if (we_ret <= 0)
54862306a36Sopenharmony_ci				flush_pq_iowait(pq);
54962306a36Sopenharmony_ci		}
55062306a36Sopenharmony_ci	}
55162306a36Sopenharmony_ci	*count += idx;
55262306a36Sopenharmony_ci	return 0;
55362306a36Sopenharmony_cifree_req:
55462306a36Sopenharmony_ci	/*
55562306a36Sopenharmony_ci	 * If the submitted seqsubmitted == npkts, the completion routine
55662306a36Sopenharmony_ci	 * controls the final state.  If sequbmitted < npkts, wait for any
55762306a36Sopenharmony_ci	 * outstanding packets to finish before cleaning up.
55862306a36Sopenharmony_ci	 */
55962306a36Sopenharmony_ci	if (req->seqsubmitted < req->info.npkts) {
56062306a36Sopenharmony_ci		if (req->seqsubmitted)
56162306a36Sopenharmony_ci			wait_event(pq->busy.wait_dma,
56262306a36Sopenharmony_ci				   (req->seqcomp == req->seqsubmitted - 1));
56362306a36Sopenharmony_ci		user_sdma_free_request(req);
56462306a36Sopenharmony_ci		pq_update(pq);
56562306a36Sopenharmony_ci		set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
56662306a36Sopenharmony_ci	}
56762306a36Sopenharmony_ci	return ret;
56862306a36Sopenharmony_ci}
56962306a36Sopenharmony_ci
57062306a36Sopenharmony_cistatic inline u32 compute_data_length(struct user_sdma_request *req,
57162306a36Sopenharmony_ci				      struct user_sdma_txreq *tx)
57262306a36Sopenharmony_ci{
57362306a36Sopenharmony_ci	/*
57462306a36Sopenharmony_ci	 * Determine the proper size of the packet data.
57562306a36Sopenharmony_ci	 * The size of the data of the first packet is in the header
57662306a36Sopenharmony_ci	 * template. However, it includes the header and ICRC, which need
57762306a36Sopenharmony_ci	 * to be subtracted.
57862306a36Sopenharmony_ci	 * The minimum representable packet data length in a header is 4 bytes,
57962306a36Sopenharmony_ci	 * therefore, when the data length request is less than 4 bytes, there's
58062306a36Sopenharmony_ci	 * only one packet, and the packet data length is equal to that of the
58162306a36Sopenharmony_ci	 * request data length.
58262306a36Sopenharmony_ci	 * The size of the remaining packets is the minimum of the frag
58362306a36Sopenharmony_ci	 * size (MTU) or remaining data in the request.
58462306a36Sopenharmony_ci	 */
58562306a36Sopenharmony_ci	u32 len;
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci	if (!req->seqnum) {
58862306a36Sopenharmony_ci		if (req->data_len < sizeof(u32))
58962306a36Sopenharmony_ci			len = req->data_len;
59062306a36Sopenharmony_ci		else
59162306a36Sopenharmony_ci			len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) -
59262306a36Sopenharmony_ci			       (sizeof(tx->hdr) - 4));
59362306a36Sopenharmony_ci	} else if (req_opcode(req->info.ctrl) == EXPECTED) {
59462306a36Sopenharmony_ci		u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) *
59562306a36Sopenharmony_ci			PAGE_SIZE;
59662306a36Sopenharmony_ci		/*
59762306a36Sopenharmony_ci		 * Get the data length based on the remaining space in the
59862306a36Sopenharmony_ci		 * TID pair.
59962306a36Sopenharmony_ci		 */
60062306a36Sopenharmony_ci		len = min(tidlen - req->tidoffset, (u32)req->info.fragsize);
60162306a36Sopenharmony_ci		/* If we've filled up the TID pair, move to the next one. */
60262306a36Sopenharmony_ci		if (unlikely(!len) && ++req->tididx < req->n_tids &&
60362306a36Sopenharmony_ci		    req->tids[req->tididx]) {
60462306a36Sopenharmony_ci			tidlen = EXP_TID_GET(req->tids[req->tididx],
60562306a36Sopenharmony_ci					     LEN) * PAGE_SIZE;
60662306a36Sopenharmony_ci			req->tidoffset = 0;
60762306a36Sopenharmony_ci			len = min_t(u32, tidlen, req->info.fragsize);
60862306a36Sopenharmony_ci		}
60962306a36Sopenharmony_ci		/*
61062306a36Sopenharmony_ci		 * Since the TID pairs map entire pages, make sure that we
61162306a36Sopenharmony_ci		 * are not going to try to send more data that we have
61262306a36Sopenharmony_ci		 * remaining.
61362306a36Sopenharmony_ci		 */
61462306a36Sopenharmony_ci		len = min(len, req->data_len - req->sent);
61562306a36Sopenharmony_ci	} else {
61662306a36Sopenharmony_ci		len = min(req->data_len - req->sent, (u32)req->info.fragsize);
61762306a36Sopenharmony_ci	}
61862306a36Sopenharmony_ci	trace_hfi1_sdma_user_compute_length(req->pq->dd,
61962306a36Sopenharmony_ci					    req->pq->ctxt,
62062306a36Sopenharmony_ci					    req->pq->subctxt,
62162306a36Sopenharmony_ci					    req->info.comp_idx,
62262306a36Sopenharmony_ci					    len);
62362306a36Sopenharmony_ci	return len;
62462306a36Sopenharmony_ci}
62562306a36Sopenharmony_ci
62662306a36Sopenharmony_cistatic inline u32 pad_len(u32 len)
62762306a36Sopenharmony_ci{
62862306a36Sopenharmony_ci	if (len & (sizeof(u32) - 1))
62962306a36Sopenharmony_ci		len += sizeof(u32) - (len & (sizeof(u32) - 1));
63062306a36Sopenharmony_ci	return len;
63162306a36Sopenharmony_ci}
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_cistatic inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
63462306a36Sopenharmony_ci{
63562306a36Sopenharmony_ci	/* (Size of complete header - size of PBC) + 4B ICRC + data length */
63662306a36Sopenharmony_ci	return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len);
63762306a36Sopenharmony_ci}
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_cistatic int user_sdma_txadd_ahg(struct user_sdma_request *req,
64062306a36Sopenharmony_ci			       struct user_sdma_txreq *tx,
64162306a36Sopenharmony_ci			       u32 datalen)
64262306a36Sopenharmony_ci{
64362306a36Sopenharmony_ci	int ret;
64462306a36Sopenharmony_ci	u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
64562306a36Sopenharmony_ci	u32 lrhlen = get_lrh_len(req->hdr, pad_len(datalen));
64662306a36Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq = req->pq;
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci	/*
64962306a36Sopenharmony_ci	 * Copy the request header into the tx header
65062306a36Sopenharmony_ci	 * because the HW needs a cacheline-aligned
65162306a36Sopenharmony_ci	 * address.
65262306a36Sopenharmony_ci	 * This copy can be optimized out if the hdr
65362306a36Sopenharmony_ci	 * member of user_sdma_request were also
65462306a36Sopenharmony_ci	 * cacheline aligned.
65562306a36Sopenharmony_ci	 */
65662306a36Sopenharmony_ci	memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr));
65762306a36Sopenharmony_ci	if (PBC2LRH(pbclen) != lrhlen) {
65862306a36Sopenharmony_ci		pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen);
65962306a36Sopenharmony_ci		tx->hdr.pbc[0] = cpu_to_le16(pbclen);
66062306a36Sopenharmony_ci	}
66162306a36Sopenharmony_ci	ret = check_header_template(req, &tx->hdr, lrhlen, datalen);
66262306a36Sopenharmony_ci	if (ret)
66362306a36Sopenharmony_ci		return ret;
66462306a36Sopenharmony_ci	ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY,
66562306a36Sopenharmony_ci			      sizeof(tx->hdr) + datalen, req->ahg_idx,
66662306a36Sopenharmony_ci			      0, NULL, 0, user_sdma_txreq_cb);
66762306a36Sopenharmony_ci	if (ret)
66862306a36Sopenharmony_ci		return ret;
66962306a36Sopenharmony_ci	ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq, &tx->hdr, sizeof(tx->hdr));
67062306a36Sopenharmony_ci	if (ret)
67162306a36Sopenharmony_ci		sdma_txclean(pq->dd, &tx->txreq);
67262306a36Sopenharmony_ci	return ret;
67362306a36Sopenharmony_ci}
67462306a36Sopenharmony_ci
67562306a36Sopenharmony_cistatic int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
67662306a36Sopenharmony_ci{
67762306a36Sopenharmony_ci	int ret = 0;
67862306a36Sopenharmony_ci	u16 count;
67962306a36Sopenharmony_ci	unsigned npkts = 0;
68062306a36Sopenharmony_ci	struct user_sdma_txreq *tx = NULL;
68162306a36Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq = NULL;
68262306a36Sopenharmony_ci	struct user_sdma_iovec *iovec = NULL;
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_ci	if (!req->pq)
68562306a36Sopenharmony_ci		return -EINVAL;
68662306a36Sopenharmony_ci
68762306a36Sopenharmony_ci	pq = req->pq;
68862306a36Sopenharmony_ci
68962306a36Sopenharmony_ci	/* If tx completion has reported an error, we are done. */
69062306a36Sopenharmony_ci	if (READ_ONCE(req->has_error))
69162306a36Sopenharmony_ci		return -EFAULT;
69262306a36Sopenharmony_ci
69362306a36Sopenharmony_ci	/*
69462306a36Sopenharmony_ci	 * Check if we might have sent the entire request already
69562306a36Sopenharmony_ci	 */
69662306a36Sopenharmony_ci	if (unlikely(req->seqnum == req->info.npkts)) {
69762306a36Sopenharmony_ci		if (!list_empty(&req->txps))
69862306a36Sopenharmony_ci			goto dosend;
69962306a36Sopenharmony_ci		return ret;
70062306a36Sopenharmony_ci	}
70162306a36Sopenharmony_ci
70262306a36Sopenharmony_ci	if (!maxpkts || maxpkts > req->info.npkts - req->seqnum)
70362306a36Sopenharmony_ci		maxpkts = req->info.npkts - req->seqnum;
70462306a36Sopenharmony_ci
70562306a36Sopenharmony_ci	while (npkts < maxpkts) {
70662306a36Sopenharmony_ci		u32 datalen = 0;
70762306a36Sopenharmony_ci
70862306a36Sopenharmony_ci		/*
70962306a36Sopenharmony_ci		 * Check whether any of the completions have come back
71062306a36Sopenharmony_ci		 * with errors. If so, we are not going to process any
71162306a36Sopenharmony_ci		 * more packets from this request.
71262306a36Sopenharmony_ci		 */
71362306a36Sopenharmony_ci		if (READ_ONCE(req->has_error))
71462306a36Sopenharmony_ci			return -EFAULT;
71562306a36Sopenharmony_ci
71662306a36Sopenharmony_ci		tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
71762306a36Sopenharmony_ci		if (!tx)
71862306a36Sopenharmony_ci			return -ENOMEM;
71962306a36Sopenharmony_ci
72062306a36Sopenharmony_ci		tx->flags = 0;
72162306a36Sopenharmony_ci		tx->req = req;
72262306a36Sopenharmony_ci		INIT_LIST_HEAD(&tx->list);
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci		/*
72562306a36Sopenharmony_ci		 * For the last packet set the ACK request
72662306a36Sopenharmony_ci		 * and disable header suppression.
72762306a36Sopenharmony_ci		 */
72862306a36Sopenharmony_ci		if (req->seqnum == req->info.npkts - 1)
72962306a36Sopenharmony_ci			tx->flags |= (TXREQ_FLAGS_REQ_ACK |
73062306a36Sopenharmony_ci				      TXREQ_FLAGS_REQ_DISABLE_SH);
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci		/*
73362306a36Sopenharmony_ci		 * Calculate the payload size - this is min of the fragment
73462306a36Sopenharmony_ci		 * (MTU) size or the remaining bytes in the request but only
73562306a36Sopenharmony_ci		 * if we have payload data.
73662306a36Sopenharmony_ci		 */
73762306a36Sopenharmony_ci		if (req->data_len) {
73862306a36Sopenharmony_ci			iovec = &req->iovs[req->iov_idx];
73962306a36Sopenharmony_ci			if (READ_ONCE(iovec->offset) == iovec->iov.iov_len) {
74062306a36Sopenharmony_ci				if (++req->iov_idx == req->data_iovs) {
74162306a36Sopenharmony_ci					ret = -EFAULT;
74262306a36Sopenharmony_ci					goto free_tx;
74362306a36Sopenharmony_ci				}
74462306a36Sopenharmony_ci				iovec = &req->iovs[req->iov_idx];
74562306a36Sopenharmony_ci				WARN_ON(iovec->offset);
74662306a36Sopenharmony_ci			}
74762306a36Sopenharmony_ci
74862306a36Sopenharmony_ci			datalen = compute_data_length(req, tx);
74962306a36Sopenharmony_ci
75062306a36Sopenharmony_ci			/*
75162306a36Sopenharmony_ci			 * Disable header suppression for the payload <= 8DWS.
75262306a36Sopenharmony_ci			 * If there is an uncorrectable error in the receive
75362306a36Sopenharmony_ci			 * data FIFO when the received payload size is less than
75462306a36Sopenharmony_ci			 * or equal to 8DWS then the RxDmaDataFifoRdUncErr is
75562306a36Sopenharmony_ci			 * not reported.There is set RHF.EccErr if the header
75662306a36Sopenharmony_ci			 * is not suppressed.
75762306a36Sopenharmony_ci			 */
75862306a36Sopenharmony_ci			if (!datalen) {
75962306a36Sopenharmony_ci				SDMA_DBG(req,
76062306a36Sopenharmony_ci					 "Request has data but pkt len is 0");
76162306a36Sopenharmony_ci				ret = -EFAULT;
76262306a36Sopenharmony_ci				goto free_tx;
76362306a36Sopenharmony_ci			} else if (datalen <= 32) {
76462306a36Sopenharmony_ci				tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH;
76562306a36Sopenharmony_ci			}
76662306a36Sopenharmony_ci		}
76762306a36Sopenharmony_ci
76862306a36Sopenharmony_ci		if (req->ahg_idx >= 0) {
76962306a36Sopenharmony_ci			if (!req->seqnum) {
77062306a36Sopenharmony_ci				ret = user_sdma_txadd_ahg(req, tx, datalen);
77162306a36Sopenharmony_ci				if (ret)
77262306a36Sopenharmony_ci					goto free_tx;
77362306a36Sopenharmony_ci			} else {
77462306a36Sopenharmony_ci				int changes;
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_ci				changes = set_txreq_header_ahg(req, tx,
77762306a36Sopenharmony_ci							       datalen);
77862306a36Sopenharmony_ci				if (changes < 0) {
77962306a36Sopenharmony_ci					ret = changes;
78062306a36Sopenharmony_ci					goto free_tx;
78162306a36Sopenharmony_ci				}
78262306a36Sopenharmony_ci			}
78362306a36Sopenharmony_ci		} else {
78462306a36Sopenharmony_ci			ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
78562306a36Sopenharmony_ci					  datalen, user_sdma_txreq_cb);
78662306a36Sopenharmony_ci			if (ret)
78762306a36Sopenharmony_ci				goto free_tx;
78862306a36Sopenharmony_ci			/*
78962306a36Sopenharmony_ci			 * Modify the header for this packet. This only needs
79062306a36Sopenharmony_ci			 * to be done if we are not going to use AHG. Otherwise,
79162306a36Sopenharmony_ci			 * the HW will do it based on the changes we gave it
79262306a36Sopenharmony_ci			 * during sdma_txinit_ahg().
79362306a36Sopenharmony_ci			 */
79462306a36Sopenharmony_ci			ret = set_txreq_header(req, tx, datalen);
79562306a36Sopenharmony_ci			if (ret)
79662306a36Sopenharmony_ci				goto free_txreq;
79762306a36Sopenharmony_ci		}
79862306a36Sopenharmony_ci
79962306a36Sopenharmony_ci		req->koffset += datalen;
80062306a36Sopenharmony_ci		if (req_opcode(req->info.ctrl) == EXPECTED)
80162306a36Sopenharmony_ci			req->tidoffset += datalen;
80262306a36Sopenharmony_ci		req->sent += datalen;
80362306a36Sopenharmony_ci		while (datalen) {
80462306a36Sopenharmony_ci			ret = hfi1_add_pages_to_sdma_packet(req, tx, iovec,
80562306a36Sopenharmony_ci							    &datalen);
80662306a36Sopenharmony_ci			if (ret)
80762306a36Sopenharmony_ci				goto free_txreq;
80862306a36Sopenharmony_ci			iovec = &req->iovs[req->iov_idx];
80962306a36Sopenharmony_ci		}
81062306a36Sopenharmony_ci		list_add_tail(&tx->txreq.list, &req->txps);
81162306a36Sopenharmony_ci		/*
81262306a36Sopenharmony_ci		 * It is important to increment this here as it is used to
81362306a36Sopenharmony_ci		 * generate the BTH.PSN and, therefore, can't be bulk-updated
81462306a36Sopenharmony_ci		 * outside of the loop.
81562306a36Sopenharmony_ci		 */
81662306a36Sopenharmony_ci		tx->seqnum = req->seqnum++;
81762306a36Sopenharmony_ci		npkts++;
81862306a36Sopenharmony_ci	}
81962306a36Sopenharmony_cidosend:
82062306a36Sopenharmony_ci	ret = sdma_send_txlist(req->sde,
82162306a36Sopenharmony_ci			       iowait_get_ib_work(&pq->busy),
82262306a36Sopenharmony_ci			       &req->txps, &count);
82362306a36Sopenharmony_ci	req->seqsubmitted += count;
82462306a36Sopenharmony_ci	if (req->seqsubmitted == req->info.npkts) {
82562306a36Sopenharmony_ci		/*
82662306a36Sopenharmony_ci		 * The txreq has already been submitted to the HW queue
82762306a36Sopenharmony_ci		 * so we can free the AHG entry now. Corruption will not
82862306a36Sopenharmony_ci		 * happen due to the sequential manner in which
82962306a36Sopenharmony_ci		 * descriptors are processed.
83062306a36Sopenharmony_ci		 */
83162306a36Sopenharmony_ci		if (req->ahg_idx >= 0)
83262306a36Sopenharmony_ci			sdma_ahg_free(req->sde, req->ahg_idx);
83362306a36Sopenharmony_ci	}
83462306a36Sopenharmony_ci	return ret;
83562306a36Sopenharmony_ci
83662306a36Sopenharmony_cifree_txreq:
83762306a36Sopenharmony_ci	sdma_txclean(pq->dd, &tx->txreq);
83862306a36Sopenharmony_cifree_tx:
83962306a36Sopenharmony_ci	kmem_cache_free(pq->txreq_cache, tx);
84062306a36Sopenharmony_ci	return ret;
84162306a36Sopenharmony_ci}
84262306a36Sopenharmony_ci
84362306a36Sopenharmony_cistatic int check_header_template(struct user_sdma_request *req,
84462306a36Sopenharmony_ci				 struct hfi1_pkt_header *hdr, u32 lrhlen,
84562306a36Sopenharmony_ci				 u32 datalen)
84662306a36Sopenharmony_ci{
84762306a36Sopenharmony_ci	/*
84862306a36Sopenharmony_ci	 * Perform safety checks for any type of packet:
84962306a36Sopenharmony_ci	 *    - transfer size is multiple of 64bytes
85062306a36Sopenharmony_ci	 *    - packet length is multiple of 4 bytes
85162306a36Sopenharmony_ci	 *    - packet length is not larger than MTU size
85262306a36Sopenharmony_ci	 *
85362306a36Sopenharmony_ci	 * These checks are only done for the first packet of the
85462306a36Sopenharmony_ci	 * transfer since the header is "given" to us by user space.
85562306a36Sopenharmony_ci	 * For the remainder of the packets we compute the values.
85662306a36Sopenharmony_ci	 */
85762306a36Sopenharmony_ci	if (req->info.fragsize % PIO_BLOCK_SIZE || lrhlen & 0x3 ||
85862306a36Sopenharmony_ci	    lrhlen > get_lrh_len(*hdr, req->info.fragsize))
85962306a36Sopenharmony_ci		return -EINVAL;
86062306a36Sopenharmony_ci
86162306a36Sopenharmony_ci	if (req_opcode(req->info.ctrl) == EXPECTED) {
86262306a36Sopenharmony_ci		/*
86362306a36Sopenharmony_ci		 * The header is checked only on the first packet. Furthermore,
86462306a36Sopenharmony_ci		 * we ensure that at least one TID entry is copied when the
86562306a36Sopenharmony_ci		 * request is submitted. Therefore, we don't have to verify that
86662306a36Sopenharmony_ci		 * tididx points to something sane.
86762306a36Sopenharmony_ci		 */
86862306a36Sopenharmony_ci		u32 tidval = req->tids[req->tididx],
86962306a36Sopenharmony_ci			tidlen = EXP_TID_GET(tidval, LEN) * PAGE_SIZE,
87062306a36Sopenharmony_ci			tididx = EXP_TID_GET(tidval, IDX),
87162306a36Sopenharmony_ci			tidctrl = EXP_TID_GET(tidval, CTRL),
87262306a36Sopenharmony_ci			tidoff;
87362306a36Sopenharmony_ci		__le32 kval = hdr->kdeth.ver_tid_offset;
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_ci		tidoff = KDETH_GET(kval, OFFSET) *
87662306a36Sopenharmony_ci			  (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
87762306a36Sopenharmony_ci			   KDETH_OM_LARGE : KDETH_OM_SMALL);
87862306a36Sopenharmony_ci		/*
87962306a36Sopenharmony_ci		 * Expected receive packets have the following
88062306a36Sopenharmony_ci		 * additional checks:
88162306a36Sopenharmony_ci		 *     - offset is not larger than the TID size
88262306a36Sopenharmony_ci		 *     - TIDCtrl values match between header and TID array
88362306a36Sopenharmony_ci		 *     - TID indexes match between header and TID array
88462306a36Sopenharmony_ci		 */
88562306a36Sopenharmony_ci		if ((tidoff + datalen > tidlen) ||
88662306a36Sopenharmony_ci		    KDETH_GET(kval, TIDCTRL) != tidctrl ||
88762306a36Sopenharmony_ci		    KDETH_GET(kval, TID) != tididx)
88862306a36Sopenharmony_ci			return -EINVAL;
88962306a36Sopenharmony_ci	}
89062306a36Sopenharmony_ci	return 0;
89162306a36Sopenharmony_ci}
89262306a36Sopenharmony_ci
89362306a36Sopenharmony_ci/*
89462306a36Sopenharmony_ci * Correctly set the BTH.PSN field based on type of
89562306a36Sopenharmony_ci * transfer - eager packets can just increment the PSN but
89662306a36Sopenharmony_ci * expected packets encode generation and sequence in the
89762306a36Sopenharmony_ci * BTH.PSN field so just incrementing will result in errors.
89862306a36Sopenharmony_ci */
89962306a36Sopenharmony_cistatic inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags)
90062306a36Sopenharmony_ci{
90162306a36Sopenharmony_ci	u32 val = be32_to_cpu(bthpsn),
90262306a36Sopenharmony_ci		mask = (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffffull :
90362306a36Sopenharmony_ci			0xffffffull),
90462306a36Sopenharmony_ci		psn = val & mask;
90562306a36Sopenharmony_ci	if (expct)
90662306a36Sopenharmony_ci		psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) |
90762306a36Sopenharmony_ci			((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK);
90862306a36Sopenharmony_ci	else
90962306a36Sopenharmony_ci		psn = psn + frags;
91062306a36Sopenharmony_ci	return psn & mask;
91162306a36Sopenharmony_ci}
91262306a36Sopenharmony_ci
91362306a36Sopenharmony_cistatic int set_txreq_header(struct user_sdma_request *req,
91462306a36Sopenharmony_ci			    struct user_sdma_txreq *tx, u32 datalen)
91562306a36Sopenharmony_ci{
91662306a36Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq = req->pq;
91762306a36Sopenharmony_ci	struct hfi1_pkt_header *hdr = &tx->hdr;
91862306a36Sopenharmony_ci	u8 omfactor; /* KDETH.OM */
91962306a36Sopenharmony_ci	u16 pbclen;
92062306a36Sopenharmony_ci	int ret;
92162306a36Sopenharmony_ci	u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
92262306a36Sopenharmony_ci
92362306a36Sopenharmony_ci	/* Copy the header template to the request before modification */
92462306a36Sopenharmony_ci	memcpy(hdr, &req->hdr, sizeof(*hdr));
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci	/*
92762306a36Sopenharmony_ci	 * Check if the PBC and LRH length are mismatched. If so
92862306a36Sopenharmony_ci	 * adjust both in the header.
92962306a36Sopenharmony_ci	 */
93062306a36Sopenharmony_ci	pbclen = le16_to_cpu(hdr->pbc[0]);
93162306a36Sopenharmony_ci	if (PBC2LRH(pbclen) != lrhlen) {
93262306a36Sopenharmony_ci		pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen);
93362306a36Sopenharmony_ci		hdr->pbc[0] = cpu_to_le16(pbclen);
93462306a36Sopenharmony_ci		hdr->lrh[2] = cpu_to_be16(lrhlen >> 2);
93562306a36Sopenharmony_ci		/*
93662306a36Sopenharmony_ci		 * Third packet
93762306a36Sopenharmony_ci		 * This is the first packet in the sequence that has
93862306a36Sopenharmony_ci		 * a "static" size that can be used for the rest of
93962306a36Sopenharmony_ci		 * the packets (besides the last one).
94062306a36Sopenharmony_ci		 */
94162306a36Sopenharmony_ci		if (unlikely(req->seqnum == 2)) {
94262306a36Sopenharmony_ci			/*
94362306a36Sopenharmony_ci			 * From this point on the lengths in both the
94462306a36Sopenharmony_ci			 * PBC and LRH are the same until the last
94562306a36Sopenharmony_ci			 * packet.
94662306a36Sopenharmony_ci			 * Adjust the template so we don't have to update
94762306a36Sopenharmony_ci			 * every packet
94862306a36Sopenharmony_ci			 */
94962306a36Sopenharmony_ci			req->hdr.pbc[0] = hdr->pbc[0];
95062306a36Sopenharmony_ci			req->hdr.lrh[2] = hdr->lrh[2];
95162306a36Sopenharmony_ci		}
95262306a36Sopenharmony_ci	}
95362306a36Sopenharmony_ci	/*
95462306a36Sopenharmony_ci	 * We only have to modify the header if this is not the
95562306a36Sopenharmony_ci	 * first packet in the request. Otherwise, we use the
95662306a36Sopenharmony_ci	 * header given to us.
95762306a36Sopenharmony_ci	 */
95862306a36Sopenharmony_ci	if (unlikely(!req->seqnum)) {
95962306a36Sopenharmony_ci		ret = check_header_template(req, hdr, lrhlen, datalen);
96062306a36Sopenharmony_ci		if (ret)
96162306a36Sopenharmony_ci			return ret;
96262306a36Sopenharmony_ci		goto done;
96362306a36Sopenharmony_ci	}
96462306a36Sopenharmony_ci
96562306a36Sopenharmony_ci	hdr->bth[2] = cpu_to_be32(
96662306a36Sopenharmony_ci		set_pkt_bth_psn(hdr->bth[2],
96762306a36Sopenharmony_ci				(req_opcode(req->info.ctrl) == EXPECTED),
96862306a36Sopenharmony_ci				req->seqnum));
96962306a36Sopenharmony_ci
97062306a36Sopenharmony_ci	/* Set ACK request on last packet */
97162306a36Sopenharmony_ci	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
97262306a36Sopenharmony_ci		hdr->bth[2] |= cpu_to_be32(1UL << 31);
97362306a36Sopenharmony_ci
97462306a36Sopenharmony_ci	/* Set the new offset */
97562306a36Sopenharmony_ci	hdr->kdeth.swdata[6] = cpu_to_le32(req->koffset);
97662306a36Sopenharmony_ci	/* Expected packets have to fill in the new TID information */
97762306a36Sopenharmony_ci	if (req_opcode(req->info.ctrl) == EXPECTED) {
97862306a36Sopenharmony_ci		tidval = req->tids[req->tididx];
97962306a36Sopenharmony_ci		/*
98062306a36Sopenharmony_ci		 * If the offset puts us at the end of the current TID,
98162306a36Sopenharmony_ci		 * advance everything.
98262306a36Sopenharmony_ci		 */
98362306a36Sopenharmony_ci		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
98462306a36Sopenharmony_ci					 PAGE_SIZE)) {
98562306a36Sopenharmony_ci			req->tidoffset = 0;
98662306a36Sopenharmony_ci			/*
98762306a36Sopenharmony_ci			 * Since we don't copy all the TIDs, all at once,
98862306a36Sopenharmony_ci			 * we have to check again.
98962306a36Sopenharmony_ci			 */
99062306a36Sopenharmony_ci			if (++req->tididx > req->n_tids - 1 ||
99162306a36Sopenharmony_ci			    !req->tids[req->tididx]) {
99262306a36Sopenharmony_ci				return -EINVAL;
99362306a36Sopenharmony_ci			}
99462306a36Sopenharmony_ci			tidval = req->tids[req->tididx];
99562306a36Sopenharmony_ci		}
99662306a36Sopenharmony_ci		omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
99762306a36Sopenharmony_ci			KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
99862306a36Sopenharmony_ci			KDETH_OM_SMALL_SHIFT;
99962306a36Sopenharmony_ci		/* Set KDETH.TIDCtrl based on value for this TID. */
100062306a36Sopenharmony_ci		KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
100162306a36Sopenharmony_ci			  EXP_TID_GET(tidval, CTRL));
100262306a36Sopenharmony_ci		/* Set KDETH.TID based on value for this TID */
100362306a36Sopenharmony_ci		KDETH_SET(hdr->kdeth.ver_tid_offset, TID,
100462306a36Sopenharmony_ci			  EXP_TID_GET(tidval, IDX));
100562306a36Sopenharmony_ci		/* Clear KDETH.SH when DISABLE_SH flag is set */
100662306a36Sopenharmony_ci		if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH))
100762306a36Sopenharmony_ci			KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0);
100862306a36Sopenharmony_ci		/*
100962306a36Sopenharmony_ci		 * Set the KDETH.OFFSET and KDETH.OM based on size of
101062306a36Sopenharmony_ci		 * transfer.
101162306a36Sopenharmony_ci		 */
101262306a36Sopenharmony_ci		trace_hfi1_sdma_user_tid_info(
101362306a36Sopenharmony_ci			pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx,
101462306a36Sopenharmony_ci			req->tidoffset, req->tidoffset >> omfactor,
101562306a36Sopenharmony_ci			omfactor != KDETH_OM_SMALL_SHIFT);
101662306a36Sopenharmony_ci		KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
101762306a36Sopenharmony_ci			  req->tidoffset >> omfactor);
101862306a36Sopenharmony_ci		KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
101962306a36Sopenharmony_ci			  omfactor != KDETH_OM_SMALL_SHIFT);
102062306a36Sopenharmony_ci	}
102162306a36Sopenharmony_cidone:
102262306a36Sopenharmony_ci	trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
102362306a36Sopenharmony_ci				    req->info.comp_idx, hdr, tidval);
102462306a36Sopenharmony_ci	return sdma_txadd_kvaddr(pq->dd, &tx->txreq, hdr, sizeof(*hdr));
102562306a36Sopenharmony_ci}
102662306a36Sopenharmony_ci
102762306a36Sopenharmony_cistatic int set_txreq_header_ahg(struct user_sdma_request *req,
102862306a36Sopenharmony_ci				struct user_sdma_txreq *tx, u32 datalen)
102962306a36Sopenharmony_ci{
103062306a36Sopenharmony_ci	u32 ahg[AHG_KDETH_ARRAY_SIZE];
103162306a36Sopenharmony_ci	int idx = 0;
103262306a36Sopenharmony_ci	u8 omfactor; /* KDETH.OM */
103362306a36Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq = req->pq;
103462306a36Sopenharmony_ci	struct hfi1_pkt_header *hdr = &req->hdr;
103562306a36Sopenharmony_ci	u16 pbclen = le16_to_cpu(hdr->pbc[0]);
103662306a36Sopenharmony_ci	u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
103762306a36Sopenharmony_ci	size_t array_size = ARRAY_SIZE(ahg);
103862306a36Sopenharmony_ci
103962306a36Sopenharmony_ci	if (PBC2LRH(pbclen) != lrhlen) {
104062306a36Sopenharmony_ci		/* PBC.PbcLengthDWs */
104162306a36Sopenharmony_ci		idx = ahg_header_set(ahg, idx, array_size, 0, 0, 12,
104262306a36Sopenharmony_ci				     (__force u16)cpu_to_le16(LRH2PBC(lrhlen)));
104362306a36Sopenharmony_ci		if (idx < 0)
104462306a36Sopenharmony_ci			return idx;
104562306a36Sopenharmony_ci		/* LRH.PktLen (we need the full 16 bits due to byte swap) */
104662306a36Sopenharmony_ci		idx = ahg_header_set(ahg, idx, array_size, 3, 0, 16,
104762306a36Sopenharmony_ci				     (__force u16)cpu_to_be16(lrhlen >> 2));
104862306a36Sopenharmony_ci		if (idx < 0)
104962306a36Sopenharmony_ci			return idx;
105062306a36Sopenharmony_ci	}
105162306a36Sopenharmony_ci
105262306a36Sopenharmony_ci	/*
105362306a36Sopenharmony_ci	 * Do the common updates
105462306a36Sopenharmony_ci	 */
105562306a36Sopenharmony_ci	/* BTH.PSN and BTH.A */
105662306a36Sopenharmony_ci	val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) &
105762306a36Sopenharmony_ci		(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
105862306a36Sopenharmony_ci	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
105962306a36Sopenharmony_ci		val32 |= 1UL << 31;
106062306a36Sopenharmony_ci	idx = ahg_header_set(ahg, idx, array_size, 6, 0, 16,
106162306a36Sopenharmony_ci			     (__force u16)cpu_to_be16(val32 >> 16));
106262306a36Sopenharmony_ci	if (idx < 0)
106362306a36Sopenharmony_ci		return idx;
106462306a36Sopenharmony_ci	idx = ahg_header_set(ahg, idx, array_size, 6, 16, 16,
106562306a36Sopenharmony_ci			     (__force u16)cpu_to_be16(val32 & 0xffff));
106662306a36Sopenharmony_ci	if (idx < 0)
106762306a36Sopenharmony_ci		return idx;
106862306a36Sopenharmony_ci	/* KDETH.Offset */
106962306a36Sopenharmony_ci	idx = ahg_header_set(ahg, idx, array_size, 15, 0, 16,
107062306a36Sopenharmony_ci			     (__force u16)cpu_to_le16(req->koffset & 0xffff));
107162306a36Sopenharmony_ci	if (idx < 0)
107262306a36Sopenharmony_ci		return idx;
107362306a36Sopenharmony_ci	idx = ahg_header_set(ahg, idx, array_size, 15, 16, 16,
107462306a36Sopenharmony_ci			     (__force u16)cpu_to_le16(req->koffset >> 16));
107562306a36Sopenharmony_ci	if (idx < 0)
107662306a36Sopenharmony_ci		return idx;
107762306a36Sopenharmony_ci	if (req_opcode(req->info.ctrl) == EXPECTED) {
107862306a36Sopenharmony_ci		__le16 val;
107962306a36Sopenharmony_ci
108062306a36Sopenharmony_ci		tidval = req->tids[req->tididx];
108162306a36Sopenharmony_ci
108262306a36Sopenharmony_ci		/*
108362306a36Sopenharmony_ci		 * If the offset puts us at the end of the current TID,
108462306a36Sopenharmony_ci		 * advance everything.
108562306a36Sopenharmony_ci		 */
108662306a36Sopenharmony_ci		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
108762306a36Sopenharmony_ci					 PAGE_SIZE)) {
108862306a36Sopenharmony_ci			req->tidoffset = 0;
108962306a36Sopenharmony_ci			/*
109062306a36Sopenharmony_ci			 * Since we don't copy all the TIDs, all at once,
109162306a36Sopenharmony_ci			 * we have to check again.
109262306a36Sopenharmony_ci			 */
109362306a36Sopenharmony_ci			if (++req->tididx > req->n_tids - 1 ||
109462306a36Sopenharmony_ci			    !req->tids[req->tididx])
109562306a36Sopenharmony_ci				return -EINVAL;
109662306a36Sopenharmony_ci			tidval = req->tids[req->tididx];
109762306a36Sopenharmony_ci		}
109862306a36Sopenharmony_ci		omfactor = ((EXP_TID_GET(tidval, LEN) *
109962306a36Sopenharmony_ci				  PAGE_SIZE) >=
110062306a36Sopenharmony_ci				 KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
110162306a36Sopenharmony_ci				 KDETH_OM_SMALL_SHIFT;
110262306a36Sopenharmony_ci		/* KDETH.OM and KDETH.OFFSET (TID) */
110362306a36Sopenharmony_ci		idx = ahg_header_set(
110462306a36Sopenharmony_ci				ahg, idx, array_size, 7, 0, 16,
110562306a36Sopenharmony_ci				((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
110662306a36Sopenharmony_ci				((req->tidoffset >> omfactor)
110762306a36Sopenharmony_ci				& 0x7fff)));
110862306a36Sopenharmony_ci		if (idx < 0)
110962306a36Sopenharmony_ci			return idx;
111062306a36Sopenharmony_ci		/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
111162306a36Sopenharmony_ci		val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
111262306a36Sopenharmony_ci				   (EXP_TID_GET(tidval, IDX) & 0x3ff));
111362306a36Sopenharmony_ci
111462306a36Sopenharmony_ci		if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) {
111562306a36Sopenharmony_ci			val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
111662306a36Sopenharmony_ci						      INTR) <<
111762306a36Sopenharmony_ci					    AHG_KDETH_INTR_SHIFT));
111862306a36Sopenharmony_ci		} else {
111962306a36Sopenharmony_ci			val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ?
112062306a36Sopenharmony_ci			       cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) :
112162306a36Sopenharmony_ci			       cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
112262306a36Sopenharmony_ci						      INTR) <<
112362306a36Sopenharmony_ci					     AHG_KDETH_INTR_SHIFT));
112462306a36Sopenharmony_ci		}
112562306a36Sopenharmony_ci
112662306a36Sopenharmony_ci		idx = ahg_header_set(ahg, idx, array_size,
112762306a36Sopenharmony_ci				     7, 16, 14, (__force u16)val);
112862306a36Sopenharmony_ci		if (idx < 0)
112962306a36Sopenharmony_ci			return idx;
113062306a36Sopenharmony_ci	}
113162306a36Sopenharmony_ci
113262306a36Sopenharmony_ci	trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
113362306a36Sopenharmony_ci					req->info.comp_idx, req->sde->this_idx,
113462306a36Sopenharmony_ci					req->ahg_idx, ahg, idx, tidval);
113562306a36Sopenharmony_ci	sdma_txinit_ahg(&tx->txreq,
113662306a36Sopenharmony_ci			SDMA_TXREQ_F_USE_AHG,
113762306a36Sopenharmony_ci			datalen, req->ahg_idx, idx,
113862306a36Sopenharmony_ci			ahg, sizeof(req->hdr),
113962306a36Sopenharmony_ci			user_sdma_txreq_cb);
114062306a36Sopenharmony_ci
114162306a36Sopenharmony_ci	return idx;
114262306a36Sopenharmony_ci}
114362306a36Sopenharmony_ci
114462306a36Sopenharmony_ci/**
114562306a36Sopenharmony_ci * user_sdma_txreq_cb() - SDMA tx request completion callback.
114662306a36Sopenharmony_ci * @txreq: valid sdma tx request
114762306a36Sopenharmony_ci * @status: success/failure of request
114862306a36Sopenharmony_ci *
114962306a36Sopenharmony_ci * Called when the SDMA progress state machine gets notification that
115062306a36Sopenharmony_ci * the SDMA descriptors for this tx request have been processed by the
115162306a36Sopenharmony_ci * DMA engine. Called in interrupt context.
115262306a36Sopenharmony_ci * Only do work on completed sequences.
115362306a36Sopenharmony_ci */
115462306a36Sopenharmony_cistatic void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
115562306a36Sopenharmony_ci{
115662306a36Sopenharmony_ci	struct user_sdma_txreq *tx =
115762306a36Sopenharmony_ci		container_of(txreq, struct user_sdma_txreq, txreq);
115862306a36Sopenharmony_ci	struct user_sdma_request *req;
115962306a36Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq;
116062306a36Sopenharmony_ci	struct hfi1_user_sdma_comp_q *cq;
116162306a36Sopenharmony_ci	enum hfi1_sdma_comp_state state = COMPLETE;
116262306a36Sopenharmony_ci
116362306a36Sopenharmony_ci	if (!tx->req)
116462306a36Sopenharmony_ci		return;
116562306a36Sopenharmony_ci
116662306a36Sopenharmony_ci	req = tx->req;
116762306a36Sopenharmony_ci	pq = req->pq;
116862306a36Sopenharmony_ci	cq = req->cq;
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_ci	if (status != SDMA_TXREQ_S_OK) {
117162306a36Sopenharmony_ci		SDMA_DBG(req, "SDMA completion with error %d",
117262306a36Sopenharmony_ci			 status);
117362306a36Sopenharmony_ci		WRITE_ONCE(req->has_error, 1);
117462306a36Sopenharmony_ci		state = ERROR;
117562306a36Sopenharmony_ci	}
117662306a36Sopenharmony_ci
117762306a36Sopenharmony_ci	req->seqcomp = tx->seqnum;
117862306a36Sopenharmony_ci	kmem_cache_free(pq->txreq_cache, tx);
117962306a36Sopenharmony_ci
118062306a36Sopenharmony_ci	/* sequence isn't complete?  We are done */
118162306a36Sopenharmony_ci	if (req->seqcomp != req->info.npkts - 1)
118262306a36Sopenharmony_ci		return;
118362306a36Sopenharmony_ci
118462306a36Sopenharmony_ci	user_sdma_free_request(req);
118562306a36Sopenharmony_ci	set_comp_state(pq, cq, req->info.comp_idx, state, status);
118662306a36Sopenharmony_ci	pq_update(pq);
118762306a36Sopenharmony_ci}
118862306a36Sopenharmony_ci
118962306a36Sopenharmony_cistatic inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
119062306a36Sopenharmony_ci{
119162306a36Sopenharmony_ci	if (atomic_dec_and_test(&pq->n_reqs))
119262306a36Sopenharmony_ci		wake_up(&pq->wait);
119362306a36Sopenharmony_ci}
119462306a36Sopenharmony_ci
119562306a36Sopenharmony_cistatic void user_sdma_free_request(struct user_sdma_request *req)
119662306a36Sopenharmony_ci{
119762306a36Sopenharmony_ci	if (!list_empty(&req->txps)) {
119862306a36Sopenharmony_ci		struct sdma_txreq *t, *p;
119962306a36Sopenharmony_ci
120062306a36Sopenharmony_ci		list_for_each_entry_safe(t, p, &req->txps, list) {
120162306a36Sopenharmony_ci			struct user_sdma_txreq *tx =
120262306a36Sopenharmony_ci				container_of(t, struct user_sdma_txreq, txreq);
120362306a36Sopenharmony_ci			list_del_init(&t->list);
120462306a36Sopenharmony_ci			sdma_txclean(req->pq->dd, t);
120562306a36Sopenharmony_ci			kmem_cache_free(req->pq->txreq_cache, tx);
120662306a36Sopenharmony_ci		}
120762306a36Sopenharmony_ci	}
120862306a36Sopenharmony_ci
120962306a36Sopenharmony_ci	kfree(req->tids);
121062306a36Sopenharmony_ci	clear_bit(req->info.comp_idx, req->pq->req_in_use);
121162306a36Sopenharmony_ci}
121262306a36Sopenharmony_ci
121362306a36Sopenharmony_cistatic inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
121462306a36Sopenharmony_ci				  struct hfi1_user_sdma_comp_q *cq,
121562306a36Sopenharmony_ci				  u16 idx, enum hfi1_sdma_comp_state state,
121662306a36Sopenharmony_ci				  int ret)
121762306a36Sopenharmony_ci{
121862306a36Sopenharmony_ci	if (state == ERROR)
121962306a36Sopenharmony_ci		cq->comps[idx].errcode = -ret;
122062306a36Sopenharmony_ci	smp_wmb(); /* make sure errcode is visible first */
122162306a36Sopenharmony_ci	cq->comps[idx].status = state;
122262306a36Sopenharmony_ci	trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
122362306a36Sopenharmony_ci					idx, state, ret);
122462306a36Sopenharmony_ci}
1225