18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright(c) 2020 - Cornelis Networks, Inc.
38c2ecf20Sopenharmony_ci * Copyright(c) 2015 - 2018 Intel Corporation.
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * This file is provided under a dual BSD/GPLv2 license.  When using or
68c2ecf20Sopenharmony_ci * redistributing this file, you may do so under either license.
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci * GPL LICENSE SUMMARY
98c2ecf20Sopenharmony_ci *
108c2ecf20Sopenharmony_ci * This program is free software; you can redistribute it and/or modify
118c2ecf20Sopenharmony_ci * it under the terms of version 2 of the GNU General Public License as
128c2ecf20Sopenharmony_ci * published by the Free Software Foundation.
138c2ecf20Sopenharmony_ci *
148c2ecf20Sopenharmony_ci * This program is distributed in the hope that it will be useful, but
158c2ecf20Sopenharmony_ci * WITHOUT ANY WARRANTY; without even the implied warranty of
168c2ecf20Sopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
178c2ecf20Sopenharmony_ci * General Public License for more details.
188c2ecf20Sopenharmony_ci *
198c2ecf20Sopenharmony_ci * BSD LICENSE
208c2ecf20Sopenharmony_ci *
218c2ecf20Sopenharmony_ci * Redistribution and use in source and binary forms, with or without
228c2ecf20Sopenharmony_ci * modification, are permitted provided that the following conditions
238c2ecf20Sopenharmony_ci * are met:
248c2ecf20Sopenharmony_ci *
258c2ecf20Sopenharmony_ci *  - Redistributions of source code must retain the above copyright
268c2ecf20Sopenharmony_ci *    notice, this list of conditions and the following disclaimer.
278c2ecf20Sopenharmony_ci *  - Redistributions in binary form must reproduce the above copyright
288c2ecf20Sopenharmony_ci *    notice, this list of conditions and the following disclaimer in
298c2ecf20Sopenharmony_ci *    the documentation and/or other materials provided with the
308c2ecf20Sopenharmony_ci *    distribution.
318c2ecf20Sopenharmony_ci *  - Neither the name of Intel Corporation nor the names of its
328c2ecf20Sopenharmony_ci *    contributors may be used to endorse or promote products derived
338c2ecf20Sopenharmony_ci *    from this software without specific prior written permission.
348c2ecf20Sopenharmony_ci *
358c2ecf20Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
368c2ecf20Sopenharmony_ci * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
378c2ecf20Sopenharmony_ci * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
388c2ecf20Sopenharmony_ci * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
398c2ecf20Sopenharmony_ci * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
408c2ecf20Sopenharmony_ci * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
418c2ecf20Sopenharmony_ci * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
428c2ecf20Sopenharmony_ci * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
438c2ecf20Sopenharmony_ci * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
448c2ecf20Sopenharmony_ci * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
458c2ecf20Sopenharmony_ci * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
468c2ecf20Sopenharmony_ci *
478c2ecf20Sopenharmony_ci */
488c2ecf20Sopenharmony_ci#include <linux/mm.h>
498c2ecf20Sopenharmony_ci#include <linux/types.h>
508c2ecf20Sopenharmony_ci#include <linux/device.h>
518c2ecf20Sopenharmony_ci#include <linux/dmapool.h>
528c2ecf20Sopenharmony_ci#include <linux/slab.h>
538c2ecf20Sopenharmony_ci#include <linux/list.h>
548c2ecf20Sopenharmony_ci#include <linux/highmem.h>
558c2ecf20Sopenharmony_ci#include <linux/io.h>
568c2ecf20Sopenharmony_ci#include <linux/uio.h>
578c2ecf20Sopenharmony_ci#include <linux/rbtree.h>
588c2ecf20Sopenharmony_ci#include <linux/spinlock.h>
598c2ecf20Sopenharmony_ci#include <linux/delay.h>
608c2ecf20Sopenharmony_ci#include <linux/kthread.h>
618c2ecf20Sopenharmony_ci#include <linux/mmu_context.h>
628c2ecf20Sopenharmony_ci#include <linux/module.h>
638c2ecf20Sopenharmony_ci#include <linux/vmalloc.h>
648c2ecf20Sopenharmony_ci#include <linux/string.h>
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci#include "hfi.h"
678c2ecf20Sopenharmony_ci#include "sdma.h"
688c2ecf20Sopenharmony_ci#include "user_sdma.h"
698c2ecf20Sopenharmony_ci#include "verbs.h"  /* for the headers */
708c2ecf20Sopenharmony_ci#include "common.h" /* for struct hfi1_tid_info */
718c2ecf20Sopenharmony_ci#include "trace.h"
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_cistatic uint hfi1_sdma_comp_ring_size = 128;
748c2ecf20Sopenharmony_cimodule_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
758c2ecf20Sopenharmony_ciMODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128");
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_cistatic unsigned initial_pkt_count = 8;
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_cistatic int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
808c2ecf20Sopenharmony_cistatic void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
818c2ecf20Sopenharmony_cistatic inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
828c2ecf20Sopenharmony_cistatic void user_sdma_free_request(struct user_sdma_request *req);
838c2ecf20Sopenharmony_cistatic int check_header_template(struct user_sdma_request *req,
848c2ecf20Sopenharmony_ci				 struct hfi1_pkt_header *hdr, u32 lrhlen,
858c2ecf20Sopenharmony_ci				 u32 datalen);
868c2ecf20Sopenharmony_cistatic int set_txreq_header(struct user_sdma_request *req,
878c2ecf20Sopenharmony_ci			    struct user_sdma_txreq *tx, u32 datalen);
888c2ecf20Sopenharmony_cistatic int set_txreq_header_ahg(struct user_sdma_request *req,
898c2ecf20Sopenharmony_ci				struct user_sdma_txreq *tx, u32 len);
908c2ecf20Sopenharmony_cistatic inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
918c2ecf20Sopenharmony_ci				  struct hfi1_user_sdma_comp_q *cq,
928c2ecf20Sopenharmony_ci				  u16 idx, enum hfi1_sdma_comp_state state,
938c2ecf20Sopenharmony_ci				  int ret);
948c2ecf20Sopenharmony_cistatic inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags);
958c2ecf20Sopenharmony_cistatic inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
968c2ecf20Sopenharmony_ci
978c2ecf20Sopenharmony_cistatic int defer_packet_queue(
988c2ecf20Sopenharmony_ci	struct sdma_engine *sde,
998c2ecf20Sopenharmony_ci	struct iowait_work *wait,
1008c2ecf20Sopenharmony_ci	struct sdma_txreq *txreq,
1018c2ecf20Sopenharmony_ci	uint seq,
1028c2ecf20Sopenharmony_ci	bool pkts_sent);
1038c2ecf20Sopenharmony_cistatic void activate_packet_queue(struct iowait *wait, int reason);
1048c2ecf20Sopenharmony_cistatic bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
1058c2ecf20Sopenharmony_ci			   unsigned long len);
1068c2ecf20Sopenharmony_cistatic int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
1078c2ecf20Sopenharmony_ci			 void *arg2, bool *stop);
1088c2ecf20Sopenharmony_cistatic void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_cistatic struct mmu_rb_ops sdma_rb_ops = {
1118c2ecf20Sopenharmony_ci	.filter = sdma_rb_filter,
1128c2ecf20Sopenharmony_ci	.evict = sdma_rb_evict,
1138c2ecf20Sopenharmony_ci	.remove = sdma_rb_remove,
1148c2ecf20Sopenharmony_ci};
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_cistatic int add_system_pages_to_sdma_packet(struct user_sdma_request *req,
1178c2ecf20Sopenharmony_ci					   struct user_sdma_txreq *tx,
1188c2ecf20Sopenharmony_ci					   struct user_sdma_iovec *iovec,
1198c2ecf20Sopenharmony_ci					   u32 *pkt_remaining);
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_cistatic int defer_packet_queue(
1228c2ecf20Sopenharmony_ci	struct sdma_engine *sde,
1238c2ecf20Sopenharmony_ci	struct iowait_work *wait,
1248c2ecf20Sopenharmony_ci	struct sdma_txreq *txreq,
1258c2ecf20Sopenharmony_ci	uint seq,
1268c2ecf20Sopenharmony_ci	bool pkts_sent)
1278c2ecf20Sopenharmony_ci{
1288c2ecf20Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq =
1298c2ecf20Sopenharmony_ci		container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_ci	write_seqlock(&sde->waitlock);
1328c2ecf20Sopenharmony_ci	trace_hfi1_usdma_defer(pq, sde, &pq->busy);
1338c2ecf20Sopenharmony_ci	if (sdma_progress(sde, seq, txreq))
1348c2ecf20Sopenharmony_ci		goto eagain;
1358c2ecf20Sopenharmony_ci	/*
1368c2ecf20Sopenharmony_ci	 * We are assuming that if the list is enqueued somewhere, it
1378c2ecf20Sopenharmony_ci	 * is to the dmawait list since that is the only place where
1388c2ecf20Sopenharmony_ci	 * it is supposed to be enqueued.
1398c2ecf20Sopenharmony_ci	 */
1408c2ecf20Sopenharmony_ci	xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
1418c2ecf20Sopenharmony_ci	if (list_empty(&pq->busy.list)) {
1428c2ecf20Sopenharmony_ci		pq->busy.lock = &sde->waitlock;
1438c2ecf20Sopenharmony_ci		iowait_get_priority(&pq->busy);
1448c2ecf20Sopenharmony_ci		iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
1458c2ecf20Sopenharmony_ci	}
1468c2ecf20Sopenharmony_ci	write_sequnlock(&sde->waitlock);
1478c2ecf20Sopenharmony_ci	return -EBUSY;
1488c2ecf20Sopenharmony_cieagain:
1498c2ecf20Sopenharmony_ci	write_sequnlock(&sde->waitlock);
1508c2ecf20Sopenharmony_ci	return -EAGAIN;
1518c2ecf20Sopenharmony_ci}
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_cistatic void activate_packet_queue(struct iowait *wait, int reason)
1548c2ecf20Sopenharmony_ci{
1558c2ecf20Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq =
1568c2ecf20Sopenharmony_ci		container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci	trace_hfi1_usdma_activate(pq, wait, reason);
1598c2ecf20Sopenharmony_ci	xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
1608c2ecf20Sopenharmony_ci	wake_up(&wait->wait_dma);
1618c2ecf20Sopenharmony_ci};
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ciint hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
1648c2ecf20Sopenharmony_ci				struct hfi1_filedata *fd)
1658c2ecf20Sopenharmony_ci{
1668c2ecf20Sopenharmony_ci	int ret = -ENOMEM;
1678c2ecf20Sopenharmony_ci	char buf[64];
1688c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd;
1698c2ecf20Sopenharmony_ci	struct hfi1_user_sdma_comp_q *cq;
1708c2ecf20Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq;
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ci	if (!uctxt || !fd)
1738c2ecf20Sopenharmony_ci		return -EBADF;
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci	if (!hfi1_sdma_comp_ring_size)
1768c2ecf20Sopenharmony_ci		return -EINVAL;
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci	dd = uctxt->dd;
1798c2ecf20Sopenharmony_ci
1808c2ecf20Sopenharmony_ci	pq = kzalloc(sizeof(*pq), GFP_KERNEL);
1818c2ecf20Sopenharmony_ci	if (!pq)
1828c2ecf20Sopenharmony_ci		return -ENOMEM;
1838c2ecf20Sopenharmony_ci	pq->dd = dd;
1848c2ecf20Sopenharmony_ci	pq->ctxt = uctxt->ctxt;
1858c2ecf20Sopenharmony_ci	pq->subctxt = fd->subctxt;
1868c2ecf20Sopenharmony_ci	pq->n_max_reqs = hfi1_sdma_comp_ring_size;
1878c2ecf20Sopenharmony_ci	atomic_set(&pq->n_reqs, 0);
1888c2ecf20Sopenharmony_ci	init_waitqueue_head(&pq->wait);
1898c2ecf20Sopenharmony_ci	atomic_set(&pq->n_locked, 0);
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci	iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
1928c2ecf20Sopenharmony_ci		    activate_packet_queue, NULL, NULL);
1938c2ecf20Sopenharmony_ci	pq->reqidx = 0;
1948c2ecf20Sopenharmony_ci
1958c2ecf20Sopenharmony_ci	pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
1968c2ecf20Sopenharmony_ci			   sizeof(*pq->reqs),
1978c2ecf20Sopenharmony_ci			   GFP_KERNEL);
1988c2ecf20Sopenharmony_ci	if (!pq->reqs)
1998c2ecf20Sopenharmony_ci		goto pq_reqs_nomem;
2008c2ecf20Sopenharmony_ci
2018c2ecf20Sopenharmony_ci	pq->req_in_use = bitmap_zalloc(hfi1_sdma_comp_ring_size, GFP_KERNEL);
2028c2ecf20Sopenharmony_ci	if (!pq->req_in_use)
2038c2ecf20Sopenharmony_ci		goto pq_reqs_no_in_use;
2048c2ecf20Sopenharmony_ci
2058c2ecf20Sopenharmony_ci	snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
2068c2ecf20Sopenharmony_ci		 fd->subctxt);
2078c2ecf20Sopenharmony_ci	pq->txreq_cache = kmem_cache_create(buf,
2088c2ecf20Sopenharmony_ci					    sizeof(struct user_sdma_txreq),
2098c2ecf20Sopenharmony_ci					    L1_CACHE_BYTES,
2108c2ecf20Sopenharmony_ci					    SLAB_HWCACHE_ALIGN,
2118c2ecf20Sopenharmony_ci					    NULL);
2128c2ecf20Sopenharmony_ci	if (!pq->txreq_cache) {
2138c2ecf20Sopenharmony_ci		dd_dev_err(dd, "[%u] Failed to allocate TxReq cache\n",
2148c2ecf20Sopenharmony_ci			   uctxt->ctxt);
2158c2ecf20Sopenharmony_ci		goto pq_txreq_nomem;
2168c2ecf20Sopenharmony_ci	}
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
2198c2ecf20Sopenharmony_ci	if (!cq)
2208c2ecf20Sopenharmony_ci		goto cq_nomem;
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_ci	cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps)
2238c2ecf20Sopenharmony_ci				 * hfi1_sdma_comp_ring_size));
2248c2ecf20Sopenharmony_ci	if (!cq->comps)
2258c2ecf20Sopenharmony_ci		goto cq_comps_nomem;
2268c2ecf20Sopenharmony_ci
2278c2ecf20Sopenharmony_ci	cq->nentries = hfi1_sdma_comp_ring_size;
2288c2ecf20Sopenharmony_ci
2298c2ecf20Sopenharmony_ci	ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq,
2308c2ecf20Sopenharmony_ci				   &pq->handler);
2318c2ecf20Sopenharmony_ci	if (ret) {
2328c2ecf20Sopenharmony_ci		dd_dev_err(dd, "Failed to register with MMU %d", ret);
2338c2ecf20Sopenharmony_ci		goto pq_mmu_fail;
2348c2ecf20Sopenharmony_ci	}
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ci	rcu_assign_pointer(fd->pq, pq);
2378c2ecf20Sopenharmony_ci	fd->cq = cq;
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci	return 0;
2408c2ecf20Sopenharmony_ci
2418c2ecf20Sopenharmony_cipq_mmu_fail:
2428c2ecf20Sopenharmony_ci	vfree(cq->comps);
2438c2ecf20Sopenharmony_cicq_comps_nomem:
2448c2ecf20Sopenharmony_ci	kfree(cq);
2458c2ecf20Sopenharmony_cicq_nomem:
2468c2ecf20Sopenharmony_ci	kmem_cache_destroy(pq->txreq_cache);
2478c2ecf20Sopenharmony_cipq_txreq_nomem:
2488c2ecf20Sopenharmony_ci	bitmap_free(pq->req_in_use);
2498c2ecf20Sopenharmony_cipq_reqs_no_in_use:
2508c2ecf20Sopenharmony_ci	kfree(pq->reqs);
2518c2ecf20Sopenharmony_cipq_reqs_nomem:
2528c2ecf20Sopenharmony_ci	kfree(pq);
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_ci	return ret;
2558c2ecf20Sopenharmony_ci}
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_cistatic void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq)
2588c2ecf20Sopenharmony_ci{
2598c2ecf20Sopenharmony_ci	unsigned long flags;
2608c2ecf20Sopenharmony_ci	seqlock_t *lock = pq->busy.lock;
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ci	if (!lock)
2638c2ecf20Sopenharmony_ci		return;
2648c2ecf20Sopenharmony_ci	write_seqlock_irqsave(lock, flags);
2658c2ecf20Sopenharmony_ci	if (!list_empty(&pq->busy.list)) {
2668c2ecf20Sopenharmony_ci		list_del_init(&pq->busy.list);
2678c2ecf20Sopenharmony_ci		pq->busy.lock = NULL;
2688c2ecf20Sopenharmony_ci	}
2698c2ecf20Sopenharmony_ci	write_sequnlock_irqrestore(lock, flags);
2708c2ecf20Sopenharmony_ci}
2718c2ecf20Sopenharmony_ci
2728c2ecf20Sopenharmony_ciint hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
2738c2ecf20Sopenharmony_ci			       struct hfi1_ctxtdata *uctxt)
2748c2ecf20Sopenharmony_ci{
2758c2ecf20Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq;
2768c2ecf20Sopenharmony_ci
2778c2ecf20Sopenharmony_ci	trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_ci	spin_lock(&fd->pq_rcu_lock);
2808c2ecf20Sopenharmony_ci	pq = srcu_dereference_check(fd->pq, &fd->pq_srcu,
2818c2ecf20Sopenharmony_ci				    lockdep_is_held(&fd->pq_rcu_lock));
2828c2ecf20Sopenharmony_ci	if (pq) {
2838c2ecf20Sopenharmony_ci		rcu_assign_pointer(fd->pq, NULL);
2848c2ecf20Sopenharmony_ci		spin_unlock(&fd->pq_rcu_lock);
2858c2ecf20Sopenharmony_ci		synchronize_srcu(&fd->pq_srcu);
2868c2ecf20Sopenharmony_ci		/* at this point there can be no more new requests */
2878c2ecf20Sopenharmony_ci		iowait_sdma_drain(&pq->busy);
2888c2ecf20Sopenharmony_ci		/* Wait until all requests have been freed. */
2898c2ecf20Sopenharmony_ci		wait_event_interruptible(
2908c2ecf20Sopenharmony_ci			pq->wait,
2918c2ecf20Sopenharmony_ci			!atomic_read(&pq->n_reqs));
2928c2ecf20Sopenharmony_ci		kfree(pq->reqs);
2938c2ecf20Sopenharmony_ci		if (pq->handler)
2948c2ecf20Sopenharmony_ci			hfi1_mmu_rb_unregister(pq->handler);
2958c2ecf20Sopenharmony_ci		bitmap_free(pq->req_in_use);
2968c2ecf20Sopenharmony_ci		kmem_cache_destroy(pq->txreq_cache);
2978c2ecf20Sopenharmony_ci		flush_pq_iowait(pq);
2988c2ecf20Sopenharmony_ci		kfree(pq);
2998c2ecf20Sopenharmony_ci	} else {
3008c2ecf20Sopenharmony_ci		spin_unlock(&fd->pq_rcu_lock);
3018c2ecf20Sopenharmony_ci	}
3028c2ecf20Sopenharmony_ci	if (fd->cq) {
3038c2ecf20Sopenharmony_ci		vfree(fd->cq->comps);
3048c2ecf20Sopenharmony_ci		kfree(fd->cq);
3058c2ecf20Sopenharmony_ci		fd->cq = NULL;
3068c2ecf20Sopenharmony_ci	}
3078c2ecf20Sopenharmony_ci	return 0;
3088c2ecf20Sopenharmony_ci}
3098c2ecf20Sopenharmony_ci
3108c2ecf20Sopenharmony_cistatic u8 dlid_to_selector(u16 dlid)
3118c2ecf20Sopenharmony_ci{
3128c2ecf20Sopenharmony_ci	static u8 mapping[256];
3138c2ecf20Sopenharmony_ci	static int initialized;
3148c2ecf20Sopenharmony_ci	static u8 next;
3158c2ecf20Sopenharmony_ci	int hash;
3168c2ecf20Sopenharmony_ci
3178c2ecf20Sopenharmony_ci	if (!initialized) {
3188c2ecf20Sopenharmony_ci		memset(mapping, 0xFF, 256);
3198c2ecf20Sopenharmony_ci		initialized = 1;
3208c2ecf20Sopenharmony_ci	}
3218c2ecf20Sopenharmony_ci
3228c2ecf20Sopenharmony_ci	hash = ((dlid >> 8) ^ dlid) & 0xFF;
3238c2ecf20Sopenharmony_ci	if (mapping[hash] == 0xFF) {
3248c2ecf20Sopenharmony_ci		mapping[hash] = next;
3258c2ecf20Sopenharmony_ci		next = (next + 1) & 0x7F;
3268c2ecf20Sopenharmony_ci	}
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_ci	return mapping[hash];
3298c2ecf20Sopenharmony_ci}
3308c2ecf20Sopenharmony_ci
3318c2ecf20Sopenharmony_ci/**
3328c2ecf20Sopenharmony_ci * hfi1_user_sdma_process_request() - Process and start a user sdma request
3338c2ecf20Sopenharmony_ci * @fd: valid file descriptor
3348c2ecf20Sopenharmony_ci * @iovec: array of io vectors to process
3358c2ecf20Sopenharmony_ci * @dim: overall iovec array size
3368c2ecf20Sopenharmony_ci * @count: number of io vector array entries processed
3378c2ecf20Sopenharmony_ci */
3388c2ecf20Sopenharmony_ciint hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
3398c2ecf20Sopenharmony_ci				   struct iovec *iovec, unsigned long dim,
3408c2ecf20Sopenharmony_ci				   unsigned long *count)
3418c2ecf20Sopenharmony_ci{
3428c2ecf20Sopenharmony_ci	int ret = 0, i;
3438c2ecf20Sopenharmony_ci	struct hfi1_ctxtdata *uctxt = fd->uctxt;
3448c2ecf20Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq =
3458c2ecf20Sopenharmony_ci		srcu_dereference(fd->pq, &fd->pq_srcu);
3468c2ecf20Sopenharmony_ci	struct hfi1_user_sdma_comp_q *cq = fd->cq;
3478c2ecf20Sopenharmony_ci	struct hfi1_devdata *dd = pq->dd;
3488c2ecf20Sopenharmony_ci	unsigned long idx = 0;
3498c2ecf20Sopenharmony_ci	u8 pcount = initial_pkt_count;
3508c2ecf20Sopenharmony_ci	struct sdma_req_info info;
3518c2ecf20Sopenharmony_ci	struct user_sdma_request *req;
3528c2ecf20Sopenharmony_ci	u8 opcode, sc, vl;
3538c2ecf20Sopenharmony_ci	u16 pkey;
3548c2ecf20Sopenharmony_ci	u32 slid;
3558c2ecf20Sopenharmony_ci	u16 dlid;
3568c2ecf20Sopenharmony_ci	u32 selector;
3578c2ecf20Sopenharmony_ci
3588c2ecf20Sopenharmony_ci	if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
3598c2ecf20Sopenharmony_ci		hfi1_cdbg(
3608c2ecf20Sopenharmony_ci		   SDMA,
3618c2ecf20Sopenharmony_ci		   "[%u:%u:%u] First vector not big enough for header %lu/%lu",
3628c2ecf20Sopenharmony_ci		   dd->unit, uctxt->ctxt, fd->subctxt,
3638c2ecf20Sopenharmony_ci		   iovec[idx].iov_len, sizeof(info) + sizeof(req->hdr));
3648c2ecf20Sopenharmony_ci		return -EINVAL;
3658c2ecf20Sopenharmony_ci	}
3668c2ecf20Sopenharmony_ci	ret = copy_from_user(&info, iovec[idx].iov_base, sizeof(info));
3678c2ecf20Sopenharmony_ci	if (ret) {
3688c2ecf20Sopenharmony_ci		hfi1_cdbg(SDMA, "[%u:%u:%u] Failed to copy info QW (%d)",
3698c2ecf20Sopenharmony_ci			  dd->unit, uctxt->ctxt, fd->subctxt, ret);
3708c2ecf20Sopenharmony_ci		return -EFAULT;
3718c2ecf20Sopenharmony_ci	}
3728c2ecf20Sopenharmony_ci
3738c2ecf20Sopenharmony_ci	trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
3748c2ecf20Sopenharmony_ci				     (u16 *)&info);
3758c2ecf20Sopenharmony_ci	if (info.comp_idx >= hfi1_sdma_comp_ring_size) {
3768c2ecf20Sopenharmony_ci		hfi1_cdbg(SDMA,
3778c2ecf20Sopenharmony_ci			  "[%u:%u:%u:%u] Invalid comp index",
3788c2ecf20Sopenharmony_ci			  dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
3798c2ecf20Sopenharmony_ci		return -EINVAL;
3808c2ecf20Sopenharmony_ci	}
3818c2ecf20Sopenharmony_ci
3828c2ecf20Sopenharmony_ci	/*
3838c2ecf20Sopenharmony_ci	 * Sanity check the header io vector count.  Need at least 1 vector
3848c2ecf20Sopenharmony_ci	 * (header) and cannot be larger than the actual io vector count.
3858c2ecf20Sopenharmony_ci	 */
3868c2ecf20Sopenharmony_ci	if (req_iovcnt(info.ctrl) < 1 || req_iovcnt(info.ctrl) > dim) {
3878c2ecf20Sopenharmony_ci		hfi1_cdbg(SDMA,
3888c2ecf20Sopenharmony_ci			  "[%u:%u:%u:%u] Invalid iov count %d, dim %ld",
3898c2ecf20Sopenharmony_ci			  dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx,
3908c2ecf20Sopenharmony_ci			  req_iovcnt(info.ctrl), dim);
3918c2ecf20Sopenharmony_ci		return -EINVAL;
3928c2ecf20Sopenharmony_ci	}
3938c2ecf20Sopenharmony_ci
3948c2ecf20Sopenharmony_ci	if (!info.fragsize) {
3958c2ecf20Sopenharmony_ci		hfi1_cdbg(SDMA,
3968c2ecf20Sopenharmony_ci			  "[%u:%u:%u:%u] Request does not specify fragsize",
3978c2ecf20Sopenharmony_ci			  dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
3988c2ecf20Sopenharmony_ci		return -EINVAL;
3998c2ecf20Sopenharmony_ci	}
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_ci	/* Try to claim the request. */
4028c2ecf20Sopenharmony_ci	if (test_and_set_bit(info.comp_idx, pq->req_in_use)) {
4038c2ecf20Sopenharmony_ci		hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in use",
4048c2ecf20Sopenharmony_ci			  dd->unit, uctxt->ctxt, fd->subctxt,
4058c2ecf20Sopenharmony_ci			  info.comp_idx);
4068c2ecf20Sopenharmony_ci		return -EBADSLT;
4078c2ecf20Sopenharmony_ci	}
4088c2ecf20Sopenharmony_ci	/*
4098c2ecf20Sopenharmony_ci	 * All safety checks have been done and this request has been claimed.
4108c2ecf20Sopenharmony_ci	 */
4118c2ecf20Sopenharmony_ci	trace_hfi1_sdma_user_process_request(dd, uctxt->ctxt, fd->subctxt,
4128c2ecf20Sopenharmony_ci					     info.comp_idx);
4138c2ecf20Sopenharmony_ci	req = pq->reqs + info.comp_idx;
4148c2ecf20Sopenharmony_ci	req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
4158c2ecf20Sopenharmony_ci	req->data_len  = 0;
4168c2ecf20Sopenharmony_ci	req->pq = pq;
4178c2ecf20Sopenharmony_ci	req->cq = cq;
4188c2ecf20Sopenharmony_ci	req->ahg_idx = -1;
4198c2ecf20Sopenharmony_ci	req->iov_idx = 0;
4208c2ecf20Sopenharmony_ci	req->sent = 0;
4218c2ecf20Sopenharmony_ci	req->seqnum = 0;
4228c2ecf20Sopenharmony_ci	req->seqcomp = 0;
4238c2ecf20Sopenharmony_ci	req->seqsubmitted = 0;
4248c2ecf20Sopenharmony_ci	req->tids = NULL;
4258c2ecf20Sopenharmony_ci	req->has_error = 0;
4268c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&req->txps);
4278c2ecf20Sopenharmony_ci
4288c2ecf20Sopenharmony_ci	memcpy(&req->info, &info, sizeof(info));
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_ci	/* The request is initialized, count it */
4318c2ecf20Sopenharmony_ci	atomic_inc(&pq->n_reqs);
4328c2ecf20Sopenharmony_ci
4338c2ecf20Sopenharmony_ci	if (req_opcode(info.ctrl) == EXPECTED) {
4348c2ecf20Sopenharmony_ci		/* expected must have a TID info and at least one data vector */
4358c2ecf20Sopenharmony_ci		if (req->data_iovs < 2) {
4368c2ecf20Sopenharmony_ci			SDMA_DBG(req,
4378c2ecf20Sopenharmony_ci				 "Not enough vectors for expected request");
4388c2ecf20Sopenharmony_ci			ret = -EINVAL;
4398c2ecf20Sopenharmony_ci			goto free_req;
4408c2ecf20Sopenharmony_ci		}
4418c2ecf20Sopenharmony_ci		req->data_iovs--;
4428c2ecf20Sopenharmony_ci	}
4438c2ecf20Sopenharmony_ci
4448c2ecf20Sopenharmony_ci	if (!info.npkts || req->data_iovs > MAX_VECTORS_PER_REQ) {
4458c2ecf20Sopenharmony_ci		SDMA_DBG(req, "Too many vectors (%u/%u)", req->data_iovs,
4468c2ecf20Sopenharmony_ci			 MAX_VECTORS_PER_REQ);
4478c2ecf20Sopenharmony_ci		ret = -EINVAL;
4488c2ecf20Sopenharmony_ci		goto free_req;
4498c2ecf20Sopenharmony_ci	}
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ci	/* Copy the header from the user buffer */
4528c2ecf20Sopenharmony_ci	ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info),
4538c2ecf20Sopenharmony_ci			     sizeof(req->hdr));
4548c2ecf20Sopenharmony_ci	if (ret) {
4558c2ecf20Sopenharmony_ci		SDMA_DBG(req, "Failed to copy header template (%d)", ret);
4568c2ecf20Sopenharmony_ci		ret = -EFAULT;
4578c2ecf20Sopenharmony_ci		goto free_req;
4588c2ecf20Sopenharmony_ci	}
4598c2ecf20Sopenharmony_ci
4608c2ecf20Sopenharmony_ci	/* If Static rate control is not enabled, sanitize the header. */
4618c2ecf20Sopenharmony_ci	if (!HFI1_CAP_IS_USET(STATIC_RATE_CTRL))
4628c2ecf20Sopenharmony_ci		req->hdr.pbc[2] = 0;
4638c2ecf20Sopenharmony_ci
4648c2ecf20Sopenharmony_ci	/* Validate the opcode. Do not trust packets from user space blindly. */
4658c2ecf20Sopenharmony_ci	opcode = (be32_to_cpu(req->hdr.bth[0]) >> 24) & 0xff;
4668c2ecf20Sopenharmony_ci	if ((opcode & USER_OPCODE_CHECK_MASK) !=
4678c2ecf20Sopenharmony_ci	     USER_OPCODE_CHECK_VAL) {
4688c2ecf20Sopenharmony_ci		SDMA_DBG(req, "Invalid opcode (%d)", opcode);
4698c2ecf20Sopenharmony_ci		ret = -EINVAL;
4708c2ecf20Sopenharmony_ci		goto free_req;
4718c2ecf20Sopenharmony_ci	}
4728c2ecf20Sopenharmony_ci	/*
4738c2ecf20Sopenharmony_ci	 * Validate the vl. Do not trust packets from user space blindly.
4748c2ecf20Sopenharmony_ci	 * VL comes from PBC, SC comes from LRH, and the VL needs to
4758c2ecf20Sopenharmony_ci	 * match the SC look up.
4768c2ecf20Sopenharmony_ci	 */
4778c2ecf20Sopenharmony_ci	vl = (le16_to_cpu(req->hdr.pbc[0]) >> 12) & 0xF;
4788c2ecf20Sopenharmony_ci	sc = (((be16_to_cpu(req->hdr.lrh[0]) >> 12) & 0xF) |
4798c2ecf20Sopenharmony_ci	      (((le16_to_cpu(req->hdr.pbc[1]) >> 14) & 0x1) << 4));
4808c2ecf20Sopenharmony_ci	if (vl >= dd->pport->vls_operational ||
4818c2ecf20Sopenharmony_ci	    vl != sc_to_vlt(dd, sc)) {
4828c2ecf20Sopenharmony_ci		SDMA_DBG(req, "Invalid SC(%u)/VL(%u)", sc, vl);
4838c2ecf20Sopenharmony_ci		ret = -EINVAL;
4848c2ecf20Sopenharmony_ci		goto free_req;
4858c2ecf20Sopenharmony_ci	}
4868c2ecf20Sopenharmony_ci
4878c2ecf20Sopenharmony_ci	/* Checking P_KEY for requests from user-space */
4888c2ecf20Sopenharmony_ci	pkey = (u16)be32_to_cpu(req->hdr.bth[0]);
4898c2ecf20Sopenharmony_ci	slid = be16_to_cpu(req->hdr.lrh[3]);
4908c2ecf20Sopenharmony_ci	if (egress_pkey_check(dd->pport, slid, pkey, sc, PKEY_CHECK_INVALID)) {
4918c2ecf20Sopenharmony_ci		ret = -EINVAL;
4928c2ecf20Sopenharmony_ci		goto free_req;
4938c2ecf20Sopenharmony_ci	}
4948c2ecf20Sopenharmony_ci
4958c2ecf20Sopenharmony_ci	/*
4968c2ecf20Sopenharmony_ci	 * Also should check the BTH.lnh. If it says the next header is GRH then
4978c2ecf20Sopenharmony_ci	 * the RXE parsing will be off and will land in the middle of the KDETH
4988c2ecf20Sopenharmony_ci	 * or miss it entirely.
4998c2ecf20Sopenharmony_ci	 */
5008c2ecf20Sopenharmony_ci	if ((be16_to_cpu(req->hdr.lrh[0]) & 0x3) == HFI1_LRH_GRH) {
5018c2ecf20Sopenharmony_ci		SDMA_DBG(req, "User tried to pass in a GRH");
5028c2ecf20Sopenharmony_ci		ret = -EINVAL;
5038c2ecf20Sopenharmony_ci		goto free_req;
5048c2ecf20Sopenharmony_ci	}
5058c2ecf20Sopenharmony_ci
5068c2ecf20Sopenharmony_ci	req->koffset = le32_to_cpu(req->hdr.kdeth.swdata[6]);
5078c2ecf20Sopenharmony_ci	/*
5088c2ecf20Sopenharmony_ci	 * Calculate the initial TID offset based on the values of
5098c2ecf20Sopenharmony_ci	 * KDETH.OFFSET and KDETH.OM that are passed in.
5108c2ecf20Sopenharmony_ci	 */
5118c2ecf20Sopenharmony_ci	req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) *
5128c2ecf20Sopenharmony_ci		(KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
5138c2ecf20Sopenharmony_ci		 KDETH_OM_LARGE : KDETH_OM_SMALL);
5148c2ecf20Sopenharmony_ci	trace_hfi1_sdma_user_initial_tidoffset(dd, uctxt->ctxt, fd->subctxt,
5158c2ecf20Sopenharmony_ci					       info.comp_idx, req->tidoffset);
5168c2ecf20Sopenharmony_ci	idx++;
5178c2ecf20Sopenharmony_ci
5188c2ecf20Sopenharmony_ci	/* Save all the IO vector structures */
5198c2ecf20Sopenharmony_ci	for (i = 0; i < req->data_iovs; i++) {
5208c2ecf20Sopenharmony_ci		req->iovs[i].offset = 0;
5218c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&req->iovs[i].list);
5228c2ecf20Sopenharmony_ci		memcpy(&req->iovs[i].iov,
5238c2ecf20Sopenharmony_ci		       iovec + idx++,
5248c2ecf20Sopenharmony_ci		       sizeof(req->iovs[i].iov));
5258c2ecf20Sopenharmony_ci		if (req->iovs[i].iov.iov_len == 0) {
5268c2ecf20Sopenharmony_ci			ret = -EINVAL;
5278c2ecf20Sopenharmony_ci			goto free_req;
5288c2ecf20Sopenharmony_ci		}
5298c2ecf20Sopenharmony_ci		req->data_len += req->iovs[i].iov.iov_len;
5308c2ecf20Sopenharmony_ci	}
5318c2ecf20Sopenharmony_ci	trace_hfi1_sdma_user_data_length(dd, uctxt->ctxt, fd->subctxt,
5328c2ecf20Sopenharmony_ci					 info.comp_idx, req->data_len);
5338c2ecf20Sopenharmony_ci	if (pcount > req->info.npkts)
5348c2ecf20Sopenharmony_ci		pcount = req->info.npkts;
5358c2ecf20Sopenharmony_ci	/*
5368c2ecf20Sopenharmony_ci	 * Copy any TID info
5378c2ecf20Sopenharmony_ci	 * User space will provide the TID info only when the
5388c2ecf20Sopenharmony_ci	 * request type is EXPECTED. This is true even if there is
5398c2ecf20Sopenharmony_ci	 * only one packet in the request and the header is already
5408c2ecf20Sopenharmony_ci	 * setup. The reason for the singular TID case is that the
5418c2ecf20Sopenharmony_ci	 * driver needs to perform safety checks.
5428c2ecf20Sopenharmony_ci	 */
5438c2ecf20Sopenharmony_ci	if (req_opcode(req->info.ctrl) == EXPECTED) {
5448c2ecf20Sopenharmony_ci		u16 ntids = iovec[idx].iov_len / sizeof(*req->tids);
5458c2ecf20Sopenharmony_ci		u32 *tmp;
5468c2ecf20Sopenharmony_ci
5478c2ecf20Sopenharmony_ci		if (!ntids || ntids > MAX_TID_PAIR_ENTRIES) {
5488c2ecf20Sopenharmony_ci			ret = -EINVAL;
5498c2ecf20Sopenharmony_ci			goto free_req;
5508c2ecf20Sopenharmony_ci		}
5518c2ecf20Sopenharmony_ci
5528c2ecf20Sopenharmony_ci		/*
5538c2ecf20Sopenharmony_ci		 * We have to copy all of the tids because they may vary
5548c2ecf20Sopenharmony_ci		 * in size and, therefore, the TID count might not be
5558c2ecf20Sopenharmony_ci		 * equal to the pkt count. However, there is no way to
5568c2ecf20Sopenharmony_ci		 * tell at this point.
5578c2ecf20Sopenharmony_ci		 */
5588c2ecf20Sopenharmony_ci		tmp = memdup_user(iovec[idx].iov_base,
5598c2ecf20Sopenharmony_ci				  ntids * sizeof(*req->tids));
5608c2ecf20Sopenharmony_ci		if (IS_ERR(tmp)) {
5618c2ecf20Sopenharmony_ci			ret = PTR_ERR(tmp);
5628c2ecf20Sopenharmony_ci			SDMA_DBG(req, "Failed to copy %d TIDs (%d)",
5638c2ecf20Sopenharmony_ci				 ntids, ret);
5648c2ecf20Sopenharmony_ci			goto free_req;
5658c2ecf20Sopenharmony_ci		}
5668c2ecf20Sopenharmony_ci		req->tids = tmp;
5678c2ecf20Sopenharmony_ci		req->n_tids = ntids;
5688c2ecf20Sopenharmony_ci		req->tididx = 0;
5698c2ecf20Sopenharmony_ci		idx++;
5708c2ecf20Sopenharmony_ci	}
5718c2ecf20Sopenharmony_ci
5728c2ecf20Sopenharmony_ci	dlid = be16_to_cpu(req->hdr.lrh[1]);
5738c2ecf20Sopenharmony_ci	selector = dlid_to_selector(dlid);
5748c2ecf20Sopenharmony_ci	selector += uctxt->ctxt + fd->subctxt;
5758c2ecf20Sopenharmony_ci	req->sde = sdma_select_user_engine(dd, selector, vl);
5768c2ecf20Sopenharmony_ci
5778c2ecf20Sopenharmony_ci	if (!req->sde || !sdma_running(req->sde)) {
5788c2ecf20Sopenharmony_ci		ret = -ECOMM;
5798c2ecf20Sopenharmony_ci		goto free_req;
5808c2ecf20Sopenharmony_ci	}
5818c2ecf20Sopenharmony_ci
5828c2ecf20Sopenharmony_ci	/* We don't need an AHG entry if the request contains only one packet */
5838c2ecf20Sopenharmony_ci	if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG))
5848c2ecf20Sopenharmony_ci		req->ahg_idx = sdma_ahg_alloc(req->sde);
5858c2ecf20Sopenharmony_ci
5868c2ecf20Sopenharmony_ci	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
5878c2ecf20Sopenharmony_ci	pq->state = SDMA_PKT_Q_ACTIVE;
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_ci	/*
5908c2ecf20Sopenharmony_ci	 * This is a somewhat blocking send implementation.
5918c2ecf20Sopenharmony_ci	 * The driver will block the caller until all packets of the
5928c2ecf20Sopenharmony_ci	 * request have been submitted to the SDMA engine. However, it
5938c2ecf20Sopenharmony_ci	 * will not wait for send completions.
5948c2ecf20Sopenharmony_ci	 */
5958c2ecf20Sopenharmony_ci	while (req->seqsubmitted != req->info.npkts) {
5968c2ecf20Sopenharmony_ci		ret = user_sdma_send_pkts(req, pcount);
5978c2ecf20Sopenharmony_ci		if (ret < 0) {
5988c2ecf20Sopenharmony_ci			int we_ret;
5998c2ecf20Sopenharmony_ci
6008c2ecf20Sopenharmony_ci			if (ret != -EBUSY)
6018c2ecf20Sopenharmony_ci				goto free_req;
6028c2ecf20Sopenharmony_ci			we_ret = wait_event_interruptible_timeout(
6038c2ecf20Sopenharmony_ci				pq->busy.wait_dma,
6048c2ecf20Sopenharmony_ci				pq->state == SDMA_PKT_Q_ACTIVE,
6058c2ecf20Sopenharmony_ci				msecs_to_jiffies(
6068c2ecf20Sopenharmony_ci					SDMA_IOWAIT_TIMEOUT));
6078c2ecf20Sopenharmony_ci			trace_hfi1_usdma_we(pq, we_ret);
6088c2ecf20Sopenharmony_ci			if (we_ret <= 0)
6098c2ecf20Sopenharmony_ci				flush_pq_iowait(pq);
6108c2ecf20Sopenharmony_ci		}
6118c2ecf20Sopenharmony_ci	}
6128c2ecf20Sopenharmony_ci	*count += idx;
6138c2ecf20Sopenharmony_ci	return 0;
6148c2ecf20Sopenharmony_cifree_req:
	/*
	 * If seqsubmitted == npkts, the completion routine controls the
	 * final state.  If seqsubmitted < npkts, wait for any outstanding
	 * packets to finish before cleaning up.
	 */
6208c2ecf20Sopenharmony_ci	if (req->seqsubmitted < req->info.npkts) {
6218c2ecf20Sopenharmony_ci		if (req->seqsubmitted)
6228c2ecf20Sopenharmony_ci			wait_event(pq->busy.wait_dma,
6238c2ecf20Sopenharmony_ci				   (req->seqcomp == req->seqsubmitted - 1));
6248c2ecf20Sopenharmony_ci		user_sdma_free_request(req);
6258c2ecf20Sopenharmony_ci		pq_update(pq);
6268c2ecf20Sopenharmony_ci		set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
6278c2ecf20Sopenharmony_ci	}
6288c2ecf20Sopenharmony_ci	return ret;
6298c2ecf20Sopenharmony_ci}
6308c2ecf20Sopenharmony_ci
6318c2ecf20Sopenharmony_cistatic inline u32 compute_data_length(struct user_sdma_request *req,
6328c2ecf20Sopenharmony_ci				      struct user_sdma_txreq *tx)
6338c2ecf20Sopenharmony_ci{
6348c2ecf20Sopenharmony_ci	/*
6358c2ecf20Sopenharmony_ci	 * Determine the proper size of the packet data.
6368c2ecf20Sopenharmony_ci	 * The size of the data of the first packet is in the header
6378c2ecf20Sopenharmony_ci	 * template. However, it includes the header and ICRC, which need
6388c2ecf20Sopenharmony_ci	 * to be subtracted.
6398c2ecf20Sopenharmony_ci	 * The minimum representable packet data length in a header is 4 bytes,
6408c2ecf20Sopenharmony_ci	 * therefore, when the data length request is less than 4 bytes, there's
6418c2ecf20Sopenharmony_ci	 * only one packet, and the packet data length is equal to that of the
6428c2ecf20Sopenharmony_ci	 * request data length.
6438c2ecf20Sopenharmony_ci	 * The size of the remaining packets is the minimum of the frag
6448c2ecf20Sopenharmony_ci	 * size (MTU) or remaining data in the request.
6458c2ecf20Sopenharmony_ci	 */
6468c2ecf20Sopenharmony_ci	u32 len;
6478c2ecf20Sopenharmony_ci
6488c2ecf20Sopenharmony_ci	if (!req->seqnum) {
6498c2ecf20Sopenharmony_ci		if (req->data_len < sizeof(u32))
6508c2ecf20Sopenharmony_ci			len = req->data_len;
6518c2ecf20Sopenharmony_ci		else
6528c2ecf20Sopenharmony_ci			len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) -
6538c2ecf20Sopenharmony_ci			       (sizeof(tx->hdr) - 4));
6548c2ecf20Sopenharmony_ci	} else if (req_opcode(req->info.ctrl) == EXPECTED) {
6558c2ecf20Sopenharmony_ci		u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) *
6568c2ecf20Sopenharmony_ci			PAGE_SIZE;
6578c2ecf20Sopenharmony_ci		/*
6588c2ecf20Sopenharmony_ci		 * Get the data length based on the remaining space in the
6598c2ecf20Sopenharmony_ci		 * TID pair.
6608c2ecf20Sopenharmony_ci		 */
6618c2ecf20Sopenharmony_ci		len = min(tidlen - req->tidoffset, (u32)req->info.fragsize);
6628c2ecf20Sopenharmony_ci		/* If we've filled up the TID pair, move to the next one. */
6638c2ecf20Sopenharmony_ci		if (unlikely(!len) && ++req->tididx < req->n_tids &&
6648c2ecf20Sopenharmony_ci		    req->tids[req->tididx]) {
6658c2ecf20Sopenharmony_ci			tidlen = EXP_TID_GET(req->tids[req->tididx],
6668c2ecf20Sopenharmony_ci					     LEN) * PAGE_SIZE;
6678c2ecf20Sopenharmony_ci			req->tidoffset = 0;
6688c2ecf20Sopenharmony_ci			len = min_t(u32, tidlen, req->info.fragsize);
6698c2ecf20Sopenharmony_ci		}
6708c2ecf20Sopenharmony_ci		/*
6718c2ecf20Sopenharmony_ci		 * Since the TID pairs map entire pages, make sure that we
6728c2ecf20Sopenharmony_ci		 * are not going to try to send more data that we have
6738c2ecf20Sopenharmony_ci		 * remaining.
6748c2ecf20Sopenharmony_ci		 */
6758c2ecf20Sopenharmony_ci		len = min(len, req->data_len - req->sent);
6768c2ecf20Sopenharmony_ci	} else {
6778c2ecf20Sopenharmony_ci		len = min(req->data_len - req->sent, (u32)req->info.fragsize);
6788c2ecf20Sopenharmony_ci	}
6798c2ecf20Sopenharmony_ci	trace_hfi1_sdma_user_compute_length(req->pq->dd,
6808c2ecf20Sopenharmony_ci					    req->pq->ctxt,
6818c2ecf20Sopenharmony_ci					    req->pq->subctxt,
6828c2ecf20Sopenharmony_ci					    req->info.comp_idx,
6838c2ecf20Sopenharmony_ci					    len);
6848c2ecf20Sopenharmony_ci	return len;
6858c2ecf20Sopenharmony_ci}
6868c2ecf20Sopenharmony_ci
/*
 * pad_len() - round a byte count up to the next multiple of sizeof(u32)
 * @len: length in bytes
 *
 * Return: @len unchanged when it is already dword aligned, otherwise the
 * next dword-aligned value (modulo 2^32).
 */
static inline u32 pad_len(u32 len)
{
	const u32 rem = len & (sizeof(u32) - 1);

	return rem ? len + (sizeof(u32) - rem) : len;
}
6938c2ecf20Sopenharmony_ci
6948c2ecf20Sopenharmony_cistatic inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
6958c2ecf20Sopenharmony_ci{
6968c2ecf20Sopenharmony_ci	/* (Size of complete header - size of PBC) + 4B ICRC + data length */
6978c2ecf20Sopenharmony_ci	return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len);
6988c2ecf20Sopenharmony_ci}
6998c2ecf20Sopenharmony_ci
7008c2ecf20Sopenharmony_cistatic int user_sdma_txadd_ahg(struct user_sdma_request *req,
7018c2ecf20Sopenharmony_ci			       struct user_sdma_txreq *tx,
7028c2ecf20Sopenharmony_ci			       u32 datalen)
7038c2ecf20Sopenharmony_ci{
7048c2ecf20Sopenharmony_ci	int ret;
7058c2ecf20Sopenharmony_ci	u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
7068c2ecf20Sopenharmony_ci	u32 lrhlen = get_lrh_len(req->hdr, pad_len(datalen));
7078c2ecf20Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq = req->pq;
7088c2ecf20Sopenharmony_ci
7098c2ecf20Sopenharmony_ci	/*
7108c2ecf20Sopenharmony_ci	 * Copy the request header into the tx header
7118c2ecf20Sopenharmony_ci	 * because the HW needs a cacheline-aligned
7128c2ecf20Sopenharmony_ci	 * address.
7138c2ecf20Sopenharmony_ci	 * This copy can be optimized out if the hdr
7148c2ecf20Sopenharmony_ci	 * member of user_sdma_request were also
7158c2ecf20Sopenharmony_ci	 * cacheline aligned.
7168c2ecf20Sopenharmony_ci	 */
7178c2ecf20Sopenharmony_ci	memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr));
7188c2ecf20Sopenharmony_ci	if (PBC2LRH(pbclen) != lrhlen) {
7198c2ecf20Sopenharmony_ci		pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen);
7208c2ecf20Sopenharmony_ci		tx->hdr.pbc[0] = cpu_to_le16(pbclen);
7218c2ecf20Sopenharmony_ci	}
7228c2ecf20Sopenharmony_ci	ret = check_header_template(req, &tx->hdr, lrhlen, datalen);
7238c2ecf20Sopenharmony_ci	if (ret)
7248c2ecf20Sopenharmony_ci		return ret;
7258c2ecf20Sopenharmony_ci	ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY,
7268c2ecf20Sopenharmony_ci			      sizeof(tx->hdr) + datalen, req->ahg_idx,
7278c2ecf20Sopenharmony_ci			      0, NULL, 0, user_sdma_txreq_cb);
7288c2ecf20Sopenharmony_ci	if (ret)
7298c2ecf20Sopenharmony_ci		return ret;
7308c2ecf20Sopenharmony_ci	ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq, &tx->hdr, sizeof(tx->hdr));
7318c2ecf20Sopenharmony_ci	if (ret)
7328c2ecf20Sopenharmony_ci		sdma_txclean(pq->dd, &tx->txreq);
7338c2ecf20Sopenharmony_ci	return ret;
7348c2ecf20Sopenharmony_ci}
7358c2ecf20Sopenharmony_ci
/*
 * user_sdma_send_pkts() - build the next batch of packets of a request and
 * hand them to the SDMA engine
 * @req: request being processed
 * @maxpkts: upper bound on the number of packets built by this call; 0, or
 *           a value larger than what is left, means "all remaining packets"
 *
 * For every packet a tx request is allocated from pq->txreq_cache, its
 * payload size is computed, its header is set up (with or without AHG,
 * depending on req->ahg_idx) and the user pages are attached.  The packets
 * are collected on req->txps and then submitted with sdma_send_txlist();
 * req->seqsubmitted is advanced by the count that call reports.
 *
 * Return:
 *   -EINVAL  @req has no packet queue
 *   -EFAULT  a completion has flagged an error, the data iovecs are
 *            exhausted, or a packet of a request with data has length 0
 *   -ENOMEM  no tx request could be allocated
 *   other    error from header / txreq setup or from attaching pages
 *   else     the return value of sdma_send_txlist(), or 0 when every
 *            packet was already built and nothing is pending on req->txps
 */
static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
{
	int ret = 0;
	/*
	 * Consumed unconditionally after sdma_send_txlist() below.
	 * NOTE(review): relies on sdma_send_txlist() always writing it,
	 * even on failure - confirm.
	 */
	u16 count;
	unsigned npkts = 0;
	struct user_sdma_txreq *tx = NULL;
	struct hfi1_user_sdma_pkt_q *pq = NULL;
	struct user_sdma_iovec *iovec = NULL;

	if (!req->pq)
		return -EINVAL;

	pq = req->pq;

	/* If tx completion has reported an error, we are done. */
	if (READ_ONCE(req->has_error))
		return -EFAULT;

	/*
	 * Check if we might have sent the entire request already.
	 * All packets have been built; only resubmit what is still
	 * sitting on req->txps, if anything.
	 */
	if (unlikely(req->seqnum == req->info.npkts)) {
		if (!list_empty(&req->txps))
			goto dosend;
		return ret;
	}

	/* Clamp the batch to the number of packets not yet built. */
	if (!maxpkts || maxpkts > req->info.npkts - req->seqnum)
		maxpkts = req->info.npkts - req->seqnum;

	while (npkts < maxpkts) {
		u32 datalen = 0;

		/*
		 * Check whether any of the completions have come back
		 * with errors. If so, we are not going to process any
		 * more packets from this request.
		 *
		 * Packets already linked on req->txps in this call are
		 * left there. NOTE(review): presumably released by the
		 * caller's request teardown - confirm.
		 */
		if (READ_ONCE(req->has_error))
			return -EFAULT;

		tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
		if (!tx)
			return -ENOMEM;

		tx->flags = 0;
		tx->req = req;
		INIT_LIST_HEAD(&tx->list);

		/*
		 * For the last packet set the ACK request
		 * and disable header suppression.
		 */
		if (req->seqnum == req->info.npkts - 1)
			tx->flags |= (TXREQ_FLAGS_REQ_ACK |
				      TXREQ_FLAGS_REQ_DISABLE_SH);

		/*
		 * Calculate the payload size - this is min of the fragment
		 * (MTU) size or the remaining bytes in the request but only
		 * if we have payload data.
		 */
		if (req->data_len) {
			iovec = &req->iovs[req->iov_idx];
			/*
			 * The current iovec is fully consumed: step to the
			 * next one, failing if there is none left.
			 */
			if (READ_ONCE(iovec->offset) == iovec->iov.iov_len) {
				if (++req->iov_idx == req->data_iovs) {
					ret = -EFAULT;
					goto free_tx;
				}
				iovec = &req->iovs[req->iov_idx];
				WARN_ON(iovec->offset);
			}

			datalen = compute_data_length(req, tx);

			/*
			 * A request that carries data must never produce an
			 * empty packet.
			 *
			 * Disable header suppression for the payload <= 8DWS.
			 * If there is an uncorrectable error in the receive
			 * data FIFO when the received payload size is less than
			 * or equal to 8DWS then the RxDmaDataFifoRdUncErr is
			 * not reported. RHF.EccErr is set instead if the
			 * header is not suppressed.
			 */
			if (!datalen) {
				SDMA_DBG(req,
					 "Request has data but pkt len is 0");
				ret = -EFAULT;
				goto free_tx;
			} else if (datalen <= 32) {
				tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH;
			}
		}

		if (req->ahg_idx >= 0) {
			/*
			 * AHG in use: the first packet carries a full header
			 * copy, later packets only describe header changes.
			 */
			if (!req->seqnum) {
				ret = user_sdma_txadd_ahg(req, tx, datalen);
				if (ret)
					goto free_tx;
			} else {
				int changes;

				changes = set_txreq_header_ahg(req, tx,
							       datalen);
				if (changes < 0) {
					ret = changes;
					goto free_tx;
				}
			}
		} else {
			ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
					  datalen, user_sdma_txreq_cb);
			if (ret)
				goto free_tx;
			/*
			 * Modify the header for this packet. This only needs
			 * to be done if we are not going to use AHG. Otherwise,
			 * the HW will do it based on the changes we gave it
			 * during sdma_txinit_ahg().
			 */
			ret = set_txreq_header(req, tx, datalen);
			if (ret)
				goto free_txreq;
		}

		/* Account for this packet's payload before attaching it. */
		req->koffset += datalen;
		if (req_opcode(req->info.ctrl) == EXPECTED)
			req->tidoffset += datalen;
		req->sent += datalen;
		/*
		 * datalen is passed by reference and the loop runs until it
		 * reaches 0; iovec is re-read from req->iov_idx after every
		 * call.
		 */
		while (datalen) {
			ret = add_system_pages_to_sdma_packet(req, tx, iovec,
							      &datalen);
			if (ret)
				goto free_txreq;
			iovec = &req->iovs[req->iov_idx];
		}
		list_add_tail(&tx->txreq.list, &req->txps);
		/*
		 * It is important to increment this here as it is used to
		 * generate the BTH.PSN and, therefore, can't be bulk-updated
		 * outside of the loop.
		 */
		tx->seqnum = req->seqnum++;
		npkts++;
	}
dosend:
	ret = sdma_send_txlist(req->sde,
			       iowait_get_ib_work(&pq->busy),
			       &req->txps, &count);
	req->seqsubmitted += count;
	if (req->seqsubmitted == req->info.npkts) {
		/*
		 * The txreq has already been submitted to the HW queue
		 * so we can free the AHG entry now. Corruption will not
		 * happen due to the sequential manner in which
		 * descriptors are processed.
		 */
		if (req->ahg_idx >= 0)
			sdma_ahg_free(req->sde, req->ahg_idx);
	}
	return ret;

	/* The txreq was initialised: clean it, then fall through. */
free_txreq:
	sdma_txclean(pq->dd, &tx->txreq);
	/* Only the allocation has to be undone. */
free_tx:
	kmem_cache_free(pq->txreq_cache, tx);
	return ret;
}
9038c2ecf20Sopenharmony_ci
9048c2ecf20Sopenharmony_cistatic u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
9058c2ecf20Sopenharmony_ci{
9068c2ecf20Sopenharmony_ci	struct evict_data evict_data;
9078c2ecf20Sopenharmony_ci	struct mmu_rb_handler *handler = pq->handler;
9088c2ecf20Sopenharmony_ci
9098c2ecf20Sopenharmony_ci	evict_data.cleared = 0;
9108c2ecf20Sopenharmony_ci	evict_data.target = npages;
9118c2ecf20Sopenharmony_ci	hfi1_mmu_rb_evict(handler, &evict_data);
9128c2ecf20Sopenharmony_ci	return evict_data.cleared;
9138c2ecf20Sopenharmony_ci}
9148c2ecf20Sopenharmony_ci
9158c2ecf20Sopenharmony_cistatic int check_header_template(struct user_sdma_request *req,
9168c2ecf20Sopenharmony_ci				 struct hfi1_pkt_header *hdr, u32 lrhlen,
9178c2ecf20Sopenharmony_ci				 u32 datalen)
9188c2ecf20Sopenharmony_ci{
9198c2ecf20Sopenharmony_ci	/*
9208c2ecf20Sopenharmony_ci	 * Perform safety checks for any type of packet:
9218c2ecf20Sopenharmony_ci	 *    - transfer size is multiple of 64bytes
9228c2ecf20Sopenharmony_ci	 *    - packet length is multiple of 4 bytes
9238c2ecf20Sopenharmony_ci	 *    - packet length is not larger than MTU size
9248c2ecf20Sopenharmony_ci	 *
9258c2ecf20Sopenharmony_ci	 * These checks are only done for the first packet of the
9268c2ecf20Sopenharmony_ci	 * transfer since the header is "given" to us by user space.
9278c2ecf20Sopenharmony_ci	 * For the remainder of the packets we compute the values.
9288c2ecf20Sopenharmony_ci	 */
9298c2ecf20Sopenharmony_ci	if (req->info.fragsize % PIO_BLOCK_SIZE || lrhlen & 0x3 ||
9308c2ecf20Sopenharmony_ci	    lrhlen > get_lrh_len(*hdr, req->info.fragsize))
9318c2ecf20Sopenharmony_ci		return -EINVAL;
9328c2ecf20Sopenharmony_ci
9338c2ecf20Sopenharmony_ci	if (req_opcode(req->info.ctrl) == EXPECTED) {
9348c2ecf20Sopenharmony_ci		/*
9358c2ecf20Sopenharmony_ci		 * The header is checked only on the first packet. Furthermore,
9368c2ecf20Sopenharmony_ci		 * we ensure that at least one TID entry is copied when the
9378c2ecf20Sopenharmony_ci		 * request is submitted. Therefore, we don't have to verify that
9388c2ecf20Sopenharmony_ci		 * tididx points to something sane.
9398c2ecf20Sopenharmony_ci		 */
9408c2ecf20Sopenharmony_ci		u32 tidval = req->tids[req->tididx],
9418c2ecf20Sopenharmony_ci			tidlen = EXP_TID_GET(tidval, LEN) * PAGE_SIZE,
9428c2ecf20Sopenharmony_ci			tididx = EXP_TID_GET(tidval, IDX),
9438c2ecf20Sopenharmony_ci			tidctrl = EXP_TID_GET(tidval, CTRL),
9448c2ecf20Sopenharmony_ci			tidoff;
9458c2ecf20Sopenharmony_ci		__le32 kval = hdr->kdeth.ver_tid_offset;
9468c2ecf20Sopenharmony_ci
9478c2ecf20Sopenharmony_ci		tidoff = KDETH_GET(kval, OFFSET) *
9488c2ecf20Sopenharmony_ci			  (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
9498c2ecf20Sopenharmony_ci			   KDETH_OM_LARGE : KDETH_OM_SMALL);
9508c2ecf20Sopenharmony_ci		/*
9518c2ecf20Sopenharmony_ci		 * Expected receive packets have the following
9528c2ecf20Sopenharmony_ci		 * additional checks:
9538c2ecf20Sopenharmony_ci		 *     - offset is not larger than the TID size
9548c2ecf20Sopenharmony_ci		 *     - TIDCtrl values match between header and TID array
9558c2ecf20Sopenharmony_ci		 *     - TID indexes match between header and TID array
9568c2ecf20Sopenharmony_ci		 */
9578c2ecf20Sopenharmony_ci		if ((tidoff + datalen > tidlen) ||
9588c2ecf20Sopenharmony_ci		    KDETH_GET(kval, TIDCTRL) != tidctrl ||
9598c2ecf20Sopenharmony_ci		    KDETH_GET(kval, TID) != tididx)
9608c2ecf20Sopenharmony_ci			return -EINVAL;
9618c2ecf20Sopenharmony_ci	}
9628c2ecf20Sopenharmony_ci	return 0;
9638c2ecf20Sopenharmony_ci}
9648c2ecf20Sopenharmony_ci
9658c2ecf20Sopenharmony_ci/*
9668c2ecf20Sopenharmony_ci * Correctly set the BTH.PSN field based on type of
9678c2ecf20Sopenharmony_ci * transfer - eager packets can just increment the PSN but
9688c2ecf20Sopenharmony_ci * expected packets encode generation and sequence in the
9698c2ecf20Sopenharmony_ci * BTH.PSN field so just incrementing will result in errors.
9708c2ecf20Sopenharmony_ci */
9718c2ecf20Sopenharmony_cistatic inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags)
9728c2ecf20Sopenharmony_ci{
9738c2ecf20Sopenharmony_ci	u32 val = be32_to_cpu(bthpsn),
9748c2ecf20Sopenharmony_ci		mask = (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffffull :
9758c2ecf20Sopenharmony_ci			0xffffffull),
9768c2ecf20Sopenharmony_ci		psn = val & mask;
9778c2ecf20Sopenharmony_ci	if (expct)
9788c2ecf20Sopenharmony_ci		psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) |
9798c2ecf20Sopenharmony_ci			((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK);
9808c2ecf20Sopenharmony_ci	else
9818c2ecf20Sopenharmony_ci		psn = psn + frags;
9828c2ecf20Sopenharmony_ci	return psn & mask;
9838c2ecf20Sopenharmony_ci}
9848c2ecf20Sopenharmony_ci
/*
 * set_txreq_header() - build the header of a packet sent without AHG
 * @req: request owning the header template
 * @tx: tx request whose header (tx->hdr) is filled in
 * @datalen: payload length of this packet in bytes
 *
 * Starts from a copy of the request's header template, fixes up the PBC
 * and LRH lengths and, for every packet but the first, updates BTH.PSN,
 * the ACK request bit, the offset in KDETH swdata[6] and - for EXPECTED
 * requests - the KDETH TID fields.  The finished header is then added to
 * the txreq.
 *
 * Side effects on @req: the template's PBC/LRH lengths are refreshed when
 * the third packet is built; req->tididx / req->tidoffset advance when the
 * current TID entry has been used up.
 *
 * Return: 0 on success; the error from check_header_template() for a bad
 * first header; -EINVAL when no further TID entry is available; otherwise
 * the result of sdma_txadd_kvaddr().
 */
static int set_txreq_header(struct user_sdma_request *req,
			    struct user_sdma_txreq *tx, u32 datalen)
{
	struct hfi1_user_sdma_pkt_q *pq = req->pq;
	struct hfi1_pkt_header *hdr = &tx->hdr;
	u8 omfactor; /* KDETH.OM */
	u16 pbclen;
	int ret;
	/* get_lrh_len() only uses the header's size, not its contents. */
	u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));

	/* Copy the request's header template into the tx header before modification */
	memcpy(hdr, &req->hdr, sizeof(*hdr));

	/*
	 * Check if the PBC and LRH length are mismatched. If so
	 * adjust both in the header.
	 */
	pbclen = le16_to_cpu(hdr->pbc[0]);
	if (PBC2LRH(pbclen) != lrhlen) {
		/* Keep the upper nibble of the PBC word, replace the length. */
		pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen);
		hdr->pbc[0] = cpu_to_le16(pbclen);
		/* LRH.PktLen is stored in dwords. */
		hdr->lrh[2] = cpu_to_be16(lrhlen >> 2);
		/*
		 * Third packet
		 * This is the first packet in the sequence that has
		 * a "static" size that can be used for the rest of
		 * the packets (besides the last one).
		 */
		if (unlikely(req->seqnum == 2)) {
			/*
			 * From this point on the lengths in both the
			 * PBC and LRH are the same until the last
			 * packet.
			 * Adjust the template so we don't have to update
			 * every packet
			 */
			req->hdr.pbc[0] = hdr->pbc[0];
			req->hdr.lrh[2] = hdr->lrh[2];
		}
	}
	/*
	 * We only have to modify the header if this is not the
	 * first packet in the request. Otherwise, we use the
	 * header given to us (after validating it).
	 */
	if (unlikely(!req->seqnum)) {
		ret = check_header_template(req, hdr, lrhlen, datalen);
		if (ret)
			return ret;
		goto done;
	}

	/* Advance the template's PSN by the number of packets built so far. */
	hdr->bth[2] = cpu_to_be32(
		set_pkt_bth_psn(hdr->bth[2],
				(req_opcode(req->info.ctrl) == EXPECTED),
				req->seqnum));

	/* Set ACK request on last packet */
	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
		hdr->bth[2] |= cpu_to_be32(1UL << 31);

	/* Set the new offset */
	hdr->kdeth.swdata[6] = cpu_to_le32(req->koffset);
	/* Expected packets have to fill in the new TID information */
	if (req_opcode(req->info.ctrl) == EXPECTED) {
		tidval = req->tids[req->tididx];
		/*
		 * If the offset puts us at the end of the current TID,
		 * advance everything.
		 */
		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
					 PAGE_SIZE)) {
			req->tidoffset = 0;
			/*
			 * Make sure a next TID entry exists and is
			 * non-zero before using it.
			 */
			if (++req->tididx > req->n_tids - 1 ||
			    !req->tids[req->tididx]) {
				return -EINVAL;
			}
			tidval = req->tids[req->tididx];
		}
		/*
		 * Pick the offset shift: the large one when the TID covers
		 * at least KDETH_OM_MAX_SIZE bytes, the small one otherwise.
		 */
		omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
			KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
			KDETH_OM_SMALL_SHIFT;
		/* Set KDETH.TIDCtrl based on value for this TID. */
		KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
			  EXP_TID_GET(tidval, CTRL));
		/* Set KDETH.TID based on value for this TID */
		KDETH_SET(hdr->kdeth.ver_tid_offset, TID,
			  EXP_TID_GET(tidval, IDX));
		/* Clear KDETH.SH when DISABLE_SH flag is set */
		if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH))
			KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0);
		/*
		 * Set the KDETH.OFFSET and KDETH.OM based on size of
		 * transfer.
		 */
		trace_hfi1_sdma_user_tid_info(
			pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx,
			req->tidoffset, req->tidoffset >> omfactor,
			omfactor != KDETH_OM_SMALL_SHIFT);
		KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
			  req->tidoffset >> omfactor);
		KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
			  omfactor != KDETH_OM_SMALL_SHIFT);
	}
done:
	trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
				    req->info.comp_idx, hdr, tidval);
	/* Attach the finished header to the txreq. */
	return sdma_txadd_kvaddr(pq->dd, &tx->txreq, hdr, sizeof(*hdr));
}
10988c2ecf20Sopenharmony_ci
10998c2ecf20Sopenharmony_cistatic int set_txreq_header_ahg(struct user_sdma_request *req,
11008c2ecf20Sopenharmony_ci				struct user_sdma_txreq *tx, u32 datalen)
11018c2ecf20Sopenharmony_ci{
11028c2ecf20Sopenharmony_ci	u32 ahg[AHG_KDETH_ARRAY_SIZE];
11038c2ecf20Sopenharmony_ci	int idx = 0;
11048c2ecf20Sopenharmony_ci	u8 omfactor; /* KDETH.OM */
11058c2ecf20Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq = req->pq;
11068c2ecf20Sopenharmony_ci	struct hfi1_pkt_header *hdr = &req->hdr;
11078c2ecf20Sopenharmony_ci	u16 pbclen = le16_to_cpu(hdr->pbc[0]);
11088c2ecf20Sopenharmony_ci	u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
11098c2ecf20Sopenharmony_ci	size_t array_size = ARRAY_SIZE(ahg);
11108c2ecf20Sopenharmony_ci
11118c2ecf20Sopenharmony_ci	if (PBC2LRH(pbclen) != lrhlen) {
11128c2ecf20Sopenharmony_ci		/* PBC.PbcLengthDWs */
11138c2ecf20Sopenharmony_ci		idx = ahg_header_set(ahg, idx, array_size, 0, 0, 12,
11148c2ecf20Sopenharmony_ci				     (__force u16)cpu_to_le16(LRH2PBC(lrhlen)));
11158c2ecf20Sopenharmony_ci		if (idx < 0)
11168c2ecf20Sopenharmony_ci			return idx;
11178c2ecf20Sopenharmony_ci		/* LRH.PktLen (we need the full 16 bits due to byte swap) */
11188c2ecf20Sopenharmony_ci		idx = ahg_header_set(ahg, idx, array_size, 3, 0, 16,
11198c2ecf20Sopenharmony_ci				     (__force u16)cpu_to_be16(lrhlen >> 2));
11208c2ecf20Sopenharmony_ci		if (idx < 0)
11218c2ecf20Sopenharmony_ci			return idx;
11228c2ecf20Sopenharmony_ci	}
11238c2ecf20Sopenharmony_ci
11248c2ecf20Sopenharmony_ci	/*
11258c2ecf20Sopenharmony_ci	 * Do the common updates
11268c2ecf20Sopenharmony_ci	 */
11278c2ecf20Sopenharmony_ci	/* BTH.PSN and BTH.A */
11288c2ecf20Sopenharmony_ci	val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) &
11298c2ecf20Sopenharmony_ci		(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
11308c2ecf20Sopenharmony_ci	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
11318c2ecf20Sopenharmony_ci		val32 |= 1UL << 31;
11328c2ecf20Sopenharmony_ci	idx = ahg_header_set(ahg, idx, array_size, 6, 0, 16,
11338c2ecf20Sopenharmony_ci			     (__force u16)cpu_to_be16(val32 >> 16));
11348c2ecf20Sopenharmony_ci	if (idx < 0)
11358c2ecf20Sopenharmony_ci		return idx;
11368c2ecf20Sopenharmony_ci	idx = ahg_header_set(ahg, idx, array_size, 6, 16, 16,
11378c2ecf20Sopenharmony_ci			     (__force u16)cpu_to_be16(val32 & 0xffff));
11388c2ecf20Sopenharmony_ci	if (idx < 0)
11398c2ecf20Sopenharmony_ci		return idx;
11408c2ecf20Sopenharmony_ci	/* KDETH.Offset */
11418c2ecf20Sopenharmony_ci	idx = ahg_header_set(ahg, idx, array_size, 15, 0, 16,
11428c2ecf20Sopenharmony_ci			     (__force u16)cpu_to_le16(req->koffset & 0xffff));
11438c2ecf20Sopenharmony_ci	if (idx < 0)
11448c2ecf20Sopenharmony_ci		return idx;
11458c2ecf20Sopenharmony_ci	idx = ahg_header_set(ahg, idx, array_size, 15, 16, 16,
11468c2ecf20Sopenharmony_ci			     (__force u16)cpu_to_le16(req->koffset >> 16));
11478c2ecf20Sopenharmony_ci	if (idx < 0)
11488c2ecf20Sopenharmony_ci		return idx;
11498c2ecf20Sopenharmony_ci	if (req_opcode(req->info.ctrl) == EXPECTED) {
11508c2ecf20Sopenharmony_ci		__le16 val;
11518c2ecf20Sopenharmony_ci
11528c2ecf20Sopenharmony_ci		tidval = req->tids[req->tididx];
11538c2ecf20Sopenharmony_ci
11548c2ecf20Sopenharmony_ci		/*
11558c2ecf20Sopenharmony_ci		 * If the offset puts us at the end of the current TID,
11568c2ecf20Sopenharmony_ci		 * advance everything.
11578c2ecf20Sopenharmony_ci		 */
11588c2ecf20Sopenharmony_ci		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
11598c2ecf20Sopenharmony_ci					 PAGE_SIZE)) {
11608c2ecf20Sopenharmony_ci			req->tidoffset = 0;
11618c2ecf20Sopenharmony_ci			/*
11628c2ecf20Sopenharmony_ci			 * Since we don't copy all the TIDs, all at once,
11638c2ecf20Sopenharmony_ci			 * we have to check again.
11648c2ecf20Sopenharmony_ci			 */
11658c2ecf20Sopenharmony_ci			if (++req->tididx > req->n_tids - 1 ||
11668c2ecf20Sopenharmony_ci			    !req->tids[req->tididx])
11678c2ecf20Sopenharmony_ci				return -EINVAL;
11688c2ecf20Sopenharmony_ci			tidval = req->tids[req->tididx];
11698c2ecf20Sopenharmony_ci		}
11708c2ecf20Sopenharmony_ci		omfactor = ((EXP_TID_GET(tidval, LEN) *
11718c2ecf20Sopenharmony_ci				  PAGE_SIZE) >=
11728c2ecf20Sopenharmony_ci				 KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
11738c2ecf20Sopenharmony_ci				 KDETH_OM_SMALL_SHIFT;
11748c2ecf20Sopenharmony_ci		/* KDETH.OM and KDETH.OFFSET (TID) */
11758c2ecf20Sopenharmony_ci		idx = ahg_header_set(
11768c2ecf20Sopenharmony_ci				ahg, idx, array_size, 7, 0, 16,
11778c2ecf20Sopenharmony_ci				((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
11788c2ecf20Sopenharmony_ci				((req->tidoffset >> omfactor)
11798c2ecf20Sopenharmony_ci				& 0x7fff)));
11808c2ecf20Sopenharmony_ci		if (idx < 0)
11818c2ecf20Sopenharmony_ci			return idx;
11828c2ecf20Sopenharmony_ci		/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
11838c2ecf20Sopenharmony_ci		val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
11848c2ecf20Sopenharmony_ci				   (EXP_TID_GET(tidval, IDX) & 0x3ff));
11858c2ecf20Sopenharmony_ci
11868c2ecf20Sopenharmony_ci		if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) {
11878c2ecf20Sopenharmony_ci			val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
11888c2ecf20Sopenharmony_ci						      INTR) <<
11898c2ecf20Sopenharmony_ci					    AHG_KDETH_INTR_SHIFT));
11908c2ecf20Sopenharmony_ci		} else {
11918c2ecf20Sopenharmony_ci			val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ?
11928c2ecf20Sopenharmony_ci			       cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) :
11938c2ecf20Sopenharmony_ci			       cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
11948c2ecf20Sopenharmony_ci						      INTR) <<
11958c2ecf20Sopenharmony_ci					     AHG_KDETH_INTR_SHIFT));
11968c2ecf20Sopenharmony_ci		}
11978c2ecf20Sopenharmony_ci
11988c2ecf20Sopenharmony_ci		idx = ahg_header_set(ahg, idx, array_size,
11998c2ecf20Sopenharmony_ci				     7, 16, 14, (__force u16)val);
12008c2ecf20Sopenharmony_ci		if (idx < 0)
12018c2ecf20Sopenharmony_ci			return idx;
12028c2ecf20Sopenharmony_ci	}
12038c2ecf20Sopenharmony_ci
12048c2ecf20Sopenharmony_ci	trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
12058c2ecf20Sopenharmony_ci					req->info.comp_idx, req->sde->this_idx,
12068c2ecf20Sopenharmony_ci					req->ahg_idx, ahg, idx, tidval);
12078c2ecf20Sopenharmony_ci	sdma_txinit_ahg(&tx->txreq,
12088c2ecf20Sopenharmony_ci			SDMA_TXREQ_F_USE_AHG,
12098c2ecf20Sopenharmony_ci			datalen, req->ahg_idx, idx,
12108c2ecf20Sopenharmony_ci			ahg, sizeof(req->hdr),
12118c2ecf20Sopenharmony_ci			user_sdma_txreq_cb);
12128c2ecf20Sopenharmony_ci
12138c2ecf20Sopenharmony_ci	return idx;
12148c2ecf20Sopenharmony_ci}
12158c2ecf20Sopenharmony_ci
12168c2ecf20Sopenharmony_ci/**
12178c2ecf20Sopenharmony_ci * user_sdma_txreq_cb() - SDMA tx request completion callback.
12188c2ecf20Sopenharmony_ci * @txreq: valid sdma tx request
12198c2ecf20Sopenharmony_ci * @status: success/failure of request
12208c2ecf20Sopenharmony_ci *
12218c2ecf20Sopenharmony_ci * Called when the SDMA progress state machine gets notification that
12228c2ecf20Sopenharmony_ci * the SDMA descriptors for this tx request have been processed by the
12238c2ecf20Sopenharmony_ci * DMA engine. Called in interrupt context.
12248c2ecf20Sopenharmony_ci * Only do work on completed sequences.
12258c2ecf20Sopenharmony_ci */
12268c2ecf20Sopenharmony_cistatic void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
12278c2ecf20Sopenharmony_ci{
12288c2ecf20Sopenharmony_ci	struct user_sdma_txreq *tx =
12298c2ecf20Sopenharmony_ci		container_of(txreq, struct user_sdma_txreq, txreq);
12308c2ecf20Sopenharmony_ci	struct user_sdma_request *req;
12318c2ecf20Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq;
12328c2ecf20Sopenharmony_ci	struct hfi1_user_sdma_comp_q *cq;
12338c2ecf20Sopenharmony_ci	enum hfi1_sdma_comp_state state = COMPLETE;
12348c2ecf20Sopenharmony_ci
12358c2ecf20Sopenharmony_ci	if (!tx->req)
12368c2ecf20Sopenharmony_ci		return;
12378c2ecf20Sopenharmony_ci
12388c2ecf20Sopenharmony_ci	req = tx->req;
12398c2ecf20Sopenharmony_ci	pq = req->pq;
12408c2ecf20Sopenharmony_ci	cq = req->cq;
12418c2ecf20Sopenharmony_ci
12428c2ecf20Sopenharmony_ci	if (status != SDMA_TXREQ_S_OK) {
12438c2ecf20Sopenharmony_ci		SDMA_DBG(req, "SDMA completion with error %d",
12448c2ecf20Sopenharmony_ci			 status);
12458c2ecf20Sopenharmony_ci		WRITE_ONCE(req->has_error, 1);
12468c2ecf20Sopenharmony_ci		state = ERROR;
12478c2ecf20Sopenharmony_ci	}
12488c2ecf20Sopenharmony_ci
12498c2ecf20Sopenharmony_ci	req->seqcomp = tx->seqnum;
12508c2ecf20Sopenharmony_ci	kmem_cache_free(pq->txreq_cache, tx);
12518c2ecf20Sopenharmony_ci
12528c2ecf20Sopenharmony_ci	/* sequence isn't complete?  We are done */
12538c2ecf20Sopenharmony_ci	if (req->seqcomp != req->info.npkts - 1)
12548c2ecf20Sopenharmony_ci		return;
12558c2ecf20Sopenharmony_ci
12568c2ecf20Sopenharmony_ci	user_sdma_free_request(req);
12578c2ecf20Sopenharmony_ci	set_comp_state(pq, cq, req->info.comp_idx, state, status);
12588c2ecf20Sopenharmony_ci	pq_update(pq);
12598c2ecf20Sopenharmony_ci}
12608c2ecf20Sopenharmony_ci
12618c2ecf20Sopenharmony_cistatic inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
12628c2ecf20Sopenharmony_ci{
12638c2ecf20Sopenharmony_ci	if (atomic_dec_and_test(&pq->n_reqs))
12648c2ecf20Sopenharmony_ci		wake_up(&pq->wait);
12658c2ecf20Sopenharmony_ci}
12668c2ecf20Sopenharmony_ci
12678c2ecf20Sopenharmony_cistatic void user_sdma_free_request(struct user_sdma_request *req)
12688c2ecf20Sopenharmony_ci{
12698c2ecf20Sopenharmony_ci	if (!list_empty(&req->txps)) {
12708c2ecf20Sopenharmony_ci		struct sdma_txreq *t, *p;
12718c2ecf20Sopenharmony_ci
12728c2ecf20Sopenharmony_ci		list_for_each_entry_safe(t, p, &req->txps, list) {
12738c2ecf20Sopenharmony_ci			struct user_sdma_txreq *tx =
12748c2ecf20Sopenharmony_ci				container_of(t, struct user_sdma_txreq, txreq);
12758c2ecf20Sopenharmony_ci			list_del_init(&t->list);
12768c2ecf20Sopenharmony_ci			sdma_txclean(req->pq->dd, t);
12778c2ecf20Sopenharmony_ci			kmem_cache_free(req->pq->txreq_cache, tx);
12788c2ecf20Sopenharmony_ci		}
12798c2ecf20Sopenharmony_ci	}
12808c2ecf20Sopenharmony_ci
12818c2ecf20Sopenharmony_ci	kfree(req->tids);
12828c2ecf20Sopenharmony_ci	clear_bit(req->info.comp_idx, req->pq->req_in_use);
12838c2ecf20Sopenharmony_ci}
12848c2ecf20Sopenharmony_ci
/*
 * Publish the final state of a request in completion queue entry @idx.
 *
 * @pq:    packet queue the request belongs to (used for tracing only here)
 * @cq:    completion queue whose comps[@idx] entry is updated
 * @idx:   index of the completion entry
 * @state: state to store in the entry's status field
 * @ret:   status code; stored negated as the errcode when @state is ERROR
 */
static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
				  struct hfi1_user_sdma_comp_q *cq,
				  u16 idx, enum hfi1_sdma_comp_state state,
				  int ret)
{
	/* errcode is only written for failures; otherwise it is left alone */
	if (state == ERROR)
		cq->comps[idx].errcode = -ret;
	/*
	 * Order the errcode store before the status store.
	 * NOTE(review): presumably the consumer polls status and then reads
	 * errcode - confirm against the reader of cq->comps.
	 */
	smp_wmb(); /* make sure errcode is visible first */
	cq->comps[idx].status = state;
	trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
					idx, state, ret);
}
12978c2ecf20Sopenharmony_ci
12988c2ecf20Sopenharmony_cistatic void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
12998c2ecf20Sopenharmony_ci			       unsigned int start, unsigned int npages)
13008c2ecf20Sopenharmony_ci{
13018c2ecf20Sopenharmony_ci	hfi1_release_user_pages(mm, pages + start, npages, false);
13028c2ecf20Sopenharmony_ci	kfree(pages);
13038c2ecf20Sopenharmony_ci}
13048c2ecf20Sopenharmony_ci
13058c2ecf20Sopenharmony_cistatic void free_system_node(struct sdma_mmu_node *node)
13068c2ecf20Sopenharmony_ci{
13078c2ecf20Sopenharmony_ci	if (node->npages) {
13088c2ecf20Sopenharmony_ci		unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0,
13098c2ecf20Sopenharmony_ci				   node->npages);
13108c2ecf20Sopenharmony_ci		atomic_sub(node->npages, &node->pq->n_locked);
13118c2ecf20Sopenharmony_ci	}
13128c2ecf20Sopenharmony_ci	kfree(node);
13138c2ecf20Sopenharmony_ci}
13148c2ecf20Sopenharmony_ci
13158c2ecf20Sopenharmony_ci/*
13168c2ecf20Sopenharmony_ci * kref_get()'s an additional kref on the returned rb_node to prevent rb_node
13178c2ecf20Sopenharmony_ci * from being released until after rb_node is assigned to an SDMA descriptor
13188c2ecf20Sopenharmony_ci * (struct sdma_desc) under add_system_iovec_to_sdma_packet(), even if the
13198c2ecf20Sopenharmony_ci * virtual address range for rb_node is invalidated between now and then.
13208c2ecf20Sopenharmony_ci */
13218c2ecf20Sopenharmony_cistatic struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler,
13228c2ecf20Sopenharmony_ci					      unsigned long start,
13238c2ecf20Sopenharmony_ci					      unsigned long end)
13248c2ecf20Sopenharmony_ci{
13258c2ecf20Sopenharmony_ci	struct mmu_rb_node *rb_node;
13268c2ecf20Sopenharmony_ci	unsigned long flags;
13278c2ecf20Sopenharmony_ci
13288c2ecf20Sopenharmony_ci	spin_lock_irqsave(&handler->lock, flags);
13298c2ecf20Sopenharmony_ci	rb_node = hfi1_mmu_rb_get_first(handler, start, (end - start));
13308c2ecf20Sopenharmony_ci	if (!rb_node) {
13318c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&handler->lock, flags);
13328c2ecf20Sopenharmony_ci		return NULL;
13338c2ecf20Sopenharmony_ci	}
13348c2ecf20Sopenharmony_ci
13358c2ecf20Sopenharmony_ci	/* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */
13368c2ecf20Sopenharmony_ci	kref_get(&rb_node->refcount);
13378c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&handler->lock, flags);
13388c2ecf20Sopenharmony_ci
13398c2ecf20Sopenharmony_ci	return container_of(rb_node, struct sdma_mmu_node, rb);
13408c2ecf20Sopenharmony_ci}
13418c2ecf20Sopenharmony_ci
13428c2ecf20Sopenharmony_cistatic int pin_system_pages(struct user_sdma_request *req,
13438c2ecf20Sopenharmony_ci			    uintptr_t start_address, size_t length,
13448c2ecf20Sopenharmony_ci			    struct sdma_mmu_node *node, int npages)
13458c2ecf20Sopenharmony_ci{
13468c2ecf20Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq = req->pq;
13478c2ecf20Sopenharmony_ci	int pinned, cleared;
13488c2ecf20Sopenharmony_ci	struct page **pages;
13498c2ecf20Sopenharmony_ci
13508c2ecf20Sopenharmony_ci	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
13518c2ecf20Sopenharmony_ci	if (!pages)
13528c2ecf20Sopenharmony_ci		return -ENOMEM;
13538c2ecf20Sopenharmony_ci
13548c2ecf20Sopenharmony_ciretry:
13558c2ecf20Sopenharmony_ci	if (!hfi1_can_pin_pages(pq->dd, current->mm, atomic_read(&pq->n_locked),
13568c2ecf20Sopenharmony_ci				npages)) {
13578c2ecf20Sopenharmony_ci		SDMA_DBG(req, "Evicting: nlocked %u npages %u",
13588c2ecf20Sopenharmony_ci			 atomic_read(&pq->n_locked), npages);
13598c2ecf20Sopenharmony_ci		cleared = sdma_cache_evict(pq, npages);
13608c2ecf20Sopenharmony_ci		if (cleared >= npages)
13618c2ecf20Sopenharmony_ci			goto retry;
13628c2ecf20Sopenharmony_ci	}
13638c2ecf20Sopenharmony_ci
13648c2ecf20Sopenharmony_ci	SDMA_DBG(req, "Acquire user pages start_address %lx node->npages %u npages %u",
13658c2ecf20Sopenharmony_ci		 start_address, node->npages, npages);
13668c2ecf20Sopenharmony_ci	pinned = hfi1_acquire_user_pages(current->mm, start_address, npages, 0,
13678c2ecf20Sopenharmony_ci					 pages);
13688c2ecf20Sopenharmony_ci
13698c2ecf20Sopenharmony_ci	if (pinned < 0) {
13708c2ecf20Sopenharmony_ci		kfree(pages);
13718c2ecf20Sopenharmony_ci		SDMA_DBG(req, "pinned %d", pinned);
13728c2ecf20Sopenharmony_ci		return pinned;
13738c2ecf20Sopenharmony_ci	}
13748c2ecf20Sopenharmony_ci	if (pinned != npages) {
13758c2ecf20Sopenharmony_ci		unpin_vector_pages(current->mm, pages, node->npages, pinned);
13768c2ecf20Sopenharmony_ci		SDMA_DBG(req, "npages %u pinned %d", npages, pinned);
13778c2ecf20Sopenharmony_ci		return -EFAULT;
13788c2ecf20Sopenharmony_ci	}
13798c2ecf20Sopenharmony_ci	node->rb.addr = start_address;
13808c2ecf20Sopenharmony_ci	node->rb.len = length;
13818c2ecf20Sopenharmony_ci	node->pages = pages;
13828c2ecf20Sopenharmony_ci	node->npages = npages;
13838c2ecf20Sopenharmony_ci	atomic_add(pinned, &pq->n_locked);
13848c2ecf20Sopenharmony_ci	SDMA_DBG(req, "done. pinned %d", pinned);
13858c2ecf20Sopenharmony_ci	return 0;
13868c2ecf20Sopenharmony_ci}
13878c2ecf20Sopenharmony_ci
13888c2ecf20Sopenharmony_ci/*
13898c2ecf20Sopenharmony_ci * kref refcount on *node_p will be 2 on successful addition: one kref from
13908c2ecf20Sopenharmony_ci * kref_init() for mmu_rb_handler and one kref to prevent *node_p from being
13918c2ecf20Sopenharmony_ci * released until after *node_p is assigned to an SDMA descriptor (struct
13928c2ecf20Sopenharmony_ci * sdma_desc) under add_system_iovec_to_sdma_packet(), even if the virtual
13938c2ecf20Sopenharmony_ci * address range for *node_p is invalidated between now and then.
13948c2ecf20Sopenharmony_ci */
13958c2ecf20Sopenharmony_cistatic int add_system_pinning(struct user_sdma_request *req,
13968c2ecf20Sopenharmony_ci			      struct sdma_mmu_node **node_p,
13978c2ecf20Sopenharmony_ci			      unsigned long start, unsigned long len)
13988c2ecf20Sopenharmony_ci
13998c2ecf20Sopenharmony_ci{
14008c2ecf20Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq = req->pq;
14018c2ecf20Sopenharmony_ci	struct sdma_mmu_node *node;
14028c2ecf20Sopenharmony_ci	int ret;
14038c2ecf20Sopenharmony_ci
14048c2ecf20Sopenharmony_ci	node = kzalloc(sizeof(*node), GFP_KERNEL);
14058c2ecf20Sopenharmony_ci	if (!node)
14068c2ecf20Sopenharmony_ci		return -ENOMEM;
14078c2ecf20Sopenharmony_ci
14088c2ecf20Sopenharmony_ci	/* First kref "moves" to mmu_rb_handler */
14098c2ecf20Sopenharmony_ci	kref_init(&node->rb.refcount);
14108c2ecf20Sopenharmony_ci
14118c2ecf20Sopenharmony_ci	/* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */
14128c2ecf20Sopenharmony_ci	kref_get(&node->rb.refcount);
14138c2ecf20Sopenharmony_ci
14148c2ecf20Sopenharmony_ci	node->pq = pq;
14158c2ecf20Sopenharmony_ci	ret = pin_system_pages(req, start, len, node, PFN_DOWN(len));
14168c2ecf20Sopenharmony_ci	if (ret == 0) {
14178c2ecf20Sopenharmony_ci		ret = hfi1_mmu_rb_insert(pq->handler, &node->rb);
14188c2ecf20Sopenharmony_ci		if (ret)
14198c2ecf20Sopenharmony_ci			free_system_node(node);
14208c2ecf20Sopenharmony_ci		else
14218c2ecf20Sopenharmony_ci			*node_p = node;
14228c2ecf20Sopenharmony_ci
14238c2ecf20Sopenharmony_ci		return ret;
14248c2ecf20Sopenharmony_ci	}
14258c2ecf20Sopenharmony_ci
14268c2ecf20Sopenharmony_ci	kfree(node);
14278c2ecf20Sopenharmony_ci	return ret;
14288c2ecf20Sopenharmony_ci}
14298c2ecf20Sopenharmony_ci
14308c2ecf20Sopenharmony_cistatic int get_system_cache_entry(struct user_sdma_request *req,
14318c2ecf20Sopenharmony_ci				  struct sdma_mmu_node **node_p,
14328c2ecf20Sopenharmony_ci				  size_t req_start, size_t req_len)
14338c2ecf20Sopenharmony_ci{
14348c2ecf20Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq = req->pq;
14358c2ecf20Sopenharmony_ci	u64 start = ALIGN_DOWN(req_start, PAGE_SIZE);
14368c2ecf20Sopenharmony_ci	u64 end = PFN_ALIGN(req_start + req_len);
14378c2ecf20Sopenharmony_ci	struct mmu_rb_handler *handler = pq->handler;
14388c2ecf20Sopenharmony_ci	int ret;
14398c2ecf20Sopenharmony_ci
14408c2ecf20Sopenharmony_ci	if ((end - start) == 0) {
14418c2ecf20Sopenharmony_ci		SDMA_DBG(req,
14428c2ecf20Sopenharmony_ci			 "Request for empty cache entry req_start %lx req_len %lx start %llx end %llx",
14438c2ecf20Sopenharmony_ci			 req_start, req_len, start, end);
14448c2ecf20Sopenharmony_ci		return -EINVAL;
14458c2ecf20Sopenharmony_ci	}
14468c2ecf20Sopenharmony_ci
14478c2ecf20Sopenharmony_ci	SDMA_DBG(req, "req_start %lx req_len %lu", req_start, req_len);
14488c2ecf20Sopenharmony_ci
14498c2ecf20Sopenharmony_ci	while (1) {
14508c2ecf20Sopenharmony_ci		struct sdma_mmu_node *node =
14518c2ecf20Sopenharmony_ci			find_system_node(handler, start, end);
14528c2ecf20Sopenharmony_ci		u64 prepend_len = 0;
14538c2ecf20Sopenharmony_ci
14548c2ecf20Sopenharmony_ci		SDMA_DBG(req, "node %p start %llx end %llu", node, start, end);
14558c2ecf20Sopenharmony_ci		if (!node) {
14568c2ecf20Sopenharmony_ci			ret = add_system_pinning(req, node_p, start,
14578c2ecf20Sopenharmony_ci						 end - start);
14588c2ecf20Sopenharmony_ci			if (ret == -EEXIST) {
14598c2ecf20Sopenharmony_ci				/*
14608c2ecf20Sopenharmony_ci				 * Another execution context has inserted a
14618c2ecf20Sopenharmony_ci				 * conficting entry first.
14628c2ecf20Sopenharmony_ci				 */
14638c2ecf20Sopenharmony_ci				continue;
14648c2ecf20Sopenharmony_ci			}
14658c2ecf20Sopenharmony_ci			return ret;
14668c2ecf20Sopenharmony_ci		}
14678c2ecf20Sopenharmony_ci
14688c2ecf20Sopenharmony_ci		if (node->rb.addr <= start) {
14698c2ecf20Sopenharmony_ci			/*
14708c2ecf20Sopenharmony_ci			 * This entry covers at least part of the region. If it doesn't extend
14718c2ecf20Sopenharmony_ci			 * to the end, then this will be called again for the next segment.
14728c2ecf20Sopenharmony_ci			 */
14738c2ecf20Sopenharmony_ci			*node_p = node;
14748c2ecf20Sopenharmony_ci			return 0;
14758c2ecf20Sopenharmony_ci		}
14768c2ecf20Sopenharmony_ci
14778c2ecf20Sopenharmony_ci		SDMA_DBG(req, "prepend: node->rb.addr %lx, node->rb.refcount %d",
14788c2ecf20Sopenharmony_ci			 node->rb.addr, kref_read(&node->rb.refcount));
14798c2ecf20Sopenharmony_ci		prepend_len = node->rb.addr - start;
14808c2ecf20Sopenharmony_ci
14818c2ecf20Sopenharmony_ci		/*
14828c2ecf20Sopenharmony_ci		 * This node will not be returned, instead a new node
14838c2ecf20Sopenharmony_ci		 * will be. So release the reference.
14848c2ecf20Sopenharmony_ci		 */
14858c2ecf20Sopenharmony_ci		kref_put(&node->rb.refcount, hfi1_mmu_rb_release);
14868c2ecf20Sopenharmony_ci
14878c2ecf20Sopenharmony_ci		/* Prepend a node to cover the beginning of the allocation */
14888c2ecf20Sopenharmony_ci		ret = add_system_pinning(req, node_p, start, prepend_len);
14898c2ecf20Sopenharmony_ci		if (ret == -EEXIST) {
14908c2ecf20Sopenharmony_ci			/* Another execution context has inserted a conficting entry first. */
14918c2ecf20Sopenharmony_ci			continue;
14928c2ecf20Sopenharmony_ci		}
14938c2ecf20Sopenharmony_ci		return ret;
14948c2ecf20Sopenharmony_ci	}
14958c2ecf20Sopenharmony_ci}
14968c2ecf20Sopenharmony_ci
14978c2ecf20Sopenharmony_cistatic void sdma_mmu_rb_node_get(void *ctx)
14988c2ecf20Sopenharmony_ci{
14998c2ecf20Sopenharmony_ci	struct mmu_rb_node *node = ctx;
15008c2ecf20Sopenharmony_ci
15018c2ecf20Sopenharmony_ci	kref_get(&node->refcount);
15028c2ecf20Sopenharmony_ci}
15038c2ecf20Sopenharmony_ci
15048c2ecf20Sopenharmony_cistatic void sdma_mmu_rb_node_put(void *ctx)
15058c2ecf20Sopenharmony_ci{
15068c2ecf20Sopenharmony_ci	struct sdma_mmu_node *node = ctx;
15078c2ecf20Sopenharmony_ci
15088c2ecf20Sopenharmony_ci	kref_put(&node->rb.refcount, hfi1_mmu_rb_release);
15098c2ecf20Sopenharmony_ci}
15108c2ecf20Sopenharmony_ci
15118c2ecf20Sopenharmony_cistatic int add_mapping_to_sdma_packet(struct user_sdma_request *req,
15128c2ecf20Sopenharmony_ci				      struct user_sdma_txreq *tx,
15138c2ecf20Sopenharmony_ci				      struct sdma_mmu_node *cache_entry,
15148c2ecf20Sopenharmony_ci				      size_t start,
15158c2ecf20Sopenharmony_ci				      size_t from_this_cache_entry)
15168c2ecf20Sopenharmony_ci{
15178c2ecf20Sopenharmony_ci	struct hfi1_user_sdma_pkt_q *pq = req->pq;
15188c2ecf20Sopenharmony_ci	unsigned int page_offset;
15198c2ecf20Sopenharmony_ci	unsigned int from_this_page;
15208c2ecf20Sopenharmony_ci	size_t page_index;
15218c2ecf20Sopenharmony_ci	void *ctx;
15228c2ecf20Sopenharmony_ci	int ret;
15238c2ecf20Sopenharmony_ci
15248c2ecf20Sopenharmony_ci	/*
15258c2ecf20Sopenharmony_ci	 * Because the cache may be more fragmented than the memory that is being accessed,
15268c2ecf20Sopenharmony_ci	 * it's not strictly necessary to have a descriptor per cache entry.
15278c2ecf20Sopenharmony_ci	 */
15288c2ecf20Sopenharmony_ci
15298c2ecf20Sopenharmony_ci	while (from_this_cache_entry) {
15308c2ecf20Sopenharmony_ci		page_index = PFN_DOWN(start - cache_entry->rb.addr);
15318c2ecf20Sopenharmony_ci
15328c2ecf20Sopenharmony_ci		if (page_index >= cache_entry->npages) {
15338c2ecf20Sopenharmony_ci			SDMA_DBG(req,
15348c2ecf20Sopenharmony_ci				 "Request for page_index %zu >= cache_entry->npages %u",
15358c2ecf20Sopenharmony_ci				 page_index, cache_entry->npages);
15368c2ecf20Sopenharmony_ci			return -EINVAL;
15378c2ecf20Sopenharmony_ci		}
15388c2ecf20Sopenharmony_ci
15398c2ecf20Sopenharmony_ci		page_offset = start - ALIGN_DOWN(start, PAGE_SIZE);
15408c2ecf20Sopenharmony_ci		from_this_page = PAGE_SIZE - page_offset;
15418c2ecf20Sopenharmony_ci
15428c2ecf20Sopenharmony_ci		if (from_this_page < from_this_cache_entry) {
15438c2ecf20Sopenharmony_ci			ctx = NULL;
15448c2ecf20Sopenharmony_ci		} else {
15458c2ecf20Sopenharmony_ci			/*
15468c2ecf20Sopenharmony_ci			 * In the case they are equal the next line has no practical effect,
15478c2ecf20Sopenharmony_ci			 * but it's better to do a register to register copy than a conditional
15488c2ecf20Sopenharmony_ci			 * branch.
15498c2ecf20Sopenharmony_ci			 */
15508c2ecf20Sopenharmony_ci			from_this_page = from_this_cache_entry;
15518c2ecf20Sopenharmony_ci			ctx = cache_entry;
15528c2ecf20Sopenharmony_ci		}
15538c2ecf20Sopenharmony_ci
15548c2ecf20Sopenharmony_ci		ret = sdma_txadd_page(pq->dd, &tx->txreq,
15558c2ecf20Sopenharmony_ci				      cache_entry->pages[page_index],
15568c2ecf20Sopenharmony_ci				      page_offset, from_this_page,
15578c2ecf20Sopenharmony_ci				      ctx,
15588c2ecf20Sopenharmony_ci				      sdma_mmu_rb_node_get,
15598c2ecf20Sopenharmony_ci				      sdma_mmu_rb_node_put);
15608c2ecf20Sopenharmony_ci		if (ret) {
15618c2ecf20Sopenharmony_ci			/*
15628c2ecf20Sopenharmony_ci			 * When there's a failure, the entire request is freed by
15638c2ecf20Sopenharmony_ci			 * user_sdma_send_pkts().
15648c2ecf20Sopenharmony_ci			 */
15658c2ecf20Sopenharmony_ci			SDMA_DBG(req,
15668c2ecf20Sopenharmony_ci				 "sdma_txadd_page failed %d page_index %lu page_offset %u from_this_page %u",
15678c2ecf20Sopenharmony_ci				 ret, page_index, page_offset, from_this_page);
15688c2ecf20Sopenharmony_ci			return ret;
15698c2ecf20Sopenharmony_ci		}
15708c2ecf20Sopenharmony_ci		start += from_this_page;
15718c2ecf20Sopenharmony_ci		from_this_cache_entry -= from_this_page;
15728c2ecf20Sopenharmony_ci	}
15738c2ecf20Sopenharmony_ci	return 0;
15748c2ecf20Sopenharmony_ci}
15758c2ecf20Sopenharmony_ci
15768c2ecf20Sopenharmony_cistatic int add_system_iovec_to_sdma_packet(struct user_sdma_request *req,
15778c2ecf20Sopenharmony_ci					   struct user_sdma_txreq *tx,
15788c2ecf20Sopenharmony_ci					   struct user_sdma_iovec *iovec,
15798c2ecf20Sopenharmony_ci					   size_t from_this_iovec)
15808c2ecf20Sopenharmony_ci{
15818c2ecf20Sopenharmony_ci	while (from_this_iovec > 0) {
15828c2ecf20Sopenharmony_ci		struct sdma_mmu_node *cache_entry;
15838c2ecf20Sopenharmony_ci		size_t from_this_cache_entry;
15848c2ecf20Sopenharmony_ci		size_t start;
15858c2ecf20Sopenharmony_ci		int ret;
15868c2ecf20Sopenharmony_ci
15878c2ecf20Sopenharmony_ci		start = (uintptr_t)iovec->iov.iov_base + iovec->offset;
15888c2ecf20Sopenharmony_ci		ret = get_system_cache_entry(req, &cache_entry, start,
15898c2ecf20Sopenharmony_ci					     from_this_iovec);
15908c2ecf20Sopenharmony_ci		if (ret) {
15918c2ecf20Sopenharmony_ci			SDMA_DBG(req, "pin system segment failed %d", ret);
15928c2ecf20Sopenharmony_ci			return ret;
15938c2ecf20Sopenharmony_ci		}
15948c2ecf20Sopenharmony_ci
15958c2ecf20Sopenharmony_ci		from_this_cache_entry = cache_entry->rb.len - (start - cache_entry->rb.addr);
15968c2ecf20Sopenharmony_ci		if (from_this_cache_entry > from_this_iovec)
15978c2ecf20Sopenharmony_ci			from_this_cache_entry = from_this_iovec;
15988c2ecf20Sopenharmony_ci
15998c2ecf20Sopenharmony_ci		ret = add_mapping_to_sdma_packet(req, tx, cache_entry, start,
16008c2ecf20Sopenharmony_ci						 from_this_cache_entry);
16018c2ecf20Sopenharmony_ci
16028c2ecf20Sopenharmony_ci		/*
16038c2ecf20Sopenharmony_ci		 * Done adding cache_entry to zero or more sdma_desc. Can
16048c2ecf20Sopenharmony_ci		 * kref_put() the "safety" kref taken under
16058c2ecf20Sopenharmony_ci		 * get_system_cache_entry().
16068c2ecf20Sopenharmony_ci		 */
16078c2ecf20Sopenharmony_ci		kref_put(&cache_entry->rb.refcount, hfi1_mmu_rb_release);
16088c2ecf20Sopenharmony_ci
16098c2ecf20Sopenharmony_ci		if (ret) {
16108c2ecf20Sopenharmony_ci			SDMA_DBG(req, "add system segment failed %d", ret);
16118c2ecf20Sopenharmony_ci			return ret;
16128c2ecf20Sopenharmony_ci		}
16138c2ecf20Sopenharmony_ci
16148c2ecf20Sopenharmony_ci		iovec->offset += from_this_cache_entry;
16158c2ecf20Sopenharmony_ci		from_this_iovec -= from_this_cache_entry;
16168c2ecf20Sopenharmony_ci	}
16178c2ecf20Sopenharmony_ci
16188c2ecf20Sopenharmony_ci	return 0;
16198c2ecf20Sopenharmony_ci}
16208c2ecf20Sopenharmony_ci
16218c2ecf20Sopenharmony_cistatic int add_system_pages_to_sdma_packet(struct user_sdma_request *req,
16228c2ecf20Sopenharmony_ci					   struct user_sdma_txreq *tx,
16238c2ecf20Sopenharmony_ci					   struct user_sdma_iovec *iovec,
16248c2ecf20Sopenharmony_ci					   u32 *pkt_data_remaining)
16258c2ecf20Sopenharmony_ci{
16268c2ecf20Sopenharmony_ci	size_t remaining_to_add = *pkt_data_remaining;
16278c2ecf20Sopenharmony_ci	/*
16288c2ecf20Sopenharmony_ci	 * Walk through iovec entries, ensure the associated pages
16298c2ecf20Sopenharmony_ci	 * are pinned and mapped, add data to the packet until no more
16308c2ecf20Sopenharmony_ci	 * data remains to be added.
16318c2ecf20Sopenharmony_ci	 */
16328c2ecf20Sopenharmony_ci	while (remaining_to_add > 0) {
16338c2ecf20Sopenharmony_ci		struct user_sdma_iovec *cur_iovec;
16348c2ecf20Sopenharmony_ci		size_t from_this_iovec;
16358c2ecf20Sopenharmony_ci		int ret;
16368c2ecf20Sopenharmony_ci
16378c2ecf20Sopenharmony_ci		cur_iovec = iovec;
16388c2ecf20Sopenharmony_ci		from_this_iovec = iovec->iov.iov_len - iovec->offset;
16398c2ecf20Sopenharmony_ci
16408c2ecf20Sopenharmony_ci		if (from_this_iovec > remaining_to_add) {
16418c2ecf20Sopenharmony_ci			from_this_iovec = remaining_to_add;
16428c2ecf20Sopenharmony_ci		} else {
16438c2ecf20Sopenharmony_ci			/* The current iovec entry will be consumed by this pass. */
16448c2ecf20Sopenharmony_ci			req->iov_idx++;
16458c2ecf20Sopenharmony_ci			iovec++;
16468c2ecf20Sopenharmony_ci		}
16478c2ecf20Sopenharmony_ci
16488c2ecf20Sopenharmony_ci		ret = add_system_iovec_to_sdma_packet(req, tx, cur_iovec,
16498c2ecf20Sopenharmony_ci						      from_this_iovec);
16508c2ecf20Sopenharmony_ci		if (ret)
16518c2ecf20Sopenharmony_ci			return ret;
16528c2ecf20Sopenharmony_ci
16538c2ecf20Sopenharmony_ci		remaining_to_add -= from_this_iovec;
16548c2ecf20Sopenharmony_ci	}
16558c2ecf20Sopenharmony_ci	*pkt_data_remaining = remaining_to_add;
16568c2ecf20Sopenharmony_ci
16578c2ecf20Sopenharmony_ci	return 0;
16588c2ecf20Sopenharmony_ci}
16598c2ecf20Sopenharmony_ci
16608c2ecf20Sopenharmony_cistatic bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
16618c2ecf20Sopenharmony_ci			   unsigned long len)
16628c2ecf20Sopenharmony_ci{
16638c2ecf20Sopenharmony_ci	return (bool)(node->addr == addr);
16648c2ecf20Sopenharmony_ci}
16658c2ecf20Sopenharmony_ci
16668c2ecf20Sopenharmony_ci/*
16678c2ecf20Sopenharmony_ci * Return 1 to remove the node from the rb tree and call the remove op.
16688c2ecf20Sopenharmony_ci *
16698c2ecf20Sopenharmony_ci * Called with the rb tree lock held.
16708c2ecf20Sopenharmony_ci */
16718c2ecf20Sopenharmony_cistatic int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
16728c2ecf20Sopenharmony_ci			 void *evict_arg, bool *stop)
16738c2ecf20Sopenharmony_ci{
16748c2ecf20Sopenharmony_ci	struct sdma_mmu_node *node =
16758c2ecf20Sopenharmony_ci		container_of(mnode, struct sdma_mmu_node, rb);
16768c2ecf20Sopenharmony_ci	struct evict_data *evict_data = evict_arg;
16778c2ecf20Sopenharmony_ci
16788c2ecf20Sopenharmony_ci	/* this node will be evicted, add its pages to our count */
16798c2ecf20Sopenharmony_ci	evict_data->cleared += node->npages;
16808c2ecf20Sopenharmony_ci
16818c2ecf20Sopenharmony_ci	/* have enough pages been cleared? */
16828c2ecf20Sopenharmony_ci	if (evict_data->cleared >= evict_data->target)
16838c2ecf20Sopenharmony_ci		*stop = true;
16848c2ecf20Sopenharmony_ci
16858c2ecf20Sopenharmony_ci	return 1; /* remove this node */
16868c2ecf20Sopenharmony_ci}
16878c2ecf20Sopenharmony_ci
16888c2ecf20Sopenharmony_cistatic void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
16898c2ecf20Sopenharmony_ci{
16908c2ecf20Sopenharmony_ci	struct sdma_mmu_node *node =
16918c2ecf20Sopenharmony_ci		container_of(mnode, struct sdma_mmu_node, rb);
16928c2ecf20Sopenharmony_ci
16938c2ecf20Sopenharmony_ci	free_system_node(node);
16948c2ecf20Sopenharmony_ci}
1695