162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright(c) 2016 - 2018 Intel Corporation.
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/slab.h>
762306a36Sopenharmony_ci#include <linux/vmalloc.h>
862306a36Sopenharmony_ci#include "cq.h"
962306a36Sopenharmony_ci#include "vt.h"
1062306a36Sopenharmony_ci#include "trace.h"
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_cistatic struct workqueue_struct *comp_vector_wq;
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci/**
1562306a36Sopenharmony_ci * rvt_cq_enter - add a new entry to the completion queue
1662306a36Sopenharmony_ci * @cq: completion queue
1762306a36Sopenharmony_ci * @entry: work completion entry to add
1862306a36Sopenharmony_ci * @solicited: true if @entry is solicited
1962306a36Sopenharmony_ci *
2062306a36Sopenharmony_ci * This may be called with qp->s_lock held.
2162306a36Sopenharmony_ci *
2262306a36Sopenharmony_ci * Return: return true on success, else return
2362306a36Sopenharmony_ci * false if cq is full.
2462306a36Sopenharmony_ci */
2562306a36Sopenharmony_cibool rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
2662306a36Sopenharmony_ci{
2762306a36Sopenharmony_ci	struct ib_uverbs_wc *uqueue = NULL;
2862306a36Sopenharmony_ci	struct ib_wc *kqueue = NULL;
2962306a36Sopenharmony_ci	struct rvt_cq_wc *u_wc = NULL;
3062306a36Sopenharmony_ci	struct rvt_k_cq_wc *k_wc = NULL;
3162306a36Sopenharmony_ci	unsigned long flags;
3262306a36Sopenharmony_ci	u32 head;
3362306a36Sopenharmony_ci	u32 next;
3462306a36Sopenharmony_ci	u32 tail;
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci	spin_lock_irqsave(&cq->lock, flags);
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_ci	if (cq->ip) {
3962306a36Sopenharmony_ci		u_wc = cq->queue;
4062306a36Sopenharmony_ci		uqueue = &u_wc->uqueue[0];
4162306a36Sopenharmony_ci		head = RDMA_READ_UAPI_ATOMIC(u_wc->head);
4262306a36Sopenharmony_ci		tail = RDMA_READ_UAPI_ATOMIC(u_wc->tail);
4362306a36Sopenharmony_ci	} else {
4462306a36Sopenharmony_ci		k_wc = cq->kqueue;
4562306a36Sopenharmony_ci		kqueue = &k_wc->kqueue[0];
4662306a36Sopenharmony_ci		head = k_wc->head;
4762306a36Sopenharmony_ci		tail = k_wc->tail;
4862306a36Sopenharmony_ci	}
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci	/*
5162306a36Sopenharmony_ci	 * Note that the head pointer might be writable by
5262306a36Sopenharmony_ci	 * user processes.Take care to verify it is a sane value.
5362306a36Sopenharmony_ci	 */
5462306a36Sopenharmony_ci	if (head >= (unsigned)cq->ibcq.cqe) {
5562306a36Sopenharmony_ci		head = cq->ibcq.cqe;
5662306a36Sopenharmony_ci		next = 0;
5762306a36Sopenharmony_ci	} else {
5862306a36Sopenharmony_ci		next = head + 1;
5962306a36Sopenharmony_ci	}
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci	if (unlikely(next == tail || cq->cq_full)) {
6262306a36Sopenharmony_ci		struct rvt_dev_info *rdi = cq->rdi;
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci		if (!cq->cq_full)
6562306a36Sopenharmony_ci			rvt_pr_err_ratelimited(rdi, "CQ is full!\n");
6662306a36Sopenharmony_ci		cq->cq_full = true;
6762306a36Sopenharmony_ci		spin_unlock_irqrestore(&cq->lock, flags);
6862306a36Sopenharmony_ci		if (cq->ibcq.event_handler) {
6962306a36Sopenharmony_ci			struct ib_event ev;
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci			ev.device = cq->ibcq.device;
7262306a36Sopenharmony_ci			ev.element.cq = &cq->ibcq;
7362306a36Sopenharmony_ci			ev.event = IB_EVENT_CQ_ERR;
7462306a36Sopenharmony_ci			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
7562306a36Sopenharmony_ci		}
7662306a36Sopenharmony_ci		return false;
7762306a36Sopenharmony_ci	}
7862306a36Sopenharmony_ci	trace_rvt_cq_enter(cq, entry, head);
7962306a36Sopenharmony_ci	if (uqueue) {
8062306a36Sopenharmony_ci		uqueue[head].wr_id = entry->wr_id;
8162306a36Sopenharmony_ci		uqueue[head].status = entry->status;
8262306a36Sopenharmony_ci		uqueue[head].opcode = entry->opcode;
8362306a36Sopenharmony_ci		uqueue[head].vendor_err = entry->vendor_err;
8462306a36Sopenharmony_ci		uqueue[head].byte_len = entry->byte_len;
8562306a36Sopenharmony_ci		uqueue[head].ex.imm_data = entry->ex.imm_data;
8662306a36Sopenharmony_ci		uqueue[head].qp_num = entry->qp->qp_num;
8762306a36Sopenharmony_ci		uqueue[head].src_qp = entry->src_qp;
8862306a36Sopenharmony_ci		uqueue[head].wc_flags = entry->wc_flags;
8962306a36Sopenharmony_ci		uqueue[head].pkey_index = entry->pkey_index;
9062306a36Sopenharmony_ci		uqueue[head].slid = ib_lid_cpu16(entry->slid);
9162306a36Sopenharmony_ci		uqueue[head].sl = entry->sl;
9262306a36Sopenharmony_ci		uqueue[head].dlid_path_bits = entry->dlid_path_bits;
9362306a36Sopenharmony_ci		uqueue[head].port_num = entry->port_num;
9462306a36Sopenharmony_ci		/* Make sure entry is written before the head index. */
9562306a36Sopenharmony_ci		RDMA_WRITE_UAPI_ATOMIC(u_wc->head, next);
9662306a36Sopenharmony_ci	} else {
9762306a36Sopenharmony_ci		kqueue[head] = *entry;
9862306a36Sopenharmony_ci		k_wc->head = next;
9962306a36Sopenharmony_ci	}
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	if (cq->notify == IB_CQ_NEXT_COMP ||
10262306a36Sopenharmony_ci	    (cq->notify == IB_CQ_SOLICITED &&
10362306a36Sopenharmony_ci	     (solicited || entry->status != IB_WC_SUCCESS))) {
10462306a36Sopenharmony_ci		/*
10562306a36Sopenharmony_ci		 * This will cause send_complete() to be called in
10662306a36Sopenharmony_ci		 * another thread.
10762306a36Sopenharmony_ci		 */
10862306a36Sopenharmony_ci		cq->notify = RVT_CQ_NONE;
10962306a36Sopenharmony_ci		cq->triggered++;
11062306a36Sopenharmony_ci		queue_work_on(cq->comp_vector_cpu, comp_vector_wq,
11162306a36Sopenharmony_ci			      &cq->comptask);
11262306a36Sopenharmony_ci	}
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci	spin_unlock_irqrestore(&cq->lock, flags);
11562306a36Sopenharmony_ci	return true;
11662306a36Sopenharmony_ci}
11762306a36Sopenharmony_ciEXPORT_SYMBOL(rvt_cq_enter);
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_cistatic void send_complete(struct work_struct *work)
12062306a36Sopenharmony_ci{
12162306a36Sopenharmony_ci	struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	/*
12462306a36Sopenharmony_ci	 * The completion handler will most likely rearm the notification
12562306a36Sopenharmony_ci	 * and poll for all pending entries.  If a new completion entry
12662306a36Sopenharmony_ci	 * is added while we are in this routine, queue_work()
12762306a36Sopenharmony_ci	 * won't call us again until we return so we check triggered to
12862306a36Sopenharmony_ci	 * see if we need to call the handler again.
12962306a36Sopenharmony_ci	 */
13062306a36Sopenharmony_ci	for (;;) {
13162306a36Sopenharmony_ci		u8 triggered = cq->triggered;
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci		/*
13462306a36Sopenharmony_ci		 * IPoIB connected mode assumes the callback is from a
13562306a36Sopenharmony_ci		 * soft IRQ. We simulate this by blocking "bottom halves".
13662306a36Sopenharmony_ci		 * See the implementation for ipoib_cm_handle_tx_wc(),
13762306a36Sopenharmony_ci		 * netif_tx_lock_bh() and netif_tx_lock().
13862306a36Sopenharmony_ci		 */
13962306a36Sopenharmony_ci		local_bh_disable();
14062306a36Sopenharmony_ci		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
14162306a36Sopenharmony_ci		local_bh_enable();
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci		if (cq->triggered == triggered)
14462306a36Sopenharmony_ci			return;
14562306a36Sopenharmony_ci	}
14662306a36Sopenharmony_ci}
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci/**
14962306a36Sopenharmony_ci * rvt_create_cq - create a completion queue
15062306a36Sopenharmony_ci * @ibcq: Allocated CQ
15162306a36Sopenharmony_ci * @attr: creation attributes
15262306a36Sopenharmony_ci * @udata: user data for libibverbs.so
15362306a36Sopenharmony_ci *
15462306a36Sopenharmony_ci * Called by ib_create_cq() in the generic verbs code.
15562306a36Sopenharmony_ci *
15662306a36Sopenharmony_ci * Return: 0 on success
15762306a36Sopenharmony_ci */
15862306a36Sopenharmony_ciint rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
15962306a36Sopenharmony_ci		  struct ib_udata *udata)
16062306a36Sopenharmony_ci{
16162306a36Sopenharmony_ci	struct ib_device *ibdev = ibcq->device;
16262306a36Sopenharmony_ci	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
16362306a36Sopenharmony_ci	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
16462306a36Sopenharmony_ci	struct rvt_cq_wc *u_wc = NULL;
16562306a36Sopenharmony_ci	struct rvt_k_cq_wc *k_wc = NULL;
16662306a36Sopenharmony_ci	u32 sz;
16762306a36Sopenharmony_ci	unsigned int entries = attr->cqe;
16862306a36Sopenharmony_ci	int comp_vector = attr->comp_vector;
16962306a36Sopenharmony_ci	int err;
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci	if (attr->flags)
17262306a36Sopenharmony_ci		return -EOPNOTSUPP;
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci	if (entries < 1 || entries > rdi->dparms.props.max_cqe)
17562306a36Sopenharmony_ci		return -EINVAL;
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci	if (comp_vector < 0)
17862306a36Sopenharmony_ci		comp_vector = 0;
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_ci	comp_vector = comp_vector % rdi->ibdev.num_comp_vectors;
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	/*
18362306a36Sopenharmony_ci	 * Allocate the completion queue entries and head/tail pointers.
18462306a36Sopenharmony_ci	 * This is allocated separately so that it can be resized and
18562306a36Sopenharmony_ci	 * also mapped into user space.
18662306a36Sopenharmony_ci	 * We need to use vmalloc() in order to support mmap and large
18762306a36Sopenharmony_ci	 * numbers of entries.
18862306a36Sopenharmony_ci	 */
18962306a36Sopenharmony_ci	if (udata && udata->outlen >= sizeof(__u64)) {
19062306a36Sopenharmony_ci		sz = sizeof(struct ib_uverbs_wc) * (entries + 1);
19162306a36Sopenharmony_ci		sz += sizeof(*u_wc);
19262306a36Sopenharmony_ci		u_wc = vmalloc_user(sz);
19362306a36Sopenharmony_ci		if (!u_wc)
19462306a36Sopenharmony_ci			return -ENOMEM;
19562306a36Sopenharmony_ci	} else {
19662306a36Sopenharmony_ci		sz = sizeof(struct ib_wc) * (entries + 1);
19762306a36Sopenharmony_ci		sz += sizeof(*k_wc);
19862306a36Sopenharmony_ci		k_wc = vzalloc_node(sz, rdi->dparms.node);
19962306a36Sopenharmony_ci		if (!k_wc)
20062306a36Sopenharmony_ci			return -ENOMEM;
20162306a36Sopenharmony_ci	}
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	/*
20462306a36Sopenharmony_ci	 * Return the address of the WC as the offset to mmap.
20562306a36Sopenharmony_ci	 * See rvt_mmap() for details.
20662306a36Sopenharmony_ci	 */
20762306a36Sopenharmony_ci	if (udata && udata->outlen >= sizeof(__u64)) {
20862306a36Sopenharmony_ci		cq->ip = rvt_create_mmap_info(rdi, sz, udata, u_wc);
20962306a36Sopenharmony_ci		if (IS_ERR(cq->ip)) {
21062306a36Sopenharmony_ci			err = PTR_ERR(cq->ip);
21162306a36Sopenharmony_ci			goto bail_wc;
21262306a36Sopenharmony_ci		}
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci		err = ib_copy_to_udata(udata, &cq->ip->offset,
21562306a36Sopenharmony_ci				       sizeof(cq->ip->offset));
21662306a36Sopenharmony_ci		if (err)
21762306a36Sopenharmony_ci			goto bail_ip;
21862306a36Sopenharmony_ci	}
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci	spin_lock_irq(&rdi->n_cqs_lock);
22162306a36Sopenharmony_ci	if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
22262306a36Sopenharmony_ci		spin_unlock_irq(&rdi->n_cqs_lock);
22362306a36Sopenharmony_ci		err = -ENOMEM;
22462306a36Sopenharmony_ci		goto bail_ip;
22562306a36Sopenharmony_ci	}
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci	rdi->n_cqs_allocated++;
22862306a36Sopenharmony_ci	spin_unlock_irq(&rdi->n_cqs_lock);
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci	if (cq->ip) {
23162306a36Sopenharmony_ci		spin_lock_irq(&rdi->pending_lock);
23262306a36Sopenharmony_ci		list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
23362306a36Sopenharmony_ci		spin_unlock_irq(&rdi->pending_lock);
23462306a36Sopenharmony_ci	}
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	/*
23762306a36Sopenharmony_ci	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
23862306a36Sopenharmony_ci	 * The number of entries should be >= the number requested or return
23962306a36Sopenharmony_ci	 * an error.
24062306a36Sopenharmony_ci	 */
24162306a36Sopenharmony_ci	cq->rdi = rdi;
24262306a36Sopenharmony_ci	if (rdi->driver_f.comp_vect_cpu_lookup)
24362306a36Sopenharmony_ci		cq->comp_vector_cpu =
24462306a36Sopenharmony_ci			rdi->driver_f.comp_vect_cpu_lookup(rdi, comp_vector);
24562306a36Sopenharmony_ci	else
24662306a36Sopenharmony_ci		cq->comp_vector_cpu =
24762306a36Sopenharmony_ci			cpumask_first(cpumask_of_node(rdi->dparms.node));
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci	cq->ibcq.cqe = entries;
25062306a36Sopenharmony_ci	cq->notify = RVT_CQ_NONE;
25162306a36Sopenharmony_ci	spin_lock_init(&cq->lock);
25262306a36Sopenharmony_ci	INIT_WORK(&cq->comptask, send_complete);
25362306a36Sopenharmony_ci	if (u_wc)
25462306a36Sopenharmony_ci		cq->queue = u_wc;
25562306a36Sopenharmony_ci	else
25662306a36Sopenharmony_ci		cq->kqueue = k_wc;
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_ci	trace_rvt_create_cq(cq, attr);
25962306a36Sopenharmony_ci	return 0;
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_cibail_ip:
26262306a36Sopenharmony_ci	kfree(cq->ip);
26362306a36Sopenharmony_cibail_wc:
26462306a36Sopenharmony_ci	vfree(u_wc);
26562306a36Sopenharmony_ci	vfree(k_wc);
26662306a36Sopenharmony_ci	return err;
26762306a36Sopenharmony_ci}
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci/**
27062306a36Sopenharmony_ci * rvt_destroy_cq - destroy a completion queue
27162306a36Sopenharmony_ci * @ibcq: the completion queue to destroy.
27262306a36Sopenharmony_ci * @udata: user data or NULL for kernel object
27362306a36Sopenharmony_ci *
27462306a36Sopenharmony_ci * Called by ib_destroy_cq() in the generic verbs code.
27562306a36Sopenharmony_ci */
27662306a36Sopenharmony_ciint rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
27762306a36Sopenharmony_ci{
27862306a36Sopenharmony_ci	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
27962306a36Sopenharmony_ci	struct rvt_dev_info *rdi = cq->rdi;
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci	flush_work(&cq->comptask);
28262306a36Sopenharmony_ci	spin_lock_irq(&rdi->n_cqs_lock);
28362306a36Sopenharmony_ci	rdi->n_cqs_allocated--;
28462306a36Sopenharmony_ci	spin_unlock_irq(&rdi->n_cqs_lock);
28562306a36Sopenharmony_ci	if (cq->ip)
28662306a36Sopenharmony_ci		kref_put(&cq->ip->ref, rvt_release_mmap_info);
28762306a36Sopenharmony_ci	else
28862306a36Sopenharmony_ci		vfree(cq->kqueue);
28962306a36Sopenharmony_ci	return 0;
29062306a36Sopenharmony_ci}
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci/**
29362306a36Sopenharmony_ci * rvt_req_notify_cq - change the notification type for a completion queue
29462306a36Sopenharmony_ci * @ibcq: the completion queue
29562306a36Sopenharmony_ci * @notify_flags: the type of notification to request
29662306a36Sopenharmony_ci *
29762306a36Sopenharmony_ci * This may be called from interrupt context.  Also called by
29862306a36Sopenharmony_ci * ib_req_notify_cq() in the generic verbs code.
29962306a36Sopenharmony_ci *
30062306a36Sopenharmony_ci * Return: 0 for success.
30162306a36Sopenharmony_ci */
30262306a36Sopenharmony_ciint rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
30362306a36Sopenharmony_ci{
30462306a36Sopenharmony_ci	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
30562306a36Sopenharmony_ci	unsigned long flags;
30662306a36Sopenharmony_ci	int ret = 0;
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci	spin_lock_irqsave(&cq->lock, flags);
30962306a36Sopenharmony_ci	/*
31062306a36Sopenharmony_ci	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
31162306a36Sopenharmony_ci	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
31262306a36Sopenharmony_ci	 */
31362306a36Sopenharmony_ci	if (cq->notify != IB_CQ_NEXT_COMP)
31462306a36Sopenharmony_ci		cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
31762306a36Sopenharmony_ci		if (cq->queue) {
31862306a36Sopenharmony_ci			if (RDMA_READ_UAPI_ATOMIC(cq->queue->head) !=
31962306a36Sopenharmony_ci				RDMA_READ_UAPI_ATOMIC(cq->queue->tail))
32062306a36Sopenharmony_ci				ret = 1;
32162306a36Sopenharmony_ci		} else {
32262306a36Sopenharmony_ci			if (cq->kqueue->head != cq->kqueue->tail)
32362306a36Sopenharmony_ci				ret = 1;
32462306a36Sopenharmony_ci		}
32562306a36Sopenharmony_ci	}
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_ci	spin_unlock_irqrestore(&cq->lock, flags);
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_ci	return ret;
33062306a36Sopenharmony_ci}
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_ci/*
33362306a36Sopenharmony_ci * rvt_resize_cq - change the size of the CQ
33462306a36Sopenharmony_ci * @ibcq: the completion queue
33562306a36Sopenharmony_ci *
33662306a36Sopenharmony_ci * Return: 0 for success.
33762306a36Sopenharmony_ci */
33862306a36Sopenharmony_ciint rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
33962306a36Sopenharmony_ci{
34062306a36Sopenharmony_ci	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
34162306a36Sopenharmony_ci	u32 head, tail, n;
34262306a36Sopenharmony_ci	int ret;
34362306a36Sopenharmony_ci	u32 sz;
34462306a36Sopenharmony_ci	struct rvt_dev_info *rdi = cq->rdi;
34562306a36Sopenharmony_ci	struct rvt_cq_wc *u_wc = NULL;
34662306a36Sopenharmony_ci	struct rvt_cq_wc *old_u_wc = NULL;
34762306a36Sopenharmony_ci	struct rvt_k_cq_wc *k_wc = NULL;
34862306a36Sopenharmony_ci	struct rvt_k_cq_wc *old_k_wc = NULL;
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_ci	if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
35162306a36Sopenharmony_ci		return -EINVAL;
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci	/*
35462306a36Sopenharmony_ci	 * Need to use vmalloc() if we want to support large #s of entries.
35562306a36Sopenharmony_ci	 */
35662306a36Sopenharmony_ci	if (udata && udata->outlen >= sizeof(__u64)) {
35762306a36Sopenharmony_ci		sz = sizeof(struct ib_uverbs_wc) * (cqe + 1);
35862306a36Sopenharmony_ci		sz += sizeof(*u_wc);
35962306a36Sopenharmony_ci		u_wc = vmalloc_user(sz);
36062306a36Sopenharmony_ci		if (!u_wc)
36162306a36Sopenharmony_ci			return -ENOMEM;
36262306a36Sopenharmony_ci	} else {
36362306a36Sopenharmony_ci		sz = sizeof(struct ib_wc) * (cqe + 1);
36462306a36Sopenharmony_ci		sz += sizeof(*k_wc);
36562306a36Sopenharmony_ci		k_wc = vzalloc_node(sz, rdi->dparms.node);
36662306a36Sopenharmony_ci		if (!k_wc)
36762306a36Sopenharmony_ci			return -ENOMEM;
36862306a36Sopenharmony_ci	}
36962306a36Sopenharmony_ci	/* Check that we can write the offset to mmap. */
37062306a36Sopenharmony_ci	if (udata && udata->outlen >= sizeof(__u64)) {
37162306a36Sopenharmony_ci		__u64 offset = 0;
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
37462306a36Sopenharmony_ci		if (ret)
37562306a36Sopenharmony_ci			goto bail_free;
37662306a36Sopenharmony_ci	}
37762306a36Sopenharmony_ci
37862306a36Sopenharmony_ci	spin_lock_irq(&cq->lock);
37962306a36Sopenharmony_ci	/*
38062306a36Sopenharmony_ci	 * Make sure head and tail are sane since they
38162306a36Sopenharmony_ci	 * might be user writable.
38262306a36Sopenharmony_ci	 */
38362306a36Sopenharmony_ci	if (u_wc) {
38462306a36Sopenharmony_ci		old_u_wc = cq->queue;
38562306a36Sopenharmony_ci		head = RDMA_READ_UAPI_ATOMIC(old_u_wc->head);
38662306a36Sopenharmony_ci		tail = RDMA_READ_UAPI_ATOMIC(old_u_wc->tail);
38762306a36Sopenharmony_ci	} else {
38862306a36Sopenharmony_ci		old_k_wc = cq->kqueue;
38962306a36Sopenharmony_ci		head = old_k_wc->head;
39062306a36Sopenharmony_ci		tail = old_k_wc->tail;
39162306a36Sopenharmony_ci	}
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_ci	if (head > (u32)cq->ibcq.cqe)
39462306a36Sopenharmony_ci		head = (u32)cq->ibcq.cqe;
39562306a36Sopenharmony_ci	if (tail > (u32)cq->ibcq.cqe)
39662306a36Sopenharmony_ci		tail = (u32)cq->ibcq.cqe;
39762306a36Sopenharmony_ci	if (head < tail)
39862306a36Sopenharmony_ci		n = cq->ibcq.cqe + 1 + head - tail;
39962306a36Sopenharmony_ci	else
40062306a36Sopenharmony_ci		n = head - tail;
40162306a36Sopenharmony_ci	if (unlikely((u32)cqe < n)) {
40262306a36Sopenharmony_ci		ret = -EINVAL;
40362306a36Sopenharmony_ci		goto bail_unlock;
40462306a36Sopenharmony_ci	}
40562306a36Sopenharmony_ci	for (n = 0; tail != head; n++) {
40662306a36Sopenharmony_ci		if (u_wc)
40762306a36Sopenharmony_ci			u_wc->uqueue[n] = old_u_wc->uqueue[tail];
40862306a36Sopenharmony_ci		else
40962306a36Sopenharmony_ci			k_wc->kqueue[n] = old_k_wc->kqueue[tail];
41062306a36Sopenharmony_ci		if (tail == (u32)cq->ibcq.cqe)
41162306a36Sopenharmony_ci			tail = 0;
41262306a36Sopenharmony_ci		else
41362306a36Sopenharmony_ci			tail++;
41462306a36Sopenharmony_ci	}
41562306a36Sopenharmony_ci	cq->ibcq.cqe = cqe;
41662306a36Sopenharmony_ci	if (u_wc) {
41762306a36Sopenharmony_ci		RDMA_WRITE_UAPI_ATOMIC(u_wc->head, n);
41862306a36Sopenharmony_ci		RDMA_WRITE_UAPI_ATOMIC(u_wc->tail, 0);
41962306a36Sopenharmony_ci		cq->queue = u_wc;
42062306a36Sopenharmony_ci	} else {
42162306a36Sopenharmony_ci		k_wc->head = n;
42262306a36Sopenharmony_ci		k_wc->tail = 0;
42362306a36Sopenharmony_ci		cq->kqueue = k_wc;
42462306a36Sopenharmony_ci	}
42562306a36Sopenharmony_ci	spin_unlock_irq(&cq->lock);
42662306a36Sopenharmony_ci
42762306a36Sopenharmony_ci	if (u_wc)
42862306a36Sopenharmony_ci		vfree(old_u_wc);
42962306a36Sopenharmony_ci	else
43062306a36Sopenharmony_ci		vfree(old_k_wc);
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	if (cq->ip) {
43362306a36Sopenharmony_ci		struct rvt_mmap_info *ip = cq->ip;
43462306a36Sopenharmony_ci
43562306a36Sopenharmony_ci		rvt_update_mmap_info(rdi, ip, sz, u_wc);
43662306a36Sopenharmony_ci
43762306a36Sopenharmony_ci		/*
43862306a36Sopenharmony_ci		 * Return the offset to mmap.
43962306a36Sopenharmony_ci		 * See rvt_mmap() for details.
44062306a36Sopenharmony_ci		 */
44162306a36Sopenharmony_ci		if (udata && udata->outlen >= sizeof(__u64)) {
44262306a36Sopenharmony_ci			ret = ib_copy_to_udata(udata, &ip->offset,
44362306a36Sopenharmony_ci					       sizeof(ip->offset));
44462306a36Sopenharmony_ci			if (ret)
44562306a36Sopenharmony_ci				return ret;
44662306a36Sopenharmony_ci		}
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_ci		spin_lock_irq(&rdi->pending_lock);
44962306a36Sopenharmony_ci		if (list_empty(&ip->pending_mmaps))
45062306a36Sopenharmony_ci			list_add(&ip->pending_mmaps, &rdi->pending_mmaps);
45162306a36Sopenharmony_ci		spin_unlock_irq(&rdi->pending_lock);
45262306a36Sopenharmony_ci	}
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_ci	return 0;
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_cibail_unlock:
45762306a36Sopenharmony_ci	spin_unlock_irq(&cq->lock);
45862306a36Sopenharmony_cibail_free:
45962306a36Sopenharmony_ci	vfree(u_wc);
46062306a36Sopenharmony_ci	vfree(k_wc);
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_ci	return ret;
46362306a36Sopenharmony_ci}
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci/**
46662306a36Sopenharmony_ci * rvt_poll_cq - poll for work completion entries
46762306a36Sopenharmony_ci * @ibcq: the completion queue to poll
46862306a36Sopenharmony_ci * @num_entries: the maximum number of entries to return
46962306a36Sopenharmony_ci * @entry: pointer to array where work completions are placed
47062306a36Sopenharmony_ci *
47162306a36Sopenharmony_ci * This may be called from interrupt context.  Also called by ib_poll_cq()
47262306a36Sopenharmony_ci * in the generic verbs code.
47362306a36Sopenharmony_ci *
47462306a36Sopenharmony_ci * Return: the number of completion entries polled.
47562306a36Sopenharmony_ci */
47662306a36Sopenharmony_ciint rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
47762306a36Sopenharmony_ci{
47862306a36Sopenharmony_ci	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
47962306a36Sopenharmony_ci	struct rvt_k_cq_wc *wc;
48062306a36Sopenharmony_ci	unsigned long flags;
48162306a36Sopenharmony_ci	int npolled;
48262306a36Sopenharmony_ci	u32 tail;
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_ci	/* The kernel can only poll a kernel completion queue */
48562306a36Sopenharmony_ci	if (cq->ip)
48662306a36Sopenharmony_ci		return -EINVAL;
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci	spin_lock_irqsave(&cq->lock, flags);
48962306a36Sopenharmony_ci
49062306a36Sopenharmony_ci	wc = cq->kqueue;
49162306a36Sopenharmony_ci	tail = wc->tail;
49262306a36Sopenharmony_ci	if (tail > (u32)cq->ibcq.cqe)
49362306a36Sopenharmony_ci		tail = (u32)cq->ibcq.cqe;
49462306a36Sopenharmony_ci	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
49562306a36Sopenharmony_ci		if (tail == wc->head)
49662306a36Sopenharmony_ci			break;
49762306a36Sopenharmony_ci		/* The kernel doesn't need a RMB since it has the lock. */
49862306a36Sopenharmony_ci		trace_rvt_cq_poll(cq, &wc->kqueue[tail], npolled);
49962306a36Sopenharmony_ci		*entry = wc->kqueue[tail];
50062306a36Sopenharmony_ci		if (tail >= cq->ibcq.cqe)
50162306a36Sopenharmony_ci			tail = 0;
50262306a36Sopenharmony_ci		else
50362306a36Sopenharmony_ci			tail++;
50462306a36Sopenharmony_ci	}
50562306a36Sopenharmony_ci	wc->tail = tail;
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	spin_unlock_irqrestore(&cq->lock, flags);
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_ci	return npolled;
51062306a36Sopenharmony_ci}
51162306a36Sopenharmony_ci
51262306a36Sopenharmony_ci/**
51362306a36Sopenharmony_ci * rvt_driver_cq_init - Init cq resources on behalf of driver
51462306a36Sopenharmony_ci *
51562306a36Sopenharmony_ci * Return: 0 on success
51662306a36Sopenharmony_ci */
51762306a36Sopenharmony_ciint rvt_driver_cq_init(void)
51862306a36Sopenharmony_ci{
51962306a36Sopenharmony_ci	comp_vector_wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_CPU_INTENSIVE,
52062306a36Sopenharmony_ci					 0, "rdmavt_cq");
52162306a36Sopenharmony_ci	if (!comp_vector_wq)
52262306a36Sopenharmony_ci		return -ENOMEM;
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci	return 0;
52562306a36Sopenharmony_ci}
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci/**
52862306a36Sopenharmony_ci * rvt_cq_exit - tear down cq reources
52962306a36Sopenharmony_ci */
53062306a36Sopenharmony_civoid rvt_cq_exit(void)
53162306a36Sopenharmony_ci{
53262306a36Sopenharmony_ci	destroy_workqueue(comp_vector_wq);
53362306a36Sopenharmony_ci	comp_vector_wq = NULL;
53462306a36Sopenharmony_ci}
535