// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2015 HGST, a Western Digital Company.
 */
#include <linux/err.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

#include "core_priv.h"

#include <trace/events/rdma_core.h>
/* Max size for shared CQ, may require tuning */
#define IB_MAX_SHARED_CQ_SZ		4096U

/* # of WCs to poll for with a single call to ib_poll_cq */
#define IB_POLL_BATCH			16
#define IB_POLL_BATCH_DIRECT		8

/* # of WCs to iterate over before yielding */
#define IB_POLL_BUDGET_IRQ		256
#define IB_POLL_BUDGET_WORKQUEUE	65536

#define IB_POLL_FLAGS \
	(IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)

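/*
 * Each entry below is {usec, pkts, comps, cq_period_mode}, matching the
 * field order of struct dim_cq_moder; only the usec and comps values are
 * consumed by ib_cq_rdma_dim_work() below.
 */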
static const struct dim_cq_moder
rdma_dim_prof[RDMA_DIM_PARAMS_NUM_PROFILES] = {
	{1,   0, 1,  0},
	{1,   0, 4,  0},
	{2,   0, 4,  0},
	{2,   0, 8,  0},
	{4,   0, 8,  0},
	{16,  0, 8,  0},
	{16,  0, 16, 0},
	{32,  0, 16, 0},
	{32,  0, 32, 0},
};

static void ib_cq_rdma_dim_work(struct work_struct *w)
{
	struct dim *dim = container_of(w, struct dim, work);
	struct ib_cq *cq = dim->priv;

	u16 usec = rdma_dim_prof[dim->profile_ix].usec;
	u16 comps = rdma_dim_prof[dim->profile_ix].comps;

	dim->state = DIM_START_MEASURE;

	trace_cq_modify(cq, comps, usec);
	cq->device->ops.modify_cq(cq, comps, usec);
}

static void rdma_dim_init(struct ib_cq *cq)
{
	struct dim *dim;

	if (!cq->device->ops.modify_cq || !cq->device->use_cq_dim ||
	    cq->poll_ctx == IB_POLL_DIRECT)
		return;

	dim = kzalloc(sizeof(struct dim), GFP_KERNEL);
	if (!dim)
		return;

	dim->state = DIM_START_MEASURE;
	dim->tune_state = DIM_GOING_RIGHT;
	dim->profile_ix = RDMA_DIM_START_PROFILE;
	dim->priv = cq;
	cq->dim = dim;

	INIT_WORK(&dim->work, ib_cq_rdma_dim_work);
}

static void rdma_dim_destroy(struct ib_cq *cq)
{
	if (!cq->dim)
		return;

	cancel_work_sync(&cq->dim->work);
	kfree(cq->dim);
}

static int __poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
{
	int rc;

	rc = ib_poll_cq(cq, num_entries, wc);
	trace_cq_poll(cq, num_entries, rc);
	return rc;
}

static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
			   int batch)
{
	int i, n, completed = 0;

	trace_cq_process(cq);

	/*
	 * budget might be (-1) if the caller does not
	 * want to bound this call, thus we need an unsigned
	 * minimum here.
	 */
	while ((n = __poll_cq(cq, min_t(u32, batch,
					budget - completed), wcs)) > 0) {
		for (i = 0; i < n; i++) {
			struct ib_wc *wc = &wcs[i];

			if (wc->wr_cqe)
				wc->wr_cqe->done(cq, wc);
			else
				WARN_ON_ONCE(wc->status == IB_WC_SUCCESS);
		}

		completed += n;

		if (n != batch || (budget != -1 && completed >= budget))
			break;
	}

	return completed;
}

/**
 * ib_process_cq_direct - process a CQ in caller context
 * @cq:		CQ to process
 * @budget:	number of CQEs to poll for
 *
 * This function is used to process all outstanding CQ entries.
 * It does not offload CQ processing to a different context and does
 * not ask for completion interrupts from the HCA.
 * Using direct processing on a CQ whose poll context is not IB_POLL_DIRECT
 * may trigger concurrent processing.
 *
 * Note: do not pass -1 as %budget unless it is guaranteed that the number
 * of completions that will be processed is small.
 */
int ib_process_cq_direct(struct ib_cq *cq, int budget)
{
	struct ib_wc wcs[IB_POLL_BATCH_DIRECT];

	return __ib_process_cq(cq, budget, wcs, IB_POLL_BATCH_DIRECT);
}
EXPORT_SYMBOL(ib_process_cq_direct);
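
/*
 * Illustrative usage (a sketch with hypothetical names, not taken from an
 * in-tree ULP): a driver that created its CQ with IB_POLL_DIRECT can reap
 * completions from its own context, for example while waiting for send
 * queue space to free up:
 *
 *	completed = ib_process_cq_direct(queue->cq, budget);
 *	if (completed < budget)
 *		break;
 */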

static void ib_cq_completion_direct(struct ib_cq *cq, void *private)
{
	WARN_ONCE(1, "got unsolicited completion for CQ 0x%p\n", cq);
}

static int ib_poll_handler(struct irq_poll *iop, int budget)
{
	struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
	struct dim *dim = cq->dim;
	int completed;

	completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH);
	if (completed < budget) {
		irq_poll_complete(&cq->iop);
		if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) {
			trace_cq_reschedule(cq);
			irq_poll_sched(&cq->iop);
		}
	}

	if (dim)
		rdma_dim(dim, completed);

	return completed;
}

static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
{
	trace_cq_schedule(cq);
	irq_poll_sched(&cq->iop);
}

static void ib_cq_poll_work(struct work_struct *work)
{
	struct ib_cq *cq = container_of(work, struct ib_cq, work);
	int completed;

	completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE, cq->wc,
				    IB_POLL_BATCH);
	if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
	    ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
		queue_work(cq->comp_wq, &cq->work);
	else if (cq->dim)
		rdma_dim(cq->dim, completed);
}

static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
{
	trace_cq_schedule(cq);
	queue_work(cq->comp_wq, &cq->work);
}

/**
 * __ib_alloc_cq - allocate a completion queue
 * @dev:		device to allocate the CQ for
 * @private:		driver private data, accessible from cq->cq_context
 * @nr_cqe:		number of CQEs to allocate
 * @comp_vector:	HCA completion vector for this CQ
 * @poll_ctx:		context to poll the CQ from
 * @caller:		module owner name
 *
 * This is the proper interface to allocate a CQ for in-kernel users. A
 * CQ allocated with this interface will automatically be polled from the
 * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
 * to use this CQ abstraction.
 */
struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
			    int comp_vector, enum ib_poll_context poll_ctx,
			    const char *caller)
{
	struct ib_cq_init_attr cq_attr = {
		.cqe		= nr_cqe,
		.comp_vector	= comp_vector,
	};
	struct ib_cq *cq;
	int ret = -ENOMEM;

	cq = rdma_zalloc_drv_obj(dev, ib_cq);
	if (!cq)
		return ERR_PTR(ret);

	cq->device = dev;
	cq->cq_context = private;
	cq->poll_ctx = poll_ctx;
	atomic_set(&cq->usecnt, 0);
	cq->comp_vector = comp_vector;

	cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
	if (!cq->wc)
		goto out_free_cq;

	rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
	rdma_restrack_set_name(&cq->res, caller);

	ret = dev->ops.create_cq(cq, &cq_attr, NULL);
	if (ret)
		goto out_free_wc;

	rdma_dim_init(cq);

	switch (cq->poll_ctx) {
	case IB_POLL_DIRECT:
		cq->comp_handler = ib_cq_completion_direct;
		break;
	case IB_POLL_SOFTIRQ:
		cq->comp_handler = ib_cq_completion_softirq;

		irq_poll_init(&cq->iop, IB_POLL_BUDGET_IRQ, ib_poll_handler);
		ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
		break;
	case IB_POLL_WORKQUEUE:
	case IB_POLL_UNBOUND_WORKQUEUE:
		cq->comp_handler = ib_cq_completion_workqueue;
		INIT_WORK(&cq->work, ib_cq_poll_work);
		ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
		cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ?
				ib_comp_wq : ib_comp_unbound_wq;
		break;
	default:
		ret = -EINVAL;
		goto out_destroy_cq;
	}

	rdma_restrack_add(&cq->res);
	trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx);
	return cq;

out_destroy_cq:
	rdma_dim_destroy(cq);
	cq->device->ops.destroy_cq(cq, NULL);
out_free_wc:
	rdma_restrack_put(&cq->res);
	kfree(cq->wc);
out_free_cq:
	kfree(cq);
	trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(__ib_alloc_cq);
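
/*
 * Illustrative usage (a sketch with hypothetical names, not from this
 * file): a ULP embeds a struct ib_cqe in its request, points wr_cqe at
 * it, and has the done callback invoked from the poll context chosen at
 * allocation time:
 *
 *	static void my_send_done(struct ib_cq *cq, struct ib_wc *wc)
 *	{
 *		struct my_req *req = container_of(wc->wr_cqe,
 *						  struct my_req, cqe);
 *		...
 *	}
 *
 *	cq = ib_alloc_cq(dev, ctx, nr_cqe, 0, IB_POLL_WORKQUEUE);
 *	if (IS_ERR(cq))
 *		return PTR_ERR(cq);
 *
 *	req->cqe.done = my_send_done;
 *	wr.wr_cqe = &req->cqe;
 */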

/**
 * __ib_alloc_cq_any - allocate a completion queue
 * @dev:		device to allocate the CQ for
 * @private:		driver private data, accessible from cq->cq_context
 * @nr_cqe:		number of CQEs to allocate
 * @poll_ctx:		context to poll the CQ from
 * @caller:		module owner name
 *
 * Attempt to spread ULP Completion Queues over each device's interrupt
 * vectors. A simple best-effort mechanism is used.
 */
struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
				int nr_cqe, enum ib_poll_context poll_ctx,
				const char *caller)
{
	static atomic_t counter;
	int comp_vector = 0;

	if (dev->num_comp_vectors > 1)
		comp_vector =
			atomic_inc_return(&counter) %
			min_t(int, dev->num_comp_vectors, num_online_cpus());

	return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
			     caller);
}
EXPORT_SYMBOL(__ib_alloc_cq_any);
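
/*
 * Illustrative usage (a sketch with hypothetical names): ULPs that do not
 * care which completion vector they land on can let the core spread CQs
 * across vectors:
 *
 *	cq = ib_alloc_cq_any(dev, ctx, nr_cqe, IB_POLL_WORKQUEUE);
 *	if (IS_ERR(cq))
 *		return PTR_ERR(cq);
 */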

/**
 * ib_free_cq - free a completion queue
 * @cq:		completion queue to free.
 */
void ib_free_cq(struct ib_cq *cq)
{
	int ret;

	if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
		return;
	if (WARN_ON_ONCE(cq->cqe_used))
		return;

	switch (cq->poll_ctx) {
	case IB_POLL_DIRECT:
		break;
	case IB_POLL_SOFTIRQ:
		irq_poll_disable(&cq->iop);
		break;
	case IB_POLL_WORKQUEUE:
	case IB_POLL_UNBOUND_WORKQUEUE:
		cancel_work_sync(&cq->work);
		break;
	default:
		WARN_ON_ONCE(1);
	}

	rdma_dim_destroy(cq);
	trace_cq_free(cq);
	ret = cq->device->ops.destroy_cq(cq, NULL);
	WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
	rdma_restrack_del(&cq->res);
	kfree(cq->wc);
	kfree(cq);
}
EXPORT_SYMBOL(ib_free_cq);

void ib_cq_pool_cleanup(struct ib_device *dev)
{
	struct ib_cq *cq, *n;
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++) {
		list_for_each_entry_safe(cq, n, &dev->cq_pools[i],
					 pool_entry) {
			WARN_ON(cq->cqe_used);
			list_del(&cq->pool_entry);
			cq->shared = false;
			ib_free_cq(cq);
		}
	}
}

static int ib_alloc_cqs(struct ib_device *dev, unsigned int nr_cqes,
			enum ib_poll_context poll_ctx)
{
	LIST_HEAD(tmp_list);
	unsigned int nr_cqs, i;
	struct ib_cq *cq, *n;
	int ret;

	if (poll_ctx > IB_POLL_LAST_POOL_TYPE) {
		WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE);
		return -EINVAL;
	}

	/*
	 * Allocate at least as many CQEs as requested, and otherwise
	 * a reasonable batch size so that we can share CQs between
	 * multiple users instead of allocating a larger number of CQs.
	 */
	nr_cqes = min_t(unsigned int, dev->attrs.max_cqe,
			max(nr_cqes, IB_MAX_SHARED_CQ_SZ));
	nr_cqs = min_t(unsigned int, dev->num_comp_vectors, num_online_cpus());
	for (i = 0; i < nr_cqs; i++) {
		cq = ib_alloc_cq(dev, NULL, nr_cqes, i, poll_ctx);
		if (IS_ERR(cq)) {
			ret = PTR_ERR(cq);
			goto out_free_cqs;
		}
		cq->shared = true;
		list_add_tail(&cq->pool_entry, &tmp_list);
	}

	spin_lock_irq(&dev->cq_pools_lock);
	list_splice(&tmp_list, &dev->cq_pools[poll_ctx]);
	spin_unlock_irq(&dev->cq_pools_lock);

	return 0;

out_free_cqs:
	list_for_each_entry_safe(cq, n, &tmp_list, pool_entry) {
		cq->shared = false;
		ib_free_cq(cq);
	}
	return ret;
}

/**
 * ib_cq_pool_get() - Find the least used completion queue that matches
 *   a given cpu hint (or least used for wild card affinity) and fits
 *   nr_cqe.
 * @dev: rdma device
 * @nr_cqe: number of needed cqe entries
 * @comp_vector_hint: completion vector hint (-1) for the driver to assign
 *   a comp vector based on internal counter
 * @poll_ctx: cq polling context
 *
 * Finds a cq that satisfies @comp_vector_hint and @nr_cqe requirements and
 * claims entries in it for us.  If there is no available cq, allocate a
 * new cq with the requirements and add it to the device pool.
 * IB_POLL_DIRECT cannot be used for shared cqs so it is not a valid value
 * for @poll_ctx.
 */
struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe,
			     int comp_vector_hint,
			     enum ib_poll_context poll_ctx)
{
	static unsigned int default_comp_vector;
	unsigned int vector, num_comp_vectors;
	struct ib_cq *cq, *found = NULL;
	int ret;

	if (poll_ctx > IB_POLL_LAST_POOL_TYPE) {
		WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE);
		return ERR_PTR(-EINVAL);
	}

	num_comp_vectors =
		min_t(unsigned int, dev->num_comp_vectors, num_online_cpus());
	/* Project the affinity to the device completion vector range */
	if (comp_vector_hint < 0) {
		comp_vector_hint =
			(READ_ONCE(default_comp_vector) + 1) % num_comp_vectors;
		WRITE_ONCE(default_comp_vector, comp_vector_hint);
	}
	vector = comp_vector_hint % num_comp_vectors;

	/*
	 * Find the least used CQ with correct affinity and
	 * enough free CQ entries
	 */
	while (!found) {
		spin_lock_irq(&dev->cq_pools_lock);
		list_for_each_entry(cq, &dev->cq_pools[poll_ctx],
				    pool_entry) {
			/*
			 * Check to see if we have found a CQ with the
			 * correct completion vector
			 */
			if (vector != cq->comp_vector)
				continue;
			if (cq->cqe_used + nr_cqe > cq->cqe)
				continue;
			found = cq;
			break;
		}

		if (found) {
			found->cqe_used += nr_cqe;
			spin_unlock_irq(&dev->cq_pools_lock);

			return found;
		}
		spin_unlock_irq(&dev->cq_pools_lock);

		/*
		 * Didn't find a match or ran out of CQs in the device
		 * pool, allocate a new array of CQs.
		 */
		ret = ib_alloc_cqs(dev, nr_cqe, poll_ctx);
		if (ret)
			return ERR_PTR(ret);
	}

	return found;
}
EXPORT_SYMBOL(ib_cq_pool_get);
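
/*
 * Illustrative usage (a sketch with hypothetical names): a queue pair that
 * wants a shared CQ claims its CQE quota from the pool and returns the
 * same amount when it tears down:
 *
 *	cq = ib_cq_pool_get(dev, qp_size, -1, IB_POLL_WORKQUEUE);
 *	if (IS_ERR(cq))
 *		return PTR_ERR(cq);
 *	...
 *	ib_cq_pool_put(cq, qp_size);
 */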

/**
 * ib_cq_pool_put - Return a CQ taken from a shared pool.
 * @cq: The CQ to return.
 * @nr_cqe: The max number of cqes that the user had requested.
 */
void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe)
{
	if (WARN_ON_ONCE(nr_cqe > cq->cqe_used))
		return;

	spin_lock_irq(&cq->device->cq_pools_lock);
	cq->cqe_used -= nr_cqe;
	spin_unlock_irq(&cq->device->cq_pools_lock);
}
EXPORT_SYMBOL(ib_cq_pool_put);