// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <uapi/linux/idxd.h>
#include "idxd.h"
#include "registers.h"

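/*
 * Initialize the pre-allocated descriptor at @idx for use: clear the
 * hardware descriptor and the completion record, note the CPU the sbitmap
 * tag was taken on, and fill in the PASID when the device operates with
 * PASID enabled.
 */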
static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
{
	struct idxd_desc *desc;
	struct idxd_device *idxd = wq->idxd;

	desc = wq->descs[idx];
	memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
	memset(desc->completion, 0, idxd->data->compl_size);
	desc->cpu = cpu;

	if (device_pasid_enabled(idxd))
		desc->hw->pasid = idxd->pasid;

	return desc;
}

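/*
 * Allocate a descriptor by taking a tag from the wq's sbitmap queue.
 * IDXD_OP_NONBLOCK returns -EAGAIN when no tag is available; otherwise the
 * caller sleeps interruptibly on the sbitmap waitqueue until a tag frees up
 * or a signal is pending.
 */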
struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
{
	int cpu, idx;
	struct idxd_device *idxd = wq->idxd;
	DEFINE_SBQ_WAIT(wait);
	struct sbq_wait_state *ws;
	struct sbitmap_queue *sbq;

	if (idxd->state != IDXD_DEV_ENABLED)
		return ERR_PTR(-EIO);

	sbq = &wq->sbq;
	idx = sbitmap_queue_get(sbq, &cpu);
	if (idx < 0) {
		if (optype == IDXD_OP_NONBLOCK)
			return ERR_PTR(-EAGAIN);
	} else {
		return __get_desc(wq, idx, cpu);
	}

	ws = &sbq->ws[0];
	for (;;) {
		sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE);
		if (signal_pending_state(TASK_INTERRUPTIBLE, current))
			break;
		idx = sbitmap_queue_get(sbq, &cpu);
		if (idx >= 0)
			break;
		schedule();
	}

	sbitmap_finish_wait(sbq, ws, &wait);
	if (idx < 0)
		return ERR_PTR(-EAGAIN);

	return __get_desc(wq, idx, cpu);
}

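/*
 * Return a descriptor to the allocator by releasing its sbitmap tag, which
 * wakes any submitter blocked in idxd_alloc_desc().
 */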
void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
{
	int cpu = desc->cpu;

	desc->cpu = -1;
	sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
}

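/*
 * Search the irq_entry's work list for the descriptor being aborted and, if
 * present, unlink and return it. Called with ie->list_lock held.
 */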
static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
					 struct idxd_desc *desc)
{
	struct idxd_desc *d, *n;

	lockdep_assert_held(&ie->list_lock);
	list_for_each_entry_safe(d, n, &ie->work_list, list) {
		if (d == desc) {
			list_del(&d->list);
			return d;
		}
	}

	/*
	 * At this point, the descriptor to be aborted is held by the completion
	 * handler, which has taken it off the pending list but has not yet added
	 * it to the work list. It will be cleaned up by the interrupt handler
	 * when it sees IDXD_COMP_DESC_ABORT as the completion status.
	 */
	return NULL;
}

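/*
 * Abort a submitted descriptor: mark its completion record with
 * IDXD_COMP_DESC_ABORT, pull it off the irq_entry's pending llist or work
 * list under the list lock, and complete it with IDXD_COMPLETE_ABORT.
 * Pending descriptors that already carry a completion status are flushed
 * and completed here as well; the rest are moved to the work list for the
 * irq thread handler.
 */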
static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
			     struct idxd_desc *desc)
{
	struct idxd_desc *d, *t, *found = NULL;
	struct llist_node *head;
	LIST_HEAD(flist);

	desc->completion->status = IDXD_COMP_DESC_ABORT;
	/*
	 * Grab the list lock so it blocks the irq thread handler. This allows
	 * the abort code to locate the descriptor that needs to be aborted.
	 */
	spin_lock(&ie->list_lock);
	head = llist_del_all(&ie->pending_llist);
	if (head) {
		llist_for_each_entry_safe(d, t, head, llnode) {
			if (d == desc) {
				found = desc;
				continue;
			}

			if (d->completion->status)
				list_add_tail(&d->list, &flist);
			else
				list_add_tail(&d->list, &ie->work_list);
		}
	}

	if (!found)
		found = list_abort_desc(wq, ie, desc);
	spin_unlock(&ie->list_lock);

	if (found)
		idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);

	/*
	 * Completing a descriptor returns it to the allocator, where it can be
	 * acquired by a different process and its desc->list modified. Delete
	 * the descriptor from the list first so the list traversal is not
	 * corrupted by the other process.
	 */
	list_for_each_entry_safe(d, t, &flist, list) {
		list_del_init(&d->list);
		idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true);
	}
}

/*
 * ENQCMDS typically fails when the WQ is inactive or busy. On host submission, the
 * driver has better control over the number of descriptors submitted to a shared wq
 * by limiting the driver-allocated descriptors to the wq size. However, when the swq
 * is exported to a guest kernel, it may be shared with multiple guest kernels, which
 * significantly increases the likelihood of the swq returning busy on submission.
 * A tunable retry mechanism allows the driver to keep trying for a while before
 * giving up. The sysfs knob can be tuned by the system administrator.
 */
int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
{
	unsigned int retries = wq->enqcmds_retries;
	int rc;

	do {
		rc = enqcmds(portal, desc);
		if (rc == 0)
			break;
		cpu_relax();
	} while (retries--);

	return rc;
}

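/*
 * Submit a descriptor to the wq portal. Descriptors that request interrupt
 * completion (IDXD_OP_FLAG_RCI) are queued on the irq_entry's pending llist
 * before submission so the irq handler can find them. Dedicated wqs submit
 * with iosubmit_cmds512(); shared wqs use ENQCMDS with retries, and a failed
 * ENQCMDS aborts and frees the descriptor.
 */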
int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
{
	struct idxd_device *idxd = wq->idxd;
	struct idxd_irq_entry *ie = NULL;
	u32 desc_flags = desc->hw->flags;
	void __iomem *portal;
	int rc;

	if (idxd->state != IDXD_DEV_ENABLED)
		return -EIO;

	if (!percpu_ref_tryget_live(&wq->wq_active)) {
		wait_for_completion(&wq->wq_resurrect);
		if (!percpu_ref_tryget_live(&wq->wq_active))
			return -ENXIO;
	}

	portal = idxd_wq_portal_addr(wq);

	/*
	 * Queue the descriptor on the lockless list of the irq_entry that
	 * the descriptor is designated to.
	 */
	if (desc_flags & IDXD_OP_FLAG_RCI) {
		ie = &wq->ie;
		desc->hw->int_handle = ie->int_handle;
		llist_add(&desc->llnode, &ie->pending_llist);
	}

	/*
	 * The wmb() flushes writes to coherent DMA data before
	 * possibly triggering a DMA read. The wmb() is necessary
	 * even on UP because the recipient is a device.
	 */
	wmb();

	if (wq_dedicated(wq)) {
		iosubmit_cmds512(portal, desc->hw, 1);
	} else {
		rc = idxd_enqcmds(wq, portal, desc->hw);
		if (rc < 0) {
			percpu_ref_put(&wq->wq_active);
			/* abort operation frees the descriptor */
			if (ie)
				llist_abort_desc(wq, ie, desc);
			return rc;
		}
	}

	percpu_ref_put(&wq->wq_active);
	return 0;
}