1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Adjunct processor matrix VFIO device driver callbacks.
4 *
5 * Copyright IBM Corp. 2018
6 *
7 * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
8 *	      Halil Pasic <pasic@linux.ibm.com>
9 *	      Pierre Morel <pmorel@linux.ibm.com>
10 */
11#include <linux/string.h>
12#include <linux/vfio.h>
13#include <linux/device.h>
14#include <linux/list.h>
15#include <linux/ctype.h>
16#include <linux/bitops.h>
17#include <linux/kvm_host.h>
18#include <linux/module.h>
19#include <asm/kvm.h>
20#include <asm/zcrypt.h>
21
22#include "vfio_ap_private.h"
23
24#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
25#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"
26
27static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev);
28static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
29
30static int match_apqn(struct device *dev, const void *data)
31{
32	struct vfio_ap_queue *q = dev_get_drvdata(dev);
33
34	return (q->apqn == *(int *)(data)) ? 1 : 0;
35}
36
37/**
38 * vfio_ap_get_queue: Retrieve a queue with a specific APQN from a list
39 * @matrix_mdev: the associated mediated matrix
40 * @apqn: The queue APQN
41 *
42 * Retrieve a queue with a specific APQN from the list of the
43 * devices of the vfio_ap_drv.
44 * Verify that the APID and the APQI are set in the matrix.
45 *
46 * Returns the pointer to the associated vfio_ap_queue
47 */
48static struct vfio_ap_queue *vfio_ap_get_queue(
49					struct ap_matrix_mdev *matrix_mdev,
50					int apqn)
51{
52	struct vfio_ap_queue *q;
53
54	if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
55		return NULL;
56	if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
57		return NULL;
58
59	q = vfio_ap_find_queue(apqn);
60	if (q)
61		q->matrix_mdev = matrix_mdev;
62
63	return q;
64}
65
66/**
67 * vfio_ap_wait_for_irqclear
68 * @apqn: The AP Queue number
69 *
70 * Checks the IRQ bit for the status of this APQN using ap_tapq.
71 * Returns if the ap_tapq function succeeded and the bit is clear.
72 * Returns if ap_tapq function failed with invalid, deconfigured or
73 * checkstopped AP.
74 * Otherwise retries up to 5 times after waiting 20ms.
75 *
76 */
77static void vfio_ap_wait_for_irqclear(int apqn)
78{
79	struct ap_queue_status status;
80	int retry = 5;
81
82	do {
83		status = ap_tapq(apqn, NULL);
84		switch (status.response_code) {
85		case AP_RESPONSE_NORMAL:
86		case AP_RESPONSE_RESET_IN_PROGRESS:
87			if (!status.irq_enabled)
88				return;
89			fallthrough;
90		case AP_RESPONSE_BUSY:
91			msleep(20);
92			break;
93		case AP_RESPONSE_Q_NOT_AVAIL:
94		case AP_RESPONSE_DECONFIGURED:
95		case AP_RESPONSE_CHECKSTOPPED:
96		default:
97			WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__,
98				  status.response_code, apqn);
99			return;
100		}
101	} while (--retry);
102
103	WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n",
104		  __func__, status.response_code, apqn);
105}
106
107/**
108 * vfio_ap_free_aqic_resources
109 * @q: The vfio_ap_queue
110 *
111 * Unregisters the ISC in the GIB when the saved ISC not invalid.
112 * Unpin the guest's page holding the NIB when it exist.
113 * Reset the saved_pfn and saved_isc to invalid values.
114 *
115 */
116static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
117{
118	if (!q)
119		return;
120	if (q->saved_isc != VFIO_AP_ISC_INVALID &&
121	    !WARN_ON(!(q->matrix_mdev && q->matrix_mdev->kvm))) {
122		kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc);
123		q->saved_isc = VFIO_AP_ISC_INVALID;
124	}
125	if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) {
126		vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev),
127				 &q->saved_pfn, 1);
128		q->saved_pfn = 0;
129	}
130}
131
132/**
133 * vfio_ap_irq_disable
134 * @q: The vfio_ap_queue
135 *
136 * Uses ap_aqic to disable the interruption and in case of success, reset
137 * in progress or IRQ disable command already proceeded: calls
138 * vfio_ap_wait_for_irqclear() to check for the IRQ bit to be clear
139 * and calls vfio_ap_free_aqic_resources() to free the resources associated
140 * with the AP interrupt handling.
141 *
142 * In the case the AP is busy, or a reset is in progress,
143 * retries after 20ms, up to 5 times.
144 *
145 * Returns if ap_aqic function failed with invalid, deconfigured or
146 * checkstopped AP.
147 */
148static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
149{
150	struct ap_qirq_ctrl aqic_gisa = {};
151	struct ap_queue_status status;
152	int retries = 5;
153
154	do {
155		status = ap_aqic(q->apqn, aqic_gisa, NULL);
156		switch (status.response_code) {
157		case AP_RESPONSE_OTHERWISE_CHANGED:
158		case AP_RESPONSE_NORMAL:
159			vfio_ap_wait_for_irqclear(q->apqn);
160			goto end_free;
161		case AP_RESPONSE_RESET_IN_PROGRESS:
162		case AP_RESPONSE_BUSY:
163			msleep(20);
164			break;
165		case AP_RESPONSE_Q_NOT_AVAIL:
166		case AP_RESPONSE_DECONFIGURED:
167		case AP_RESPONSE_CHECKSTOPPED:
168		case AP_RESPONSE_INVALID_ADDRESS:
169		default:
170			/* All cases in default means AP not operational */
171			WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
172				  status.response_code);
173			goto end_free;
174		}
175	} while (retries--);
176
177	WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
178		  status.response_code);
179end_free:
180	vfio_ap_free_aqic_resources(q);
181	q->matrix_mdev = NULL;
182	return status;
183}
184
185/**
186 * vfio_ap_setirq: Enable Interruption for a APQN
187 *
188 * @dev: the device associated with the ap_queue
189 * @q:	 the vfio_ap_queue holding AQIC parameters
190 *
191 * Pin the NIB saved in *q
192 * Register the guest ISC to GIB interface and retrieve the
193 * host ISC to issue the host side PQAP/AQIC
194 *
195 * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case the
196 * vfio_pin_pages failed.
197 *
198 * Otherwise return the ap_queue_status returned by the ap_aqic(),
199 * all retry handling will be done by the guest.
200 */
201static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
202						 int isc,
203						 unsigned long nib)
204{
205	struct ap_qirq_ctrl aqic_gisa = {};
206	struct ap_queue_status status = {};
207	struct kvm_s390_gisa *gisa;
208	struct kvm *kvm;
209	unsigned long h_nib, g_pfn, h_pfn;
210	int ret;
211
212	g_pfn = nib >> PAGE_SHIFT;
213	ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1,
214			     IOMMU_READ | IOMMU_WRITE, &h_pfn);
215	switch (ret) {
216	case 1:
217		break;
218	default:
219		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
220		return status;
221	}
222
223	kvm = q->matrix_mdev->kvm;
224	gisa = kvm->arch.gisa_int.origin;
225
226	h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK);
227	aqic_gisa.gisc = isc;
228	aqic_gisa.isc = kvm_s390_gisc_register(kvm, isc);
229	aqic_gisa.ir = 1;
230	aqic_gisa.gisa = (uint64_t)gisa >> 4;
231
232	status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib);
233	switch (status.response_code) {
234	case AP_RESPONSE_NORMAL:
235		/* See if we did clear older IRQ configuration */
236		vfio_ap_free_aqic_resources(q);
237		q->saved_pfn = g_pfn;
238		q->saved_isc = isc;
239		break;
240	case AP_RESPONSE_OTHERWISE_CHANGED:
241		/* We could not modify IRQ setings: clear new configuration */
242		vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1);
243		kvm_s390_gisc_unregister(kvm, isc);
244		break;
245	default:
246		pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
247			status.response_code);
248		vfio_ap_irq_disable(q);
249		break;
250	}
251
252	return status;
253}
254
255/**
256 * handle_pqap: PQAP instruction callback
257 *
258 * @vcpu: The vcpu on which we received the PQAP instruction
259 *
260 * Get the general register contents to initialize internal variables.
261 * REG[0]: APQN
262 * REG[1]: IR and ISC
263 * REG[2]: NIB
264 *
265 * Response.status may be set to following Response Code:
266 * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available
267 * - AP_RESPONSE_DECONFIGURED: if the queue is not configured
268 * - AP_RESPONSE_NORMAL (0) : in case of successs
269 *   Check vfio_ap_setirq() and vfio_ap_clrirq() for other possible RC.
270 * We take the matrix_dev lock to ensure serialization on queues and
271 * mediated device access.
272 *
273 * Return 0 if we could handle the request inside KVM.
274 * otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
275 */
276static int handle_pqap(struct kvm_vcpu *vcpu)
277{
278	uint64_t status;
279	uint16_t apqn;
280	struct vfio_ap_queue *q;
281	struct ap_queue_status qstatus = {
282			       .response_code = AP_RESPONSE_Q_NOT_AVAIL, };
283	struct ap_matrix_mdev *matrix_mdev;
284
285	/* If we do not use the AIV facility just go to userland */
286	if (!(vcpu->arch.sie_block->eca & ECA_AIV))
287		return -EOPNOTSUPP;
288
289	apqn = vcpu->run->s.regs.gprs[0] & 0xffff;
290	mutex_lock(&matrix_dev->lock);
291
292	if (!vcpu->kvm->arch.crypto.pqap_hook)
293		goto out_unlock;
294	matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
295				   struct ap_matrix_mdev, pqap_hook);
296
297	q = vfio_ap_get_queue(matrix_mdev, apqn);
298	if (!q)
299		goto out_unlock;
300
301	status = vcpu->run->s.regs.gprs[1];
302
303	/* If IR bit(16) is set we enable the interrupt */
304	if ((status >> (63 - 16)) & 0x01)
305		qstatus = vfio_ap_irq_enable(q, status & 0x07,
306					     vcpu->run->s.regs.gprs[2]);
307	else
308		qstatus = vfio_ap_irq_disable(q);
309
310out_unlock:
311	memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
312	vcpu->run->s.regs.gprs[1] >>= 32;
313	mutex_unlock(&matrix_dev->lock);
314	return 0;
315}
316
317static void vfio_ap_matrix_init(struct ap_config_info *info,
318				struct ap_matrix *matrix)
319{
320	matrix->apm_max = info->apxa ? info->Na : 63;
321	matrix->aqm_max = info->apxa ? info->Nd : 15;
322	matrix->adm_max = info->apxa ? info->Nd : 15;
323}
324
325static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
326{
327	struct ap_matrix_mdev *matrix_mdev;
328
329	if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
330		return -EPERM;
331
332	matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL);
333	if (!matrix_mdev) {
334		atomic_inc(&matrix_dev->available_instances);
335		return -ENOMEM;
336	}
337
338	matrix_mdev->mdev = mdev;
339	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
340	mdev_set_drvdata(mdev, matrix_mdev);
341	matrix_mdev->pqap_hook.hook = handle_pqap;
342	matrix_mdev->pqap_hook.owner = THIS_MODULE;
343	mutex_lock(&matrix_dev->lock);
344	list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
345	mutex_unlock(&matrix_dev->lock);
346
347	return 0;
348}
349
350static int vfio_ap_mdev_remove(struct mdev_device *mdev)
351{
352	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
353
354	if (matrix_mdev->kvm)
355		return -EBUSY;
356
357	mutex_lock(&matrix_dev->lock);
358	vfio_ap_mdev_reset_queues(mdev);
359	list_del(&matrix_mdev->node);
360	mutex_unlock(&matrix_dev->lock);
361
362	kfree(matrix_mdev);
363	mdev_set_drvdata(mdev, NULL);
364	atomic_inc(&matrix_dev->available_instances);
365
366	return 0;
367}
368
369static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
370{
371	return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT);
372}
373
374static MDEV_TYPE_ATTR_RO(name);
375
376static ssize_t available_instances_show(struct kobject *kobj,
377					struct device *dev, char *buf)
378{
379	return sprintf(buf, "%d\n",
380		       atomic_read(&matrix_dev->available_instances));
381}
382
383static MDEV_TYPE_ATTR_RO(available_instances);
384
385static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
386			       char *buf)
387{
388	return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING);
389}
390
391static MDEV_TYPE_ATTR_RO(device_api);
392
393static struct attribute *vfio_ap_mdev_type_attrs[] = {
394	&mdev_type_attr_name.attr,
395	&mdev_type_attr_device_api.attr,
396	&mdev_type_attr_available_instances.attr,
397	NULL,
398};
399
400static struct attribute_group vfio_ap_mdev_hwvirt_type_group = {
401	.name = VFIO_AP_MDEV_TYPE_HWVIRT,
402	.attrs = vfio_ap_mdev_type_attrs,
403};
404
405static struct attribute_group *vfio_ap_mdev_type_groups[] = {
406	&vfio_ap_mdev_hwvirt_type_group,
407	NULL,
408};
409
410struct vfio_ap_queue_reserved {
411	unsigned long *apid;
412	unsigned long *apqi;
413	bool reserved;
414};
415
416/**
417 * vfio_ap_has_queue
418 *
419 * @dev: an AP queue device
420 * @data: a struct vfio_ap_queue_reserved reference
421 *
422 * Flags whether the AP queue device (@dev) has a queue ID containing the APQN,
423 * apid or apqi specified in @data:
424 *
425 * - If @data contains both an apid and apqi value, then @data will be flagged
426 *   as reserved if the APID and APQI fields for the AP queue device matches
427 *
428 * - If @data contains only an apid value, @data will be flagged as
429 *   reserved if the APID field in the AP queue device matches
430 *
431 * - If @data contains only an apqi value, @data will be flagged as
432 *   reserved if the APQI field in the AP queue device matches
433 *
434 * Returns 0 to indicate the input to function succeeded. Returns -EINVAL if
435 * @data does not contain either an apid or apqi.
436 */
437static int vfio_ap_has_queue(struct device *dev, void *data)
438{
439	struct vfio_ap_queue_reserved *qres = data;
440	struct ap_queue *ap_queue = to_ap_queue(dev);
441	ap_qid_t qid;
442	unsigned long id;
443
444	if (qres->apid && qres->apqi) {
445		qid = AP_MKQID(*qres->apid, *qres->apqi);
446		if (qid == ap_queue->qid)
447			qres->reserved = true;
448	} else if (qres->apid && !qres->apqi) {
449		id = AP_QID_CARD(ap_queue->qid);
450		if (id == *qres->apid)
451			qres->reserved = true;
452	} else if (!qres->apid && qres->apqi) {
453		id = AP_QID_QUEUE(ap_queue->qid);
454		if (id == *qres->apqi)
455			qres->reserved = true;
456	} else {
457		return -EINVAL;
458	}
459
460	return 0;
461}
462
463/**
464 * vfio_ap_verify_queue_reserved
465 *
466 * @matrix_dev: a mediated matrix device
467 * @apid: an AP adapter ID
468 * @apqi: an AP queue index
469 *
470 * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device
471 * driver according to the following rules:
472 *
473 * - If both @apid and @apqi are not NULL, then there must be an AP queue
474 *   device bound to the vfio_ap driver with the APQN identified by @apid and
475 *   @apqi
476 *
477 * - If only @apid is not NULL, then there must be an AP queue device bound
478 *   to the vfio_ap driver with an APQN containing @apid
479 *
480 * - If only @apqi is not NULL, then there must be an AP queue device bound
481 *   to the vfio_ap driver with an APQN containing @apqi
482 *
483 * Returns 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
484 */
485static int vfio_ap_verify_queue_reserved(unsigned long *apid,
486					 unsigned long *apqi)
487{
488	int ret;
489	struct vfio_ap_queue_reserved qres;
490
491	qres.apid = apid;
492	qres.apqi = apqi;
493	qres.reserved = false;
494
495	ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL,
496				     &qres, vfio_ap_has_queue);
497	if (ret)
498		return ret;
499
500	if (qres.reserved)
501		return 0;
502
503	return -EADDRNOTAVAIL;
504}
505
506static int
507vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev,
508					     unsigned long apid)
509{
510	int ret;
511	unsigned long apqi;
512	unsigned long nbits = matrix_mdev->matrix.aqm_max + 1;
513
514	if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits)
515		return vfio_ap_verify_queue_reserved(&apid, NULL);
516
517	for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) {
518		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
519		if (ret)
520			return ret;
521	}
522
523	return 0;
524}
525
526/**
527 * vfio_ap_mdev_verify_no_sharing
528 *
529 * Verifies that the APQNs derived from the cross product of the AP adapter IDs
530 * and AP queue indexes comprising the AP matrix are not configured for another
531 * mediated device. AP queue sharing is not allowed.
532 *
533 * @matrix_mdev: the mediated matrix device
534 *
535 * Returns 0 if the APQNs are not shared, otherwise; returns -EADDRINUSE.
536 */
537static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
538{
539	struct ap_matrix_mdev *lstdev;
540	DECLARE_BITMAP(apm, AP_DEVICES);
541	DECLARE_BITMAP(aqm, AP_DOMAINS);
542
543	list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) {
544		if (matrix_mdev == lstdev)
545			continue;
546
547		memset(apm, 0, sizeof(apm));
548		memset(aqm, 0, sizeof(aqm));
549
550		/*
551		 * We work on full longs, as we can only exclude the leftover
552		 * bits in non-inverse order. The leftover is all zeros.
553		 */
554		if (!bitmap_and(apm, matrix_mdev->matrix.apm,
555				lstdev->matrix.apm, AP_DEVICES))
556			continue;
557
558		if (!bitmap_and(aqm, matrix_mdev->matrix.aqm,
559				lstdev->matrix.aqm, AP_DOMAINS))
560			continue;
561
562		return -EADDRINUSE;
563	}
564
565	return 0;
566}
567
568/**
569 * assign_adapter_store
570 *
571 * @dev:	the matrix device
572 * @attr:	the mediated matrix device's assign_adapter attribute
573 * @buf:	a buffer containing the AP adapter number (APID) to
574 *		be assigned
575 * @count:	the number of bytes in @buf
576 *
577 * Parses the APID from @buf and sets the corresponding bit in the mediated
578 * matrix device's APM.
579 *
580 * Returns the number of bytes processed if the APID is valid; otherwise,
581 * returns one of the following errors:
582 *
583 *	1. -EINVAL
584 *	   The APID is not a valid number
585 *
586 *	2. -ENODEV
587 *	   The APID exceeds the maximum value configured for the system
588 *
589 *	3. -EADDRNOTAVAIL
590 *	   An APQN derived from the cross product of the APID being assigned
591 *	   and the APQIs previously assigned is not bound to the vfio_ap device
592 *	   driver; or, if no APQIs have yet been assigned, the APID is not
593 *	   contained in an APQN bound to the vfio_ap device driver.
594 *
595 *	4. -EADDRINUSE
596 *	   An APQN derived from the cross product of the APID being assigned
597 *	   and the APQIs previously assigned is being used by another mediated
598 *	   matrix device
599 */
600static ssize_t assign_adapter_store(struct device *dev,
601				    struct device_attribute *attr,
602				    const char *buf, size_t count)
603{
604	int ret;
605	unsigned long apid;
606	struct mdev_device *mdev = mdev_from_dev(dev);
607	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
608
609	/* If the guest is running, disallow assignment of adapter */
610	if (matrix_mdev->kvm)
611		return -EBUSY;
612
613	ret = kstrtoul(buf, 0, &apid);
614	if (ret)
615		return ret;
616
617	if (apid > matrix_mdev->matrix.apm_max)
618		return -ENODEV;
619
620	/*
621	 * Set the bit in the AP mask (APM) corresponding to the AP adapter
622	 * number (APID). The bits in the mask, from most significant to least
623	 * significant bit, correspond to APIDs 0-255.
624	 */
625	mutex_lock(&matrix_dev->lock);
626
627	ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid);
628	if (ret)
629		goto done;
630
631	set_bit_inv(apid, matrix_mdev->matrix.apm);
632
633	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
634	if (ret)
635		goto share_err;
636
637	ret = count;
638	goto done;
639
640share_err:
641	clear_bit_inv(apid, matrix_mdev->matrix.apm);
642done:
643	mutex_unlock(&matrix_dev->lock);
644
645	return ret;
646}
647static DEVICE_ATTR_WO(assign_adapter);
648
649/**
650 * unassign_adapter_store
651 *
652 * @dev:	the matrix device
653 * @attr:	the mediated matrix device's unassign_adapter attribute
654 * @buf:	a buffer containing the adapter number (APID) to be unassigned
655 * @count:	the number of bytes in @buf
656 *
657 * Parses the APID from @buf and clears the corresponding bit in the mediated
658 * matrix device's APM.
659 *
660 * Returns the number of bytes processed if the APID is valid; otherwise,
661 * returns one of the following errors:
662 *	-EINVAL if the APID is not a number
663 *	-ENODEV if the APID it exceeds the maximum value configured for the
664 *		system
665 */
666static ssize_t unassign_adapter_store(struct device *dev,
667				      struct device_attribute *attr,
668				      const char *buf, size_t count)
669{
670	int ret;
671	unsigned long apid;
672	struct mdev_device *mdev = mdev_from_dev(dev);
673	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
674
675	/* If the guest is running, disallow un-assignment of adapter */
676	if (matrix_mdev->kvm)
677		return -EBUSY;
678
679	ret = kstrtoul(buf, 0, &apid);
680	if (ret)
681		return ret;
682
683	if (apid > matrix_mdev->matrix.apm_max)
684		return -ENODEV;
685
686	mutex_lock(&matrix_dev->lock);
687	clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm);
688	mutex_unlock(&matrix_dev->lock);
689
690	return count;
691}
692static DEVICE_ATTR_WO(unassign_adapter);
693
694static int
695vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
696					     unsigned long apqi)
697{
698	int ret;
699	unsigned long apid;
700	unsigned long nbits = matrix_mdev->matrix.apm_max + 1;
701
702	if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits)
703		return vfio_ap_verify_queue_reserved(NULL, &apqi);
704
705	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) {
706		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
707		if (ret)
708			return ret;
709	}
710
711	return 0;
712}
713
714/**
715 * assign_domain_store
716 *
717 * @dev:	the matrix device
718 * @attr:	the mediated matrix device's assign_domain attribute
719 * @buf:	a buffer containing the AP queue index (APQI) of the domain to
720 *		be assigned
721 * @count:	the number of bytes in @buf
722 *
723 * Parses the APQI from @buf and sets the corresponding bit in the mediated
724 * matrix device's AQM.
725 *
726 * Returns the number of bytes processed if the APQI is valid; otherwise returns
727 * one of the following errors:
728 *
729 *	1. -EINVAL
730 *	   The APQI is not a valid number
731 *
732 *	2. -ENODEV
733 *	   The APQI exceeds the maximum value configured for the system
734 *
735 *	3. -EADDRNOTAVAIL
736 *	   An APQN derived from the cross product of the APQI being assigned
737 *	   and the APIDs previously assigned is not bound to the vfio_ap device
738 *	   driver; or, if no APIDs have yet been assigned, the APQI is not
739 *	   contained in an APQN bound to the vfio_ap device driver.
740 *
741 *	4. -EADDRINUSE
742 *	   An APQN derived from the cross product of the APQI being assigned
743 *	   and the APIDs previously assigned is being used by another mediated
744 *	   matrix device
745 */
746static ssize_t assign_domain_store(struct device *dev,
747				   struct device_attribute *attr,
748				   const char *buf, size_t count)
749{
750	int ret;
751	unsigned long apqi;
752	struct mdev_device *mdev = mdev_from_dev(dev);
753	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
754	unsigned long max_apqi = matrix_mdev->matrix.aqm_max;
755
756	/* If the guest is running, disallow assignment of domain */
757	if (matrix_mdev->kvm)
758		return -EBUSY;
759
760	ret = kstrtoul(buf, 0, &apqi);
761	if (ret)
762		return ret;
763	if (apqi > max_apqi)
764		return -ENODEV;
765
766	mutex_lock(&matrix_dev->lock);
767
768	ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi);
769	if (ret)
770		goto done;
771
772	set_bit_inv(apqi, matrix_mdev->matrix.aqm);
773
774	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
775	if (ret)
776		goto share_err;
777
778	ret = count;
779	goto done;
780
781share_err:
782	clear_bit_inv(apqi, matrix_mdev->matrix.aqm);
783done:
784	mutex_unlock(&matrix_dev->lock);
785
786	return ret;
787}
788static DEVICE_ATTR_WO(assign_domain);
789
790
791/**
792 * unassign_domain_store
793 *
794 * @dev:	the matrix device
795 * @attr:	the mediated matrix device's unassign_domain attribute
796 * @buf:	a buffer containing the AP queue index (APQI) of the domain to
797 *		be unassigned
798 * @count:	the number of bytes in @buf
799 *
800 * Parses the APQI from @buf and clears the corresponding bit in the
801 * mediated matrix device's AQM.
802 *
803 * Returns the number of bytes processed if the APQI is valid; otherwise,
804 * returns one of the following errors:
805 *	-EINVAL if the APQI is not a number
806 *	-ENODEV if the APQI exceeds the maximum value configured for the system
807 */
808static ssize_t unassign_domain_store(struct device *dev,
809				     struct device_attribute *attr,
810				     const char *buf, size_t count)
811{
812	int ret;
813	unsigned long apqi;
814	struct mdev_device *mdev = mdev_from_dev(dev);
815	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
816
817	/* If the guest is running, disallow un-assignment of domain */
818	if (matrix_mdev->kvm)
819		return -EBUSY;
820
821	ret = kstrtoul(buf, 0, &apqi);
822	if (ret)
823		return ret;
824
825	if (apqi > matrix_mdev->matrix.aqm_max)
826		return -ENODEV;
827
828	mutex_lock(&matrix_dev->lock);
829	clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm);
830	mutex_unlock(&matrix_dev->lock);
831
832	return count;
833}
834static DEVICE_ATTR_WO(unassign_domain);
835
836/**
837 * assign_control_domain_store
838 *
839 * @dev:	the matrix device
840 * @attr:	the mediated matrix device's assign_control_domain attribute
841 * @buf:	a buffer containing the domain ID to be assigned
842 * @count:	the number of bytes in @buf
843 *
844 * Parses the domain ID from @buf and sets the corresponding bit in the mediated
845 * matrix device's ADM.
846 *
847 * Returns the number of bytes processed if the domain ID is valid; otherwise,
848 * returns one of the following errors:
849 *	-EINVAL if the ID is not a number
850 *	-ENODEV if the ID exceeds the maximum value configured for the system
851 */
852static ssize_t assign_control_domain_store(struct device *dev,
853					   struct device_attribute *attr,
854					   const char *buf, size_t count)
855{
856	int ret;
857	unsigned long id;
858	struct mdev_device *mdev = mdev_from_dev(dev);
859	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
860
861	/* If the guest is running, disallow assignment of control domain */
862	if (matrix_mdev->kvm)
863		return -EBUSY;
864
865	ret = kstrtoul(buf, 0, &id);
866	if (ret)
867		return ret;
868
869	if (id > matrix_mdev->matrix.adm_max)
870		return -ENODEV;
871
872	/* Set the bit in the ADM (bitmask) corresponding to the AP control
873	 * domain number (id). The bits in the mask, from most significant to
874	 * least significant, correspond to IDs 0 up to the one less than the
875	 * number of control domains that can be assigned.
876	 */
877	mutex_lock(&matrix_dev->lock);
878	set_bit_inv(id, matrix_mdev->matrix.adm);
879	mutex_unlock(&matrix_dev->lock);
880
881	return count;
882}
883static DEVICE_ATTR_WO(assign_control_domain);
884
885/**
886 * unassign_control_domain_store
887 *
888 * @dev:	the matrix device
889 * @attr:	the mediated matrix device's unassign_control_domain attribute
890 * @buf:	a buffer containing the domain ID to be unassigned
891 * @count:	the number of bytes in @buf
892 *
893 * Parses the domain ID from @buf and clears the corresponding bit in the
894 * mediated matrix device's ADM.
895 *
896 * Returns the number of bytes processed if the domain ID is valid; otherwise,
897 * returns one of the following errors:
898 *	-EINVAL if the ID is not a number
899 *	-ENODEV if the ID exceeds the maximum value configured for the system
900 */
901static ssize_t unassign_control_domain_store(struct device *dev,
902					     struct device_attribute *attr,
903					     const char *buf, size_t count)
904{
905	int ret;
906	unsigned long domid;
907	struct mdev_device *mdev = mdev_from_dev(dev);
908	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
909	unsigned long max_domid =  matrix_mdev->matrix.adm_max;
910
911	/* If the guest is running, disallow un-assignment of control domain */
912	if (matrix_mdev->kvm)
913		return -EBUSY;
914
915	ret = kstrtoul(buf, 0, &domid);
916	if (ret)
917		return ret;
918	if (domid > max_domid)
919		return -ENODEV;
920
921	mutex_lock(&matrix_dev->lock);
922	clear_bit_inv(domid, matrix_mdev->matrix.adm);
923	mutex_unlock(&matrix_dev->lock);
924
925	return count;
926}
927static DEVICE_ATTR_WO(unassign_control_domain);
928
929static ssize_t control_domains_show(struct device *dev,
930				    struct device_attribute *dev_attr,
931				    char *buf)
932{
933	unsigned long id;
934	int nchars = 0;
935	int n;
936	char *bufpos = buf;
937	struct mdev_device *mdev = mdev_from_dev(dev);
938	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
939	unsigned long max_domid = matrix_mdev->matrix.adm_max;
940
941	mutex_lock(&matrix_dev->lock);
942	for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) {
943		n = sprintf(bufpos, "%04lx\n", id);
944		bufpos += n;
945		nchars += n;
946	}
947	mutex_unlock(&matrix_dev->lock);
948
949	return nchars;
950}
951static DEVICE_ATTR_RO(control_domains);
952
953static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
954			   char *buf)
955{
956	struct mdev_device *mdev = mdev_from_dev(dev);
957	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
958	char *bufpos = buf;
959	unsigned long apid;
960	unsigned long apqi;
961	unsigned long apid1;
962	unsigned long apqi1;
963	unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1;
964	unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1;
965	int nchars = 0;
966	int n;
967
968	apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits);
969	apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits);
970
971	mutex_lock(&matrix_dev->lock);
972
973	if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) {
974		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
975			for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
976					     naqm_bits) {
977				n = sprintf(bufpos, "%02lx.%04lx\n", apid,
978					    apqi);
979				bufpos += n;
980				nchars += n;
981			}
982		}
983	} else if (apid1 < napm_bits) {
984		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
985			n = sprintf(bufpos, "%02lx.\n", apid);
986			bufpos += n;
987			nchars += n;
988		}
989	} else if (apqi1 < naqm_bits) {
990		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) {
991			n = sprintf(bufpos, ".%04lx\n", apqi);
992			bufpos += n;
993			nchars += n;
994		}
995	}
996
997	mutex_unlock(&matrix_dev->lock);
998
999	return nchars;
1000}
1001static DEVICE_ATTR_RO(matrix);
1002
1003static struct attribute *vfio_ap_mdev_attrs[] = {
1004	&dev_attr_assign_adapter.attr,
1005	&dev_attr_unassign_adapter.attr,
1006	&dev_attr_assign_domain.attr,
1007	&dev_attr_unassign_domain.attr,
1008	&dev_attr_assign_control_domain.attr,
1009	&dev_attr_unassign_control_domain.attr,
1010	&dev_attr_control_domains.attr,
1011	&dev_attr_matrix.attr,
1012	NULL,
1013};
1014
1015static struct attribute_group vfio_ap_mdev_attr_group = {
1016	.attrs = vfio_ap_mdev_attrs
1017};
1018
1019static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
1020	&vfio_ap_mdev_attr_group,
1021	NULL
1022};
1023
1024/**
1025 * vfio_ap_mdev_set_kvm
1026 *
1027 * @matrix_mdev: a mediated matrix device
1028 * @kvm: reference to KVM instance
1029 *
1030 * Verifies no other mediated matrix device has @kvm and sets a reference to
1031 * it in @matrix_mdev->kvm.
1032 *
1033 * Return 0 if no other mediated matrix device has a reference to @kvm;
1034 * otherwise, returns an -EPERM.
1035 */
1036static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
1037				struct kvm *kvm)
1038{
1039	struct ap_matrix_mdev *m;
1040
1041	mutex_lock(&matrix_dev->lock);
1042
1043	list_for_each_entry(m, &matrix_dev->mdev_list, node) {
1044		if ((m != matrix_mdev) && (m->kvm == kvm)) {
1045			mutex_unlock(&matrix_dev->lock);
1046			return -EPERM;
1047		}
1048	}
1049
1050	matrix_mdev->kvm = kvm;
1051	kvm_get_kvm(kvm);
1052	kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
1053	mutex_unlock(&matrix_dev->lock);
1054
1055	return 0;
1056}
1057
1058/*
1059 * vfio_ap_mdev_iommu_notifier: IOMMU notifier callback
1060 *
1061 * @nb: The notifier block
1062 * @action: Action to be taken
1063 * @data: data associated with the request
1064 *
1065 * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
1066 * pinned before). Other requests are ignored.
1067 *
1068 */
1069static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
1070				       unsigned long action, void *data)
1071{
1072	struct ap_matrix_mdev *matrix_mdev;
1073
1074	matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
1075
1076	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
1077		struct vfio_iommu_type1_dma_unmap *unmap = data;
1078		unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
1079
1080		vfio_unpin_pages(mdev_dev(matrix_mdev->mdev), &g_pfn, 1);
1081		return NOTIFY_OK;
1082	}
1083
1084	return NOTIFY_DONE;
1085}
1086
1087static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
1088				       unsigned long action, void *data)
1089{
1090	int ret;
1091	struct ap_matrix_mdev *matrix_mdev;
1092
1093	if (action != VFIO_GROUP_NOTIFY_SET_KVM)
1094		return NOTIFY_OK;
1095
1096	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
1097
1098	if (!data) {
1099		matrix_mdev->kvm = NULL;
1100		return NOTIFY_OK;
1101	}
1102
1103	ret = vfio_ap_mdev_set_kvm(matrix_mdev, data);
1104	if (ret)
1105		return NOTIFY_DONE;
1106
1107	/* If there is no CRYCB pointer, then we can't copy the masks */
1108	if (!matrix_mdev->kvm->arch.crypto.crycbd)
1109		return NOTIFY_DONE;
1110
1111	kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm,
1112				  matrix_mdev->matrix.aqm,
1113				  matrix_mdev->matrix.adm);
1114
1115	return NOTIFY_OK;
1116}
1117
1118static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
1119{
1120	struct device *dev;
1121	struct vfio_ap_queue *q = NULL;
1122
1123	dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
1124				 &apqn, match_apqn);
1125	if (dev) {
1126		q = dev_get_drvdata(dev);
1127		put_device(dev);
1128	}
1129
1130	return q;
1131}
1132
1133int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q,
1134			     unsigned int retry)
1135{
1136	struct ap_queue_status status;
1137	int ret;
1138	int retry2 = 2;
1139
1140	if (!q)
1141		return 0;
1142
1143retry_zapq:
1144	status = ap_zapq(q->apqn);
1145	switch (status.response_code) {
1146	case AP_RESPONSE_NORMAL:
1147		ret = 0;
1148		break;
1149	case AP_RESPONSE_RESET_IN_PROGRESS:
1150		if (retry--) {
1151			msleep(20);
1152			goto retry_zapq;
1153		}
1154		ret = -EBUSY;
1155		break;
1156	case AP_RESPONSE_Q_NOT_AVAIL:
1157	case AP_RESPONSE_DECONFIGURED:
1158	case AP_RESPONSE_CHECKSTOPPED:
1159		WARN_ON_ONCE(status.irq_enabled);
1160		ret = -EBUSY;
1161		goto free_resources;
1162	default:
1163		/* things are really broken, give up */
1164		WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n",
1165		     status.response_code);
1166		return -EIO;
1167	}
1168
1169	/* wait for the reset to take effect */
1170	while (retry2--) {
1171		if (status.queue_empty && !status.irq_enabled)
1172			break;
1173		msleep(20);
1174		status = ap_tapq(q->apqn, NULL);
1175	}
1176	WARN_ON_ONCE(retry2 <= 0);
1177
1178free_resources:
1179	vfio_ap_free_aqic_resources(q);
1180
1181	return ret;
1182}
1183
1184static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev)
1185{
1186	int ret;
1187	int rc = 0;
1188	unsigned long apid, apqi;
1189	struct vfio_ap_queue *q;
1190	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
1191
1192	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm,
1193			     matrix_mdev->matrix.apm_max + 1) {
1194		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
1195				     matrix_mdev->matrix.aqm_max + 1) {
1196			q = vfio_ap_find_queue(AP_MKQID(apid, apqi));
1197			ret = vfio_ap_mdev_reset_queue(q, 1);
1198			/*
1199			 * Regardless whether a queue turns out to be busy, or
1200			 * is not operational, we need to continue resetting
1201			 * the remaining queues.
1202			 */
1203			if (ret)
1204				rc = ret;
1205		}
1206	}
1207
1208	return rc;
1209}
1210
1211static int vfio_ap_mdev_open(struct mdev_device *mdev)
1212{
1213	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
1214	unsigned long events;
1215	int ret;
1216
1217
1218	if (!try_module_get(THIS_MODULE))
1219		return -ENODEV;
1220
1221	matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
1222	events = VFIO_GROUP_NOTIFY_SET_KVM;
1223
1224	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
1225				     &events, &matrix_mdev->group_notifier);
1226	if (ret) {
1227		module_put(THIS_MODULE);
1228		return ret;
1229	}
1230
1231	matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
1232	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
1233	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
1234				     &events, &matrix_mdev->iommu_notifier);
1235	if (!ret)
1236		return ret;
1237
1238	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
1239				 &matrix_mdev->group_notifier);
1240	module_put(THIS_MODULE);
1241	return ret;
1242}
1243
1244static void vfio_ap_mdev_release(struct mdev_device *mdev)
1245{
1246	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
1247
1248	mutex_lock(&matrix_dev->lock);
1249	if (matrix_mdev->kvm) {
1250		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
1251		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
1252		vfio_ap_mdev_reset_queues(mdev);
1253		kvm_put_kvm(matrix_mdev->kvm);
1254		matrix_mdev->kvm = NULL;
1255	}
1256	mutex_unlock(&matrix_dev->lock);
1257
1258	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
1259				 &matrix_mdev->iommu_notifier);
1260	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
1261				 &matrix_mdev->group_notifier);
1262	module_put(THIS_MODULE);
1263}
1264
1265static int vfio_ap_mdev_get_device_info(unsigned long arg)
1266{
1267	unsigned long minsz;
1268	struct vfio_device_info info;
1269
1270	minsz = offsetofend(struct vfio_device_info, num_irqs);
1271
1272	if (copy_from_user(&info, (void __user *)arg, minsz))
1273		return -EFAULT;
1274
1275	if (info.argsz < minsz)
1276		return -EINVAL;
1277
1278	info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET;
1279	info.num_regions = 0;
1280	info.num_irqs = 0;
1281
1282	return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
1283}
1284
1285static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
1286				    unsigned int cmd, unsigned long arg)
1287{
1288	int ret;
1289
1290	mutex_lock(&matrix_dev->lock);
1291	switch (cmd) {
1292	case VFIO_DEVICE_GET_INFO:
1293		ret = vfio_ap_mdev_get_device_info(arg);
1294		break;
1295	case VFIO_DEVICE_RESET:
1296		ret = vfio_ap_mdev_reset_queues(mdev);
1297		break;
1298	default:
1299		ret = -EOPNOTSUPP;
1300		break;
1301	}
1302	mutex_unlock(&matrix_dev->lock);
1303
1304	return ret;
1305}
1306
1307static const struct mdev_parent_ops vfio_ap_matrix_ops = {
1308	.owner			= THIS_MODULE,
1309	.supported_type_groups	= vfio_ap_mdev_type_groups,
1310	.mdev_attr_groups	= vfio_ap_mdev_attr_groups,
1311	.create			= vfio_ap_mdev_create,
1312	.remove			= vfio_ap_mdev_remove,
1313	.open			= vfio_ap_mdev_open,
1314	.release		= vfio_ap_mdev_release,
1315	.ioctl			= vfio_ap_mdev_ioctl,
1316};
1317
1318int vfio_ap_mdev_register(void)
1319{
1320	atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);
1321
1322	return mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops);
1323}
1324
1325void vfio_ap_mdev_unregister(void)
1326{
1327	mdev_unregister_device(&matrix_dev->device);
1328}
1329