xref: /kernel/linux/linux-6.6/drivers/iommu/intel/svm.c (revision 62306a36)
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright © 2015 Intel Corporation.
4 *
5 * Authors: David Woodhouse <dwmw2@infradead.org>
6 */
7
8#include <linux/mmu_notifier.h>
9#include <linux/sched.h>
10#include <linux/sched/mm.h>
11#include <linux/slab.h>
12#include <linux/rculist.h>
13#include <linux/pci.h>
14#include <linux/pci-ats.h>
15#include <linux/dmar.h>
16#include <linux/interrupt.h>
17#include <linux/mm_types.h>
18#include <linux/xarray.h>
19#include <asm/page.h>
20#include <asm/fpu/api.h>
21
22#include "iommu.h"
23#include "pasid.h"
24#include "perf.h"
25#include "../iommu-sva.h"
26#include "trace.h"
27
28static irqreturn_t prq_event_thread(int irq, void *d);
29
30static DEFINE_XARRAY_ALLOC(pasid_private_array);
31static int pasid_private_add(ioasid_t pasid, void *priv)
32{
33	return xa_alloc(&pasid_private_array, &pasid, priv,
34			XA_LIMIT(pasid, pasid), GFP_ATOMIC);
35}
36
37static void pasid_private_remove(ioasid_t pasid)
38{
39	xa_erase(&pasid_private_array, pasid);
40}
41
42static void *pasid_private_find(ioasid_t pasid)
43{
44	return xa_load(&pasid_private_array, pasid);
45}
46
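/*
 * Illustrative sketch (not part of the driver): xa_alloc() normally picks
 * any free index within the given limits, but passing XA_LIMIT(pasid, pasid)
 * pins both bounds to the same value, so the entry is stored at exactly that
 * PASID (or the call fails with -EBUSY if the slot is occupied). The xarray
 * therefore acts as a PASID-keyed lookup table, and pasid_private_find()/
 * pasid_private_remove() are plain xa_load()/xa_erase() at that index.
 * A minimal usage sketch, assuming the caller already owns a valid PASID:
 *
 *	struct intel_svm *svm = kzalloc(sizeof(*svm), GFP_KERNEL);
 *	int err = pasid_private_add(pasid, svm);  // stored at index 'pasid'
 *	...
 *	svm = pasid_private_find(pasid);          // xa_load() at 'pasid'
 *	pasid_private_remove(pasid);              // xa_erase() at 'pasid'
 */
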
47static struct intel_svm_dev *
48svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
49{
50	struct intel_svm_dev *sdev = NULL, *t;
51
52	rcu_read_lock();
53	list_for_each_entry_rcu(t, &svm->devs, list) {
54		if (t->dev == dev) {
55			sdev = t;
56			break;
57		}
58	}
59	rcu_read_unlock();
60
61	return sdev;
62}
63
64int intel_svm_enable_prq(struct intel_iommu *iommu)
65{
66	struct iopf_queue *iopfq;
67	struct page *pages;
68	int irq, ret;
69
70	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
71	if (!pages) {
72		pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
73			iommu->name);
74		return -ENOMEM;
75	}
76	iommu->prq = page_address(pages);
77
78	irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
79	if (irq <= 0) {
80		pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
81		       iommu->name);
82		ret = -EINVAL;
83		goto free_prq;
84	}
85	iommu->pr_irq = irq;
86
87	snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
88		 "dmar%d-iopfq", iommu->seq_id);
89	iopfq = iopf_queue_alloc(iommu->iopfq_name);
90	if (!iopfq) {
91		pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
92		ret = -ENOMEM;
93		goto free_hwirq;
94	}
95	iommu->iopf_queue = iopfq;
96
97	snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
98
99	ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
100				   iommu->prq_name, iommu);
101	if (ret) {
102		pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
103		       iommu->name);
104		goto free_iopfq;
105	}
106	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
107	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
108	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
109
110	init_completion(&iommu->prq_complete);
111
112	return 0;
113
114free_iopfq:
115	iopf_queue_free(iommu->iopf_queue);
116	iommu->iopf_queue = NULL;
117free_hwirq:
118	dmar_free_hwirq(irq);
119	iommu->pr_irq = 0;
120free_prq:
121	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
122	iommu->prq = NULL;
123
124	return ret;
125}
126
127int intel_svm_finish_prq(struct intel_iommu *iommu)
128{
129	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
130	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
131	dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
132
133	if (iommu->pr_irq) {
134		free_irq(iommu->pr_irq, iommu);
135		dmar_free_hwirq(iommu->pr_irq);
136		iommu->pr_irq = 0;
137	}
138
139	if (iommu->iopf_queue) {
140		iopf_queue_free(iommu->iopf_queue);
141		iommu->iopf_queue = NULL;
142	}
143
144	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
145	iommu->prq = NULL;
146
147	return 0;
148}
149
150void intel_svm_check(struct intel_iommu *iommu)
151{
152	if (!pasid_supported(iommu))
153		return;
154
155	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
156	    !cap_fl1gp_support(iommu->cap)) {
157		pr_err("%s SVM disabled, incompatible 1GB page capability\n",
158		       iommu->name);
159		return;
160	}
161
162	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
163	    !cap_fl5lp_support(iommu->cap)) {
164		pr_err("%s SVM disabled, incompatible paging mode\n",
165		       iommu->name);
166		return;
167	}
168
169	iommu->flags |= VTD_FLAG_SVM_CAPABLE;
170}
171
172static void __flush_svm_range_dev(struct intel_svm *svm,
173				  struct intel_svm_dev *sdev,
174				  unsigned long address,
175				  unsigned long pages, int ih)
176{
177	struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);
178
179	if (WARN_ON(!pages))
180		return;
181
182	qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
183	if (info->ats_enabled) {
184		qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
185					 svm->pasid, sdev->qdep, address,
186					 order_base_2(pages));
187		quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
188					  svm->pasid, sdev->qdep);
189	}
190}
191
192static void intel_flush_svm_range_dev(struct intel_svm *svm,
193				      struct intel_svm_dev *sdev,
194				      unsigned long address,
195				      unsigned long pages, int ih)
196{
197	unsigned long shift = ilog2(__roundup_pow_of_two(pages));
198	unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
199	unsigned long start = ALIGN_DOWN(address, align);
200	unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);
201
202	while (start < end) {
203		__flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
204		start += align;
205	}
206}
207
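/*
 * Illustrative sketch (not part of the driver): the queued-invalidation
 * descriptors encode their range as a naturally aligned, power-of-two number
 * of pages, so intel_flush_svm_range_dev() rounds 'pages' up to a power of
 * two, widens the window to that alignment, and flushes it in equal aligned
 * chunks. A worked example, assuming VTD_PAGE_SHIFT == 12:
 *
 *	address = 0x1000, pages = 3
 *	shift = ilog2(__roundup_pow_of_two(3))     = 2
 *	align = 1UL << (12 + 2)                    = 0x4000 (4 pages)
 *	start = ALIGN_DOWN(0x1000, 0x4000)         = 0x0
 *	end   = ALIGN(0x1000 + 3 * 0x1000, 0x4000) = 0x4000
 *
 * giving a single 4-page flush of [0x0, 0x4000) that covers the three
 * requested pages.
 */
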
208static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
209				unsigned long pages, int ih)
210{
211	struct intel_svm_dev *sdev;
212
213	rcu_read_lock();
214	list_for_each_entry_rcu(sdev, &svm->devs, list)
215		intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
216	rcu_read_unlock();
217}
218
219static void intel_flush_svm_all(struct intel_svm *svm)
220{
221	struct device_domain_info *info;
222	struct intel_svm_dev *sdev;
223
224	rcu_read_lock();
225	list_for_each_entry_rcu(sdev, &svm->devs, list) {
226		info = dev_iommu_priv_get(sdev->dev);
227
228		qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0);
229		if (info->ats_enabled) {
230			qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
231						 svm->pasid, sdev->qdep,
232						 0, 64 - VTD_PAGE_SHIFT);
233			quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT,
234						  svm->pasid, sdev->qdep);
235		}
236	}
237	rcu_read_unlock();
238}
239
240/* Pages have been freed at this point */
241static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
242					struct mm_struct *mm,
243					unsigned long start, unsigned long end)
244{
245	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
246
247	if (start == 0 && end == -1UL) {
248		intel_flush_svm_all(svm);
249		return;
250	}
251
252	intel_flush_svm_range(svm, start,
253			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
254}
255
256static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
257{
258	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
259	struct intel_svm_dev *sdev;
260
261	/* This might end up being called from exit_mmap(), *before* the page
262	 * tables are cleared. And __mmu_notifier_release() will delete us from
263	 * the list of notifiers so that our arch_invalidate_secondary_tlbs()
264	 * callback doesn't get called when the page tables are cleared. So we
265	 * need to protect against hardware accessing those page tables.
266	 *
267	 * We do it by clearing the entry in the PASID table and then flushing
268	 * the IOTLB and the PASID table caches. This might upset hardware;
269	 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
270	 * page) so that we end up taking a fault that the hardware really
271	 * *has* to handle gracefully without affecting other processes.
272	 */
273	rcu_read_lock();
274	list_for_each_entry_rcu(sdev, &svm->devs, list)
275		intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
276					    svm->pasid, true);
277	rcu_read_unlock();
278
279}
280
281static const struct mmu_notifier_ops intel_mmuops = {
282	.release = intel_mm_release,
283	.arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
284};
285
286static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
287			     struct intel_svm **rsvm,
288			     struct intel_svm_dev **rsdev)
289{
290	struct intel_svm_dev *sdev = NULL;
291	struct intel_svm *svm;
292
293	if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX)
294		return -EINVAL;
295
296	svm = pasid_private_find(pasid);
297	if (IS_ERR(svm))
298		return PTR_ERR(svm);
299
300	if (!svm)
301		goto out;
302
303	/*
304	 * If we found svm for the PASID, there must be at least one device
305	 * bond.
306	 */
307	if (WARN_ON(list_empty(&svm->devs)))
308		return -EINVAL;
309	sdev = svm_lookup_device_by_dev(svm, dev);
310
311out:
312	*rsvm = svm;
313	*rsdev = sdev;
314
315	return 0;
316}
317
318static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev,
319			     struct mm_struct *mm)
320{
321	struct device_domain_info *info = dev_iommu_priv_get(dev);
322	struct intel_svm_dev *sdev;
323	struct intel_svm *svm;
324	unsigned long sflags;
325	int ret = 0;
326
327	svm = pasid_private_find(mm->pasid);
328	if (!svm) {
329		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
330		if (!svm)
331			return -ENOMEM;
332
333		svm->pasid = mm->pasid;
334		svm->mm = mm;
335		INIT_LIST_HEAD_RCU(&svm->devs);
336
337		svm->notifier.ops = &intel_mmuops;
338		ret = mmu_notifier_register(&svm->notifier, mm);
339		if (ret) {
340			kfree(svm);
341			return ret;
342		}
343
344		ret = pasid_private_add(svm->pasid, svm);
345		if (ret) {
346			mmu_notifier_unregister(&svm->notifier, mm);
347			kfree(svm);
348			return ret;
349		}
350	}
351
352	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
353	if (!sdev) {
354		ret = -ENOMEM;
355		goto free_svm;
356	}
357
358	sdev->dev = dev;
359	sdev->iommu = iommu;
360	sdev->did = FLPT_DEFAULT_DID;
361	sdev->sid = PCI_DEVID(info->bus, info->devfn);
362	init_rcu_head(&sdev->rcu);
363	if (info->ats_enabled) {
364		sdev->qdep = info->ats_qdep;
365		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
366			sdev->qdep = 0;
367	}
368
369	/* Set up the pasid table: */
370	sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
371	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
372					    FLPT_DEFAULT_DID, sflags);
373	if (ret)
374		goto free_sdev;
375
376	list_add_rcu(&sdev->list, &svm->devs);
377
378	return 0;
379
380free_sdev:
381	kfree(sdev);
382free_svm:
383	if (list_empty(&svm->devs)) {
384		mmu_notifier_unregister(&svm->notifier, mm);
385		pasid_private_remove(mm->pasid);
386		kfree(svm);
387	}
388
389	return ret;
390}
391
392void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid)
393{
394	struct intel_svm_dev *sdev;
395	struct intel_iommu *iommu;
396	struct intel_svm *svm;
397	struct mm_struct *mm;
398
399	iommu = device_to_iommu(dev, NULL, NULL);
400	if (!iommu)
401		return;
402
403	if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev))
404		return;
405	mm = svm->mm;
406
407	if (sdev) {
408		list_del_rcu(&sdev->list);
409		kfree_rcu(sdev, rcu);
410
411		if (list_empty(&svm->devs)) {
412			if (svm->notifier.ops)
413				mmu_notifier_unregister(&svm->notifier, mm);
414			pasid_private_remove(svm->pasid);
415			/*
416			 * We mandate that no page faults may be outstanding
417			 * for the PASID when intel_svm_remove_dev_pasid() is
418			 * called.
418			 * If that is not obeyed, subtle errors will happen.
419			 * Let's make them less subtle...
420			 */
421			memset(svm, 0x6b, sizeof(*svm));
422			kfree(svm);
423		}
424	}
425}
426
427/* Page request queue descriptor */
428struct page_req_dsc {
429	union {
430		struct {
431			u64 type:8;
432			u64 pasid_present:1;
433			u64 priv_data_present:1;
434			u64 rsvd:6;
435			u64 rid:16;
436			u64 pasid:20;
437			u64 exe_req:1;
438			u64 pm_req:1;
439			u64 rsvd2:10;
440		};
441		u64 qw_0;
442	};
443	union {
444		struct {
445			u64 rd_req:1;
446			u64 wr_req:1;
447			u64 lpig:1;
448			u64 prg_index:9;
449			u64 addr:52;
450		};
451		u64 qw_1;
452	};
453	u64 priv_data[2];
454};
455
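/*
 * Illustrative sketch (not part of the driver): each page request descriptor
 * is 32 bytes -- the two bitfield qwords above plus two qwords of optional
 * private data. Assuming the usual x86 little-endian bitfield layout and
 * VTD_PAGE_SHIFT == 12, the raw qwords decode roughly as:
 *
 *	u64 qw0 = dsc->qw_0, qw1 = dsc->qw_1;
 *	u16 rid   = (qw0 >> 16) & 0xffff;     // requester ID, bits 31:16
 *	u32 pasid = (qw0 >> 32) & 0xfffff;    // PASID, bits 51:32
 *	u64 addr  = qw1 & ~0xfffULL;          // page-aligned fault address,
 *	                                      // i.e. (u64)dsc->addr << 12
 *
 * which matches how prq_event_thread() reconstructs the faulting address.
 */
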
456static bool is_canonical_address(u64 addr)
457{
458	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
459	long saddr = (long) addr;
460
461	return (((saddr << shift) >> shift) == saddr);
462}
463
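/*
 * Illustrative sketch (not part of the driver): the shift-left/shift-right
 * pair above checks that the upper address bits are a sign extension of the
 * top implemented virtual-address bit. Assuming 48-bit virtual addresses
 * (__VIRTUAL_MASK_SHIFT == 47, so shift == 16):
 *
 *	0x00007fffffffffffULL  ->  canonical      (upper bits all zero)
 *	0xffff800000000000ULL  ->  canonical      (upper bits all one)
 *	0x0000800000000000ULL  ->  not canonical  (sign extension broken)
 *
 * With 5-level paging (LA57) the same test applies with a 57-bit width.
 */
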
464/**
465 * intel_drain_pasid_prq - Drain page requests and responses for a pasid
466 * @dev: target device
467 * @pasid: pasid for draining
468 *
469 * Drain all pending page requests and responses related to @pasid in both
470 * software and hardware. This is supposed to be called after the device
471 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
472 * and DevTLB have been invalidated.
473 *
474 * It waits until all pending page requests for @pasid in the page fault
475 * queue are completed by the prq handling thread. Then follow the steps
476 * described in VT-d spec CH7.10 to drain all page requests and page
477 * responses pending in the hardware.
478 */
479void intel_drain_pasid_prq(struct device *dev, u32 pasid)
480{
481	struct device_domain_info *info;
482	struct dmar_domain *domain;
483	struct intel_iommu *iommu;
484	struct qi_desc desc[3];
485	struct pci_dev *pdev;
486	int head, tail;
487	u16 sid, did;
488	int qdep;
489
490	info = dev_iommu_priv_get(dev);
491	if (WARN_ON(!info || !dev_is_pci(dev)))
492		return;
493
494	if (!info->pri_enabled)
495		return;
496
497	iommu = info->iommu;
498	domain = info->domain;
499	pdev = to_pci_dev(dev);
500	sid = PCI_DEVID(info->bus, info->devfn);
501	did = domain_id_iommu(domain, iommu);
502	qdep = pci_ats_queue_depth(pdev);
503
504	/*
505	 * Check and wait until all pending page requests in the queue are
506	 * handled by the prq handling thread.
507	 */
508prq_retry:
509	reinit_completion(&iommu->prq_complete);
510	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
511	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
512	while (head != tail) {
513		struct page_req_dsc *req;
514
515		req = &iommu->prq[head / sizeof(*req)];
516		if (!req->pasid_present || req->pasid != pasid) {
517			head = (head + sizeof(*req)) & PRQ_RING_MASK;
518			continue;
519		}
520
521		wait_for_completion(&iommu->prq_complete);
522		goto prq_retry;
523	}
524
525	iopf_queue_flush_dev(dev);
526
527	/*
528	 * Perform steps described in VT-d spec CH7.10 to drain page
529	 * requests and responses in hardware.
530	 */
531	memset(desc, 0, sizeof(desc));
532	desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
533			QI_IWD_FENCE |
534			QI_IWD_TYPE;
535	desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
536			QI_EIOTLB_DID(did) |
537			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
538			QI_EIOTLB_TYPE;
539	desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
540			QI_DEV_EIOTLB_SID(sid) |
541			QI_DEV_EIOTLB_QDEP(qdep) |
542			QI_DEIOTLB_TYPE |
543			QI_DEV_IOTLB_PFSID(info->pfsid);
544qi_retry:
545	reinit_completion(&iommu->prq_complete);
546	qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
547	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
548		wait_for_completion(&iommu->prq_complete);
549		goto qi_retry;
550	}
551}
552
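/*
 * Illustrative sketch (not part of the driver): DMAR_PQH_REG/DMAR_PQT_REG
 * hold byte offsets into the page request queue, and each descriptor is
 * sizeof(struct page_req_dsc) == 32 bytes. Masking with PRQ_RING_MASK wraps
 * the byte offset at the end of the queue (the offset stays descriptor
 * aligned, since head and tail only move in 32-byte steps), so the scan
 * loops here and in prq_event_thread() walk the ring roughly like:
 *
 *	while (head != tail) {
 *		req  = &iommu->prq[head / sizeof(*req)];      // offset -> index
 *		...
 *		head = (head + sizeof(*req)) & PRQ_RING_MASK; // next, with wrap
 *	}
 *
 * The drain path above re-reads PQH/PQT and sleeps on prq_complete, which
 * prq_event_thread() completes after advancing PQH, so the wait ends once
 * the pending entries for this PASID have been consumed.
 */
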
553static int prq_to_iommu_prot(struct page_req_dsc *req)
554{
555	int prot = 0;
556
557	if (req->rd_req)
558		prot |= IOMMU_FAULT_PERM_READ;
559	if (req->wr_req)
560		prot |= IOMMU_FAULT_PERM_WRITE;
561	if (req->exe_req)
562		prot |= IOMMU_FAULT_PERM_EXEC;
563	if (req->pm_req)
564		prot |= IOMMU_FAULT_PERM_PRIV;
565
566	return prot;
567}
568
569static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
570				struct page_req_dsc *desc)
571{
572	struct iommu_fault_event event;
573
574	if (!dev || !dev_is_pci(dev))
575		return -ENODEV;
576
577	/* Fill in event data for device specific processing */
578	memset(&event, 0, sizeof(struct iommu_fault_event));
579	event.fault.type = IOMMU_FAULT_PAGE_REQ;
580	event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
581	event.fault.prm.pasid = desc->pasid;
582	event.fault.prm.grpid = desc->prg_index;
583	event.fault.prm.perm = prq_to_iommu_prot(desc);
584
585	if (desc->lpig)
586		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
587	if (desc->pasid_present) {
588		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
589		event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
590	}
591	if (desc->priv_data_present) {
592		/*
593		 * Set the last-page-in-group bit if private data is present:
594		 * a page response is then required, just as it is for LPIG.
595		 * iommu_report_device_fault() doesn't understand this vendor-
596		 * specific requirement, so we set last_page as a workaround.
597		 */
598		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
599		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
600		event.fault.prm.private_data[0] = desc->priv_data[0];
601		event.fault.prm.private_data[1] = desc->priv_data[1];
602	} else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
603		/*
604		 * If the private data fields are not used by hardware, use them
605		 * to monitor the PRQ handling latency.
606		 */
607		event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
608	}
609
610	return iommu_report_device_fault(dev, &event);
611}
612
613static void handle_bad_prq_event(struct intel_iommu *iommu,
614				 struct page_req_dsc *req, int result)
615{
616	struct qi_desc desc;
617
618	pr_err("%s: Invalid page request: %08llx %08llx\n",
619	       iommu->name, ((unsigned long long *)req)[0],
620	       ((unsigned long long *)req)[1]);
621
622	/*
623	 * Per VT-d spec. v3.0 ch7.7, system software must
624	 * respond with page group response if private data
625	 * is present (PDP) or last page in group (LPIG) bit
626	 * is set. This is an additional VT-d feature beyond
627	 * PCI ATS spec.
628	 */
629	if (!req->lpig && !req->priv_data_present)
630		return;
631
632	desc.qw0 = QI_PGRP_PASID(req->pasid) |
633			QI_PGRP_DID(req->rid) |
634			QI_PGRP_PASID_P(req->pasid_present) |
635			QI_PGRP_PDP(req->priv_data_present) |
636			QI_PGRP_RESP_CODE(result) |
637			QI_PGRP_RESP_TYPE;
638	desc.qw1 = QI_PGRP_IDX(req->prg_index) |
639			QI_PGRP_LPIG(req->lpig);
640
641	if (req->priv_data_present) {
642		desc.qw2 = req->priv_data[0];
643		desc.qw3 = req->priv_data[1];
644	} else {
645		desc.qw2 = 0;
646		desc.qw3 = 0;
647	}
648
649	qi_submit_sync(iommu, &desc, 1, 0);
650}
651
652static irqreturn_t prq_event_thread(int irq, void *d)
653{
654	struct intel_iommu *iommu = d;
655	struct page_req_dsc *req;
656	int head, tail, handled;
657	struct pci_dev *pdev;
658	u64 address;
659
660	/*
661	 * Clear PPR bit before reading head/tail registers, to ensure that
662	 * we get a new interrupt if needed.
663	 */
664	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
665
666	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
667	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
668	handled = (head != tail);
669	while (head != tail) {
670		req = &iommu->prq[head / sizeof(*req)];
671		address = (u64)req->addr << VTD_PAGE_SHIFT;
672
673		if (unlikely(!req->pasid_present)) {
674			pr_err("IOMMU: %s: Page request without PASID\n",
675			       iommu->name);
676bad_req:
677			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
678			goto prq_advance;
679		}
680
681		if (unlikely(!is_canonical_address(address))) {
682			pr_err("IOMMU: %s: Address is not canonical\n",
683			       iommu->name);
684			goto bad_req;
685		}
686
687		if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
688			pr_err("IOMMU: %s: Page request in Privilege Mode\n",
689			       iommu->name);
690			goto bad_req;
691		}
692
693		if (unlikely(req->exe_req && req->rd_req)) {
694			pr_err("IOMMU: %s: Execution request not supported\n",
695			       iommu->name);
696			goto bad_req;
697		}
698
699		/* Drop Stop Marker message. No need for a response. */
700		if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
701			goto prq_advance;
702
703		pdev = pci_get_domain_bus_and_slot(iommu->segment,
704						   PCI_BUS_NUM(req->rid),
705						   req->rid & 0xff);
706		/*
707		 * If the prq is to be handled outside the iommu driver, via the
708		 * receiver of the fault notifiers, we skip the page response here.
709		 */
710		if (!pdev)
711			goto bad_req;
712
713		if (intel_svm_prq_report(iommu, &pdev->dev, req))
714			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
715		else
716			trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
717					 req->priv_data[0], req->priv_data[1],
718					 iommu->prq_seq_number++);
719		pci_dev_put(pdev);
720prq_advance:
721		head = (head + sizeof(*req)) & PRQ_RING_MASK;
722	}
723
724	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
725
726	/*
727	 * Clear the page request overflow bit and wake up all threads that
728	 * are waiting for the completion of this handling.
729	 */
730	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
731		pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
732				    iommu->name);
733		head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
734		tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
735		if (head == tail) {
736			iopf_queue_discard_partial(iommu->iopf_queue);
737			writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
738			pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared\n",
739					    iommu->name);
740		}
741	}
742
743	if (!completion_done(&iommu->prq_complete))
744		complete(&iommu->prq_complete);
745
746	return IRQ_RETVAL(handled);
747}
748
749int intel_svm_page_response(struct device *dev,
750			    struct iommu_fault_event *evt,
751			    struct iommu_page_response *msg)
752{
753	struct iommu_fault_page_request *prm;
754	struct intel_iommu *iommu;
755	bool private_present;
756	bool pasid_present;
757	bool last_page;
758	u8 bus, devfn;
759	int ret = 0;
760	u16 sid;
761
762	if (!dev || !dev_is_pci(dev))
763		return -ENODEV;
764
765	iommu = device_to_iommu(dev, &bus, &devfn);
766	if (!iommu)
767		return -ENODEV;
768
769	if (!msg || !evt)
770		return -EINVAL;
771
772	prm = &evt->fault.prm;
773	sid = PCI_DEVID(bus, devfn);
774	pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
775	private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
776	last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
777
778	if (!pasid_present) {
779		ret = -EINVAL;
780		goto out;
781	}
782
783	if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
784		ret = -EINVAL;
785		goto out;
786	}
787
788	/*
789	 * Per VT-d spec. v3.0 ch7.7, system software must respond
790	 * with page group response if private data is present (PDP)
791	 * or last page in group (LPIG) bit is set. This is an
792	 * additional VT-d requirement beyond PCI ATS spec.
793	 */
794	if (last_page || private_present) {
795		struct qi_desc desc;
796
797		desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
798				QI_PGRP_PASID_P(pasid_present) |
799				QI_PGRP_PDP(private_present) |
800				QI_PGRP_RESP_CODE(msg->code) |
801				QI_PGRP_RESP_TYPE;
802		desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
803		desc.qw2 = 0;
804		desc.qw3 = 0;
805
806		if (private_present) {
807			desc.qw2 = prm->private_data[0];
808			desc.qw3 = prm->private_data[1];
809		} else if (prm->private_data[0]) {
810			dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
811				ktime_to_ns(ktime_get()) - prm->private_data[0]);
812		}
813
814		qi_submit_sync(iommu, &desc, 1, 0);
815	}
816out:
817	return ret;
818}
819
820static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
821				   struct device *dev, ioasid_t pasid)
822{
823	struct device_domain_info *info = dev_iommu_priv_get(dev);
824	struct intel_iommu *iommu = info->iommu;
825	struct mm_struct *mm = domain->mm;
826
827	return intel_svm_bind_mm(iommu, dev, mm);
828}
829
830static void intel_svm_domain_free(struct iommu_domain *domain)
831{
832	kfree(to_dmar_domain(domain));
833}
834
835static const struct iommu_domain_ops intel_svm_domain_ops = {
836	.set_dev_pasid		= intel_svm_set_dev_pasid,
837	.free			= intel_svm_domain_free
838};
839
840struct iommu_domain *intel_svm_domain_alloc(void)
841{
842	struct dmar_domain *domain;
843
844	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
845	if (!domain)
846		return NULL;
847	domain->domain.ops = &intel_svm_domain_ops;
848
849	return &domain->domain;
850}
851
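/*
 * Illustrative sketch (not part of the driver): device drivers do not call
 * intel_svm_domain_alloc() directly; they go through the IOMMU core SVA API,
 * which allocates the domain and attaches it to a PASID, eventually reaching
 * intel_svm_set_dev_pasid() and intel_svm_bind_mm() above. A rough outline
 * of a consumer, assuming the generic SVA API (iommu_sva_bind_device() and
 * friends) and omitting error handling:
 *
 *	struct iommu_sva *handle;
 *	u32 pasid;
 *
 *	handle = iommu_sva_bind_device(dev, current->mm);
 *	pasid  = iommu_sva_get_pasid(handle);
 *	// ... program 'pasid' into the device and issue DMA on user VAs ...
 *	iommu_sva_unbind_device(handle);
 */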