1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (c) 2006, Intel Corporation.
4 *
5 * Copyright (C) 2006-2008 Intel Corporation
6 * Author: Ashok Raj <ashok.raj@intel.com>
7 * Author: Shaohua Li <shaohua.li@intel.com>
8 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9 *
10 * This file implements early detection/parsing of Remapping Devices
11 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
12 * tables.
13 *
14 * These routines are used by both DMA-remapping and Interrupt-remapping
15 */
16
17#define pr_fmt(fmt)     "DMAR: " fmt
18
19#include <linux/pci.h>
20#include <linux/dmar.h>
21#include <linux/iova.h>
22#include <linux/intel-iommu.h>
23#include <linux/timer.h>
24#include <linux/irq.h>
25#include <linux/interrupt.h>
26#include <linux/tboot.h>
27#include <linux/dmi.h>
28#include <linux/slab.h>
29#include <linux/iommu.h>
30#include <linux/numa.h>
31#include <linux/limits.h>
32#include <asm/irq_remapping.h>
33#include <asm/iommu_table.h>
34
35#include "../irq_remapping.h"
36
37typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
38struct dmar_res_callback {
39	dmar_res_handler_t	cb[ACPI_DMAR_TYPE_RESERVED];
40	void			*arg[ACPI_DMAR_TYPE_RESERVED];
41	bool			ignore_unhandled;
42	bool			print_entry;
43};
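/*
 * Callers fill in one handler per DMAR structure type; see
 * parse_dmar_table() below for an example that registers handlers for
 * the DRHD, RMRR, ATSR, RHSA and ANDD structures.
 */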
44
45/*
46 * Assumptions:
47 * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
48 *    before the IO devices managed by that unit.
49 * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
50 *    after the IO devices managed by that unit.
51 * 3) Hotplug events are rare.
52 *
53 * Locking rules for DMA and interrupt remapping related global data structures:
54 * 1) Use dmar_global_lock in process context
55 * 2) Use RCU in interrupt context
56 */
57DECLARE_RWSEM(dmar_global_lock);
58LIST_HEAD(dmar_drhd_units);
59
60struct acpi_table_header * __initdata dmar_tbl;
61static int dmar_dev_scope_status = 1;
62static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
63
64static int alloc_iommu(struct dmar_drhd_unit *drhd);
65static void free_iommu(struct intel_iommu *iommu);
66
67extern const struct iommu_ops intel_iommu_ops;
68
69static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
70{
71	/*
72	 * Add the INCLUDE_ALL unit at the tail, so that scanning the list
73	 * will find it at the very end.
74	 */
75	if (drhd->include_all)
76		list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77	else
78		list_add_rcu(&drhd->list, &dmar_drhd_units);
79}
80
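/*
 * Count the PCI endpoint, PCI bridge and ACPI namespace entries in the
 * device scope range [start, end) and allocate a matching array of
 * struct dmar_dev_scope.  Returns NULL if the range contains no such
 * entries.
 */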
81void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82{
83	struct acpi_dmar_device_scope *scope;
84
85	*cnt = 0;
86	while (start < end) {
87		scope = start;
88		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
89		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
90		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91			(*cnt)++;
92		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
93			scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
94			pr_warn("Unsupported device scope\n");
95		}
96		start += scope->length;
97	}
98	if (*cnt == 0)
99		return NULL;
100
101	return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
102}
103
104void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
105{
106	int i;
107	struct device *tmp_dev;
108
109	if (*devices && *cnt) {
110		for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
111			put_device(tmp_dev);
112		kfree(*devices);
113	}
114
115	*devices = NULL;
116	*cnt = 0;
117}
118
119/* Optimize out kzalloc()/kfree() for normal cases */
120static char dmar_pci_notify_info_buf[64];
121
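/*
 * Build a notification record for @dev.  For device addition events the
 * record also captures the PCI path (bus, device and function at each
 * level) from the root bus down to the device, so that it can later be
 * matched against the device scope entries in the DMAR table.
 */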
122static struct dmar_pci_notify_info *
123dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
124{
125	int level = 0;
126	size_t size;
127	struct pci_dev *tmp;
128	struct dmar_pci_notify_info *info;
129
130	BUG_ON(dev->is_virtfn);
131
132	/*
133	 * Ignore devices that have a domain number higher than what can
134	 * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
135	 */
136	if (pci_domain_nr(dev->bus) > U16_MAX)
137		return NULL;
138
139	/* Only generate path[] for device addition event */
140	if (event == BUS_NOTIFY_ADD_DEVICE)
141		for (tmp = dev; tmp; tmp = tmp->bus->self)
142			level++;
143
144	size = struct_size(info, path, level);
145	if (size <= sizeof(dmar_pci_notify_info_buf)) {
146		info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
147	} else {
148		info = kzalloc(size, GFP_KERNEL);
149		if (!info) {
150			pr_warn("Out of memory when allocating notify_info for %s.\n",
151				pci_name(dev));
152			if (dmar_dev_scope_status == 0)
153				dmar_dev_scope_status = -ENOMEM;
154			return NULL;
155		}
156	}
157
158	info->event = event;
159	info->dev = dev;
160	info->seg = pci_domain_nr(dev->bus);
161	info->level = level;
162	if (event == BUS_NOTIFY_ADD_DEVICE) {
163		for (tmp = dev; tmp; tmp = tmp->bus->self) {
164			level--;
165			info->path[level].bus = tmp->bus->number;
166			info->path[level].device = PCI_SLOT(tmp->devfn);
167			info->path[level].function = PCI_FUNC(tmp->devfn);
168			if (pci_is_root_bus(tmp->bus))
169				info->bus = tmp->bus->number;
170		}
171	}
172
173	return info;
174}
175
176static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
177{
178	if ((void *)info != dmar_pci_notify_info_buf)
179		kfree(info);
180}
181
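/*
 * Match the PCI path recorded in @info against a device scope path from
 * the DMAR table.  If that fails and the scope path has a single entry,
 * fall back to matching the final device alone, to work around broken
 * RMRR entries that give the device's bus number directly instead of a
 * path from the root bus.
 */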
182static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
183				struct acpi_dmar_pci_path *path, int count)
184{
185	int i;
186
187	if (info->bus != bus)
188		goto fallback;
189	if (info->level != count)
190		goto fallback;
191
192	for (i = 0; i < count; i++) {
193		if (path[i].device != info->path[i].device ||
194		    path[i].function != info->path[i].function)
195			goto fallback;
196	}
197
198	return true;
199
200fallback:
201
202	if (count != 1)
203		return false;
204
205	i = info->level - 1;
206	if (bus              == info->path[i].bus &&
207	    path[0].device   == info->path[i].device &&
208	    path[0].function == info->path[i].function) {
209		pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
210			bus, path[0].device, path[0].function);
211		return true;
212	}
213
214	return false;
215}
216
217/* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
218int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
219			  void *start, void *end, u16 segment,
220			  struct dmar_dev_scope *devices,
221			  int devices_cnt)
222{
223	int i, level;
224	struct device *tmp, *dev = &info->dev->dev;
225	struct acpi_dmar_device_scope *scope;
226	struct acpi_dmar_pci_path *path;
227
228	if (segment != info->seg)
229		return 0;
230
231	for (; start < end; start += scope->length) {
232		scope = start;
233		if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
234		    scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
235			continue;
236
237		path = (struct acpi_dmar_pci_path *)(scope + 1);
238		level = (scope->length - sizeof(*scope)) / sizeof(*path);
239		if (!dmar_match_pci_path(info, scope->bus, path, level))
240			continue;
241
242		/*
243		 * We expect devices with endpoint scope to have normal PCI
244		 * headers, and devices with bridge scope to have bridge PCI
245		 * headers.  However, PCI NTB devices may be listed in the
246		 * DMAR table with bridge scope, even though they have a
247		 * normal PCI header.  NTB devices are identified by class
248		 * "BRIDGE_OTHER" (0680h) - we don't declare a scope mismatch
249		 * for this special case.
250		 */
251		if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
252		     info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
253		    (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
254		     (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
255		      info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
256			pr_warn("Device scope type does not match for %s\n",
257				pci_name(info->dev));
258			return -EINVAL;
259		}
260
261		for_each_dev_scope(devices, devices_cnt, i, tmp)
262			if (tmp == NULL) {
263				devices[i].bus = info->dev->bus->number;
264				devices[i].devfn = info->dev->devfn;
265				rcu_assign_pointer(devices[i].dev,
266						   get_device(dev));
267				return 1;
268			}
269		BUG_ON(i >= devices_cnt);
270	}
271
272	return 0;
273}
274
275int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
276			  struct dmar_dev_scope *devices, int count)
277{
278	int index;
279	struct device *tmp;
280
281	if (info->seg != segment)
282		return 0;
283
284	for_each_active_dev_scope(devices, count, index, tmp)
285		if (tmp == &info->dev->dev) {
286			RCU_INIT_POINTER(devices[index].dev, NULL);
287			synchronize_rcu();
288			put_device(tmp);
289			return 1;
290		}
291
292	return 0;
293}
294
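/*
 * A new PCI device has appeared.  Add it to the device scope of the
 * first non-INCLUDE_ALL DRHD unit that lists it, then notify the DMA
 * remapping and interrupt remapping code about the device.
 */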
295static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
296{
297	int ret = 0;
298	struct dmar_drhd_unit *dmaru;
299	struct acpi_dmar_hardware_unit *drhd;
300
301	for_each_drhd_unit(dmaru) {
302		if (dmaru->include_all)
303			continue;
304
305		drhd = container_of(dmaru->hdr,
306				    struct acpi_dmar_hardware_unit, header);
307		ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
308				((void *)drhd) + drhd->header.length,
309				dmaru->segment,
310				dmaru->devices, dmaru->devices_cnt);
311		if (ret)
312			break;
313	}
314	if (ret >= 0)
315		ret = dmar_iommu_notify_scope_dev(info);
316	if (ret < 0 && dmar_dev_scope_status == 0)
317		dmar_dev_scope_status = ret;
318
319	if (ret >= 0)
320		intel_irq_remap_add_device(info);
321
322	return ret;
323}
324
325static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
326{
327	struct dmar_drhd_unit *dmaru;
328
329	for_each_drhd_unit(dmaru)
330		if (dmar_remove_dev_scope(info, dmaru->segment,
331			dmaru->devices, dmaru->devices_cnt))
332			break;
333	dmar_iommu_notify_scope_dev(info);
334}
335
336static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
337{
338	struct pci_dev *physfn = pci_physfn(pdev);
339
340	dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev));
341}
342
343static int dmar_pci_bus_notifier(struct notifier_block *nb,
344				 unsigned long action, void *data)
345{
346	struct pci_dev *pdev = to_pci_dev(data);
347	struct dmar_pci_notify_info *info;
348
349	/* Only care about add/remove events for physical functions.
350	 * For VFs we actually do the lookup based on the corresponding
351	 * PF in device_to_iommu() anyway. */
352	if (pdev->is_virtfn) {
353		/*
354		 * Ensure that the VF device inherits the irq domain of the
355		 * PF device. Ideally the device would inherit the domain
356		 * from the bus, but DMAR can have multiple units per bus
357		 * which makes this impossible. The VF 'bus' could inherit
358		 * from the PF device, but that's yet another x86'sism to
359		 * inflict on everybody else.
360		 */
361		if (action == BUS_NOTIFY_ADD_DEVICE)
362			vf_inherit_msi_domain(pdev);
363		return NOTIFY_DONE;
364	}
365
366	if (action != BUS_NOTIFY_ADD_DEVICE &&
367	    action != BUS_NOTIFY_REMOVED_DEVICE)
368		return NOTIFY_DONE;
369
370	info = dmar_alloc_pci_notify_info(pdev, action);
371	if (!info)
372		return NOTIFY_DONE;
373
374	down_write(&dmar_global_lock);
375	if (action == BUS_NOTIFY_ADD_DEVICE)
376		dmar_pci_bus_add_dev(info);
377	else if (action == BUS_NOTIFY_REMOVED_DEVICE)
378		dmar_pci_bus_del_dev(info);
379	up_write(&dmar_global_lock);
380
381	dmar_free_pci_notify_info(info);
382
383	return NOTIFY_OK;
384}
385
386static struct notifier_block dmar_pci_bus_nb = {
387	.notifier_call = dmar_pci_bus_notifier,
388	.priority = 1,
389};
390
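/*
 * Look up an already registered DRHD unit with the same segment and
 * register base address, so that a re-parsed or hot-added unit is not
 * registered twice.
 */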
391static struct dmar_drhd_unit *
392dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
393{
394	struct dmar_drhd_unit *dmaru;
395
396	list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
397				dmar_rcu_check())
398		if (dmaru->segment == drhd->segment &&
399		    dmaru->reg_base_addr == drhd->address)
400			return dmaru;
401
402	return NULL;
403}
404
405/*
406 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
407 * structure, which uniquely represents one DMA remapping hardware unit
408 * present in the platform
409 */
410static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
411{
412	struct acpi_dmar_hardware_unit *drhd;
413	struct dmar_drhd_unit *dmaru;
414	int ret;
415
416	drhd = (struct acpi_dmar_hardware_unit *)header;
417	dmaru = dmar_find_dmaru(drhd);
418	if (dmaru)
419		goto out;
420
421	dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
422	if (!dmaru)
423		return -ENOMEM;
424
425	/*
426	 * If header is allocated from slab by ACPI _DSM method, we need to
427	 * copy the content because the memory buffer will be freed on return.
428	 */
429	dmaru->hdr = (void *)(dmaru + 1);
430	memcpy(dmaru->hdr, header, header->length);
431	dmaru->reg_base_addr = drhd->address;
432	dmaru->segment = drhd->segment;
433	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
434	dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
435					      ((void *)drhd) + drhd->header.length,
436					      &dmaru->devices_cnt);
437	if (dmaru->devices_cnt && dmaru->devices == NULL) {
438		kfree(dmaru);
439		return -ENOMEM;
440	}
441
442	ret = alloc_iommu(dmaru);
443	if (ret) {
444		dmar_free_dev_scope(&dmaru->devices,
445				    &dmaru->devices_cnt);
446		kfree(dmaru);
447		return ret;
448	}
449	dmar_register_drhd_unit(dmaru);
450
451out:
452	if (arg)
453		(*(int *)arg)++;
454
455	return 0;
456}
457
458static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
459{
460	if (dmaru->devices && dmaru->devices_cnt)
461		dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
462	if (dmaru->iommu)
463		free_iommu(dmaru->iommu);
464	kfree(dmaru);
465}
466
467static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
468				      void *arg)
469{
470	struct acpi_dmar_andd *andd = (void *)header;
471
472	/* Check for NUL termination within the designated length */
473	if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
474		pr_warn(FW_BUG
475			   "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
476			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
477			   dmi_get_system_info(DMI_BIOS_VENDOR),
478			   dmi_get_system_info(DMI_BIOS_VERSION),
479			   dmi_get_system_info(DMI_PRODUCT_VERSION));
480		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
481		return -EINVAL;
482	}
483	pr_info("ANDD device: %x name: %s\n", andd->device_number,
484		andd->device_name);
485
486	return 0;
487}
488
489#ifdef CONFIG_ACPI_NUMA
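/*
 * A Remapping Hardware Static Affinity (RHSA) structure associates a
 * remapping hardware unit with a NUMA proximity domain.  Record the
 * corresponding node on the matching IOMMU so that later allocations
 * can be made node-local.
 */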
490static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
491{
492	struct acpi_dmar_rhsa *rhsa;
493	struct dmar_drhd_unit *drhd;
494
495	rhsa = (struct acpi_dmar_rhsa *)header;
496	for_each_drhd_unit(drhd) {
497		if (drhd->reg_base_addr == rhsa->base_address) {
498			int node = pxm_to_node(rhsa->proximity_domain);
499
500			if (node != NUMA_NO_NODE && !node_online(node))
501				node = NUMA_NO_NODE;
502			drhd->iommu->node = node;
503			return 0;
504		}
505	}
506	pr_warn(FW_BUG
507		"Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
508		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
509		rhsa->base_address,
510		dmi_get_system_info(DMI_BIOS_VENDOR),
511		dmi_get_system_info(DMI_BIOS_VERSION),
512		dmi_get_system_info(DMI_PRODUCT_VERSION));
513	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
514
515	return 0;
516}
517#else
518#define	dmar_parse_one_rhsa		dmar_res_noop
519#endif
520
521static void
522dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
523{
524	struct acpi_dmar_hardware_unit *drhd;
525	struct acpi_dmar_reserved_memory *rmrr;
526	struct acpi_dmar_atsr *atsr;
527	struct acpi_dmar_rhsa *rhsa;
528
529	switch (header->type) {
530	case ACPI_DMAR_TYPE_HARDWARE_UNIT:
531		drhd = container_of(header, struct acpi_dmar_hardware_unit,
532				    header);
533		pr_info("DRHD base: %#016Lx flags: %#x\n",
534			(unsigned long long)drhd->address, drhd->flags);
535		break;
536	case ACPI_DMAR_TYPE_RESERVED_MEMORY:
537		rmrr = container_of(header, struct acpi_dmar_reserved_memory,
538				    header);
539		pr_info("RMRR base: %#016Lx end: %#016Lx\n",
540			(unsigned long long)rmrr->base_address,
541			(unsigned long long)rmrr->end_address);
542		break;
543	case ACPI_DMAR_TYPE_ROOT_ATS:
544		atsr = container_of(header, struct acpi_dmar_atsr, header);
545		pr_info("ATSR flags: %#x\n", atsr->flags);
546		break;
547	case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
548		rhsa = container_of(header, struct acpi_dmar_rhsa, header);
549		pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
550		       (unsigned long long)rhsa->base_address,
551		       rhsa->proximity_domain);
552		break;
553	case ACPI_DMAR_TYPE_NAMESPACE:
554		/* We don't print this here because we need to sanity-check
555		   it first. So print it in dmar_parse_one_andd() instead. */
556		break;
557	}
558}
559
560/**
561 * dmar_table_detect - checks to see if the platform supports DMAR devices
562 */
563static int __init dmar_table_detect(void)
564{
565	acpi_status status = AE_OK;
566
567	/* If we can find the DMAR table, then there are DMAR devices */
568	status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
569
570	if (ACPI_SUCCESS(status) && !dmar_tbl) {
571		pr_warn("Unable to map DMAR\n");
572		status = AE_NOT_FOUND;
573	}
574
575	return ACPI_SUCCESS(status) ? 0 : -ENOENT;
576}
577
578static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
579				       size_t len, struct dmar_res_callback *cb)
580{
581	struct acpi_dmar_header *iter, *next;
582	struct acpi_dmar_header *end = ((void *)start) + len;
583
584	for (iter = start; iter < end; iter = next) {
585		next = (void *)iter + iter->length;
586		if (iter->length == 0) {
587			/* Avoid looping forever on bad ACPI tables */
588			pr_debug(FW_BUG "Invalid 0-length structure\n");
589			break;
590		} else if (next > end) {
591			/* Avoid passing table end */
592			pr_warn(FW_BUG "Record passes table end\n");
593			return -EINVAL;
594		}
595
596		if (cb->print_entry)
597			dmar_table_print_dmar_entry(iter);
598
599		if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
600			/* continue for forward compatibility */
601			pr_debug("Unknown DMAR structure type %d\n",
602				 iter->type);
603		} else if (cb->cb[iter->type]) {
604			int ret;
605
606			ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
607			if (ret)
608				return ret;
609		} else if (!cb->ignore_unhandled) {
610			pr_warn("No handler for DMAR structure type %d\n",
611				iter->type);
612			return -EINVAL;
613		}
614	}
615
616	return 0;
617}
618
619static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
620				       struct dmar_res_callback *cb)
621{
622	return dmar_walk_remapping_entries((void *)(dmar + 1),
623			dmar->header.length - sizeof(*dmar), cb);
624}
625
626/**
627 * parse_dmar_table - parses the DMA reporting table
628 */
629static int __init
630parse_dmar_table(void)
631{
632	struct acpi_table_dmar *dmar;
633	int drhd_count = 0;
634	int ret;
635	struct dmar_res_callback cb = {
636		.print_entry = true,
637		.ignore_unhandled = true,
638		.arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
639		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
640		.cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
641		.cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
642		.cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
643		.cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
644	};
645
646	/*
647	 * Detect the table again; the earlier dmar_tbl mapping could have been
648	 * done with the fixed map.
649	 */
650	dmar_table_detect();
651
652	/*
653	 * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
654	 * that SINIT saved in SinitMleData in the TXT heap (which is DMA protected)
655	 */
656	dmar_tbl = tboot_get_dmar_table(dmar_tbl);
657
658	dmar = (struct acpi_table_dmar *)dmar_tbl;
659	if (!dmar)
660		return -ENODEV;
661
662	if (dmar->width < PAGE_SHIFT - 1) {
663		pr_warn("Invalid DMAR haw\n");
664		return -EINVAL;
665	}
666
667	pr_info("Host address width %d\n", dmar->width + 1);
668	ret = dmar_walk_dmar_table(dmar, &cb);
669	if (ret == 0 && drhd_count == 0)
670		pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
671
672	return ret;
673}
674
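/*
 * Check whether @dev, or any PCI bridge upstream of it, is listed in the
 * given device scope array.
 */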
675static int dmar_pci_device_match(struct dmar_dev_scope devices[],
676				 int cnt, struct pci_dev *dev)
677{
678	int index;
679	struct device *tmp;
680
681	while (dev) {
682		for_each_active_dev_scope(devices, cnt, index, tmp)
683			if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
684				return 1;
685
686		/* Check our parent */
687		dev = dev->bus->self;
688	}
689
690	return 0;
691}
692
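/*
 * Find the DRHD unit that covers @dev: either a unit whose device scope
 * explicitly lists the device (or one of its upstream bridges), or the
 * INCLUDE_ALL unit for the device's PCI segment.  VFs are matched via
 * their PF.
 */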
693struct dmar_drhd_unit *
694dmar_find_matched_drhd_unit(struct pci_dev *dev)
695{
696	struct dmar_drhd_unit *dmaru;
697	struct acpi_dmar_hardware_unit *drhd;
698
699	dev = pci_physfn(dev);
700
701	rcu_read_lock();
702	for_each_drhd_unit(dmaru) {
703		drhd = container_of(dmaru->hdr,
704				    struct acpi_dmar_hardware_unit,
705				    header);
706
707		if (dmaru->include_all &&
708		    drhd->segment == pci_domain_nr(dev->bus))
709			goto out;
710
711		if (dmar_pci_device_match(dmaru->devices,
712					  dmaru->devices_cnt, dev))
713			goto out;
714	}
715	dmaru = NULL;
716out:
717	rcu_read_unlock();
718
719	return dmaru;
720}
721
722static void __init dmar_acpi_insert_dev_scope(u8 device_number,
723					      struct acpi_device *adev)
724{
725	struct dmar_drhd_unit *dmaru;
726	struct acpi_dmar_hardware_unit *drhd;
727	struct acpi_dmar_device_scope *scope;
728	struct device *tmp;
729	int i;
730	struct acpi_dmar_pci_path *path;
731
732	for_each_drhd_unit(dmaru) {
733		drhd = container_of(dmaru->hdr,
734				    struct acpi_dmar_hardware_unit,
735				    header);
736
737		for (scope = (void *)(drhd + 1);
738		     (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
739		     scope = ((void *)scope) + scope->length) {
740			if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
741				continue;
742			if (scope->enumeration_id != device_number)
743				continue;
744
745			path = (void *)(scope + 1);
746			pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
747				dev_name(&adev->dev), dmaru->reg_base_addr,
748				scope->bus, path->device, path->function);
749			for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
750				if (tmp == NULL) {
751					dmaru->devices[i].bus = scope->bus;
752					dmaru->devices[i].devfn = PCI_DEVFN(path->device,
753									    path->function);
754					rcu_assign_pointer(dmaru->devices[i].dev,
755							   get_device(&adev->dev));
756					return;
757				}
758			BUG_ON(i >= dmaru->devices_cnt);
759		}
760	}
761	pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
762		device_number, dev_name(&adev->dev));
763}
764
765static int __init dmar_acpi_dev_scope_init(void)
766{
767	struct acpi_dmar_andd *andd;
768
769	if (dmar_tbl == NULL)
770		return -ENODEV;
771
772	for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
773	     ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
774	     andd = ((void *)andd) + andd->header.length) {
775		if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
776			acpi_handle h;
777			struct acpi_device *adev;
778
779			if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
780							  andd->device_name,
781							  &h))) {
782				pr_err("Failed to find handle for ACPI object %s\n",
783				       andd->device_name);
784				continue;
785			}
786			if (acpi_bus_get_device(h, &adev)) {
787				pr_err("Failed to get device for ACPI object %s\n",
788				       andd->device_name);
789				continue;
790			}
791			dmar_acpi_insert_dev_scope(andd->device_number, adev);
792		}
793	}
794	return 0;
795}
796
797int __init dmar_dev_scope_init(void)
798{
799	struct pci_dev *dev = NULL;
800	struct dmar_pci_notify_info *info;
801
802	if (dmar_dev_scope_status != 1)
803		return dmar_dev_scope_status;
804
805	if (list_empty(&dmar_drhd_units)) {
806		dmar_dev_scope_status = -ENODEV;
807	} else {
808		dmar_dev_scope_status = 0;
809
810		dmar_acpi_dev_scope_init();
811
812		for_each_pci_dev(dev) {
813			if (dev->is_virtfn)
814				continue;
815
816			info = dmar_alloc_pci_notify_info(dev,
817					BUS_NOTIFY_ADD_DEVICE);
818			if (!info) {
819				pci_dev_put(dev);
820				return dmar_dev_scope_status;
821			} else {
822				dmar_pci_bus_add_dev(info);
823				dmar_free_pci_notify_info(info);
824			}
825		}
826	}
827
828	return dmar_dev_scope_status;
829}
830
831void __init dmar_register_bus_notifier(void)
832{
833	bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
834}
835
836
837int __init dmar_table_init(void)
838{
839	static int dmar_table_initialized;
840	int ret;
841
842	if (dmar_table_initialized == 0) {
843		ret = parse_dmar_table();
844		if (ret < 0) {
845			if (ret != -ENODEV)
846				pr_info("Parse DMAR table failure.\n");
847		} else  if (list_empty(&dmar_drhd_units)) {
848			pr_info("No DMAR devices found\n");
849			ret = -ENODEV;
850		}
851
852		if (ret < 0)
853			dmar_table_initialized = ret;
854		else
855			dmar_table_initialized = 1;
856	}
857
858	return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
859}
860
861static void warn_invalid_dmar(u64 addr, const char *message)
862{
863	pr_warn_once(FW_BUG
864		"Your BIOS is broken; DMAR reported at address %llx%s!\n"
865		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
866		addr, message,
867		dmi_get_system_info(DMI_BIOS_VENDOR),
868		dmi_get_system_info(DMI_BIOS_VERSION),
869		dmi_get_system_info(DMI_PRODUCT_VERSION));
870	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
871}
872
873static int __ref
874dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
875{
876	struct acpi_dmar_hardware_unit *drhd;
877	void __iomem *addr;
878	u64 cap, ecap;
879
880	drhd = (void *)entry;
881	if (!drhd->address) {
882		warn_invalid_dmar(0, "");
883		return -EINVAL;
884	}
885
886	if (arg)
887		addr = ioremap(drhd->address, VTD_PAGE_SIZE);
888	else
889		addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
890	if (!addr) {
891		pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
892		return -EINVAL;
893	}
894
895	cap = dmar_readq(addr + DMAR_CAP_REG);
896	ecap = dmar_readq(addr + DMAR_ECAP_REG);
897
898	if (arg)
899		iounmap(addr);
900	else
901		early_iounmap(addr, VTD_PAGE_SIZE);
902
903	if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
904		warn_invalid_dmar(drhd->address, " returns all ones");
905		return -EINVAL;
906	}
907
908	return 0;
909}
910
911int __init detect_intel_iommu(void)
912{
913	int ret;
914	struct dmar_res_callback validate_drhd_cb = {
915		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
916		.ignore_unhandled = true,
917	};
918
919	down_write(&dmar_global_lock);
920	ret = dmar_table_detect();
921	if (!ret)
922		ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
923					   &validate_drhd_cb);
924	if (!ret && !no_iommu && !iommu_detected &&
925	    (!dmar_disabled || dmar_platform_optin())) {
926		iommu_detected = 1;
927		/* Make sure ACS will be enabled */
928		pci_request_acs();
929	}
930
931#ifdef CONFIG_X86
932	if (!ret) {
933		x86_init.iommu.iommu_init = intel_iommu_init;
934		x86_platform.iommu_shutdown = intel_iommu_shutdown;
935	}
936
937#endif
938
939	if (dmar_tbl) {
940		acpi_put_table(dmar_tbl);
941		dmar_tbl = NULL;
942	}
943	up_write(&dmar_global_lock);
944
945	return ret ? ret : 1;
946}
947
948static void unmap_iommu(struct intel_iommu *iommu)
949{
950	iounmap(iommu->reg);
951	release_mem_region(iommu->reg_phys, iommu->reg_size);
952}
953
954/**
955 * map_iommu: map the iommu's registers
956 * @iommu: the iommu to map
957 * @phys_addr: the physical address of the base register
958 *
959 * Memory map the iommu's registers.  Start with a single page, and
960 * possibly expand if that turns out to be insufficient.
961 */
962static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
963{
964	int map_size, err = 0;
965
966	iommu->reg_phys = phys_addr;
967	iommu->reg_size = VTD_PAGE_SIZE;
968
969	if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
970		pr_err("Can't reserve memory\n");
971		err = -EBUSY;
972		goto out;
973	}
974
975	iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
976	if (!iommu->reg) {
977		pr_err("Can't map the region\n");
978		err = -ENOMEM;
979		goto release;
980	}
981
982	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
983	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
984
985	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
986		err = -EINVAL;
987		warn_invalid_dmar(phys_addr, " returns all ones");
988		goto unmap;
989	}
990	if (ecap_vcs(iommu->ecap))
991		iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
992
993	/* the registers might be more than one page */
994	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
995			 cap_max_fault_reg_offset(iommu->cap));
996	map_size = VTD_PAGE_ALIGN(map_size);
997	if (map_size > iommu->reg_size) {
998		iounmap(iommu->reg);
999		release_mem_region(iommu->reg_phys, iommu->reg_size);
1000		iommu->reg_size = map_size;
1001		if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
1002					iommu->name)) {
1003			pr_err("Can't reserve memory\n");
1004			err = -EBUSY;
1005			goto out;
1006		}
1007		iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
1008		if (!iommu->reg) {
1009			pr_err("Can't map the region\n");
1010			err = -ENOMEM;
1011			goto release;
1012		}
1013	}
1014	err = 0;
1015	goto out;
1016
1017unmap:
1018	iounmap(iommu->reg);
1019release:
1020	release_mem_region(iommu->reg_phys, iommu->reg_size);
1021out:
1022	return err;
1023}
1024
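/*
 * Allocate a unique sequence number for this IOMMU from the global
 * dmar_seq_ids bitmap and use it to form the unit's "dmarN" name.
 */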
1025static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1026{
1027	iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1028					    DMAR_UNITS_SUPPORTED);
1029	if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1030		iommu->seq_id = -1;
1031	} else {
1032		set_bit(iommu->seq_id, dmar_seq_ids);
1033		sprintf(iommu->name, "dmar%d", iommu->seq_id);
1034	}
1035
1036	return iommu->seq_id;
1037}
1038
1039static void dmar_free_seq_id(struct intel_iommu *iommu)
1040{
1041	if (iommu->seq_id >= 0) {
1042		clear_bit(iommu->seq_id, dmar_seq_ids);
1043		iommu->seq_id = -1;
1044	}
1045}
1046
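/*
 * Set up a struct intel_iommu for @drhd: map its registers, read the
 * capability and extended capability registers, compute the supported
 * address widths and, for hot-added units, register the IOMMU with
 * sysfs and the IOMMU core.
 */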
1047static int alloc_iommu(struct dmar_drhd_unit *drhd)
1048{
1049	struct intel_iommu *iommu;
1050	u32 ver, sts;
1051	int agaw = -1;
1052	int msagaw = -1;
1053	int err;
1054
1055	if (!drhd->reg_base_addr) {
1056		warn_invalid_dmar(0, "");
1057		return -EINVAL;
1058	}
1059
1060	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1061	if (!iommu)
1062		return -ENOMEM;
1063
1064	if (dmar_alloc_seq_id(iommu) < 0) {
1065		pr_err("Failed to allocate seq_id\n");
1066		err = -ENOSPC;
1067		goto error;
1068	}
1069
1070	err = map_iommu(iommu, drhd->reg_base_addr);
1071	if (err) {
1072		pr_err("Failed to map %s\n", iommu->name);
1073		goto error_free_seq_id;
1074	}
1075
1076	err = -EINVAL;
1077	if (cap_sagaw(iommu->cap) == 0) {
1078		pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
1079			iommu->name);
1080		drhd->ignored = 1;
1081	}
1082
1083	if (!drhd->ignored) {
1084		agaw = iommu_calculate_agaw(iommu);
1085		if (agaw < 0) {
1086			pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1087			       iommu->seq_id);
1088			drhd->ignored = 1;
1089		}
1090	}
1091	if (!drhd->ignored) {
1092		msagaw = iommu_calculate_max_sagaw(iommu);
1093		if (msagaw < 0) {
1094			pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1095			       iommu->seq_id);
1096			drhd->ignored = 1;
1097			agaw = -1;
1098		}
1099	}
1100	iommu->agaw = agaw;
1101	iommu->msagaw = msagaw;
1102	iommu->segment = drhd->segment;
1103
1104	iommu->node = NUMA_NO_NODE;
1105
1106	ver = readl(iommu->reg + DMAR_VER_REG);
1107	pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1108		iommu->name,
1109		(unsigned long long)drhd->reg_base_addr,
1110		DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1111		(unsigned long long)iommu->cap,
1112		(unsigned long long)iommu->ecap);
1113
1114	/* Reflect status in gcmd */
1115	sts = readl(iommu->reg + DMAR_GSTS_REG);
1116	if (sts & DMA_GSTS_IRES)
1117		iommu->gcmd |= DMA_GCMD_IRE;
1118	if (sts & DMA_GSTS_TES)
1119		iommu->gcmd |= DMA_GCMD_TE;
1120	if (sts & DMA_GSTS_QIES)
1121		iommu->gcmd |= DMA_GCMD_QIE;
1122
1123	raw_spin_lock_init(&iommu->register_lock);
1124
1125	/*
1126	 * This is only for hotplug; at boot time intel_iommu_enabled won't
1127	 * be set yet. When intel_iommu_init() runs, it registers the units
1128	 * present at boot time, then sets intel_iommu_enabled.
1129	 */
1130	if (intel_iommu_enabled && !drhd->ignored) {
1131		err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1132					     intel_iommu_groups,
1133					     "%s", iommu->name);
1134		if (err)
1135			goto err_unmap;
1136
1137		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
1138
1139		err = iommu_device_register(&iommu->iommu);
1140		if (err)
1141			goto err_sysfs;
1142	}
1143
1144	drhd->iommu = iommu;
1145	iommu->drhd = drhd;
1146
1147	return 0;
1148
1149err_sysfs:
1150	iommu_device_sysfs_remove(&iommu->iommu);
1151err_unmap:
1152	unmap_iommu(iommu);
1153error_free_seq_id:
1154	dmar_free_seq_id(iommu);
1155error:
1156	kfree(iommu);
1157	return err;
1158}
1159
1160static void free_iommu(struct intel_iommu *iommu)
1161{
1162	if (intel_iommu_enabled && !iommu->drhd->ignored) {
1163		iommu_device_unregister(&iommu->iommu);
1164		iommu_device_sysfs_remove(&iommu->iommu);
1165	}
1166
1167	if (iommu->irq) {
1168		if (iommu->pr_irq) {
1169			free_irq(iommu->pr_irq, iommu);
1170			dmar_free_hwirq(iommu->pr_irq);
1171			iommu->pr_irq = 0;
1172		}
1173		free_irq(iommu->irq, iommu);
1174		dmar_free_hwirq(iommu->irq);
1175		iommu->irq = 0;
1176	}
1177
1178	if (iommu->qi) {
1179		free_page((unsigned long)iommu->qi->desc);
1180		kfree(iommu->qi->desc_status);
1181		kfree(iommu->qi);
1182	}
1183
1184	if (iommu->reg)
1185		unmap_iommu(iommu);
1186
1187	dmar_free_seq_id(iommu);
1188	kfree(iommu);
1189}
1190
1191/*
1192 * Reclaim all the submitted descriptors which have completed their work.
1193 */
1194static inline void reclaim_free_desc(struct q_inval *qi)
1195{
1196	while (qi->desc_status[qi->free_tail] == QI_DONE ||
1197	       qi->desc_status[qi->free_tail] == QI_ABORT) {
1198		qi->desc_status[qi->free_tail] = QI_FREE;
1199		qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1200		qi->free_cnt++;
1201	}
1202}
1203
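/*
 * Check the fault status register for invalidation queue errors.  On an
 * Invalidation Queue Error (IQE) hitting our descriptor, replace the bad
 * descriptor with the wait descriptor so the queue can make progress and
 * fail the submission.  On an Invalidation Time-out Error (ITE), mark the
 * pending wait descriptors as aborted so that qi_submit_sync() can
 * resubmit them.  An Invalidation Completion Error (ICE) is simply
 * cleared.
 */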
1204static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1205{
1206	u32 fault;
1207	int head, tail;
1208	struct q_inval *qi = iommu->qi;
1209	int shift = qi_shift(iommu);
1210
1211	if (qi->desc_status[wait_index] == QI_ABORT)
1212		return -EAGAIN;
1213
1214	fault = readl(iommu->reg + DMAR_FSTS_REG);
1215
1216	/*
1217	 * If IQE happens, the head points to the descriptor associated
1218	 * with the error. No new descriptors are fetched until the IQE
1219	 * is cleared.
1220	 */
1221	if (fault & DMA_FSTS_IQE) {
1222		head = readl(iommu->reg + DMAR_IQH_REG);
1223		if ((head >> shift) == index) {
1224			struct qi_desc *desc = qi->desc + head;
1225
1226			/*
1227			 * desc->qw2 and desc->qw3 are either reserved or
1228			 * used by software as private data. We won't print
1229			 * out these two qw's for security consideration.
1230			 */
1231			pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
1232			       (unsigned long long)desc->qw0,
1233			       (unsigned long long)desc->qw1);
1234			memcpy(desc, qi->desc + (wait_index << shift),
1235			       1 << shift);
1236			writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1237			return -EINVAL;
1238		}
1239	}
1240
1241	/*
1242	 * If ITE happens, all pending wait_desc commands are aborted.
1243	 * No new descriptors are fetched until the ITE is cleared.
1244	 */
1245	if (fault & DMA_FSTS_ITE) {
1246		head = readl(iommu->reg + DMAR_IQH_REG);
1247		head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1248		head |= 1;
1249		tail = readl(iommu->reg + DMAR_IQT_REG);
1250		tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1251
1252		writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1253
1254		do {
1255			if (qi->desc_status[head] == QI_IN_USE)
1256				qi->desc_status[head] = QI_ABORT;
1257			head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1258		} while (head != tail);
1259
1260		if (qi->desc_status[wait_index] == QI_ABORT)
1261			return -EAGAIN;
1262	}
1263
1264	if (fault & DMA_FSTS_ICE)
1265		writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1266
1267	return 0;
1268}
1269
1270/*
1271 * Function to submit invalidation descriptors of all types to the queued
1272 * invalidation interface (QI). Multiple descriptors can be submitted at a
1273 * time; a wait descriptor is appended to each submission to ensure that the
1274 * hardware has completed the invalidation before returning. Wait descriptors
1275 * can be part of the submission, but they will not be polled for completion.
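 *
 * For example, qi_flush_context() below builds a single context-cache
 * invalidation descriptor and submits it with
 * qi_submit_sync(iommu, &desc, 1, 0).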
1276 */
1277int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1278		   unsigned int count, unsigned long options)
1279{
1280	struct q_inval *qi = iommu->qi;
1281	struct qi_desc wait_desc;
1282	int wait_index, index;
1283	unsigned long flags;
1284	int offset, shift;
1285	int rc, i;
1286
1287	if (!qi)
1288		return 0;
1289
1290restart:
1291	rc = 0;
1292
1293	raw_spin_lock_irqsave(&qi->q_lock, flags);
1294	/*
1295	 * Check if we have enough empty slots in the queue to submit,
1296	 * the calculation is based on:
1297	 * # of desc + 1 wait desc + 1 space between head and tail
1298	 */
1299	while (qi->free_cnt < count + 2) {
1300		raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1301		cpu_relax();
1302		raw_spin_lock_irqsave(&qi->q_lock, flags);
1303	}
1304
1305	index = qi->free_head;
1306	wait_index = (index + count) % QI_LENGTH;
1307	shift = qi_shift(iommu);
1308
1309	for (i = 0; i < count; i++) {
1310		offset = ((index + i) % QI_LENGTH) << shift;
1311		memcpy(qi->desc + offset, &desc[i], 1 << shift);
1312		qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1313	}
1314	qi->desc_status[wait_index] = QI_IN_USE;
1315
1316	wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1317			QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1318	if (options & QI_OPT_WAIT_DRAIN)
1319		wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1320	wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1321	wait_desc.qw2 = 0;
1322	wait_desc.qw3 = 0;
1323
1324	offset = wait_index << shift;
1325	memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1326
1327	qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1328	qi->free_cnt -= count + 1;
1329
1330	/*
1331	 * Update the HW tail register to indicate the presence of the
1332	 * new descriptors.
1333	 */
1334	writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1335
1336	while (qi->desc_status[wait_index] != QI_DONE) {
1337		/*
1338		 * We will leave the interrupts disabled, to prevent interrupt
1339		 * context from queueing another cmd while a cmd is already submitted
1340		 * and waiting for completion on this cpu. This is to avoid
1341		 * a deadlock where the interrupt context can wait indefinitely
1342		 * for free slots in the queue.
1343		 */
1344		rc = qi_check_fault(iommu, index, wait_index);
1345		if (rc)
1346			break;
1347
1348		raw_spin_unlock(&qi->q_lock);
1349		cpu_relax();
1350		raw_spin_lock(&qi->q_lock);
1351	}
1352
1353	for (i = 0; i < count; i++)
1354		qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1355
1356	reclaim_free_desc(qi);
1357	raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1358
1359	if (rc == -EAGAIN)
1360		goto restart;
1361
1362	return rc;
1363}
1364
1365/*
1366 * Flush the global interrupt entry cache.
1367 */
1368void qi_global_iec(struct intel_iommu *iommu)
1369{
1370	struct qi_desc desc;
1371
1372	desc.qw0 = QI_IEC_TYPE;
1373	desc.qw1 = 0;
1374	desc.qw2 = 0;
1375	desc.qw3 = 0;
1376
1377	/* should never fail */
1378	qi_submit_sync(iommu, &desc, 1, 0);
1379}
1380
1381void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1382		      u64 type)
1383{
1384	struct qi_desc desc;
1385
1386	desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1387			| QI_CC_GRAN(type) | QI_CC_TYPE;
1388	desc.qw1 = 0;
1389	desc.qw2 = 0;
1390	desc.qw3 = 0;
1391
1392	qi_submit_sync(iommu, &desc, 1, 0);
1393}
1394
1395void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1396		    unsigned int size_order, u64 type)
1397{
1398	u8 dw = 0, dr = 0;
1399
1400	struct qi_desc desc;
1401	int ih = 0;
1402
1403	if (cap_write_drain(iommu->cap))
1404		dw = 1;
1405
1406	if (cap_read_drain(iommu->cap))
1407		dr = 1;
1408
1409	desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1410		| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1411	desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1412		| QI_IOTLB_AM(size_order);
1413	desc.qw2 = 0;
1414	desc.qw3 = 0;
1415
1416	qi_submit_sync(iommu, &desc, 1, 0);
1417}
1418
1419void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1420			u16 qdep, u64 addr, unsigned mask)
1421{
1422	struct qi_desc desc;
1423
1424	if (mask) {
1425		addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1426		desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1427	} else
1428		desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1429
1430	if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1431		qdep = 0;
1432
1433	desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1434		   QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1435	desc.qw2 = 0;
1436	desc.qw3 = 0;
1437
1438	qi_submit_sync(iommu, &desc, 1, 0);
1439}
1440
1441/* PASID-based IOTLB invalidation */
1442void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1443		     unsigned long npages, bool ih)
1444{
1445	struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1446
1447	/*
1448	 * npages == -1 means a PASID-selective invalidation; otherwise, a
1449	 * positive value requests a Page-selective-within-PASID invalidation.
1450	 * 0 is not a valid input.
1451	 */
1452	if (WARN_ON(!npages)) {
1453		pr_err("Invalid input npages = %ld\n", npages);
1454		return;
1455	}
1456
1457	if (npages == -1) {
1458		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1459				QI_EIOTLB_DID(did) |
1460				QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1461				QI_EIOTLB_TYPE;
1462		desc.qw1 = 0;
1463	} else {
1464		int mask = ilog2(__roundup_pow_of_two(npages));
1465		unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1466
1467		if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1468			addr = ALIGN_DOWN(addr, align);
1469
1470		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1471				QI_EIOTLB_DID(did) |
1472				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1473				QI_EIOTLB_TYPE;
1474		desc.qw1 = QI_EIOTLB_ADDR(addr) |
1475				QI_EIOTLB_IH(ih) |
1476				QI_EIOTLB_AM(mask);
1477	}
1478
1479	qi_submit_sync(iommu, &desc, 1, 0);
1480}
1481
1482/* PASID-based device IOTLB Invalidate */
1483void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1484			      u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1485{
1486	unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1487	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1488
1489	desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1490		QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1491		QI_DEV_IOTLB_PFSID(pfsid);
1492
1493	/*
1494	 * If the S bit is 0, we only flush a single page. If the S bit is set,
1495	 * the least significant zero bit indicates the invalidation address
1496	 * range. VT-d spec 6.5.2.6.
1497	 * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
1498	 * size order = 0 is PAGE_SIZE 4KB
1499	 * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1500	 * ECAP.
1501	 */
1502	if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
1503		pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1504				    addr, size_order);
1505
1506	/* Take page address */
1507	desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1508
1509	if (size_order) {
1510		/*
1511		 * An existing 0 in the address below size_order could be taken as
1512		 * the least significant zero bit; set those bits to 1 to avoid
1513		 * indicating a smaller invalidation size than desired.
1514		 */
1515		desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1516					VTD_PAGE_SHIFT);
1517		/* Clear size_order bit to indicate size */
1518		desc.qw1 &= ~mask;
1519		/* Set the S bit to indicate flushing more than 1 page */
1520		desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1521	}
1522
1523	qi_submit_sync(iommu, &desc, 1, 0);
1524}
1525
1526void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1527			  u64 granu, u32 pasid)
1528{
1529	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1530
1531	desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1532			QI_PC_GRAN(granu) | QI_PC_TYPE;
1533	qi_submit_sync(iommu, &desc, 1, 0);
1534}
1535
1536/*
1537 * Disable Queued Invalidation interface.
1538 */
1539void dmar_disable_qi(struct intel_iommu *iommu)
1540{
1541	unsigned long flags;
1542	u32 sts;
1543	cycles_t start_time = get_cycles();
1544
1545	if (!ecap_qis(iommu->ecap))
1546		return;
1547
1548	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1549
1550	sts =  readl(iommu->reg + DMAR_GSTS_REG);
1551	if (!(sts & DMA_GSTS_QIES))
1552		goto end;
1553
1554	/*
1555	 * Give the hardware a chance to complete the pending invalidation requests.
1556	 */
1557	while ((readl(iommu->reg + DMAR_IQT_REG) !=
1558		readl(iommu->reg + DMAR_IQH_REG)) &&
1559		(DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1560		cpu_relax();
1561
1562	iommu->gcmd &= ~DMA_GCMD_QIE;
1563	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1564
1565	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1566		      !(sts & DMA_GSTS_QIES), sts);
1567end:
1568	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1569}
1570
1571/*
1572 * Enable queued invalidation.
1573 */
1574static void __dmar_enable_qi(struct intel_iommu *iommu)
1575{
1576	u32 sts;
1577	unsigned long flags;
1578	struct q_inval *qi = iommu->qi;
1579	u64 val = virt_to_phys(qi->desc);
1580
1581	qi->free_head = qi->free_tail = 0;
1582	qi->free_cnt = QI_LENGTH;
1583
1584	/*
1585	 * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1586	 * is present.
1587	 */
1588	if (ecap_smts(iommu->ecap))
1589		val |= (1 << 11) | 1;
1590
1591	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1592
1593	/* write zero to the tail reg */
1594	writel(0, iommu->reg + DMAR_IQT_REG);
1595
1596	dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1597
1598	iommu->gcmd |= DMA_GCMD_QIE;
1599	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1600
1601	/* Make sure hardware complete it */
1602	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1603
1604	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1605}
1606
1607/*
1608 * Enable Queued Invalidation interface. This is a must to support
1609 * interrupt-remapping. Also used by DMA-remapping, which replaces
1610 * register based IOTLB invalidation.
1611 */
1612int dmar_enable_qi(struct intel_iommu *iommu)
1613{
1614	struct q_inval *qi;
1615	struct page *desc_page;
1616
1617	if (!ecap_qis(iommu->ecap))
1618		return -ENOENT;
1619
1620	/*
1621	 * Queued invalidation is already set up and enabled.
1622	 */
1623	if (iommu->qi)
1624		return 0;
1625
1626	iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1627	if (!iommu->qi)
1628		return -ENOMEM;
1629
1630	qi = iommu->qi;
1631
1632	/*
1633	 * Need two pages to accommodate 256 descriptors of 256 bits each
1634	 * if the remapping hardware supports scalable mode translation.
1635	 */
1636	desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1637				     !!ecap_smts(iommu->ecap));
1638	if (!desc_page) {
1639		kfree(qi);
1640		iommu->qi = NULL;
1641		return -ENOMEM;
1642	}
1643
1644	qi->desc = page_address(desc_page);
1645
1646	qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1647	if (!qi->desc_status) {
1648		free_page((unsigned long) qi->desc);
1649		kfree(qi);
1650		iommu->qi = NULL;
1651		return -ENOMEM;
1652	}
1653
1654	raw_spin_lock_init(&qi->q_lock);
1655
1656	__dmar_enable_qi(iommu);
1657
1658	return 0;
1659}
1660
1661/* iommu interrupt handling. Most of it is MSI-like. */
1662
1663enum faulttype {
1664	DMA_REMAP,
1665	INTR_REMAP,
1666	UNKNOWN,
1667};
1668
1669static const char *dma_remap_fault_reasons[] =
1670{
1671	"Software",
1672	"Present bit in root entry is clear",
1673	"Present bit in context entry is clear",
1674	"Invalid context entry",
1675	"Access beyond MGAW",
1676	"PTE Write access is not set",
1677	"PTE Read access is not set",
1678	"Next page table ptr is invalid",
1679	"Root table address invalid",
1680	"Context table ptr is invalid",
1681	"non-zero reserved fields in RTP",
1682	"non-zero reserved fields in CTP",
1683	"non-zero reserved fields in PTE",
1684	"PCE for translation request specifies blocking",
1685};
1686
1687static const char * const dma_remap_sm_fault_reasons[] = {
1688	"SM: Invalid Root Table Address",
1689	"SM: TTM 0 for request with PASID",
1690	"SM: TTM 0 for page group request",
1691	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1692	"SM: Error attempting to access Root Entry",
1693	"SM: Present bit in Root Entry is clear",
1694	"SM: Non-zero reserved field set in Root Entry",
1695	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1696	"SM: Error attempting to access Context Entry",
1697	"SM: Present bit in Context Entry is clear",
1698	"SM: Non-zero reserved field set in the Context Entry",
1699	"SM: Invalid Context Entry",
1700	"SM: DTE field in Context Entry is clear",
1701	"SM: PASID Enable field in Context Entry is clear",
1702	"SM: PASID is larger than the max in Context Entry",
1703	"SM: PRE field in Context-Entry is clear",
1704	"SM: RID_PASID field error in Context-Entry",
1705	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1706	"SM: Error attempting to access the PASID Directory Entry",
1707	"SM: Present bit in Directory Entry is clear",
1708	"SM: Non-zero reserved field set in PASID Directory Entry",
1709	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1710	"SM: Error attempting to access PASID Table Entry",
1711	"SM: Present bit in PASID Table Entry is clear",
1712	"SM: Non-zero reserved field set in PASID Table Entry",
1713	"SM: Invalid Scalable-Mode PASID Table Entry",
1714	"SM: ERE field is clear in PASID Table Entry",
1715	"SM: SRE field is clear in PASID Table Entry",
1716	"Unknown", "Unknown",/* 0x5E-0x5F */
1717	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1718	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1719	"SM: Error attempting to access first-level paging entry",
1720	"SM: Present bit in first-level paging entry is clear",
1721	"SM: Non-zero reserved field set in first-level paging entry",
1722	"SM: Error attempting to access FL-PML4 entry",
1723	"SM: First-level entry address beyond MGAW in Nested translation",
1724	"SM: Read permission error in FL-PML4 entry in Nested translation",
1725	"SM: Read permission error in first-level paging entry in Nested translation",
1726	"SM: Write permission error in first-level paging entry in Nested translation",
1727	"SM: Error attempting to access second-level paging entry",
1728	"SM: Read/Write permission error in second-level paging entry",
1729	"SM: Non-zero reserved field set in second-level paging entry",
1730	"SM: Invalid second-level page table pointer",
1731	"SM: A/D bit update needed in second-level entry when set up in no snoop",
1732	"Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1733	"SM: Address in first-level translation is not canonical",
1734	"SM: U/S set 0 for first-level translation with user privilege",
1735	"SM: No execute permission for request with PASID and ER=1",
1736	"SM: Address beyond the DMA hardware max",
1737	"SM: Second-level entry address beyond the max",
1738	"SM: No write permission for Write/AtomicOp request",
1739	"SM: No read permission for Read/AtomicOp request",
1740	"SM: Invalid address-interrupt address",
1741	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1742	"SM: A/D bit update needed in first-level entry when set up in no snoop",
1743};
1744
1745static const char *irq_remap_fault_reasons[] =
1746{
1747	"Detected reserved fields in the decoded interrupt-remapped request",
1748	"Interrupt index exceeded the interrupt-remapping table size",
1749	"Present field in the IRTE entry is clear",
1750	"Error accessing interrupt-remapping table pointed by IRTA_REG",
1751	"Detected reserved fields in the IRTE entry",
1752	"Blocked a compatibility format interrupt request",
1753	"Blocked an interrupt request due to source-id verification failure",
1754};
1755
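/*
 * Map a hardware fault reason code to a human readable string and a
 * fault type: codes 0x20 and above index the interrupt remapping
 * reasons, codes 0x30 and above the scalable-mode DMA remapping
 * reasons, and lower codes the legacy DMA remapping reasons.
 */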
1756static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1757{
1758	if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1759					ARRAY_SIZE(irq_remap_fault_reasons))) {
1760		*fault_type = INTR_REMAP;
1761		return irq_remap_fault_reasons[fault_reason - 0x20];
1762	} else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1763			ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1764		*fault_type = DMA_REMAP;
1765		return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1766	} else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1767		*fault_type = DMA_REMAP;
1768		return dma_remap_fault_reasons[fault_reason];
1769	} else {
1770		*fault_type = UNKNOWN;
1771		return "Unknown";
1772	}
1773}
1774
1775
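/*
 * Each IOMMU exposes separate MSI-like register blocks for its fault
 * event and page request event interrupts; return the control register
 * of the block that matches @irq.
 */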
1776static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1777{
1778	if (iommu->irq == irq)
1779		return DMAR_FECTL_REG;
1780	else if (iommu->pr_irq == irq)
1781		return DMAR_PECTL_REG;
1782	else
1783		BUG();
1784}
1785
1786void dmar_msi_unmask(struct irq_data *data)
1787{
1788	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1789	int reg = dmar_msi_reg(iommu, data->irq);
1790	unsigned long flag;
1791
1792	/* unmask it */
1793	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1794	writel(0, iommu->reg + reg);
1795	/* Read a reg to force flush the post write */
1796	readl(iommu->reg + reg);
1797	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1798}
1799
1800void dmar_msi_mask(struct irq_data *data)
1801{
1802	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1803	int reg = dmar_msi_reg(iommu, data->irq);
1804	unsigned long flag;
1805
1806	/* mask it */
1807	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1808	writel(DMA_FECTL_IM, iommu->reg + reg);
1809	/* Read a reg to force flush the post write */
1810	readl(iommu->reg + reg);
1811	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1812}
1813
1814void dmar_msi_write(int irq, struct msi_msg *msg)
1815{
1816	struct intel_iommu *iommu = irq_get_handler_data(irq);
1817	int reg = dmar_msi_reg(iommu, irq);
1818	unsigned long flag;
1819
1820	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1821	writel(msg->data, iommu->reg + reg + 4);
1822	writel(msg->address_lo, iommu->reg + reg + 8);
1823	writel(msg->address_hi, iommu->reg + reg + 12);
1824	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1825}
1826
1827void dmar_msi_read(int irq, struct msi_msg *msg)
1828{
1829	struct intel_iommu *iommu = irq_get_handler_data(irq);
1830	int reg = dmar_msi_reg(iommu, irq);
1831	unsigned long flag;
1832
1833	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1834	msg->data = readl(iommu->reg + reg + 4);
1835	msg->address_lo = readl(iommu->reg + reg + 8);
1836	msg->address_hi = readl(iommu->reg + reg + 12);
1837	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1838}
1839
1840static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1841		u8 fault_reason, u32 pasid, u16 source_id,
1842		unsigned long long addr)
1843{
1844	const char *reason;
1845	int fault_type;
1846
1847	reason = dmar_get_fault_reason(fault_reason, &fault_type);
1848
1849	if (fault_type == INTR_REMAP)
1850		pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
1851			source_id >> 8, PCI_SLOT(source_id & 0xFF),
1852			PCI_FUNC(source_id & 0xFF), addr >> 48,
1853			fault_reason, reason);
1854	else
1855		pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
1856		       type ? "DMA Read" : "DMA Write",
1857		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1858		       PCI_FUNC(source_id & 0xFF), pasid, addr,
1859		       fault_reason, reason);
1860	return 0;
1861}
1862
1863#define PRIMARY_FAULT_REG_LEN (16)
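/*
 * Primary fault interrupt handler: walk the fault recording registers,
 * log (rate limited) and clear each recorded fault, then clear the
 * overflow bits in the fault status register.
 */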
irqreturn_t dmar_fault(int irq, void *dev_id)
{
	struct intel_iommu *iommu = dev_id;
	int reg, fault_index;
	u32 fault_status;
	unsigned long flag;
	static DEFINE_RATELIMIT_STATE(rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
	if (fault_status && __ratelimit(&rs))
		pr_err("DRHD: handling fault status reg %x\n", fault_status);

	/* TBD: ignore advanced fault log currently */
	if (!(fault_status & DMA_FSTS_PPF))
		goto unlock_exit;

	fault_index = dma_fsts_fault_record_index(fault_status);
	reg = cap_fault_reg_offset(iommu->cap);
	while (1) {
		/* Disable printing, simply clear the fault when ratelimited */
		bool ratelimited = !__ratelimit(&rs);
		u8 fault_reason;
		u16 source_id;
		u64 guest_addr;
		u32 pasid;
		int type;
		u32 data;
		bool pasid_present;

		/* upper 32 bits of the fault recording register */
		data = readl(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN + 12);
		if (!(data & DMA_FRCD_F))
			break;

		if (!ratelimited) {
			fault_reason = dma_frcd_fault_reason(data);
			type = dma_frcd_type(data);

			pasid = dma_frcd_pasid_value(data);
			data = readl(iommu->reg + reg +
				     fault_index * PRIMARY_FAULT_REG_LEN + 8);
			source_id = dma_frcd_source_id(data);

			pasid_present = dma_frcd_pasid_present(data);
			guest_addr = dmar_readq(iommu->reg + reg +
					fault_index * PRIMARY_FAULT_REG_LEN);
			guest_addr = dma_frcd_page_addr(guest_addr);
		}

		/* clear the fault */
		writel(DMA_FRCD_F, iommu->reg + reg +
			fault_index * PRIMARY_FAULT_REG_LEN + 12);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

		if (!ratelimited)
			/* Use PASID value -1 if no PASID is present */
			dmar_fault_do_one(iommu, type, fault_reason,
					  pasid_present ? pasid : -1,
					  source_id, guest_addr);

		fault_index++;
		if (fault_index >= cap_num_fault_regs(iommu->cap))
			fault_index = 0;
		raw_spin_lock_irqsave(&iommu->register_lock, flag);
	}

	writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
	       iommu->reg + DMAR_FSTS_REG);

unlock_exit:
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	return IRQ_HANDLED;
}
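/*
 * Allocate the fault event interrupt for an IOMMU and install
 * dmar_fault() as its handler.  Calling this again for an IOMMU that
 * already has its interrupt set up is a no-op.
 */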
int dmar_set_interrupt(struct intel_iommu *iommu)
{
	int irq, ret;

	/*
	 * Check if the fault interrupt is already initialized.
	 */
	if (iommu->irq)
		return 0;

	irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
	if (irq > 0) {
		iommu->irq = irq;
	} else {
		pr_err("No free IRQ vectors\n");
		return -EINVAL;
	}

	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
	if (ret)
		pr_err("Can't request irq\n");
	return ret;
}
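/*
 * Set up fault reporting for every DRHD at boot and clear any faults
 * that were latched before the handler was installed.
 */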
int __init enable_drhd_fault_handling(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/*
	 * Enable fault control interrupt.
	 */
	for_each_iommu(iommu, drhd) {
		u32 fault_status;
		int ret = dmar_set_interrupt(iommu);

		if (ret) {
			pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
			       (unsigned long long)drhd->reg_base_addr, ret);
			return -1;
		}

		/*
		 * Clear any previous faults.
		 */
		dmar_fault(iommu->irq, iommu);
		fault_status = readl(iommu->reg + DMAR_FSTS_REG);
		writel(fault_status, iommu->reg + DMAR_FSTS_REG);
	}

	return 0;
}

/*
 * Re-enable Queued Invalidation interface.
 */
int dmar_reenable_qi(struct intel_iommu *iommu)
{
	if (!ecap_qis(iommu->ecap))
		return -ENOENT;

	if (!iommu->qi)
		return -ENOENT;

	/*
	 * First disable queued invalidation.
	 */
	dmar_disable_qi(iommu);
	/*
	 * Then enable queued invalidation again. Since there are no pending
	 * invalidation requests now, it's safe to re-enable queued
	 * invalidation.
	 */
	__dmar_enable_qi(iommu);

	return 0;
}

/*
 * Check interrupt remapping support in DMAR table description.
 */
int __init dmar_ir_support(void)
{
	struct acpi_table_dmar *dmar;

	dmar = (struct acpi_table_dmar *)dmar_tbl;
	if (!dmar)
		return 0;
	return dmar->flags & 0x1;
}

/* Check whether DMAR units are in use */
static inline bool dmar_in_use(void)
{
	return irq_remapping_enabled || intel_iommu_enabled;
}
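/*
 * Late initcall: if neither DMA remapping nor interrupt remapping ended
 * up being used, drop the PCI bus notifier (if one was registered) and
 * free the DRHD structures that were parsed from the DMAR table.
 */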
static int __init dmar_free_unused_resources(void)
{
	struct dmar_drhd_unit *dmaru, *dmaru_n;

	if (dmar_in_use())
		return 0;

	if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
		bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);

	down_write(&dmar_global_lock);
	list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
		list_del(&dmaru->list);
		dmar_free_drhd(dmaru);
	}
	up_write(&dmar_global_lock);

	return 0;
}

late_initcall(dmar_free_unused_resources);
IOMMU_INIT_POST(detect_intel_iommu);

/*
 * DMAR Hotplug Support
 * For more details, please refer to Intel(R) Virtualization Technology
 * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
 * "Remapping Hardware Unit Hot Plug".
 */
static guid_t dmar_hp_guid =
	GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
		  0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);

/*
 * Currently there's only one revision and BIOS will not check the revision id,
 * so use 0 for safety.
 */
#define	DMAR_DSM_REV_ID			0
#define	DMAR_DSM_FUNC_DRHD		1
#define	DMAR_DSM_FUNC_ATSR		2
#define	DMAR_DSM_FUNC_RHSA		3

static inline bool dmar_detect_dsm(acpi_handle handle, int func)
{
	return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
}
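/*
 * Evaluate DMAR _DSM function @func on @handle and feed the returned
 * buffer of remapping structures through dmar_walk_remapping_entries(),
 * calling @handler for each structure of the type that matches @func.
 * For example, dmar_hotplug_insert() below parses hot-added DRHDs with
 * dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
 *			  &dmar_parse_one_drhd, &drhd_count).
 */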
static int dmar_walk_dsm_resource(acpi_handle handle, int func,
				  dmar_res_handler_t handler, void *arg)
{
	int ret = -ENODEV;
	union acpi_object *obj;
	struct acpi_dmar_header *start;
	struct dmar_res_callback callback;
	static int res_type[] = {
		[DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
		[DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
		[DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
	};

	if (!dmar_detect_dsm(handle, func))
		return 0;

	obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
				      func, NULL, ACPI_TYPE_BUFFER);
	if (!obj)
		return -ENODEV;

	memset(&callback, 0, sizeof(callback));
	callback.cb[res_type[func]] = handler;
	callback.arg[res_type[func]] = arg;
	start = (struct acpi_dmar_header *)obj->buffer.pointer;
	ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);

	ACPI_FREE(obj);

	return ret;
}
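/*
 * Hot-add one DRHD: enable interrupt remapping on the new unit first,
 * then DMA remapping.
 */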
static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
{
	int ret;
	struct dmar_drhd_unit *dmaru;

	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
	if (!dmaru)
		return -ENODEV;

	ret = dmar_ir_hotplug(dmaru, true);
	if (ret == 0)
		ret = dmar_iommu_hotplug(dmaru, true);

	return ret;
}
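/*
 * Prepare one DRHD for hot-removal.  Bail out with -EBUSY if any device
 * in the unit's scope is still present, otherwise tear down its
 * interrupt remapping and DMA remapping state.
 */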
static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
{
	int i, ret;
	struct device *dev;
	struct dmar_drhd_unit *dmaru;

	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
	if (!dmaru)
		return 0;

	/*
	 * All PCI devices managed by this unit should have been destroyed.
	 */
	if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
		for_each_active_dev_scope(dmaru->devices,
					  dmaru->devices_cnt, i, dev)
			return -EBUSY;
	}

	ret = dmar_ir_hotplug(dmaru, false);
	if (ret == 0)
		ret = dmar_iommu_hotplug(dmaru, false);

	return ret;
}
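/*
 * Final stage of hot-removal: unlink the unit from dmar_drhd_units
 * under RCU and free it.
 */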
static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
{
	struct dmar_drhd_unit *dmaru;

	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
	if (dmaru) {
		list_del_rcu(&dmaru->list);
		synchronize_rcu();
		dmar_free_drhd(dmaru);
	}

	return 0;
}
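/*
 * Hot-insert remapping hardware below @handle: validate and parse the
 * DRHD, RHSA and ATSR structures reported by _DSM, then bring the new
 * units online.  On failure, unwind in the reverse order.
 */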
static int dmar_hotplug_insert(acpi_handle handle)
{
	int ret;
	int drhd_count = 0;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
				     &dmar_validate_one_drhd, (void *)1);
	if (ret)
		goto out;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
				     &dmar_parse_one_drhd, (void *)&drhd_count);
	if (ret == 0 && drhd_count == 0) {
		pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
		goto out;
	} else if (ret) {
		goto release_drhd;
	}

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
				     &dmar_parse_one_rhsa, NULL);
	if (ret)
		goto release_drhd;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
				     &dmar_parse_one_atsr, NULL);
	if (ret)
		goto release_atsr;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
				     &dmar_hp_add_drhd, NULL);
	if (!ret)
		return 0;

	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
			       &dmar_hp_remove_drhd, NULL);
release_atsr:
	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
			       &dmar_release_one_atsr, NULL);
release_drhd:
	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
			       &dmar_hp_release_drhd, NULL);
out:
	return ret;
}
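/*
 * Hot-remove remapping hardware below @handle: run the ATSR checks,
 * detach every DRHD, then release the ATSR and DRHD state.  If
 * detaching fails, roll back by re-adding the DRHDs.
 */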
static int dmar_hotplug_remove(acpi_handle handle)
{
	int ret;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
				     &dmar_check_one_atsr, NULL);
	if (ret)
		return ret;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
				     &dmar_hp_remove_drhd, NULL);
	if (ret == 0) {
		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
					       &dmar_release_one_atsr, NULL));
		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
					       &dmar_hp_release_drhd, NULL));
	} else {
		dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
				       &dmar_hp_add_drhd, NULL);
	}

	return ret;
}

static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
				       void *context, void **retval)
{
	acpi_handle *phdl = retval;

	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
		*phdl = handle;
		return AE_CTRL_TERMINATE;
	}

	return AE_OK;
}
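/*
 * Common entry for DMAR hot-add/hot-remove: locate the ACPI object that
 * implements the DMAR _DSM (the notified handle itself or one of its
 * descendants) and perform the insertion or removal under
 * dmar_global_lock.
 */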
static int dmar_device_hotplug(acpi_handle handle, bool insert)
{
	int ret;
	acpi_handle tmp = NULL;
	acpi_status status;

	if (!dmar_in_use())
		return 0;

	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
		tmp = handle;
	} else {
		status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
					     ACPI_UINT32_MAX,
					     dmar_get_dsm_handle,
					     NULL, NULL, &tmp);
		if (ACPI_FAILURE(status)) {
			pr_warn("Failed to locate _DSM method.\n");
			return -ENXIO;
		}
	}
	if (tmp == NULL)
		return 0;

	down_write(&dmar_global_lock);
	if (insert)
		ret = dmar_hotplug_insert(tmp);
	else
		ret = dmar_hotplug_remove(tmp);
	up_write(&dmar_global_lock);

	return ret;
}
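/*
 * Entry points for the ACPI hotplug code to notify the DMAR framework
 * about hot-added or hot-removed remapping hardware.
 */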
int dmar_device_add(acpi_handle handle)
{
	return dmar_device_hotplug(handle, true);
}

int dmar_device_remove(acpi_handle handle)
{
	return dmar_device_hotplug(handle, false);
}

/*
 * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
 *
 * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
 * the ACPI DMAR table. This means that the platform boot firmware has made
 * sure no device can issue DMA outside of RMRR regions.
 */
bool dmar_platform_optin(void)
{
	struct acpi_table_dmar *dmar;
	acpi_status status;
	bool ret;

	status = acpi_get_table(ACPI_SIG_DMAR, 0,
				(struct acpi_table_header **)&dmar);
	if (ACPI_FAILURE(status))
		return false;

	ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
	acpi_put_table((struct acpi_table_header *)dmar);

	return ret;
}
EXPORT_SYMBOL_GPL(dmar_platform_optin);