1// SPDX-License-Identifier: GPL-2.0
2/*
3 * IOMMU API for ARM architected SMMUv3 implementations.
4 *
5 * Copyright (C) 2015 ARM Limited
6 *
7 * Author: Will Deacon <will.deacon@arm.com>
8 *
9 * This driver is powered by bad coffee and bombay mix.
10 */
11
12#include <linux/acpi.h>
13#include <linux/acpi_iort.h>
14#include <linux/bitops.h>
15#include <linux/crash_dump.h>
16#include <linux/delay.h>
17#include <linux/dma-iommu.h>
18#include <linux/err.h>
19#include <linux/interrupt.h>
20#include <linux/io-pgtable.h>
21#include <linux/iopoll.h>
22#include <linux/module.h>
23#include <linux/msi.h>
24#include <linux/of.h>
25#include <linux/of_address.h>
26#include <linux/of_iommu.h>
27#include <linux/of_platform.h>
28#include <linux/pci.h>
29#include <linux/pci-ats.h>
30#include <linux/platform_device.h>
31
32#include <linux/amba/bus.h>
33
34#include "arm-smmu-v3.h"
35
36static bool disable_bypass = 1;
37module_param(disable_bypass, bool, 0444);
38MODULE_PARM_DESC(disable_bypass,
39	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40
41static bool disable_msipolling;
42module_param(disable_msipolling, bool, 0444);
43MODULE_PARM_DESC(disable_msipolling,
44	"Disable MSI-based polling for CMD_SYNC completion.");
45
46enum arm_smmu_msi_index {
47	EVTQ_MSI_INDEX,
48	GERROR_MSI_INDEX,
49	PRIQ_MSI_INDEX,
50	ARM_SMMU_MAX_MSIS,
51};
52
53static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54	[EVTQ_MSI_INDEX] = {
55		ARM_SMMU_EVTQ_IRQ_CFG0,
56		ARM_SMMU_EVTQ_IRQ_CFG1,
57		ARM_SMMU_EVTQ_IRQ_CFG2,
58	},
59	[GERROR_MSI_INDEX] = {
60		ARM_SMMU_GERROR_IRQ_CFG0,
61		ARM_SMMU_GERROR_IRQ_CFG1,
62		ARM_SMMU_GERROR_IRQ_CFG2,
63	},
64	[PRIQ_MSI_INDEX] = {
65		ARM_SMMU_PRIQ_IRQ_CFG0,
66		ARM_SMMU_PRIQ_IRQ_CFG1,
67		ARM_SMMU_PRIQ_IRQ_CFG2,
68	},
69};
70
71struct arm_smmu_option_prop {
72	u32 opt;
73	const char *prop;
74};
75
76DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77DEFINE_MUTEX(arm_smmu_asid_lock);
78
79static struct arm_smmu_option_prop arm_smmu_options[] = {
80	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
81	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
82	{ 0, NULL},
83};
84
85static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
86						 struct arm_smmu_device *smmu)
87{
88	if (offset > SZ_64K)
89		return smmu->page1 + offset - SZ_64K;
90
91	return smmu->base + offset;
92}
93
94static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
95{
96	return container_of(dom, struct arm_smmu_domain, domain);
97}
98
99static void parse_driver_options(struct arm_smmu_device *smmu)
100{
101	int i = 0;
102
103	do {
104		if (of_property_read_bool(smmu->dev->of_node,
105						arm_smmu_options[i].prop)) {
106			smmu->options |= arm_smmu_options[i].opt;
107			dev_notice(smmu->dev, "option %s\n",
108				arm_smmu_options[i].prop);
109		}
110	} while (arm_smmu_options[++i].opt);
111}
112
113/* Low-level queue manipulation functions */
114static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
115{
116	u32 space, prod, cons;
117
118	prod = Q_IDX(q, q->prod);
119	cons = Q_IDX(q, q->cons);
120
121	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
122		space = (1 << q->max_n_shift) - (prod - cons);
123	else
124		space = cons - prod;
125
126	return space >= n;
127}
128
129static bool queue_full(struct arm_smmu_ll_queue *q)
130{
131	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
132	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
133}
134
135static bool queue_empty(struct arm_smmu_ll_queue *q)
136{
137	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
138	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
139}
140
141static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
142{
143	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
144		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
145	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
146		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
147}
148
149static void queue_sync_cons_out(struct arm_smmu_queue *q)
150{
151	/*
152	 * Ensure that all CPU accesses (reads and writes) to the queue
153	 * are complete before we update the cons pointer.
154	 */
155	__iomb();
156	writel_relaxed(q->llq.cons, q->cons_reg);
157}
158
159static void queue_inc_cons(struct arm_smmu_ll_queue *q)
160{
161	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
162	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
163}
164
165static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
166{
167	struct arm_smmu_ll_queue *llq = &q->llq;
168
169	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
170		return;
171
172	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
173		      Q_IDX(llq, llq->cons);
174	queue_sync_cons_out(q);
175}
176
177static int queue_sync_prod_in(struct arm_smmu_queue *q)
178{
179	u32 prod;
180	int ret = 0;
181
182	/*
183	 * We can't use the _relaxed() variant here, as we must prevent
184	 * speculative reads of the queue before we have determined that
185	 * prod has indeed moved.
186	 */
187	prod = readl(q->prod_reg);
188
189	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
190		ret = -EOVERFLOW;
191
192	q->llq.prod = prod;
193	return ret;
194}
195
196static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
197{
198	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
199	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
200}
201
202static void queue_poll_init(struct arm_smmu_device *smmu,
203			    struct arm_smmu_queue_poll *qp)
204{
205	qp->delay = 1;
206	qp->spin_cnt = 0;
207	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
208	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
209}
210
211static int queue_poll(struct arm_smmu_queue_poll *qp)
212{
213	if (ktime_compare(ktime_get(), qp->timeout) > 0)
214		return -ETIMEDOUT;
215
216	if (qp->wfe) {
217		wfe();
218	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
219		cpu_relax();
220	} else {
221		udelay(qp->delay);
222		qp->delay *= 2;
223		qp->spin_cnt = 0;
224	}
225
226	return 0;
227}
228
229static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
230{
231	int i;
232
233	for (i = 0; i < n_dwords; ++i)
234		*dst++ = cpu_to_le64(*src++);
235}
236
237static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
238{
239	int i;
240
241	for (i = 0; i < n_dwords; ++i)
242		*dst++ = le64_to_cpu(*src++);
243}
244
245static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
246{
247	if (queue_empty(&q->llq))
248		return -EAGAIN;
249
250	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
251	queue_inc_cons(&q->llq);
252	queue_sync_cons_out(q);
253	return 0;
254}
255
256/* High-level queue accessors */
257static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
258{
259	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
260	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
261
262	switch (ent->opcode) {
263	case CMDQ_OP_TLBI_EL2_ALL:
264	case CMDQ_OP_TLBI_NSNH_ALL:
265		break;
266	case CMDQ_OP_PREFETCH_CFG:
267		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
268		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
269		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
270		break;
271	case CMDQ_OP_CFGI_CD:
272		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
273		fallthrough;
274	case CMDQ_OP_CFGI_STE:
275		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
276		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
277		break;
278	case CMDQ_OP_CFGI_CD_ALL:
279		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
280		break;
281	case CMDQ_OP_CFGI_ALL:
282		/* Cover the entire SID range */
283		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
284		break;
285	case CMDQ_OP_TLBI_NH_VA:
286		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
287		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
288		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
290		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
291		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
292		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
293		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
294		break;
295	case CMDQ_OP_TLBI_S2_IPA:
296		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
297		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
298		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
299		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
300		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
301		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
302		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
303		break;
304	case CMDQ_OP_TLBI_NH_ASID:
305		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
306		fallthrough;
307	case CMDQ_OP_TLBI_S12_VMALL:
308		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
309		break;
310	case CMDQ_OP_ATC_INV:
311		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
312		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
313		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
314		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
315		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
316		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
317		break;
318	case CMDQ_OP_PRI_RESP:
319		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
320		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
321		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
322		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
323		switch (ent->pri.resp) {
324		case PRI_RESP_DENY:
325		case PRI_RESP_FAIL:
326		case PRI_RESP_SUCC:
327			break;
328		default:
329			return -EINVAL;
330		}
331		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
332		break;
333	case CMDQ_OP_CMD_SYNC:
334		if (ent->sync.msiaddr) {
335			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
336			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
337		} else {
338			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
339		}
340		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
341		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
342		break;
343	default:
344		return -ENOENT;
345	}
346
347	return 0;
348}
349
350static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
351					 u32 prod)
352{
353	struct arm_smmu_queue *q = &smmu->cmdq.q;
354	struct arm_smmu_cmdq_ent ent = {
355		.opcode = CMDQ_OP_CMD_SYNC,
356	};
357
358	/*
359	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
360	 * payload, so the write will zero the entire command on that platform.
361	 */
362	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
363		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
364				   q->ent_dwords * 8;
365	}
366
367	arm_smmu_cmdq_build_cmd(cmd, &ent);
368}
369
370static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
371{
372	static const char *cerror_str[] = {
373		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
374		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
375		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
376		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
377	};
378
379	int i;
380	u64 cmd[CMDQ_ENT_DWORDS];
381	struct arm_smmu_queue *q = &smmu->cmdq.q;
382	u32 cons = readl_relaxed(q->cons_reg);
383	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
384	struct arm_smmu_cmdq_ent cmd_sync = {
385		.opcode = CMDQ_OP_CMD_SYNC,
386	};
387
388	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
389		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
390
391	switch (idx) {
392	case CMDQ_ERR_CERROR_ABT_IDX:
393		dev_err(smmu->dev, "retrying command fetch\n");
394	case CMDQ_ERR_CERROR_NONE_IDX:
395		return;
396	case CMDQ_ERR_CERROR_ATC_INV_IDX:
397		/*
398		 * ATC Invalidation Completion timeout. CONS is still pointing
399		 * at the CMD_SYNC. Attempt to complete other pending commands
400		 * by repeating the CMD_SYNC, though we might well end up back
401		 * here since the ATC invalidation may still be pending.
402		 */
403		return;
404	case CMDQ_ERR_CERROR_ILL_IDX:
405	default:
406		break;
407	}
408
409	/*
410	 * We may have concurrent producers, so we need to be careful
411	 * not to touch any of the shadow cmdq state.
412	 */
413	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
414	dev_err(smmu->dev, "skipping command in error state:\n");
415	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
416		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
417
418	/* Convert the erroneous command into a CMD_SYNC */
419	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
420		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
421		return;
422	}
423
424	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
425}
426
427/*
428 * Command queue locking.
429 * This is a form of bastardised rwlock with the following major changes:
430 *
431 * - The only LOCK routines are exclusive_trylock() and shared_lock().
432 *   Neither have barrier semantics, and instead provide only a control
433 *   dependency.
434 *
435 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
436 *   fails if the caller appears to be the last lock holder (yes, this is
437 *   racy). All successful UNLOCK routines have RELEASE semantics.
438 */
439static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
440{
441	int val;
442
443	/*
444	 * We can try to avoid the cmpxchg() loop by simply incrementing the
445	 * lock counter. When held in exclusive state, the lock counter is set
446	 * to INT_MIN so these increments won't hurt as the value will remain
447	 * negative.
448	 */
449	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
450		return;
451
452	do {
453		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
454	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
455}
456
457static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
458{
459	(void)atomic_dec_return_release(&cmdq->lock);
460}
461
462static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
463{
464	if (atomic_read(&cmdq->lock) == 1)
465		return false;
466
467	arm_smmu_cmdq_shared_unlock(cmdq);
468	return true;
469}
470
471#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
472({									\
473	bool __ret;							\
474	local_irq_save(flags);						\
475	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
476	if (!__ret)							\
477		local_irq_restore(flags);				\
478	__ret;								\
479})
480
481#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
482({									\
483	atomic_set_release(&cmdq->lock, 0);				\
484	local_irq_restore(flags);					\
485})
486
487
488/*
489 * Command queue insertion.
490 * This is made fiddly by our attempts to achieve some sort of scalability
491 * since there is one queue shared amongst all of the CPUs in the system.  If
492 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
493 * then you'll *love* this monstrosity.
494 *
495 * The basic idea is to split the queue up into ranges of commands that are
496 * owned by a given CPU; the owner may not have written all of the commands
497 * itself, but is responsible for advancing the hardware prod pointer when
498 * the time comes. The algorithm is roughly:
499 *
500 * 	1. Allocate some space in the queue. At this point we also discover
501 *	   whether the head of the queue is currently owned by another CPU,
502 *	   or whether we are the owner.
503 *
504 *	2. Write our commands into our allocated slots in the queue.
505 *
506 *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
507 *
508 *	4. If we are an owner:
509 *		a. Wait for the previous owner to finish.
510 *		b. Mark the queue head as unowned, which tells us the range
511 *		   that we are responsible for publishing.
512 *		c. Wait for all commands in our owned range to become valid.
513 *		d. Advance the hardware prod pointer.
514 *		e. Tell the next owner we've finished.
515 *
516 *	5. If we are inserting a CMD_SYNC (we may or may not have been an
517 *	   owner), then we need to stick around until it has completed:
518 *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
519 *		   to clear the first 4 bytes.
520 *		b. Otherwise, we spin waiting for the hardware cons pointer to
521 *		   advance past our command.
522 *
523 * The devil is in the details, particularly the use of locking for handling
524 * SYNC completion and freeing up space in the queue before we think that it is
525 * full.
526 */
527static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
528					       u32 sprod, u32 eprod, bool set)
529{
530	u32 swidx, sbidx, ewidx, ebidx;
531	struct arm_smmu_ll_queue llq = {
532		.max_n_shift	= cmdq->q.llq.max_n_shift,
533		.prod		= sprod,
534	};
535
536	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
537	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
538
539	while (llq.prod != eprod) {
540		unsigned long mask;
541		atomic_long_t *ptr;
542		u32 limit = BITS_PER_LONG;
543
544		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
545		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
546
547		ptr = &cmdq->valid_map[swidx];
548
549		if ((swidx == ewidx) && (sbidx < ebidx))
550			limit = ebidx;
551
552		mask = GENMASK(limit - 1, sbidx);
553
554		/*
555		 * The valid bit is the inverse of the wrap bit. This means
556		 * that a zero-initialised queue is invalid and, after marking
557		 * all entries as valid, they become invalid again when we
558		 * wrap.
559		 */
560		if (set) {
561			atomic_long_xor(mask, ptr);
562		} else { /* Poll */
563			unsigned long valid;
564
565			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
566			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
567		}
568
569		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
570	}
571}
572
573/* Mark all entries in the range [sprod, eprod) as valid */
574static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
575					u32 sprod, u32 eprod)
576{
577	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
578}
579
580/* Wait for all entries in the range [sprod, eprod) to become valid */
581static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
582					 u32 sprod, u32 eprod)
583{
584	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
585}
586
587/* Wait for the command queue to become non-full */
588static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
589					     struct arm_smmu_ll_queue *llq)
590{
591	unsigned long flags;
592	struct arm_smmu_queue_poll qp;
593	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
594	int ret = 0;
595
596	/*
597	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
598	 * that fails, spin until somebody else updates it for us.
599	 */
600	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
601		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
602		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
603		llq->val = READ_ONCE(cmdq->q.llq.val);
604		return 0;
605	}
606
607	queue_poll_init(smmu, &qp);
608	do {
609		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
610		if (!queue_full(llq))
611			break;
612
613		ret = queue_poll(&qp);
614	} while (!ret);
615
616	return ret;
617}
618
619/*
620 * Wait until the SMMU signals a CMD_SYNC completion MSI.
621 * Must be called with the cmdq lock held in some capacity.
622 */
623static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
624					  struct arm_smmu_ll_queue *llq)
625{
626	int ret = 0;
627	struct arm_smmu_queue_poll qp;
628	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
629	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
630
631	queue_poll_init(smmu, &qp);
632
633	/*
634	 * The MSI won't generate an event, since it's being written back
635	 * into the command queue.
636	 */
637	qp.wfe = false;
638	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
639	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
640	return ret;
641}
642
643/*
644 * Wait until the SMMU cons index passes llq->prod.
645 * Must be called with the cmdq lock held in some capacity.
646 */
647static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
648					       struct arm_smmu_ll_queue *llq)
649{
650	struct arm_smmu_queue_poll qp;
651	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
652	u32 prod = llq->prod;
653	int ret = 0;
654
655	queue_poll_init(smmu, &qp);
656	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
657	do {
658		if (queue_consumed(llq, prod))
659			break;
660
661		ret = queue_poll(&qp);
662
663		/*
664		 * This needs to be a readl() so that our subsequent call
665		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
666		 *
667		 * Specifically, we need to ensure that we observe all
668		 * shared_lock()s by other CMD_SYNCs that share our owner,
669		 * so that a failing call to tryunlock() means that we're
670		 * the last one out and therefore we can safely advance
671		 * cmdq->q.llq.cons. Roughly speaking:
672		 *
673		 * CPU 0		CPU1			CPU2 (us)
674		 *
675		 * if (sync)
676		 * 	shared_lock();
677		 *
678		 * dma_wmb();
679		 * set_valid_map();
680		 *
681		 * 			if (owner) {
682		 *				poll_valid_map();
683		 *				<control dependency>
684		 *				writel(prod_reg);
685		 *
686		 *						readl(cons_reg);
687		 *						tryunlock();
688		 *
689		 * Requires us to see CPU 0's shared_lock() acquisition.
690		 */
691		llq->cons = readl(cmdq->q.cons_reg);
692	} while (!ret);
693
694	return ret;
695}
696
697static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
698					 struct arm_smmu_ll_queue *llq)
699{
700	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
701		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
702
703	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
704}
705
706static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
707					u32 prod, int n)
708{
709	int i;
710	struct arm_smmu_ll_queue llq = {
711		.max_n_shift	= cmdq->q.llq.max_n_shift,
712		.prod		= prod,
713	};
714
715	for (i = 0; i < n; ++i) {
716		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
717
718		prod = queue_inc_prod_n(&llq, i);
719		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
720	}
721}
722
723/*
724 * This is the actual insertion function, and provides the following
725 * ordering guarantees to callers:
726 *
727 * - There is a dma_wmb() before publishing any commands to the queue.
728 *   This can be relied upon to order prior writes to data structures
729 *   in memory (such as a CD or an STE) before the command.
730 *
731 * - On completion of a CMD_SYNC, there is a control dependency.
732 *   This can be relied upon to order subsequent writes to memory (e.g.
733 *   freeing an IOVA) after completion of the CMD_SYNC.
734 *
735 * - Command insertion is totally ordered, so if two CPUs each race to
736 *   insert their own list of commands then all of the commands from one
737 *   CPU will appear before any of the commands from the other CPU.
738 */
739static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
740				       u64 *cmds, int n, bool sync)
741{
742	u64 cmd_sync[CMDQ_ENT_DWORDS];
743	u32 prod;
744	unsigned long flags;
745	bool owner;
746	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
747	struct arm_smmu_ll_queue llq = {
748		.max_n_shift = cmdq->q.llq.max_n_shift,
749	}, head = llq;
750	int ret = 0;
751
752	/* 1. Allocate some space in the queue */
753	local_irq_save(flags);
754	llq.val = READ_ONCE(cmdq->q.llq.val);
755	do {
756		u64 old;
757
758		while (!queue_has_space(&llq, n + sync)) {
759			local_irq_restore(flags);
760			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
761				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
762			local_irq_save(flags);
763		}
764
765		head.cons = llq.cons;
766		head.prod = queue_inc_prod_n(&llq, n + sync) |
767					     CMDQ_PROD_OWNED_FLAG;
768
769		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
770		if (old == llq.val)
771			break;
772
773		llq.val = old;
774	} while (1);
775	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
776	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
777	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
778
779	/*
780	 * 2. Write our commands into the queue
781	 * Dependency ordering from the cmpxchg() loop above.
782	 */
783	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
784	if (sync) {
785		prod = queue_inc_prod_n(&llq, n);
786		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
787		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
788
789		/*
790		 * In order to determine completion of our CMD_SYNC, we must
791		 * ensure that the queue can't wrap twice without us noticing.
792		 * We achieve that by taking the cmdq lock as shared before
793		 * marking our slot as valid.
794		 */
795		arm_smmu_cmdq_shared_lock(cmdq);
796	}
797
798	/* 3. Mark our slots as valid, ensuring commands are visible first */
799	dma_wmb();
800	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
801
802	/* 4. If we are the owner, take control of the SMMU hardware */
803	if (owner) {
804		/* a. Wait for previous owner to finish */
805		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
806
807		/* b. Stop gathering work by clearing the owned flag */
808		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
809						   &cmdq->q.llq.atomic.prod);
810		prod &= ~CMDQ_PROD_OWNED_FLAG;
811
812		/*
813		 * c. Wait for any gathered work to be written to the queue.
814		 * Note that we read our own entries so that we have the control
815		 * dependency required by (d).
816		 */
817		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
818
819		/*
820		 * d. Advance the hardware prod pointer
821		 * Control dependency ordering from the entries becoming valid.
822		 */
823		writel_relaxed(prod, cmdq->q.prod_reg);
824
825		/*
826		 * e. Tell the next owner we're done
827		 * Make sure we've updated the hardware first, so that we don't
828		 * race to update prod and potentially move it backwards.
829		 */
830		atomic_set_release(&cmdq->owner_prod, prod);
831	}
832
833	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
834	if (sync) {
835		llq.prod = queue_inc_prod_n(&llq, n);
836		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
837		if (ret) {
838			dev_err_ratelimited(smmu->dev,
839					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
840					    llq.prod,
841					    readl_relaxed(cmdq->q.prod_reg),
842					    readl_relaxed(cmdq->q.cons_reg));
843		}
844
845		/*
846		 * Try to unlock the cmdq lock. This will fail if we're the last
847		 * reader, in which case we can safely update cmdq->q.llq.cons
848		 */
849		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
850			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
851			arm_smmu_cmdq_shared_unlock(cmdq);
852		}
853	}
854
855	local_irq_restore(flags);
856	return ret;
857}
858
859static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
860				   struct arm_smmu_cmdq_ent *ent)
861{
862	u64 cmd[CMDQ_ENT_DWORDS];
863
864	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
865		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
866			 ent->opcode);
867		return -EINVAL;
868	}
869
870	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
871}
872
873static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
874{
875	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
876}
877
878static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
879				    struct arm_smmu_cmdq_batch *cmds,
880				    struct arm_smmu_cmdq_ent *cmd)
881{
882	if (cmds->num == CMDQ_BATCH_ENTRIES) {
883		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
884		cmds->num = 0;
885	}
886	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
887	cmds->num++;
888}
889
890static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
891				      struct arm_smmu_cmdq_batch *cmds)
892{
893	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
894}
895
896/* Context descriptor manipulation functions */
897void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
898{
899	struct arm_smmu_cmdq_ent cmd = {
900		.opcode = CMDQ_OP_TLBI_NH_ASID,
901		.tlbi.asid = asid,
902	};
903
904	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
905	arm_smmu_cmdq_issue_sync(smmu);
906}
907
908static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
909			     int ssid, bool leaf)
910{
911	size_t i;
912	unsigned long flags;
913	struct arm_smmu_master *master;
914	struct arm_smmu_cmdq_batch cmds = {};
915	struct arm_smmu_device *smmu = smmu_domain->smmu;
916	struct arm_smmu_cmdq_ent cmd = {
917		.opcode	= CMDQ_OP_CFGI_CD,
918		.cfgi	= {
919			.ssid	= ssid,
920			.leaf	= leaf,
921		},
922	};
923
924	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
925	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
926		for (i = 0; i < master->num_sids; i++) {
927			cmd.cfgi.sid = master->sids[i];
928			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
929		}
930	}
931	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
932
933	arm_smmu_cmdq_batch_submit(smmu, &cmds);
934}
935
936static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
937					struct arm_smmu_l1_ctx_desc *l1_desc)
938{
939	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
940
941	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
942					     &l1_desc->l2ptr_dma, GFP_KERNEL);
943	if (!l1_desc->l2ptr) {
944		dev_warn(smmu->dev,
945			 "failed to allocate context descriptor table\n");
946		return -ENOMEM;
947	}
948	return 0;
949}
950
951static void arm_smmu_write_cd_l1_desc(__le64 *dst,
952				      struct arm_smmu_l1_ctx_desc *l1_desc)
953{
954	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
955		  CTXDESC_L1_DESC_V;
956
957	/* See comment in arm_smmu_write_ctx_desc() */
958	WRITE_ONCE(*dst, cpu_to_le64(val));
959}
960
961static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
962				   u32 ssid)
963{
964	__le64 *l1ptr;
965	unsigned int idx;
966	struct arm_smmu_l1_ctx_desc *l1_desc;
967	struct arm_smmu_device *smmu = smmu_domain->smmu;
968	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
969
970	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
971		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
972
973	idx = ssid >> CTXDESC_SPLIT;
974	l1_desc = &cdcfg->l1_desc[idx];
975	if (!l1_desc->l2ptr) {
976		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
977			return NULL;
978
979		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
980		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
981		/* An invalid L1CD can be cached */
982		arm_smmu_sync_cd(smmu_domain, ssid, false);
983	}
984	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
985	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
986}
987
988int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
989			    struct arm_smmu_ctx_desc *cd)
990{
991	/*
992	 * This function handles the following cases:
993	 *
994	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
995	 * (2) Install a secondary CD, for SID+SSID traffic.
996	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
997	 *     CD, then invalidate the old entry and mappings.
998	 * (4) Remove a secondary CD.
999	 */
1000	u64 val;
1001	bool cd_live;
1002	__le64 *cdptr;
1003	struct arm_smmu_device *smmu = smmu_domain->smmu;
1004
1005	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1006		return -E2BIG;
1007
1008	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1009	if (!cdptr)
1010		return -ENOMEM;
1011
1012	val = le64_to_cpu(cdptr[0]);
1013	cd_live = !!(val & CTXDESC_CD_0_V);
1014
1015	if (!cd) { /* (4) */
1016		val = 0;
1017	} else if (cd_live) { /* (3) */
1018		val &= ~CTXDESC_CD_0_ASID;
1019		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1020		/*
1021		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1022		 * this substream's traffic
1023		 */
1024	} else { /* (1) and (2) */
1025		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1026		cdptr[2] = 0;
1027		cdptr[3] = cpu_to_le64(cd->mair);
1028
1029		/*
1030		 * STE is live, and the SMMU might read dwords of this CD in any
1031		 * order. Ensure that it observes valid values before reading
1032		 * V=1.
1033		 */
1034		arm_smmu_sync_cd(smmu_domain, ssid, true);
1035
1036		val = cd->tcr |
1037#ifdef __BIG_ENDIAN
1038			CTXDESC_CD_0_ENDI |
1039#endif
1040			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1041			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1042			CTXDESC_CD_0_AA64 |
1043			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1044			CTXDESC_CD_0_V;
1045
1046		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1047		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1048			val |= CTXDESC_CD_0_S;
1049	}
1050
1051	/*
1052	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1053	 * "Configuration structures and configuration invalidation completion"
1054	 *
1055	 *   The size of single-copy atomic reads made by the SMMU is
1056	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1057	 *   field within an aligned 64-bit span of a structure can be altered
1058	 *   without first making the structure invalid.
1059	 */
1060	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1061	arm_smmu_sync_cd(smmu_domain, ssid, true);
1062	return 0;
1063}
1064
1065static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1066{
1067	int ret;
1068	size_t l1size;
1069	size_t max_contexts;
1070	struct arm_smmu_device *smmu = smmu_domain->smmu;
1071	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1072	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1073
1074	max_contexts = 1 << cfg->s1cdmax;
1075
1076	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1077	    max_contexts <= CTXDESC_L2_ENTRIES) {
1078		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1079		cdcfg->num_l1_ents = max_contexts;
1080
1081		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1082	} else {
1083		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1084		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1085						  CTXDESC_L2_ENTRIES);
1086
1087		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1088					      sizeof(*cdcfg->l1_desc),
1089					      GFP_KERNEL);
1090		if (!cdcfg->l1_desc)
1091			return -ENOMEM;
1092
1093		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1094	}
1095
1096	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1097					   GFP_KERNEL);
1098	if (!cdcfg->cdtab) {
1099		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1100		ret = -ENOMEM;
1101		goto err_free_l1;
1102	}
1103
1104	return 0;
1105
1106err_free_l1:
1107	if (cdcfg->l1_desc) {
1108		devm_kfree(smmu->dev, cdcfg->l1_desc);
1109		cdcfg->l1_desc = NULL;
1110	}
1111	return ret;
1112}
1113
1114static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1115{
1116	int i;
1117	size_t size, l1size;
1118	struct arm_smmu_device *smmu = smmu_domain->smmu;
1119	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1120
1121	if (cdcfg->l1_desc) {
1122		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1123
1124		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1125			if (!cdcfg->l1_desc[i].l2ptr)
1126				continue;
1127
1128			dmam_free_coherent(smmu->dev, size,
1129					   cdcfg->l1_desc[i].l2ptr,
1130					   cdcfg->l1_desc[i].l2ptr_dma);
1131		}
1132		devm_kfree(smmu->dev, cdcfg->l1_desc);
1133		cdcfg->l1_desc = NULL;
1134
1135		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1136	} else {
1137		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1138	}
1139
1140	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1141	cdcfg->cdtab_dma = 0;
1142	cdcfg->cdtab = NULL;
1143}
1144
1145bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1146{
1147	bool free;
1148	struct arm_smmu_ctx_desc *old_cd;
1149
1150	if (!cd->asid)
1151		return false;
1152
1153	free = refcount_dec_and_test(&cd->refs);
1154	if (free) {
1155		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1156		WARN_ON(old_cd != cd);
1157	}
1158	return free;
1159}
1160
1161/* Stream table manipulation functions */
1162static void
1163arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1164{
1165	u64 val = 0;
1166
1167	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1168	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1169
1170	/* See comment in arm_smmu_write_ctx_desc() */
1171	WRITE_ONCE(*dst, cpu_to_le64(val));
1172}
1173
1174static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1175{
1176	struct arm_smmu_cmdq_ent cmd = {
1177		.opcode	= CMDQ_OP_CFGI_STE,
1178		.cfgi	= {
1179			.sid	= sid,
1180			.leaf	= true,
1181		},
1182	};
1183
1184	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1185	arm_smmu_cmdq_issue_sync(smmu);
1186}
1187
1188static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1189				      __le64 *dst)
1190{
1191	/*
1192	 * This is hideously complicated, but we only really care about
1193	 * three cases at the moment:
1194	 *
1195	 * 1. Invalid (all zero) -> bypass/fault (init)
1196	 * 2. Bypass/fault -> translation/bypass (attach)
1197	 * 3. Translation/bypass -> bypass/fault (detach)
1198	 *
1199	 * Given that we can't update the STE atomically and the SMMU
1200	 * doesn't read the thing in a defined order, that leaves us
1201	 * with the following maintenance requirements:
1202	 *
1203	 * 1. Update Config, return (init time STEs aren't live)
1204	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1205	 * 3. Update Config, sync
1206	 */
1207	u64 val = le64_to_cpu(dst[0]);
1208	bool ste_live = false;
1209	struct arm_smmu_device *smmu = NULL;
1210	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1211	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1212	struct arm_smmu_domain *smmu_domain = NULL;
1213	struct arm_smmu_cmdq_ent prefetch_cmd = {
1214		.opcode		= CMDQ_OP_PREFETCH_CFG,
1215		.prefetch	= {
1216			.sid	= sid,
1217		},
1218	};
1219
1220	if (master) {
1221		smmu_domain = master->domain;
1222		smmu = master->smmu;
1223	}
1224
1225	if (smmu_domain) {
1226		switch (smmu_domain->stage) {
1227		case ARM_SMMU_DOMAIN_S1:
1228			s1_cfg = &smmu_domain->s1_cfg;
1229			break;
1230		case ARM_SMMU_DOMAIN_S2:
1231		case ARM_SMMU_DOMAIN_NESTED:
1232			s2_cfg = &smmu_domain->s2_cfg;
1233			break;
1234		default:
1235			break;
1236		}
1237	}
1238
1239	if (val & STRTAB_STE_0_V) {
1240		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1241		case STRTAB_STE_0_CFG_BYPASS:
1242			break;
1243		case STRTAB_STE_0_CFG_S1_TRANS:
1244		case STRTAB_STE_0_CFG_S2_TRANS:
1245			ste_live = true;
1246			break;
1247		case STRTAB_STE_0_CFG_ABORT:
1248			BUG_ON(!disable_bypass);
1249			break;
1250		default:
1251			BUG(); /* STE corruption */
1252		}
1253	}
1254
1255	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1256	val = STRTAB_STE_0_V;
1257
1258	/* Bypass/fault */
1259	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1260		if (!smmu_domain && disable_bypass)
1261			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1262		else
1263			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1264
1265		dst[0] = cpu_to_le64(val);
1266		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1267						STRTAB_STE_1_SHCFG_INCOMING));
1268		dst[2] = 0; /* Nuke the VMID */
1269		/*
1270		 * The SMMU can perform negative caching, so we must sync
1271		 * the STE regardless of whether the old value was live.
1272		 */
1273		if (smmu)
1274			arm_smmu_sync_ste_for_sid(smmu, sid);
1275		return;
1276	}
1277
1278	if (s1_cfg) {
1279		BUG_ON(ste_live);
1280		dst[1] = cpu_to_le64(
1281			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1282			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1283			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1284			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1285			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1286
1287		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1288		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1289			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1290
1291		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1292			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1293			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1294			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1295	}
1296
1297	if (s2_cfg) {
1298		BUG_ON(ste_live);
1299		dst[2] = cpu_to_le64(
1300			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1301			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1302#ifdef __BIG_ENDIAN
1303			 STRTAB_STE_2_S2ENDI |
1304#endif
1305			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1306			 STRTAB_STE_2_S2R);
1307
1308		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1309
1310		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1311	}
1312
1313	if (master->ats_enabled)
1314		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1315						 STRTAB_STE_1_EATS_TRANS));
1316
1317	arm_smmu_sync_ste_for_sid(smmu, sid);
1318	/* See comment in arm_smmu_write_ctx_desc() */
1319	WRITE_ONCE(dst[0], cpu_to_le64(val));
1320	arm_smmu_sync_ste_for_sid(smmu, sid);
1321
1322	/* It's likely that we'll want to use the new STE soon */
1323	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1324		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1325}
1326
1327static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1328{
1329	unsigned int i;
1330
1331	for (i = 0; i < nent; ++i) {
1332		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1333		strtab += STRTAB_STE_DWORDS;
1334	}
1335}
1336
1337static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1338{
1339	size_t size;
1340	void *strtab;
1341	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1342	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1343
1344	if (desc->l2ptr)
1345		return 0;
1346
1347	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1348	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1349
1350	desc->span = STRTAB_SPLIT + 1;
1351	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1352					  GFP_KERNEL);
1353	if (!desc->l2ptr) {
1354		dev_err(smmu->dev,
1355			"failed to allocate l2 stream table for SID %u\n",
1356			sid);
1357		return -ENOMEM;
1358	}
1359
1360	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1361	arm_smmu_write_strtab_l1_desc(strtab, desc);
1362	return 0;
1363}
1364
1365/* IRQ and event handlers */
1366static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1367{
1368	int i;
1369	struct arm_smmu_device *smmu = dev;
1370	struct arm_smmu_queue *q = &smmu->evtq.q;
1371	struct arm_smmu_ll_queue *llq = &q->llq;
1372	u64 evt[EVTQ_ENT_DWORDS];
1373
1374	do {
1375		while (!queue_remove_raw(q, evt)) {
1376			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1377
1378			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1379			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1380				dev_info(smmu->dev, "\t0x%016llx\n",
1381					 (unsigned long long)evt[i]);
1382
1383			cond_resched();
1384		}
1385
1386		/*
1387		 * Not much we can do on overflow, so scream and pretend we're
1388		 * trying harder.
1389		 */
1390		if (queue_sync_prod_in(q) == -EOVERFLOW)
1391			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1392	} while (!queue_empty(llq));
1393
1394	/* Sync our overflow flag, as we believe we're up to speed */
1395	queue_sync_cons_ovf(q);
1396	return IRQ_HANDLED;
1397}
1398
1399static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1400{
1401	u32 sid, ssid;
1402	u16 grpid;
1403	bool ssv, last;
1404
1405	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1406	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1407	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1408	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1409	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1410
1411	dev_info(smmu->dev, "unexpected PRI request received:\n");
1412	dev_info(smmu->dev,
1413		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1414		 sid, ssid, grpid, last ? "L" : "",
1415		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1416		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1417		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1418		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1419		 evt[1] & PRIQ_1_ADDR_MASK);
1420
1421	if (last) {
1422		struct arm_smmu_cmdq_ent cmd = {
1423			.opcode			= CMDQ_OP_PRI_RESP,
1424			.substream_valid	= ssv,
1425			.pri			= {
1426				.sid	= sid,
1427				.ssid	= ssid,
1428				.grpid	= grpid,
1429				.resp	= PRI_RESP_DENY,
1430			},
1431		};
1432
1433		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1434	}
1435}
1436
1437static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1438{
1439	struct arm_smmu_device *smmu = dev;
1440	struct arm_smmu_queue *q = &smmu->priq.q;
1441	struct arm_smmu_ll_queue *llq = &q->llq;
1442	u64 evt[PRIQ_ENT_DWORDS];
1443
1444	do {
1445		while (!queue_remove_raw(q, evt))
1446			arm_smmu_handle_ppr(smmu, evt);
1447
1448		if (queue_sync_prod_in(q) == -EOVERFLOW)
1449			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1450	} while (!queue_empty(llq));
1451
1452	/* Sync our overflow flag, as we believe we're up to speed */
1453	queue_sync_cons_ovf(q);
1454	return IRQ_HANDLED;
1455}
1456
1457static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1458
1459static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1460{
1461	u32 gerror, gerrorn, active;
1462	struct arm_smmu_device *smmu = dev;
1463
1464	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1465	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1466
1467	active = gerror ^ gerrorn;
1468	if (!(active & GERROR_ERR_MASK))
1469		return IRQ_NONE; /* No errors pending */
1470
1471	dev_warn(smmu->dev,
1472		 "unexpected global error reported (0x%08x), this could be serious\n",
1473		 active);
1474
1475	if (active & GERROR_SFM_ERR) {
1476		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1477		arm_smmu_device_disable(smmu);
1478	}
1479
1480	if (active & GERROR_MSI_GERROR_ABT_ERR)
1481		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1482
1483	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1484		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1485
1486	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1487		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1488
1489	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1490		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1491
1492	if (active & GERROR_PRIQ_ABT_ERR)
1493		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1494
1495	if (active & GERROR_EVTQ_ABT_ERR)
1496		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1497
1498	if (active & GERROR_CMDQ_ERR)
1499		arm_smmu_cmdq_skip_err(smmu);
1500
1501	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1502	return IRQ_HANDLED;
1503}
1504
1505static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1506{
1507	struct arm_smmu_device *smmu = dev;
1508
1509	arm_smmu_evtq_thread(irq, dev);
1510	if (smmu->features & ARM_SMMU_FEAT_PRI)
1511		arm_smmu_priq_thread(irq, dev);
1512
1513	return IRQ_HANDLED;
1514}
1515
1516static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1517{
1518	arm_smmu_gerror_handler(irq, dev);
1519	return IRQ_WAKE_THREAD;
1520}
1521
1522static void
1523arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1524			struct arm_smmu_cmdq_ent *cmd)
1525{
1526	size_t log2_span;
1527	size_t span_mask;
1528	/* ATC invalidates are always on 4096-bytes pages */
1529	size_t inval_grain_shift = 12;
1530	unsigned long page_start, page_end;
1531
1532	*cmd = (struct arm_smmu_cmdq_ent) {
1533		.opcode			= CMDQ_OP_ATC_INV,
1534		.substream_valid	= !!ssid,
1535		.atc.ssid		= ssid,
1536	};
1537
1538	if (!size) {
1539		cmd->atc.size = ATC_INV_SIZE_ALL;
1540		return;
1541	}
1542
1543	page_start	= iova >> inval_grain_shift;
1544	page_end	= (iova + size - 1) >> inval_grain_shift;
1545
1546	/*
1547	 * In an ATS Invalidate Request, the address must be aligned on the
1548	 * range size, which must be a power of two number of page sizes. We
1549	 * thus have to choose between grossly over-invalidating the region, or
1550	 * splitting the invalidation into multiple commands. For simplicity
1551	 * we'll go with the first solution, but should refine it in the future
1552	 * if multiple commands are shown to be more efficient.
1553	 *
1554	 * Find the smallest power of two that covers the range. The most
1555	 * significant differing bit between the start and end addresses,
1556	 * fls(start ^ end), indicates the required span. For example:
1557	 *
1558	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1559	 *		x = 0b1000 ^ 0b1011 = 0b11
1560	 *		span = 1 << fls(x) = 4
1561	 *
1562	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1563	 *		x = 0b0111 ^ 0b1010 = 0b1101
1564	 *		span = 1 << fls(x) = 16
1565	 */
1566	log2_span	= fls_long(page_start ^ page_end);
1567	span_mask	= (1ULL << log2_span) - 1;
1568
1569	page_start	&= ~span_mask;
1570
1571	cmd->atc.addr	= page_start << inval_grain_shift;
1572	cmd->atc.size	= log2_span;
1573}
1574
1575static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1576{
1577	int i;
1578	struct arm_smmu_cmdq_ent cmd;
1579
1580	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1581
1582	for (i = 0; i < master->num_sids; i++) {
1583		cmd.atc.sid = master->sids[i];
1584		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1585	}
1586
1587	return arm_smmu_cmdq_issue_sync(master->smmu);
1588}
1589
1590static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
1591				   int ssid, unsigned long iova, size_t size)
1592{
1593	int i;
1594	unsigned long flags;
1595	struct arm_smmu_cmdq_ent cmd;
1596	struct arm_smmu_master *master;
1597	struct arm_smmu_cmdq_batch cmds = {};
1598
1599	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1600		return 0;
1601
1602	/*
1603	 * Ensure that we've completed prior invalidation of the main TLBs
1604	 * before we read 'nr_ats_masters' in case of a concurrent call to
1605	 * arm_smmu_enable_ats():
1606	 *
1607	 *	// unmap()			// arm_smmu_enable_ats()
1608	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1609	 *	smp_mb();			[...]
1610	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1611	 *
1612	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1613	 * ATS was enabled at the PCI device before completion of the TLBI.
1614	 */
1615	smp_mb();
1616	if (!atomic_read(&smmu_domain->nr_ats_masters))
1617		return 0;
1618
1619	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1620
1621	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1622	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1623		if (!master->ats_enabled)
1624			continue;
1625
1626		for (i = 0; i < master->num_sids; i++) {
1627			cmd.atc.sid = master->sids[i];
1628			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1629		}
1630	}
1631	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1632
1633	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1634}
1635
1636/* IO_PGTABLE API */
1637static void arm_smmu_tlb_inv_context(void *cookie)
1638{
1639	struct arm_smmu_domain *smmu_domain = cookie;
1640	struct arm_smmu_device *smmu = smmu_domain->smmu;
1641	struct arm_smmu_cmdq_ent cmd;
1642
1643	/*
1644	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1645	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1646	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1647	 * insertion to guarantee those are observed before the TLBI. Do be
1648	 * careful, 007.
1649	 */
1650	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1651		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1652	} else {
1653		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1654		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1655		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1656		arm_smmu_cmdq_issue_sync(smmu);
1657	}
1658	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1659}
1660
1661static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
1662				   size_t granule, bool leaf,
1663				   struct arm_smmu_domain *smmu_domain)
1664{
1665	struct arm_smmu_device *smmu = smmu_domain->smmu;
1666	unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
1667	size_t inv_range = granule;
1668	struct arm_smmu_cmdq_batch cmds = {};
1669	struct arm_smmu_cmdq_ent cmd = {
1670		.tlbi = {
1671			.leaf	= leaf,
1672		},
1673	};
1674
1675	if (!size)
1676		return;
1677
1678	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1679		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
1680		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1681	} else {
1682		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1683		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1684	}
1685
1686	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1687		/* Get the leaf page size */
1688		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1689
1690		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1691		cmd.tlbi.tg = (tg - 10) / 2;
1692
1693		/* Determine what level the granule is at */
1694		cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1695
1696		num_pages = size >> tg;
1697	}
1698
1699	while (iova < end) {
1700		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1701			/*
1702			 * On each iteration of the loop, the range is 5 bits
1703			 * worth of the aligned size remaining.
1704			 * The range in pages is:
1705			 *
1706			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1707			 */
1708			unsigned long scale, num;
1709
1710			/* Determine the power of 2 multiple number of pages */
1711			scale = __ffs(num_pages);
1712			cmd.tlbi.scale = scale;
1713
1714			/* Determine how many chunks of 2^scale size we have */
1715			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1716			cmd.tlbi.num = num - 1;
1717
1718			/* range is num * 2^scale * pgsize */
1719			inv_range = num << (scale + tg);
1720
1721			/* Clear out the lower order bits for the next iteration */
1722			num_pages -= num << scale;
1723		}
1724
1725		cmd.tlbi.addr = iova;
1726		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1727		iova += inv_range;
1728	}
1729	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1730
1731	/*
1732	 * Unfortunately, this can't be leaf-only since we may have
1733	 * zapped an entire table.
1734	 */
1735	arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
1736}
1737
1738static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1739					 unsigned long iova, size_t granule,
1740					 void *cookie)
1741{
1742	struct arm_smmu_domain *smmu_domain = cookie;
1743	struct iommu_domain *domain = &smmu_domain->domain;
1744
1745	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1746}
1747
1748static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1749				  size_t granule, void *cookie)
1750{
1751	arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
1752}
1753
1754static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
1755				  size_t granule, void *cookie)
1756{
1757	arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
1758}
1759
1760static const struct iommu_flush_ops arm_smmu_flush_ops = {
1761	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1762	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
1763	.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
1764	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
1765};
1766
1767/* IOMMU API */
1768static bool arm_smmu_capable(enum iommu_cap cap)
1769{
1770	switch (cap) {
1771	case IOMMU_CAP_CACHE_COHERENCY:
1772		return true;
1773	case IOMMU_CAP_NOEXEC:
1774		return true;
1775	default:
1776		return false;
1777	}
1778}
1779
1780static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1781{
1782	struct arm_smmu_domain *smmu_domain;
1783
1784	if (type != IOMMU_DOMAIN_UNMANAGED &&
1785	    type != IOMMU_DOMAIN_DMA &&
1786	    type != IOMMU_DOMAIN_IDENTITY)
1787		return NULL;
1788
1789	/*
1790	 * Allocate the domain and initialise some of its data structures.
1791	 * We can't really do anything meaningful until we've added a
1792	 * master.
1793	 */
1794	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1795	if (!smmu_domain)
1796		return NULL;
1797
1798	if (type == IOMMU_DOMAIN_DMA &&
1799	    iommu_get_dma_cookie(&smmu_domain->domain)) {
1800		kfree(smmu_domain);
1801		return NULL;
1802	}
1803
1804	mutex_init(&smmu_domain->init_mutex);
1805	INIT_LIST_HEAD(&smmu_domain->devices);
1806	spin_lock_init(&smmu_domain->devices_lock);
1807
1808	return &smmu_domain->domain;
1809}
1810
1811static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1812{
1813	int idx, size = 1 << span;
1814
1815	do {
1816		idx = find_first_zero_bit(map, size);
1817		if (idx == size)
1818			return -ENOSPC;
1819	} while (test_and_set_bit(idx, map));
1820
1821	return idx;
1822}
1823
1824static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1825{
1826	clear_bit(idx, map);
1827}
1828
1829static void arm_smmu_domain_free(struct iommu_domain *domain)
1830{
1831	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1832	struct arm_smmu_device *smmu = smmu_domain->smmu;
1833
1834	iommu_put_dma_cookie(domain);
1835	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1836
1837	/* Free the CD and ASID, if we allocated them */
1838	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1839		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1840
1841		/* Prevent SVA from touching the CD while we're freeing it */
1842		mutex_lock(&arm_smmu_asid_lock);
1843		if (cfg->cdcfg.cdtab)
1844			arm_smmu_free_cd_tables(smmu_domain);
1845		arm_smmu_free_asid(&cfg->cd);
1846		mutex_unlock(&arm_smmu_asid_lock);
1847	} else {
1848		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1849		if (cfg->vmid)
1850			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1851	}
1852
1853	kfree(smmu_domain);
1854}
1855
1856static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1857				       struct arm_smmu_master *master,
1858				       struct io_pgtable_cfg *pgtbl_cfg)
1859{
1860	int ret;
1861	u32 asid;
1862	struct arm_smmu_device *smmu = smmu_domain->smmu;
1863	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1864	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1865
1866	refcount_set(&cfg->cd.refs, 1);
1867
1868	/* Prevent SVA from modifying the ASID until it is written to the CD */
1869	mutex_lock(&arm_smmu_asid_lock);
1870	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
1871		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
1872	if (ret)
1873		goto out_unlock;
1874
1875	cfg->s1cdmax = master->ssid_bits;
1876
1877	ret = arm_smmu_alloc_cd_tables(smmu_domain);
1878	if (ret)
1879		goto out_free_asid;
1880
1881	cfg->cd.asid	= (u16)asid;
1882	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
1883	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1884			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1885			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1886			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1887			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1888			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1889			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
1890	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
1891
1892	/*
1893	 * Note that this will end up calling arm_smmu_sync_cd() before
1894	 * the master has been added to the devices list for this domain.
1895	 * This isn't an issue because the STE hasn't been installed yet.
1896	 */
1897	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
1898	if (ret)
1899		goto out_free_cd_tables;
1900
1901	mutex_unlock(&arm_smmu_asid_lock);
1902	return 0;
1903
1904out_free_cd_tables:
1905	arm_smmu_free_cd_tables(smmu_domain);
1906out_free_asid:
1907	arm_smmu_free_asid(&cfg->cd);
1908out_unlock:
1909	mutex_unlock(&arm_smmu_asid_lock);
1910	return ret;
1911}
1912
1913static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1914				       struct arm_smmu_master *master,
1915				       struct io_pgtable_cfg *pgtbl_cfg)
1916{
1917	int vmid;
1918	struct arm_smmu_device *smmu = smmu_domain->smmu;
1919	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1920	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
1921
1922	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1923	if (vmid < 0)
1924		return vmid;
1925
1926	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1927	cfg->vmid	= (u16)vmid;
1928	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1929	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1930			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1931			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1932			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1933			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1934			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1935			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1936	return 0;
1937}
1938
1939static int arm_smmu_domain_finalise(struct iommu_domain *domain,
1940				    struct arm_smmu_master *master)
1941{
1942	int ret;
1943	unsigned long ias, oas;
1944	enum io_pgtable_fmt fmt;
1945	struct io_pgtable_cfg pgtbl_cfg;
1946	struct io_pgtable_ops *pgtbl_ops;
1947	int (*finalise_stage_fn)(struct arm_smmu_domain *,
1948				 struct arm_smmu_master *,
1949				 struct io_pgtable_cfg *);
1950	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1951	struct arm_smmu_device *smmu = smmu_domain->smmu;
1952
1953	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1954		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1955		return 0;
1956	}
1957
1958	/* Restrict the stage to what we can actually support */
1959	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1960		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1961	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1962		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1963
1964	switch (smmu_domain->stage) {
1965	case ARM_SMMU_DOMAIN_S1:
1966		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1967		ias = min_t(unsigned long, ias, VA_BITS);
1968		oas = smmu->ias;
1969		fmt = ARM_64_LPAE_S1;
1970		finalise_stage_fn = arm_smmu_domain_finalise_s1;
1971		break;
1972	case ARM_SMMU_DOMAIN_NESTED:
1973	case ARM_SMMU_DOMAIN_S2:
1974		ias = smmu->ias;
1975		oas = smmu->oas;
1976		fmt = ARM_64_LPAE_S2;
1977		finalise_stage_fn = arm_smmu_domain_finalise_s2;
1978		break;
1979	default:
1980		return -EINVAL;
1981	}
1982
1983	pgtbl_cfg = (struct io_pgtable_cfg) {
1984		.pgsize_bitmap	= smmu->pgsize_bitmap,
1985		.ias		= ias,
1986		.oas		= oas,
1987		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
1988		.tlb		= &arm_smmu_flush_ops,
1989		.iommu_dev	= smmu->dev,
1990	};
1991
1992	if (smmu_domain->non_strict)
1993		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1994
1995	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1996	if (!pgtbl_ops)
1997		return -ENOMEM;
1998
1999	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2000	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2001	domain->geometry.force_aperture = true;
2002
2003	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2004	if (ret < 0) {
2005		free_io_pgtable_ops(pgtbl_ops);
2006		return ret;
2007	}
2008
2009	smmu_domain->pgtbl_ops = pgtbl_ops;
2010	return 0;
2011}
2012
2013static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2014{
2015	__le64 *step;
2016	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2017
2018	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2019		struct arm_smmu_strtab_l1_desc *l1_desc;
2020		int idx;
2021
2022		/* Two-level walk */
2023		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2024		l1_desc = &cfg->l1_desc[idx];
2025		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2026		step = &l1_desc->l2ptr[idx];
2027	} else {
2028		/* Simple linear lookup */
2029		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2030	}
2031
2032	return step;
2033}
2034
2035static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2036{
2037	int i, j;
2038	struct arm_smmu_device *smmu = master->smmu;
2039
2040	for (i = 0; i < master->num_sids; ++i) {
2041		u32 sid = master->sids[i];
2042		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2043
2044		/* Bridged PCI devices may end up with duplicated IDs */
2045		for (j = 0; j < i; j++)
2046			if (master->sids[j] == sid)
2047				break;
2048		if (j < i)
2049			continue;
2050
2051		arm_smmu_write_strtab_ent(master, sid, step);
2052	}
2053}
2054
2055static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2056{
2057	struct device *dev = master->dev;
2058	struct arm_smmu_device *smmu = master->smmu;
2059	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2060
2061	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2062		return false;
2063
2064	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2065		return false;
2066
2067	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2068}
2069
2070static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2071{
2072	size_t stu;
2073	struct pci_dev *pdev;
2074	struct arm_smmu_device *smmu = master->smmu;
2075	struct arm_smmu_domain *smmu_domain = master->domain;
2076
2077	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2078	if (!master->ats_enabled)
2079		return;
2080
2081	/* Smallest Translation Unit: log2 of the smallest supported granule */
2082	stu = __ffs(smmu->pgsize_bitmap);
2083	pdev = to_pci_dev(master->dev);
2084
2085	atomic_inc(&smmu_domain->nr_ats_masters);
2086	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2087	if (pci_enable_ats(pdev, stu))
2088		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2089}
2090
2091static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2092{
2093	struct arm_smmu_domain *smmu_domain = master->domain;
2094
2095	if (!master->ats_enabled)
2096		return;
2097
2098	pci_disable_ats(to_pci_dev(master->dev));
2099	/*
2100	 * Ensure ATS is disabled at the endpoint before we issue the
2101	 * ATC invalidation via the SMMU.
2102	 */
2103	wmb();
2104	arm_smmu_atc_inv_master(master);
2105	atomic_dec(&smmu_domain->nr_ats_masters);
2106}
2107
2108static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2109{
2110	int ret;
2111	int features;
2112	int num_pasids;
2113	struct pci_dev *pdev;
2114
2115	if (!dev_is_pci(master->dev))
2116		return -ENODEV;
2117
2118	pdev = to_pci_dev(master->dev);
2119
2120	features = pci_pasid_features(pdev);
2121	if (features < 0)
2122		return features;
2123
2124	num_pasids = pci_max_pasids(pdev);
2125	if (num_pasids <= 0)
2126		return num_pasids;
2127
2128	ret = pci_enable_pasid(pdev, features);
2129	if (ret) {
2130		dev_err(&pdev->dev, "Failed to enable PASID\n");
2131		return ret;
2132	}
2133
2134	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2135				  master->smmu->ssid_bits);
2136	return 0;
2137}
2138
2139static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2140{
2141	struct pci_dev *pdev;
2142
2143	if (!dev_is_pci(master->dev))
2144		return;
2145
2146	pdev = to_pci_dev(master->dev);
2147
2148	if (!pdev->pasid_enabled)
2149		return;
2150
2151	master->ssid_bits = 0;
2152	pci_disable_pasid(pdev);
2153}
2154
2155static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2156{
2157	unsigned long flags;
2158	struct arm_smmu_domain *smmu_domain = master->domain;
2159
2160	if (!smmu_domain)
2161		return;
2162
2163	arm_smmu_disable_ats(master);
2164
2165	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2166	list_del(&master->domain_head);
2167	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2168
2169	master->domain = NULL;
2170	master->ats_enabled = false;
2171	arm_smmu_install_ste_for_dev(master);
2172}
2173
2174static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2175{
2176	int ret = 0;
2177	unsigned long flags;
2178	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2179	struct arm_smmu_device *smmu;
2180	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2181	struct arm_smmu_master *master;
2182
2183	if (!fwspec)
2184		return -ENOENT;
2185
2186	master = dev_iommu_priv_get(dev);
2187	smmu = master->smmu;
2188
2189	/*
2190	 * Checking that SVA is disabled ensures that this device isn't bound to
2191	 * any mm, and can be safely detached from its old domain. Bonds cannot
2192	 * be removed concurrently since we're holding the group mutex.
2193	 */
2194	if (arm_smmu_master_sva_enabled(master)) {
2195		dev_err(dev, "cannot attach - SVA enabled\n");
2196		return -EBUSY;
2197	}
2198
2199	arm_smmu_detach_dev(master);
2200
2201	mutex_lock(&smmu_domain->init_mutex);
2202
2203	if (!smmu_domain->smmu) {
2204		smmu_domain->smmu = smmu;
2205		ret = arm_smmu_domain_finalise(domain, master);
2206		if (ret) {
2207			smmu_domain->smmu = NULL;
2208			goto out_unlock;
2209		}
2210	} else if (smmu_domain->smmu != smmu) {
2211		dev_err(dev,
2212			"cannot attach to SMMU %s (upstream of %s)\n",
2213			dev_name(smmu_domain->smmu->dev),
2214			dev_name(smmu->dev));
2215		ret = -ENXIO;
2216		goto out_unlock;
2217	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2218		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2219		dev_err(dev,
2220			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2221			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2222		ret = -EINVAL;
2223		goto out_unlock;
2224	}
2225
2226	master->domain = smmu_domain;
2227
2228	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2229		master->ats_enabled = arm_smmu_ats_supported(master);
2230
2231	arm_smmu_install_ste_for_dev(master);
2232
2233	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2234	list_add(&master->domain_head, &smmu_domain->devices);
2235	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2236
2237	arm_smmu_enable_ats(master);
2238
2239out_unlock:
2240	mutex_unlock(&smmu_domain->init_mutex);
2241	return ret;
2242}
2243
2244static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2245			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2246{
2247	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2248
2249	if (!ops)
2250		return -ENODEV;
2251
2252	return ops->map(ops, iova, paddr, size, prot, gfp);
2253}
2254
2255static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2256			     size_t size, struct iommu_iotlb_gather *gather)
2257{
2258	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2259	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2260
2261	if (!ops)
2262		return 0;
2263
2264	return ops->unmap(ops, iova, size, gather);
2265}
2266
2267static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2268{
2269	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2270
2271	if (smmu_domain->smmu)
2272		arm_smmu_tlb_inv_context(smmu_domain);
2273}
2274
2275static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2276				struct iommu_iotlb_gather *gather)
2277{
2278	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2279
2280	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start + 1,
2281			       gather->pgsize, true, smmu_domain);
2282}
2283
2284static phys_addr_t
2285arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2286{
2287	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2288
2289	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2290		return iova;
2291
2292	if (!ops)
2293		return 0;
2294
2295	return ops->iova_to_phys(ops, iova);
2296}
2297
2298static struct platform_driver arm_smmu_driver;
2299
2300static
2301struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2302{
2303	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2304							  fwnode);
2305	put_device(dev);
2306	return dev ? dev_get_drvdata(dev) : NULL;
2307}
2308
2309static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2310{
2311	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2312
2313	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2314		limit *= 1UL << STRTAB_SPLIT;
2315
2316	return sid < limit;
2317}
2318
2319static struct iommu_ops arm_smmu_ops;
2320
2321static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2322{
2323	int i, ret;
2324	struct arm_smmu_device *smmu;
2325	struct arm_smmu_master *master;
2326	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2327
2328	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2329		return ERR_PTR(-ENODEV);
2330
2331	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2332		return ERR_PTR(-EBUSY);
2333
2334	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2335	if (!smmu)
2336		return ERR_PTR(-ENODEV);
2337
2338	master = kzalloc(sizeof(*master), GFP_KERNEL);
2339	if (!master)
2340		return ERR_PTR(-ENOMEM);
2341
2342	master->dev = dev;
2343	master->smmu = smmu;
2344	master->sids = fwspec->ids;
2345	master->num_sids = fwspec->num_ids;
2346	INIT_LIST_HEAD(&master->bonds);
2347	dev_iommu_priv_set(dev, master);
2348
2349	/* Check the SIDs are in range of the SMMU and our stream table */
2350	for (i = 0; i < master->num_sids; i++) {
2351		u32 sid = master->sids[i];
2352
2353		if (!arm_smmu_sid_in_range(smmu, sid)) {
2354			ret = -ERANGE;
2355			goto err_free_master;
2356		}
2357
2358		/* Ensure l2 strtab is initialised */
2359		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2360			ret = arm_smmu_init_l2_strtab(smmu, sid);
2361			if (ret)
2362				goto err_free_master;
2363		}
2364	}
2365
2366	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2367
2368	/*
2369	 * Note that PASID must be enabled before, and disabled after ATS:
2370	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2371	 *
2372	 *   Behavior is undefined if this bit is Set and the value of the PASID
2373	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2374	 *   are changed.
2375	 */
2376	arm_smmu_enable_pasid(master);
2377
2378	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2379		master->ssid_bits = min_t(u8, master->ssid_bits,
2380					  CTXDESC_LINEAR_CDMAX);
2381
2382	return &smmu->iommu;
2383
2384err_free_master:
2385	kfree(master);
2386	dev_iommu_priv_set(dev, NULL);
2387	return ERR_PTR(ret);
2388}
2389
2390static void arm_smmu_release_device(struct device *dev)
2391{
2392	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2393	struct arm_smmu_master *master;
2394
2395	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2396		return;
2397
2398	master = dev_iommu_priv_get(dev);
2399	WARN_ON(arm_smmu_master_sva_enabled(master));
2400	arm_smmu_detach_dev(master);
2401	arm_smmu_disable_pasid(master);
2402	kfree(master);
2403	iommu_fwspec_free(dev);
2404}
2405
2406static struct iommu_group *arm_smmu_device_group(struct device *dev)
2407{
2408	struct iommu_group *group;
2409
2410	/*
2411	 * We don't support devices sharing stream IDs other than PCI RID
2412	 * aliases, since the necessary ID-to-device lookup becomes rather
2413	 * impractical given a potential sparse 32-bit stream ID space.
2414	 */
2415	if (dev_is_pci(dev))
2416		group = pci_device_group(dev);
2417	else
2418		group = generic_device_group(dev);
2419
2420	return group;
2421}
2422
2423static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
2424				    enum iommu_attr attr, void *data)
2425{
2426	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2427
2428	switch (domain->type) {
2429	case IOMMU_DOMAIN_UNMANAGED:
2430		switch (attr) {
2431		case DOMAIN_ATTR_NESTING:
2432			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
2433			return 0;
2434		default:
2435			return -ENODEV;
2436		}
2437		break;
2438	case IOMMU_DOMAIN_DMA:
2439		switch (attr) {
2440		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2441			*(int *)data = smmu_domain->non_strict;
2442			return 0;
2443		default:
2444			return -ENODEV;
2445		}
2446		break;
2447	default:
2448		return -EINVAL;
2449	}
2450}
2451
2452static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
2453				    enum iommu_attr attr, void *data)
2454{
2455	int ret = 0;
2456	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2457
2458	mutex_lock(&smmu_domain->init_mutex);
2459
2460	switch (domain->type) {
2461	case IOMMU_DOMAIN_UNMANAGED:
2462		switch (attr) {
2463		case DOMAIN_ATTR_NESTING:
2464			if (smmu_domain->smmu) {
2465				ret = -EPERM;
2466				goto out_unlock;
2467			}
2468
2469			if (*(int *)data)
2470				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2471			else
2472				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2473			break;
2474		default:
2475			ret = -ENODEV;
2476		}
2477		break;
2478	case IOMMU_DOMAIN_DMA:
2479		switch(attr) {
2480		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2481			smmu_domain->non_strict = *(int *)data;
2482			break;
2483		default:
2484			ret = -ENODEV;
2485		}
2486		break;
2487	default:
2488		ret = -EINVAL;
2489	}
2490
2491out_unlock:
2492	mutex_unlock(&smmu_domain->init_mutex);
2493	return ret;
2494}
2495
2496static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2497{
2498	return iommu_fwspec_add_ids(dev, args->args, 1);
2499}
2500
2501static void arm_smmu_get_resv_regions(struct device *dev,
2502				      struct list_head *head)
2503{
2504	struct iommu_resv_region *region;
2505	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2506
2507	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2508					 prot, IOMMU_RESV_SW_MSI);
2509	if (!region)
2510		return;
2511
2512	list_add_tail(&region->list, head);
2513
2514	iommu_dma_get_resv_regions(dev, head);
2515}
2516
2517static bool arm_smmu_dev_has_feature(struct device *dev,
2518				     enum iommu_dev_features feat)
2519{
2520	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2521
2522	if (!master)
2523		return false;
2524
2525	switch (feat) {
2526	case IOMMU_DEV_FEAT_SVA:
2527		return arm_smmu_master_sva_supported(master);
2528	default:
2529		return false;
2530	}
2531}
2532
2533static bool arm_smmu_dev_feature_enabled(struct device *dev,
2534					 enum iommu_dev_features feat)
2535{
2536	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2537
2538	if (!master)
2539		return false;
2540
2541	switch (feat) {
2542	case IOMMU_DEV_FEAT_SVA:
2543		return arm_smmu_master_sva_enabled(master);
2544	default:
2545		return false;
2546	}
2547}
2548
2549static int arm_smmu_dev_enable_feature(struct device *dev,
2550				       enum iommu_dev_features feat)
2551{
2552	if (!arm_smmu_dev_has_feature(dev, feat))
2553		return -ENODEV;
2554
2555	if (arm_smmu_dev_feature_enabled(dev, feat))
2556		return -EBUSY;
2557
2558	switch (feat) {
2559	case IOMMU_DEV_FEAT_SVA:
2560		return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
2561	default:
2562		return -EINVAL;
2563	}
2564}
2565
2566static int arm_smmu_dev_disable_feature(struct device *dev,
2567					enum iommu_dev_features feat)
2568{
2569	if (!arm_smmu_dev_feature_enabled(dev, feat))
2570		return -EINVAL;
2571
2572	switch (feat) {
2573	case IOMMU_DEV_FEAT_SVA:
2574		return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
2575	default:
2576		return -EINVAL;
2577	}
2578}
2579
2580static struct iommu_ops arm_smmu_ops = {
2581	.capable		= arm_smmu_capable,
2582	.domain_alloc		= arm_smmu_domain_alloc,
2583	.domain_free		= arm_smmu_domain_free,
2584	.attach_dev		= arm_smmu_attach_dev,
2585	.map			= arm_smmu_map,
2586	.unmap			= arm_smmu_unmap,
2587	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2588	.iotlb_sync		= arm_smmu_iotlb_sync,
2589	.iova_to_phys		= arm_smmu_iova_to_phys,
2590	.probe_device		= arm_smmu_probe_device,
2591	.release_device		= arm_smmu_release_device,
2592	.device_group		= arm_smmu_device_group,
2593	.domain_get_attr	= arm_smmu_domain_get_attr,
2594	.domain_set_attr	= arm_smmu_domain_set_attr,
2595	.of_xlate		= arm_smmu_of_xlate,
2596	.get_resv_regions	= arm_smmu_get_resv_regions,
2597	.put_resv_regions	= generic_iommu_put_resv_regions,
2598	.dev_has_feat		= arm_smmu_dev_has_feature,
2599	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
2600	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2601	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2602	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2603};
2604
2605/* Probing and initialisation functions */
2606static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2607				   struct arm_smmu_queue *q,
2608				   unsigned long prod_off,
2609				   unsigned long cons_off,
2610				   size_t dwords, const char *name)
2611{
2612	size_t qsz;
2613
2614	do {
2615		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2616		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2617					      GFP_KERNEL);
2618		if (q->base || qsz < PAGE_SIZE)
2619			break;
2620
2621		q->llq.max_n_shift--;
2622	} while (1);
2623
2624	if (!q->base) {
2625		dev_err(smmu->dev,
2626			"failed to allocate queue (0x%zx bytes) for %s\n",
2627			qsz, name);
2628		return -ENOMEM;
2629	}
2630
2631	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2632		dev_info(smmu->dev, "allocated %u entries for %s\n",
2633			 1 << q->llq.max_n_shift, name);
2634	}
2635
2636	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
2637	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
2638	q->ent_dwords	= dwords;
2639
2640	q->q_base  = Q_BASE_RWA;
2641	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2642	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2643
2644	q->llq.prod = q->llq.cons = 0;
2645	return 0;
2646}
2647
2648static void arm_smmu_cmdq_free_bitmap(void *data)
2649{
2650	unsigned long *bitmap = data;
2651	bitmap_free(bitmap);
2652}
2653
2654static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2655{
2656	int ret = 0;
2657	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2658	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2659	atomic_long_t *bitmap;
2660
2661	atomic_set(&cmdq->owner_prod, 0);
2662	atomic_set(&cmdq->lock, 0);
2663
2664	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2665	if (!bitmap) {
2666		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2667		ret = -ENOMEM;
2668	} else {
2669		cmdq->valid_map = bitmap;
2670		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2671	}
2672
2673	return ret;
2674}
2675
2676static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2677{
2678	int ret;
2679
2680	/* cmdq */
2681	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2682				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
2683				      "cmdq");
2684	if (ret)
2685		return ret;
2686
2687	ret = arm_smmu_cmdq_init(smmu);
2688	if (ret)
2689		return ret;
2690
2691	/* evtq */
2692	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2693				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
2694				      "evtq");
2695	if (ret)
2696		return ret;
2697
2698	/* priq */
2699	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2700		return 0;
2701
2702	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2703				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
2704				       "priq");
2705}
2706
2707static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2708{
2709	unsigned int i;
2710	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2711	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2712	void *strtab = smmu->strtab_cfg.strtab;
2713
2714	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2715	if (!cfg->l1_desc) {
2716		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2717		return -ENOMEM;
2718	}
2719
2720	for (i = 0; i < cfg->num_l1_ents; ++i) {
2721		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2722		strtab += STRTAB_L1_DESC_DWORDS << 3;
2723	}
2724
2725	return 0;
2726}
2727
2728static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2729{
2730	void *strtab;
2731	u64 reg;
2732	u32 size, l1size;
2733	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2734
2735	/* Calculate the L1 size, capped to the SIDSIZE. */
2736	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2737	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2738	cfg->num_l1_ents = 1 << size;
2739
2740	size += STRTAB_SPLIT;
2741	if (size < smmu->sid_bits)
2742		dev_warn(smmu->dev,
2743			 "2-level strtab only covers %u/%u bits of SID\n",
2744			 size, smmu->sid_bits);
2745
2746	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2747	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2748				     GFP_KERNEL);
2749	if (!strtab) {
2750		dev_err(smmu->dev,
2751			"failed to allocate l1 stream table (%u bytes)\n",
2752			l1size);
2753		return -ENOMEM;
2754	}
2755	cfg->strtab = strtab;
2756
2757	/* Configure strtab_base_cfg for 2 levels */
2758	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2759	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2760	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2761	cfg->strtab_base_cfg = reg;
2762
2763	return arm_smmu_init_l1_strtab(smmu);
2764}
2765
2766static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2767{
2768	void *strtab;
2769	u64 reg;
2770	u32 size;
2771	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2772
2773	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2774	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2775				     GFP_KERNEL);
2776	if (!strtab) {
2777		dev_err(smmu->dev,
2778			"failed to allocate linear stream table (%u bytes)\n",
2779			size);
2780		return -ENOMEM;
2781	}
2782	cfg->strtab = strtab;
2783	cfg->num_l1_ents = 1 << smmu->sid_bits;
2784
2785	/* Configure strtab_base_cfg for a linear table covering all SIDs */
2786	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2787	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2788	cfg->strtab_base_cfg = reg;
2789
2790	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2791	return 0;
2792}
2793
2794static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2795{
2796	u64 reg;
2797	int ret;
2798
2799	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2800		ret = arm_smmu_init_strtab_2lvl(smmu);
2801	else
2802		ret = arm_smmu_init_strtab_linear(smmu);
2803
2804	if (ret)
2805		return ret;
2806
2807	/* Set the strtab base address */
2808	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2809	reg |= STRTAB_BASE_RA;
2810	smmu->strtab_cfg.strtab_base = reg;
2811
2812	/* Allocate the first VMID for stage-2 bypass STEs */
2813	set_bit(0, smmu->vmid_map);
2814	return 0;
2815}
2816
2817static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2818{
2819	int ret;
2820
2821	ret = arm_smmu_init_queues(smmu);
2822	if (ret)
2823		return ret;
2824
2825	return arm_smmu_init_strtab(smmu);
2826}
2827
2828static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2829				   unsigned int reg_off, unsigned int ack_off)
2830{
2831	u32 reg;
2832
2833	writel_relaxed(val, smmu->base + reg_off);
2834	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2835					  1, ARM_SMMU_POLL_TIMEOUT_US);
2836}
2837
2838/* GBPA is "special" */
2839static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2840{
2841	int ret;
2842	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2843
2844	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2845					 1, ARM_SMMU_POLL_TIMEOUT_US);
2846	if (ret)
2847		return ret;
2848
2849	reg &= ~clr;
2850	reg |= set;
2851	writel_relaxed(reg | GBPA_UPDATE, gbpa);
2852	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2853					 1, ARM_SMMU_POLL_TIMEOUT_US);
2854
2855	if (ret)
2856		dev_err(smmu->dev, "GBPA not responding to update\n");
2857	return ret;
2858}
2859
2860static void arm_smmu_free_msis(void *data)
2861{
2862	struct device *dev = data;
2863	platform_msi_domain_free_irqs(dev);
2864}
2865
2866static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2867{
2868	phys_addr_t doorbell;
2869	struct device *dev = msi_desc_to_dev(desc);
2870	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2871	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2872
2873	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2874	doorbell &= MSI_CFG0_ADDR_MASK;
2875
2876	writeq_relaxed(doorbell, smmu->base + cfg[0]);
2877	writel_relaxed(msg->data, smmu->base + cfg[1]);
2878	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2879}
2880
2881static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2882{
2883	struct msi_desc *desc;
2884	int ret, nvec = ARM_SMMU_MAX_MSIS;
2885	struct device *dev = smmu->dev;
2886
2887	/* Clear the MSI address regs */
2888	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2889	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2890
2891	if (smmu->features & ARM_SMMU_FEAT_PRI)
2892		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2893	else
2894		nvec--;
2895
2896	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2897		return;
2898
2899	if (!dev->msi_domain) {
2900		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2901		return;
2902	}
2903
2904	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2905	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2906	if (ret) {
2907		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2908		return;
2909	}
2910
2911	for_each_msi_entry(desc, dev) {
2912		switch (desc->platform.msi_index) {
2913		case EVTQ_MSI_INDEX:
2914			smmu->evtq.q.irq = desc->irq;
2915			break;
2916		case GERROR_MSI_INDEX:
2917			smmu->gerr_irq = desc->irq;
2918			break;
2919		case PRIQ_MSI_INDEX:
2920			smmu->priq.q.irq = desc->irq;
2921			break;
2922		default:	/* Unknown */
2923			continue;
2924		}
2925	}
2926
2927	/* Add callback to free MSIs on teardown */
2928	devm_add_action(dev, arm_smmu_free_msis, dev);
2929}
2930
2931static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2932{
2933	int irq, ret;
2934
2935	arm_smmu_setup_msis(smmu);
2936
2937	/* Request interrupt lines */
2938	irq = smmu->evtq.q.irq;
2939	if (irq) {
2940		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2941						arm_smmu_evtq_thread,
2942						IRQF_ONESHOT,
2943						"arm-smmu-v3-evtq", smmu);
2944		if (ret < 0)
2945			dev_warn(smmu->dev, "failed to enable evtq irq\n");
2946	} else {
2947		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2948	}
2949
2950	irq = smmu->gerr_irq;
2951	if (irq) {
2952		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2953				       0, "arm-smmu-v3-gerror", smmu);
2954		if (ret < 0)
2955			dev_warn(smmu->dev, "failed to enable gerror irq\n");
2956	} else {
2957		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2958	}
2959
2960	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2961		irq = smmu->priq.q.irq;
2962		if (irq) {
2963			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2964							arm_smmu_priq_thread,
2965							IRQF_ONESHOT,
2966							"arm-smmu-v3-priq",
2967							smmu);
2968			if (ret < 0)
2969				dev_warn(smmu->dev,
2970					 "failed to enable priq irq\n");
2971		} else {
2972			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2973		}
2974	}
2975}
2976
2977static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2978{
2979	int ret, irq;
2980	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2981
2982	/* Disable IRQs first */
2983	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2984				      ARM_SMMU_IRQ_CTRLACK);
2985	if (ret) {
2986		dev_err(smmu->dev, "failed to disable irqs\n");
2987		return ret;
2988	}
2989
2990	irq = smmu->combined_irq;
2991	if (irq) {
2992		/*
2993		 * Cavium ThunderX2 implementation doesn't support unique irq
2994		 * lines. Use a single irq line for all the SMMUv3 interrupts.
2995		 */
2996		ret = devm_request_threaded_irq(smmu->dev, irq,
2997					arm_smmu_combined_irq_handler,
2998					arm_smmu_combined_irq_thread,
2999					IRQF_ONESHOT,
3000					"arm-smmu-v3-combined-irq", smmu);
3001		if (ret < 0)
3002			dev_warn(smmu->dev, "failed to enable combined irq\n");
3003	} else
3004		arm_smmu_setup_unique_irqs(smmu);
3005
3006	if (smmu->features & ARM_SMMU_FEAT_PRI)
3007		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3008
3009	/* Enable interrupt generation on the SMMU */
3010	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3011				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3012	if (ret)
3013		dev_warn(smmu->dev, "failed to enable irqs\n");
3014
3015	return 0;
3016}
3017
3018static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3019{
3020	int ret;
3021
3022	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3023	if (ret)
3024		dev_err(smmu->dev, "failed to clear cr0\n");
3025
3026	return ret;
3027}
3028
3029static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3030{
3031	int ret;
3032	u32 reg, enables;
3033	struct arm_smmu_cmdq_ent cmd;
3034
3035	/* Clear CR0 and sync (disables SMMU and queue processing) */
3036	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3037	if (reg & CR0_SMMUEN) {
3038		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3039		WARN_ON(is_kdump_kernel() && !disable_bypass);
3040		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3041	}
3042
3043	ret = arm_smmu_device_disable(smmu);
3044	if (ret)
3045		return ret;
3046
3047	/* CR1 (table and queue memory attributes) */
3048	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3049	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3050	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3051	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3052	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3053	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3054	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3055
3056	/* CR2 (random crap) */
3057	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
3058	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3059
3060	/* Stream table */
3061	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3062		       smmu->base + ARM_SMMU_STRTAB_BASE);
3063	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3064		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3065
3066	/* Command queue */
3067	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3068	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3069	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3070
3071	enables = CR0_CMDQEN;
3072	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3073				      ARM_SMMU_CR0ACK);
3074	if (ret) {
3075		dev_err(smmu->dev, "failed to enable command queue\n");
3076		return ret;
3077	}
3078
3079	/* Invalidate any cached configuration */
3080	cmd.opcode = CMDQ_OP_CFGI_ALL;
3081	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3082	arm_smmu_cmdq_issue_sync(smmu);
3083
3084	/* Invalidate any stale TLB entries */
3085	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3086		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3087		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3088	}
3089
3090	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3091	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3092	arm_smmu_cmdq_issue_sync(smmu);
3093
3094	/* Event queue */
3095	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3096	writel_relaxed(smmu->evtq.q.llq.prod,
3097		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
3098	writel_relaxed(smmu->evtq.q.llq.cons,
3099		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
3100
3101	enables |= CR0_EVTQEN;
3102	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3103				      ARM_SMMU_CR0ACK);
3104	if (ret) {
3105		dev_err(smmu->dev, "failed to enable event queue\n");
3106		return ret;
3107	}
3108
3109	/* PRI queue */
3110	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3111		writeq_relaxed(smmu->priq.q.q_base,
3112			       smmu->base + ARM_SMMU_PRIQ_BASE);
3113		writel_relaxed(smmu->priq.q.llq.prod,
3114			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
3115		writel_relaxed(smmu->priq.q.llq.cons,
3116			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
3117
3118		enables |= CR0_PRIQEN;
3119		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3120					      ARM_SMMU_CR0ACK);
3121		if (ret) {
3122			dev_err(smmu->dev, "failed to enable PRI queue\n");
3123			return ret;
3124		}
3125	}
3126
3127	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3128		enables |= CR0_ATSCHK;
3129		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3130					      ARM_SMMU_CR0ACK);
3131		if (ret) {
3132			dev_err(smmu->dev, "failed to enable ATS check\n");
3133			return ret;
3134		}
3135	}
3136
3137	ret = arm_smmu_setup_irqs(smmu);
3138	if (ret) {
3139		dev_err(smmu->dev, "failed to setup irqs\n");
3140		return ret;
3141	}
3142
3143	if (is_kdump_kernel())
3144		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3145
3146	/* Enable the SMMU interface, or ensure bypass */
3147	if (!bypass || disable_bypass) {
3148		enables |= CR0_SMMUEN;
3149	} else {
3150		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3151		if (ret)
3152			return ret;
3153	}
3154	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3155				      ARM_SMMU_CR0ACK);
3156	if (ret) {
3157		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3158		return ret;
3159	}
3160
3161	return 0;
3162}
3163
3164static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3165{
3166	u32 reg;
3167	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3168
3169	/* IDR0 */
3170	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3171
3172	/* 2-level structures */
3173	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3174		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3175
3176	if (reg & IDR0_CD2L)
3177		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3178
3179	/*
3180	 * Translation table endianness.
3181	 * We currently require the same endianness as the CPU, but this
3182	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3183	 */
3184	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3185	case IDR0_TTENDIAN_MIXED:
3186		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3187		break;
3188#ifdef __BIG_ENDIAN
3189	case IDR0_TTENDIAN_BE:
3190		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3191		break;
3192#else
3193	case IDR0_TTENDIAN_LE:
3194		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3195		break;
3196#endif
3197	default:
3198		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3199		return -ENXIO;
3200	}
3201
3202	/* Boolean feature flags */
3203	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3204		smmu->features |= ARM_SMMU_FEAT_PRI;
3205
3206	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3207		smmu->features |= ARM_SMMU_FEAT_ATS;
3208
3209	if (reg & IDR0_SEV)
3210		smmu->features |= ARM_SMMU_FEAT_SEV;
3211
3212	if (reg & IDR0_MSI) {
3213		smmu->features |= ARM_SMMU_FEAT_MSI;
3214		if (coherent && !disable_msipolling)
3215			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3216	}
3217
3218	if (reg & IDR0_HYP)
3219		smmu->features |= ARM_SMMU_FEAT_HYP;
3220
3221	/*
3222	 * The coherency feature as set by FW is used in preference to the ID
3223	 * register, but warn on mismatch.
3224	 */
3225	if (!!(reg & IDR0_COHACC) != coherent)
3226		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3227			 coherent ? "true" : "false");
3228
3229	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3230	case IDR0_STALL_MODEL_FORCE:
3231		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3232		fallthrough;
3233	case IDR0_STALL_MODEL_STALL:
3234		smmu->features |= ARM_SMMU_FEAT_STALLS;
3235	}
3236
3237	if (reg & IDR0_S1P)
3238		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3239
3240	if (reg & IDR0_S2P)
3241		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3242
3243	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3244		dev_err(smmu->dev, "no translation support!\n");
3245		return -ENXIO;
3246	}
3247
3248	/* We only support the AArch64 table format at present */
3249	switch (FIELD_GET(IDR0_TTF, reg)) {
3250	case IDR0_TTF_AARCH32_64:
3251		smmu->ias = 40;
3252		fallthrough;
3253	case IDR0_TTF_AARCH64:
3254		break;
3255	default:
3256		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3257		return -ENXIO;
3258	}
3259
3260	/* ASID/VMID sizes */
3261	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3262	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3263
3264	/* IDR1 */
3265	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3266	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3267		dev_err(smmu->dev, "embedded implementation not supported\n");
3268		return -ENXIO;
3269	}
3270
3271	/* Queue sizes, capped to ensure natural alignment */
3272	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3273					     FIELD_GET(IDR1_CMDQS, reg));
3274	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3275		/*
3276		 * We don't support splitting up batches, so one batch of
3277		 * commands plus an extra sync needs to fit inside the command
3278		 * queue. There's also no way we can handle the weird alignment
3279		 * restrictions on the base pointer for a unit-length queue.
3280		 */
3281		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3282			CMDQ_BATCH_ENTRIES);
3283		return -ENXIO;
3284	}
3285
3286	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3287					     FIELD_GET(IDR1_EVTQS, reg));
3288	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3289					     FIELD_GET(IDR1_PRIQS, reg));
3290
3291	/* SID/SSID sizes */
3292	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3293	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3294
3295	/*
3296	 * If the SMMU supports fewer bits than would fill a single L2 stream
3297	 * table, use a linear table instead.
3298	 */
3299	if (smmu->sid_bits <= STRTAB_SPLIT)
3300		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3301
3302	/* IDR3 */
3303	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3304	if (FIELD_GET(IDR3_RIL, reg))
3305		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3306
3307	/* IDR5 */
3308	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3309
3310	/* Maximum number of outstanding stalls */
3311	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3312
3313	/* Page sizes */
3314	if (reg & IDR5_GRAN64K)
3315		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3316	if (reg & IDR5_GRAN16K)
3317		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3318	if (reg & IDR5_GRAN4K)
3319		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3320
3321	/* Input address size */
3322	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3323		smmu->features |= ARM_SMMU_FEAT_VAX;
3324
3325	/* Output address size */
3326	switch (FIELD_GET(IDR5_OAS, reg)) {
3327	case IDR5_OAS_32_BIT:
3328		smmu->oas = 32;
3329		break;
3330	case IDR5_OAS_36_BIT:
3331		smmu->oas = 36;
3332		break;
3333	case IDR5_OAS_40_BIT:
3334		smmu->oas = 40;
3335		break;
3336	case IDR5_OAS_42_BIT:
3337		smmu->oas = 42;
3338		break;
3339	case IDR5_OAS_44_BIT:
3340		smmu->oas = 44;
3341		break;
3342	case IDR5_OAS_52_BIT:
3343		smmu->oas = 52;
3344		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3345		break;
3346	default:
3347		dev_info(smmu->dev,
3348			"unknown output address size. Truncating to 48-bit\n");
3349		fallthrough;
3350	case IDR5_OAS_48_BIT:
3351		smmu->oas = 48;
3352	}
3353
3354	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3355		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3356	else
3357		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3358
3359	/* Set the DMA mask for our table walker */
3360	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3361		dev_warn(smmu->dev,
3362			 "failed to set DMA mask for table walker\n");
3363
3364	smmu->ias = max(smmu->ias, smmu->oas);
3365
3366	if (arm_smmu_sva_supported(smmu))
3367		smmu->features |= ARM_SMMU_FEAT_SVA;
3368
3369	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3370		 smmu->ias, smmu->oas, smmu->features);
3371	return 0;
3372}
3373
3374#ifdef CONFIG_ACPI
3375static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3376{
3377	switch (model) {
3378	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3379		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3380		break;
3381	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3382		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3383		break;
3384	}
3385
3386	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3387}
3388
3389static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3390				      struct arm_smmu_device *smmu)
3391{
3392	struct acpi_iort_smmu_v3 *iort_smmu;
3393	struct device *dev = smmu->dev;
3394	struct acpi_iort_node *node;
3395
3396	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3397
3398	/* Retrieve SMMUv3 specific data */
3399	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3400
3401	acpi_smmu_get_options(iort_smmu->model, smmu);
3402
3403	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3404		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3405
3406	return 0;
3407}
3408#else
3409static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3410					     struct arm_smmu_device *smmu)
3411{
3412	return -ENODEV;
3413}
3414#endif
3415
3416static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3417				    struct arm_smmu_device *smmu)
3418{
3419	struct device *dev = &pdev->dev;
3420	u32 cells;
3421	int ret = -EINVAL;
3422
3423	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3424		dev_err(dev, "missing #iommu-cells property\n");
3425	else if (cells != 1)
3426		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3427	else
3428		ret = 0;
3429
3430	parse_driver_options(smmu);
3431
3432	if (of_dma_is_coherent(dev->of_node))
3433		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3434
3435	return ret;
3436}
3437
3438static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3439{
3440	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3441		return SZ_64K;
3442	else
3443		return SZ_128K;
3444}
3445
3446static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3447{
3448	int err;
3449
3450#ifdef CONFIG_PCI
3451	if (pci_bus_type.iommu_ops != ops) {
3452		err = bus_set_iommu(&pci_bus_type, ops);
3453		if (err)
3454			return err;
3455	}
3456#endif
3457#ifdef CONFIG_ARM_AMBA
3458	if (amba_bustype.iommu_ops != ops) {
3459		err = bus_set_iommu(&amba_bustype, ops);
3460		if (err)
3461			goto err_reset_pci_ops;
3462	}
3463#endif
3464	if (platform_bus_type.iommu_ops != ops) {
3465		err = bus_set_iommu(&platform_bus_type, ops);
3466		if (err)
3467			goto err_reset_amba_ops;
3468	}
3469
3470	return 0;
3471
3472err_reset_amba_ops:
3473#ifdef CONFIG_ARM_AMBA
3474	bus_set_iommu(&amba_bustype, NULL);
3475#endif
3476err_reset_pci_ops: __maybe_unused;
3477#ifdef CONFIG_PCI
3478	bus_set_iommu(&pci_bus_type, NULL);
3479#endif
3480	return err;
3481}
3482
3483static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3484				      resource_size_t size)
3485{
3486	struct resource res = {
3487		.flags = IORESOURCE_MEM,
3488		.start = start,
3489		.end = start + size - 1,
3490	};
3491
3492	return devm_ioremap_resource(dev, &res);
3493}
3494
3495static int arm_smmu_device_probe(struct platform_device *pdev)
3496{
3497	int irq, ret;
3498	struct resource *res;
3499	resource_size_t ioaddr;
3500	struct arm_smmu_device *smmu;
3501	struct device *dev = &pdev->dev;
3502	bool bypass;
3503
3504	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3505	if (!smmu) {
3506		dev_err(dev, "failed to allocate arm_smmu_device\n");
3507		return -ENOMEM;
3508	}
3509	smmu->dev = dev;
3510
3511	if (dev->of_node) {
3512		ret = arm_smmu_device_dt_probe(pdev, smmu);
3513	} else {
3514		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3515		if (ret == -ENODEV)
3516			return ret;
3517	}
3518
3519	/* Set bypass mode according to firmware probing result */
3520	bypass = !!ret;
3521
3522	/* Base address */
3523	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3524	if (!res)
3525		return -EINVAL;
3526	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3527		dev_err(dev, "MMIO region too small (%pr)\n", res);
3528		return -EINVAL;
3529	}
3530	ioaddr = res->start;
3531
3532	/*
3533	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3534	 * the PMCG registers which are reserved by the PMU driver.
3535	 */
3536	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3537	if (IS_ERR(smmu->base))
3538		return PTR_ERR(smmu->base);
3539
3540	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3541		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3542					       ARM_SMMU_REG_SZ);
3543		if (IS_ERR(smmu->page1))
3544			return PTR_ERR(smmu->page1);
3545	} else {
3546		smmu->page1 = smmu->base;
3547	}
3548
3549	/* Interrupt lines */
3550
3551	irq = platform_get_irq_byname_optional(pdev, "combined");
3552	if (irq > 0)
3553		smmu->combined_irq = irq;
3554	else {
3555		irq = platform_get_irq_byname_optional(pdev, "eventq");
3556		if (irq > 0)
3557			smmu->evtq.q.irq = irq;
3558
3559		irq = platform_get_irq_byname_optional(pdev, "priq");
3560		if (irq > 0)
3561			smmu->priq.q.irq = irq;
3562
3563		irq = platform_get_irq_byname_optional(pdev, "gerror");
3564		if (irq > 0)
3565			smmu->gerr_irq = irq;
3566	}
3567	/* Probe the h/w */
3568	ret = arm_smmu_device_hw_probe(smmu);
3569	if (ret)
3570		return ret;
3571
3572	/* Initialise in-memory data structures */
3573	ret = arm_smmu_init_structures(smmu);
3574	if (ret)
3575		return ret;
3576
3577	/* Record our private device structure */
3578	platform_set_drvdata(pdev, smmu);
3579
3580	/* Reset the device */
3581	ret = arm_smmu_device_reset(smmu, bypass);
3582	if (ret)
3583		return ret;
3584
3585	/* And we're up. Go go go! */
3586	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3587				     "smmu3.%pa", &ioaddr);
3588	if (ret)
3589		return ret;
3590
3591	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
3592	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
3593
3594	ret = iommu_device_register(&smmu->iommu);
3595	if (ret) {
3596		dev_err(dev, "Failed to register iommu\n");
3597		return ret;
3598	}
3599
3600	return arm_smmu_set_bus_ops(&arm_smmu_ops);
3601}
3602
3603static int arm_smmu_device_remove(struct platform_device *pdev)
3604{
3605	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3606
3607	arm_smmu_set_bus_ops(NULL);
3608	iommu_device_unregister(&smmu->iommu);
3609	iommu_device_sysfs_remove(&smmu->iommu);
3610	arm_smmu_device_disable(smmu);
3611
3612	return 0;
3613}
3614
3615static void arm_smmu_device_shutdown(struct platform_device *pdev)
3616{
3617	arm_smmu_device_remove(pdev);
3618}
3619
3620static const struct of_device_id arm_smmu_of_match[] = {
3621	{ .compatible = "arm,smmu-v3", },
3622	{ },
3623};
3624MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3625
3626static struct platform_driver arm_smmu_driver = {
3627	.driver	= {
3628		.name			= "arm-smmu-v3",
3629		.of_match_table		= arm_smmu_of_match,
3630		.suppress_bind_attrs	= true,
3631	},
3632	.probe	= arm_smmu_device_probe,
3633	.remove	= arm_smmu_device_remove,
3634	.shutdown = arm_smmu_device_shutdown,
3635};
3636module_platform_driver(arm_smmu_driver);
3637
3638MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3639MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3640MODULE_ALIAS("platform:arm-smmu-v3");
3641MODULE_LICENSE("GPL v2");
3642