1// SPDX-License-Identifier: GPL-2.0
2/*
3 * IOMMU API for ARM architected SMMUv3 implementations.
4 *
5 * Copyright (C) 2015 ARM Limited
6 *
7 * Author: Will Deacon <will.deacon@arm.com>
8 *
9 * This driver is powered by bad coffee and bombay mix.
10 */
11
12#include <linux/acpi.h>
13#include <linux/acpi_iort.h>
14#include <linux/bitops.h>
15#include <linux/crash_dump.h>
16#include <linux/delay.h>
17#include <linux/err.h>
18#include <linux/interrupt.h>
19#include <linux/io-pgtable.h>
20#include <linux/iopoll.h>
21#include <linux/module.h>
22#include <linux/msi.h>
23#include <linux/of.h>
24#include <linux/of_address.h>
25#include <linux/of_platform.h>
26#include <linux/pci.h>
27#include <linux/pci-ats.h>
28#include <linux/platform_device.h>
29
30#include "arm-smmu-v3.h"
31#include "../../dma-iommu.h"
32#include "../../iommu-sva.h"
33
34static bool disable_bypass = true;
35module_param(disable_bypass, bool, 0444);
36MODULE_PARM_DESC(disable_bypass,
37	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
38
39static bool disable_msipolling;
40module_param(disable_msipolling, bool, 0444);
41MODULE_PARM_DESC(disable_msipolling,
42	"Disable MSI-based polling for CMD_SYNC completion.");
43
44enum arm_smmu_msi_index {
45	EVTQ_MSI_INDEX,
46	GERROR_MSI_INDEX,
47	PRIQ_MSI_INDEX,
48	ARM_SMMU_MAX_MSIS,
49};
50
51static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
52	[EVTQ_MSI_INDEX] = {
53		ARM_SMMU_EVTQ_IRQ_CFG0,
54		ARM_SMMU_EVTQ_IRQ_CFG1,
55		ARM_SMMU_EVTQ_IRQ_CFG2,
56	},
57	[GERROR_MSI_INDEX] = {
58		ARM_SMMU_GERROR_IRQ_CFG0,
59		ARM_SMMU_GERROR_IRQ_CFG1,
60		ARM_SMMU_GERROR_IRQ_CFG2,
61	},
62	[PRIQ_MSI_INDEX] = {
63		ARM_SMMU_PRIQ_IRQ_CFG0,
64		ARM_SMMU_PRIQ_IRQ_CFG1,
65		ARM_SMMU_PRIQ_IRQ_CFG2,
66	},
67};
68
69struct arm_smmu_option_prop {
70	u32 opt;
71	const char *prop;
72};
73
74DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
75DEFINE_MUTEX(arm_smmu_asid_lock);
76
77/*
78 * Special value used by SVA when a process dies, to quiesce a CD without
79 * disabling it.
80 */
81struct arm_smmu_ctx_desc quiet_cd = { 0 };
82
83static struct arm_smmu_option_prop arm_smmu_options[] = {
84	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
85	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
86	{ 0, NULL},
87};
88
89static void parse_driver_options(struct arm_smmu_device *smmu)
90{
91	int i = 0;
92
93	do {
94		if (of_property_read_bool(smmu->dev->of_node,
95						arm_smmu_options[i].prop)) {
96			smmu->options |= arm_smmu_options[i].opt;
97			dev_notice(smmu->dev, "option %s\n",
98				arm_smmu_options[i].prop);
99		}
100	} while (arm_smmu_options[++i].opt);
101}
102
103/* Low-level queue manipulation functions */
104static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
105{
106	u32 space, prod, cons;
107
108	prod = Q_IDX(q, q->prod);
109	cons = Q_IDX(q, q->cons);
110
111	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
112		space = (1 << q->max_n_shift) - (prod - cons);
113	else
114		space = cons - prod;
115
116	return space >= n;
117}
118
119static bool queue_full(struct arm_smmu_ll_queue *q)
120{
121	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
122	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
123}
124
125static bool queue_empty(struct arm_smmu_ll_queue *q)
126{
127	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
128	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
129}
130
131static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
132{
133	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
134		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
135	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
136		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
137}
138
139static void queue_sync_cons_out(struct arm_smmu_queue *q)
140{
141	/*
142	 * Ensure that all CPU accesses (reads and writes) to the queue
143	 * are complete before we update the cons pointer.
144	 */
145	__iomb();
146	writel_relaxed(q->llq.cons, q->cons_reg);
147}
148
149static void queue_inc_cons(struct arm_smmu_ll_queue *q)
150{
151	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
152	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
153}
154
155static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
156{
157	struct arm_smmu_ll_queue *llq = &q->llq;
158
159	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
160		return;
161
162	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
163		      Q_IDX(llq, llq->cons);
164	queue_sync_cons_out(q);
165}
166
167static int queue_sync_prod_in(struct arm_smmu_queue *q)
168{
169	u32 prod;
170	int ret = 0;
171
172	/*
173	 * We can't use the _relaxed() variant here, as we must prevent
174	 * speculative reads of the queue before we have determined that
175	 * prod has indeed moved.
176	 */
177	prod = readl(q->prod_reg);
178
179	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
180		ret = -EOVERFLOW;
181
182	q->llq.prod = prod;
183	return ret;
184}
185
186static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
187{
188	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
189	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
190}
191
192static void queue_poll_init(struct arm_smmu_device *smmu,
193			    struct arm_smmu_queue_poll *qp)
194{
195	qp->delay = 1;
196	qp->spin_cnt = 0;
197	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
198	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
199}
200
201static int queue_poll(struct arm_smmu_queue_poll *qp)
202{
203	if (ktime_compare(ktime_get(), qp->timeout) > 0)
204		return -ETIMEDOUT;
205
206	if (qp->wfe) {
207		wfe();
208	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
209		cpu_relax();
210	} else {
211		udelay(qp->delay);
212		qp->delay *= 2;
213		qp->spin_cnt = 0;
214	}
215
216	return 0;
217}
218
219static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
220{
221	int i;
222
223	for (i = 0; i < n_dwords; ++i)
224		*dst++ = cpu_to_le64(*src++);
225}
226
227static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
228{
229	int i;
230
231	for (i = 0; i < n_dwords; ++i)
232		*dst++ = le64_to_cpu(*src++);
233}
234
235static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
236{
237	if (queue_empty(&q->llq))
238		return -EAGAIN;
239
240	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
241	queue_inc_cons(&q->llq);
242	queue_sync_cons_out(q);
243	return 0;
244}
245
246/* High-level queue accessors */
247static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
248{
249	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
250	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
251
252	switch (ent->opcode) {
253	case CMDQ_OP_TLBI_EL2_ALL:
254	case CMDQ_OP_TLBI_NSNH_ALL:
255		break;
256	case CMDQ_OP_PREFETCH_CFG:
257		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
258		break;
259	case CMDQ_OP_CFGI_CD:
260		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
261		fallthrough;
262	case CMDQ_OP_CFGI_STE:
263		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
264		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
265		break;
266	case CMDQ_OP_CFGI_CD_ALL:
267		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
268		break;
269	case CMDQ_OP_CFGI_ALL:
270		/* Cover the entire SID range */
271		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
272		break;
273	case CMDQ_OP_TLBI_NH_VA:
274		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
275		fallthrough;
276	case CMDQ_OP_TLBI_EL2_VA:
277		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
278		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
279		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
280		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
281		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
282		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
283		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
284		break;
285	case CMDQ_OP_TLBI_S2_IPA:
286		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
287		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
288		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
290		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
291		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
292		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
293		break;
294	case CMDQ_OP_TLBI_NH_ASID:
295		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
296		fallthrough;
297	case CMDQ_OP_TLBI_S12_VMALL:
298		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
299		break;
300	case CMDQ_OP_TLBI_EL2_ASID:
301		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
302		break;
303	case CMDQ_OP_ATC_INV:
304		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
305		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
306		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
307		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
308		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
309		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
310		break;
311	case CMDQ_OP_PRI_RESP:
312		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
313		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
314		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
315		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
316		switch (ent->pri.resp) {
317		case PRI_RESP_DENY:
318		case PRI_RESP_FAIL:
319		case PRI_RESP_SUCC:
320			break;
321		default:
322			return -EINVAL;
323		}
324		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
325		break;
326	case CMDQ_OP_RESUME:
327		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
328		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
329		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
330		break;
331	case CMDQ_OP_CMD_SYNC:
332		if (ent->sync.msiaddr) {
333			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
334			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
335		} else {
336			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
337		}
338		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
339		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
340		break;
341	default:
342		return -ENOENT;
343	}
344
345	return 0;
346}
347
348static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
349{
350	return &smmu->cmdq;
351}
352
353static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
354					 struct arm_smmu_queue *q, u32 prod)
355{
356	struct arm_smmu_cmdq_ent ent = {
357		.opcode = CMDQ_OP_CMD_SYNC,
358	};
359
360	/*
361	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
362	 * payload, so the write will zero the entire command on that platform.
363	 */
364	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
365		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
366				   q->ent_dwords * 8;
367	}
368
369	arm_smmu_cmdq_build_cmd(cmd, &ent);
370}
371
372static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
373				     struct arm_smmu_queue *q)
374{
375	static const char * const cerror_str[] = {
376		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
377		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
378		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
379		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
380	};
381
382	int i;
383	u64 cmd[CMDQ_ENT_DWORDS];
384	u32 cons = readl_relaxed(q->cons_reg);
385	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
386	struct arm_smmu_cmdq_ent cmd_sync = {
387		.opcode = CMDQ_OP_CMD_SYNC,
388	};
389
390	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
391		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
392
393	switch (idx) {
394	case CMDQ_ERR_CERROR_ABT_IDX:
395		dev_err(smmu->dev, "retrying command fetch\n");
396		return;
397	case CMDQ_ERR_CERROR_NONE_IDX:
398		return;
399	case CMDQ_ERR_CERROR_ATC_INV_IDX:
400		/*
401		 * ATC Invalidation Completion timeout. CONS is still pointing
402		 * at the CMD_SYNC. Attempt to complete other pending commands
403		 * by repeating the CMD_SYNC, though we might well end up back
404		 * here since the ATC invalidation may still be pending.
405		 */
406		return;
407	case CMDQ_ERR_CERROR_ILL_IDX:
408	default:
409		break;
410	}
411
412	/*
413	 * We may have concurrent producers, so we need to be careful
414	 * not to touch any of the shadow cmdq state.
415	 */
416	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
417	dev_err(smmu->dev, "skipping command in error state:\n");
418	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
419		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
420
421	/* Convert the erroneous command into a CMD_SYNC */
422	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
423
424	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
425}
426
427static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
428{
429	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
430}
431
432/*
433 * Command queue locking.
434 * This is a form of bastardised rwlock with the following major changes:
435 *
436 * - The only LOCK routines are exclusive_trylock() and shared_lock().
437 *   Neither have barrier semantics, and instead provide only a control
438 *   dependency.
439 *
440 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
441 *   fails if the caller appears to be the last lock holder (yes, this is
442 *   racy). All successful UNLOCK routines have RELEASE semantics.
443 */
444static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
445{
446	int val;
447
448	/*
449	 * We can try to avoid the cmpxchg() loop by simply incrementing the
450	 * lock counter. When held in exclusive state, the lock counter is set
451	 * to INT_MIN so these increments won't hurt as the value will remain
452	 * negative.
453	 */
454	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
455		return;
456
457	do {
458		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
459	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
460}
461
462static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
463{
464	(void)atomic_dec_return_release(&cmdq->lock);
465}
466
467static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
468{
469	if (atomic_read(&cmdq->lock) == 1)
470		return false;
471
472	arm_smmu_cmdq_shared_unlock(cmdq);
473	return true;
474}
475
476#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
477({									\
478	bool __ret;							\
479	local_irq_save(flags);						\
480	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
481	if (!__ret)							\
482		local_irq_restore(flags);				\
483	__ret;								\
484})
485
486#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
487({									\
488	atomic_set_release(&cmdq->lock, 0);				\
489	local_irq_restore(flags);					\
490})
491
492
493/*
494 * Command queue insertion.
495 * This is made fiddly by our attempts to achieve some sort of scalability
496 * since there is one queue shared amongst all of the CPUs in the system.  If
497 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
498 * then you'll *love* this monstrosity.
499 *
500 * The basic idea is to split the queue up into ranges of commands that are
501 * owned by a given CPU; the owner may not have written all of the commands
502 * itself, but is responsible for advancing the hardware prod pointer when
503 * the time comes. The algorithm is roughly:
504 *
505 * 	1. Allocate some space in the queue. At this point we also discover
506 *	   whether the head of the queue is currently owned by another CPU,
507 *	   or whether we are the owner.
508 *
509 *	2. Write our commands into our allocated slots in the queue.
510 *
511 *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
512 *
513 *	4. If we are an owner:
514 *		a. Wait for the previous owner to finish.
515 *		b. Mark the queue head as unowned, which tells us the range
516 *		   that we are responsible for publishing.
517 *		c. Wait for all commands in our owned range to become valid.
518 *		d. Advance the hardware prod pointer.
519 *		e. Tell the next owner we've finished.
520 *
521 *	5. If we are inserting a CMD_SYNC (we may or may not have been an
522 *	   owner), then we need to stick around until it has completed:
523 *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
524 *		   to clear the first 4 bytes.
525 *		b. Otherwise, we spin waiting for the hardware cons pointer to
526 *		   advance past our command.
527 *
528 * The devil is in the details, particularly the use of locking for handling
529 * SYNC completion and freeing up space in the queue before we think that it is
530 * full.
531 */
532static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
533					       u32 sprod, u32 eprod, bool set)
534{
535	u32 swidx, sbidx, ewidx, ebidx;
536	struct arm_smmu_ll_queue llq = {
537		.max_n_shift	= cmdq->q.llq.max_n_shift,
538		.prod		= sprod,
539	};
540
541	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
542	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
543
544	while (llq.prod != eprod) {
545		unsigned long mask;
546		atomic_long_t *ptr;
547		u32 limit = BITS_PER_LONG;
548
549		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
550		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
551
552		ptr = &cmdq->valid_map[swidx];
553
554		if ((swidx == ewidx) && (sbidx < ebidx))
555			limit = ebidx;
556
557		mask = GENMASK(limit - 1, sbidx);
558
559		/*
560		 * The valid bit is the inverse of the wrap bit. This means
561		 * that a zero-initialised queue is invalid and, after marking
562		 * all entries as valid, they become invalid again when we
563		 * wrap.
564		 */
565		if (set) {
566			atomic_long_xor(mask, ptr);
567		} else { /* Poll */
568			unsigned long valid;
569
570			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
571			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
572		}
573
574		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
575	}
576}
577
578/* Mark all entries in the range [sprod, eprod) as valid */
579static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
580					u32 sprod, u32 eprod)
581{
582	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
583}
584
585/* Wait for all entries in the range [sprod, eprod) to become valid */
586static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
587					 u32 sprod, u32 eprod)
588{
589	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
590}
591
592/* Wait for the command queue to become non-full */
593static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
594					     struct arm_smmu_ll_queue *llq)
595{
596	unsigned long flags;
597	struct arm_smmu_queue_poll qp;
598	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
599	int ret = 0;
600
601	/*
602	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
603	 * that fails, spin until somebody else updates it for us.
604	 */
605	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
606		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
607		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
608		llq->val = READ_ONCE(cmdq->q.llq.val);
609		return 0;
610	}
611
612	queue_poll_init(smmu, &qp);
613	do {
614		llq->val = READ_ONCE(cmdq->q.llq.val);
615		if (!queue_full(llq))
616			break;
617
618		ret = queue_poll(&qp);
619	} while (!ret);
620
621	return ret;
622}
623
624/*
625 * Wait until the SMMU signals a CMD_SYNC completion MSI.
626 * Must be called with the cmdq lock held in some capacity.
627 */
628static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
629					  struct arm_smmu_ll_queue *llq)
630{
631	int ret = 0;
632	struct arm_smmu_queue_poll qp;
633	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
634	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
635
636	queue_poll_init(smmu, &qp);
637
638	/*
639	 * The MSI won't generate an event, since it's being written back
640	 * into the command queue.
641	 */
642	qp.wfe = false;
643	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
644	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
645	return ret;
646}
647
648/*
649 * Wait until the SMMU cons index passes llq->prod.
650 * Must be called with the cmdq lock held in some capacity.
651 */
652static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
653					       struct arm_smmu_ll_queue *llq)
654{
655	struct arm_smmu_queue_poll qp;
656	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
657	u32 prod = llq->prod;
658	int ret = 0;
659
660	queue_poll_init(smmu, &qp);
661	llq->val = READ_ONCE(cmdq->q.llq.val);
662	do {
663		if (queue_consumed(llq, prod))
664			break;
665
666		ret = queue_poll(&qp);
667
668		/*
669		 * This needs to be a readl() so that our subsequent call
670		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
671		 *
672		 * Specifically, we need to ensure that we observe all
673		 * shared_lock()s by other CMD_SYNCs that share our owner,
674		 * so that a failing call to tryunlock() means that we're
675		 * the last one out and therefore we can safely advance
676		 * cmdq->q.llq.cons. Roughly speaking:
677		 *
678		 * CPU 0		CPU1			CPU2 (us)
679		 *
680		 * if (sync)
681		 * 	shared_lock();
682		 *
683		 * dma_wmb();
684		 * set_valid_map();
685		 *
686		 * 			if (owner) {
687		 *				poll_valid_map();
688		 *				<control dependency>
689		 *				writel(prod_reg);
690		 *
691		 *						readl(cons_reg);
692		 *						tryunlock();
693		 *
694		 * Requires us to see CPU 0's shared_lock() acquisition.
695		 */
696		llq->cons = readl(cmdq->q.cons_reg);
697	} while (!ret);
698
699	return ret;
700}
701
702static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
703					 struct arm_smmu_ll_queue *llq)
704{
705	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
706		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
707
708	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
709}
710
711static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
712					u32 prod, int n)
713{
714	int i;
715	struct arm_smmu_ll_queue llq = {
716		.max_n_shift	= cmdq->q.llq.max_n_shift,
717		.prod		= prod,
718	};
719
720	for (i = 0; i < n; ++i) {
721		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
722
723		prod = queue_inc_prod_n(&llq, i);
724		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
725	}
726}
727
728/*
729 * This is the actual insertion function, and provides the following
730 * ordering guarantees to callers:
731 *
732 * - There is a dma_wmb() before publishing any commands to the queue.
733 *   This can be relied upon to order prior writes to data structures
734 *   in memory (such as a CD or an STE) before the command.
735 *
736 * - On completion of a CMD_SYNC, there is a control dependency.
737 *   This can be relied upon to order subsequent writes to memory (e.g.
738 *   freeing an IOVA) after completion of the CMD_SYNC.
739 *
740 * - Command insertion is totally ordered, so if two CPUs each race to
741 *   insert their own list of commands then all of the commands from one
742 *   CPU will appear before any of the commands from the other CPU.
743 */
744static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
745				       u64 *cmds, int n, bool sync)
746{
747	u64 cmd_sync[CMDQ_ENT_DWORDS];
748	u32 prod;
749	unsigned long flags;
750	bool owner;
751	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
752	struct arm_smmu_ll_queue llq, head;
753	int ret = 0;
754
755	llq.max_n_shift = cmdq->q.llq.max_n_shift;
756
757	/* 1. Allocate some space in the queue */
758	local_irq_save(flags);
759	llq.val = READ_ONCE(cmdq->q.llq.val);
760	do {
761		u64 old;
762
763		while (!queue_has_space(&llq, n + sync)) {
764			local_irq_restore(flags);
765			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
766				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
767			local_irq_save(flags);
768		}
769
770		head.cons = llq.cons;
771		head.prod = queue_inc_prod_n(&llq, n + sync) |
772					     CMDQ_PROD_OWNED_FLAG;
773
774		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
775		if (old == llq.val)
776			break;
777
778		llq.val = old;
779	} while (1);
780	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
781	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
782	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
783
784	/*
785	 * 2. Write our commands into the queue
786	 * Dependency ordering from the cmpxchg() loop above.
787	 */
788	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
789	if (sync) {
790		prod = queue_inc_prod_n(&llq, n);
791		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
792		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
793
794		/*
795		 * In order to determine completion of our CMD_SYNC, we must
796		 * ensure that the queue can't wrap twice without us noticing.
797		 * We achieve that by taking the cmdq lock as shared before
798		 * marking our slot as valid.
799		 */
800		arm_smmu_cmdq_shared_lock(cmdq);
801	}
802
803	/* 3. Mark our slots as valid, ensuring commands are visible first */
804	dma_wmb();
805	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
806
807	/* 4. If we are the owner, take control of the SMMU hardware */
808	if (owner) {
809		/* a. Wait for previous owner to finish */
810		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
811
812		/* b. Stop gathering work by clearing the owned flag */
813		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
814						   &cmdq->q.llq.atomic.prod);
815		prod &= ~CMDQ_PROD_OWNED_FLAG;
816
817		/*
818		 * c. Wait for any gathered work to be written to the queue.
819		 * Note that we read our own entries so that we have the control
820		 * dependency required by (d).
821		 */
822		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
823
824		/*
825		 * d. Advance the hardware prod pointer
826		 * Control dependency ordering from the entries becoming valid.
827		 */
828		writel_relaxed(prod, cmdq->q.prod_reg);
829
830		/*
831		 * e. Tell the next owner we're done
832		 * Make sure we've updated the hardware first, so that we don't
833		 * race to update prod and potentially move it backwards.
834		 */
835		atomic_set_release(&cmdq->owner_prod, prod);
836	}
837
838	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
839	if (sync) {
840		llq.prod = queue_inc_prod_n(&llq, n);
841		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
842		if (ret) {
843			dev_err_ratelimited(smmu->dev,
844					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
845					    llq.prod,
846					    readl_relaxed(cmdq->q.prod_reg),
847					    readl_relaxed(cmdq->q.cons_reg));
848		}
849
850		/*
851		 * Try to unlock the cmdq lock. This will fail if we're the last
852		 * reader, in which case we can safely update cmdq->q.llq.cons
853		 */
854		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
855			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
856			arm_smmu_cmdq_shared_unlock(cmdq);
857		}
858	}
859
860	local_irq_restore(flags);
861	return ret;
862}
863
864static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
865				     struct arm_smmu_cmdq_ent *ent,
866				     bool sync)
867{
868	u64 cmd[CMDQ_ENT_DWORDS];
869
870	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
871		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
872			 ent->opcode);
873		return -EINVAL;
874	}
875
876	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
877}
878
879static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
880				   struct arm_smmu_cmdq_ent *ent)
881{
882	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
883}
884
885static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
886					     struct arm_smmu_cmdq_ent *ent)
887{
888	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
889}
890
891static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
892				    struct arm_smmu_cmdq_batch *cmds,
893				    struct arm_smmu_cmdq_ent *cmd)
894{
895	int index;
896
897	if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
898	    (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
899		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
900		cmds->num = 0;
901	}
902
903	if (cmds->num == CMDQ_BATCH_ENTRIES) {
904		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
905		cmds->num = 0;
906	}
907
908	index = cmds->num * CMDQ_ENT_DWORDS;
909	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
910		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
911			 cmd->opcode);
912		return;
913	}
914
915	cmds->num++;
916}
917
918static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
919				      struct arm_smmu_cmdq_batch *cmds)
920{
921	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
922}
923
924static int arm_smmu_page_response(struct device *dev,
925				  struct iommu_fault_event *unused,
926				  struct iommu_page_response *resp)
927{
928	struct arm_smmu_cmdq_ent cmd = {0};
929	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
930	int sid = master->streams[0].id;
931
932	if (master->stall_enabled) {
933		cmd.opcode		= CMDQ_OP_RESUME;
934		cmd.resume.sid		= sid;
935		cmd.resume.stag		= resp->grpid;
936		switch (resp->code) {
937		case IOMMU_PAGE_RESP_INVALID:
938		case IOMMU_PAGE_RESP_FAILURE:
939			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
940			break;
941		case IOMMU_PAGE_RESP_SUCCESS:
942			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
943			break;
944		default:
945			return -EINVAL;
946		}
947	} else {
948		return -ENODEV;
949	}
950
951	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
952	/*
953	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
954	 * RESUME consumption guarantees that the stalled transaction will be
955	 * terminated... at some point in the future. PRI_RESP is fire and
956	 * forget.
957	 */
958
959	return 0;
960}
961
962/* Context descriptor manipulation functions */
963void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
964{
965	struct arm_smmu_cmdq_ent cmd = {
966		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
967			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
968		.tlbi.asid = asid,
969	};
970
971	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
972}
973
974static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
975			     int ssid, bool leaf)
976{
977	size_t i;
978	unsigned long flags;
979	struct arm_smmu_master *master;
980	struct arm_smmu_cmdq_batch cmds;
981	struct arm_smmu_device *smmu = smmu_domain->smmu;
982	struct arm_smmu_cmdq_ent cmd = {
983		.opcode	= CMDQ_OP_CFGI_CD,
984		.cfgi	= {
985			.ssid	= ssid,
986			.leaf	= leaf,
987		},
988	};
989
990	cmds.num = 0;
991
992	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
993	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
994		for (i = 0; i < master->num_streams; i++) {
995			cmd.cfgi.sid = master->streams[i].id;
996			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
997		}
998	}
999	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1000
1001	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1002}
1003
1004static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1005					struct arm_smmu_l1_ctx_desc *l1_desc)
1006{
1007	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1008
1009	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1010					     &l1_desc->l2ptr_dma, GFP_KERNEL);
1011	if (!l1_desc->l2ptr) {
1012		dev_warn(smmu->dev,
1013			 "failed to allocate context descriptor table\n");
1014		return -ENOMEM;
1015	}
1016	return 0;
1017}
1018
1019static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1020				      struct arm_smmu_l1_ctx_desc *l1_desc)
1021{
1022	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1023		  CTXDESC_L1_DESC_V;
1024
1025	/* See comment in arm_smmu_write_ctx_desc() */
1026	WRITE_ONCE(*dst, cpu_to_le64(val));
1027}
1028
1029static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1030				   u32 ssid)
1031{
1032	__le64 *l1ptr;
1033	unsigned int idx;
1034	struct arm_smmu_l1_ctx_desc *l1_desc;
1035	struct arm_smmu_device *smmu = smmu_domain->smmu;
1036	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1037
1038	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1039		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1040
1041	idx = ssid >> CTXDESC_SPLIT;
1042	l1_desc = &cdcfg->l1_desc[idx];
1043	if (!l1_desc->l2ptr) {
1044		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1045			return NULL;
1046
1047		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1048		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1049		/* An invalid L1CD can be cached */
1050		arm_smmu_sync_cd(smmu_domain, ssid, false);
1051	}
1052	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1053	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1054}
1055
1056int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1057			    struct arm_smmu_ctx_desc *cd)
1058{
1059	/*
1060	 * This function handles the following cases:
1061	 *
1062	 * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0).
1063	 * (2) Install a secondary CD, for SID+SSID traffic.
1064	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1065	 *     CD, then invalidate the old entry and mappings.
1066	 * (4) Quiesce the context without clearing the valid bit. Disable
1067	 *     translation, and ignore any translation fault.
1068	 * (5) Remove a secondary CD.
1069	 */
1070	u64 val;
1071	bool cd_live;
1072	__le64 *cdptr;
1073
1074	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1075		return -E2BIG;
1076
1077	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1078	if (!cdptr)
1079		return -ENOMEM;
1080
1081	val = le64_to_cpu(cdptr[0]);
1082	cd_live = !!(val & CTXDESC_CD_0_V);
1083
1084	if (!cd) { /* (5) */
1085		val = 0;
1086	} else if (cd == &quiet_cd) { /* (4) */
1087		val |= CTXDESC_CD_0_TCR_EPD0;
1088	} else if (cd_live) { /* (3) */
1089		val &= ~CTXDESC_CD_0_ASID;
1090		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1091		/*
1092		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1093		 * this substream's traffic
1094		 */
1095	} else { /* (1) and (2) */
1096		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1097		cdptr[2] = 0;
1098		cdptr[3] = cpu_to_le64(cd->mair);
1099
1100		/*
1101		 * STE is live, and the SMMU might read dwords of this CD in any
1102		 * order. Ensure that it observes valid values before reading
1103		 * V=1.
1104		 */
1105		arm_smmu_sync_cd(smmu_domain, ssid, true);
1106
1107		val = cd->tcr |
1108#ifdef __BIG_ENDIAN
1109			CTXDESC_CD_0_ENDI |
1110#endif
1111			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1112			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1113			CTXDESC_CD_0_AA64 |
1114			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1115			CTXDESC_CD_0_V;
1116
1117		if (smmu_domain->stall_enabled)
1118			val |= CTXDESC_CD_0_S;
1119	}
1120
1121	/*
1122	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1123	 * "Configuration structures and configuration invalidation completion"
1124	 *
1125	 *   The size of single-copy atomic reads made by the SMMU is
1126	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1127	 *   field within an aligned 64-bit span of a structure can be altered
1128	 *   without first making the structure invalid.
1129	 */
1130	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1131	arm_smmu_sync_cd(smmu_domain, ssid, true);
1132	return 0;
1133}
1134
1135static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1136{
1137	int ret;
1138	size_t l1size;
1139	size_t max_contexts;
1140	struct arm_smmu_device *smmu = smmu_domain->smmu;
1141	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1142	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1143
1144	max_contexts = 1 << cfg->s1cdmax;
1145
1146	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1147	    max_contexts <= CTXDESC_L2_ENTRIES) {
1148		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1149		cdcfg->num_l1_ents = max_contexts;
1150
1151		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1152	} else {
1153		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1154		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1155						  CTXDESC_L2_ENTRIES);
1156
1157		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1158					      sizeof(*cdcfg->l1_desc),
1159					      GFP_KERNEL);
1160		if (!cdcfg->l1_desc)
1161			return -ENOMEM;
1162
1163		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1164	}
1165
1166	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1167					   GFP_KERNEL);
1168	if (!cdcfg->cdtab) {
1169		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1170		ret = -ENOMEM;
1171		goto err_free_l1;
1172	}
1173
1174	return 0;
1175
1176err_free_l1:
1177	if (cdcfg->l1_desc) {
1178		devm_kfree(smmu->dev, cdcfg->l1_desc);
1179		cdcfg->l1_desc = NULL;
1180	}
1181	return ret;
1182}
1183
1184static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1185{
1186	int i;
1187	size_t size, l1size;
1188	struct arm_smmu_device *smmu = smmu_domain->smmu;
1189	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1190
1191	if (cdcfg->l1_desc) {
1192		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1193
1194		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1195			if (!cdcfg->l1_desc[i].l2ptr)
1196				continue;
1197
1198			dmam_free_coherent(smmu->dev, size,
1199					   cdcfg->l1_desc[i].l2ptr,
1200					   cdcfg->l1_desc[i].l2ptr_dma);
1201		}
1202		devm_kfree(smmu->dev, cdcfg->l1_desc);
1203		cdcfg->l1_desc = NULL;
1204
1205		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1206	} else {
1207		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1208	}
1209
1210	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1211	cdcfg->cdtab_dma = 0;
1212	cdcfg->cdtab = NULL;
1213}
1214
1215bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1216{
1217	bool free;
1218	struct arm_smmu_ctx_desc *old_cd;
1219
1220	if (!cd->asid)
1221		return false;
1222
1223	free = refcount_dec_and_test(&cd->refs);
1224	if (free) {
1225		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1226		WARN_ON(old_cd != cd);
1227	}
1228	return free;
1229}
1230
1231/* Stream table manipulation functions */
1232static void
1233arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1234{
1235	u64 val = 0;
1236
1237	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1238	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1239
1240	/* See comment in arm_smmu_write_ctx_desc() */
1241	WRITE_ONCE(*dst, cpu_to_le64(val));
1242}
1243
1244static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1245{
1246	struct arm_smmu_cmdq_ent cmd = {
1247		.opcode	= CMDQ_OP_CFGI_STE,
1248		.cfgi	= {
1249			.sid	= sid,
1250			.leaf	= true,
1251		},
1252	};
1253
1254	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1255}
1256
1257static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1258				      __le64 *dst)
1259{
1260	/*
1261	 * This is hideously complicated, but we only really care about
1262	 * three cases at the moment:
1263	 *
1264	 * 1. Invalid (all zero) -> bypass/fault (init)
1265	 * 2. Bypass/fault -> translation/bypass (attach)
1266	 * 3. Translation/bypass -> bypass/fault (detach)
1267	 *
1268	 * Given that we can't update the STE atomically and the SMMU
1269	 * doesn't read the thing in a defined order, that leaves us
1270	 * with the following maintenance requirements:
1271	 *
1272	 * 1. Update Config, return (init time STEs aren't live)
1273	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1274	 * 3. Update Config, sync
1275	 */
1276	u64 val = le64_to_cpu(dst[0]);
1277	bool ste_live = false;
1278	struct arm_smmu_device *smmu = NULL;
1279	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1280	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1281	struct arm_smmu_domain *smmu_domain = NULL;
1282	struct arm_smmu_cmdq_ent prefetch_cmd = {
1283		.opcode		= CMDQ_OP_PREFETCH_CFG,
1284		.prefetch	= {
1285			.sid	= sid,
1286		},
1287	};
1288
1289	if (master) {
1290		smmu_domain = master->domain;
1291		smmu = master->smmu;
1292	}
1293
1294	if (smmu_domain) {
1295		switch (smmu_domain->stage) {
1296		case ARM_SMMU_DOMAIN_S1:
1297			s1_cfg = &smmu_domain->s1_cfg;
1298			break;
1299		case ARM_SMMU_DOMAIN_S2:
1300		case ARM_SMMU_DOMAIN_NESTED:
1301			s2_cfg = &smmu_domain->s2_cfg;
1302			break;
1303		default:
1304			break;
1305		}
1306	}
1307
1308	if (val & STRTAB_STE_0_V) {
1309		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1310		case STRTAB_STE_0_CFG_BYPASS:
1311			break;
1312		case STRTAB_STE_0_CFG_S1_TRANS:
1313		case STRTAB_STE_0_CFG_S2_TRANS:
1314			ste_live = true;
1315			break;
1316		case STRTAB_STE_0_CFG_ABORT:
1317			BUG_ON(!disable_bypass);
1318			break;
1319		default:
1320			BUG(); /* STE corruption */
1321		}
1322	}
1323
1324	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1325	val = STRTAB_STE_0_V;
1326
1327	/* Bypass/fault */
1328	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1329		if (!smmu_domain && disable_bypass)
1330			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1331		else
1332			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1333
1334		dst[0] = cpu_to_le64(val);
1335		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1336						STRTAB_STE_1_SHCFG_INCOMING));
1337		dst[2] = 0; /* Nuke the VMID */
1338		/*
1339		 * The SMMU can perform negative caching, so we must sync
1340		 * the STE regardless of whether the old value was live.
1341		 */
1342		if (smmu)
1343			arm_smmu_sync_ste_for_sid(smmu, sid);
1344		return;
1345	}
1346
1347	if (s1_cfg) {
1348		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1349			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1350
1351		BUG_ON(ste_live);
1352		dst[1] = cpu_to_le64(
1353			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1354			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1355			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1356			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1357			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1358
1359		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1360		    !master->stall_enabled)
1361			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1362
1363		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1364			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1365			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1366			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1367	}
1368
1369	if (s2_cfg) {
1370		BUG_ON(ste_live);
1371		dst[2] = cpu_to_le64(
1372			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1373			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1374#ifdef __BIG_ENDIAN
1375			 STRTAB_STE_2_S2ENDI |
1376#endif
1377			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1378			 STRTAB_STE_2_S2R);
1379
1380		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1381
1382		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1383	}
1384
1385	if (master->ats_enabled)
1386		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1387						 STRTAB_STE_1_EATS_TRANS));
1388
1389	arm_smmu_sync_ste_for_sid(smmu, sid);
1390	/* See comment in arm_smmu_write_ctx_desc() */
1391	WRITE_ONCE(dst[0], cpu_to_le64(val));
1392	arm_smmu_sync_ste_for_sid(smmu, sid);
1393
1394	/* It's likely that we'll want to use the new STE soon */
1395	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1396		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1397}
1398
1399static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force)
1400{
1401	unsigned int i;
1402	u64 val = STRTAB_STE_0_V;
1403
1404	if (disable_bypass && !force)
1405		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1406	else
1407		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1408
1409	for (i = 0; i < nent; ++i) {
1410		strtab[0] = cpu_to_le64(val);
1411		strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1412						   STRTAB_STE_1_SHCFG_INCOMING));
1413		strtab[2] = 0;
1414		strtab += STRTAB_STE_DWORDS;
1415	}
1416}
1417
1418static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1419{
1420	size_t size;
1421	void *strtab;
1422	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1423	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1424
1425	if (desc->l2ptr)
1426		return 0;
1427
1428	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1429	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1430
1431	desc->span = STRTAB_SPLIT + 1;
1432	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1433					  GFP_KERNEL);
1434	if (!desc->l2ptr) {
1435		dev_err(smmu->dev,
1436			"failed to allocate l2 stream table for SID %u\n",
1437			sid);
1438		return -ENOMEM;
1439	}
1440
1441	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
1442	arm_smmu_write_strtab_l1_desc(strtab, desc);
1443	return 0;
1444}
1445
1446static struct arm_smmu_master *
1447arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1448{
1449	struct rb_node *node;
1450	struct arm_smmu_stream *stream;
1451
1452	lockdep_assert_held(&smmu->streams_mutex);
1453
1454	node = smmu->streams.rb_node;
1455	while (node) {
1456		stream = rb_entry(node, struct arm_smmu_stream, node);
1457		if (stream->id < sid)
1458			node = node->rb_right;
1459		else if (stream->id > sid)
1460			node = node->rb_left;
1461		else
1462			return stream->master;
1463	}
1464
1465	return NULL;
1466}
1467
1468/* IRQ and event handlers */
1469static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1470{
1471	int ret;
1472	u32 reason;
1473	u32 perm = 0;
1474	struct arm_smmu_master *master;
1475	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1476	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1477	struct iommu_fault_event fault_evt = { };
1478	struct iommu_fault *flt = &fault_evt.fault;
1479
1480	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1481	case EVT_ID_TRANSLATION_FAULT:
1482		reason = IOMMU_FAULT_REASON_PTE_FETCH;
1483		break;
1484	case EVT_ID_ADDR_SIZE_FAULT:
1485		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1486		break;
1487	case EVT_ID_ACCESS_FAULT:
1488		reason = IOMMU_FAULT_REASON_ACCESS;
1489		break;
1490	case EVT_ID_PERMISSION_FAULT:
1491		reason = IOMMU_FAULT_REASON_PERMISSION;
1492		break;
1493	default:
1494		return -EOPNOTSUPP;
1495	}
1496
1497	/* Stage-2 is always pinned at the moment */
1498	if (evt[1] & EVTQ_1_S2)
1499		return -EFAULT;
1500
1501	if (evt[1] & EVTQ_1_RnW)
1502		perm |= IOMMU_FAULT_PERM_READ;
1503	else
1504		perm |= IOMMU_FAULT_PERM_WRITE;
1505
1506	if (evt[1] & EVTQ_1_InD)
1507		perm |= IOMMU_FAULT_PERM_EXEC;
1508
1509	if (evt[1] & EVTQ_1_PnU)
1510		perm |= IOMMU_FAULT_PERM_PRIV;
1511
1512	if (evt[1] & EVTQ_1_STALL) {
1513		flt->type = IOMMU_FAULT_PAGE_REQ;
1514		flt->prm = (struct iommu_fault_page_request) {
1515			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1516			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1517			.perm = perm,
1518			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1519		};
1520
1521		if (ssid_valid) {
1522			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1523			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1524		}
1525	} else {
1526		flt->type = IOMMU_FAULT_DMA_UNRECOV;
1527		flt->event = (struct iommu_fault_unrecoverable) {
1528			.reason = reason,
1529			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1530			.perm = perm,
1531			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1532		};
1533
1534		if (ssid_valid) {
1535			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1536			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1537		}
1538	}
1539
1540	mutex_lock(&smmu->streams_mutex);
1541	master = arm_smmu_find_master(smmu, sid);
1542	if (!master) {
1543		ret = -EINVAL;
1544		goto out_unlock;
1545	}
1546
1547	ret = iommu_report_device_fault(master->dev, &fault_evt);
1548	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1549		/* Nobody cared, abort the access */
1550		struct iommu_page_response resp = {
1551			.pasid		= flt->prm.pasid,
1552			.grpid		= flt->prm.grpid,
1553			.code		= IOMMU_PAGE_RESP_FAILURE,
1554		};
1555		arm_smmu_page_response(master->dev, &fault_evt, &resp);
1556	}
1557
1558out_unlock:
1559	mutex_unlock(&smmu->streams_mutex);
1560	return ret;
1561}
1562
1563static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1564{
1565	int i, ret;
1566	struct arm_smmu_device *smmu = dev;
1567	struct arm_smmu_queue *q = &smmu->evtq.q;
1568	struct arm_smmu_ll_queue *llq = &q->llq;
1569	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1570				      DEFAULT_RATELIMIT_BURST);
1571	u64 evt[EVTQ_ENT_DWORDS];
1572
1573	do {
1574		while (!queue_remove_raw(q, evt)) {
1575			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1576
1577			ret = arm_smmu_handle_evt(smmu, evt);
1578			if (!ret || !__ratelimit(&rs))
1579				continue;
1580
1581			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1582			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1583				dev_info(smmu->dev, "\t0x%016llx\n",
1584					 (unsigned long long)evt[i]);
1585
1586			cond_resched();
1587		}
1588
1589		/*
1590		 * Not much we can do on overflow, so scream and pretend we're
1591		 * trying harder.
1592		 */
1593		if (queue_sync_prod_in(q) == -EOVERFLOW)
1594			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1595	} while (!queue_empty(llq));
1596
1597	/* Sync our overflow flag, as we believe we're up to speed */
1598	queue_sync_cons_ovf(q);
1599	return IRQ_HANDLED;
1600}
1601
1602static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1603{
1604	u32 sid, ssid;
1605	u16 grpid;
1606	bool ssv, last;
1607
1608	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1609	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1610	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1611	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1612	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1613
1614	dev_info(smmu->dev, "unexpected PRI request received:\n");
1615	dev_info(smmu->dev,
1616		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1617		 sid, ssid, grpid, last ? "L" : "",
1618		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1619		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1620		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1621		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1622		 evt[1] & PRIQ_1_ADDR_MASK);
1623
1624	if (last) {
1625		struct arm_smmu_cmdq_ent cmd = {
1626			.opcode			= CMDQ_OP_PRI_RESP,
1627			.substream_valid	= ssv,
1628			.pri			= {
1629				.sid	= sid,
1630				.ssid	= ssid,
1631				.grpid	= grpid,
1632				.resp	= PRI_RESP_DENY,
1633			},
1634		};
1635
1636		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1637	}
1638}
1639
1640static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1641{
1642	struct arm_smmu_device *smmu = dev;
1643	struct arm_smmu_queue *q = &smmu->priq.q;
1644	struct arm_smmu_ll_queue *llq = &q->llq;
1645	u64 evt[PRIQ_ENT_DWORDS];
1646
1647	do {
1648		while (!queue_remove_raw(q, evt))
1649			arm_smmu_handle_ppr(smmu, evt);
1650
1651		if (queue_sync_prod_in(q) == -EOVERFLOW)
1652			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1653	} while (!queue_empty(llq));
1654
1655	/* Sync our overflow flag, as we believe we're up to speed */
1656	queue_sync_cons_ovf(q);
1657	return IRQ_HANDLED;
1658}
1659
1660static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1661
1662static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1663{
1664	u32 gerror, gerrorn, active;
1665	struct arm_smmu_device *smmu = dev;
1666
1667	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1668	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1669
1670	active = gerror ^ gerrorn;
1671	if (!(active & GERROR_ERR_MASK))
1672		return IRQ_NONE; /* No errors pending */
1673
1674	dev_warn(smmu->dev,
1675		 "unexpected global error reported (0x%08x), this could be serious\n",
1676		 active);
1677
1678	if (active & GERROR_SFM_ERR) {
1679		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1680		arm_smmu_device_disable(smmu);
1681	}
1682
1683	if (active & GERROR_MSI_GERROR_ABT_ERR)
1684		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1685
1686	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1687		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1688
1689	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1690		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1691
1692	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1693		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1694
1695	if (active & GERROR_PRIQ_ABT_ERR)
1696		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1697
1698	if (active & GERROR_EVTQ_ABT_ERR)
1699		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1700
1701	if (active & GERROR_CMDQ_ERR)
1702		arm_smmu_cmdq_skip_err(smmu);
1703
1704	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1705	return IRQ_HANDLED;
1706}
1707
1708static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1709{
1710	struct arm_smmu_device *smmu = dev;
1711
1712	arm_smmu_evtq_thread(irq, dev);
1713	if (smmu->features & ARM_SMMU_FEAT_PRI)
1714		arm_smmu_priq_thread(irq, dev);
1715
1716	return IRQ_HANDLED;
1717}
1718
1719static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1720{
1721	arm_smmu_gerror_handler(irq, dev);
1722	return IRQ_WAKE_THREAD;
1723}
1724
1725static void
1726arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1727			struct arm_smmu_cmdq_ent *cmd)
1728{
1729	size_t log2_span;
1730	size_t span_mask;
1731	/* ATC invalidates are always on 4096-bytes pages */
1732	size_t inval_grain_shift = 12;
1733	unsigned long page_start, page_end;
1734
1735	/*
1736	 * ATS and PASID:
1737	 *
1738	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1739	 * prefix. In that case all ATC entries within the address range are
1740	 * invalidated, including those that were requested with a PASID! There
1741	 * is no way to invalidate only entries without PASID.
1742	 *
1743	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1744	 * traffic), translation requests without PASID create ATC entries
1745	 * without PASID, which must be invalidated with substream_valid clear.
1746	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1747	 * ATC entries within the address range.
1748	 */
1749	*cmd = (struct arm_smmu_cmdq_ent) {
1750		.opcode			= CMDQ_OP_ATC_INV,
1751		.substream_valid	= (ssid != IOMMU_NO_PASID),
1752		.atc.ssid		= ssid,
1753	};
1754
1755	if (!size) {
1756		cmd->atc.size = ATC_INV_SIZE_ALL;
1757		return;
1758	}
1759
1760	page_start	= iova >> inval_grain_shift;
1761	page_end	= (iova + size - 1) >> inval_grain_shift;
1762
1763	/*
1764	 * In an ATS Invalidate Request, the address must be aligned on the
1765	 * range size, which must be a power of two number of page sizes. We
1766	 * thus have to choose between grossly over-invalidating the region, or
1767	 * splitting the invalidation into multiple commands. For simplicity
1768	 * we'll go with the first solution, but should refine it in the future
1769	 * if multiple commands are shown to be more efficient.
1770	 *
1771	 * Find the smallest power of two that covers the range. The most
1772	 * significant differing bit between the start and end addresses,
1773	 * fls(start ^ end), indicates the required span. For example:
1774	 *
1775	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1776	 *		x = 0b1000 ^ 0b1011 = 0b11
1777	 *		span = 1 << fls(x) = 4
1778	 *
1779	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1780	 *		x = 0b0111 ^ 0b1010 = 0b1101
1781	 *		span = 1 << fls(x) = 16
1782	 */
1783	log2_span	= fls_long(page_start ^ page_end);
1784	span_mask	= (1ULL << log2_span) - 1;
1785
1786	page_start	&= ~span_mask;
1787
1788	cmd->atc.addr	= page_start << inval_grain_shift;
1789	cmd->atc.size	= log2_span;
1790}
1791
1792static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1793{
1794	int i;
1795	struct arm_smmu_cmdq_ent cmd;
1796	struct arm_smmu_cmdq_batch cmds;
1797
1798	arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
1799
1800	cmds.num = 0;
1801	for (i = 0; i < master->num_streams; i++) {
1802		cmd.atc.sid = master->streams[i].id;
1803		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1804	}
1805
1806	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1807}
1808
1809int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1810			    unsigned long iova, size_t size)
1811{
1812	int i;
1813	unsigned long flags;
1814	struct arm_smmu_cmdq_ent cmd;
1815	struct arm_smmu_master *master;
1816	struct arm_smmu_cmdq_batch cmds;
1817
1818	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1819		return 0;
1820
1821	/*
1822	 * Ensure that we've completed prior invalidation of the main TLBs
1823	 * before we read 'nr_ats_masters' in case of a concurrent call to
1824	 * arm_smmu_enable_ats():
1825	 *
1826	 *	// unmap()			// arm_smmu_enable_ats()
1827	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1828	 *	smp_mb();			[...]
1829	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1830	 *
1831	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1832	 * ATS was enabled at the PCI device before completion of the TLBI.
1833	 */
1834	smp_mb();
1835	if (!atomic_read(&smmu_domain->nr_ats_masters))
1836		return 0;
1837
1838	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1839
1840	cmds.num = 0;
1841
1842	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1843	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1844		if (!master->ats_enabled)
1845			continue;
1846
1847		for (i = 0; i < master->num_streams; i++) {
1848			cmd.atc.sid = master->streams[i].id;
1849			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1850		}
1851	}
1852	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1853
1854	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1855}
1856
1857/* IO_PGTABLE API */
1858static void arm_smmu_tlb_inv_context(void *cookie)
1859{
1860	struct arm_smmu_domain *smmu_domain = cookie;
1861	struct arm_smmu_device *smmu = smmu_domain->smmu;
1862	struct arm_smmu_cmdq_ent cmd;
1863
1864	/*
1865	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1866	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1867	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1868	 * insertion to guarantee those are observed before the TLBI. Do be
1869	 * careful, 007.
1870	 */
1871	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1872		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1873	} else {
1874		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1875		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1876		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1877	}
1878	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
1879}
1880
1881static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1882				     unsigned long iova, size_t size,
1883				     size_t granule,
1884				     struct arm_smmu_domain *smmu_domain)
1885{
1886	struct arm_smmu_device *smmu = smmu_domain->smmu;
1887	unsigned long end = iova + size, num_pages = 0, tg = 0;
1888	size_t inv_range = granule;
1889	struct arm_smmu_cmdq_batch cmds;
1890
1891	if (!size)
1892		return;
1893
1894	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1895		/* Get the leaf page size */
1896		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1897
1898		num_pages = size >> tg;
1899
1900		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1901		cmd->tlbi.tg = (tg - 10) / 2;
1902
1903		/*
1904		 * Determine what level the granule is at. For non-leaf, both
1905		 * io-pgtable and SVA pass a nominal last-level granule because
1906		 * they don't know what level(s) actually apply, so ignore that
1907		 * and leave TTL=0. However for various errata reasons we still
1908		 * want to use a range command, so avoid the SVA corner case
1909		 * where both scale and num could be 0 as well.
1910		 */
1911		if (cmd->tlbi.leaf)
1912			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1913		else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
1914			num_pages++;
1915	}
1916
1917	cmds.num = 0;
1918
1919	while (iova < end) {
1920		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1921			/*
1922			 * On each iteration of the loop, the range is 5 bits
1923			 * worth of the aligned size remaining.
1924			 * The range in pages is:
1925			 *
1926			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1927			 */
1928			unsigned long scale, num;
1929
1930			/* Determine the power of 2 multiple number of pages */
1931			scale = __ffs(num_pages);
1932			cmd->tlbi.scale = scale;
1933
1934			/* Determine how many chunks of 2^scale size we have */
1935			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1936			cmd->tlbi.num = num - 1;
1937
1938			/* range is num * 2^scale * pgsize */
1939			inv_range = num << (scale + tg);
1940
1941			/* Clear out the lower order bits for the next iteration */
1942			num_pages -= num << scale;
1943		}
1944
1945		cmd->tlbi.addr = iova;
1946		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1947		iova += inv_range;
1948	}
1949	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1950}
1951
1952static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1953					  size_t granule, bool leaf,
1954					  struct arm_smmu_domain *smmu_domain)
1955{
1956	struct arm_smmu_cmdq_ent cmd = {
1957		.tlbi = {
1958			.leaf	= leaf,
1959		},
1960	};
1961
1962	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1963		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1964				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1965		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1966	} else {
1967		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1968		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1969	}
1970	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1971
1972	/*
1973	 * Unfortunately, this can't be leaf-only since we may have
1974	 * zapped an entire table.
1975	 */
1976	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size);
1977}
1978
1979void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1980				 size_t granule, bool leaf,
1981				 struct arm_smmu_domain *smmu_domain)
1982{
1983	struct arm_smmu_cmdq_ent cmd = {
1984		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1985			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1986		.tlbi = {
1987			.asid	= asid,
1988			.leaf	= leaf,
1989		},
1990	};
1991
1992	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1993}
1994
1995static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1996					 unsigned long iova, size_t granule,
1997					 void *cookie)
1998{
1999	struct arm_smmu_domain *smmu_domain = cookie;
2000	struct iommu_domain *domain = &smmu_domain->domain;
2001
2002	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2003}
2004
2005static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2006				  size_t granule, void *cookie)
2007{
2008	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2009}
2010
2011static const struct iommu_flush_ops arm_smmu_flush_ops = {
2012	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2013	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2014	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2015};
2016
2017/* IOMMU API */
2018static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2019{
2020	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2021
2022	switch (cap) {
2023	case IOMMU_CAP_CACHE_COHERENCY:
2024		/* Assume that a coherent TCU implies coherent TBUs */
2025		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2026	case IOMMU_CAP_NOEXEC:
2027	case IOMMU_CAP_DEFERRED_FLUSH:
2028		return true;
2029	default:
2030		return false;
2031	}
2032}
2033
2034static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2035{
2036	struct arm_smmu_domain *smmu_domain;
2037
2038	if (type == IOMMU_DOMAIN_SVA)
2039		return arm_smmu_sva_domain_alloc();
2040
2041	if (type != IOMMU_DOMAIN_UNMANAGED &&
2042	    type != IOMMU_DOMAIN_DMA &&
2043	    type != IOMMU_DOMAIN_IDENTITY)
2044		return NULL;
2045
2046	/*
2047	 * Allocate the domain and initialise some of its data structures.
2048	 * We can't really do anything meaningful until we've added a
2049	 * master.
2050	 */
2051	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2052	if (!smmu_domain)
2053		return NULL;
2054
2055	mutex_init(&smmu_domain->init_mutex);
2056	INIT_LIST_HEAD(&smmu_domain->devices);
2057	spin_lock_init(&smmu_domain->devices_lock);
2058	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2059
2060	return &smmu_domain->domain;
2061}
2062
2063static void arm_smmu_domain_free(struct iommu_domain *domain)
2064{
2065	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2066	struct arm_smmu_device *smmu = smmu_domain->smmu;
2067
2068	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2069
2070	/* Free the CD and ASID, if we allocated them */
2071	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2072		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2073
2074		/* Prevent SVA from touching the CD while we're freeing it */
2075		mutex_lock(&arm_smmu_asid_lock);
2076		if (cfg->cdcfg.cdtab)
2077			arm_smmu_free_cd_tables(smmu_domain);
2078		arm_smmu_free_asid(&cfg->cd);
2079		mutex_unlock(&arm_smmu_asid_lock);
2080	} else {
2081		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2082		if (cfg->vmid)
2083			ida_free(&smmu->vmid_map, cfg->vmid);
2084	}
2085
2086	kfree(smmu_domain);
2087}
2088
2089static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2090				       struct arm_smmu_master *master,
2091				       struct io_pgtable_cfg *pgtbl_cfg)
2092{
2093	int ret;
2094	u32 asid;
2095	struct arm_smmu_device *smmu = smmu_domain->smmu;
2096	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2097	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2098
2099	refcount_set(&cfg->cd.refs, 1);
2100
2101	/* Prevent SVA from modifying the ASID until it is written to the CD */
2102	mutex_lock(&arm_smmu_asid_lock);
2103	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2104		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2105	if (ret)
2106		goto out_unlock;
2107
2108	cfg->s1cdmax = master->ssid_bits;
2109
2110	smmu_domain->stall_enabled = master->stall_enabled;
2111
2112	ret = arm_smmu_alloc_cd_tables(smmu_domain);
2113	if (ret)
2114		goto out_free_asid;
2115
2116	cfg->cd.asid	= (u16)asid;
2117	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2118	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2119			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2120			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2121			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2122			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2123			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2124			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2125	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2126
2127	/*
2128	 * Note that this will end up calling arm_smmu_sync_cd() before
2129	 * the master has been added to the devices list for this domain.
2130	 * This isn't an issue because the STE hasn't been installed yet.
2131	 */
2132	ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd);
2133	if (ret)
2134		goto out_free_cd_tables;
2135
2136	mutex_unlock(&arm_smmu_asid_lock);
2137	return 0;
2138
2139out_free_cd_tables:
2140	arm_smmu_free_cd_tables(smmu_domain);
2141out_free_asid:
2142	arm_smmu_free_asid(&cfg->cd);
2143out_unlock:
2144	mutex_unlock(&arm_smmu_asid_lock);
2145	return ret;
2146}
2147
2148static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2149				       struct arm_smmu_master *master,
2150				       struct io_pgtable_cfg *pgtbl_cfg)
2151{
2152	int vmid;
2153	struct arm_smmu_device *smmu = smmu_domain->smmu;
2154	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2155	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2156
2157	/* Reserve VMID 0 for stage-2 bypass STEs */
2158	vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2159			       GFP_KERNEL);
2160	if (vmid < 0)
2161		return vmid;
2162
2163	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2164	cfg->vmid	= (u16)vmid;
2165	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2166	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2167			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2168			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2169			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2170			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2171			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2172			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2173	return 0;
2174}
2175
2176static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2177				    struct arm_smmu_master *master)
2178{
2179	int ret;
2180	unsigned long ias, oas;
2181	enum io_pgtable_fmt fmt;
2182	struct io_pgtable_cfg pgtbl_cfg;
2183	struct io_pgtable_ops *pgtbl_ops;
2184	int (*finalise_stage_fn)(struct arm_smmu_domain *,
2185				 struct arm_smmu_master *,
2186				 struct io_pgtable_cfg *);
2187	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2188	struct arm_smmu_device *smmu = smmu_domain->smmu;
2189
2190	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2191		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2192		return 0;
2193	}
2194
2195	/* Restrict the stage to what we can actually support */
2196	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2197		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2198	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2199		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2200
2201	switch (smmu_domain->stage) {
2202	case ARM_SMMU_DOMAIN_S1:
2203		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2204		ias = min_t(unsigned long, ias, VA_BITS);
2205		oas = smmu->ias;
2206		fmt = ARM_64_LPAE_S1;
2207		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2208		break;
2209	case ARM_SMMU_DOMAIN_NESTED:
2210	case ARM_SMMU_DOMAIN_S2:
2211		ias = smmu->ias;
2212		oas = smmu->oas;
2213		fmt = ARM_64_LPAE_S2;
2214		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2215		break;
2216	default:
2217		return -EINVAL;
2218	}
2219
2220	pgtbl_cfg = (struct io_pgtable_cfg) {
2221		.pgsize_bitmap	= smmu->pgsize_bitmap,
2222		.ias		= ias,
2223		.oas		= oas,
2224		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2225		.tlb		= &arm_smmu_flush_ops,
2226		.iommu_dev	= smmu->dev,
2227	};
2228
2229	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2230	if (!pgtbl_ops)
2231		return -ENOMEM;
2232
2233	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2234	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2235	domain->geometry.force_aperture = true;
2236
2237	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2238	if (ret < 0) {
2239		free_io_pgtable_ops(pgtbl_ops);
2240		return ret;
2241	}
2242
2243	smmu_domain->pgtbl_ops = pgtbl_ops;
2244	return 0;
2245}
2246
2247static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2248{
2249	__le64 *step;
2250	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2251
2252	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2253		struct arm_smmu_strtab_l1_desc *l1_desc;
2254		int idx;
2255
2256		/* Two-level walk */
2257		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2258		l1_desc = &cfg->l1_desc[idx];
2259		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2260		step = &l1_desc->l2ptr[idx];
2261	} else {
2262		/* Simple linear lookup */
2263		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2264	}
2265
2266	return step;
2267}
2268
2269static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2270{
2271	int i, j;
2272	struct arm_smmu_device *smmu = master->smmu;
2273
2274	for (i = 0; i < master->num_streams; ++i) {
2275		u32 sid = master->streams[i].id;
2276		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2277
2278		/* Bridged PCI devices may end up with duplicated IDs */
2279		for (j = 0; j < i; j++)
2280			if (master->streams[j].id == sid)
2281				break;
2282		if (j < i)
2283			continue;
2284
2285		arm_smmu_write_strtab_ent(master, sid, step);
2286	}
2287}
2288
2289static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2290{
2291	struct device *dev = master->dev;
2292	struct arm_smmu_device *smmu = master->smmu;
2293	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2294
2295	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2296		return false;
2297
2298	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2299		return false;
2300
2301	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2302}
2303
2304static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2305{
2306	size_t stu;
2307	struct pci_dev *pdev;
2308	struct arm_smmu_device *smmu = master->smmu;
2309	struct arm_smmu_domain *smmu_domain = master->domain;
2310
2311	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2312	if (!master->ats_enabled)
2313		return;
2314
2315	/* Smallest Translation Unit: log2 of the smallest supported granule */
2316	stu = __ffs(smmu->pgsize_bitmap);
2317	pdev = to_pci_dev(master->dev);
2318
2319	atomic_inc(&smmu_domain->nr_ats_masters);
2320	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
2321	if (pci_enable_ats(pdev, stu))
2322		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2323}
2324
2325static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2326{
2327	struct arm_smmu_domain *smmu_domain = master->domain;
2328
2329	if (!master->ats_enabled)
2330		return;
2331
2332	pci_disable_ats(to_pci_dev(master->dev));
2333	/*
2334	 * Ensure ATS is disabled at the endpoint before we issue the
2335	 * ATC invalidation via the SMMU.
2336	 */
2337	wmb();
2338	arm_smmu_atc_inv_master(master);
2339	atomic_dec(&smmu_domain->nr_ats_masters);
2340}
2341
2342static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2343{
2344	int ret;
2345	int features;
2346	int num_pasids;
2347	struct pci_dev *pdev;
2348
2349	if (!dev_is_pci(master->dev))
2350		return -ENODEV;
2351
2352	pdev = to_pci_dev(master->dev);
2353
2354	features = pci_pasid_features(pdev);
2355	if (features < 0)
2356		return features;
2357
2358	num_pasids = pci_max_pasids(pdev);
2359	if (num_pasids <= 0)
2360		return num_pasids;
2361
2362	ret = pci_enable_pasid(pdev, features);
2363	if (ret) {
2364		dev_err(&pdev->dev, "Failed to enable PASID\n");
2365		return ret;
2366	}
2367
2368	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2369				  master->smmu->ssid_bits);
2370	return 0;
2371}
2372
2373static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2374{
2375	struct pci_dev *pdev;
2376
2377	if (!dev_is_pci(master->dev))
2378		return;
2379
2380	pdev = to_pci_dev(master->dev);
2381
2382	if (!pdev->pasid_enabled)
2383		return;
2384
2385	master->ssid_bits = 0;
2386	pci_disable_pasid(pdev);
2387}
2388
2389static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2390{
2391	unsigned long flags;
2392	struct arm_smmu_domain *smmu_domain = master->domain;
2393
2394	if (!smmu_domain)
2395		return;
2396
2397	arm_smmu_disable_ats(master);
2398
2399	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2400	list_del(&master->domain_head);
2401	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2402
2403	master->domain = NULL;
2404	master->ats_enabled = false;
2405	arm_smmu_install_ste_for_dev(master);
2406}
2407
2408static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2409{
2410	int ret = 0;
2411	unsigned long flags;
2412	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2413	struct arm_smmu_device *smmu;
2414	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2415	struct arm_smmu_master *master;
2416
2417	if (!fwspec)
2418		return -ENOENT;
2419
2420	master = dev_iommu_priv_get(dev);
2421	smmu = master->smmu;
2422
2423	/*
2424	 * Checking that SVA is disabled ensures that this device isn't bound to
2425	 * any mm, and can be safely detached from its old domain. Bonds cannot
2426	 * be removed concurrently since we're holding the group mutex.
2427	 */
2428	if (arm_smmu_master_sva_enabled(master)) {
2429		dev_err(dev, "cannot attach - SVA enabled\n");
2430		return -EBUSY;
2431	}
2432
2433	arm_smmu_detach_dev(master);
2434
2435	mutex_lock(&smmu_domain->init_mutex);
2436
2437	if (!smmu_domain->smmu) {
2438		smmu_domain->smmu = smmu;
2439		ret = arm_smmu_domain_finalise(domain, master);
2440		if (ret) {
2441			smmu_domain->smmu = NULL;
2442			goto out_unlock;
2443		}
2444	} else if (smmu_domain->smmu != smmu) {
2445		ret = -EINVAL;
2446		goto out_unlock;
2447	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2448		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2449		ret = -EINVAL;
2450		goto out_unlock;
2451	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2452		   smmu_domain->stall_enabled != master->stall_enabled) {
2453		ret = -EINVAL;
2454		goto out_unlock;
2455	}
2456
2457	master->domain = smmu_domain;
2458
2459	/*
2460	 * The SMMU does not support enabling ATS with bypass. When the STE is
2461	 * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and
2462	 * Translated transactions are denied as though ATS is disabled for the
2463	 * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
2464	 * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
2465	 */
2466	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2467		master->ats_enabled = arm_smmu_ats_supported(master);
2468
2469	arm_smmu_install_ste_for_dev(master);
2470
2471	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2472	list_add(&master->domain_head, &smmu_domain->devices);
2473	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2474
2475	arm_smmu_enable_ats(master);
2476
2477out_unlock:
2478	mutex_unlock(&smmu_domain->init_mutex);
2479	return ret;
2480}
2481
2482static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2483			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
2484			      int prot, gfp_t gfp, size_t *mapped)
2485{
2486	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2487
2488	if (!ops)
2489		return -ENODEV;
2490
2491	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2492}
2493
2494static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2495				   size_t pgsize, size_t pgcount,
2496				   struct iommu_iotlb_gather *gather)
2497{
2498	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2499	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2500
2501	if (!ops)
2502		return 0;
2503
2504	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2505}
2506
2507static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2508{
2509	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2510
2511	if (smmu_domain->smmu)
2512		arm_smmu_tlb_inv_context(smmu_domain);
2513}
2514
2515static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2516				struct iommu_iotlb_gather *gather)
2517{
2518	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2519
2520	if (!gather->pgsize)
2521		return;
2522
2523	arm_smmu_tlb_inv_range_domain(gather->start,
2524				      gather->end - gather->start + 1,
2525				      gather->pgsize, true, smmu_domain);
2526}
2527
2528static phys_addr_t
2529arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2530{
2531	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2532
2533	if (!ops)
2534		return 0;
2535
2536	return ops->iova_to_phys(ops, iova);
2537}
2538
2539static struct platform_driver arm_smmu_driver;
2540
2541static
2542struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2543{
2544	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2545							  fwnode);
2546	put_device(dev);
2547	return dev ? dev_get_drvdata(dev) : NULL;
2548}
2549
2550static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2551{
2552	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2553
2554	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2555		limit *= 1UL << STRTAB_SPLIT;
2556
2557	return sid < limit;
2558}
2559
2560static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2561{
2562	/* Check the SIDs are in range of the SMMU and our stream table */
2563	if (!arm_smmu_sid_in_range(smmu, sid))
2564		return -ERANGE;
2565
2566	/* Ensure l2 strtab is initialised */
2567	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2568		return arm_smmu_init_l2_strtab(smmu, sid);
2569
2570	return 0;
2571}
2572
2573static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2574				  struct arm_smmu_master *master)
2575{
2576	int i;
2577	int ret = 0;
2578	struct arm_smmu_stream *new_stream, *cur_stream;
2579	struct rb_node **new_node, *parent_node = NULL;
2580	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2581
2582	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2583				  GFP_KERNEL);
2584	if (!master->streams)
2585		return -ENOMEM;
2586	master->num_streams = fwspec->num_ids;
2587
2588	mutex_lock(&smmu->streams_mutex);
2589	for (i = 0; i < fwspec->num_ids; i++) {
2590		u32 sid = fwspec->ids[i];
2591
2592		new_stream = &master->streams[i];
2593		new_stream->id = sid;
2594		new_stream->master = master;
2595
2596		ret = arm_smmu_init_sid_strtab(smmu, sid);
2597		if (ret)
2598			break;
2599
2600		/* Insert into SID tree */
2601		new_node = &(smmu->streams.rb_node);
2602		while (*new_node) {
2603			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2604					      node);
2605			parent_node = *new_node;
2606			if (cur_stream->id > new_stream->id) {
2607				new_node = &((*new_node)->rb_left);
2608			} else if (cur_stream->id < new_stream->id) {
2609				new_node = &((*new_node)->rb_right);
2610			} else {
2611				dev_warn(master->dev,
2612					 "stream %u already in tree\n",
2613					 cur_stream->id);
2614				ret = -EINVAL;
2615				break;
2616			}
2617		}
2618		if (ret)
2619			break;
2620
2621		rb_link_node(&new_stream->node, parent_node, new_node);
2622		rb_insert_color(&new_stream->node, &smmu->streams);
2623	}
2624
2625	if (ret) {
2626		for (i--; i >= 0; i--)
2627			rb_erase(&master->streams[i].node, &smmu->streams);
2628		kfree(master->streams);
2629	}
2630	mutex_unlock(&smmu->streams_mutex);
2631
2632	return ret;
2633}
2634
2635static void arm_smmu_remove_master(struct arm_smmu_master *master)
2636{
2637	int i;
2638	struct arm_smmu_device *smmu = master->smmu;
2639	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2640
2641	if (!smmu || !master->streams)
2642		return;
2643
2644	mutex_lock(&smmu->streams_mutex);
2645	for (i = 0; i < fwspec->num_ids; i++)
2646		rb_erase(&master->streams[i].node, &smmu->streams);
2647	mutex_unlock(&smmu->streams_mutex);
2648
2649	kfree(master->streams);
2650}
2651
2652static struct iommu_ops arm_smmu_ops;
2653
2654static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2655{
2656	int ret;
2657	struct arm_smmu_device *smmu;
2658	struct arm_smmu_master *master;
2659	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2660
2661	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2662		return ERR_PTR(-ENODEV);
2663
2664	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2665		return ERR_PTR(-EBUSY);
2666
2667	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2668	if (!smmu)
2669		return ERR_PTR(-ENODEV);
2670
2671	master = kzalloc(sizeof(*master), GFP_KERNEL);
2672	if (!master)
2673		return ERR_PTR(-ENOMEM);
2674
2675	master->dev = dev;
2676	master->smmu = smmu;
2677	INIT_LIST_HEAD(&master->bonds);
2678	dev_iommu_priv_set(dev, master);
2679
2680	ret = arm_smmu_insert_master(smmu, master);
2681	if (ret)
2682		goto err_free_master;
2683
2684	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2685	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2686
2687	/*
2688	 * Note that PASID must be enabled before, and disabled after ATS:
2689	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2690	 *
2691	 *   Behavior is undefined if this bit is Set and the value of the PASID
2692	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2693	 *   are changed.
2694	 */
2695	arm_smmu_enable_pasid(master);
2696
2697	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2698		master->ssid_bits = min_t(u8, master->ssid_bits,
2699					  CTXDESC_LINEAR_CDMAX);
2700
2701	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2702	     device_property_read_bool(dev, "dma-can-stall")) ||
2703	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2704		master->stall_enabled = true;
2705
2706	return &smmu->iommu;
2707
2708err_free_master:
2709	kfree(master);
2710	dev_iommu_priv_set(dev, NULL);
2711	return ERR_PTR(ret);
2712}
2713
2714static void arm_smmu_release_device(struct device *dev)
2715{
2716	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2717
2718	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2719		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2720	arm_smmu_detach_dev(master);
2721	arm_smmu_disable_pasid(master);
2722	arm_smmu_remove_master(master);
2723	kfree(master);
2724}
2725
2726static struct iommu_group *arm_smmu_device_group(struct device *dev)
2727{
2728	struct iommu_group *group;
2729
2730	/*
2731	 * We don't support devices sharing stream IDs other than PCI RID
2732	 * aliases, since the necessary ID-to-device lookup becomes rather
2733	 * impractical given a potential sparse 32-bit stream ID space.
2734	 */
2735	if (dev_is_pci(dev))
2736		group = pci_device_group(dev);
2737	else
2738		group = generic_device_group(dev);
2739
2740	return group;
2741}
2742
2743static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2744{
2745	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2746	int ret = 0;
2747
2748	mutex_lock(&smmu_domain->init_mutex);
2749	if (smmu_domain->smmu)
2750		ret = -EPERM;
2751	else
2752		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2753	mutex_unlock(&smmu_domain->init_mutex);
2754
2755	return ret;
2756}
2757
2758static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2759{
2760	return iommu_fwspec_add_ids(dev, args->args, 1);
2761}
2762
2763static void arm_smmu_get_resv_regions(struct device *dev,
2764				      struct list_head *head)
2765{
2766	struct iommu_resv_region *region;
2767	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2768
2769	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2770					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
2771	if (!region)
2772		return;
2773
2774	list_add_tail(&region->list, head);
2775
2776	iommu_dma_get_resv_regions(dev, head);
2777}
2778
2779static int arm_smmu_dev_enable_feature(struct device *dev,
2780				       enum iommu_dev_features feat)
2781{
2782	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2783
2784	if (!master)
2785		return -ENODEV;
2786
2787	switch (feat) {
2788	case IOMMU_DEV_FEAT_IOPF:
2789		if (!arm_smmu_master_iopf_supported(master))
2790			return -EINVAL;
2791		if (master->iopf_enabled)
2792			return -EBUSY;
2793		master->iopf_enabled = true;
2794		return 0;
2795	case IOMMU_DEV_FEAT_SVA:
2796		if (!arm_smmu_master_sva_supported(master))
2797			return -EINVAL;
2798		if (arm_smmu_master_sva_enabled(master))
2799			return -EBUSY;
2800		return arm_smmu_master_enable_sva(master);
2801	default:
2802		return -EINVAL;
2803	}
2804}
2805
2806static int arm_smmu_dev_disable_feature(struct device *dev,
2807					enum iommu_dev_features feat)
2808{
2809	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2810
2811	if (!master)
2812		return -EINVAL;
2813
2814	switch (feat) {
2815	case IOMMU_DEV_FEAT_IOPF:
2816		if (!master->iopf_enabled)
2817			return -EINVAL;
2818		if (master->sva_enabled)
2819			return -EBUSY;
2820		master->iopf_enabled = false;
2821		return 0;
2822	case IOMMU_DEV_FEAT_SVA:
2823		if (!arm_smmu_master_sva_enabled(master))
2824			return -EINVAL;
2825		return arm_smmu_master_disable_sva(master);
2826	default:
2827		return -EINVAL;
2828	}
2829}
2830
2831/*
2832 * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
2833 * PCIe link and save the data to memory by DMA. The hardware is restricted to
2834 * use identity mapping only.
2835 */
2836#define IS_HISI_PTT_DEVICE(pdev)	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
2837					 (pdev)->device == 0xa12e)
2838
2839static int arm_smmu_def_domain_type(struct device *dev)
2840{
2841	if (dev_is_pci(dev)) {
2842		struct pci_dev *pdev = to_pci_dev(dev);
2843
2844		if (IS_HISI_PTT_DEVICE(pdev))
2845			return IOMMU_DOMAIN_IDENTITY;
2846	}
2847
2848	return 0;
2849}
2850
2851static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
2852{
2853	struct iommu_domain *domain;
2854
2855	domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
2856	if (WARN_ON(IS_ERR(domain)) || !domain)
2857		return;
2858
2859	arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
2860}
2861
2862static struct iommu_ops arm_smmu_ops = {
2863	.capable		= arm_smmu_capable,
2864	.domain_alloc		= arm_smmu_domain_alloc,
2865	.probe_device		= arm_smmu_probe_device,
2866	.release_device		= arm_smmu_release_device,
2867	.device_group		= arm_smmu_device_group,
2868	.of_xlate		= arm_smmu_of_xlate,
2869	.get_resv_regions	= arm_smmu_get_resv_regions,
2870	.remove_dev_pasid	= arm_smmu_remove_dev_pasid,
2871	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2872	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2873	.page_response		= arm_smmu_page_response,
2874	.def_domain_type	= arm_smmu_def_domain_type,
2875	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2876	.owner			= THIS_MODULE,
2877	.default_domain_ops = &(const struct iommu_domain_ops) {
2878		.attach_dev		= arm_smmu_attach_dev,
2879		.map_pages		= arm_smmu_map_pages,
2880		.unmap_pages		= arm_smmu_unmap_pages,
2881		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2882		.iotlb_sync		= arm_smmu_iotlb_sync,
2883		.iova_to_phys		= arm_smmu_iova_to_phys,
2884		.enable_nesting		= arm_smmu_enable_nesting,
2885		.free			= arm_smmu_domain_free,
2886	}
2887};
2888
2889/* Probing and initialisation functions */
2890static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2891				   struct arm_smmu_queue *q,
2892				   void __iomem *page,
2893				   unsigned long prod_off,
2894				   unsigned long cons_off,
2895				   size_t dwords, const char *name)
2896{
2897	size_t qsz;
2898
2899	do {
2900		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2901		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2902					      GFP_KERNEL);
2903		if (q->base || qsz < PAGE_SIZE)
2904			break;
2905
2906		q->llq.max_n_shift--;
2907	} while (1);
2908
2909	if (!q->base) {
2910		dev_err(smmu->dev,
2911			"failed to allocate queue (0x%zx bytes) for %s\n",
2912			qsz, name);
2913		return -ENOMEM;
2914	}
2915
2916	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2917		dev_info(smmu->dev, "allocated %u entries for %s\n",
2918			 1 << q->llq.max_n_shift, name);
2919	}
2920
2921	q->prod_reg	= page + prod_off;
2922	q->cons_reg	= page + cons_off;
2923	q->ent_dwords	= dwords;
2924
2925	q->q_base  = Q_BASE_RWA;
2926	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2927	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2928
2929	q->llq.prod = q->llq.cons = 0;
2930	return 0;
2931}
2932
2933static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2934{
2935	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2936	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2937
2938	atomic_set(&cmdq->owner_prod, 0);
2939	atomic_set(&cmdq->lock, 0);
2940
2941	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
2942							      GFP_KERNEL);
2943	if (!cmdq->valid_map)
2944		return -ENOMEM;
2945
2946	return 0;
2947}
2948
2949static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2950{
2951	int ret;
2952
2953	/* cmdq */
2954	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2955				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2956				      CMDQ_ENT_DWORDS, "cmdq");
2957	if (ret)
2958		return ret;
2959
2960	ret = arm_smmu_cmdq_init(smmu);
2961	if (ret)
2962		return ret;
2963
2964	/* evtq */
2965	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2966				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2967				      EVTQ_ENT_DWORDS, "evtq");
2968	if (ret)
2969		return ret;
2970
2971	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2972	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2973		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2974		if (!smmu->evtq.iopf)
2975			return -ENOMEM;
2976	}
2977
2978	/* priq */
2979	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2980		return 0;
2981
2982	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2983				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2984				       PRIQ_ENT_DWORDS, "priq");
2985}
2986
2987static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2988{
2989	unsigned int i;
2990	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2991	void *strtab = smmu->strtab_cfg.strtab;
2992
2993	cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
2994				    sizeof(*cfg->l1_desc), GFP_KERNEL);
2995	if (!cfg->l1_desc)
2996		return -ENOMEM;
2997
2998	for (i = 0; i < cfg->num_l1_ents; ++i) {
2999		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
3000		strtab += STRTAB_L1_DESC_DWORDS << 3;
3001	}
3002
3003	return 0;
3004}
3005
3006static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3007{
3008	void *strtab;
3009	u64 reg;
3010	u32 size, l1size;
3011	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3012
3013	/* Calculate the L1 size, capped to the SIDSIZE. */
3014	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3015	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3016	cfg->num_l1_ents = 1 << size;
3017
3018	size += STRTAB_SPLIT;
3019	if (size < smmu->sid_bits)
3020		dev_warn(smmu->dev,
3021			 "2-level strtab only covers %u/%u bits of SID\n",
3022			 size, smmu->sid_bits);
3023
3024	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3025	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3026				     GFP_KERNEL);
3027	if (!strtab) {
3028		dev_err(smmu->dev,
3029			"failed to allocate l1 stream table (%u bytes)\n",
3030			l1size);
3031		return -ENOMEM;
3032	}
3033	cfg->strtab = strtab;
3034
3035	/* Configure strtab_base_cfg for 2 levels */
3036	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3037	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3038	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3039	cfg->strtab_base_cfg = reg;
3040
3041	return arm_smmu_init_l1_strtab(smmu);
3042}
3043
3044static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3045{
3046	void *strtab;
3047	u64 reg;
3048	u32 size;
3049	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3050
3051	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3052	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3053				     GFP_KERNEL);
3054	if (!strtab) {
3055		dev_err(smmu->dev,
3056			"failed to allocate linear stream table (%u bytes)\n",
3057			size);
3058		return -ENOMEM;
3059	}
3060	cfg->strtab = strtab;
3061	cfg->num_l1_ents = 1 << smmu->sid_bits;
3062
3063	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3064	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3065	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3066	cfg->strtab_base_cfg = reg;
3067
3068	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false);
3069	return 0;
3070}
3071
3072static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3073{
3074	u64 reg;
3075	int ret;
3076
3077	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3078		ret = arm_smmu_init_strtab_2lvl(smmu);
3079	else
3080		ret = arm_smmu_init_strtab_linear(smmu);
3081
3082	if (ret)
3083		return ret;
3084
3085	/* Set the strtab base address */
3086	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3087	reg |= STRTAB_BASE_RA;
3088	smmu->strtab_cfg.strtab_base = reg;
3089
3090	ida_init(&smmu->vmid_map);
3091
3092	return 0;
3093}
3094
3095static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3096{
3097	int ret;
3098
3099	mutex_init(&smmu->streams_mutex);
3100	smmu->streams = RB_ROOT;
3101
3102	ret = arm_smmu_init_queues(smmu);
3103	if (ret)
3104		return ret;
3105
3106	return arm_smmu_init_strtab(smmu);
3107}
3108
3109static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3110				   unsigned int reg_off, unsigned int ack_off)
3111{
3112	u32 reg;
3113
3114	writel_relaxed(val, smmu->base + reg_off);
3115	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3116					  1, ARM_SMMU_POLL_TIMEOUT_US);
3117}
3118
3119/* GBPA is "special" */
3120static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3121{
3122	int ret;
3123	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3124
3125	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3126					 1, ARM_SMMU_POLL_TIMEOUT_US);
3127	if (ret)
3128		return ret;
3129
3130	reg &= ~clr;
3131	reg |= set;
3132	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3133	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3134					 1, ARM_SMMU_POLL_TIMEOUT_US);
3135
3136	if (ret)
3137		dev_err(smmu->dev, "GBPA not responding to update\n");
3138	return ret;
3139}
3140
3141static void arm_smmu_free_msis(void *data)
3142{
3143	struct device *dev = data;
3144	platform_msi_domain_free_irqs(dev);
3145}
3146
3147static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3148{
3149	phys_addr_t doorbell;
3150	struct device *dev = msi_desc_to_dev(desc);
3151	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3152	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3153
3154	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3155	doorbell &= MSI_CFG0_ADDR_MASK;
3156
3157	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3158	writel_relaxed(msg->data, smmu->base + cfg[1]);
3159	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3160}
3161
3162static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3163{
3164	int ret, nvec = ARM_SMMU_MAX_MSIS;
3165	struct device *dev = smmu->dev;
3166
3167	/* Clear the MSI address regs */
3168	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3169	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3170
3171	if (smmu->features & ARM_SMMU_FEAT_PRI)
3172		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3173	else
3174		nvec--;
3175
3176	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3177		return;
3178
3179	if (!dev->msi.domain) {
3180		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3181		return;
3182	}
3183
3184	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3185	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3186	if (ret) {
3187		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3188		return;
3189	}
3190
3191	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3192	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3193	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3194
3195	/* Add callback to free MSIs on teardown */
3196	devm_add_action(dev, arm_smmu_free_msis, dev);
3197}
3198
3199static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3200{
3201	int irq, ret;
3202
3203	arm_smmu_setup_msis(smmu);
3204
3205	/* Request interrupt lines */
3206	irq = smmu->evtq.q.irq;
3207	if (irq) {
3208		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3209						arm_smmu_evtq_thread,
3210						IRQF_ONESHOT,
3211						"arm-smmu-v3-evtq", smmu);
3212		if (ret < 0)
3213			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3214	} else {
3215		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3216	}
3217
3218	irq = smmu->gerr_irq;
3219	if (irq) {
3220		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3221				       0, "arm-smmu-v3-gerror", smmu);
3222		if (ret < 0)
3223			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3224	} else {
3225		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3226	}
3227
3228	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3229		irq = smmu->priq.q.irq;
3230		if (irq) {
3231			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3232							arm_smmu_priq_thread,
3233							IRQF_ONESHOT,
3234							"arm-smmu-v3-priq",
3235							smmu);
3236			if (ret < 0)
3237				dev_warn(smmu->dev,
3238					 "failed to enable priq irq\n");
3239		} else {
3240			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3241		}
3242	}
3243}
3244
3245static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3246{
3247	int ret, irq;
3248	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3249
3250	/* Disable IRQs first */
3251	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3252				      ARM_SMMU_IRQ_CTRLACK);
3253	if (ret) {
3254		dev_err(smmu->dev, "failed to disable irqs\n");
3255		return ret;
3256	}
3257
3258	irq = smmu->combined_irq;
3259	if (irq) {
3260		/*
3261		 * Cavium ThunderX2 implementation doesn't support unique irq
3262		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3263		 */
3264		ret = devm_request_threaded_irq(smmu->dev, irq,
3265					arm_smmu_combined_irq_handler,
3266					arm_smmu_combined_irq_thread,
3267					IRQF_ONESHOT,
3268					"arm-smmu-v3-combined-irq", smmu);
3269		if (ret < 0)
3270			dev_warn(smmu->dev, "failed to enable combined irq\n");
3271	} else
3272		arm_smmu_setup_unique_irqs(smmu);
3273
3274	if (smmu->features & ARM_SMMU_FEAT_PRI)
3275		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3276
3277	/* Enable interrupt generation on the SMMU */
3278	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3279				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3280	if (ret)
3281		dev_warn(smmu->dev, "failed to enable irqs\n");
3282
3283	return 0;
3284}
3285
3286static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3287{
3288	int ret;
3289
3290	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3291	if (ret)
3292		dev_err(smmu->dev, "failed to clear cr0\n");
3293
3294	return ret;
3295}
3296
3297static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3298{
3299	int ret;
3300	u32 reg, enables;
3301	struct arm_smmu_cmdq_ent cmd;
3302
3303	/* Clear CR0 and sync (disables SMMU and queue processing) */
3304	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3305	if (reg & CR0_SMMUEN) {
3306		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3307		WARN_ON(is_kdump_kernel() && !disable_bypass);
3308		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3309	}
3310
3311	ret = arm_smmu_device_disable(smmu);
3312	if (ret)
3313		return ret;
3314
3315	/* CR1 (table and queue memory attributes) */
3316	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3317	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3318	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3319	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3320	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3321	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3322	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3323
3324	/* CR2 (random crap) */
3325	reg = CR2_PTM | CR2_RECINVSID;
3326
3327	if (smmu->features & ARM_SMMU_FEAT_E2H)
3328		reg |= CR2_E2H;
3329
3330	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3331
3332	/* Stream table */
3333	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3334		       smmu->base + ARM_SMMU_STRTAB_BASE);
3335	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3336		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3337
3338	/* Command queue */
3339	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3340	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3341	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3342
3343	enables = CR0_CMDQEN;
3344	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3345				      ARM_SMMU_CR0ACK);
3346	if (ret) {
3347		dev_err(smmu->dev, "failed to enable command queue\n");
3348		return ret;
3349	}
3350
3351	/* Invalidate any cached configuration */
3352	cmd.opcode = CMDQ_OP_CFGI_ALL;
3353	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3354
3355	/* Invalidate any stale TLB entries */
3356	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3357		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3358		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3359	}
3360
3361	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3362	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3363
3364	/* Event queue */
3365	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3366	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3367	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3368
3369	enables |= CR0_EVTQEN;
3370	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3371				      ARM_SMMU_CR0ACK);
3372	if (ret) {
3373		dev_err(smmu->dev, "failed to enable event queue\n");
3374		return ret;
3375	}
3376
3377	/* PRI queue */
3378	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3379		writeq_relaxed(smmu->priq.q.q_base,
3380			       smmu->base + ARM_SMMU_PRIQ_BASE);
3381		writel_relaxed(smmu->priq.q.llq.prod,
3382			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3383		writel_relaxed(smmu->priq.q.llq.cons,
3384			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3385
3386		enables |= CR0_PRIQEN;
3387		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3388					      ARM_SMMU_CR0ACK);
3389		if (ret) {
3390			dev_err(smmu->dev, "failed to enable PRI queue\n");
3391			return ret;
3392		}
3393	}
3394
3395	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3396		enables |= CR0_ATSCHK;
3397		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3398					      ARM_SMMU_CR0ACK);
3399		if (ret) {
3400			dev_err(smmu->dev, "failed to enable ATS check\n");
3401			return ret;
3402		}
3403	}
3404
3405	ret = arm_smmu_setup_irqs(smmu);
3406	if (ret) {
3407		dev_err(smmu->dev, "failed to setup irqs\n");
3408		return ret;
3409	}
3410
3411	if (is_kdump_kernel())
3412		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3413
3414	/* Enable the SMMU interface, or ensure bypass */
3415	if (!bypass || disable_bypass) {
3416		enables |= CR0_SMMUEN;
3417	} else {
3418		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3419		if (ret)
3420			return ret;
3421	}
3422	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3423				      ARM_SMMU_CR0ACK);
3424	if (ret) {
3425		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3426		return ret;
3427	}
3428
3429	return 0;
3430}
3431
3432#define IIDR_IMPLEMENTER_ARM		0x43b
3433#define IIDR_PRODUCTID_ARM_MMU_600	0x483
3434#define IIDR_PRODUCTID_ARM_MMU_700	0x487
3435
3436static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
3437{
3438	u32 reg;
3439	unsigned int implementer, productid, variant, revision;
3440
3441	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
3442	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
3443	productid = FIELD_GET(IIDR_PRODUCTID, reg);
3444	variant = FIELD_GET(IIDR_VARIANT, reg);
3445	revision = FIELD_GET(IIDR_REVISION, reg);
3446
3447	switch (implementer) {
3448	case IIDR_IMPLEMENTER_ARM:
3449		switch (productid) {
3450		case IIDR_PRODUCTID_ARM_MMU_600:
3451			/* Arm erratum 1076982 */
3452			if (variant == 0 && revision <= 2)
3453				smmu->features &= ~ARM_SMMU_FEAT_SEV;
3454			/* Arm erratum 1209401 */
3455			if (variant < 2)
3456				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3457			break;
3458		case IIDR_PRODUCTID_ARM_MMU_700:
3459			/* Arm erratum 2812531 */
3460			smmu->features &= ~ARM_SMMU_FEAT_BTM;
3461			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
3462			/* Arm errata 2268618, 2812531 */
3463			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3464			break;
3465		}
3466		break;
3467	}
3468}
3469
3470static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3471{
3472	u32 reg;
3473	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3474
3475	/* IDR0 */
3476	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3477
3478	/* 2-level structures */
3479	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3480		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3481
3482	if (reg & IDR0_CD2L)
3483		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3484
3485	/*
3486	 * Translation table endianness.
3487	 * We currently require the same endianness as the CPU, but this
3488	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3489	 */
3490	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3491	case IDR0_TTENDIAN_MIXED:
3492		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3493		break;
3494#ifdef __BIG_ENDIAN
3495	case IDR0_TTENDIAN_BE:
3496		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3497		break;
3498#else
3499	case IDR0_TTENDIAN_LE:
3500		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3501		break;
3502#endif
3503	default:
3504		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3505		return -ENXIO;
3506	}
3507
3508	/* Boolean feature flags */
3509	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3510		smmu->features |= ARM_SMMU_FEAT_PRI;
3511
3512	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3513		smmu->features |= ARM_SMMU_FEAT_ATS;
3514
3515	if (reg & IDR0_SEV)
3516		smmu->features |= ARM_SMMU_FEAT_SEV;
3517
3518	if (reg & IDR0_MSI) {
3519		smmu->features |= ARM_SMMU_FEAT_MSI;
3520		if (coherent && !disable_msipolling)
3521			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3522	}
3523
3524	if (reg & IDR0_HYP) {
3525		smmu->features |= ARM_SMMU_FEAT_HYP;
3526		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3527			smmu->features |= ARM_SMMU_FEAT_E2H;
3528	}
3529
3530	/*
3531	 * The coherency feature as set by FW is used in preference to the ID
3532	 * register, but warn on mismatch.
3533	 */
3534	if (!!(reg & IDR0_COHACC) != coherent)
3535		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3536			 coherent ? "true" : "false");
3537
3538	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3539	case IDR0_STALL_MODEL_FORCE:
3540		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3541		fallthrough;
3542	case IDR0_STALL_MODEL_STALL:
3543		smmu->features |= ARM_SMMU_FEAT_STALLS;
3544	}
3545
3546	if (reg & IDR0_S1P)
3547		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3548
3549	if (reg & IDR0_S2P)
3550		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3551
3552	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3553		dev_err(smmu->dev, "no translation support!\n");
3554		return -ENXIO;
3555	}
3556
3557	/* We only support the AArch64 table format at present */
3558	switch (FIELD_GET(IDR0_TTF, reg)) {
3559	case IDR0_TTF_AARCH32_64:
3560		smmu->ias = 40;
3561		fallthrough;
3562	case IDR0_TTF_AARCH64:
3563		break;
3564	default:
3565		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3566		return -ENXIO;
3567	}
3568
3569	/* ASID/VMID sizes */
3570	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3571	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3572
3573	/* IDR1 */
3574	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3575	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3576		dev_err(smmu->dev, "embedded implementation not supported\n");
3577		return -ENXIO;
3578	}
3579
3580	/* Queue sizes, capped to ensure natural alignment */
3581	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3582					     FIELD_GET(IDR1_CMDQS, reg));
3583	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3584		/*
3585		 * We don't support splitting up batches, so one batch of
3586		 * commands plus an extra sync needs to fit inside the command
3587		 * queue. There's also no way we can handle the weird alignment
3588		 * restrictions on the base pointer for a unit-length queue.
3589		 */
3590		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3591			CMDQ_BATCH_ENTRIES);
3592		return -ENXIO;
3593	}
3594
3595	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3596					     FIELD_GET(IDR1_EVTQS, reg));
3597	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3598					     FIELD_GET(IDR1_PRIQS, reg));
3599
3600	/* SID/SSID sizes */
3601	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3602	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3603	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
3604
3605	/*
3606	 * If the SMMU supports fewer bits than would fill a single L2 stream
3607	 * table, use a linear table instead.
3608	 */
3609	if (smmu->sid_bits <= STRTAB_SPLIT)
3610		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3611
3612	/* IDR3 */
3613	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3614	if (FIELD_GET(IDR3_RIL, reg))
3615		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3616
3617	/* IDR5 */
3618	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3619
3620	/* Maximum number of outstanding stalls */
3621	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3622
3623	/* Page sizes */
3624	if (reg & IDR5_GRAN64K)
3625		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3626	if (reg & IDR5_GRAN16K)
3627		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3628	if (reg & IDR5_GRAN4K)
3629		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3630
3631	/* Input address size */
3632	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3633		smmu->features |= ARM_SMMU_FEAT_VAX;
3634
3635	/* Output address size */
3636	switch (FIELD_GET(IDR5_OAS, reg)) {
3637	case IDR5_OAS_32_BIT:
3638		smmu->oas = 32;
3639		break;
3640	case IDR5_OAS_36_BIT:
3641		smmu->oas = 36;
3642		break;
3643	case IDR5_OAS_40_BIT:
3644		smmu->oas = 40;
3645		break;
3646	case IDR5_OAS_42_BIT:
3647		smmu->oas = 42;
3648		break;
3649	case IDR5_OAS_44_BIT:
3650		smmu->oas = 44;
3651		break;
3652	case IDR5_OAS_52_BIT:
3653		smmu->oas = 52;
3654		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3655		break;
3656	default:
3657		dev_info(smmu->dev,
3658			"unknown output address size. Truncating to 48-bit\n");
3659		fallthrough;
3660	case IDR5_OAS_48_BIT:
3661		smmu->oas = 48;
3662	}
3663
3664	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3665		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3666	else
3667		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3668
3669	/* Set the DMA mask for our table walker */
3670	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3671		dev_warn(smmu->dev,
3672			 "failed to set DMA mask for table walker\n");
3673
3674	smmu->ias = max(smmu->ias, smmu->oas);
3675
3676	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
3677	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
3678		smmu->features |= ARM_SMMU_FEAT_NESTING;
3679
3680	arm_smmu_device_iidr_probe(smmu);
3681
3682	if (arm_smmu_sva_supported(smmu))
3683		smmu->features |= ARM_SMMU_FEAT_SVA;
3684
3685	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3686		 smmu->ias, smmu->oas, smmu->features);
3687	return 0;
3688}
3689
3690#ifdef CONFIG_ACPI
3691static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3692{
3693	switch (model) {
3694	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3695		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3696		break;
3697	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3698		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3699		break;
3700	}
3701
3702	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3703}
3704
3705static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3706				      struct arm_smmu_device *smmu)
3707{
3708	struct acpi_iort_smmu_v3 *iort_smmu;
3709	struct device *dev = smmu->dev;
3710	struct acpi_iort_node *node;
3711
3712	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3713
3714	/* Retrieve SMMUv3 specific data */
3715	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3716
3717	acpi_smmu_get_options(iort_smmu->model, smmu);
3718
3719	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3720		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3721
3722	return 0;
3723}
3724#else
3725static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3726					     struct arm_smmu_device *smmu)
3727{
3728	return -ENODEV;
3729}
3730#endif
3731
3732static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3733				    struct arm_smmu_device *smmu)
3734{
3735	struct device *dev = &pdev->dev;
3736	u32 cells;
3737	int ret = -EINVAL;
3738
3739	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3740		dev_err(dev, "missing #iommu-cells property\n");
3741	else if (cells != 1)
3742		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3743	else
3744		ret = 0;
3745
3746	parse_driver_options(smmu);
3747
3748	if (of_dma_is_coherent(dev->of_node))
3749		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3750
3751	return ret;
3752}
3753
3754static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3755{
3756	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3757		return SZ_64K;
3758	else
3759		return SZ_128K;
3760}
3761
3762static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3763				      resource_size_t size)
3764{
3765	struct resource res = DEFINE_RES_MEM(start, size);
3766
3767	return devm_ioremap_resource(dev, &res);
3768}
3769
3770static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3771{
3772	struct list_head rmr_list;
3773	struct iommu_resv_region *e;
3774
3775	INIT_LIST_HEAD(&rmr_list);
3776	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3777
3778	list_for_each_entry(e, &rmr_list, list) {
3779		__le64 *step;
3780		struct iommu_iort_rmr_data *rmr;
3781		int ret, i;
3782
3783		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3784		for (i = 0; i < rmr->num_sids; i++) {
3785			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
3786			if (ret) {
3787				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
3788					rmr->sids[i]);
3789				continue;
3790			}
3791
3792			step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
3793			arm_smmu_init_bypass_stes(step, 1, true);
3794		}
3795	}
3796
3797	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3798}
3799
3800static int arm_smmu_device_probe(struct platform_device *pdev)
3801{
3802	int irq, ret;
3803	struct resource *res;
3804	resource_size_t ioaddr;
3805	struct arm_smmu_device *smmu;
3806	struct device *dev = &pdev->dev;
3807	bool bypass;
3808
3809	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3810	if (!smmu)
3811		return -ENOMEM;
3812	smmu->dev = dev;
3813
3814	if (dev->of_node) {
3815		ret = arm_smmu_device_dt_probe(pdev, smmu);
3816	} else {
3817		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3818		if (ret == -ENODEV)
3819			return ret;
3820	}
3821
3822	/* Set bypass mode according to firmware probing result */
3823	bypass = !!ret;
3824
3825	/* Base address */
3826	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3827	if (!res)
3828		return -EINVAL;
3829	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3830		dev_err(dev, "MMIO region too small (%pr)\n", res);
3831		return -EINVAL;
3832	}
3833	ioaddr = res->start;
3834
3835	/*
3836	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3837	 * the PMCG registers which are reserved by the PMU driver.
3838	 */
3839	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3840	if (IS_ERR(smmu->base))
3841		return PTR_ERR(smmu->base);
3842
3843	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3844		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3845					       ARM_SMMU_REG_SZ);
3846		if (IS_ERR(smmu->page1))
3847			return PTR_ERR(smmu->page1);
3848	} else {
3849		smmu->page1 = smmu->base;
3850	}
3851
3852	/* Interrupt lines */
3853
3854	irq = platform_get_irq_byname_optional(pdev, "combined");
3855	if (irq > 0)
3856		smmu->combined_irq = irq;
3857	else {
3858		irq = platform_get_irq_byname_optional(pdev, "eventq");
3859		if (irq > 0)
3860			smmu->evtq.q.irq = irq;
3861
3862		irq = platform_get_irq_byname_optional(pdev, "priq");
3863		if (irq > 0)
3864			smmu->priq.q.irq = irq;
3865
3866		irq = platform_get_irq_byname_optional(pdev, "gerror");
3867		if (irq > 0)
3868			smmu->gerr_irq = irq;
3869	}
3870	/* Probe the h/w */
3871	ret = arm_smmu_device_hw_probe(smmu);
3872	if (ret)
3873		return ret;
3874
3875	/* Initialise in-memory data structures */
3876	ret = arm_smmu_init_structures(smmu);
3877	if (ret)
3878		return ret;
3879
3880	/* Record our private device structure */
3881	platform_set_drvdata(pdev, smmu);
3882
3883	/* Check for RMRs and install bypass STEs if any */
3884	arm_smmu_rmr_install_bypass_ste(smmu);
3885
3886	/* Reset the device */
3887	ret = arm_smmu_device_reset(smmu, bypass);
3888	if (ret)
3889		return ret;
3890
3891	/* And we're up. Go go go! */
3892	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3893				     "smmu3.%pa", &ioaddr);
3894	if (ret)
3895		return ret;
3896
3897	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3898	if (ret) {
3899		dev_err(dev, "Failed to register iommu\n");
3900		iommu_device_sysfs_remove(&smmu->iommu);
3901		return ret;
3902	}
3903
3904	return 0;
3905}
3906
3907static void arm_smmu_device_remove(struct platform_device *pdev)
3908{
3909	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3910
3911	iommu_device_unregister(&smmu->iommu);
3912	iommu_device_sysfs_remove(&smmu->iommu);
3913	arm_smmu_device_disable(smmu);
3914	iopf_queue_free(smmu->evtq.iopf);
3915	ida_destroy(&smmu->vmid_map);
3916}
3917
3918static void arm_smmu_device_shutdown(struct platform_device *pdev)
3919{
3920	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3921
3922	arm_smmu_device_disable(smmu);
3923}
3924
3925static const struct of_device_id arm_smmu_of_match[] = {
3926	{ .compatible = "arm,smmu-v3", },
3927	{ },
3928};
3929MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3930
3931static void arm_smmu_driver_unregister(struct platform_driver *drv)
3932{
3933	arm_smmu_sva_notifier_synchronize();
3934	platform_driver_unregister(drv);
3935}
3936
3937static struct platform_driver arm_smmu_driver = {
3938	.driver	= {
3939		.name			= "arm-smmu-v3",
3940		.of_match_table		= arm_smmu_of_match,
3941		.suppress_bind_attrs	= true,
3942	},
3943	.probe	= arm_smmu_device_probe,
3944	.remove_new = arm_smmu_device_remove,
3945	.shutdown = arm_smmu_device_shutdown,
3946};
3947module_driver(arm_smmu_driver, platform_driver_register,
3948	      arm_smmu_driver_unregister);
3949
3950MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3951MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3952MODULE_ALIAS("platform:arm-smmu-v3");
3953MODULE_LICENSE("GPL v2");
3954