// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */


#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"

#include <linux/bitfield.h>
#include <linux/devfreq.h>
#include <linux/pm_domain.h>
#include <linux/soc/qcom/llcc-qcom.h>

#define GPU_PAS_ID 13

static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Check that the GMU is idle */
	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_isidle(&a6xx_gpu->gmu))
		return false;

	/* Check that the CX master is idle */
	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
			~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
		return false;

	return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
}

static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a6xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A6XX_RBBM_STATUS),
			gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

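/*
 * Ask the CP to publish its current read pointer into the privileged shadow
 * buffer (via CP_WHERE_AM_I) so the kernel can observe ringbuffer progress
 * without register reads.  Targets with expanded APRIV get the RPTR shadow
 * maintained by hardware and don't need the explicit opcode.
 */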
static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
	if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
		OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
		OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
	}
}

static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	uint32_t wptr;
	unsigned long flags;

	update_shadow_rptr(gpu, ring);

	spin_lock_irqsave(&ring->preempt_lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	spin_unlock_irqrestore(&ring->preempt_lock, flags);

	/* Make sure everything is posted before making a decision */
	mb();

	gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
}

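/*
 * Emit a CP_REG_TO_MEM packet that snapshots a 64-bit counter register pair
 * (CNT(2) dwords, 64-bit destination flag) into the stats area at @iova.
 */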
static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
		u64 iova)
{
	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
	OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
		CP_REG_TO_MEM_0_CNT(2) |
		CP_REG_TO_MEM_0_64B);
	OUT_RING(ring, lower_32_bits(iova));
	OUT_RING(ring, upper_32_bits(iova));
}

static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
		struct msm_ringbuffer *ring, struct msm_file_private *ctx)
{
	bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
	phys_addr_t ttbr;
	u32 asid;
	u64 memptr = rbmemptr(ring, ttbr0);

	if (ctx->seqno == a6xx_gpu->base.base.cur_ctx_seqno)
		return;

	if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
		return;

	if (!sysprof) {
		/* Turn off protected mode to write to special registers */
		OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
		OUT_RING(ring, 0);

		OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
		OUT_RING(ring, 1);
	}

	/* Execute the table update */
	OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));

	OUT_RING(ring,
		CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
		CP_SMMU_TABLE_UPDATE_1_ASID(asid));
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));

	/*
	 * Write the new TTBR0 to the memstore. This is good for debugging.
	 */
	OUT_PKT7(ring, CP_MEM_WRITE, 4);
	OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
	OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
	OUT_RING(ring, lower_32_bits(ttbr));
	OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr));

	/*
	 * And finally, trigger a uche flush to be sure there isn't anything
	 * lingering in that part of the GPU
	 */

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CACHE_INVALIDATE);

	if (!sysprof) {
		/*
		 * Wait for SRAM clear after the pgtable update, so the
		 * two can happen in parallel:
		 */
		OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
		OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
		OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
				REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
		OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0));
		OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
		OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
		OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));

		/* Re-enable protected mode: */
		OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
		OUT_RING(ring, 1);
	}
}

static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx);

	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
		rbmemptr_stats(ring, index, cpcycles_start));

	/*
	 * For PM4 the GMU register offsets are calculated from the base of the
	 * GPU registers so we need to add 0x1a800 to the register value on A630
	 * to get the right value from PM4.
	 */
	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
		rbmemptr_stats(ring, index, alwayson_start));

	/* Invalidate CCU depth and color */
	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			ibs++;
			break;
		}

		/*
		 * Periodically update shadow-wptr if needed, so that we
		 * can see partial progress of submits with large # of
		 * cmds.. otherwise we could needlessly stall waiting for
		 * ringbuffer state, simply due to looking at a shadow
		 * rptr value that has not been updated
		 */
		if ((ibs % 32) == 0)
			update_shadow_rptr(gpu, ring);
	}

	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
		rbmemptr_stats(ring, index, cpcycles_end));
	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
		rbmemptr_stats(ring, index, alwayson_end));

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
	 * timestamp is written to the memory and then triggers the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
		CP_EVENT_WRITE_0_IRQ);
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	trace_msm_gpu_submit_flush(submit,
		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));

	a6xx_flush(gpu, ring);
}

const struct adreno_reglist a612_hwcg[] = {
	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000081},
	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000f3cf},
	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01202222},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040f00},
	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05522022},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
	{},
};

/* For a615 family (a615, a616, a618 and a619) */
const struct adreno_reglist a615_hwcg[] = {
	{REG_A6XX_RBBM_CLOCK_CNTL_SP0,  0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A6XX_RBBM_CLOCK_HYST_SP0,  0x0000F3CF},
	{REG_A6XX_RBBM_CLOCK_CNTL_TP0,  0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_TP1,  0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
	{REG_A6XX_RBBM_CLOCK_HYST_TP0,  0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST_TP1,  0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
	{REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
	{REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE,  0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{REG_A6XX_RBBM_CLOCK_HYST_UCHE,  0x00000004},
	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002020},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040F00},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040F00},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040F00},
	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
	{},
};

const struct adreno_reglist a630_hwcg[] = {
	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_SP1, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_SP2, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_SP3, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02022220},
	{REG_A6XX_RBBM_CLOCK_CNTL2_SP1, 0x02022220},
	{REG_A6XX_RBBM_CLOCK_CNTL2_SP2, 0x02022220},
	{REG_A6XX_RBBM_CLOCK_CNTL2_SP3, 0x02022220},
	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A6XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{REG_A6XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{REG_A6XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000f3cf},
	{REG_A6XX_RBBM_CLOCK_HYST_SP1, 0x0000f3cf},
	{REG_A6XX_RBBM_CLOCK_HYST_SP2, 0x0000f3cf},
	{REG_A6XX_RBBM_CLOCK_HYST_SP3, 0x0000f3cf},
	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_TP2, 0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_TP3, 0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_TP2, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_TP3, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_TP2, 0x00022222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_TP3, 0x00022222},
	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST3_TP2, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST3_TP3, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
	{REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
	{REG_A6XX_RBBM_CLOCK_HYST4_TP2, 0x00077777},
	{REG_A6XX_RBBM_CLOCK_HYST4_TP3, 0x00077777},
	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY3_TP2, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY3_TP3, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
	{REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
	{REG_A6XX_RBBM_CLOCK_DELAY4_TP2, 0x00011111},
	{REG_A6XX_RBBM_CLOCK_DELAY4_TP3, 0x00011111},
	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RB1, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RB2, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RB3, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040f00},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040f00},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040f00},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040f00},
	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
	{},
};

const struct adreno_reglist a640_hwcg[] = {
	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05222022},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
	{REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
	{REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
	{},
};

const struct adreno_reglist a650_hwcg[] = {
	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
	{REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
	{REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000777},
	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
	{},
};

const struct adreno_reglist a660_hwcg[] = {
	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
	{REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
	{REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
	{},
};

const struct adreno_reglist a690_hwcg[] = {
	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
	{REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
	{REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A6XX_RBBM_CLOCK_CNTL, 0x8AA8AA82},
	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
	{REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL, 0x20200},
	{REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 0x10111},
	{REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL, 0x5555},
	{}
};

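/*
 * Apply (or clear) the per-SoC hardware clock gating tables above: the SP
 * clock is parked via the GMU while the HWCG registers are written, and
 * RBBM_CLOCK_CNTL is programmed last to actually enable or disable the
 * feature.
 */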
static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	const struct adreno_reglist *reg;
	unsigned int i;
	u32 val, clock_cntl_on;

	if (!adreno_gpu->info->hwcg)
		return;

	if (adreno_is_a630(adreno_gpu))
		clock_cntl_on = 0x8aa8aa02;
	else if (adreno_is_a610(adreno_gpu))
		clock_cntl_on = 0xaaa8aa82;
	else
		clock_cntl_on = 0x8aa8aa82;

	val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);

	/* Don't re-program the registers if they are already correct */
	if ((!state && !val) || (state && (val == clock_cntl_on)))
		return;

	/* Disable SP clock before programming HWCG registers */
	if (!adreno_is_a610(adreno_gpu))
		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);

	for (i = 0; (reg = &adreno_gpu->info->hwcg[i], reg->offset); i++)
		gpu_write(gpu, reg->offset, state ? reg->value : 0);

	/* Enable SP clock */
	if (!adreno_is_a610(adreno_gpu))
		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);

	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
}

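/*
 * CP register protection tables.  Each entry describes a span of registers
 * starting at the given dword offset: RDONLY spans may be read but not
 * written from submitted command streams, NORDWR spans allow neither.  The
 * last entry of every table is expected to cover an "infinite" range (see
 * a6xx_set_cp_protect() below).
 */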
/* For a615, a616, a618, a619, a630, a640 and a680 */
static const u32 a6xx_protect[] = {
	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
	A6XX_PROTECT_NORDWR(0x09e70, 0x0001),
	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
	A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
	A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
	A6XX_PROTECT_NORDWR(0x11c00, 0x0000), /* note: infinite range */
};

/* These are for a620 and a650 */
static const u32 a650_protect[] = {
	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
	A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
	A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
	A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
	A6XX_PROTECT_NORDWR(0x0b608, 0x0007),
	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
	A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
	A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
	A6XX_PROTECT_NORDWR(0x1a800, 0x1fff),
	A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
	A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
	A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
	A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
};

/* These are for a635 and a660 */
static const u32 a660_protect[] = {
	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
	A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
	A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
	A6XX_PROTECT_NORDWR(0x0ae50, 0x012f),
	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
	A6XX_PROTECT_NORDWR(0x0b608, 0x0006),
	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
	A6XX_PROTECT_NORDWR(0x0be20, 0x015f),
	A6XX_PROTECT_NORDWR(0x0d000, 0x05ff),
	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
	A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
	A6XX_PROTECT_NORDWR(0x1a400, 0x1fff),
	A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
	A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
	A6XX_PROTECT_NORDWR(0x1f860, 0x0000),
	A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
	A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
};

/* These are for a690 */
static const u32 a690_protect[] = {
	A6XX_PROTECT_RDONLY(0x00000, 0x004ff),
	A6XX_PROTECT_RDONLY(0x00501, 0x00001),
	A6XX_PROTECT_RDONLY(0x0050b, 0x002f4),
	A6XX_PROTECT_NORDWR(0x0050e, 0x00000),
	A6XX_PROTECT_NORDWR(0x00510, 0x00000),
	A6XX_PROTECT_NORDWR(0x00534, 0x00000),
	A6XX_PROTECT_NORDWR(0x00800, 0x00082),
	A6XX_PROTECT_NORDWR(0x008a0, 0x00008),
	A6XX_PROTECT_NORDWR(0x008ab, 0x00024),
	A6XX_PROTECT_RDONLY(0x008de, 0x000ae),
	A6XX_PROTECT_NORDWR(0x00900, 0x0004d),
	A6XX_PROTECT_NORDWR(0x0098d, 0x00272),
	A6XX_PROTECT_NORDWR(0x00e00, 0x00001),
	A6XX_PROTECT_NORDWR(0x00e03, 0x0000c),
	A6XX_PROTECT_NORDWR(0x03c00, 0x000c3),
	A6XX_PROTECT_RDONLY(0x03cc4, 0x01fff),
	A6XX_PROTECT_NORDWR(0x08630, 0x001cf),
	A6XX_PROTECT_NORDWR(0x08e00, 0x00000),
	A6XX_PROTECT_NORDWR(0x08e08, 0x00007),
	A6XX_PROTECT_NORDWR(0x08e50, 0x0001f),
	A6XX_PROTECT_NORDWR(0x08e80, 0x0027f),
	A6XX_PROTECT_NORDWR(0x09624, 0x001db),
	A6XX_PROTECT_NORDWR(0x09e60, 0x00011),
	A6XX_PROTECT_NORDWR(0x09e78, 0x00187),
	A6XX_PROTECT_NORDWR(0x0a630, 0x001cf),
	A6XX_PROTECT_NORDWR(0x0ae02, 0x00000),
	A6XX_PROTECT_NORDWR(0x0ae50, 0x0012f),
	A6XX_PROTECT_NORDWR(0x0b604, 0x00000),
	A6XX_PROTECT_NORDWR(0x0b608, 0x00006),
	A6XX_PROTECT_NORDWR(0x0be02, 0x00001),
	A6XX_PROTECT_NORDWR(0x0be20, 0x0015f),
	A6XX_PROTECT_NORDWR(0x0d000, 0x005ff),
	A6XX_PROTECT_NORDWR(0x0f000, 0x00bff),
	A6XX_PROTECT_RDONLY(0x0fc00, 0x01fff),
	A6XX_PROTECT_NORDWR(0x11c00, 0x00000), /* note: infinite range */
};

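/*
 * Program the protection table for the current GPU.  Entries 0..count-2 are
 * written in order, while the final "infinite range" entry is placed in the
 * last implemented CP_PROTECT slot (count_max - 1) so that, together with
 * LAST_SPAN_INF_RANGE, it extends to the end of the register space.
 */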
static void a6xx_set_cp_protect(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const u32 *regs = a6xx_protect;
	unsigned i, count, count_max;

	if (adreno_is_a650(adreno_gpu)) {
		regs = a650_protect;
		count = ARRAY_SIZE(a650_protect);
		count_max = 48;
		BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
	} else if (adreno_is_a690(adreno_gpu)) {
		regs = a690_protect;
		count = ARRAY_SIZE(a690_protect);
		count_max = 48;
		BUILD_BUG_ON(ARRAY_SIZE(a690_protect) > 48);
	} else if (adreno_is_a660_family(adreno_gpu)) {
		regs = a660_protect;
		count = ARRAY_SIZE(a660_protect);
		count_max = 48;
		BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48);
	} else {
		regs = a6xx_protect;
		count = ARRAY_SIZE(a6xx_protect);
		count_max = 32;
		BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
	}

	/*
	 * Enable access protection to privileged registers, fault on an access
	 * protect violation and select the last span to protect from the start
	 * address all the way to the end of the register address space
	 */
	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL,
		  A6XX_CP_PROTECT_CNTL_ACCESS_PROT_EN |
		  A6XX_CP_PROTECT_CNTL_ACCESS_FAULT_ON_VIOL_EN |
		  A6XX_CP_PROTECT_CNTL_LAST_SPAN_INF_RANGE);

	for (i = 0; i < count - 1; i++) {
		/* Intentionally skip writing to some registers */
		if (regs[i])
			gpu_write(gpu, REG_A6XX_CP_PROTECT(i), regs[i]);
	}
	/* last CP_PROTECT to have "infinite" length on the last entry */
	gpu_write(gpu, REG_A6XX_CP_PROTECT(count_max - 1), regs[i]);
}

static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	/* Unknown, introduced with A650 family, related to UBWC mode/ver 4 */
	u32 rgb565_predicator = 0;
	/* Unknown, introduced with A650 family */
	u32 uavflagprd_inv = 0;
	/* Whether the minimum access length is 64 bits */
	u32 min_acc_len = 0;
	/* Entirely magic, per-GPU-gen value */
	u32 ubwc_mode = 0;
	/*
	 * The Highest Bank Bit value represents the bit of the highest DDR bank.
	 * We then subtract 13 from it (13 is the minimum value allowed by hw) and
	 * write the lowest two bits of the remaining value as hbb_lo and the
	 * one above it as hbb_hi to the hardware. This should ideally use DRAM
	 * type detection.
	 */
	u32 hbb_hi = 0;
	u32 hbb_lo = 2;
	/* Unknown, introduced with A640/680 */
	u32 amsbc = 0;
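	/*
	 * Worked example of the Highest Bank Bit encoding above: the default
	 * hbb_lo = 2 with hbb_hi = 0 corresponds to HBB = 15 (15 - 13 = 2),
	 * while the a610 case below uses hbb_lo = 1 for HBB = 14.
	 */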

	if (adreno_is_a610(adreno_gpu)) {
		/* HBB = 14 */
		hbb_lo = 1;
		min_acc_len = 1;
		ubwc_mode = 1;
	}

	/* a618 is using the hw default values */
	if (adreno_is_a618(adreno_gpu))
		return;

	if (adreno_is_a619_holi(adreno_gpu))
		hbb_lo = 0;

	if (adreno_is_a640_family(adreno_gpu))
		amsbc = 1;

	if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) {
		/* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
		hbb_lo = 3;
		amsbc = 1;
		rgb565_predicator = 1;
		uavflagprd_inv = 2;
	}

	if (adreno_is_a690(adreno_gpu)) {
		hbb_lo = 2;
		amsbc = 1;
		rgb565_predicator = 1;
		uavflagprd_inv = 2;
	}

	if (adreno_is_7c3(adreno_gpu)) {
		hbb_lo = 1;
		amsbc = 1;
		rgb565_predicator = 1;
		uavflagprd_inv = 2;
	}

	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
		  rgb565_predicator << 11 | hbb_hi << 10 | amsbc << 4 |
		  min_acc_len << 3 | hbb_lo << 1 | ubwc_mode);

	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, hbb_hi << 4 |
		  min_acc_len << 3 | hbb_lo << 1 | ubwc_mode);

	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, hbb_hi << 10 |
		  uavflagprd_inv << 4 | min_acc_len << 3 |
		  hbb_lo << 1 | ubwc_mode);

	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, min_acc_len << 23 | hbb_lo << 21);
}

static int a6xx_cp_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002f);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* No workarounds enabled */
	OUT_RING(ring, 0x00000000);

	/* Pad rest of the cmds with 0's */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	a6xx_flush(gpu, ring);
	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
}

/*
 * Check that the microcode version is new enough to include several key
 * security fixes. Return true if the ucode is safe.
 */
static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
		struct drm_gem_object *obj)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
	u32 *buf = msm_gem_get_vaddr(obj);
	bool ret = false;

	if (IS_ERR(buf))
		return false;

	/*
	 * Targets up to a640 (a618, a630 and a640) need to check for a
	 * microcode version that is patched to support the whereami opcode or
	 * one that is new enough to include it by default.
	 *
	 * a650 tier targets don't need whereami but still need to be
	 * equal to or newer than 0.95 for other security fixes
	 *
	 * a660 targets have all the critical security fixes from the start
	 */
	if (!strcmp(sqe_name, "a630_sqe.fw")) {
		/*
		 * If the lowest nibble is 0xa that is an indication that this
		 * microcode has been patched. The actual version is in dword
		 * [3] but we only care about the patchlevel which is the lowest
		 * nibble of dword [3]
		 *
		 * Otherwise check that the firmware is greater than or equal
		 * to 1.90 which was the first version that had this fix built
		 * in
		 */
		if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
			(buf[0] & 0xfff) >= 0x190) {
			a6xx_gpu->has_whereami = true;
			ret = true;
			goto out;
		}

		DRM_DEV_ERROR(&gpu->pdev->dev,
			"a630 SQE ucode is too old. Have version %x need at least %x\n",
			buf[0] & 0xfff, 0x190);
	} else if (!strcmp(sqe_name, "a650_sqe.fw")) {
		if ((buf[0] & 0xfff) >= 0x095) {
			ret = true;
			goto out;
		}

		DRM_DEV_ERROR(&gpu->pdev->dev,
			"a650 SQE ucode is too old. Have version %x need at least %x\n",
			buf[0] & 0xfff, 0x095);
	} else if (!strcmp(sqe_name, "a660_sqe.fw")) {
		ret = true;
	} else {
		DRM_DEV_ERROR(&gpu->pdev->dev,
			"unknown GPU, add it to a6xx_ucode_check_version()!!\n");
	}
out:
	msm_gem_put_vaddr(obj);
	return ret;
}

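/*
 * Pin the SQE firmware into a GPU buffer (checking its version first) and,
 * for targets that need it, allocate the privileged buffer used for the
 * ringbuffer RPTR shadow.
 */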
static int a6xx_ucode_load(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	if (!a6xx_gpu->sqe_bo) {
		a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);

		if (IS_ERR(a6xx_gpu->sqe_bo)) {
			int ret = PTR_ERR(a6xx_gpu->sqe_bo);

			a6xx_gpu->sqe_bo = NULL;
			DRM_DEV_ERROR(&gpu->pdev->dev,
				"Could not allocate SQE ucode: %d\n", ret);

			return ret;
		}

		msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
		if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
			msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
			drm_gem_object_put(a6xx_gpu->sqe_bo);

			a6xx_gpu->sqe_bo = NULL;
			return -EPERM;
		}
	}

	/*
	 * Expanded APRIV and targets that support WHERE_AM_I both need a
	 * privileged buffer to store the RPTR shadow
	 */
	if ((adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) &&
	    !a6xx_gpu->shadow_bo) {
		a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
						      sizeof(u32) * gpu->nr_rings,
						      MSM_BO_WC | MSM_BO_MAP_PRIV,
						      gpu->aspace, &a6xx_gpu->shadow_bo,
						      &a6xx_gpu->shadow_iova);

		if (IS_ERR(a6xx_gpu->shadow))
			return PTR_ERR(a6xx_gpu->shadow);

		msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
	}

	return 0;
}

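/*
 * The zap shader only needs to be loaded into the secure world once, so
 * remember whether a previous attempt succeeded and skip the reload on
 * subsequent hw_init calls.
 */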
static int a6xx_zap_shader_init(struct msm_gpu *gpu)
{
	static bool loaded;
	int ret;

	if (loaded)
		return 0;

	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);

	loaded = !ret;
	return ret;
}

#define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
	  A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
	  A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	  A6XX_RBBM_INT_0_MASK_CP_IB2 | \
	  A6XX_RBBM_INT_0_MASK_CP_IB1 | \
	  A6XX_RBBM_INT_0_MASK_CP_RB | \
	  A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	  A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
	  A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
	  A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	  A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)

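/*
 * Bring the GPU hardware up: release GBIF/VBIF halts, program clock gating,
 * UBWC, CP protection and interrupts, point the CP at the SQE firmware and
 * ringbuffer, and finally drop out of secure mode either through the zap
 * shader or by writing SECVID_TRUST_CNTL directly.
 */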
static int hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int ret;

	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
		/* Make sure the GMU keeps the GPU on while we set it up */
		ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
		if (ret)
			return ret;
	}

	/* Clear GBIF halt in case GX domain was not collapsed */
	if (adreno_is_a619_holi(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
		gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0);
		/* Let's make extra sure that the GPU can access the memory.. */
		mb();
	} else if (a6xx_has_gbif(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
		gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
		/* Let's make extra sure that the GPU can access the memory.. */
		mb();
	}

	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);

	if (adreno_is_a619_holi(adreno_gpu))
		a6xx_sptprac_enable(gmu);

	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Turn on 64 bit addressing for all blocks */
	gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);

	/* enable hardware clockgating */
	a6xx_set_hwcg(gpu, true);

	/* VBIF/GBIF start */
	if (adreno_is_a610(adreno_gpu) ||
	    adreno_is_a640_family(adreno_gpu) ||
	    adreno_is_a650_family(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3);
	} else {
		gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
	}

	if (adreno_is_a630(adreno_gpu))
		gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);

	/* Disable L2 bypass in the UCHE */
	gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, 0x0001ffffffffffc0llu);
	gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu);
	gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu);

	if (!adreno_is_a650_family(adreno_gpu)) {
		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN, 0x00100000);

		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX,
			0x00100000 + adreno_gpu->info->gmem - 1);
	}

	gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
	gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);

	if (adreno_is_a640_family(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
	} else if (adreno_is_a610(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060);
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16);
	} else {
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
	}

	if (adreno_is_a660_family(adreno_gpu))
		gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);

	/* Setting the mem pool size */
	if (adreno_is_a610(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48);
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47);
	} else
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);

	/* Setting the primFifo thresholds default values,
	 * and vccCacheSkipDis=1 bit (0x200) for A640 and newer
	 */
	if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu) ||
	    adreno_is_a690(adreno_gpu))
		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200);
	else if (adreno_is_a640_family(adreno_gpu) || adreno_is_7c3(adreno_gpu))
		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200200);
	else if (adreno_is_a619(adreno_gpu))
		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00018000);
	else if (adreno_is_a610(adreno_gpu))
		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00080000);
	else
		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00180000);

	/* Set the AHB default slave response to "ERROR" */
	gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);

	a6xx_set_ubwc_config(gpu);

	/* Enable fault detection */
	if (adreno_is_a619(adreno_gpu))
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff);
	else if (adreno_is_a610(adreno_gpu))
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3ffff);
	else
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff);

	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1);

	/* Set weights for bicubic filtering */
	if (adreno_is_a650_family(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
			0x3fe05ff4);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
			0x3fa0ebee);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
			0x3f5193ed);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
			0x3f0243f0);
	}

	/* Set up the CX GMU counter 0 to count busy ticks */
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);

	/* Enable the power counter */
	gmu_rmw(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, BIT(5));
	gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);

	/* Protect registers from the CP */
	a6xx_set_cp_protect(gpu);

	if (adreno_is_a660_family(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
	}

	/* Set dualQ + disable afull for A660 GPU */
	if (adreno_is_a660(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);

	/* Enable expanded apriv for targets that support it */
	if (gpu->hw_apriv) {
		gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
			(1 << 6) | (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1));
	}

	/* Enable interrupts */
	gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, A6XX_INT_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		goto out;

	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);

	/* Targets that support extended APRIV can use the RPTR shadow from
	 * hardware but all the other ones need to disable the feature. Targets
	 * that support the WHERE_AM_I opcode can use that instead
	 */
	if (adreno_gpu->base.hw_apriv)
		gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
	else
		gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
			MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Configure the RPTR shadow if needed: */
	if (a6xx_gpu->shadow_bo) {
		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR,
			shadowptr(a6xx_gpu, gpu->rb[0]));
	}

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];

	gpu->cur_ctx_seqno = 0;

	/* Enable the SQE to start the CP engine */
	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);

	ret = a6xx_cp_init(gpu);
	if (ret)
		goto out;

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a6xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a6xx_flush(gpu, gpu->rb[0]);
		if (!a6xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
		ret = 0;
	} else {
		return ret;
	}

out:
	if (adreno_has_gmu_wrapper(adreno_gpu))
		return ret;
	/*
	 * Tell the GMU that we are done touching the GPU and it can start power
	 * management
	 */
	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

	if (a6xx_gpu->gmu.legacy) {
		/* Take the GMU out of its special boot mode */
		a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
	}

	return ret;
}

static int a6xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int ret;

	mutex_lock(&a6xx_gpu->gmu.lock);
	ret = hw_init(gpu);
	mutex_unlock(&a6xx_gpu->gmu.lock);

	return ret;
}

static void a6xx_dump(struct msm_gpu *gpu)
{
	DRM_DEV_INFO(&gpu->pdev->dev, "status:   %08x\n",
			gpu_read(gpu, REG_A6XX_RBBM_STATUS));
	adreno_dump(gpu);
}

1490static void a6xx_recover(struct msm_gpu *gpu)
1491{
1492	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1493	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1494	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1495	int i, active_submits;
1496
1497	adreno_dump_info(gpu);
1498
1499	for (i = 0; i < 8; i++)
1500		DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
1501			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));
1502
1503	if (hang_debug)
1504		a6xx_dump(gpu);
1505
	/*
	 * Set the hung flag so that recovery-specific sequences are taken
	 * during the runtime PM suspend we are about to trigger.
	 */
1510	a6xx_gpu->hung = true;
1511
1512	/* Halt SQE first */
1513	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
1514
1515	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);
1516
	/* active_submits won't change until we make a submission */
1518	mutex_lock(&gpu->active_lock);
1519	active_submits = gpu->active_submits;
1520
	/*
	 * Temporarily clear active_submits count to silence a WARN() in the
	 * runtime suspend callback
	 */
1525	gpu->active_submits = 0;
1526
1527	if (adreno_has_gmu_wrapper(adreno_gpu)) {
1528		/* Drain the outstanding traffic on memory buses */
1529		a6xx_bus_clear_pending_transactions(adreno_gpu, true);
1530
1531		/* Reset the GPU to a clean state */
1532		a6xx_gpu_sw_reset(gpu, true);
1533		a6xx_gpu_sw_reset(gpu, false);
1534	}
1535
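	/*
	 * Arrange to be notified when the CX power domain (gdsc) actually
	 * collapses, so that the wait below can confirm the power-off really
	 * happened before the GPU is powered back up.
	 */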
1536	reinit_completion(&gmu->pd_gate);
1537	dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
1538	dev_pm_genpd_synced_poweroff(gmu->cxpd);
1539
1540	/* Drop the rpm refcount from active submits */
1541	if (active_submits)
1542		pm_runtime_put(&gpu->pdev->dev);
1543
1544	/* And the final one from recover worker */
1545	pm_runtime_put_sync(&gpu->pdev->dev);
1546
1547	if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
1548		DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");
1549
1550	dev_pm_genpd_remove_notifier(gmu->cxpd);
1551
1552	pm_runtime_use_autosuspend(&gpu->pdev->dev);
1553
1554	if (active_submits)
1555		pm_runtime_get(&gpu->pdev->dev);
1556
1557	pm_runtime_get_sync(&gpu->pdev->dev);
1558
1559	gpu->active_submits = active_submits;
1560	mutex_unlock(&gpu->active_lock);
1561
1562	msm_gpu_hw_init(gpu);
1563	a6xx_gpu->hung = false;
1564}
1565
1566static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
1567{
1568	static const char *uche_clients[7] = {
1569		"VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
1570	};
1571	u32 val;
1572
1573	if (mid < 1 || mid > 3)
1574		return "UNKNOWN";
1575
	/*
	 * The source of the data depends on the mid ID read from FSYNR1
	 * and the client ID read from the UCHE block.
	 */
1580	val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
1581
1582	/* mid = 3 is most precise and refers to only one block per client */
1583	if (mid == 3)
1584		return uche_clients[val & 7];
1585
	/* For mid=2 the source is TP when the client id is 0, otherwise TP or VFD */
1587	if (mid == 2)
1588		return ((val & 7) == 0) ? "TP" : "TP|VFD";
1589
1590	/* For mid=1 just return "UCHE" as a catchall for everything else */
1591	return "UCHE";
1592}
1593
1594static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
1595{
1596	if (id == 0)
1597		return "CP";
1598	else if (id == 4)
1599		return "CCU";
1600	else if (id == 6)
1601		return "CDP Prefetch";
1602
1603	return a6xx_uche_fault_block(gpu, id);
1604}
1605
1606static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1607{
1608	struct msm_gpu *gpu = arg;
1609	struct adreno_smmu_fault_info *info = data;
1610	const char *block = "unknown";
1611
1612	u32 scratch[] = {
1613			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
1614			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
1615			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
1616			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)),
1617	};
1618
1619	if (info)
1620		block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);
1621
1622	return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
1623}
1624
1625static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
1626{
1627	u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS);
1628
1629	if (status & A6XX_CP_INT_CP_OPCODE_ERROR) {
1630		u32 val;
1631
1632		gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1);
1633		val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA);
1634		dev_err_ratelimited(&gpu->pdev->dev,
1635			"CP | opcode error | possible opcode=0x%8.8X\n",
1636			val);
1637	}
1638
1639	if (status & A6XX_CP_INT_CP_UCODE_ERROR)
1640		dev_err_ratelimited(&gpu->pdev->dev,
1641			"CP ucode error interrupt\n");
1642
1643	if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR)
1644		dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n",
1645			gpu_read(gpu, REG_A6XX_CP_HW_FAULT));
1646
1647	if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1648		u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS);
1649
1650		dev_err_ratelimited(&gpu->pdev->dev,
1651			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1652			val & (1 << 20) ? "READ" : "WRITE",
1653			(val & 0x3ffff), val);
1654	}
1655
1656	if (status & A6XX_CP_INT_CP_AHB_ERROR)
1657		dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n");
1658
1659	if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR)
1660		dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n");
1661
1662	if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR)
1663		dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n");
}
1666
1667static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
1668{
1669	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1670	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1671	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1672
1673	/*
1674	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
1675	 * but the fault handler will trigger the devcore dump, and we want
1676	 * to otherwise resume normally rather than killing the submit, so
1677	 * just bail.
1678	 */
1679	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
1680		return;
1681
1682	/*
1683	 * Force the GPU to stay on until after we finish
1684	 * collecting information
1685	 */
1686	if (!adreno_has_gmu_wrapper(adreno_gpu))
1687		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
1688
1689	DRM_DEV_ERROR(&gpu->pdev->dev,
1690		"gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1691		ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1692		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
1693		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
1694		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1695		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1696		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1697		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1698		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
1699
1700	/* Turn off the hangcheck timer to keep it from bothering us */
1701	del_timer(&gpu->hangcheck_timer);
1702
1703	kthread_queue_work(gpu->worker, &gpu->recover_work);
1704}
1705
1706static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
1707{
1708	struct msm_drm_private *priv = gpu->dev->dev_private;
1709	u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
1710
1711	gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
1712
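	/*
	 * If error IRQs have been disabled (e.g. to exercise the timeout
	 * based hang detection instead), only act on the retire interrupt
	 * below.
	 */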
1713	if (priv->disable_err_irq)
1714		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
1715
1716	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
1717		a6xx_fault_detect_irq(gpu);
1718
1719	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
1720		dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");
1721
1722	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1723		a6xx_cp_hw_err_irq(gpu);
1724
1725	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
1726		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
1727
1728	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1729		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
1730
1731	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1732		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
1733
1734	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS)
1735		msm_gpu_retire(gpu);
1736
1737	return IRQ_HANDLED;
1738}
1739
1740static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
1741{
1742	llcc_slice_deactivate(a6xx_gpu->llc_slice);
1743	llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
1744}
1745
1746static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1747{
1748	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1749	struct msm_gpu *gpu = &adreno_gpu->base;
1750	u32 cntl1_regval = 0;
1751
1752	if (IS_ERR(a6xx_gpu->llc_mmio))
1753		return;
1754
1755	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1756		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1757
1758		gpu_scid &= 0x1f;
1759		cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
1760			       (gpu_scid << 15) | (gpu_scid << 20);
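		/*
		 * The 5-bit SCID is replicated into five consecutive fields
		 * of the system cache control register (bits 0-4, 5-9, 10-14,
		 * 15-19 and 20-24), one per GPU sub-block that uses the GPU
		 * LLC slice.
		 */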
1761
		/*
		 * On A660, the SCID programming for UCHE traffic is done in
		 * A6XX_GBIF_SCACHE_CNTL0[14:10].
		 */
1765		if (adreno_is_a660_family(adreno_gpu))
1766			gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
1767				(1 << 8), (gpu_scid << 10) | (1 << 8));
1768	}
1769
	/*
	 * For targets with an MMU500, activate the slice but don't program
	 * the register. The XBL will take care of that.
	 */
1774	if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
1775		if (!a6xx_gpu->have_mmu500) {
1776			u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
1777
1778			gpuhtw_scid &= 0x1f;
1779			cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
1780		}
1781	}
1782
1783	if (!cntl1_regval)
1784		return;
1785
1786	/*
1787	 * Program the slice IDs for the various GPU blocks and GPU MMU
1788	 * pagetables
1789	 */
1790	if (!a6xx_gpu->have_mmu500) {
1791		a6xx_llc_write(a6xx_gpu,
1792			REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
1793
1794		/*
1795		 * Program cacheability overrides to not allocate cache
1796		 * lines on a write miss
1797		 */
1798		a6xx_llc_rmw(a6xx_gpu,
1799			REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
1800		return;
1801	}
1802
1803	gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval);
1804}
1805
1806static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
1807{
1808	/* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
1809	if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
1810		return;
1811
1812	llcc_slice_putd(a6xx_gpu->llc_slice);
1813	llcc_slice_putd(a6xx_gpu->htw_llc_slice);
1814}
1815
1816static void a6xx_llc_slices_init(struct platform_device *pdev,
1817		struct a6xx_gpu *a6xx_gpu)
1818{
1819	struct device_node *phandle;
1820
1821	/* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
1822	if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
1823		return;
1824
1825	/*
1826	 * There is a different programming path for targets with an mmu500
1827	 * attached, so detect if that is the case
1828	 */
1829	phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0);
1830	a6xx_gpu->have_mmu500 = (phandle &&
1831		of_device_is_compatible(phandle, "arm,mmu-500"));
1832	of_node_put(phandle);
1833
1834	if (a6xx_gpu->have_mmu500)
1835		a6xx_gpu->llc_mmio = NULL;
1836	else
1837		a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem");
1838
1839	a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
1840	a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
1841
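	/*
	 * If neither slice could be obtained there is nothing to program;
	 * mark llc_mmio as an error so a6xx_llc_activate() bails out early.
	 */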
1842	if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
1843		a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
1844}
1845
1846#define GBIF_CLIENT_HALT_MASK		BIT(0)
1847#define GBIF_ARB_HALT_MASK		BIT(1)
1848#define VBIF_XIN_HALT_CTRL0_MASK	GENMASK(3, 0)
1849#define VBIF_RESET_ACK_MASK		0xF0
1850#define GPR0_GBIF_HALT_REQUEST		0x1E0
1851
1852void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
1853{
1854	struct msm_gpu *gpu = &adreno_gpu->base;
1855
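	/*
	 * Three flavours of bus halt: A619_holi requests a GBIF halt through
	 * GPR0, pre-GBIF parts halt their VBIF XIN clients, and everything
	 * else halts the GBIF client and arbiter sides separately below.
	 */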
1856	if (adreno_is_a619_holi(adreno_gpu)) {
1857		gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, GPR0_GBIF_HALT_REQUEST);
1858		spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) &
1859				(VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK);
1860	} else if (!a6xx_has_gbif(adreno_gpu)) {
1861		gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, VBIF_XIN_HALT_CTRL0_MASK);
1862		spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
1863				(VBIF_XIN_HALT_CTRL0_MASK)) == VBIF_XIN_HALT_CTRL0_MASK);
1864		gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
1865
1866		return;
1867	}
1868
1869	if (gx_off) {
1870		/* Halt the gx side of GBIF */
1871		gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1);
1872		spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1);
1873	}
1874
1875	/* Halt new client requests on GBIF */
1876	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
1877	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
1878			(GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
1879
1880	/* Halt all AXI requests on GBIF */
1881	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
			(GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
1884
1885	/* The GBIF halt needs to be explicitly cleared */
1886	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
1887}
1888
1889void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert)
1890{
1891	/* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */
1892	if (adreno_is_a610(to_adreno_gpu(gpu)))
1893		return;
1894
1895	gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert);
1896	/* Perform a bogus read and add a brief delay to ensure ordering. */
1897	gpu_read(gpu, REG_A6XX_RBBM_SW_RESET_CMD);
1898	udelay(1);
1899
1900	/* The reset line needs to be asserted for at least 100 us */
1901	if (assert)
1902		udelay(100);
1903}
1904
1905static int a6xx_gmu_pm_resume(struct msm_gpu *gpu)
1906{
1907	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1908	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1909	int ret;
1910
1911	gpu->needs_hw_init = true;
1912
1913	trace_msm_gpu_resume(0);
1914
1915	mutex_lock(&a6xx_gpu->gmu.lock);
1916	ret = a6xx_gmu_resume(a6xx_gpu);
1917	mutex_unlock(&a6xx_gpu->gmu.lock);
1918	if (ret)
1919		return ret;
1920
1921	msm_devfreq_resume(gpu);
1922
1923	a6xx_llc_activate(a6xx_gpu);
1924
1925	return ret;
1926}
1927
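/*
 * Resume path for devices with only a GMU wrapper: with no GMU firmware to
 * do it for us, the driver itself picks an OPP and turns on the clocks and
 * power domains. Used by funcs_gmuwrapper.
 */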
1928static int a6xx_pm_resume(struct msm_gpu *gpu)
1929{
1930	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1931	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1932	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1933	unsigned long freq = gpu->fast_rate;
1934	struct dev_pm_opp *opp;
1935	int ret;
1936
1937	gpu->needs_hw_init = true;
1938
1939	trace_msm_gpu_resume(0);
1940
1941	mutex_lock(&a6xx_gpu->gmu.lock);
1942
1943	opp = dev_pm_opp_find_freq_ceil(&gpu->pdev->dev, &freq);
1944	if (IS_ERR(opp)) {
1945		ret = PTR_ERR(opp);
1946		goto err_set_opp;
1947	}
1948	dev_pm_opp_put(opp);
1949
1950	/* Set the core clock and bus bw, having VDD scaling in mind */
1951	dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
1952
1953	pm_runtime_resume_and_get(gmu->dev);
1954	pm_runtime_resume_and_get(gmu->gxpd);
1955
1956	ret = clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
1957	if (ret)
1958		goto err_bulk_clk;
1959
1960	if (adreno_is_a619_holi(adreno_gpu))
1961		a6xx_sptprac_enable(gmu);
1962
1963	/* If anything goes south, tear the GPU down piece by piece.. */
1964	if (ret) {
1965err_bulk_clk:
1966		pm_runtime_put(gmu->gxpd);
1967		pm_runtime_put(gmu->dev);
1968		dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
1969	}
1970err_set_opp:
1971	mutex_unlock(&a6xx_gpu->gmu.lock);
1972
1973	if (!ret)
1974		msm_devfreq_resume(gpu);
1975
1976	return ret;
1977}
1978
1979static int a6xx_gmu_pm_suspend(struct msm_gpu *gpu)
1980{
1981	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1982	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1983	int i, ret;
1984
1985	trace_msm_gpu_suspend(0);
1986
1987	a6xx_llc_deactivate(a6xx_gpu);
1988
1989	msm_devfreq_suspend(gpu);
1990
1991	mutex_lock(&a6xx_gpu->gmu.lock);
1992	ret = a6xx_gmu_stop(a6xx_gpu);
1993	mutex_unlock(&a6xx_gpu->gmu.lock);
1994	if (ret)
1995		return ret;
1996
1997	if (a6xx_gpu->shadow_bo)
1998		for (i = 0; i < gpu->nr_rings; i++)
1999			a6xx_gpu->shadow[i] = 0;
2000
2001	gpu->suspend_count++;
2002
2003	return 0;
2004}
2005
2006static int a6xx_pm_suspend(struct msm_gpu *gpu)
2007{
2008	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2009	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2010	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
2011	int i;
2012
2013	trace_msm_gpu_suspend(0);
2014
2015	msm_devfreq_suspend(gpu);
2016
2017	mutex_lock(&a6xx_gpu->gmu.lock);
2018
2019	/* Drain the outstanding traffic on memory buses */
2020	a6xx_bus_clear_pending_transactions(adreno_gpu, true);
2021
2022	if (adreno_is_a619_holi(adreno_gpu))
2023		a6xx_sptprac_disable(gmu);
2024
2025	clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
2026
2027	pm_runtime_put_sync(gmu->gxpd);
2028	dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
2029	pm_runtime_put_sync(gmu->dev);
2030
2031	mutex_unlock(&a6xx_gpu->gmu.lock);
2032
2033	if (a6xx_gpu->shadow_bo)
2034		for (i = 0; i < gpu->nr_rings; i++)
2035			a6xx_gpu->shadow[i] = 0;
2036
2037	gpu->suspend_count++;
2038
2039	return 0;
2040}
2041
2042static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
2043{
2044	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2045	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2046
2047	mutex_lock(&a6xx_gpu->gmu.lock);
2048
2049	/* Force the GPU power on so we can read this register */
2050	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
2051
2052	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
2053
2054	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
2055
2056	mutex_unlock(&a6xx_gpu->gmu.lock);
2057
2058	return 0;
2059}
2060
2061static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
2062{
2063	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
2064	return 0;
2065}
2066
2067static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
2068{
2069	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2070	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2071
2072	return a6xx_gpu->cur_ring;
2073}
2074
2075static void a6xx_destroy(struct msm_gpu *gpu)
2076{
2077	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2078	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2079
2080	if (a6xx_gpu->sqe_bo) {
2081		msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
2082		drm_gem_object_put(a6xx_gpu->sqe_bo);
2083	}
2084
2085	if (a6xx_gpu->shadow_bo) {
2086		msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->aspace);
2087		drm_gem_object_put(a6xx_gpu->shadow_bo);
2088	}
2089
2090	a6xx_llc_slices_destroy(a6xx_gpu);
2091
2092	a6xx_gmu_remove(a6xx_gpu);
2093
2094	adreno_gpu_cleanup(adreno_gpu);
2095
2096	kfree(a6xx_gpu);
2097}
2098
2099static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
2100{
2101	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2102	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2103	u64 busy_cycles;
2104
2105	/* 19.2MHz */
2106	*out_sample_rate = 19200000;
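	/*
	 * The counter runs off the 19.2 MHz XO clock, so the busy fraction
	 * over a sampling period works out to roughly
	 * busy_cycles_delta / (19200000 * period_in_seconds).
	 */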
2107
2108	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
2109			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
2110			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
2111
2112	return busy_cycles;
2113}
2114
2115static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
2116			      bool suspended)
2117{
2118	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2119	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2120
2121	mutex_lock(&a6xx_gpu->gmu.lock);
2122	a6xx_gmu_set_freq(gpu, opp, suspended);
2123	mutex_unlock(&a6xx_gpu->gmu.lock);
2124}
2125
2126static struct msm_gem_address_space *
2127a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
2128{
2129	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2130	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2131	unsigned long quirks = 0;
2132
	/*
	 * This allows the GPU to set the bus attributes required to use the
	 * system cache on behalf of the IOMMU page table walker.
	 */
2137	if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice) &&
2138	    !device_iommu_capable(&pdev->dev, IOMMU_CAP_CACHE_COHERENCY))
2139		quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
2140
2141	return adreno_iommu_create_address_space(gpu, pdev, quirks);
2142}
2143
2144static struct msm_gem_address_space *
2145a6xx_create_private_address_space(struct msm_gpu *gpu)
2146{
2147	struct msm_mmu *mmu;
2148
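	/*
	 * Per-process address spaces get their own pagetable parented to the
	 * GPU's global MMU; the VA range starts at 4 GiB (0x100000000) and
	 * spans the target-specific private address space size.
	 */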
2149	mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
2150
2151	if (IS_ERR(mmu))
2152		return ERR_CAST(mmu);
2153
2154	return msm_gem_address_space_create(mmu,
2155		"gpu", 0x100000000ULL,
2156		adreno_private_address_space_size(gpu));
2157}
2158
2159static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2160{
2161	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2162	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2163
2164	if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
2165		return a6xx_gpu->shadow[ring->id];
2166
2167	return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
2168}
2169
2170static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2171{
2172	struct msm_cp_state cp_state = {
2173		.ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
2174		.ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
2175		.ib1_rem  = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
2176		.ib2_rem  = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
2177	};
2178	bool progress;
2179
2180	/*
2181	 * Adjust the remaining data to account for what has already been
2182	 * fetched from memory, but not yet consumed by the SQE.
2183	 *
2184	 * This is not *technically* correct, the amount buffered could
2185	 * exceed the IB size due to hw prefetching ahead, but:
2186	 *
2187	 * (1) We aren't trying to find the exact position, just whether
2188	 *     progress has been made
2189	 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
2190	 *     to prevent prefetching into an unrelated submit.  (And
2191	 *     either way, at some point the ROQ will be full.)
2192	 */
2193	cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB1) >> 16;
2194	cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB2) >> 16;
2195
2196	progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
2197
2198	ring->last_cp_state = cp_state;
2199
2200	return progress;
2201}
2202
2203static u32 fuse_to_supp_hw(const struct adreno_info *info, u32 fuse)
2204{
2205	if (!info->speedbins)
2206		return UINT_MAX;
2207
2208	for (int i = 0; info->speedbins[i].fuse != SHRT_MAX; i++)
2209		if (info->speedbins[i].fuse == fuse)
2210			return BIT(info->speedbins[i].speedbin);
2211
2212	return UINT_MAX;
2213}
2214
2215static int a6xx_set_supported_hw(struct device *dev, const struct adreno_info *info)
2216{
2217	u32 supp_hw;
2218	u32 speedbin;
2219	int ret;
2220
2221	ret = adreno_read_speedbin(dev, &speedbin);
	/*
	 * -ENOENT means that the platform doesn't support speedbin, which is
	 * fine.
	 */
2226	if (ret == -ENOENT) {
2227		return 0;
2228	} else if (ret) {
2229		dev_err_probe(dev, ret,
2230			      "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
2231		return ret;
2232	}
2233
2234	supp_hw = fuse_to_supp_hw(info, speedbin);
2235
2236	if (supp_hw == UINT_MAX) {
2237		DRM_DEV_ERROR(dev,
2238			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
2239			speedbin);
2240		supp_hw = BIT(0); /* Default */
2241	}
2242
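	/*
	 * supp_hw is a one-hot bitmask (BIT(speedbin), or BIT(0) as the
	 * fallback above) that the OPP core matches against each OPP node's
	 * opp-supported-hw property, so only OPPs valid for this speed bin
	 * remain enabled. A hypothetical DT entry such as:
	 *
	 *   opp-850000000 {
	 *           opp-hz = /bits/ 64 <850000000>;
	 *           opp-supported-hw = <0x02>;
	 *   };
	 *
	 * would only be kept if fuse_to_supp_hw() returned BIT(1).
	 */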
2243	ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
2244	if (ret)
2245		return ret;
2246
2247	return 0;
2248}
2249
2250static const struct adreno_gpu_funcs funcs = {
2251	.base = {
2252		.get_param = adreno_get_param,
2253		.set_param = adreno_set_param,
2254		.hw_init = a6xx_hw_init,
2255		.ucode_load = a6xx_ucode_load,
2256		.pm_suspend = a6xx_gmu_pm_suspend,
2257		.pm_resume = a6xx_gmu_pm_resume,
2258		.recover = a6xx_recover,
2259		.submit = a6xx_submit,
2260		.active_ring = a6xx_active_ring,
2261		.irq = a6xx_irq,
2262		.destroy = a6xx_destroy,
2263#if defined(CONFIG_DRM_MSM_GPU_STATE)
2264		.show = a6xx_show,
2265#endif
2266		.gpu_busy = a6xx_gpu_busy,
2267		.gpu_get_freq = a6xx_gmu_get_freq,
2268		.gpu_set_freq = a6xx_gpu_set_freq,
2269#if defined(CONFIG_DRM_MSM_GPU_STATE)
2270		.gpu_state_get = a6xx_gpu_state_get,
2271		.gpu_state_put = a6xx_gpu_state_put,
2272#endif
2273		.create_address_space = a6xx_create_address_space,
2274		.create_private_address_space = a6xx_create_private_address_space,
2275		.get_rptr = a6xx_get_rptr,
2276		.progress = a6xx_progress,
2277	},
2278	.get_timestamp = a6xx_gmu_get_timestamp,
2279};
2280
2281static const struct adreno_gpu_funcs funcs_gmuwrapper = {
2282	.base = {
2283		.get_param = adreno_get_param,
2284		.set_param = adreno_set_param,
2285		.hw_init = a6xx_hw_init,
2286		.ucode_load = a6xx_ucode_load,
2287		.pm_suspend = a6xx_pm_suspend,
2288		.pm_resume = a6xx_pm_resume,
2289		.recover = a6xx_recover,
2290		.submit = a6xx_submit,
2291		.active_ring = a6xx_active_ring,
2292		.irq = a6xx_irq,
2293		.destroy = a6xx_destroy,
2294#if defined(CONFIG_DRM_MSM_GPU_STATE)
2295		.show = a6xx_show,
2296#endif
2297		.gpu_busy = a6xx_gpu_busy,
2298#if defined(CONFIG_DRM_MSM_GPU_STATE)
2299		.gpu_state_get = a6xx_gpu_state_get,
2300		.gpu_state_put = a6xx_gpu_state_put,
2301#endif
2302		.create_address_space = a6xx_create_address_space,
2303		.create_private_address_space = a6xx_create_private_address_space,
2304		.get_rptr = a6xx_get_rptr,
2305		.progress = a6xx_progress,
2306	},
2307	.get_timestamp = a6xx_get_timestamp,
2308};
2309
2310struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
2311{
2312	struct msm_drm_private *priv = dev->dev_private;
2313	struct platform_device *pdev = priv->gpu_pdev;
2314	struct adreno_platform_config *config = pdev->dev.platform_data;
2315	struct device_node *node;
2316	struct a6xx_gpu *a6xx_gpu;
2317	struct adreno_gpu *adreno_gpu;
2318	struct msm_gpu *gpu;
2319	int ret;
2320
2321	a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL);
2322	if (!a6xx_gpu)
2323		return ERR_PTR(-ENOMEM);
2324
2325	adreno_gpu = &a6xx_gpu->base;
2326	gpu = &adreno_gpu->base;
2327
2328	mutex_init(&a6xx_gpu->gmu.lock);
2329
2330	adreno_gpu->registers = NULL;
2331
2332	/* Check if there is a GMU phandle and set it up */
2333	node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
2334	/* FIXME: How do we gracefully handle this? */
2335	BUG_ON(!node);
2336
2337	adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper");
2338
2339	adreno_gpu->base.hw_apriv =
2340		!!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV);
2341
2342	a6xx_llc_slices_init(pdev, a6xx_gpu);
2343
2344	ret = a6xx_set_supported_hw(&pdev->dev, config->info);
2345	if (ret) {
2346		a6xx_destroy(&(a6xx_gpu->base.base));
2347		return ERR_PTR(ret);
2348	}
2349
2350	if (adreno_has_gmu_wrapper(adreno_gpu))
2351		ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_gmuwrapper, 1);
2352	else
2353		ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
2354	if (ret) {
2355		a6xx_destroy(&(a6xx_gpu->base.base));
2356		return ERR_PTR(ret);
2357	}
2358
2359	/*
2360	 * For now only clamp to idle freq for devices where this is known not
2361	 * to cause power supply issues:
2362	 */
2363	if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
2364		priv->gpu_clamp_to_idle = true;
2365
2366	if (adreno_has_gmu_wrapper(adreno_gpu))
2367		ret = a6xx_gmu_wrapper_init(a6xx_gpu, node);
2368	else
2369		ret = a6xx_gmu_init(a6xx_gpu, node);
2370	of_node_put(node);
2371	if (ret) {
2372		a6xx_destroy(&(a6xx_gpu->base.base));
2373		return ERR_PTR(ret);
2374	}
2375
2376	if (gpu->aspace)
2377		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
2378				a6xx_fault_handler);
2379
2380	return gpu;
2381}
2382