1// SPDX-License-Identifier: GPL-2.0-only
2/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3 */
4
5#include <linux/kernel.h>
6#include <linux/types.h>
7#include <linux/cpumask.h>
8#include <linux/firmware/qcom/qcom_scm.h>
9#include <linux/pm_opp.h>
10#include <linux/nvmem-consumer.h>
11#include <linux/slab.h>
12#include "msm_gem.h"
13#include "msm_mmu.h"
14#include "a5xx_gpu.h"
15
16extern bool hang_debug;
17static void a5xx_dump(struct msm_gpu *gpu);
18
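/* Peripheral Authentication Service (PAS) id used by SCM calls for the GPU zap shader */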
19#define GPU_PAS_ID 13
20
21static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
22{
23	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
25
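	/*
	 * CP_WHERE_AM_I asks the CP to write its current read pointer to the
	 * per-ring shadow buffer. It is only available with patched microcode
	 * (see a5xx_ucode_check_version()).
	 */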
26	if (a5xx_gpu->has_whereami) {
27		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
28		OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
29		OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
30	}
31}
32
33void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
34		bool sync)
35{
36	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
37	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
38	uint32_t wptr;
39	unsigned long flags;
40
41	/*
42	 * Most flush operations need to issue a WHERE_AM_I opcode to sync up
43	 * the rptr shadow
44	 */
45	if (sync)
46		update_shadow_rptr(gpu, ring);
47
48	spin_lock_irqsave(&ring->preempt_lock, flags);
49
50	/* Copy the shadow to the actual register */
51	ring->cur = ring->next;
52
53	/* Make sure to wrap wptr if we need to */
54	wptr = get_wptr(ring);
55
56	spin_unlock_irqrestore(&ring->preempt_lock, flags);
57
58	/* Make sure everything is posted before making a decision */
59	mb();
60
61	/* Update HW if this is the current ring and we are not in preempt */
62	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
63		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
64}
65
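/*
 * Submission path for CONFIG_DRM_MSM_GPU_SUDO submits that are copied directly
 * into the ringbuffer instead of being executed as indirect buffers.
 */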
66static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
67{
68	struct msm_ringbuffer *ring = submit->ring;
69	struct drm_gem_object *obj;
70	uint32_t *ptr, dwords;
71	unsigned int i;
72
73	for (i = 0; i < submit->nr_cmds; i++) {
74		switch (submit->cmd[i].type) {
75		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
76			break;
77		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
78			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
79				break;
80			fallthrough;
81		case MSM_SUBMIT_CMD_BUF:
82			/* copy commands into RB: */
83			obj = submit->bos[submit->cmd[i].idx].obj;
84			dwords = submit->cmd[i].size;
85
86			ptr = msm_gem_get_vaddr(obj);
87
88			/* _get_vaddr() shouldn't fail at this point,
89			 * since we've already mapped it once in
90			 * submit_reloc()
91			 */
92			if (WARN_ON(IS_ERR_OR_NULL(ptr)))
93				return;
94
95			for (i = 0; i < dwords; i++) {
96				/* normally the OUT_PKTn() would wait
97				 * for space for the packet.  But since
98				 * we just OUT_RING() the whole thing,
99				 * need to call adreno_wait_ring()
100				 * ourself:
101				 */
102				adreno_wait_ring(ring, 1);
103				OUT_RING(ring, ptr[i]);
104			}
105
106			msm_gem_put_vaddr(obj);
107
108			break;
109		}
110	}
111
112	a5xx_flush(gpu, ring, true);
113	a5xx_preempt_trigger(gpu);
114
	/* we might not necessarily have a cmd from userspace to
	 * trigger an event to know that the submit has completed, so
	 * do this manually:
	 */
119	a5xx_idle(gpu, ring);
120	ring->memptrs->fence = submit->seqno;
121	msm_gpu_retire(gpu);
122}
123
124static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
125{
126	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
127	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
128	struct msm_ringbuffer *ring = submit->ring;
129	unsigned int i, ibs = 0;
130
131	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
132		gpu->cur_ctx_seqno = 0;
133		a5xx_submit_in_rb(gpu, submit);
134		return;
135	}
136
137	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
138	OUT_RING(ring, 0x02);
139
140	/* Turn off protected mode to write to special registers */
141	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
142	OUT_RING(ring, 0);
143
144	/* Set the save preemption record for the ring/command */
145	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
146	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
147	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
148
149	/* Turn back on protected mode */
150	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
151	OUT_RING(ring, 1);
152
	/* Enable local preemption for fine-grained preemption */
154	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
155	OUT_RING(ring, 0x1);
156
157	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
158	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
159	OUT_RING(ring, 0x02);
160
161	/* Submit the commands */
162	for (i = 0; i < submit->nr_cmds; i++) {
163		switch (submit->cmd[i].type) {
164		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
165			break;
166		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
167			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
168				break;
169			fallthrough;
170		case MSM_SUBMIT_CMD_BUF:
171			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
172			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
173			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
174			OUT_RING(ring, submit->cmd[i].size);
175			ibs++;
176			break;
177		}
178
179		/*
	 * Periodically update the shadow rptr if needed, so that we
	 * can see partial progress of submits with a large # of
	 * cmds; otherwise we could needlessly stall waiting for
	 * ringbuffer space, simply due to looking at a shadow
	 * rptr value that has not been updated
185		 */
186		if ((ibs % 32) == 0)
187			update_shadow_rptr(gpu, ring);
188	}
189
190	/*
191	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
192	 * are done rendering - otherwise a lucky preemption would start
193	 * replaying from the last checkpoint
194	 */
195	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
196	OUT_RING(ring, 0);
197	OUT_RING(ring, 0);
198	OUT_RING(ring, 0);
199	OUT_RING(ring, 0);
200	OUT_RING(ring, 0);
201
202	/* Turn off IB level preemptions */
203	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
204	OUT_RING(ring, 0x01);
205
206	/* Write the fence to the scratch register */
207	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
208	OUT_RING(ring, submit->seqno);
209
210	/*
211	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
212	 * timestamp is written to the memory and then triggers the interrupt
213	 */
214	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
215	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
216		CP_EVENT_WRITE_0_IRQ);
217	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
218	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
219	OUT_RING(ring, submit->seqno);
220
221	/* Yield the floor on command completion */
222	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
223	/*
224	 * If dword[2:1] are non zero, they specify an address for the CP to
225	 * write the value of dword[3] to on preemption complete. Write 0 to
226	 * skip the write
227	 */
228	OUT_RING(ring, 0x00);
229	OUT_RING(ring, 0x00);
230	/* Data value - not used if the address above is 0 */
231	OUT_RING(ring, 0x01);
232	/* Set bit 0 to trigger an interrupt on preempt complete */
233	OUT_RING(ring, 0x01);
234
235	/* A WHERE_AM_I packet is not needed after a YIELD */
236	a5xx_flush(gpu, ring, false);
237
238	/* Check to see if we need to start preemption */
239	a5xx_preempt_trigger(gpu);
240}
241
242static const struct adreno_five_hwcg_regs {
243	u32 offset;
244	u32 value;
245} a5xx_hwcg[] = {
246	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
247	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
248	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
249	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
250	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
251	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
252	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
253	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
254	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
255	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
256	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
257	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
258	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
259	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
260	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
261	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
262	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
263	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
264	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
265	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
266	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
267	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
268	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
269	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
270	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
271	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
272	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
273	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
274	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
275	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
276	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
277	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
278	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
279	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
280	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
281	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
282	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
283	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
284	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
285	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
286	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
287	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
288	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
289	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
290	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
291	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
292	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
293	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
294	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
295	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
296	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
297	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
298	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
299	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
300	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
301	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
302	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
303	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
304	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
305	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
306	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
307	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
308	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
309	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
310	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
311	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
312	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
313	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
314	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
315	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
316	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
317	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
318	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
319	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
320	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
321	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
322	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
323	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
324	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
325	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
326	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
327	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
328	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
329	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
330	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
331	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
332	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
333	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
334	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
335	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
336	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
337	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
338}, a50x_hwcg[] = {
339	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
340	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
341	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
342	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
343	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
344	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
345	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
346	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
347	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
348	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
349	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
350	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
351	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
352	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
353	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
354	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
355	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
356	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00FFFFF4},
357	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
358	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
359	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
360	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
361	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
362	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
363	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
364	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
365	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
366	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
367	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
368	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
369	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
370	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
371	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
372	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
373	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
374	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
375	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
376	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
377}, a512_hwcg[] = {
378	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
379	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
380	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
381	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
382	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
383	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
384	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
385	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
386	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
387	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
388	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
389	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
390	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
391	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
392	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
393	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
394	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
395	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
396	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
397	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
398	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
399	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
400	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
401	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
402	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
403	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
404	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
405	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
406	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
407	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
408	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
409	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
410	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
411	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
412	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
413	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
414	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
415	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
416	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
417	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
418	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
419	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
420	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
421	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
422	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
423	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
424	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
425	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
426	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
427	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
428	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
429	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
430	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
431	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
432	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
433	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
434};
435
436void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
437{
438	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
439	const struct adreno_five_hwcg_regs *regs;
440	unsigned int i, sz;
441
442	if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu)) {
443		regs = a50x_hwcg;
444		sz = ARRAY_SIZE(a50x_hwcg);
445	} else if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu)) {
446		regs = a512_hwcg;
447		sz = ARRAY_SIZE(a512_hwcg);
448	} else {
449		regs = a5xx_hwcg;
450		sz = ARRAY_SIZE(a5xx_hwcg);
451	}
452
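	/* Program the per-block clock control registers, or zero them to disable HWCG */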
453	for (i = 0; i < sz; i++)
454		gpu_write(gpu, regs[i].offset,
455			  state ? regs[i].value : 0);
456
457	if (adreno_is_a540(adreno_gpu)) {
458		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
459		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
460	}
461
462	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
463	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
464}
465
466static int a5xx_me_init(struct msm_gpu *gpu)
467{
468	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
469	struct msm_ringbuffer *ring = gpu->rb[0];
470
471	OUT_PKT7(ring, CP_ME_INIT, 8);
472
473	OUT_RING(ring, 0x0000002F);
474
475	/* Enable multiple hardware contexts */
476	OUT_RING(ring, 0x00000003);
477
478	/* Enable error detection */
479	OUT_RING(ring, 0x20000000);
480
481	/* Don't enable header dump */
482	OUT_RING(ring, 0x00000000);
483	OUT_RING(ring, 0x00000000);
484
485	/* Specify workarounds for various microcode issues */
486	if (adreno_is_a506(adreno_gpu) || adreno_is_a530(adreno_gpu)) {
487		/* Workaround for token end syncs
488		 * Force a WFI after every direct-render 3D mode draw and every
489		 * 2D mode 3 draw
490		 */
491		OUT_RING(ring, 0x0000000B);
492	} else if (adreno_is_a510(adreno_gpu)) {
493		/* Workaround for token and syncs */
494		OUT_RING(ring, 0x00000001);
495	} else {
496		/* No workarounds enabled */
497		OUT_RING(ring, 0x00000000);
498	}
499
500	OUT_RING(ring, 0x00000000);
501	OUT_RING(ring, 0x00000000);
502
503	a5xx_flush(gpu, ring, true);
504	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
505}
506
507static int a5xx_preempt_start(struct msm_gpu *gpu)
508{
509	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
510	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
511	struct msm_ringbuffer *ring = gpu->rb[0];
512
513	if (gpu->nr_rings == 1)
514		return 0;
515
516	/* Turn off protected mode to write to special registers */
517	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
518	OUT_RING(ring, 0);
519
520	/* Set the save preemption record for the ring/command */
521	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
522	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
523	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
524
525	/* Turn back on protected mode */
526	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
527	OUT_RING(ring, 1);
528
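	/* Turn off global preemption, then enable local preemption and yields */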
529	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
530	OUT_RING(ring, 0x00);
531
532	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
533	OUT_RING(ring, 0x01);
534
535	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
536	OUT_RING(ring, 0x01);
537
538	/* Yield the floor on command completion */
539	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
540	OUT_RING(ring, 0x00);
541	OUT_RING(ring, 0x00);
542	OUT_RING(ring, 0x01);
543	OUT_RING(ring, 0x01);
544
	/* The WHERE_AM_I packet is not needed after a YIELD is issued */
546	a5xx_flush(gpu, ring, false);
547
548	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
549}
550
551static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
552		struct drm_gem_object *obj)
553{
554	u32 *buf = msm_gem_get_vaddr(obj);
555
556	if (IS_ERR(buf))
557		return;
558
559	/*
560	 * If the lowest nibble is 0xa that is an indication that this microcode
561	 * has been patched. The actual version is in dword [3] but we only care
562	 * about the patchlevel which is the lowest nibble of dword [3]
563	 */
564	if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
565		a5xx_gpu->has_whereami = true;
566
567	msm_gem_put_vaddr(obj);
568}
569
570static int a5xx_ucode_load(struct msm_gpu *gpu)
571{
572	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
573	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
574	int ret;
575
576	if (!a5xx_gpu->pm4_bo) {
577		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
578			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
579
580
581		if (IS_ERR(a5xx_gpu->pm4_bo)) {
582			ret = PTR_ERR(a5xx_gpu->pm4_bo);
583			a5xx_gpu->pm4_bo = NULL;
584			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
585				ret);
586			return ret;
587		}
588
589		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
590	}
591
592	if (!a5xx_gpu->pfp_bo) {
593		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
594			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
595
596		if (IS_ERR(a5xx_gpu->pfp_bo)) {
597			ret = PTR_ERR(a5xx_gpu->pfp_bo);
598			a5xx_gpu->pfp_bo = NULL;
599			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
600				ret);
601			return ret;
602		}
603
604		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
605		a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
606	}
607
608	if (a5xx_gpu->has_whereami) {
609		if (!a5xx_gpu->shadow_bo) {
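			/*
			 * One shadow rptr slot per ring; the CP updates these
			 * via CP_WHERE_AM_I packets.
			 */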
610			a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
611				sizeof(u32) * gpu->nr_rings,
612				MSM_BO_WC | MSM_BO_MAP_PRIV,
613				gpu->aspace, &a5xx_gpu->shadow_bo,
614				&a5xx_gpu->shadow_iova);
615
616			if (IS_ERR(a5xx_gpu->shadow))
617				return PTR_ERR(a5xx_gpu->shadow);
618
619			msm_gem_object_set_name(a5xx_gpu->shadow_bo, "shadow");
620		}
621	} else if (gpu->nr_rings > 1) {
622		/* Disable preemption if WHERE_AM_I isn't available */
623		a5xx_preempt_fini(gpu);
624		gpu->nr_rings = 1;
625	}
626
627	return 0;
628}
629
630#define SCM_GPU_ZAP_SHADER_RESUME 0
631
632static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
633{
634	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
635	int ret;
636
637	/*
	 * Adreno 506 has the CPZ retention feature and doesn't require the
	 * zap shader to be resumed
640	 */
641	if (adreno_is_a506(adreno_gpu))
642		return 0;
643
644	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
645	if (ret)
646		DRM_ERROR("%s: zap-shader resume failed: %d\n",
647			gpu->name, ret);
648
649	return ret;
650}
651
652static int a5xx_zap_shader_init(struct msm_gpu *gpu)
653{
654	static bool loaded;
655	int ret;
656
657	/*
658	 * If the zap shader is already loaded into memory we just need to kick
659	 * the remote processor to reinitialize it
660	 */
661	if (loaded)
662		return a5xx_zap_shader_resume(gpu);
663
664	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
665
666	loaded = !ret;
667	return ret;
668}
669
670#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
671	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
672	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
673	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
674	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
675	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
676	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
677	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
678	  A5XX_RBBM_INT_0_MASK_CP_SW | \
679	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
680	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
681	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
682
683static int a5xx_hw_init(struct msm_gpu *gpu)
684{
685	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
686	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
687	u32 regbit;
688	int ret;
689
690	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
691
692	if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
693	    adreno_is_a540(adreno_gpu))
694		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
695
696	/* Make all blocks contribute to the GPU BUSY perf counter */
697	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
698
699	/* Enable RBBM error reporting bits */
700	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
701
702	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
703		/*
704		 * Mask out the activity signals from RB1-3 to avoid false
705		 * positives
706		 */
707
708		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
709			0xF0000000);
710		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
711			0xFFFFFFFF);
712		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
713			0xFFFFFFFF);
714		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
715			0xFFFFFFFF);
716		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
717			0xFFFFFFFF);
718		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
719			0xFFFFFFFF);
720		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
721			0xFFFFFFFF);
722		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
723			0xFFFFFFFF);
724	}
725
726	/* Enable fault detection */
727	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
728		(1 << 30) | 0xFFFF);
729
730	/* Turn on performance counters */
731	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
732
733	/* Select CP0 to always count cycles */
734	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
735
	/* Select countable 6 for RBBM perf counter 0 to get the busy status for devfreq */
737	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
738
739	/* Increase VFD cache access so LRZ and other data gets evicted less */
740	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
741
742	/* Disable L2 bypass in the UCHE */
743	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
744	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
745	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
746	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
747
	/* Set the GMEM VA range (0x00100000 to 0x00100000 + gmem size - 1) */
749	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
750	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
751	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
752		0x00100000 + adreno_gpu->info->gmem - 1);
753	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
754
755	if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) ||
756	    adreno_is_a510(adreno_gpu)) {
757		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
758		if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu))
759			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
760		else
761			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
762		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
763		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
764	} else {
765		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
766		if (adreno_is_a530(adreno_gpu))
767			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
768		else
769			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
770		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
771		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
772	}
773
774	if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu))
775		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
776			  (0x100 << 11 | 0x100 << 22));
777	else if (adreno_is_a509(adreno_gpu) || adreno_is_a510(adreno_gpu) ||
778		 adreno_is_a512(adreno_gpu))
779		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
780			  (0x200 << 11 | 0x200 << 22));
781	else
782		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
783			  (0x400 << 11 | 0x300 << 22));
784
785	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
786		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
787
788	/*
789	 * Disable the RB sampler datapath DP2 clock gating optimization
790	 * for 1-SP GPUs, as it is enabled by default.
791	 */
792	if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) ||
793	    adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu))
794		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, 0, (1 << 9));
795
796	/* Disable UCHE global filter as SP can invalidate/flush independently */
797	gpu_write(gpu, REG_A5XX_UCHE_MODE_CNTL, BIT(29));
798
799	/* Enable USE_RETENTION_FLOPS */
800	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
801
802	/* Enable ME/PFP split notification */
803	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
804
805	/*
	 *  In A5xx, the CCU can send a context_done event for a particular
	 *  context to the UCHE, which ultimately reaches the CP even while a
	 *  valid transaction for that context is still inside the CCU. This can
	 *  let the CP program config registers, which makes the "valid
	 *  transaction" inside the CCU be interpreted differently and can cause
	 *  a GPU fault. This bug is fixed in the latest A510 revision. To enable
	 *  the bug fix, bit[11] of RB_DBG_ECO_CNTL needs to be set to 0 (the
	 *  default is 1, i.e. disabled). For older A510 revisions this bit is
	 *  unused.
814	 */
815	if (adreno_is_a510(adreno_gpu))
816		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
817
818	/* Enable HWCG */
819	a5xx_set_hwcg(gpu, true);
820
821	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
822
823	/* Set the highest bank bit */
824	if (adreno_is_a540(adreno_gpu) || adreno_is_a530(adreno_gpu))
825		regbit = 2;
826	else
827		regbit = 1;
828
829	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, regbit << 7);
830	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, regbit << 1);
831
832	if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
833	    adreno_is_a540(adreno_gpu))
834		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, regbit);
835
836	/* Disable All flat shading optimization (ALLFLATOPTDIS) */
837	gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, (1 << 10));
838
839	/* Protect registers from the CP */
840	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
841
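	/*
	 * Each CP_PROTECT entry defines a register range that the CP is not
	 * allowed to read or write on behalf of submitted command streams.
	 */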
842	/* RBBM */
843	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
844	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
845	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
846	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
847	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
848	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
849
850	/* Content protect */
851	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
852		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
853			16));
854	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
855		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
856
857	/* CP */
858	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
859	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
860	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
861	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
862
863	/* RB */
864	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
865	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
866
867	/* VPC */
868	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
869	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 16));
870
871	/* UCHE */
872	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
873
874	/* SMMU */
875	gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
876			ADRENO_PROTECT_RW(0x10000, 0x8000));
877
878	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
879	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
883	 */
884	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
885	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
886
887	/* Put the GPU into 64 bit by default */
888	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
889	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
890	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
891	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
892	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
893	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
894	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
895	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
896	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
897	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
898	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
899	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
900
901	/*
	 * A VPC corner case with local memory load/kill leads to corrupt
	 * internal state. The normal disable does not work for all A5xx chips,
	 * so apply the following setting to disable it.
905	 */
906	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
907		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
908		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
909	}
910
911	ret = adreno_hw_init(gpu);
912	if (ret)
913		return ret;
914
915	if (adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))
916		a5xx_gpmu_ucode_init(gpu);
917
918	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, a5xx_gpu->pm4_iova);
919	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, a5xx_gpu->pfp_iova);
920
921	/* Set the ringbuffer address */
922	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, gpu->rb[0]->iova);
923
924	/*
925	 * If the microcode supports the WHERE_AM_I opcode then we can use that
926	 * in lieu of the RPTR shadow and enable preemption. Otherwise, we
927	 * can't safely use the RPTR shadow or preemption. In either case, the
928	 * RPTR shadow should be disabled in hardware.
929	 */
930	gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
931		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
932
933	/* Configure the RPTR shadow if needed: */
934	if (a5xx_gpu->shadow_bo) {
935		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
936			    shadowptr(a5xx_gpu, gpu->rb[0]));
937	}
938
939	a5xx_preempt_hw_init(gpu);
940
	/* Enable the RBBM interrupts now that initial bringup is done */
942	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
943
944	/* Clear ME_HALT to start the micro engine */
945	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
946	ret = a5xx_me_init(gpu);
947	if (ret)
948		return ret;
949
950	ret = a5xx_power_init(gpu);
951	if (ret)
952		return ret;
953
954	/*
955	 * Send a pipeline event stat to get misbehaving counters to start
956	 * ticking correctly
957	 */
958	if (adreno_is_a530(adreno_gpu)) {
959		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
960		OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
961
962		a5xx_flush(gpu, gpu->rb[0], true);
963		if (!a5xx_idle(gpu, gpu->rb[0]))
964			return -EINVAL;
965	}
966
967	/*
	 * If the chip we are using supports loading a zap shader, then try to
	 * load one into the secure world. If successful we can use the CP to
	 * switch out of secure mode. If not, we have no recourse but to try to
	 * switch ourselves out manually. If we guessed wrong then access to the
	 * RBBM_SECVID_TRUST_CNTL register will be blocked and a permissions
	 * violation will soon follow.
974	 */
975	ret = a5xx_zap_shader_init(gpu);
976	if (!ret) {
977		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
978		OUT_RING(gpu->rb[0], 0x00000000);
979
980		a5xx_flush(gpu, gpu->rb[0], true);
981		if (!a5xx_idle(gpu, gpu->rb[0]))
982			return -EINVAL;
983	} else if (ret == -ENODEV) {
984		/*
		 * This device does not use a zap shader (but print a warning
986		 * just in case someone got their dt wrong.. hopefully they
987		 * have a debug UART to realize the error of their ways...
988		 * if you mess this up you are about to crash horribly)
989		 */
990		dev_warn_once(gpu->dev->dev,
991			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
992		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
993	} else {
994		return ret;
995	}
996
997	/* Last step - yield the ringbuffer */
998	a5xx_preempt_start(gpu);
999
1000	return 0;
1001}
1002
1003static void a5xx_recover(struct msm_gpu *gpu)
1004{
1005	int i;
1006
1007	adreno_dump_info(gpu);
1008
1009	for (i = 0; i < 8; i++) {
1010		printk("CP_SCRATCH_REG%d: %u\n", i,
1011			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
1012	}
1013
1014	if (hang_debug)
1015		a5xx_dump(gpu);
1016
1017	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
1018	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
1019	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
1020	adreno_recover(gpu);
1021}
1022
1023static void a5xx_destroy(struct msm_gpu *gpu)
1024{
1025	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1026	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1027
1028	DBG("%s", gpu->name);
1029
1030	a5xx_preempt_fini(gpu);
1031
1032	if (a5xx_gpu->pm4_bo) {
1033		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
1034		drm_gem_object_put(a5xx_gpu->pm4_bo);
1035	}
1036
1037	if (a5xx_gpu->pfp_bo) {
1038		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
1039		drm_gem_object_put(a5xx_gpu->pfp_bo);
1040	}
1041
1042	if (a5xx_gpu->gpmu_bo) {
1043		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
1044		drm_gem_object_put(a5xx_gpu->gpmu_bo);
1045	}
1046
1047	if (a5xx_gpu->shadow_bo) {
1048		msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
1049		drm_gem_object_put(a5xx_gpu->shadow_bo);
1050	}
1051
1052	adreno_gpu_cleanup(adreno_gpu);
1053	kfree(a5xx_gpu);
1054}
1055
1056static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
1057{
1058	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
1059		return false;
1060
1061	/*
1062	 * Nearly every abnormality ends up pausing the GPU and triggering a
1063	 * fault so we can safely just watch for this one interrupt to fire
1064	 */
1065	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
1066		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
1067}
1068
1069bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1070{
1071	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1072	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1073
1074	if (ring != a5xx_gpu->cur_ring) {
1075		WARN(1, "Tried to idle a non-current ringbuffer\n");
1076		return false;
1077	}
1078
1079	/* wait for CP to drain ringbuffer: */
1080	if (!adreno_idle(gpu, ring))
1081		return false;
1082
1083	if (spin_until(_a5xx_check_idle(gpu))) {
1084		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
1085			gpu->name, __builtin_return_address(0),
1086			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1087			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
1088			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1089			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
1090		return false;
1091	}
1092
1093	return true;
1094}
1095
1096static int a5xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1097{
1098	struct msm_gpu *gpu = arg;
1099	struct adreno_smmu_fault_info *info = data;
1100	char block[12] = "unknown";
1101	u32 scratch[] = {
1102			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
1103			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
1104			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
1105			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)),
1106	};
1107
1108	if (info)
1109		snprintf(block, sizeof(block), "%x", info->fsynr1);
1110
1111	return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
1112}
1113
1114static void a5xx_cp_err_irq(struct msm_gpu *gpu)
1115{
1116	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
1117
1118	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
1119		u32 val;
1120
1121		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
1122
1123		/*
1124		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
1125		 * read it twice
1126		 */
1127
1128		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
1129		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
1130
1131		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
1132			val);
1133	}
1134
1135	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
1136		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
1137			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
1138
1139	if (status & A5XX_CP_INT_CP_DMA_ERROR)
1140		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
1141
1142	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1143		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
1144
1145		dev_err_ratelimited(gpu->dev->dev,
1146			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1147			val & (1 << 24) ? "WRITE" : "READ",
1148			(val & 0xFFFFF) >> 2, val);
1149	}
1150
1151	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
1152		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
1153		const char *access[16] = { "reserved", "reserved",
1154			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
1155			"", "", "me read", "me write", "", "", "crashdump read",
1156			"crashdump write" };
1157
1158		dev_err_ratelimited(gpu->dev->dev,
1159			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
1160			status & 0xFFFFF, access[(status >> 24) & 0xF],
1161			(status & (1 << 31)), status);
1162	}
1163}
1164
1165static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
1166{
1167	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
1168		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
1169
1170		dev_err_ratelimited(gpu->dev->dev,
1171			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
1172			val & (1 << 28) ? "WRITE" : "READ",
1173			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
1174			(val >> 24) & 0xF);
1175
1176		/* Clear the error */
1177		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
1178
1179		/* Clear the interrupt */
1180		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1181			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1182	}
1183
1184	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
1185		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
1186
1187	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
1188		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
1189			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
1190
1191	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
1192		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
1193			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
1194
1195	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
1196		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
1197			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
1198
1199	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1200		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
1201
1202	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1203		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
1204}
1205
1206static void a5xx_uche_err_irq(struct msm_gpu *gpu)
1207{
1208	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
1209
1210	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
1211
1212	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
1213		addr);
1214}
1215
1216static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
1217{
1218	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
1219}
1220
1221static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
1222{
1223	struct drm_device *dev = gpu->dev;
1224	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1225
1226	/*
1227	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
1228	 * but the fault handler will trigger the devcore dump, and we want
1229	 * to otherwise resume normally rather than killing the submit, so
1230	 * just bail.
1231	 */
1232	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24))
1233		return;
1234
1235	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1236		ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1237		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1238		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1239		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1240		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE),
1241		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1242		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE),
1243		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1244
1245	/* Turn off the hangcheck timer to keep it from bothering us */
1246	del_timer(&gpu->hangcheck_timer);
1247
1248	kthread_queue_work(gpu->worker, &gpu->recover_work);
1249}
1250
1251#define RBBM_ERROR_MASK \
1252	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1253	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1254	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1255	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1256	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1257	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1258
1259static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1260{
1261	struct msm_drm_private *priv = gpu->dev->dev_private;
1262	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1263
1264	/*
1265	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1266	 * before the source is cleared the interrupt will storm.
1267	 */
1268	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1269		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1270
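	/*
	 * If error IRQs are disabled (for debugging), only service the retire
	 * (CACHE_FLUSH_TS) and preemption (CP_SW) interrupts.
	 */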
1271	if (priv->disable_err_irq) {
1272		status &= A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS |
1273			  A5XX_RBBM_INT_0_MASK_CP_SW;
1274	}
1275
1276	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1277	if (status & RBBM_ERROR_MASK)
1278		a5xx_rbbm_err_irq(gpu, status);
1279
1280	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1281		a5xx_cp_err_irq(gpu);
1282
1283	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1284		a5xx_fault_detect_irq(gpu);
1285
1286	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1287		a5xx_uche_err_irq(gpu);
1288
1289	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1290		a5xx_gpmu_err_irq(gpu);
1291
1292	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1293		a5xx_preempt_trigger(gpu);
1294		msm_gpu_retire(gpu);
1295	}
1296
1297	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1298		a5xx_preempt_irq(gpu);
1299
1300	return IRQ_HANDLED;
1301}
1302
1303static const u32 a5xx_registers[] = {
1304	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1305	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1306	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1307	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1308	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1309	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1310	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1311	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1312	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1313	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1314	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1315	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1316	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1317	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1318	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1319	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1320	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1321	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1322	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1323	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1324	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1325	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1326	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1327	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1328	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1329	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1330	0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1331	0xAC60, 0xAC60, ~0,
1332};
1333
1334static void a5xx_dump(struct msm_gpu *gpu)
1335{
1336	DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1337		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1338	adreno_dump(gpu);
1339}
1340
1341static int a5xx_pm_resume(struct msm_gpu *gpu)
1342{
1343	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1344	int ret;
1345
1346	/* Turn on the core power */
1347	ret = msm_gpu_pm_resume(gpu);
1348	if (ret)
1349		return ret;
1350
	/* Adreno 506, 508, 509, 510 and 512 need manual RBBM suspend/resume control */
1352	if (!(adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))) {
1353		/* Halt the sp_input_clk at HM level */
1354		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1355		a5xx_set_hwcg(gpu, true);
1356		/* Turn on sp_input_clk at HM level */
1357		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1358		return 0;
1359	}
1360
	/* Turn on the RBCCU power domain first to limit the chances of voltage droop */
1362	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1363
1364	/* Wait 3 usecs before polling */
1365	udelay(3);
1366
1367	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1368		(1 << 20), (1 << 20));
1369	if (ret) {
1370		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1371			gpu->name,
1372			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1373		return ret;
1374	}
1375
1376	/* Turn on the SP domain */
1377	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1378	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1379		(1 << 20), (1 << 20));
1380	if (ret)
1381		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1382			gpu->name);
1383
1384	return ret;
1385}
1386
1387static int a5xx_pm_suspend(struct msm_gpu *gpu)
1388{
1389	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1390	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1391	u32 mask = 0xf;
1392	int i, ret;
1393
1394	/* A506, A508, A510 have 3 XIN ports in VBIF */
1395	if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) ||
1396	    adreno_is_a510(adreno_gpu))
1397		mask = 0x7;
1398
1399	/* Clear the VBIF pipe before shutting down */
1400	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1401	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1402				mask) == mask);
1403
1404	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1405
1406	/*
	 * Reset the VBIF before power collapse to avoid issues with FIFO
	 * entries on Adreno A510 and A530 (the others will tend to lock up)
1409	 */
1410	if (adreno_is_a510(adreno_gpu) || adreno_is_a530(adreno_gpu)) {
1411		gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1412		gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1413	}
1414
1415	ret = msm_gpu_pm_suspend(gpu);
1416	if (ret)
1417		return ret;
1418
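	/* Clear the shadow rptr values so stale values aren't read after resume */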
1419	if (a5xx_gpu->has_whereami)
1420		for (i = 0; i < gpu->nr_rings; i++)
1421			a5xx_gpu->shadow[i] = 0;
1422
1423	return 0;
1424}
1425
1426static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1427{
1428	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO);
1429
1430	return 0;
1431}
1432
1433struct a5xx_crashdumper {
1434	void *ptr;
1435	struct drm_gem_object *bo;
1436	u64 iova;
1437};
1438
1439struct a5xx_gpu_state {
1440	struct msm_gpu_state base;
1441	u32 *hlsqregs;
1442};
1443
1444static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1445		struct a5xx_crashdumper *dumper)
1446{
1447	dumper->ptr = msm_gem_kernel_new(gpu->dev,
1448		SZ_1M, MSM_BO_WC, gpu->aspace,
1449		&dumper->bo, &dumper->iova);
1450
1451	if (!IS_ERR(dumper->ptr))
1452		msm_gem_object_set_name(dumper->bo, "crashdump");
1453
1454	return PTR_ERR_OR_ZERO(dumper->ptr);
1455}
1456
1457static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1458		struct a5xx_crashdumper *dumper)
1459{
1460	u32 val;
1461
1462	if (IS_ERR_OR_NULL(dumper->ptr))
1463		return -EINVAL;
1464
1465	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);
1466
1467	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1468
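	/* Wait for the crashdumper to signal completion (bit 2 of CRASH_DUMP_CNTL) */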
1469	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1470		val & 0x04, 100, 10000);
1471}
1472
1473/*
 * This is a list of the registers that need to be read through the HLSQ
 * aperture via the crashdumper.  These are not nominally accessible from
1476 * the CPU on a secure platform.
1477 */
1478static const struct {
1479	u32 type;
1480	u32 regoffset;
1481	u32 count;
1482} a5xx_hlsq_aperture_regs[] = {
	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1484	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1485	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1486	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1487	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1488	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1489	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1490	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1491	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1492	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1493	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1494	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1495	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1496	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1497	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1498};
1499
1500static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1501		struct a5xx_gpu_state *a5xx_state)
1502{
1503	struct a5xx_crashdumper dumper = { 0 };
1504	u32 offset, count = 0;
1505	u64 *ptr;
1506	int i;
1507
1508	if (a5xx_crashdumper_init(gpu, &dumper))
1509		return;
1510
1511	/* The script will be written at offset 0 */
1512	ptr = dumper.ptr;
1513
1514	/* Start writing the data at offset 256k */
1515	offset = dumper.iova + (256 * SZ_1K);
1516
1517	/* Count how many additional registers to get from the HLSQ aperture */
1518	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1519		count += a5xx_hlsq_aperture_regs[i].count;
1520
1521	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1522	if (!a5xx_state->hlsqregs)
1523		return;
1524
1525	/* Build the crashdump script */
1526	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1527		u32 type = a5xx_hlsq_aperture_regs[i].type;
1528		u32 c = a5xx_hlsq_aperture_regs[i].count;
1529
1530		/* Write the register to select the desired bank */
1531		*ptr++ = ((u64) type << 8);
1532		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1533			(1 << 21) | 1;
1534
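		/* Then dump 'c' registers from the aperture into the buffer at 'offset' */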
1535		*ptr++ = offset;
1536		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1537			| c;
1538
1539		offset += c * sizeof(u32);
1540	}
1541
1542	/* Write two zeros to close off the script */
1543	*ptr++ = 0;
1544	*ptr++ = 0;
1545
1546	if (a5xx_crashdumper_run(gpu, &dumper)) {
1547		kfree(a5xx_state->hlsqregs);
1548		msm_gem_kernel_put(dumper.bo, gpu->aspace);
1549		return;
1550	}
1551
1552	/* Copy the data from the crashdumper to the state */
1553	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1554		count * sizeof(u32));
1555
1556	msm_gem_kernel_put(dumper.bo, gpu->aspace);
1557}
1558
1559static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1560{
1561	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1562			GFP_KERNEL);
1563	bool stalled = !!(gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24));
1564
1565	if (!a5xx_state)
1566		return ERR_PTR(-ENOMEM);
1567
1568	/* Temporarily disable hardware clock gating before reading the hw */
1569	a5xx_set_hwcg(gpu, false);
1570
1571	/* First get the generic state from the adreno core */
1572	adreno_gpu_state_get(gpu, &(a5xx_state->base));
1573
1574	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1575
1576	/*
1577	 * Get the HLSQ regs with the help of the crashdumper, but only if
1578	 * we are not stalled in an iommu fault (in which case the crashdumper
1579	 * would not have access to memory)
1580	 */
1581	if (!stalled)
1582		a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1583
1584	a5xx_set_hwcg(gpu, true);
1585
1586	return &a5xx_state->base;
1587}
1588
1589static void a5xx_gpu_state_destroy(struct kref *kref)
1590{
1591	struct msm_gpu_state *state = container_of(kref,
1592		struct msm_gpu_state, ref);
1593	struct a5xx_gpu_state *a5xx_state = container_of(state,
1594		struct a5xx_gpu_state, base);
1595
1596	kfree(a5xx_state->hlsqregs);
1597
1598	adreno_gpu_state_destroy(state);
1599	kfree(a5xx_state);
1600}
1601
1602static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1603{
1604	if (IS_ERR_OR_NULL(state))
1605		return 1;
1606
1607	return kref_put(&state->ref, a5xx_gpu_state_destroy);
1608}
1609
1610
1611#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1612static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1613		      struct drm_printer *p)
1614{
1615	int i, j;
1616	u32 pos = 0;
1617	struct a5xx_gpu_state *a5xx_state = container_of(state,
1618		struct a5xx_gpu_state, base);
1619
1620	if (IS_ERR_OR_NULL(state))
1621		return;
1622
1623	adreno_show(gpu, state, p);
1624
1625	/* Dump the additional a5xx HLSQ registers */
1626	if (!a5xx_state->hlsqregs)
1627		return;
1628
1629	drm_printf(p, "registers-hlsq:\n");
1630
1631	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1632		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1633		u32 c = a5xx_hlsq_aperture_regs[i].count;
1634
1635		for (j = 0; j < c; j++, pos++, o++) {
1636			/*
1637			 * To keep the crashdump simple we pull the entire range
1638			 * for each register type but not all of the registers
1639			 * in the range are valid. Fortunately invalid registers
1640			 * stick out like a sore thumb with a value of
1641			 * 0xdeadbeef
1642			 */
1643			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1644				continue;
1645
1646			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1647				o << 2, a5xx_state->hlsqregs[pos]);
1648		}
1649	}
1650}
1651#endif
1652
1653static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1654{
1655	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1656	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1657
1658	return a5xx_gpu->cur_ring;
1659}
1660
1661static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
1662{
1663	u64 busy_cycles;
1664
1665	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO);
1666	*out_sample_rate = clk_get_rate(gpu->core_clk);
1667
1668	return busy_cycles;
1669}
1670
1671static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1672{
1673	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1674	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1675
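	/* Prefer the CP-maintained shadow rptr; fall back to reading the register */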
1676	if (a5xx_gpu->has_whereami)
1677		return a5xx_gpu->shadow[ring->id];
1678
1679	return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
1680}
1681
1682static const struct adreno_gpu_funcs funcs = {
1683	.base = {
1684		.get_param = adreno_get_param,
1685		.set_param = adreno_set_param,
1686		.hw_init = a5xx_hw_init,
1687		.ucode_load = a5xx_ucode_load,
1688		.pm_suspend = a5xx_pm_suspend,
1689		.pm_resume = a5xx_pm_resume,
1690		.recover = a5xx_recover,
1691		.submit = a5xx_submit,
1692		.active_ring = a5xx_active_ring,
1693		.irq = a5xx_irq,
1694		.destroy = a5xx_destroy,
1695#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1696		.show = a5xx_show,
1697#endif
1698#if defined(CONFIG_DEBUG_FS)
1699		.debugfs_init = a5xx_debugfs_init,
1700#endif
1701		.gpu_busy = a5xx_gpu_busy,
1702		.gpu_state_get = a5xx_gpu_state_get,
1703		.gpu_state_put = a5xx_gpu_state_put,
1704		.create_address_space = adreno_create_address_space,
1705		.get_rptr = a5xx_get_rptr,
1706	},
1707	.get_timestamp = a5xx_get_timestamp,
1708};
1709
1710static void check_speed_bin(struct device *dev)
1711{
1712	struct nvmem_cell *cell;
1713	u32 val;
1714
1715	/*
	 * If the OPP table specifies an opp-supported-hw property then we have
	 * to set something with dev_pm_opp_set_supported_hw() or the table
	 * doesn't get populated, so pick an arbitrary value that should
1719	 * ensure the default frequencies are selected but not conflict with any
1720	 * actual bins
1721	 */
1722	val = 0x80;
1723
1724	cell = nvmem_cell_get(dev, "speed_bin");
1725
1726	if (!IS_ERR(cell)) {
1727		void *buf = nvmem_cell_read(cell, NULL);
1728
1729		if (!IS_ERR(buf)) {
1730			u8 bin = *((u8 *) buf);
1731
1732			val = (1 << bin);
1733			kfree(buf);
1734		}
1735
1736		nvmem_cell_put(cell);
1737	}
1738
1739	devm_pm_opp_set_supported_hw(dev, &val, 1);
1740}
1741
1742struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1743{
1744	struct msm_drm_private *priv = dev->dev_private;
1745	struct platform_device *pdev = priv->gpu_pdev;
1746	struct adreno_platform_config *config = pdev->dev.platform_data;
1747	struct a5xx_gpu *a5xx_gpu = NULL;
1748	struct adreno_gpu *adreno_gpu;
1749	struct msm_gpu *gpu;
1750	unsigned int nr_rings;
1751	int ret;
1752
1753	if (!pdev) {
1754		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1755		return ERR_PTR(-ENXIO);
1756	}
1757
1758	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1759	if (!a5xx_gpu)
1760		return ERR_PTR(-ENOMEM);
1761
1762	adreno_gpu = &a5xx_gpu->base;
1763	gpu = &adreno_gpu->base;
1764
1765	adreno_gpu->registers = a5xx_registers;
1766
1767	a5xx_gpu->lm_leakage = 0x4E001A;
1768
1769	check_speed_bin(&pdev->dev);
1770
1771	nr_rings = 4;
1772
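	/* A510 is limited to a single ringbuffer, which effectively disables preemption */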
1773	if (config->info->revn == 510)
1774		nr_rings = 1;
1775
1776	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, nr_rings);
1777	if (ret) {
1778		a5xx_destroy(&(a5xx_gpu->base.base));
1779		return ERR_PTR(ret);
1780	}
1781
1782	if (gpu->aspace)
1783		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1784
1785	/* Set up the preemption specific bits and pieces for each ringbuffer */
1786	a5xx_preempt_init(gpu);
1787
1788	return gpu;
1789}
1790