// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/cpumask.h>
#include <linux/qcom_scm.h>
#include <linux/pm_opp.h>
#include <linux/nvmem-consumer.h>
#include <linux/slab.h>
#include "msm_gem.h"
#include "msm_mmu.h"
#include "a5xx_gpu.h"

extern bool hang_debug;
static void a5xx_dump(struct msm_gpu *gpu);

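/* Peripheral Authentication Service (PAS) ID used for the GPU in SCM calls */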
#define GPU_PAS_ID 13

void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		bool sync)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	/*
	 * Most flush operations need to issue a WHERE_AM_I opcode to sync up
	 * the rptr shadow
	 */
	if (a5xx_gpu->has_whereami && sync) {
		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
		OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
		OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
	}

	spin_lock_irqsave(&ring->preempt_lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	spin_unlock_irqrestore(&ring->preempt_lock, flags);

	/* Make sure everything is posted before making a decision */
	mb();

	/* Update HW if this is the current ring and we are not in preempt */
	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}

static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	struct msm_gem_object *obj;
	uint32_t *ptr, dwords;
	unsigned int i, j;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == submit->queue->ctx)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			/* copy commands into RB: */
			obj = submit->bos[submit->cmd[i].idx].obj;
			dwords = submit->cmd[i].size;

			ptr = msm_gem_get_vaddr(&obj->base);

			/* _get_vaddr() shouldn't fail at this point,
			 * since we've already mapped it once in
			 * submit_reloc()
			 */
			if (WARN_ON(IS_ERR_OR_NULL(ptr)))
				return;

			/* Use a separate counter here - reusing 'i' would
			 * clobber the outer command loop index
			 */
			for (j = 0; j < dwords; j++) {
				/* normally the OUT_PKTn() would wait
				 * for space for the packet.  But since
				 * we just OUT_RING() the whole thing,
				 * need to call adreno_wait_ring()
				 * ourself:
				 */
				adreno_wait_ring(ring, 1);
				OUT_RING(ring, ptr[j]);
			}

			msm_gem_put_vaddr(&obj->base);

			break;
		}
	}

	a5xx_flush(gpu, ring, true);
	a5xx_preempt_trigger(gpu);

	/* we might not necessarily have a cmd from userspace to
	 * trigger an event to know that submit has completed, so
	 * do this manually:
	 */
	a5xx_idle(gpu, ring);
	ring->memptrs->fence = submit->seqno;
	msm_gpu_retire(gpu);
}


static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
		priv->lastctx = NULL;
		a5xx_submit_in_rb(gpu, submit);
		return;
	}

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	/* Enable local preemption for finegrain preemption */
	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x1);

	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x02);

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == submit->queue->ctx)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			ibs++;
			break;
		}
	}

	/*
	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
	 * are done rendering - otherwise a lucky preemption would start
	 * replaying from the last checkpoint
	 */
	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* Turn off IB level preemptions */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
	 * timestamp is written to the memory and then triggers the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
		CP_EVENT_WRITE_0_IRQ);
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	/*
	 * If dword[2:1] are non zero, they specify an address for the CP to
	 * write the value of dword[3] to on preemption complete. Write 0 to
	 * skip the write
	 */
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Data value - not used if the address above is 0 */
	OUT_RING(ring, 0x01);
	/* Set bit 0 to trigger an interrupt on preempt complete */
	OUT_RING(ring, 0x01);

	/* A WHERE_AM_I packet is not needed after a YIELD */
	a5xx_flush(gpu, ring, false);

	/* Check to see if we need to start preemption */
	a5xx_preempt_trigger(gpu);
}

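/*
 * Per-block hardware clock gating (HWCG) register values, applied or cleared
 * by a5xx_set_hwcg() below.
 */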
static const struct {
	u32 offset;
	u32 value;
} a5xx_hwcg[] = {
	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
};

void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
		gpu_write(gpu, a5xx_hwcg[i].offset,
			state ? a5xx_hwcg[i].value : 0);

	if (adreno_is_a540(adreno_gpu)) {
		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
	}

	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
}

static int a5xx_me_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002F);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* Specify workarounds for various microcode issues */
	if (adreno_is_a530(adreno_gpu)) {
		/* Workaround for token end syncs
		 * Force a WFI after every direct-render 3D mode draw and every
		 * 2D mode 3 draw
		 */
		OUT_RING(ring, 0x0000000B);
	} else if (adreno_is_a510(adreno_gpu)) {
		/* Workaround for token and syncs */
		OUT_RING(ring, 0x00000001);
	} else {
		/* No workarounds enabled */
		OUT_RING(ring, 0x00000000);
	}

	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	a5xx_flush(gpu, ring, true);
	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a5xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings == 1)
		return 0;

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x00);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x01);

	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x01);
	OUT_RING(ring, 0x01);

	/* A WHERE_AM_I packet is not needed after a YIELD is issued */
	a5xx_flush(gpu, ring, false);

	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
		struct drm_gem_object *obj)
{
	u32 *buf = msm_gem_get_vaddr_active(obj);

	if (IS_ERR(buf))
		return;

	/*
	 * If the lowest nibble is 0xa that is an indication that this microcode
	 * has been patched. The actual version is in dword [3] but we only care
	 * about the patchlevel which is the lowest nibble of dword [3]
	 */
	if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
		a5xx_gpu->has_whereami = true;

	msm_gem_put_vaddr(obj);
}

static int a5xx_ucode_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	int ret;

	if (!a5xx_gpu->pm4_bo) {
		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);

		if (IS_ERR(a5xx_gpu->pm4_bo)) {
			ret = PTR_ERR(a5xx_gpu->pm4_bo);
			a5xx_gpu->pm4_bo = NULL;
			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
				ret);
			return ret;
		}

		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
	}

	if (!a5xx_gpu->pfp_bo) {
		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);

		if (IS_ERR(a5xx_gpu->pfp_bo)) {
			ret = PTR_ERR(a5xx_gpu->pfp_bo);
			a5xx_gpu->pfp_bo = NULL;
			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
				ret);
			return ret;
		}

		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
		a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
	}

	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);

	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);

	return 0;
}

#define SCM_GPU_ZAP_SHADER_RESUME 0

static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
{
	int ret;

	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
	if (ret)
		DRM_ERROR("%s: zap-shader resume failed: %d\n",
			gpu->name, ret);

	return ret;
}

static int a5xx_zap_shader_init(struct msm_gpu *gpu)
{
	static bool loaded;
	int ret;

	/*
	 * If the zap shader is already loaded into memory we just need to kick
	 * the remote processor to reinitialize it
	 */
	if (loaded)
		return a5xx_zap_shader_resume(gpu);

	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);

	loaded = !ret;
	return ret;
}

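/* RBBM interrupt sources that are unmasked and handled by a5xx_irq() */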
#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
	  A5XX_RBBM_INT_0_MASK_CP_SW | \
	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)

static int a5xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	int ret;

	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);

	if (adreno_is_a540(adreno_gpu))
		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable RBBM error reporting bits */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
		/*
		 * Mask out the activity signals from RB1-3 to avoid false
		 * positives
		 */

		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
			0xFFFFFFFF);
	}

	/* Enable fault detection */
	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
		(1 << 30) | 0xFFFF);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);

	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

	/* Increase VFD cache access so LRZ and other data gets evicted less */
	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);

	/* Disable L2 bypass in the UCHE */
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);

	/* Set the GMEM VA range (0 to gpu->gmem) */
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
		0x00100000 + adreno_gpu->gmem - 1);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

	if (adreno_is_a510(adreno_gpu)) {
		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
			  (0x200 << 11 | 0x200 << 22));
	} else {
		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
		if (adreno_is_a530(adreno_gpu))
			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
		if (adreno_is_a540(adreno_gpu))
			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
			  (0x400 << 11 | 0x300 << 22));
	}

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));

	/* Enable USE_RETENTION_FLOPS */
	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);

	/* Enable ME/PFP split notification */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);

	/*
	 * On A5xx the CCU can send a context_done event for a context to the
	 * UCHE (and ultimately the CP) while there is still a valid
	 * transaction for that context inside the CCU. The CP may then
	 * reprogram config registers, causing the pending transaction in the
	 * CCU to be interpreted differently and resulting in a GPU fault.
	 * This is fixed in the latest A510 revision; to enable the fix,
	 * bit[11] of RB_DBG_ECO_CNTL needs to be cleared (the default is 1,
	 * i.e. disabled). On older A510 revisions this bit is unused.
	 */
	if (adreno_is_a510(adreno_gpu))
		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);

	/* Enable HWCG */
	a5xx_set_hwcg(gpu, true);

	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);

	/* Set the highest bank bit */
	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
	if (adreno_is_a540(adreno_gpu))
		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);

	/* Protect registers from the CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);

	/* RBBM */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));

	/* Content protect */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
			16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));

	/* CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));

	/* RB */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));

	/* VPC */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));

	/* UCHE */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

	if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
			ADRENO_PROTECT_RW(0x10000, 0x8000));

	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block
	 * off part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Put the GPU into 64 bit by default */
	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);

	/*
	 * VPC corner case with local memory load kill leads to corrupt
	 * internal state. Normal Disable does not work for all a5x chips.
	 * So do the following setting to disable it.
	 */
	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
	}

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	if (!adreno_is_a510(adreno_gpu))
		a5xx_gpmu_ucode_init(gpu);

	ret = a5xx_ucode_init(gpu);
	if (ret)
		return ret;

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
		gpu->rb[0]->iova);

	/*
	 * If the microcode supports the WHERE_AM_I opcode then we can use that
	 * in lieu of the RPTR shadow and enable preemption. Otherwise, we
	 * can't safely use the RPTR shadow or preemption. In either case, the
	 * RPTR shadow should be disabled in hardware.
	 */
	gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Create a privileged buffer for the RPTR shadow */
	if (a5xx_gpu->has_whereami) {
		if (!a5xx_gpu->shadow_bo) {
			a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
				sizeof(u32) * gpu->nr_rings,
				MSM_BO_UNCACHED | MSM_BO_MAP_PRIV,
				gpu->aspace, &a5xx_gpu->shadow_bo,
				&a5xx_gpu->shadow_iova);

			if (IS_ERR(a5xx_gpu->shadow))
				return PTR_ERR(a5xx_gpu->shadow);
		}

		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
			REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
	} else if (gpu->nr_rings > 1) {
		/* Disable preemption if WHERE_AM_I isn't available */
		a5xx_preempt_fini(gpu);
		gpu->nr_rings = 1;
	}

	a5xx_preempt_hw_init(gpu);

	/* Unmask the interrupts we intend to handle */
	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);

	/* Clear ME_HALT to start the micro engine */
	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
	ret = a5xx_me_init(gpu);
	if (ret)
		return ret;

	ret = a5xx_power_init(gpu);
	if (ret)
		return ret;

	/*
	 * Send a pipeline event stat to get misbehaving counters to start
	 * ticking correctly
	 */
	if (adreno_is_a530(adreno_gpu)) {
		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
		OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));

		a5xx_flush(gpu, gpu->rb[0], true);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	}

	/*
	 * If the chip we are using supports loading a zap shader, try to load
	 * it into the secure world. If successful we can use the CP to switch
	 * out of secure mode. If not then we have no recourse but to try to
	 * switch ourselves out manually. If we guessed wrong then access to
	 * the RBBM_SECVID_TRUST_CNTL register will be blocked and a
	 * permissions violation will soon follow.
	 */
	ret = a5xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a5xx_flush(gpu, gpu->rb[0], true);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
	} else {
		return ret;
	}

	/* Last step - yield the ringbuffer */
	a5xx_preempt_start(gpu);

	return 0;
}

static void a5xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
	}

	if (hang_debug)
		a5xx_dump(gpu);

	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a5xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	a5xx_preempt_fini(gpu);

	if (a5xx_gpu->pm4_bo) {
		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->pm4_bo);
	}

	if (a5xx_gpu->pfp_bo) {
		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->pfp_bo);
	}

	if (a5xx_gpu->gpmu_bo) {
		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->gpmu_bo);
	}

	if (a5xx_gpu->shadow_bo) {
		msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->shadow_bo);
	}

	adreno_gpu_cleanup(adreno_gpu);
	kfree(a5xx_gpu);
}

static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
{
	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
		return false;

	/*
	 * Nearly every abnormality ends up pausing the GPU and triggering a
	 * fault so we can safely just watch for this one interrupt to fire
	 */
	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
}

bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	if (ring != a5xx_gpu->cur_ring) {
		WARN(1, "Tried to idle a non-current ringbuffer\n");
		return false;
	}

	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a5xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
{
	struct msm_gpu *gpu = arg;
	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
			iova, flags,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));

	return -EFAULT;
}

static void a5xx_cp_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);

	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
		u32 val;

		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);

		/*
		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
		 * read it twice
		 */

		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);

		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
			val);
	}

	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));

	if (status & A5XX_CP_INT_CP_DMA_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");

	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
			val & (1 << 24) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, val);
	}

	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
		const char *access[16] = { "reserved", "reserved",
			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
			"", "", "me read", "me write", "", "", "crashdump read",
			"crashdump write" };

		dev_err_ratelimited(gpu->dev->dev,
			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
			status & 0xFFFFF, access[(status >> 24) & 0xF],
			(status & (1 << 31)), status);
	}
}

static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
{
	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
			val & (1 << 28) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
			(val >> 24) & 0xF);

		/* Clear the error */
		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));

		/* Clear the interrupt */
		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
	}

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
}

static void a5xx_uche_err_irq(struct msm_gpu *gpu)
{
	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);

	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);

	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
		addr);
}

static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
{
	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
}

static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
		ring ? ring->id : -1, ring ? ring->seqno : 0,
		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));

	/* Turn off the hangcheck timer to keep it from bothering us */
	del_timer(&gpu->hangcheck_timer);

	queue_work(priv->wq, &gpu->recover_work);
}

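/* RBBM interrupt bits that indicate an error condition */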
#define RBBM_ERROR_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)

static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);

	/*
	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
	 * before the source is cleared the interrupt will storm.
	 */
	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);

	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
	if (status & RBBM_ERROR_MASK)
		a5xx_rbbm_err_irq(gpu, status);

	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a5xx_cp_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
		a5xx_fault_detect_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		a5xx_uche_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
		a5xx_gpmu_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		a5xx_preempt_trigger(gpu);
		msm_gpu_retire(gpu);
	}

	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
		a5xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}

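/*
 * Register ranges (pairs of first/last offsets, terminated by ~0) that are
 * captured for register dumps and GPU state snapshots.
 */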
static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
	0xA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
	0xAC60, 0xAC60, ~0,
};

static void a5xx_dump(struct msm_gpu *gpu)
{
	DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a5xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	/* Turn on the core power */
	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	if (adreno_is_a510(adreno_gpu)) {
		/* Halt the sp_input_clk at HM level */
		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
		a5xx_set_hwcg(gpu, true);
		/* Turn on sp_input_clk at HM level */
		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
		return 0;
	}

	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);

	/* Wait 3 usecs before polling */
	udelay(3);

	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret) {
		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
			gpu->name,
			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
		return ret;
	}

	/* Turn on the SP domain */
	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret)
		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
			gpu->name);

	return ret;
}

static int a5xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	u32 mask = 0xf;
	int i, ret;

	/* A510 has 3 XIN ports in VBIF */
	if (adreno_is_a510(adreno_gpu))
		mask = 0x7;

	/* Clear the VBIF pipe before shutting down */
	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
				mask) == mask);

	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

	/*
	 * Reset the VBIF before power collapse to avoid issue with FIFO
	 * entries
	 */
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

	ret = msm_gpu_pm_suspend(gpu);
	if (ret)
		return ret;

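	/*
	 * Clear the cached RPTR shadow values so that stale read pointers are
	 * not used when the GPU is powered back up.
	 */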
	if (a5xx_gpu->has_whereami)
		for (i = 0; i < gpu->nr_rings; i++)
			a5xx_gpu->shadow[i] = 0;

	return 0;
}

static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
		REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);

	return 0;
}

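/*
 * Scratch buffer used by the CP crashdumper: the dump script is written at
 * offset 0 and the captured register data lands further into the buffer.
 */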
struct a5xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a5xx_gpu_state {
	struct msm_gpu_state base;
	u32 *hlsqregs;
};

static int a5xx_crashdumper_init(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
		&dumper->bo, &dumper->iova);

	if (!IS_ERR(dumper->ptr))
		msm_gem_object_set_name(dumper->bo, "crashdump");

	return PTR_ERR_OR_ZERO(dumper->ptr);
}

static int a5xx_crashdumper_run(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	u32 val;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);

	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
		val & 0x04, 100, 10000);
}

/*
 * This is the list of registers that need to be read through the HLSQ aperture
 * via the crashdumper. These are not nominally accessible from the CPU on a
 * secure platform.
 */
static const struct {
	u32 type;
	u32 regoffset;
	u32 count;
} a5xx_hlsq_aperture_regs[] = {
	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
};

static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
		struct a5xx_gpu_state *a5xx_state)
{
	struct a5xx_crashdumper dumper = { 0 };
	u32 offset, count = 0;
	u64 *ptr;
	int i;

	if (a5xx_crashdumper_init(gpu, &dumper))
		return;

	/* The script will be written at offset 0 */
	ptr = dumper.ptr;

	/* Start writing the data at offset 256k */
	offset = dumper.iova + (256 * SZ_1K);

	/* Count how many additional registers to get from the HLSQ aperture */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
		count += a5xx_hlsq_aperture_regs[i].count;

	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
	if (!a5xx_state->hlsqregs)
		return;

	/* Build the crashdump script */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 type = a5xx_hlsq_aperture_regs[i].type;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		/* Write the register to select the desired bank */
		*ptr++ = ((u64) type << 8);
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
			(1 << 21) | 1;

		*ptr++ = offset;
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
			| c;

		offset += c * sizeof(u32);
	}

	/* Write two zeros to close off the script */
	*ptr++ = 0;
	*ptr++ = 0;

	if (a5xx_crashdumper_run(gpu, &dumper)) {
		kfree(a5xx_state->hlsqregs);
		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
		return;
	}

	/* Copy the data from the crashdumper to the state */
	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
		count * sizeof(u32));

	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
}

static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
			GFP_KERNEL);

	if (!a5xx_state)
		return ERR_PTR(-ENOMEM);

	/* Temporarily disable hardware clock gating before reading the hw */
	a5xx_set_hwcg(gpu, false);

	/* First get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &(a5xx_state->base));

	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);

	/* Get the HLSQ regs with the help of the crashdumper */
	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);

	a5xx_set_hwcg(gpu, true);

	return &a5xx_state->base;
}

static void a5xx_gpu_state_destroy(struct kref *kref)
{
	struct msm_gpu_state *state = container_of(kref,
		struct msm_gpu_state, ref);
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	kfree(a5xx_state->hlsqregs);

	adreno_gpu_state_destroy(state);
	kfree(a5xx_state);
}

static int a5xx_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a5xx_gpu_state_destroy);
}

#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		      struct drm_printer *p)
{
	int i, j;
	u32 pos = 0;
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	if (IS_ERR_OR_NULL(state))
		return;

	adreno_show(gpu, state, p);

	/* Dump the additional a5xx HLSQ registers */
	if (!a5xx_state->hlsqregs)
		return;

	drm_printf(p, "registers-hlsq:\n");

	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		for (j = 0; j < c; j++, pos++, o++) {
			/*
			 * To keep the crashdump simple we pull the entire range
			 * for each register type but not all of the registers
			 * in the range are valid. Fortunately invalid registers
			 * stick out like a sore thumb with a value of
			 * 0xdeadbeef
			 */
			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
				continue;

			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
				o << 2, a5xx_state->hlsqregs[pos]);
		}
	}
}
#endif

static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	return a5xx_gpu->cur_ring;
}

static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
{
	u64 busy_cycles, busy_time;

	/* Only read the gpu busy if the hardware is already active */
	if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
		return 0;

	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);

	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);

	gpu->devfreq.busy_cycles = busy_cycles;

	pm_runtime_put(&gpu->pdev->dev);

	if (WARN_ON(busy_time > ~0LU))
		return ~0LU;

	return (unsigned long)busy_time;
}

static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	if (a5xx_gpu->has_whereami)
		return a5xx_gpu->shadow[ring->id];

	return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a5xx_hw_init,
		.pm_suspend = a5xx_pm_suspend,
		.pm_resume = a5xx_pm_resume,
		.recover = a5xx_recover,
		.submit = a5xx_submit,
		.active_ring = a5xx_active_ring,
		.irq = a5xx_irq,
		.destroy = a5xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = a5xx_show,
#endif
#if defined(CONFIG_DEBUG_FS)
		.debugfs_init = a5xx_debugfs_init,
#endif
		.gpu_busy = a5xx_gpu_busy,
		.gpu_state_get = a5xx_gpu_state_get,
		.gpu_state_put = a5xx_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
		.get_rptr = a5xx_get_rptr,
	},
	.get_timestamp = a5xx_get_timestamp,
};

static void check_speed_bin(struct device *dev)
{
	struct nvmem_cell *cell;
	u32 val;

	/*
	 * If the OPP table specifies an opp-supported-hw property then we have
	 * to set something with dev_pm_opp_set_supported_hw() or the table
	 * doesn't get populated, so pick an arbitrary value that should
	 * ensure the default frequencies are selected but not conflict with
	 * any actual bins.
	 */
	val = 0x80;

	cell = nvmem_cell_get(dev, "speed_bin");

	if (!IS_ERR(cell)) {
		void *buf = nvmem_cell_read(cell, NULL);

		if (!IS_ERR(buf)) {
			u8 bin = *((u8 *) buf);

			val = (1 << bin);
			kfree(buf);
		}

		nvmem_cell_put(cell);
	}

	dev_pm_opp_set_supported_hw(dev, &val, 1);
}

struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
{
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct a5xx_gpu *a5xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	unsigned int nr_rings;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
		return ERR_PTR(-ENXIO);
	}

	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
	if (!a5xx_gpu)
		return ERR_PTR(-ENOMEM);

	adreno_gpu = &a5xx_gpu->base;
	gpu = &adreno_gpu->base;

	adreno_gpu->registers = a5xx_registers;

	a5xx_gpu->lm_leakage = 0x4E001A;

	check_speed_bin(&pdev->dev);

	nr_rings = 4;

	if (adreno_is_a510(adreno_gpu))
		nr_rings = 1;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, nr_rings);
	if (ret) {
		a5xx_destroy(&(a5xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	if (gpu->aspace)
		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);

	/* Set up the preemption specific bits and pieces for each ringbuffer */
	a5xx_preempt_init(gpu);

	return gpu;
}