// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/cpumask.h>
#include <linux/qcom_scm.h>
#include <linux/pm_opp.h>
#include <linux/nvmem-consumer.h>
#include <linux/slab.h>
#include "msm_gem.h"
#include "msm_mmu.h"
#include "a5xx_gpu.h"

extern bool hang_debug;
static void a5xx_dump(struct msm_gpu *gpu);

#define GPU_PAS_ID 13

void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		bool sync)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	/*
	 * Most flush operations need to issue a WHERE_AM_I opcode to sync up
	 * the rptr shadow
	 */
	if (a5xx_gpu->has_whereami && sync) {
		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
		OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
		OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
	}

	spin_lock_irqsave(&ring->preempt_lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	spin_unlock_irqrestore(&ring->preempt_lock, flags);

	/* Make sure everything is posted before making a decision */
	mb();

	/* Update HW if this is the current ring and we are not in preempt */
	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}
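
/*
 * A note on a5xx_flush(): ring->cur is only advanced under preempt_lock,
 * and the new WPTR is written to CP_RB_WPTR only when this ring is the
 * one the CP is currently executing and no preemption is in flight.
 * Otherwise the write is deferred - the preemption code is expected to
 * program the WPTR for the target ring when it switches to it.  The
 * optional CP_WHERE_AM_I packet asks the CP to post its read pointer to
 * the per-ring shadow buffer (see shadowptr()), so the kernel can track
 * RPTR without register reads.
 */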

static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	struct msm_gem_object *obj;
	uint32_t *ptr, dwords;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == submit->queue->ctx)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			/* copy commands into RB: */
			obj = submit->bos[submit->cmd[i].idx].obj;
			dwords = submit->cmd[i].size;

			ptr = msm_gem_get_vaddr(&obj->base);

			/* _get_vaddr() shouldn't fail at this point,
			 * since we've already mapped it once in
			 * submit_reloc()
			 */
			if (WARN_ON(IS_ERR_OR_NULL(ptr)))
				return;

			for (i = 0; i < dwords; i++) {
				/* normally the OUT_PKTn() would wait
				 * for space for the packet.  But since
				 * we just OUT_RING() the whole thing,
				 * need to call adreno_wait_ring()
				 * ourself:
				 */
				adreno_wait_ring(ring, 1);
				OUT_RING(ring, ptr[i]);
			}

			msm_gem_put_vaddr(&obj->base);

			break;
		}
	}

	a5xx_flush(gpu, ring, true);
	a5xx_preempt_trigger(gpu);

	/* we might not necessarily have a cmd from userspace to
	 * trigger an event to know that submit has completed, so
	 * do this manually:
	 */
	a5xx_idle(gpu, ring);
	ring->memptrs->fence = submit->seqno;
	msm_gpu_retire(gpu);
}

static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
		priv->lastctx = NULL;
		a5xx_submit_in_rb(gpu, submit);
		return;
	}

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	/* Enable local preemption for finegrain preemption */
	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x1);

	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x02);

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == submit->queue->ctx)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			ibs++;
			break;
		}
	}

	/*
	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
	 * are done rendering - otherwise a lucky preemption would start
	 * replaying from the last checkpoint
	 */
	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* Turn off IB level preemptions */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event.  This will ensure that the
	 * timestamp is written to the memory and then triggers the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
		CP_EVENT_WRITE_0_IRQ);
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	/*
	 * If dword[2:1] are non zero, they specify an address for the CP to
	 * write the value of dword[3] to on preemption complete. Write 0 to
	 * skip the write
	 */
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Data value - not used if the address above is 0 */
	OUT_RING(ring, 0x01);
	/* Set bit 0 to trigger an interrupt on preempt complete */
	OUT_RING(ring, 0x01);

	/* A WHERE_AM_I packet is not needed after a YIELD */
	a5xx_flush(gpu, ring, false);

	/* Check to see if we need to start preemption */
	a5xx_preempt_trigger(gpu);
}
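
/*
 * The tail of a5xx_submit() above does three things: it drops the seqno
 * into CP_SCRATCH_REG(2) (useful when decoding a hang), it issues a
 * CACHE_FLUSH_TS event that writes the same seqno to memptrs->fence and
 * raises the CP_CACHE_FLUSH_TS interrupt so retirement can run, and it
 * ends with CP_CONTEXT_SWITCH_YIELD to mark a point where a pending
 * preemption may take the floor.
 */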

static const struct {
	u32 offset;
	u32 value;
} a5xx_hwcg[] = {
	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
};

void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
		gpu_write(gpu, a5xx_hwcg[i].offset,
			state ? a5xx_hwcg[i].value : 0);

	if (adreno_is_a540(adreno_gpu)) {
		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
	}

	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
}
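
/*
 * a5xx_set_hwcg() is used as a simple global switch around operations that
 * want stable register state; for example the GPU state capture later in
 * this file does roughly:
 *
 *	a5xx_set_hwcg(gpu, false);	// freeze clock gating
 *	...read registers / run the crashdumper...
 *	a5xx_set_hwcg(gpu, true);	// restore the power savings
 */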

static int a5xx_me_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002F);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* Specify workarounds for various microcode issues */
	if (adreno_is_a530(adreno_gpu)) {
		/* Workaround for token end syncs
		 * Force a WFI after every direct-render 3D mode draw and every
		 * 2D mode 3 draw
		 */
		OUT_RING(ring, 0x0000000B);
	} else if (adreno_is_a510(adreno_gpu)) {
		/* Workaround for token and syncs */
		OUT_RING(ring, 0x00000001);
	} else {
		/* No workarounds enabled */
		OUT_RING(ring, 0x00000000);
	}

	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	a5xx_flush(gpu, ring, true);
	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a5xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings == 1)
		return 0;

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x00);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x01);

	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x01);
	OUT_RING(ring, 0x01);

	/* The WHERE_AM_I packet is not needed after a YIELD is issued */
	a5xx_flush(gpu, ring, false);

	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
		struct drm_gem_object *obj)
{
	u32 *buf = msm_gem_get_vaddr_active(obj);

	if (IS_ERR(buf))
		return;

	/*
	 * If the lowest nibble is 0xa that is an indication that this microcode
	 * has been patched.  The actual version is in dword [3] but we only care
	 * about the patchlevel which is the lowest nibble of dword [3]
	 */
	if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
		a5xx_gpu->has_whereami = true;

	msm_gem_put_vaddr(obj);
}
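
/*
 * Firmware version check, spelled out: for a PFP image whose first dword
 * ends in 0xa (a "patched" build), buf[2] - "dword [3]" in the comment,
 * counting from one - holds the version, and its low nibble is the
 * patchlevel.  A patchlevel >= 1 is taken to mean the microcode
 * understands CP_WHERE_AM_I, which in turn gates the RPTR shadow and
 * preemption setup in a5xx_hw_init().
 */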

static int a5xx_ucode_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	int ret;

	if (!a5xx_gpu->pm4_bo) {
		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);

		if (IS_ERR(a5xx_gpu->pm4_bo)) {
			ret = PTR_ERR(a5xx_gpu->pm4_bo);
			a5xx_gpu->pm4_bo = NULL;
			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
				ret);
			return ret;
		}

		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
	}

	if (!a5xx_gpu->pfp_bo) {
		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);

		if (IS_ERR(a5xx_gpu->pfp_bo)) {
			ret = PTR_ERR(a5xx_gpu->pfp_bo);
			a5xx_gpu->pfp_bo = NULL;
			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
				ret);
			return ret;
		}

		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
		a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
	}

	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);

	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);

	return 0;
}

#define SCM_GPU_ZAP_SHADER_RESUME 0

static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
{
	int ret;

	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
	if (ret)
		DRM_ERROR("%s: zap-shader resume failed: %d\n",
			gpu->name, ret);

	return ret;
}

static int a5xx_zap_shader_init(struct msm_gpu *gpu)
{
	static bool loaded;
	int ret;

	/*
	 * If the zap shader is already loaded into memory we just need to kick
	 * the remote processor to reinitialize it
	 */
	if (loaded)
		return a5xx_zap_shader_resume(gpu);

	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);

	loaded = !ret;
	return ret;
}
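
/*
 * Zap shader handling in short: the first call asks the secure world to
 * load the zap firmware via adreno_zap_shader_load(); every later call
 * only needs the cheaper SCM_GPU_ZAP_SHADER_RESUME request, which is what
 * the static 'loaded' flag above implements.  A return of -ENODEV is
 * treated by a5xx_hw_init() as "this platform does not use a zap shader".
 */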

#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
	  A5XX_RBBM_INT_0_MASK_CP_SW | \
	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)

static int a5xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	int ret;

	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);

	if (adreno_is_a540(adreno_gpu))
		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable RBBM error reporting bits */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
		/*
		 * Mask out the activity signals from RB1-3 to avoid false
		 * positives
		 */

		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
			0xFFFFFFFF);
	}

	/* Enable fault detection */
	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
		(1 << 30) | 0xFFFF);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);

	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

	/* Increase VFD cache access so LRZ and other data gets evicted less */
	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);

	/* Disable L2 bypass in the UCHE */
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);

	/* Set the GMEM VA range (0 to gpu->gmem) */
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
		0x00100000 + adreno_gpu->gmem - 1);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

	if (adreno_is_a510(adreno_gpu)) {
		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
			(0x200 << 11 | 0x200 << 22));
	} else {
		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
		if (adreno_is_a530(adreno_gpu))
			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
		if (adreno_is_a540(adreno_gpu))
			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
			(0x400 << 11 | 0x300 << 22));
	}

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));

	/* Enable USE_RETENTION_FLOPS */
	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);

	/* Enable ME/PFP split notification */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);

	/*
	 * In A5x, CCU can send context_done event of a particular context to
	 * UCHE which ultimately reaches CP even when there is valid
	 * transaction of that context inside CCU. This can let the CP program
	 * config registers, which will make the "valid transaction" inside
	 * CCU to be interpreted differently. This can cause gpu fault. This
	 * bug is fixed in latest A510 revision.  To enable this bug fix -
	 * bit[11] of RB_DBG_ECO_CNTL needs to be set to 0 (the default is 1,
	 * i.e. disabled). For older A510 versions this bit is unused.
	 */
	if (adreno_is_a510(adreno_gpu))
		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);

	/* Enable HWCG */
	a5xx_set_hwcg(gpu, true);

	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);

	/* Set the highest bank bit */
	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
	if (adreno_is_a540(adreno_gpu))
		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);

	/* Protect registers from the CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);

	/* RBBM */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));

	/* Content protect */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
			16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));

	/* CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));

	/* RB */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));

	/* VPC */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));

	/* UCHE */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

	if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
			ADRENO_PROTECT_RW(0x10000, 0x8000));
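
	/*
	 * The CP_PROTECT entries above use ADRENO_PROTECT_RW(reg, len) from
	 * adreno_gpu.h, which (roughly speaking) marks a power-of-two sized
	 * window of 'len' registers starting at 'reg' that the CP may not
	 * touch from an unprivileged command stream; CP_PROTECT_CNTL = 0x7
	 * enables the checking so that violations show up as
	 * A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR in a5xx_cp_err_irq().
	 */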

	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Put the GPU into 64 bit by default */
	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);

	/*
	 * VPC corner case with local memory load kill leads to corrupt
	 * internal state. Normal Disable does not work for all a5x chips.
	 * So do the following setting to disable it.
	 */
	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
	}

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	if (!adreno_is_a510(adreno_gpu))
		a5xx_gpmu_ucode_init(gpu);

	ret = a5xx_ucode_init(gpu);
	if (ret)
		return ret;

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
		gpu->rb[0]->iova);

	/*
	 * If the microcode supports the WHERE_AM_I opcode then we can use that
	 * in lieu of the RPTR shadow and enable preemption. Otherwise, we
	 * can't safely use the RPTR shadow or preemption. In either case, the
	 * RPTR shadow should be disabled in hardware.
	 */
	gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Create a privileged buffer for the RPTR shadow */
	if (a5xx_gpu->has_whereami) {
		if (!a5xx_gpu->shadow_bo) {
			a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
				sizeof(u32) * gpu->nr_rings,
				MSM_BO_UNCACHED | MSM_BO_MAP_PRIV,
				gpu->aspace, &a5xx_gpu->shadow_bo,
				&a5xx_gpu->shadow_iova);

			if (IS_ERR(a5xx_gpu->shadow))
				return PTR_ERR(a5xx_gpu->shadow);
		}

		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
			REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
	} else if (gpu->nr_rings > 1) {
		/* Disable preemption if WHERE_AM_I isn't available */
		a5xx_preempt_fini(gpu);
		gpu->nr_rings = 1;
	}

	a5xx_preempt_hw_init(gpu);

	/* Disable the interrupts through the initial bringup stage */
	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);

	/* Clear ME_HALT to start the micro engine */
	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
	ret = a5xx_me_init(gpu);
	if (ret)
		return ret;

	ret = a5xx_power_init(gpu);
	if (ret)
		return ret;

	/*
	 * Send a pipeline event stat to get misbehaving counters to start
	 * ticking correctly
	 */
	if (adreno_is_a530(adreno_gpu)) {
		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
		OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));

		a5xx_flush(gpu, gpu->rb[0], true);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	}

	/*
	 * If the chip that we are using does support loading one, then
	 * try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a5xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a5xx_flush(gpu, gpu->rb[0], true);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
	} else {
		return ret;
	}

	/* Last step - yield the ringbuffer */
	a5xx_preempt_start(gpu);

	return 0;
}

static void a5xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
	}

	if (hang_debug)
		a5xx_dump(gpu);

	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a5xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	a5xx_preempt_fini(gpu);

	if (a5xx_gpu->pm4_bo) {
		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->pm4_bo);
	}

	if (a5xx_gpu->pfp_bo) {
		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->pfp_bo);
	}

	if (a5xx_gpu->gpmu_bo) {
		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->gpmu_bo);
	}

	if (a5xx_gpu->shadow_bo) {
		msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->shadow_bo);
	}

	adreno_gpu_cleanup(adreno_gpu);
	kfree(a5xx_gpu);
}

static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
{
	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
		return false;

	/*
	 * Nearly every abnormality ends up pausing the GPU and triggering a
	 * fault so we can safely just watch for this one interrupt to fire
	 */
	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
}

bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	if (ring != a5xx_gpu->cur_ring) {
		WARN(1, "Tried to idle a non-current ringbuffer\n");
		return false;
	}

	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a5xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
{
	struct msm_gpu *gpu = arg;
	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
		iova, flags,
		gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
		gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
		gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
		gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));

	return -EFAULT;
}
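
/*
 * a5xx_fault_handler() is installed as the MMU fault handler in
 * a5xx_gpu_init() below.  The scratch register dump is only a debugging
 * hint - the values are whatever the command stream last wrote there -
 * and returning -EFAULT signals that the fault was not resolved here.
 */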

static void a5xx_cp_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);

	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
		u32 val;

		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);

		/*
		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
		 * read it twice
		 */

		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);

		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
			val);
	}

	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));

	if (status & A5XX_CP_INT_CP_DMA_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");

	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
			val & (1 << 24) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, val);
	}

	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
		const char *access[16] = { "reserved", "reserved",
			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
			"", "", "me read", "me write", "", "", "crashdump read",
			"crashdump write" };

		dev_err_ratelimited(gpu->dev->dev,
			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
			status & 0xFFFFF, access[(status >> 24) & 0xF],
			(status & (1 << 31)), status);
	}
}

static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
{
	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
			val & (1 << 28) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
			(val >> 24) & 0xF);

		/* Clear the error */
		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));

		/* Clear the interrupt */
		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
	}

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
}

static void a5xx_uche_err_irq(struct msm_gpu *gpu)
{
	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);

	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);

	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
		addr);
}

static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
{
	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
}

static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
		ring ? ring->id : -1, ring ? ring->seqno : 0,
		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));

	/* Turn off the hangcheck timer to keep it from bothering us */
	del_timer(&gpu->hangcheck_timer);

	queue_work(priv->wq, &gpu->recover_work);
}

#define RBBM_ERROR_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)

static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);

	/*
	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
	 * before the source is cleared the interrupt will storm.
	 */
	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);

	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
	if (status & RBBM_ERROR_MASK)
		a5xx_rbbm_err_irq(gpu, status);

	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a5xx_cp_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
		a5xx_fault_detect_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		a5xx_uche_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
		a5xx_gpmu_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		a5xx_preempt_trigger(gpu);
		msm_gpu_retire(gpu);
	}

	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
		a5xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}

static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
	0xA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
	0xAC60, 0xAC60, ~0,
};

static void a5xx_dump(struct msm_gpu *gpu)
{
	DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a5xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	/* Turn on the core power */
	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	if (adreno_is_a510(adreno_gpu)) {
		/* Halt the sp_input_clk at HM level */
		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
		a5xx_set_hwcg(gpu, true);
		/* Turn on sp_input_clk at HM level */
		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
		return 0;
	}

	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);

	/* Wait 3 usecs before polling */
	udelay(3);

	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret) {
		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
			gpu->name,
			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
		return ret;
	}

	/* Turn on the SP domain */
	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret)
		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
			gpu->name);

	return ret;
}

static int a5xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	u32 mask = 0xf;
	int i, ret;

	/* A510 has 3 XIN ports in VBIF */
	if (adreno_is_a510(adreno_gpu))
		mask = 0x7;

	/* Clear the VBIF pipe before shutting down */
	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
		mask) == mask);

	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

	/*
	 * Reset the VBIF before power collapse to avoid issue with FIFO
	 * entries
	 */
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

	ret = msm_gpu_pm_suspend(gpu);
	if (ret)
		return ret;

	if (a5xx_gpu->has_whereami)
		for (i = 0; i < gpu->nr_rings; i++)
			a5xx_gpu->shadow[i] = 0;

	return 0;
}

static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
		REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);

	return 0;
}

struct a5xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a5xx_gpu_state {
	struct msm_gpu_state base;
	u32 *hlsqregs;
};

static int a5xx_crashdumper_init(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
		&dumper->bo, &dumper->iova);

	if (!IS_ERR(dumper->ptr))
		msm_gem_object_set_name(dumper->bo, "crashdump");

	return PTR_ERR_OR_ZERO(dumper->ptr);
}

static int a5xx_crashdumper_run(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	u32 val;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);

	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
		val & 0x04, 100, 10000);
}
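
/*
 * The crashdump "script" consumed by a5xx_crashdumper_run() is a list of
 * 128-bit entries, assembled below in a5xx_gpu_state_get_hlsq_regs(): the
 * first qword is either a value to write or a target address in the dump
 * buffer, and the second qword packs a register offset in the upper bits
 * (<< 44) plus a write flag/count in the low bits.  Two zero qwords
 * terminate the script.  (Inferred from how the script is built below,
 * not from documentation.)
 */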

/*
 * This is a list of the registers that need to be read through the HLSQ
 * aperture through the crashdumper.  These are not normally accessible
 * from the CPU on a secure platform.
 */
static const struct {
	u32 type;
	u32 regoffset;
	u32 count;
} a5xx_hlsq_aperture_regs[] = {
	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
};

static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
		struct a5xx_gpu_state *a5xx_state)
{
	struct a5xx_crashdumper dumper = { 0 };
	u32 offset, count = 0;
	u64 *ptr;
	int i;

	if (a5xx_crashdumper_init(gpu, &dumper))
		return;

	/* The script will be written at offset 0 */
	ptr = dumper.ptr;

	/* Start writing the data at offset 256k */
	offset = dumper.iova + (256 * SZ_1K);

	/* Count how many additional registers to get from the HLSQ aperture */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
		count += a5xx_hlsq_aperture_regs[i].count;

	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
	if (!a5xx_state->hlsqregs)
		return;

	/* Build the crashdump script */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 type = a5xx_hlsq_aperture_regs[i].type;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		/* Write the register to select the desired bank */
		*ptr++ = ((u64) type << 8);
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
			(1 << 21) | 1;

		*ptr++ = offset;
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
			| c;

		offset += c * sizeof(u32);
	}

	/* Write two zeros to close off the script */
	*ptr++ = 0;
	*ptr++ = 0;

	if (a5xx_crashdumper_run(gpu, &dumper)) {
		kfree(a5xx_state->hlsqregs);
		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
		return;
	}

	/* Copy the data from the crashdumper to the state */
	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
		count * sizeof(u32));

	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
}

static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
			GFP_KERNEL);

	if (!a5xx_state)
		return ERR_PTR(-ENOMEM);

	/* Temporarily disable hardware clock gating before reading the hw */
	a5xx_set_hwcg(gpu, false);

	/* First get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &(a5xx_state->base));

	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);

	/* Get the HLSQ regs with the help of the crashdumper */
	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);

	a5xx_set_hwcg(gpu, true);

	return &a5xx_state->base;
}
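
/*
 * GPU state lifetime: a5xx_gpu_state_get() hands back a kref'd
 * msm_gpu_state (initialized by adreno_gpu_state_get()); callers balance
 * it with a5xx_gpu_state_put(), which drops the a5xx specific HLSQ
 * register copy before releasing the generic adreno state.
 */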

static void a5xx_gpu_state_destroy(struct kref *kref)
{
	struct msm_gpu_state *state = container_of(kref,
		struct msm_gpu_state, ref);
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	kfree(a5xx_state->hlsqregs);

	adreno_gpu_state_destroy(state);
	kfree(a5xx_state);
}

static int a5xx_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a5xx_gpu_state_destroy);
}

#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	int i, j;
	u32 pos = 0;
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	if (IS_ERR_OR_NULL(state))
		return;

	adreno_show(gpu, state, p);

	/* Dump the additional a5xx HLSQ registers */
	if (!a5xx_state->hlsqregs)
		return;

	drm_printf(p, "registers-hlsq:\n");

	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		for (j = 0; j < c; j++, pos++, o++) {
			/*
			 * To keep the crashdump simple we pull the entire range
			 * for each register type but not all of the registers
			 * in the range are valid. Fortunately invalid registers
			 * stick out like a sore thumb with a value of
			 * 0xdeadbeef
			 */
			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
				continue;

			drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n",
				o << 2, a5xx_state->hlsqregs[pos]);
		}
	}
}
#endif

static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	return a5xx_gpu->cur_ring;
}

static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
{
	u64 busy_cycles, busy_time;

	/* Only read the gpu busy if the hardware is already active */
	if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
		return 0;

	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);

	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);

	gpu->devfreq.busy_cycles = busy_cycles;

	pm_runtime_put(&gpu->pdev->dev);

	if (WARN_ON(busy_time > ~0LU))
		return ~0LU;

	return (unsigned long)busy_time;
}
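
/*
 * RPTR reads: when the microcode supports CP_WHERE_AM_I the per-ring
 * shadow buffer is kept up to date by the CP (see a5xx_flush()), so
 * a5xx_get_rptr() below never has to touch REG_A5XX_CP_RB_RPTR;
 * otherwise it falls back to the register read and caches the result in
 * memptrs->rptr.
 */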

static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	if (a5xx_gpu->has_whereami)
		return a5xx_gpu->shadow[ring->id];

	return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a5xx_hw_init,
		.pm_suspend = a5xx_pm_suspend,
		.pm_resume = a5xx_pm_resume,
		.recover = a5xx_recover,
		.submit = a5xx_submit,
		.active_ring = a5xx_active_ring,
		.irq = a5xx_irq,
		.destroy = a5xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = a5xx_show,
#endif
#if defined(CONFIG_DEBUG_FS)
		.debugfs_init = a5xx_debugfs_init,
#endif
		.gpu_busy = a5xx_gpu_busy,
		.gpu_state_get = a5xx_gpu_state_get,
		.gpu_state_put = a5xx_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
		.get_rptr = a5xx_get_rptr,
	},
	.get_timestamp = a5xx_get_timestamp,
};

static void check_speed_bin(struct device *dev)
{
	struct nvmem_cell *cell;
	u32 val;

	/*
	 * If the OPP table specifies an opp-supported-hw property then we have
	 * to set something with dev_pm_opp_set_supported_hw() or the table
	 * doesn't get populated, so pick an arbitrary value that should
	 * ensure the default frequencies are selected but not conflict with any
	 * actual bins
	 */
	val = 0x80;

	cell = nvmem_cell_get(dev, "speed_bin");

	if (!IS_ERR(cell)) {
		void *buf = nvmem_cell_read(cell, NULL);

		if (!IS_ERR(buf)) {
			u8 bin = *((u8 *) buf);

			val = (1 << bin);
			kfree(buf);
		}

		nvmem_cell_put(cell);
	}

	dev_pm_opp_set_supported_hw(dev, &val, 1);
}

struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
{
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct a5xx_gpu *a5xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	unsigned int nr_rings;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
		return ERR_PTR(-ENXIO);
	}

	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
	if (!a5xx_gpu)
		return ERR_PTR(-ENOMEM);

	adreno_gpu = &a5xx_gpu->base;
	gpu = &adreno_gpu->base;

	adreno_gpu->registers = a5xx_registers;

	a5xx_gpu->lm_leakage = 0x4E001A;

	check_speed_bin(&pdev->dev);

	nr_rings = 4;

	if (adreno_is_a510(adreno_gpu))
		nr_rings = 1;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, nr_rings);
	if (ret) {
		a5xx_destroy(&(a5xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	if (gpu->aspace)
		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);

	/* Set up the preemption specific bits and pieces for each ringbuffer */
	a5xx_preempt_init(gpu);

	return gpu;
}