1/* 2 * Copyright © 2021 Google, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include <assert.h> 25#include <ctype.h> 26#include <errno.h> 27#include <stdio.h> 28#include <stdlib.h> 29#include <string.h> 30#include <sys/mman.h> 31#include <unistd.h> 32 33#include "util/u_math.h" 34 35#include "freedreno_pm4.h" 36 37#include "emu.h" 38#include "util.h" 39 40#define rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) 41#define rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) 42 43/** 44 * AFUC emulator. Currently only supports a6xx 45 * 46 * TODO to add a5xx it might be easier to compile this multiple times 47 * with conditional compile to deal with differences between generations. 48 */ 49 50static uint32_t 51emu_alu(struct emu *emu, afuc_opc opc, uint32_t src1, uint32_t src2) 52{ 53 uint64_t tmp; 54 switch (opc) { 55 case OPC_ADD: 56 tmp = (uint64_t)src1 + (uint64_t)src2; 57 emu->carry = tmp >> 32; 58 return (uint32_t)tmp; 59 case OPC_ADDHI: 60 return src1 + src2 + emu->carry; 61 case OPC_SUB: 62 tmp = (uint64_t)src1 - (uint64_t)src2; 63 emu->carry = tmp >> 32; 64 return (uint32_t)tmp; 65 case OPC_SUBHI: 66 return src1 - src2 + emu->carry; 67 case OPC_AND: 68 return src1 & src2; 69 case OPC_OR: 70 return src1 | src2; 71 case OPC_XOR: 72 return src1 ^ src2; 73 case OPC_NOT: 74 return ~src1; 75 case OPC_SHL: 76 return src1 << src2; 77 case OPC_USHR: 78 return src1 >> src2; 79 case OPC_ISHR: 80 return (int32_t)src1 >> src2; 81 case OPC_ROT: 82 if (src2 & 0x80000000) 83 return rotl64(src1, -*(int32_t *)&src2); 84 else 85 return rotl32(src1, src2); 86 case OPC_MUL8: 87 return (src1 & 0xff) * (src2 & 0xff); 88 case OPC_MIN: 89 return MIN2(src1, src2); 90 case OPC_MAX: 91 return MAX2(src1, src2); 92 case OPC_CMP: 93 if (src1 > src2) 94 return 0x00; 95 else if (src1 == src2) 96 return 0x2b; 97 return 0x1e; 98 case OPC_MSB: 99 if (!src2) 100 return 0; 101 return util_last_bit(src2) - 1; 102 default: 103 printf("unhandled alu opc: 0x%02x\n", opc); 104 exit(1); 105 } 106} 107 108/** 109 * Helper to calculate load/store address based on LOAD_STORE_HI 110 */ 111static uintptr_t 112load_store_addr(struct emu *emu, unsigned gpr) 113{ 114 EMU_CONTROL_REG(LOAD_STORE_HI); 115 116 uintptr_t addr = emu_get_reg32(emu, &LOAD_STORE_HI); 117 addr <<= 32; 118 119 return addr + emu_get_gpr_reg(emu, gpr); 120} 121 122static void 123emu_instr(struct emu *emu, afuc_instr *instr) 124{ 125 uint32_t rem = emu_get_gpr_reg(emu, REG_REM); 126 afuc_opc opc; 127 bool rep; 128 129 afuc_get_opc(instr, &opc, &rep); 130 131 switch (opc) { 132 case OPC_NOP: 133 break; 134 case OPC_ADD ... OPC_CMP: { 135 uint32_t val = emu_alu(emu, opc, 136 emu_get_gpr_reg(emu, instr->alui.src), 137 instr->alui.uimm); 138 emu_set_gpr_reg(emu, instr->alui.dst, val); 139 break; 140 } 141 case OPC_MOVI: { 142 uint32_t val = instr->movi.uimm << instr->movi.shift; 143 emu_set_gpr_reg(emu, instr->movi.dst, val); 144 break; 145 } 146 case OPC_ALU: { 147 uint32_t val = emu_alu(emu, instr->alu.alu, 148 emu_get_gpr_reg(emu, instr->alu.src1), 149 emu_get_gpr_reg(emu, instr->alu.src2)); 150 emu_set_gpr_reg(emu, instr->alu.dst, val); 151 152 if (instr->alu.xmov) { 153 unsigned m = MIN2(instr->alu.xmov, rem); 154 155 assert(m <= 3); 156 157 if (m == 1) { 158 emu_set_gpr_reg(emu, REG_REM, --rem); 159 emu_dump_state_change(emu); 160 emu_set_gpr_reg(emu, REG_DATA, 161 emu_get_gpr_reg(emu, instr->alu.src2)); 162 } else if (m == 2) { 163 emu_set_gpr_reg(emu, REG_REM, --rem); 164 emu_dump_state_change(emu); 165 emu_set_gpr_reg(emu, REG_DATA, 166 emu_get_gpr_reg(emu, instr->alu.src2)); 167 emu_set_gpr_reg(emu, REG_REM, --rem); 168 emu_dump_state_change(emu); 169 emu_set_gpr_reg(emu, REG_DATA, 170 emu_get_gpr_reg(emu, instr->alu.src2)); 171 } else if (m == 3) { 172 emu_set_gpr_reg(emu, REG_REM, --rem); 173 emu_dump_state_change(emu); 174 emu_set_gpr_reg(emu, REG_DATA, 175 emu_get_gpr_reg(emu, instr->alu.src2)); 176 emu_set_gpr_reg(emu, REG_REM, --rem); 177 emu_dump_state_change(emu); 178 emu_set_gpr_reg(emu, instr->alu.dst, 179 emu_get_gpr_reg(emu, instr->alu.src2)); 180 emu_set_gpr_reg(emu, REG_REM, --rem); 181 emu_dump_state_change(emu); 182 emu_set_gpr_reg(emu, REG_DATA, 183 emu_get_gpr_reg(emu, instr->alu.src2)); 184 } 185 } 186 break; 187 } 188 case OPC_CWRITE6: { 189 uint32_t src1 = emu_get_gpr_reg(emu, instr->control.src1); 190 uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2); 191 192 if (instr->control.flags == 0x4) { 193 emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm); 194 } else if (instr->control.flags && !emu->quiet) { 195 printf("unhandled flags: %x\n", instr->control.flags); 196 } 197 198 emu_set_control_reg(emu, src2 + instr->control.uimm, src1); 199 break; 200 } 201 case OPC_CREAD6: { 202 uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2); 203 204 if (instr->control.flags == 0x4) { 205 emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm); 206 } else if (instr->control.flags && !emu->quiet) { 207 printf("unhandled flags: %x\n", instr->control.flags); 208 } 209 210 emu_set_gpr_reg(emu, instr->control.src1, 211 emu_get_control_reg(emu, src2 + instr->control.uimm)); 212 break; 213 } 214 case OPC_LOAD6: { 215 uintptr_t addr = load_store_addr(emu, instr->control.src2) + 216 instr->control.uimm; 217 218 if (instr->control.flags == 0x4) { 219 uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2); 220 emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm); 221 } else if (instr->control.flags && !emu->quiet) { 222 printf("unhandled flags: %x\n", instr->control.flags); 223 } 224 225 uint32_t val = emu_mem_read_dword(emu, addr); 226 227 emu_set_gpr_reg(emu, instr->control.src1, val); 228 229 break; 230 } 231 case OPC_STORE6: { 232 uintptr_t addr = load_store_addr(emu, instr->control.src2) + 233 instr->control.uimm; 234 235 if (instr->control.flags == 0x4) { 236 uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2); 237 emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm); 238 } else if (instr->control.flags && !emu->quiet) { 239 printf("unhandled flags: %x\n", instr->control.flags); 240 } 241 242 uint32_t val = emu_get_gpr_reg(emu, instr->control.src1); 243 244 emu_mem_write_dword(emu, addr, val); 245 246 break; 247 } 248 case OPC_BRNEI ... OPC_BREQB: { 249 uint32_t off = emu->gpr_regs.pc + instr->br.ioff; 250 uint32_t src = emu_get_gpr_reg(emu, instr->br.src); 251 252 if (opc == OPC_BRNEI) { 253 if (src != instr->br.bit_or_imm) 254 emu->branch_target = off; 255 } else if (opc == OPC_BREQI) { 256 if (src == instr->br.bit_or_imm) 257 emu->branch_target = off; 258 } else if (opc == OPC_BRNEB) { 259 if (!(src & (1 << instr->br.bit_or_imm))) 260 emu->branch_target = off; 261 } else if (opc == OPC_BREQB) { 262 if (src & (1 << instr->br.bit_or_imm)) 263 emu->branch_target = off; 264 } else { 265 assert(0); 266 } 267 break; 268 } 269 case OPC_RET: { 270 assert(emu->call_stack_idx > 0); 271 272 /* counter-part to 'call' instruction, also has a delay slot: */ 273 emu->branch_target = emu->call_stack[--emu->call_stack_idx]; 274 275 break; 276 } 277 case OPC_CALL: { 278 assert(emu->call_stack_idx < ARRAY_SIZE(emu->call_stack)); 279 280 /* call looks to have same delay-slot behavior as branch/etc, so 281 * presumably the return PC is two instructions later: 282 */ 283 emu->call_stack[emu->call_stack_idx++] = emu->gpr_regs.pc + 2; 284 emu->branch_target = instr->call.uoff; 285 286 break; 287 } 288 case OPC_WIN: { 289 assert(!emu->branch_target); 290 emu->run_mode = false; 291 emu->waitin = true; 292 break; 293 } 294 /* OPC_PREEMPTLEAVE6 */ 295 case OPC_SETSECURE: { 296 // TODO this acts like a conditional branch, but in which case 297 // does it branch? 298 break; 299 } 300 default: 301 printf("unhandled opc: 0x%02x\n", opc); 302 exit(1); 303 } 304 305 if (rep) { 306 assert(rem > 0); 307 emu_set_gpr_reg(emu, REG_REM, --rem); 308 } 309} 310 311void 312emu_step(struct emu *emu) 313{ 314 afuc_instr *instr = (void *)&emu->instrs[emu->gpr_regs.pc]; 315 afuc_opc opc; 316 bool rep; 317 318 emu_main_prompt(emu); 319 320 uint32_t branch_target = emu->branch_target; 321 emu->branch_target = 0; 322 323 bool waitin = emu->waitin; 324 emu->waitin = false; 325 326 afuc_get_opc(instr, &opc, &rep); 327 328 if (rep) { 329 do { 330 if (!emu_get_gpr_reg(emu, REG_REM)) 331 break; 332 333 emu_clear_state_change(emu); 334 emu_instr(emu, instr); 335 336 /* defer last state-change dump until after any 337 * post-delay-slot handling below: 338 */ 339 if (emu_get_gpr_reg(emu, REG_REM)) 340 emu_dump_state_change(emu); 341 } while (true); 342 } else { 343 emu_clear_state_change(emu); 344 emu_instr(emu, instr); 345 } 346 347 emu->gpr_regs.pc++; 348 349 if (branch_target) { 350 emu->gpr_regs.pc = branch_target; 351 } 352 353 if (waitin) { 354 uint32_t hdr = emu_get_gpr_reg(emu, 1); 355 uint32_t id, count; 356 357 if (pkt_is_type4(hdr)) { 358 id = afuc_pm4_id("PKT4"); 359 count = type4_pkt_size(hdr); 360 361 /* Possibly a hack, not sure what the hw actually 362 * does here, but we want to mask out the pkt 363 * type field from the hdr, so that PKT4 handler 364 * doesn't see it and interpret it as part as the 365 * register offset: 366 */ 367 emu->gpr_regs.val[1] &= 0x0fffffff; 368 } else if (pkt_is_type7(hdr)) { 369 id = cp_type7_opcode(hdr); 370 count = type7_pkt_size(hdr); 371 } else { 372 printf("Invalid opcode: 0x%08x\n", hdr); 373 exit(1); /* GPU goes *boom* */ 374 } 375 376 assert(id < ARRAY_SIZE(emu->jmptbl)); 377 378 emu_set_gpr_reg(emu, REG_REM, count); 379 emu->gpr_regs.pc = emu->jmptbl[id]; 380 } 381 382 emu_dump_state_change(emu); 383} 384 385void 386emu_run_bootstrap(struct emu *emu) 387{ 388 EMU_CONTROL_REG(PACKET_TABLE_WRITE_ADDR); 389 390 emu->quiet = true; 391 emu->run_mode = true; 392 393 while (emu_get_reg32(emu, &PACKET_TABLE_WRITE_ADDR) < 0x80) { 394 emu_step(emu); 395 } 396} 397 398 399static void 400check_access(struct emu *emu, uintptr_t gpuaddr, unsigned sz) 401{ 402 if ((gpuaddr % sz) != 0) { 403 printf("unaligned access fault: %p\n", (void *)gpuaddr); 404 exit(1); 405 } 406 407 if ((gpuaddr + sz) >= EMU_MEMORY_SIZE) { 408 printf("iova fault: %p\n", (void *)gpuaddr); 409 exit(1); 410 } 411} 412 413uint32_t 414emu_mem_read_dword(struct emu *emu, uintptr_t gpuaddr) 415{ 416 check_access(emu, gpuaddr, 4); 417 return *(uint32_t *)(emu->gpumem + gpuaddr); 418} 419 420static void 421mem_write_dword(struct emu *emu, uintptr_t gpuaddr, uint32_t val) 422{ 423 check_access(emu, gpuaddr, 4); 424 *(uint32_t *)(emu->gpumem + gpuaddr) = val; 425} 426 427void 428emu_mem_write_dword(struct emu *emu, uintptr_t gpuaddr, uint32_t val) 429{ 430 mem_write_dword(emu, gpuaddr, val); 431 assert(emu->gpumem_written == ~0); 432 emu->gpumem_written = gpuaddr; 433} 434 435void 436emu_init(struct emu *emu) 437{ 438 emu->gpumem = mmap(NULL, EMU_MEMORY_SIZE, 439 PROT_READ | PROT_WRITE, 440 MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, 441 0, 0); 442 if (emu->gpumem == MAP_FAILED) { 443 printf("Could not allocate GPU memory: %s\n", strerror(errno)); 444 exit(1); 445 } 446 447 /* Copy the instructions into GPU memory: */ 448 for (unsigned i = 0; i < emu->sizedwords; i++) { 449 mem_write_dword(emu, EMU_INSTR_BASE + (4 * i), emu->instrs[i]); 450 } 451 452 EMU_GPU_REG(CP_SQE_INSTR_BASE); 453 EMU_GPU_REG(CP_LPAC_SQE_INSTR_BASE); 454 455 /* Setup the address of the SQE fw, just use the normal CPU ptr address: */ 456 if (emu->lpac) { 457 emu_set_reg64(emu, &CP_LPAC_SQE_INSTR_BASE, EMU_INSTR_BASE); 458 } else { 459 emu_set_reg64(emu, &CP_SQE_INSTR_BASE, EMU_INSTR_BASE); 460 } 461 462 if (emu->gpu_id == 660) { 463 emu_set_control_reg(emu, 0, 3 << 28); 464 } else if (emu->gpu_id == 650) { 465 emu_set_control_reg(emu, 0, 1 << 28); 466 } 467} 468 469void 470emu_fini(struct emu *emu) 471{ 472 uint32_t *instrs = emu->instrs; 473 unsigned sizedwords = emu->sizedwords; 474 unsigned gpu_id = emu->gpu_id; 475 476 munmap(emu->gpumem, EMU_MEMORY_SIZE); 477 memset(emu, 0, sizeof(*emu)); 478 479 emu->instrs = instrs; 480 emu->sizedwords = sizedwords; 481 emu->gpu_id = gpu_id; 482} 483