1/* 2 * Copyright (c) 2017 Rob Clark <robdclark@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#ifndef _AFUC_H_ 25#define _AFUC_H_ 26 27#include <stdbool.h> 28 29#include "util/macros.h" 30 31/* 32TODO kernel debugfs to inject packet into rb for easier experimentation. It 33should trigger reloading pfp/me and resetting gpu.. 34 35Actually maybe it should be flag on submit ioctl to be able to deal w/ relocs, 36should be restricted to CAP_ADMIN and probably compile option too (default=n). 37if flag set, copy cmdstream bo contents into RB instead of IB'ing to it from 38RB. 39 */ 40 41/* The opcode is encoded variable length. Opcodes less than 0x30 42 * are encoded as 5 bits followed by (rep) flag. Opcodes >= 0x30 43 * (ie. top two bits are '11' are encoded as 6 bits. See get_opc() 44 */ 45typedef enum { 46 OPC_NOP = 0x00, 47 48 OPC_ADD = 0x01, /* add immediate */ 49 OPC_ADDHI = 0x02, /* add immediate (hi 32b of 64b) */ 50 OPC_SUB = 0x03, /* subtract immediate */ 51 OPC_SUBHI = 0x04, /* subtract immediate (hi 32b of 64b) */ 52 OPC_AND = 0x05, /* AND immediate */ 53 OPC_OR = 0x06, /* OR immediate */ 54 OPC_XOR = 0x07, /* XOR immediate */ 55 OPC_NOT = 0x08, /* bitwise not of immed (src1 ignored) */ 56 OPC_SHL = 0x09, /* shift-left immediate */ 57 OPC_USHR = 0x0a, /* unsigned shift right by immediate */ 58 OPC_ISHR = 0x0b, /* signed shift right by immediate */ 59 OPC_ROT = 0x0c, /* rotate left (left shift with wrap-around) */ 60 OPC_MUL8 = 0x0d, /* 8bit multiply by immediate */ 61 OPC_MIN = 0x0e, 62 OPC_MAX = 0x0f, 63 OPC_CMP = 0x10, /* compare src to immed */ 64 OPC_MOVI = 0x11, /* move immediate */ 65 66 /* Return the most-significant bit of src2, or 0 if src2 == 0 (the 67 * same as if src2 == 1). src1 is ignored. Note that this overlaps 68 * with STORE6, so it can only be used with the two-source encoding. 69 */ 70 OPC_MSB = 0x14, 71 72 OPC_ALU = 0x13, /* ALU instruction with two src registers */ 73 74 /* These seem something to do with setting some external state.. 75 * doesn't seem to map *directly* to registers, but I guess that 76 * is where things end up. For example, this sequence in the 77 * CP_INDIRECT_BUFFER handler: 78 * 79 * mov $02, $data ; low 32b of IB target address 80 * mov $03, $data ; high 32b of IB target 81 * mov $04, $data ; IB size in dwords 82 * breq $04, 0x0, #l23 (#69, 04a2) 83 * and $05, $18, 0x0003 84 * shl $05, $05, 0x0002 85 * cwrite $02, [$05 + 0x0b0], 0x8 86 * cwrite $03, [$05 + 0x0b1], 0x8 87 * cwrite $04, [$05 + 0x0b2], 0x8 88 * 89 * Note that CP_IB1/2_BASE_LO/HI/BUFSZ in 0x0b1f->0xb21 (IB1) and 90 * 0x0b22->0x0b24 (IB2). Presumably $05 ends up w/ different value 91 * for RB->IB1 vs IB1->IB2. 92 */ 93 OPC_CWRITE5 = 0x15, 94 OPC_CREAD5 = 0x16, 95 96 /* A6xx shuffled around the cwrite/cread opcodes and added new opcodes 97 * that let you read/write directly to memory (and bypass the IOMMU?). 98 */ 99 OPC_STORE6 = 0x14, 100 OPC_CWRITE6 = 0x15, 101 OPC_LOAD6 = 0x16, 102 OPC_CREAD6 = 0x17, 103 104 OPC_BRNEI = 0x30, /* relative branch (if $src != immed) */ 105 OPC_BREQI = 0x31, /* relative branch (if $src == immed) */ 106 OPC_BRNEB = 0x32, /* relative branch (if bit not set) */ 107 OPC_BREQB = 0x33, /* relative branch (if bit is set) */ 108 OPC_RET = 0x34, /* return */ 109 OPC_CALL = 0x35, /* "function" call */ 110 OPC_WIN = 0x36, /* wait for input (ie. wait for WPTR to advance) */ 111 OPC_PREEMPTLEAVE6 = 0x38, /* try to leave preemption */ 112 OPC_SETSECURE = 0x3b, /* switch secure mode on/off */ 113} afuc_opc; 114 115/** 116 * Special GPR registers: 117 * 118 * Notes: (applicable to a6xx, double check a5xx) 119 * 120 * 0x1d: 121 * $addr: writes configure GPU reg address to read/write 122 * (does not respect CP_PROTECT) 123 * $memdata: reads from FIFO filled based on MEM_READ_DWORDS/ 124 * MEM_READ_ADDR 125 * 0x1e: (note different mnemonic for src vs dst) 126 * $usraddr: writes configure GPU reg address to read/write, 127 * respecting CP_PROTECT 128 * $regdata: reads from FIFO filled based on REG_READ_DWORDS/ 129 * REG_READ_ADDR 130 * 0x1f: 131 * $data: reads from from pm4 input stream 132 * $data: writes to stream configured by write to $addr 133 * or $usraddr 134 */ 135typedef enum { 136 REG_REM = 0x1c, 137 REG_MEMDATA = 0x1d, /* when used as src */ 138 REG_ADDR = 0x1d, /* when used as dst */ 139 REG_REGDATA = 0x1e, /* when used as src */ 140 REG_USRADDR = 0x1e, /* when used as dst */ 141 REG_DATA = 0x1f, 142} afuc_reg; 143 144typedef union PACKED { 145 /* addi, subi, andi, ori, xori, etc: */ 146 struct PACKED { 147 uint32_t uimm : 16; 148 uint32_t dst : 5; 149 uint32_t src : 5; 150 uint32_t hdr : 6; 151 } alui; 152 struct PACKED { 153 uint32_t uimm : 16; 154 uint32_t dst : 5; 155 uint32_t shift : 5; 156 uint32_t hdr : 6; 157 } movi; 158 struct PACKED { 159 uint32_t alu : 5; 160 uint32_t pad : 4; 161 uint32_t xmov : 2; /* execute eXtra mov's based on $rem */ 162 uint32_t dst : 5; 163 uint32_t src2 : 5; 164 uint32_t src1 : 5; 165 uint32_t hdr : 6; 166 } alu; 167 struct PACKED { 168 uint32_t uimm : 12; 169 /* TODO this needs to be confirmed: 170 * 171 * flags: 172 * 0x4 - post-increment src2 by uimm (need to confirm this is also 173 * true for load/cread). TBD whether, when used in conjunction 174 * with @LOAD_STORE_HI, 32b rollover works properly. 175 * 176 * other values tbd, also need to confirm if different bits can be 177 * set together (I don't see examples of this in existing fw) 178 */ 179 uint32_t flags : 4; 180 uint32_t src1 : 5; /* dst (cread) or src (cwrite) register */ 181 uint32_t src2 : 5; /* read or write address is src2+uimm */ 182 uint32_t hdr : 6; 183 } control; 184 struct PACKED { 185 int32_t ioff : 16; /* relative offset */ 186 uint32_t bit_or_imm : 5; 187 uint32_t src : 5; 188 uint32_t hdr : 6; 189 } br; 190 struct PACKED { 191 uint32_t uoff : 26; /* absolute (unsigned) offset */ 192 uint32_t hdr : 6; 193 } call; 194 struct PACKED { 195 uint32_t pad : 25; 196 uint32_t interrupt : 1; /* return from ctxt-switch interrupt handler */ 197 uint32_t hdr : 6; 198 } ret; 199 struct PACKED { 200 uint32_t pad : 26; 201 uint32_t hdr : 6; 202 } waitin; 203 struct PACKED { 204 uint32_t pad : 26; 205 uint32_t opc_r : 6; 206 }; 207 208} afuc_instr; 209 210static inline void 211afuc_get_opc(afuc_instr *ai, afuc_opc *opc, bool *rep) 212{ 213 if (ai->opc_r < 0x30) { 214 *opc = ai->opc_r >> 1; 215 *rep = ai->opc_r & 0x1; 216 } else { 217 *opc = ai->opc_r; 218 *rep = false; 219 } 220} 221 222static inline void 223afuc_set_opc(afuc_instr *ai, afuc_opc opc, bool rep) 224{ 225 if (opc < 0x30) { 226 ai->opc_r = opc << 1; 227 ai->opc_r |= !!rep; 228 } else { 229 ai->opc_r = opc; 230 } 231} 232 233void print_src(unsigned reg); 234void print_dst(unsigned reg); 235void print_control_reg(uint32_t id); 236void print_pipe_reg(uint32_t id); 237 238#endif /* _AFUC_H_ */ 239