1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Ilia Mirkin 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "ir3/ir3_compiler.h" 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include "util/u_math.h" 27bf215546Sopenharmony_ci#include "util/u_queue.h" 28bf215546Sopenharmony_ci#include "util/half_float.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "adreno_pm4.xml.h" 31bf215546Sopenharmony_ci#include "adreno_common.xml.h" 32bf215546Sopenharmony_ci#include "a4xx.xml.h" 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci#include "ir3_asm.h" 35bf215546Sopenharmony_ci#include "main.h" 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_cistruct a4xx_backend { 38bf215546Sopenharmony_ci struct backend base; 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci struct ir3_compiler *compiler; 41bf215546Sopenharmony_ci struct fd_device *dev; 42bf215546Sopenharmony_ci}; 43bf215546Sopenharmony_cidefine_cast(backend, a4xx_backend); 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci/* 46bf215546Sopenharmony_ci * Backend implementation: 47bf215546Sopenharmony_ci */ 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_cistatic struct kernel * 50bf215546Sopenharmony_cia4xx_assemble(struct backend *b, FILE *in) 51bf215546Sopenharmony_ci{ 52bf215546Sopenharmony_ci struct a4xx_backend *a4xx_backend = to_a4xx_backend(b); 53bf215546Sopenharmony_ci struct ir3_kernel *ir3_kernel = ir3_asm_assemble(a4xx_backend->compiler, in); 54bf215546Sopenharmony_ci ir3_kernel->backend = b; 55bf215546Sopenharmony_ci return &ir3_kernel->base; 56bf215546Sopenharmony_ci} 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_cistatic void 59bf215546Sopenharmony_cia4xx_disassemble(struct kernel *kernel, FILE *out) 60bf215546Sopenharmony_ci{ 61bf215546Sopenharmony_ci ir3_asm_disassemble(to_ir3_kernel(kernel), out); 62bf215546Sopenharmony_ci} 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_cistatic void 65bf215546Sopenharmony_cics_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) 66bf215546Sopenharmony_ci{ 67bf215546Sopenharmony_ci struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); 68bf215546Sopenharmony_ci struct ir3_shader_variant *v = ir3_kernel->v; 69bf215546Sopenharmony_ci const struct ir3_info *i = &v->info; 70bf215546Sopenharmony_ci enum a3xx_threadsize thrsz = i->double_threadsize ? FOUR_QUADS : TWO_QUADS; 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2); 73bf215546Sopenharmony_ci OUT_RING(ring, 0x00000000); 74bf215546Sopenharmony_ci OUT_RING(ring, 0x00000012); 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci OUT_WFI(ring); 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_MODE_CONTROL, 1); 79bf215546Sopenharmony_ci OUT_RING(ring, 0x0000001e); 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_TPL1_TP_MODE_CONTROL, 1); 82bf215546Sopenharmony_ci OUT_RING(ring, 0x00000038); 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_TPL1_TP_FS_TEX_COUNT, 1); 85bf215546Sopenharmony_ci OUT_RING(ring, 0x00000000); 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci OUT_WFI(ring); 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_HLSQ_MODE_CONTROL, 1); 90bf215546Sopenharmony_ci OUT_RING(ring, 0x00000003); 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 1); 93bf215546Sopenharmony_ci OUT_RING(ring, 0x080005f0); 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1); 96bf215546Sopenharmony_ci OUT_RING(ring, 0x00000038); 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1); 99bf215546Sopenharmony_ci OUT_RING(ring, 0x00860010); 100bf215546Sopenharmony_ci // OUT_RING(ring, 0x00920000); 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1); 103bf215546Sopenharmony_ci OUT_RING(ring, 0x000004ff); 104bf215546Sopenharmony_ci // OUT_RING(ring, 0x00000260); 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG1, 1); 107bf215546Sopenharmony_ci OUT_RING(ring, 0x80000000); 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_CS_CTRL_REG0, 1); 110bf215546Sopenharmony_ci OUT_RING(ring, 111bf215546Sopenharmony_ci A4XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) | 112bf215546Sopenharmony_ci A4XX_SP_CS_CTRL_REG0_SUPERTHREADMODE | 113bf215546Sopenharmony_ci A4XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) | 114bf215546Sopenharmony_ci A4XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1)); 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_HLSQ_CS_CONTROL_REG, 1); 117bf215546Sopenharmony_ci OUT_RING(ring, A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET(0) | 118bf215546Sopenharmony_ci A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET(0) | 119bf215546Sopenharmony_ci A4XX_HLSQ_CS_CONTROL_REG_ENABLED | 120bf215546Sopenharmony_ci A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH(1) | 121bf215546Sopenharmony_ci COND(v->has_ssbo, A4XX_HLSQ_CS_CONTROL_REG_SSBO_ENABLE) | 122bf215546Sopenharmony_ci A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH(v->constlen / 4)); 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_CS_OBJ_START, 1); 125bf215546Sopenharmony_ci OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START */ 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_CS_LENGTH_REG, 1); 128bf215546Sopenharmony_ci OUT_RING(ring, v->instrlen); 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_ci uint32_t local_invocation_id, work_group_id, num_wg_id; 131bf215546Sopenharmony_ci local_invocation_id = 132bf215546Sopenharmony_ci ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID); 133bf215546Sopenharmony_ci work_group_id = ir3_kernel->info.wgid; 134bf215546Sopenharmony_ci num_wg_id = ir3_kernel->info.numwg; 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_HLSQ_CL_CONTROL_0, 2); 137bf215546Sopenharmony_ci OUT_RING(ring, A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID(work_group_id) | 138bf215546Sopenharmony_ci A4XX_HLSQ_CL_CONTROL_0_KERNELDIMCONSTID(regid(63, 0)) | 139bf215546Sopenharmony_ci A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID(local_invocation_id)); 140bf215546Sopenharmony_ci OUT_RING(ring, A4XX_HLSQ_CL_CONTROL_1_UNK0CONSTID(regid(63, 0)) | 141bf215546Sopenharmony_ci A4XX_HLSQ_CL_CONTROL_1_WORKGROUPSIZECONSTID(regid(63, 0))); 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_HLSQ_CL_KERNEL_CONST, 1); 144bf215546Sopenharmony_ci OUT_RING(ring, A4XX_HLSQ_CL_KERNEL_CONST_UNK0CONSTID(regid(63, 0)) | 145bf215546Sopenharmony_ci A4XX_HLSQ_CL_KERNEL_CONST_NUMWGCONSTID(num_wg_id)); 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_HLSQ_CL_WG_OFFSET, 1); 148bf215546Sopenharmony_ci OUT_RING(ring, A4XX_HLSQ_CL_WG_OFFSET_UNK0CONSTID(regid(63, 0))); 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci OUT_PKT3(ring, CP_LOAD_STATE4, 2); 151bf215546Sopenharmony_ci OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | 152bf215546Sopenharmony_ci CP_LOAD_STATE4_0_STATE_SRC(SS4_INDIRECT) | 153bf215546Sopenharmony_ci CP_LOAD_STATE4_0_STATE_BLOCK(SB4_CS_SHADER) | 154bf215546Sopenharmony_ci CP_LOAD_STATE4_0_NUM_UNIT(v->instrlen)); 155bf215546Sopenharmony_ci OUT_RELOC(ring, v->bo, 0, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER), 0); 156bf215546Sopenharmony_ci} 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_cistatic void 159bf215546Sopenharmony_ciemit_const(struct fd_ringbuffer *ring, struct kernel *kernel, uint32_t constid, uint32_t sizedwords, 160bf215546Sopenharmony_ci const uint32_t *dwords) 161bf215546Sopenharmony_ci{ 162bf215546Sopenharmony_ci uint32_t align_sz; 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci assert((constid % 4) == 0); 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci /* Overwrite appropriate entries with buffer addresses */ 167bf215546Sopenharmony_ci struct fd_bo **replacements = calloc(sizedwords, sizeof(struct fd_bo *)); 168bf215546Sopenharmony_ci for (int i = 0; i < MAX_BUFS; i++) { 169bf215546Sopenharmony_ci if (kernel->buf_addr_regs[i] != INVALID_REG) { 170bf215546Sopenharmony_ci int idx = kernel->buf_addr_regs[i]; 171bf215546Sopenharmony_ci assert(idx < sizedwords); 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci replacements[idx] = kernel->bufs[i]; 174bf215546Sopenharmony_ci } 175bf215546Sopenharmony_ci } 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci align_sz = align(sizedwords, 4); 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci OUT_PKT3(ring, CP_LOAD_STATE4, 2 + align_sz); 180bf215546Sopenharmony_ci OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(constid / 4) | 181bf215546Sopenharmony_ci CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | 182bf215546Sopenharmony_ci CP_LOAD_STATE4_0_STATE_BLOCK(SB4_CS_SHADER) | 183bf215546Sopenharmony_ci CP_LOAD_STATE4_0_NUM_UNIT(DIV_ROUND_UP(sizedwords, 4))); 184bf215546Sopenharmony_ci OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) | 185bf215546Sopenharmony_ci CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS)); 186bf215546Sopenharmony_ci for (unsigned i = 0; i < sizedwords; i++) { 187bf215546Sopenharmony_ci if (replacements[i]) 188bf215546Sopenharmony_ci OUT_RELOC(ring, replacements[i], 0, 0, 0); 189bf215546Sopenharmony_ci else 190bf215546Sopenharmony_ci OUT_RING(ring, dwords[i]); 191bf215546Sopenharmony_ci } 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci /* Zero-pad to multiple of 4 dwords */ 194bf215546Sopenharmony_ci for (uint32_t i = sizedwords; i < align_sz; i++) { 195bf215546Sopenharmony_ci OUT_RING(ring, 0); 196bf215546Sopenharmony_ci } 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci free(replacements); 199bf215546Sopenharmony_ci} 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_cistatic void 202bf215546Sopenharmony_cics_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel, 203bf215546Sopenharmony_ci uint32_t grid[3]) 204bf215546Sopenharmony_ci{ 205bf215546Sopenharmony_ci struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); 206bf215546Sopenharmony_ci struct ir3_shader_variant *v = ir3_kernel->v; 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci const struct ir3_const_state *const_state = ir3_const_state(v); 209bf215546Sopenharmony_ci uint32_t base = const_state->offsets.immediate; 210bf215546Sopenharmony_ci int size = DIV_ROUND_UP(const_state->immediates_count, 4); 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci /* truncate size to avoid writing constants that shader 213bf215546Sopenharmony_ci * does not use: 214bf215546Sopenharmony_ci */ 215bf215546Sopenharmony_ci size = MIN2(size + base, v->constlen) - base; 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci /* convert out of vec4: */ 218bf215546Sopenharmony_ci base *= 4; 219bf215546Sopenharmony_ci size *= 4; 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci if (size > 0) { 222bf215546Sopenharmony_ci emit_const(ring, kernel, base, size, const_state->immediates); 223bf215546Sopenharmony_ci } 224bf215546Sopenharmony_ci} 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_cistatic void 227bf215546Sopenharmony_cics_ibo_emit(struct fd_ringbuffer *ring, struct fd_submit *submit, 228bf215546Sopenharmony_ci struct kernel *kernel) 229bf215546Sopenharmony_ci{ 230bf215546Sopenharmony_ci OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (4 * kernel->num_bufs)); 231bf215546Sopenharmony_ci OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | 232bf215546Sopenharmony_ci CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | 233bf215546Sopenharmony_ci CP_LOAD_STATE4_0_STATE_BLOCK(SB4_CS_SSBO) | 234bf215546Sopenharmony_ci CP_LOAD_STATE4_0_NUM_UNIT(kernel->num_bufs)); 235bf215546Sopenharmony_ci OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) | 236bf215546Sopenharmony_ci CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); 237bf215546Sopenharmony_ci for (unsigned i = 0; i < kernel->num_bufs; i++) { 238bf215546Sopenharmony_ci OUT_RELOC(ring, kernel->bufs[i], 0, 0, 0); 239bf215546Sopenharmony_ci#if 1 240bf215546Sopenharmony_ci OUT_RING(ring, 0); 241bf215546Sopenharmony_ci OUT_RING(ring, 0); 242bf215546Sopenharmony_ci OUT_RING(ring, 0); 243bf215546Sopenharmony_ci#else 244bf215546Sopenharmony_ci OUT_RING(ring, kernel->buf_sizes[i]); 245bf215546Sopenharmony_ci OUT_RING(ring, kernel->buf_sizes[i]); 246bf215546Sopenharmony_ci OUT_RING(ring, 0x00000004); 247bf215546Sopenharmony_ci#endif 248bf215546Sopenharmony_ci } 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (2 * kernel->num_bufs)); 251bf215546Sopenharmony_ci OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | 252bf215546Sopenharmony_ci CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | 253bf215546Sopenharmony_ci CP_LOAD_STATE4_0_STATE_BLOCK(SB4_CS_SSBO) | 254bf215546Sopenharmony_ci CP_LOAD_STATE4_0_NUM_UNIT(kernel->num_bufs)); 255bf215546Sopenharmony_ci OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) | 256bf215546Sopenharmony_ci CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); 257bf215546Sopenharmony_ci for (unsigned i = 0; i < kernel->num_bufs; i++) { 258bf215546Sopenharmony_ci unsigned sz = kernel->buf_sizes[i]; 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci /* width is in dwords, overflows into height: */ 261bf215546Sopenharmony_ci sz /= 4; 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_ci#if 1 264bf215546Sopenharmony_ci OUT_RING(ring, A4XX_SSBO_1_0_WIDTH(sz)); 265bf215546Sopenharmony_ci OUT_RING(ring, A4XX_SSBO_1_1_HEIGHT(sz >> 16)); 266bf215546Sopenharmony_ci#else 267bf215546Sopenharmony_ci OUT_RING(ring, A4XX_SSBO_1_0_WIDTH(sz) | 268bf215546Sopenharmony_ci A4XX_SSBO_1_0_FMT(RB4_R32_UINT) | 269bf215546Sopenharmony_ci A4XX_SSBO_1_0_CPP(4)); 270bf215546Sopenharmony_ci OUT_RING(ring, A4XX_SSBO_1_1_HEIGHT(DIV_ROUND_UP(sz, 1 << 16)) | 271bf215546Sopenharmony_ci A4XX_SSBO_1_1_DEPTH(1)); 272bf215546Sopenharmony_ci#endif 273bf215546Sopenharmony_ci } 274bf215546Sopenharmony_ci} 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_cistatic void 277bf215546Sopenharmony_cia4xx_emit_grid(struct kernel *kernel, uint32_t grid[3], 278bf215546Sopenharmony_ci struct fd_submit *submit) 279bf215546Sopenharmony_ci{ 280bf215546Sopenharmony_ci struct fd_ringbuffer *ring = fd_submit_new_ringbuffer( 281bf215546Sopenharmony_ci submit, 0, FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE); 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci cs_program_emit(ring, kernel); 284bf215546Sopenharmony_ci cs_const_emit(ring, kernel, grid); 285bf215546Sopenharmony_ci cs_ibo_emit(ring, submit, kernel); 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci const unsigned *local_size = kernel->local_size; 288bf215546Sopenharmony_ci const unsigned *num_groups = grid; 289bf215546Sopenharmony_ci 290bf215546Sopenharmony_ci unsigned work_dim = 0; 291bf215546Sopenharmony_ci for (int i = 0; i < 3; i++) { 292bf215546Sopenharmony_ci if (!grid[i]) 293bf215546Sopenharmony_ci break; 294bf215546Sopenharmony_ci work_dim++; 295bf215546Sopenharmony_ci } 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_HLSQ_CL_NDRANGE_0, 7); 298bf215546Sopenharmony_ci OUT_RING(ring, A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM(work_dim) | 299bf215546Sopenharmony_ci A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) | 300bf215546Sopenharmony_ci A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) | 301bf215546Sopenharmony_ci A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1)); 302bf215546Sopenharmony_ci OUT_RING(ring, 303bf215546Sopenharmony_ci A4XX_HLSQ_CL_NDRANGE_1_SIZE_X(local_size[0] * num_groups[0])); 304bf215546Sopenharmony_ci OUT_RING(ring, 0); /* HLSQ_CL_NDRANGE_2_GLOBALOFF_X */ 305bf215546Sopenharmony_ci OUT_RING(ring, 306bf215546Sopenharmony_ci A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y(local_size[1] * num_groups[1])); 307bf215546Sopenharmony_ci OUT_RING(ring, 0); /* HLSQ_CL_NDRANGE_4_GLOBALOFF_Y */ 308bf215546Sopenharmony_ci OUT_RING(ring, 309bf215546Sopenharmony_ci A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z(local_size[2] * num_groups[2])); 310bf215546Sopenharmony_ci OUT_RING(ring, 0); /* HLSQ_CL_NDRANGE_6_GLOBALOFF_Z */ 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci#if 1 313bf215546Sopenharmony_ci OUT_PKT3(ring, CP_EXEC_CS, 4); 314bf215546Sopenharmony_ci OUT_RING(ring, 0x00000000); 315bf215546Sopenharmony_ci OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(grid[0])); 316bf215546Sopenharmony_ci OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(grid[1])); 317bf215546Sopenharmony_ci OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(grid[2])); 318bf215546Sopenharmony_ci#else 319bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_HLSQ_CL_KERNEL_GROUP_X, 3); 320bf215546Sopenharmony_ci OUT_RING(ring, grid[0]); /* HLSQ_CL_KERNEL_GROUP_X */ 321bf215546Sopenharmony_ci OUT_RING(ring, grid[1]); /* HLSQ_CL_KERNEL_GROUP_Y */ 322bf215546Sopenharmony_ci OUT_RING(ring, grid[2]); /* HLSQ_CL_KERNEL_GROUP_Z */ 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci OUT_PKT3(ring, CP_RUN_OPENCL, 1); 325bf215546Sopenharmony_ci OUT_RING(ring, 0); 326bf215546Sopenharmony_ci#endif 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci OUT_WFI(ring); 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci /* TODO: cache_flush */ 331bf215546Sopenharmony_ci} 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_cistruct backend * 334bf215546Sopenharmony_cia4xx_init(struct fd_device *dev, const struct fd_dev_id *dev_id) 335bf215546Sopenharmony_ci{ 336bf215546Sopenharmony_ci struct a4xx_backend *a4xx_backend = calloc(1, sizeof(*a4xx_backend)); 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci a4xx_backend->base = (struct backend){ 339bf215546Sopenharmony_ci .assemble = a4xx_assemble, 340bf215546Sopenharmony_ci .disassemble = a4xx_disassemble, 341bf215546Sopenharmony_ci .emit_grid = a4xx_emit_grid, 342bf215546Sopenharmony_ci }; 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_ci a4xx_backend->compiler = ir3_compiler_create(dev, dev_id, 345bf215546Sopenharmony_ci &(struct ir3_compiler_options) {}); 346bf215546Sopenharmony_ci a4xx_backend->dev = dev; 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci return &a4xx_backend->base; 349bf215546Sopenharmony_ci} 350