1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub 8bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom 9bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 
22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci#include "r600_sq.h" 24bf215546Sopenharmony_ci#include "r600_opcodes.h" 25bf215546Sopenharmony_ci#include "r600_formats.h" 26bf215546Sopenharmony_ci#include "r600_shader.h" 27bf215546Sopenharmony_ci#include "r600d.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include <errno.h> 30bf215546Sopenharmony_ci#include "util/u_bitcast.h" 31bf215546Sopenharmony_ci#include "util/u_dump.h" 32bf215546Sopenharmony_ci#include "util/u_memory.h" 33bf215546Sopenharmony_ci#include "util/u_math.h" 34bf215546Sopenharmony_ci#include "pipe/p_shader_tokens.h" 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci#include "sb/sb_public.h" 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci#define NUM_OF_CYCLES 3 39bf215546Sopenharmony_ci#define NUM_OF_COMPONENTS 4 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_cistatic inline bool alu_writes(struct r600_bytecode_alu *alu) 42bf215546Sopenharmony_ci{ 43bf215546Sopenharmony_ci return alu->dst.write || alu->is_op3; 44bf215546Sopenharmony_ci} 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_cistatic inline unsigned int r600_bytecode_get_num_operands(const struct r600_bytecode_alu *alu) 47bf215546Sopenharmony_ci{ 48bf215546Sopenharmony_ci return r600_isa_alu(alu->op)->src_count; 49bf215546Sopenharmony_ci} 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_cistatic struct r600_bytecode_cf *r600_bytecode_cf(void) 52bf215546Sopenharmony_ci{ 53bf215546Sopenharmony_ci struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf); 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci if (!cf) 56bf215546Sopenharmony_ci return NULL; 57bf215546Sopenharmony_ci list_inithead(&cf->list); 58bf215546Sopenharmony_ci list_inithead(&cf->alu); 59bf215546Sopenharmony_ci list_inithead(&cf->vtx); 60bf215546Sopenharmony_ci list_inithead(&cf->tex); 61bf215546Sopenharmony_ci list_inithead(&cf->gds); 62bf215546Sopenharmony_ci return cf; 63bf215546Sopenharmony_ci} 64bf215546Sopenharmony_ci 
65bf215546Sopenharmony_cistatic struct r600_bytecode_alu *r600_bytecode_alu(void) 66bf215546Sopenharmony_ci{ 67bf215546Sopenharmony_ci struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu); 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci if (!alu) 70bf215546Sopenharmony_ci return NULL; 71bf215546Sopenharmony_ci list_inithead(&alu->list); 72bf215546Sopenharmony_ci return alu; 73bf215546Sopenharmony_ci} 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_cistatic struct r600_bytecode_vtx *r600_bytecode_vtx(void) 76bf215546Sopenharmony_ci{ 77bf215546Sopenharmony_ci struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx); 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci if (!vtx) 80bf215546Sopenharmony_ci return NULL; 81bf215546Sopenharmony_ci list_inithead(&vtx->list); 82bf215546Sopenharmony_ci return vtx; 83bf215546Sopenharmony_ci} 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_cistatic struct r600_bytecode_tex *r600_bytecode_tex(void) 86bf215546Sopenharmony_ci{ 87bf215546Sopenharmony_ci struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex); 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci if (!tex) 90bf215546Sopenharmony_ci return NULL; 91bf215546Sopenharmony_ci list_inithead(&tex->list); 92bf215546Sopenharmony_ci return tex; 93bf215546Sopenharmony_ci} 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_cistatic struct r600_bytecode_gds *r600_bytecode_gds(void) 96bf215546Sopenharmony_ci{ 97bf215546Sopenharmony_ci struct r600_bytecode_gds *gds = CALLOC_STRUCT(r600_bytecode_gds); 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci if (gds == NULL) 100bf215546Sopenharmony_ci return NULL; 101bf215546Sopenharmony_ci list_inithead(&gds->list); 102bf215546Sopenharmony_ci return gds; 103bf215546Sopenharmony_ci} 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_cistatic unsigned stack_entry_size(enum radeon_family chip) { 106bf215546Sopenharmony_ci /* Wavefront size: 107bf215546Sopenharmony_ci * 64: 
R600/RV670/RV770/Cypress/R740/Barts/Turks/Caicos/ 108bf215546Sopenharmony_ci * Aruba/Sumo/Sumo2/redwood/juniper 109bf215546Sopenharmony_ci * 32: R630/R730/R710/Palm/Cedar 110bf215546Sopenharmony_ci * 16: R610/Rs780 111bf215546Sopenharmony_ci * 112bf215546Sopenharmony_ci * Stack row size: 113bf215546Sopenharmony_ci * Wavefront Size 16 32 48 64 114bf215546Sopenharmony_ci * Columns per Row (R6xx/R7xx/R8xx only) 8 8 4 4 115bf215546Sopenharmony_ci * Columns per Row (R9xx+) 8 4 4 4 */ 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci switch (chip) { 118bf215546Sopenharmony_ci /* FIXME: are some chips missing here? */ 119bf215546Sopenharmony_ci /* wavefront size 16 */ 120bf215546Sopenharmony_ci case CHIP_RV610: 121bf215546Sopenharmony_ci case CHIP_RS780: 122bf215546Sopenharmony_ci case CHIP_RV620: 123bf215546Sopenharmony_ci case CHIP_RS880: 124bf215546Sopenharmony_ci /* wavefront size 32 */ 125bf215546Sopenharmony_ci case CHIP_RV630: 126bf215546Sopenharmony_ci case CHIP_RV635: 127bf215546Sopenharmony_ci case CHIP_RV730: 128bf215546Sopenharmony_ci case CHIP_RV710: 129bf215546Sopenharmony_ci case CHIP_PALM: 130bf215546Sopenharmony_ci case CHIP_CEDAR: 131bf215546Sopenharmony_ci return 8; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci /* wavefront size 64 */ 134bf215546Sopenharmony_ci default: 135bf215546Sopenharmony_ci return 4; 136bf215546Sopenharmony_ci } 137bf215546Sopenharmony_ci} 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_civoid r600_bytecode_init(struct r600_bytecode *bc, 140bf215546Sopenharmony_ci enum amd_gfx_level gfx_level, 141bf215546Sopenharmony_ci enum radeon_family family, 142bf215546Sopenharmony_ci bool has_compressed_msaa_texturing) 143bf215546Sopenharmony_ci{ 144bf215546Sopenharmony_ci static unsigned next_shader_id = 0; 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci bc->debug_id = ++next_shader_id; 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci if ((gfx_level == R600) && 149bf215546Sopenharmony_ci (family != CHIP_RV670 && 
family != CHIP_RS780 && family != CHIP_RS880)) { 150bf215546Sopenharmony_ci bc->ar_handling = AR_HANDLE_RV6XX; 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci /* Insert a nop after a relative temp write so that a read in 153bf215546Sopenharmony_ci * the following instruction group gets the right value. The 154bf215546Sopenharmony_ci * r600 and EG ISA specs both say that read-after-rel-write of a 155bf215546Sopenharmony_ci * register in the next instr group is illegal, but apparently 156bf215546Sopenharmony_ci * that's not true on all chips (see commit 157bf215546Sopenharmony_ci * c96b9834032952492efbd2d1f5511fe225704918). 158bf215546Sopenharmony_ci */ 159bf215546Sopenharmony_ci bc->r6xx_nop_after_rel_dst = 1; 160bf215546Sopenharmony_ci } else if (family == CHIP_RV770) { 161bf215546Sopenharmony_ci bc->ar_handling = AR_HANDLE_NORMAL; 162bf215546Sopenharmony_ci bc->r6xx_nop_after_rel_dst = 1; 163bf215546Sopenharmony_ci } else { 164bf215546Sopenharmony_ci bc->ar_handling = AR_HANDLE_NORMAL; 165bf215546Sopenharmony_ci bc->r6xx_nop_after_rel_dst = 0; 166bf215546Sopenharmony_ci } 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci list_inithead(&bc->cf); 169bf215546Sopenharmony_ci bc->gfx_level = gfx_level; 170bf215546Sopenharmony_ci bc->family = family; 171bf215546Sopenharmony_ci bc->has_compressed_msaa_texturing = has_compressed_msaa_texturing; 172bf215546Sopenharmony_ci bc->stack.entry_size = stack_entry_size(family); 173bf215546Sopenharmony_ci} 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_ciint r600_bytecode_add_cf(struct r600_bytecode *bc) 176bf215546Sopenharmony_ci{ 177bf215546Sopenharmony_ci struct r600_bytecode_cf *cf = r600_bytecode_cf(); 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci if (!cf) 180bf215546Sopenharmony_ci return -ENOMEM; 181bf215546Sopenharmony_ci list_addtail(&cf->list, &bc->cf); 182bf215546Sopenharmony_ci if (bc->cf_last) { 183bf215546Sopenharmony_ci cf->id = bc->cf_last->id + 2; 184bf215546Sopenharmony_ci if 
(bc->cf_last->eg_alu_extended) { 185bf215546Sopenharmony_ci /* take into account extended alu size */ 186bf215546Sopenharmony_ci cf->id += 2; 187bf215546Sopenharmony_ci bc->ndw += 2; 188bf215546Sopenharmony_ci } 189bf215546Sopenharmony_ci } 190bf215546Sopenharmony_ci bc->cf_last = cf; 191bf215546Sopenharmony_ci bc->ncf++; 192bf215546Sopenharmony_ci bc->ndw += 2; 193bf215546Sopenharmony_ci bc->force_add_cf = 0; 194bf215546Sopenharmony_ci bc->ar_loaded = 0; 195bf215546Sopenharmony_ci return 0; 196bf215546Sopenharmony_ci} 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ciint r600_bytecode_add_output(struct r600_bytecode *bc, 199bf215546Sopenharmony_ci const struct r600_bytecode_output *output) 200bf215546Sopenharmony_ci{ 201bf215546Sopenharmony_ci int r; 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci if (output->gpr >= bc->ngpr) 204bf215546Sopenharmony_ci bc->ngpr = output->gpr + 1; 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci if (bc->cf_last && (bc->cf_last->op == output->op || 207bf215546Sopenharmony_ci (bc->cf_last->op == CF_OP_EXPORT && 208bf215546Sopenharmony_ci output->op == CF_OP_EXPORT_DONE)) && 209bf215546Sopenharmony_ci output->type == bc->cf_last->output.type && 210bf215546Sopenharmony_ci output->elem_size == bc->cf_last->output.elem_size && 211bf215546Sopenharmony_ci output->swizzle_x == bc->cf_last->output.swizzle_x && 212bf215546Sopenharmony_ci output->swizzle_y == bc->cf_last->output.swizzle_y && 213bf215546Sopenharmony_ci output->swizzle_z == bc->cf_last->output.swizzle_z && 214bf215546Sopenharmony_ci output->swizzle_w == bc->cf_last->output.swizzle_w && 215bf215546Sopenharmony_ci output->comp_mask == bc->cf_last->output.comp_mask && 216bf215546Sopenharmony_ci (output->burst_count + bc->cf_last->output.burst_count) <= 16) { 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr && 219bf215546Sopenharmony_ci (output->array_base + output->burst_count) == 
bc->cf_last->output.array_base) { 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci bc->cf_last->op = bc->cf_last->output.op = output->op; 222bf215546Sopenharmony_ci bc->cf_last->output.gpr = output->gpr; 223bf215546Sopenharmony_ci bc->cf_last->output.array_base = output->array_base; 224bf215546Sopenharmony_ci bc->cf_last->output.burst_count += output->burst_count; 225bf215546Sopenharmony_ci return 0; 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) && 228bf215546Sopenharmony_ci output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) { 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci bc->cf_last->op = bc->cf_last->output.op = output->op; 231bf215546Sopenharmony_ci bc->cf_last->output.burst_count += output->burst_count; 232bf215546Sopenharmony_ci return 0; 233bf215546Sopenharmony_ci } 234bf215546Sopenharmony_ci } 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci r = r600_bytecode_add_cf(bc); 237bf215546Sopenharmony_ci if (r) 238bf215546Sopenharmony_ci return r; 239bf215546Sopenharmony_ci bc->cf_last->op = output->op; 240bf215546Sopenharmony_ci memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output)); 241bf215546Sopenharmony_ci bc->cf_last->barrier = 1; 242bf215546Sopenharmony_ci return 0; 243bf215546Sopenharmony_ci} 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ciint r600_bytecode_add_pending_output(struct r600_bytecode *bc, 246bf215546Sopenharmony_ci const struct r600_bytecode_output *output) 247bf215546Sopenharmony_ci{ 248bf215546Sopenharmony_ci assert(bc->n_pending_outputs + 1 < ARRAY_SIZE(bc->pending_outputs)); 249bf215546Sopenharmony_ci bc->pending_outputs[bc->n_pending_outputs++] = *output; 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci return 0; 252bf215546Sopenharmony_ci} 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_civoid 255bf215546Sopenharmony_cir600_bytecode_add_ack(struct 
r600_bytecode *bc) 256bf215546Sopenharmony_ci{ 257bf215546Sopenharmony_ci bc->need_wait_ack = true; 258bf215546Sopenharmony_ci} 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ciint 261bf215546Sopenharmony_cir600_bytecode_wait_acks(struct r600_bytecode *bc) 262bf215546Sopenharmony_ci{ 263bf215546Sopenharmony_ci /* Store acks are an R700+ feature. */ 264bf215546Sopenharmony_ci if (bc->gfx_level < R700) 265bf215546Sopenharmony_ci return 0; 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci if (!bc->need_wait_ack) 268bf215546Sopenharmony_ci return 0; 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci int ret = r600_bytecode_add_cfinst(bc, CF_OP_WAIT_ACK); 271bf215546Sopenharmony_ci if (ret != 0) 272bf215546Sopenharmony_ci return ret; 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci struct r600_bytecode_cf *cf = bc->cf_last; 275bf215546Sopenharmony_ci cf->barrier = 1; 276bf215546Sopenharmony_ci /* Request a wait if the number of outstanding acks is > 0 */ 277bf215546Sopenharmony_ci cf->cf_addr = 0; 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci return 0; 280bf215546Sopenharmony_ci} 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ciuint32_t 283bf215546Sopenharmony_cir600_bytecode_write_export_ack_type(struct r600_bytecode *bc, bool indirect) 284bf215546Sopenharmony_ci{ 285bf215546Sopenharmony_ci if (bc->gfx_level >= R700) { 286bf215546Sopenharmony_ci if (indirect) 287bf215546Sopenharmony_ci return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK_EG; 288bf215546Sopenharmony_ci else 289bf215546Sopenharmony_ci return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_ACK_EG; 290bf215546Sopenharmony_ci } else { 291bf215546Sopenharmony_ci if (indirect) 292bf215546Sopenharmony_ci return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND; 293bf215546Sopenharmony_ci else 294bf215546Sopenharmony_ci return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 295bf215546Sopenharmony_ci } 296bf215546Sopenharmony_ci} 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci/* alu 
instructions that can ony exits once per group */ 299bf215546Sopenharmony_cistatic int is_alu_once_inst(struct r600_bytecode_alu *alu) 300bf215546Sopenharmony_ci{ 301bf215546Sopenharmony_ci return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED) || alu->is_lds_idx_op || alu->op == ALU_OP0_GROUP_BARRIER; 302bf215546Sopenharmony_ci} 303bf215546Sopenharmony_ci 304bf215546Sopenharmony_cistatic int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 305bf215546Sopenharmony_ci{ 306bf215546Sopenharmony_ci return (r600_isa_alu(alu->op)->flags & AF_REPL) && 307bf215546Sopenharmony_ci (r600_isa_alu_slots(bc->isa->hw_class, alu->op) == AF_4V); 308bf215546Sopenharmony_ci} 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_cistatic int is_alu_mova_inst(struct r600_bytecode_alu *alu) 311bf215546Sopenharmony_ci{ 312bf215546Sopenharmony_ci return r600_isa_alu(alu->op)->flags & AF_MOVA; 313bf215546Sopenharmony_ci} 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_cistatic int alu_uses_rel(struct r600_bytecode_alu *alu) 316bf215546Sopenharmony_ci{ 317bf215546Sopenharmony_ci unsigned num_src = r600_bytecode_get_num_operands(alu); 318bf215546Sopenharmony_ci unsigned src; 319bf215546Sopenharmony_ci 320bf215546Sopenharmony_ci if (alu->dst.rel) { 321bf215546Sopenharmony_ci return 1; 322bf215546Sopenharmony_ci } 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci for (src = 0; src < num_src; ++src) { 325bf215546Sopenharmony_ci if (alu->src[src].rel) { 326bf215546Sopenharmony_ci return 1; 327bf215546Sopenharmony_ci } 328bf215546Sopenharmony_ci } 329bf215546Sopenharmony_ci return 0; 330bf215546Sopenharmony_ci} 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_cistatic int is_lds_read(int sel) 333bf215546Sopenharmony_ci{ 334bf215546Sopenharmony_ci return sel == EG_V_SQ_ALU_SRC_LDS_OQ_A_POP || sel == EG_V_SQ_ALU_SRC_LDS_OQ_B_POP; 335bf215546Sopenharmony_ci} 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_cistatic int alu_uses_lds(struct r600_bytecode_alu 
*alu) 338bf215546Sopenharmony_ci{ 339bf215546Sopenharmony_ci unsigned num_src = r600_bytecode_get_num_operands(alu); 340bf215546Sopenharmony_ci unsigned src; 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci for (src = 0; src < num_src; ++src) { 343bf215546Sopenharmony_ci if (is_lds_read(alu->src[src].sel)) { 344bf215546Sopenharmony_ci return 1; 345bf215546Sopenharmony_ci } 346bf215546Sopenharmony_ci } 347bf215546Sopenharmony_ci return 0; 348bf215546Sopenharmony_ci} 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_cistatic int is_alu_64bit_inst(struct r600_bytecode_alu *alu) 351bf215546Sopenharmony_ci{ 352bf215546Sopenharmony_ci const struct alu_op_info *op = r600_isa_alu(alu->op); 353bf215546Sopenharmony_ci return (op->flags & AF_64); 354bf215546Sopenharmony_ci} 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_cistatic int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 357bf215546Sopenharmony_ci{ 358bf215546Sopenharmony_ci unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op); 359bf215546Sopenharmony_ci return !(slots & AF_S); 360bf215546Sopenharmony_ci} 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_cistatic int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 363bf215546Sopenharmony_ci{ 364bf215546Sopenharmony_ci unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op); 365bf215546Sopenharmony_ci return !(slots & AF_V); 366bf215546Sopenharmony_ci} 367bf215546Sopenharmony_ci 368bf215546Sopenharmony_ci/* alu instructions that can execute on any unit */ 369bf215546Sopenharmony_cistatic int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 370bf215546Sopenharmony_ci{ 371bf215546Sopenharmony_ci unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op); 372bf215546Sopenharmony_ci return slots == AF_VS; 373bf215546Sopenharmony_ci} 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_cistatic int is_nop_inst(struct r600_bytecode_alu *alu) 
376bf215546Sopenharmony_ci{ 377bf215546Sopenharmony_ci return alu->op == ALU_OP0_NOP; 378bf215546Sopenharmony_ci} 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_cistatic int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first, 381bf215546Sopenharmony_ci struct r600_bytecode_alu *assignment[5]) 382bf215546Sopenharmony_ci{ 383bf215546Sopenharmony_ci struct r600_bytecode_alu *alu; 384bf215546Sopenharmony_ci unsigned i, chan, trans; 385bf215546Sopenharmony_ci int max_slots = bc->gfx_level == CAYMAN ? 4 : 5; 386bf215546Sopenharmony_ci 387bf215546Sopenharmony_ci for (i = 0; i < max_slots; i++) 388bf215546Sopenharmony_ci assignment[i] = NULL; 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci for (alu = alu_first; alu; alu = list_entry(alu->list.next, struct r600_bytecode_alu, list)) { 391bf215546Sopenharmony_ci chan = alu->dst.chan; 392bf215546Sopenharmony_ci if (max_slots == 4) 393bf215546Sopenharmony_ci trans = 0; 394bf215546Sopenharmony_ci else if (is_alu_trans_unit_inst(bc, alu)) 395bf215546Sopenharmony_ci trans = 1; 396bf215546Sopenharmony_ci else if (is_alu_vec_unit_inst(bc, alu)) 397bf215546Sopenharmony_ci trans = 0; 398bf215546Sopenharmony_ci else if (assignment[chan]) 399bf215546Sopenharmony_ci trans = 1; /* Assume ALU_INST_PREFER_VECTOR. */ 400bf215546Sopenharmony_ci else 401bf215546Sopenharmony_ci trans = 0; 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci if (trans) { 404bf215546Sopenharmony_ci if (assignment[4]) { 405bf215546Sopenharmony_ci assert(0); /* ALU.Trans has already been allocated. */ 406bf215546Sopenharmony_ci return -1; 407bf215546Sopenharmony_ci } 408bf215546Sopenharmony_ci assignment[4] = alu; 409bf215546Sopenharmony_ci } else { 410bf215546Sopenharmony_ci if (assignment[chan]) { 411bf215546Sopenharmony_ci assert(0); /* ALU.chan has already been allocated. 
*/ 412bf215546Sopenharmony_ci return -1; 413bf215546Sopenharmony_ci } 414bf215546Sopenharmony_ci assignment[chan] = alu; 415bf215546Sopenharmony_ci } 416bf215546Sopenharmony_ci 417bf215546Sopenharmony_ci if (alu->last) 418bf215546Sopenharmony_ci break; 419bf215546Sopenharmony_ci } 420bf215546Sopenharmony_ci return 0; 421bf215546Sopenharmony_ci} 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_cistruct alu_bank_swizzle { 424bf215546Sopenharmony_ci int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; 425bf215546Sopenharmony_ci int hw_cfile_addr[4]; 426bf215546Sopenharmony_ci int hw_cfile_elem[4]; 427bf215546Sopenharmony_ci}; 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_cistatic const unsigned cycle_for_bank_swizzle_vec[][3] = { 430bf215546Sopenharmony_ci [SQ_ALU_VEC_012] = { 0, 1, 2 }, 431bf215546Sopenharmony_ci [SQ_ALU_VEC_021] = { 0, 2, 1 }, 432bf215546Sopenharmony_ci [SQ_ALU_VEC_120] = { 1, 2, 0 }, 433bf215546Sopenharmony_ci [SQ_ALU_VEC_102] = { 1, 0, 2 }, 434bf215546Sopenharmony_ci [SQ_ALU_VEC_201] = { 2, 0, 1 }, 435bf215546Sopenharmony_ci [SQ_ALU_VEC_210] = { 2, 1, 0 } 436bf215546Sopenharmony_ci}; 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_cistatic const unsigned cycle_for_bank_swizzle_scl[][3] = { 439bf215546Sopenharmony_ci [SQ_ALU_SCL_210] = { 2, 1, 0 }, 440bf215546Sopenharmony_ci [SQ_ALU_SCL_122] = { 1, 2, 2 }, 441bf215546Sopenharmony_ci [SQ_ALU_SCL_212] = { 2, 1, 2 }, 442bf215546Sopenharmony_ci [SQ_ALU_SCL_221] = { 2, 2, 1 } 443bf215546Sopenharmony_ci}; 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_cistatic void init_bank_swizzle(struct alu_bank_swizzle *bs) 446bf215546Sopenharmony_ci{ 447bf215546Sopenharmony_ci int i, cycle, component; 448bf215546Sopenharmony_ci /* set up gpr use */ 449bf215546Sopenharmony_ci for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++) 450bf215546Sopenharmony_ci for (component = 0; component < NUM_OF_COMPONENTS; component++) 451bf215546Sopenharmony_ci bs->hw_gpr[cycle][component] = -1; 452bf215546Sopenharmony_ci for (i = 
0; i < 4; i++) 453bf215546Sopenharmony_ci bs->hw_cfile_addr[i] = -1; 454bf215546Sopenharmony_ci for (i = 0; i < 4; i++) 455bf215546Sopenharmony_ci bs->hw_cfile_elem[i] = -1; 456bf215546Sopenharmony_ci} 457bf215546Sopenharmony_ci 458bf215546Sopenharmony_cistatic int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle) 459bf215546Sopenharmony_ci{ 460bf215546Sopenharmony_ci if (bs->hw_gpr[cycle][chan] == -1) 461bf215546Sopenharmony_ci bs->hw_gpr[cycle][chan] = sel; 462bf215546Sopenharmony_ci else if (bs->hw_gpr[cycle][chan] != (int)sel) { 463bf215546Sopenharmony_ci /* Another scalar operation has already used the GPR read port for the channel. */ 464bf215546Sopenharmony_ci return -1; 465bf215546Sopenharmony_ci } 466bf215546Sopenharmony_ci return 0; 467bf215546Sopenharmony_ci} 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_cistatic int reserve_cfile(const struct r600_bytecode *bc, 470bf215546Sopenharmony_ci struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) 471bf215546Sopenharmony_ci{ 472bf215546Sopenharmony_ci int res, num_res = 4; 473bf215546Sopenharmony_ci if (bc->gfx_level >= R700) { 474bf215546Sopenharmony_ci num_res = 2; 475bf215546Sopenharmony_ci chan /= 2; 476bf215546Sopenharmony_ci } 477bf215546Sopenharmony_ci for (res = 0; res < num_res; ++res) { 478bf215546Sopenharmony_ci if (bs->hw_cfile_addr[res] == -1) { 479bf215546Sopenharmony_ci bs->hw_cfile_addr[res] = sel; 480bf215546Sopenharmony_ci bs->hw_cfile_elem[res] = chan; 481bf215546Sopenharmony_ci return 0; 482bf215546Sopenharmony_ci } else if (bs->hw_cfile_addr[res] == sel && 483bf215546Sopenharmony_ci bs->hw_cfile_elem[res] == chan) 484bf215546Sopenharmony_ci return 0; /* Read for this scalar element already reserved, nothing to do here. */ 485bf215546Sopenharmony_ci } 486bf215546Sopenharmony_ci /* All cfile read ports are used, cannot reference vector element. 
*/ 487bf215546Sopenharmony_ci return -1; 488bf215546Sopenharmony_ci} 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_cistatic int is_gpr(unsigned sel) 491bf215546Sopenharmony_ci{ 492bf215546Sopenharmony_ci return (sel <= 127); 493bf215546Sopenharmony_ci} 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci/* CB constants start at 512, and get translated to a kcache index when ALU 496bf215546Sopenharmony_ci * clauses are constructed. Note that we handle kcache constants the same way 497bf215546Sopenharmony_ci * as (the now gone) cfile constants, is that really required? */ 498bf215546Sopenharmony_cistatic int is_kcache(unsigned sel) 499bf215546Sopenharmony_ci{ 500bf215546Sopenharmony_ci return (sel > 511 && sel < 4607) || /* Kcache before translation. */ 501bf215546Sopenharmony_ci (sel > 127 && sel < 192) || /* Kcache 0 & 1 after translation. */ 502bf215546Sopenharmony_ci (sel > 256 && sel < 320); /* Kcache 2 & 3 after translation (EG). */ 503bf215546Sopenharmony_ci} 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_cistatic int is_const(int sel) 506bf215546Sopenharmony_ci{ 507bf215546Sopenharmony_ci return is_kcache(sel) || 508bf215546Sopenharmony_ci (sel >= V_SQ_ALU_SRC_0 && 509bf215546Sopenharmony_ci sel <= V_SQ_ALU_SRC_LITERAL); 510bf215546Sopenharmony_ci} 511bf215546Sopenharmony_ci 512bf215546Sopenharmony_cistatic int check_vector(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, 513bf215546Sopenharmony_ci struct alu_bank_swizzle *bs, int bank_swizzle) 514bf215546Sopenharmony_ci{ 515bf215546Sopenharmony_ci int r, src, num_src, sel, elem, cycle; 516bf215546Sopenharmony_ci 517bf215546Sopenharmony_ci num_src = r600_bytecode_get_num_operands(alu); 518bf215546Sopenharmony_ci for (src = 0; src < num_src; src++) { 519bf215546Sopenharmony_ci sel = alu->src[src].sel; 520bf215546Sopenharmony_ci elem = alu->src[src].chan; 521bf215546Sopenharmony_ci if (is_gpr(sel)) { 522bf215546Sopenharmony_ci cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src]; 
523bf215546Sopenharmony_ci if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan) 524bf215546Sopenharmony_ci /* Nothing to do; special-case optimization, 525bf215546Sopenharmony_ci * second source uses first source’s reservation. */ 526bf215546Sopenharmony_ci continue; 527bf215546Sopenharmony_ci else { 528bf215546Sopenharmony_ci r = reserve_gpr(bs, sel, elem, cycle); 529bf215546Sopenharmony_ci if (r) 530bf215546Sopenharmony_ci return r; 531bf215546Sopenharmony_ci } 532bf215546Sopenharmony_ci } else if (is_kcache(sel)) { 533bf215546Sopenharmony_ci r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem); 534bf215546Sopenharmony_ci if (r) 535bf215546Sopenharmony_ci return r; 536bf215546Sopenharmony_ci } 537bf215546Sopenharmony_ci /* No restrictions on PV, PS, literal or special constants. */ 538bf215546Sopenharmony_ci } 539bf215546Sopenharmony_ci return 0; 540bf215546Sopenharmony_ci} 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_cistatic int check_scalar(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, 543bf215546Sopenharmony_ci struct alu_bank_swizzle *bs, int bank_swizzle) 544bf215546Sopenharmony_ci{ 545bf215546Sopenharmony_ci int r, src, num_src, const_count, sel, elem, cycle; 546bf215546Sopenharmony_ci 547bf215546Sopenharmony_ci num_src = r600_bytecode_get_num_operands(alu); 548bf215546Sopenharmony_ci for (const_count = 0, src = 0; src < num_src; ++src) { 549bf215546Sopenharmony_ci sel = alu->src[src].sel; 550bf215546Sopenharmony_ci elem = alu->src[src].chan; 551bf215546Sopenharmony_ci if (is_const(sel)) { /* Any constant, including literal and inline constants. */ 552bf215546Sopenharmony_ci if (const_count >= 2) 553bf215546Sopenharmony_ci /* More than two references to a constant in 554bf215546Sopenharmony_ci * transcendental operation. 
*/ 555bf215546Sopenharmony_ci return -1; 556bf215546Sopenharmony_ci else 557bf215546Sopenharmony_ci const_count++; 558bf215546Sopenharmony_ci } 559bf215546Sopenharmony_ci if (is_kcache(sel)) { 560bf215546Sopenharmony_ci r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem); 561bf215546Sopenharmony_ci if (r) 562bf215546Sopenharmony_ci return r; 563bf215546Sopenharmony_ci } 564bf215546Sopenharmony_ci } 565bf215546Sopenharmony_ci for (src = 0; src < num_src; ++src) { 566bf215546Sopenharmony_ci sel = alu->src[src].sel; 567bf215546Sopenharmony_ci elem = alu->src[src].chan; 568bf215546Sopenharmony_ci if (is_gpr(sel)) { 569bf215546Sopenharmony_ci cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; 570bf215546Sopenharmony_ci if (cycle < const_count) 571bf215546Sopenharmony_ci /* Cycle for GPR load conflicts with 572bf215546Sopenharmony_ci * constant load in transcendental operation. */ 573bf215546Sopenharmony_ci return -1; 574bf215546Sopenharmony_ci r = reserve_gpr(bs, sel, elem, cycle); 575bf215546Sopenharmony_ci if (r) 576bf215546Sopenharmony_ci return r; 577bf215546Sopenharmony_ci } 578bf215546Sopenharmony_ci /* PV PS restrictions */ 579bf215546Sopenharmony_ci if (const_count && (sel == 254 || sel == 255)) { 580bf215546Sopenharmony_ci cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; 581bf215546Sopenharmony_ci if (cycle < const_count) 582bf215546Sopenharmony_ci return -1; 583bf215546Sopenharmony_ci } 584bf215546Sopenharmony_ci } 585bf215546Sopenharmony_ci return 0; 586bf215546Sopenharmony_ci} 587bf215546Sopenharmony_ci
/**
 * Choose a bank swizzle for every instruction of one ALU group.
 *
 * Slots 0..3 are validated with check_vector(); slot 4 (the trans unit, only
 * considered when the chip is not Cayman) is validated with check_scalar().
 * An instruction with a non-zero bank_swizzle_force has that value copied
 * into bank_swizzle up front; if every populated slot is forced, nothing
 * else is checked and 0 is returned.
 *
 * Otherwise the candidate combinations are enumerated odometer-style,
 * starting from SQ_ALU_VEC_012 for slots 0..3 and SQ_ALU_SCL_210 for slot 4,
 * until one combination passes the checks or the budget of
 * max_slots * 1000 attempts is used up.
 *
 * \param bc     bytecode being assembled; gfx_level is read here and the
 *               pointer is forwarded to the check helpers.
 * \param slots  instructions of the group indexed by unit, NULL = empty.
 * \return 0 on success (bank_swizzle updated in every populated slot),
 *         -1 when no working combination was found.
 */
static int check_and_set_bank_swizzle(const struct r600_bytecode *bc,
				      struct r600_bytecode_alu *slots[5])
{
	const int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;
	struct alu_bank_swizzle bs;
	int swz[5];
	int budget = max_slots * 1000;
	int all_forced = 1;
	int i, r = 0;
	boolean scalar_only = bc->gfx_level != CAYMAN;

	for (i = 0; i < max_slots; i++) {
		struct r600_bytecode_alu *alu = slots[i];

		if (!alu)
			continue;

		if (alu->bank_swizzle_force)
			alu->bank_swizzle = alu->bank_swizzle_force;
		else
			all_forced = 0;

		/* any populated slot below 4 means this is not a scalar-only group */
		if (i < 4)
			scalar_only = false;
	}
	if (all_forced)
		return 0;

	/* Just check every possible combination of bank swizzle.
	 * Not very efficient, but works on the first try in most of the cases. */
	for (i = 0; i < 4; i++) {
		const struct r600_bytecode_alu *alu = slots[i];

		if (alu && alu->bank_swizzle_force && !alu->is_lds_idx_op)
			swz[i] = alu->bank_swizzle;
		else
			swz[i] = SQ_ALU_VEC_012;
	}
	swz[4] = SQ_ALU_SCL_210;

	while (swz[4] <= SQ_ALU_SCL_221 && budget--) {
		init_bank_swizzle(&bs);

		if (scalar_only) {
			r = 0;
		} else {
			for (i = 0; i < 4; i++) {
				if (!slots[i])
					continue;
				r = check_vector(bc, slots[i], &bs, swz[i]);
				if (r)
					break;
			}
		}

		if (!r && max_slots == 5 && slots[4])
			r = check_scalar(bc, slots[4], &bs, swz[4]);

		if (!r) {
			/* this combination works: commit it */
			for (i = 0; i < max_slots; i++) {
				if (slots[i])
					slots[i]->bank_swizzle = swz[i];
			}
			return 0;
		}

		if (scalar_only) {
			swz[4]++;
			continue;
		}

		/* advance to the next combination, skipping pinned slots */
		for (i = 0; i < max_slots; i++) {
			const struct r600_bytecode_alu *alu = slots[i];

			if (alu && (alu->bank_swizzle_force || alu->is_lds_idx_op))
				continue;

			if (++swz[i] <= SQ_ALU_VEC_210)
				break;
			if (i == max_slots - 1)
				return -1;
			swz[i] = SQ_ALU_VEC_012;
		}
	}

	/* Couldn't find a working swizzle. */
	return -1;
}

667bf215546Sopenharmony_ci 668bf215546Sopenharmony_cistatic int replace_gpr_with_pv_ps(struct r600_bytecode *bc, 669bf215546Sopenharmony_ci struct r600_bytecode_alu *slots[5], struct r600_bytecode_alu *alu_prev) 670bf215546Sopenharmony_ci{ 671bf215546Sopenharmony_ci struct r600_bytecode_alu *prev[5]; 672bf215546Sopenharmony_ci int gpr[5], chan[5]; 673bf215546Sopenharmony_ci int i, j, r, src, num_src; 674bf215546Sopenharmony_ci int max_slots = bc->gfx_level == CAYMAN ? 
4 : 5; 675bf215546Sopenharmony_ci 676bf215546Sopenharmony_ci r = assign_alu_units(bc, alu_prev, prev); 677bf215546Sopenharmony_ci if (r) 678bf215546Sopenharmony_ci return r; 679bf215546Sopenharmony_ci 680bf215546Sopenharmony_ci for (i = 0; i < max_slots; ++i) { 681bf215546Sopenharmony_ci if (prev[i] && alu_writes(prev[i]) && !prev[i]->dst.rel) { 682bf215546Sopenharmony_ci 683bf215546Sopenharmony_ci if (is_alu_64bit_inst(prev[i])) { 684bf215546Sopenharmony_ci gpr[i] = -1; 685bf215546Sopenharmony_ci continue; 686bf215546Sopenharmony_ci } 687bf215546Sopenharmony_ci 688bf215546Sopenharmony_ci gpr[i] = prev[i]->dst.sel; 689bf215546Sopenharmony_ci /* cube writes more than PV.X */ 690bf215546Sopenharmony_ci if (is_alu_reduction_inst(bc, prev[i])) 691bf215546Sopenharmony_ci chan[i] = 0; 692bf215546Sopenharmony_ci else 693bf215546Sopenharmony_ci chan[i] = prev[i]->dst.chan; 694bf215546Sopenharmony_ci } else 695bf215546Sopenharmony_ci gpr[i] = -1; 696bf215546Sopenharmony_ci } 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci for (i = 0; i < max_slots; ++i) { 699bf215546Sopenharmony_ci struct r600_bytecode_alu *alu = slots[i]; 700bf215546Sopenharmony_ci if (!alu) 701bf215546Sopenharmony_ci continue; 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci if (is_alu_64bit_inst(alu)) 704bf215546Sopenharmony_ci continue; 705bf215546Sopenharmony_ci num_src = r600_bytecode_get_num_operands(alu); 706bf215546Sopenharmony_ci for (src = 0; src < num_src; ++src) { 707bf215546Sopenharmony_ci if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) 708bf215546Sopenharmony_ci continue; 709bf215546Sopenharmony_ci 710bf215546Sopenharmony_ci if (bc->gfx_level < CAYMAN) { 711bf215546Sopenharmony_ci if (alu->src[src].sel == gpr[4] && 712bf215546Sopenharmony_ci alu->src[src].chan == chan[4] && 713bf215546Sopenharmony_ci alu_prev->pred_sel == alu->pred_sel) { 714bf215546Sopenharmony_ci alu->src[src].sel = V_SQ_ALU_SRC_PS; 715bf215546Sopenharmony_ci alu->src[src].chan = 0; 
716bf215546Sopenharmony_ci continue; 717bf215546Sopenharmony_ci } 718bf215546Sopenharmony_ci } 719bf215546Sopenharmony_ci 720bf215546Sopenharmony_ci for (j = 0; j < 4; ++j) { 721bf215546Sopenharmony_ci if (alu->src[src].sel == gpr[j] && 722bf215546Sopenharmony_ci alu->src[src].chan == j && 723bf215546Sopenharmony_ci alu_prev->pred_sel == alu->pred_sel) { 724bf215546Sopenharmony_ci alu->src[src].sel = V_SQ_ALU_SRC_PV; 725bf215546Sopenharmony_ci alu->src[src].chan = chan[j]; 726bf215546Sopenharmony_ci break; 727bf215546Sopenharmony_ci } 728bf215546Sopenharmony_ci } 729bf215546Sopenharmony_ci } 730bf215546Sopenharmony_ci } 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci return 0; 733bf215546Sopenharmony_ci} 734bf215546Sopenharmony_ci 735bf215546Sopenharmony_civoid r600_bytecode_special_constants(uint32_t value, unsigned *sel) 736bf215546Sopenharmony_ci{ 737bf215546Sopenharmony_ci switch(value) { 738bf215546Sopenharmony_ci case 0: 739bf215546Sopenharmony_ci *sel = V_SQ_ALU_SRC_0; 740bf215546Sopenharmony_ci break; 741bf215546Sopenharmony_ci case 1: 742bf215546Sopenharmony_ci *sel = V_SQ_ALU_SRC_1_INT; 743bf215546Sopenharmony_ci break; 744bf215546Sopenharmony_ci case -1: 745bf215546Sopenharmony_ci *sel = V_SQ_ALU_SRC_M_1_INT; 746bf215546Sopenharmony_ci break; 747bf215546Sopenharmony_ci case 0x3F800000: /* 1.0f */ 748bf215546Sopenharmony_ci *sel = V_SQ_ALU_SRC_1; 749bf215546Sopenharmony_ci break; 750bf215546Sopenharmony_ci case 0x3F000000: /* 0.5f */ 751bf215546Sopenharmony_ci *sel = V_SQ_ALU_SRC_0_5; 752bf215546Sopenharmony_ci break; 753bf215546Sopenharmony_ci default: 754bf215546Sopenharmony_ci *sel = V_SQ_ALU_SRC_LITERAL; 755bf215546Sopenharmony_ci break; 756bf215546Sopenharmony_ci } 757bf215546Sopenharmony_ci} 758bf215546Sopenharmony_ci 759bf215546Sopenharmony_ci/* compute how many literal are needed */ 760bf215546Sopenharmony_cistatic int r600_bytecode_alu_nliterals(struct r600_bytecode_alu *alu, 761bf215546Sopenharmony_ci uint32_t literal[4], unsigned 
*nliteral) 762bf215546Sopenharmony_ci{ 763bf215546Sopenharmony_ci unsigned num_src = r600_bytecode_get_num_operands(alu); 764bf215546Sopenharmony_ci unsigned i, j; 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_ci for (i = 0; i < num_src; ++i) { 767bf215546Sopenharmony_ci if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 768bf215546Sopenharmony_ci uint32_t value = alu->src[i].value; 769bf215546Sopenharmony_ci unsigned found = 0; 770bf215546Sopenharmony_ci for (j = 0; j < *nliteral; ++j) { 771bf215546Sopenharmony_ci if (literal[j] == value) { 772bf215546Sopenharmony_ci found = 1; 773bf215546Sopenharmony_ci break; 774bf215546Sopenharmony_ci } 775bf215546Sopenharmony_ci } 776bf215546Sopenharmony_ci if (!found) { 777bf215546Sopenharmony_ci if (*nliteral >= 4) 778bf215546Sopenharmony_ci return -EINVAL; 779bf215546Sopenharmony_ci literal[(*nliteral)++] = value; 780bf215546Sopenharmony_ci } 781bf215546Sopenharmony_ci } 782bf215546Sopenharmony_ci } 783bf215546Sopenharmony_ci return 0; 784bf215546Sopenharmony_ci} 785bf215546Sopenharmony_ci 786bf215546Sopenharmony_cistatic void r600_bytecode_alu_adjust_literals(struct r600_bytecode_alu *alu, 787bf215546Sopenharmony_ci uint32_t literal[4], unsigned nliteral) 788bf215546Sopenharmony_ci{ 789bf215546Sopenharmony_ci unsigned num_src = r600_bytecode_get_num_operands(alu); 790bf215546Sopenharmony_ci unsigned i, j; 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_ci for (i = 0; i < num_src; ++i) { 793bf215546Sopenharmony_ci if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 794bf215546Sopenharmony_ci uint32_t value = alu->src[i].value; 795bf215546Sopenharmony_ci for (j = 0; j < nliteral; ++j) { 796bf215546Sopenharmony_ci if (literal[j] == value) { 797bf215546Sopenharmony_ci alu->src[i].chan = j; 798bf215546Sopenharmony_ci break; 799bf215546Sopenharmony_ci } 800bf215546Sopenharmony_ci } 801bf215546Sopenharmony_ci } 802bf215546Sopenharmony_ci } 803bf215546Sopenharmony_ci} 804bf215546Sopenharmony_ci 805bf215546Sopenharmony_cistatic int 
merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu *slots[5], 806bf215546Sopenharmony_ci struct r600_bytecode_alu *alu_prev) 807bf215546Sopenharmony_ci{ 808bf215546Sopenharmony_ci struct r600_bytecode_alu *prev[5]; 809bf215546Sopenharmony_ci struct r600_bytecode_alu *result[5] = { NULL }; 810bf215546Sopenharmony_ci 811bf215546Sopenharmony_ci uint8_t interp_xz = 0; 812bf215546Sopenharmony_ci 813bf215546Sopenharmony_ci uint32_t literal[4], prev_literal[4]; 814bf215546Sopenharmony_ci unsigned nliteral = 0, prev_nliteral = 0; 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_ci int i, j, r, src, num_src; 817bf215546Sopenharmony_ci int num_once_inst = 0; 818bf215546Sopenharmony_ci int have_mova = 0, have_rel = 0; 819bf215546Sopenharmony_ci int max_slots = bc->gfx_level == CAYMAN ? 4 : 5; 820bf215546Sopenharmony_ci 821bf215546Sopenharmony_ci r = assign_alu_units(bc, alu_prev, prev); 822bf215546Sopenharmony_ci if (r) 823bf215546Sopenharmony_ci return r; 824bf215546Sopenharmony_ci 825bf215546Sopenharmony_ci for (i = 0; i < max_slots; ++i) { 826bf215546Sopenharmony_ci if (prev[i]) { 827bf215546Sopenharmony_ci if (prev[i]->pred_sel) 828bf215546Sopenharmony_ci return 0; 829bf215546Sopenharmony_ci if (is_alu_once_inst(prev[i])) 830bf215546Sopenharmony_ci return 0; 831bf215546Sopenharmony_ci 832bf215546Sopenharmony_ci if (prev[i]->op == ALU_OP1_INTERP_LOAD_P0) 833bf215546Sopenharmony_ci interp_xz |= 3; 834bf215546Sopenharmony_ci if (prev[i]->op == ALU_OP2_INTERP_X) 835bf215546Sopenharmony_ci interp_xz |= 1; 836bf215546Sopenharmony_ci if (prev[i]->op == ALU_OP2_INTERP_Z) 837bf215546Sopenharmony_ci interp_xz |= 2; 838bf215546Sopenharmony_ci } 839bf215546Sopenharmony_ci if (slots[i]) { 840bf215546Sopenharmony_ci if (slots[i]->pred_sel) 841bf215546Sopenharmony_ci return 0; 842bf215546Sopenharmony_ci if (is_alu_once_inst(slots[i])) 843bf215546Sopenharmony_ci return 0; 844bf215546Sopenharmony_ci if (slots[i]->op == ALU_OP1_INTERP_LOAD_P0) 845bf215546Sopenharmony_ci 
interp_xz |= 3; 846bf215546Sopenharmony_ci if (slots[i]->op == ALU_OP2_INTERP_X) 847bf215546Sopenharmony_ci interp_xz |= 1; 848bf215546Sopenharmony_ci if (slots[i]->op == ALU_OP2_INTERP_Z) 849bf215546Sopenharmony_ci interp_xz |= 2; 850bf215546Sopenharmony_ci } 851bf215546Sopenharmony_ci if (interp_xz == 3) 852bf215546Sopenharmony_ci return 0; 853bf215546Sopenharmony_ci } 854bf215546Sopenharmony_ci 855bf215546Sopenharmony_ci for (i = 0; i < max_slots; ++i) { 856bf215546Sopenharmony_ci struct r600_bytecode_alu *alu; 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci if (num_once_inst > 0) 859bf215546Sopenharmony_ci return 0; 860bf215546Sopenharmony_ci 861bf215546Sopenharmony_ci /* check number of literals */ 862bf215546Sopenharmony_ci if (prev[i]) { 863bf215546Sopenharmony_ci if (r600_bytecode_alu_nliterals(prev[i], literal, &nliteral)) 864bf215546Sopenharmony_ci return 0; 865bf215546Sopenharmony_ci if (r600_bytecode_alu_nliterals(prev[i], prev_literal, &prev_nliteral)) 866bf215546Sopenharmony_ci return 0; 867bf215546Sopenharmony_ci if (is_alu_mova_inst(prev[i])) { 868bf215546Sopenharmony_ci if (have_rel) 869bf215546Sopenharmony_ci return 0; 870bf215546Sopenharmony_ci have_mova = 1; 871bf215546Sopenharmony_ci } 872bf215546Sopenharmony_ci 873bf215546Sopenharmony_ci if (alu_uses_rel(prev[i])) { 874bf215546Sopenharmony_ci if (have_mova) { 875bf215546Sopenharmony_ci return 0; 876bf215546Sopenharmony_ci } 877bf215546Sopenharmony_ci have_rel = 1; 878bf215546Sopenharmony_ci } 879bf215546Sopenharmony_ci if (alu_uses_lds(prev[i])) 880bf215546Sopenharmony_ci return 0; 881bf215546Sopenharmony_ci 882bf215546Sopenharmony_ci num_once_inst += is_alu_once_inst(prev[i]); 883bf215546Sopenharmony_ci } 884bf215546Sopenharmony_ci if (slots[i] && r600_bytecode_alu_nliterals(slots[i], literal, &nliteral)) 885bf215546Sopenharmony_ci return 0; 886bf215546Sopenharmony_ci 887bf215546Sopenharmony_ci /* Let's check used slots. 
*/ 888bf215546Sopenharmony_ci if (prev[i] && !slots[i]) { 889bf215546Sopenharmony_ci result[i] = prev[i]; 890bf215546Sopenharmony_ci continue; 891bf215546Sopenharmony_ci } else if (prev[i] && slots[i]) { 892bf215546Sopenharmony_ci if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) { 893bf215546Sopenharmony_ci /* Trans unit is still free try to use it. */ 894bf215546Sopenharmony_ci if (is_alu_any_unit_inst(bc, slots[i]) && !alu_uses_lds(slots[i])) { 895bf215546Sopenharmony_ci result[i] = prev[i]; 896bf215546Sopenharmony_ci result[4] = slots[i]; 897bf215546Sopenharmony_ci } else if (is_alu_any_unit_inst(bc, prev[i])) { 898bf215546Sopenharmony_ci if (slots[i]->dst.sel == prev[i]->dst.sel && 899bf215546Sopenharmony_ci alu_writes(slots[i]) && 900bf215546Sopenharmony_ci alu_writes(prev[i])) 901bf215546Sopenharmony_ci return 0; 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_ci result[i] = slots[i]; 904bf215546Sopenharmony_ci result[4] = prev[i]; 905bf215546Sopenharmony_ci } else 906bf215546Sopenharmony_ci return 0; 907bf215546Sopenharmony_ci } else 908bf215546Sopenharmony_ci return 0; 909bf215546Sopenharmony_ci } else if(!slots[i]) { 910bf215546Sopenharmony_ci continue; 911bf215546Sopenharmony_ci } else { 912bf215546Sopenharmony_ci if (max_slots == 5 && slots[i] && prev[4] && 913bf215546Sopenharmony_ci slots[i]->dst.sel == prev[4]->dst.sel && 914bf215546Sopenharmony_ci slots[i]->dst.chan == prev[4]->dst.chan && 915bf215546Sopenharmony_ci alu_writes(slots[i]) && 916bf215546Sopenharmony_ci alu_writes(prev[4])) 917bf215546Sopenharmony_ci return 0; 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_ci result[i] = slots[i]; 920bf215546Sopenharmony_ci } 921bf215546Sopenharmony_ci 922bf215546Sopenharmony_ci alu = slots[i]; 923bf215546Sopenharmony_ci num_once_inst += is_alu_once_inst(alu); 924bf215546Sopenharmony_ci 925bf215546Sopenharmony_ci /* don't reschedule NOPs */ 926bf215546Sopenharmony_ci if (is_nop_inst(alu)) 927bf215546Sopenharmony_ci 
return 0; 928bf215546Sopenharmony_ci 929bf215546Sopenharmony_ci if (is_alu_mova_inst(alu)) { 930bf215546Sopenharmony_ci if (have_rel) { 931bf215546Sopenharmony_ci return 0; 932bf215546Sopenharmony_ci } 933bf215546Sopenharmony_ci have_mova = 1; 934bf215546Sopenharmony_ci } 935bf215546Sopenharmony_ci 936bf215546Sopenharmony_ci if (alu_uses_rel(alu)) { 937bf215546Sopenharmony_ci if (have_mova) { 938bf215546Sopenharmony_ci return 0; 939bf215546Sopenharmony_ci } 940bf215546Sopenharmony_ci have_rel = 1; 941bf215546Sopenharmony_ci } 942bf215546Sopenharmony_ci 943bf215546Sopenharmony_ci if (alu->op == ALU_OP0_SET_CF_IDX0 || 944bf215546Sopenharmony_ci alu->op == ALU_OP0_SET_CF_IDX1) 945bf215546Sopenharmony_ci return 0; /* data hazard with MOVA */ 946bf215546Sopenharmony_ci 947bf215546Sopenharmony_ci /* Let's check source gprs */ 948bf215546Sopenharmony_ci num_src = r600_bytecode_get_num_operands(alu); 949bf215546Sopenharmony_ci for (src = 0; src < num_src; ++src) { 950bf215546Sopenharmony_ci 951bf215546Sopenharmony_ci /* Constants don't matter. */ 952bf215546Sopenharmony_ci if (!is_gpr(alu->src[src].sel)) 953bf215546Sopenharmony_ci continue; 954bf215546Sopenharmony_ci 955bf215546Sopenharmony_ci for (j = 0; j < max_slots; ++j) { 956bf215546Sopenharmony_ci if (!prev[j] || !alu_writes(prev[j])) 957bf215546Sopenharmony_ci continue; 958bf215546Sopenharmony_ci 959bf215546Sopenharmony_ci /* If it's relative then we can't determin which gpr is really used. */ 960bf215546Sopenharmony_ci if (prev[j]->dst.chan == alu->src[src].chan && 961bf215546Sopenharmony_ci (prev[j]->dst.sel == alu->src[src].sel || 962bf215546Sopenharmony_ci prev[j]->dst.rel || alu->src[src].rel)) 963bf215546Sopenharmony_ci return 0; 964bf215546Sopenharmony_ci } 965bf215546Sopenharmony_ci } 966bf215546Sopenharmony_ci } 967bf215546Sopenharmony_ci 968bf215546Sopenharmony_ci /* more than one PRED_ or KILL_ ? 
*/ 969bf215546Sopenharmony_ci if (num_once_inst > 1) 970bf215546Sopenharmony_ci return 0; 971bf215546Sopenharmony_ci 972bf215546Sopenharmony_ci /* check if the result can still be swizzlet */ 973bf215546Sopenharmony_ci r = check_and_set_bank_swizzle(bc, result); 974bf215546Sopenharmony_ci if (r) 975bf215546Sopenharmony_ci return 0; 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_ci /* looks like everything worked out right, apply the changes */ 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci /* undo adding previus literals */ 980bf215546Sopenharmony_ci bc->cf_last->ndw -= align(prev_nliteral, 2); 981bf215546Sopenharmony_ci 982bf215546Sopenharmony_ci /* sort instructions */ 983bf215546Sopenharmony_ci for (i = 0; i < max_slots; ++i) { 984bf215546Sopenharmony_ci slots[i] = result[i]; 985bf215546Sopenharmony_ci if (result[i]) { 986bf215546Sopenharmony_ci list_del(&result[i]->list); 987bf215546Sopenharmony_ci result[i]->last = 0; 988bf215546Sopenharmony_ci list_addtail(&result[i]->list, &bc->cf_last->alu); 989bf215546Sopenharmony_ci } 990bf215546Sopenharmony_ci } 991bf215546Sopenharmony_ci 992bf215546Sopenharmony_ci /* determine new last instruction */ 993bf215546Sopenharmony_ci list_entry(bc->cf_last->alu.prev, struct r600_bytecode_alu, list)->last = 1; 994bf215546Sopenharmony_ci 995bf215546Sopenharmony_ci /* determine new first instruction */ 996bf215546Sopenharmony_ci for (i = 0; i < max_slots; ++i) { 997bf215546Sopenharmony_ci if (result[i]) { 998bf215546Sopenharmony_ci bc->cf_last->curr_bs_head = result[i]; 999bf215546Sopenharmony_ci break; 1000bf215546Sopenharmony_ci } 1001bf215546Sopenharmony_ci } 1002bf215546Sopenharmony_ci 1003bf215546Sopenharmony_ci bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head; 1004bf215546Sopenharmony_ci bc->cf_last->prev2_bs_head = NULL; 1005bf215546Sopenharmony_ci 1006bf215546Sopenharmony_ci return 0; 1007bf215546Sopenharmony_ci} 1008bf215546Sopenharmony_ci
/* Make one kcache set lock exactly the single line `line` of `bank`. */
static void r600_kcache_set_lock_1(struct r600_bytecode_kcache *set,
				   unsigned bank, unsigned line,
				   unsigned index_mode)
{
	set->mode = V_SQ_CF_KCACHE_LOCK_1;
	set->bank = bank;
	set->addr = line;
	set->index_mode = index_mode;
}

/**
 * Make sure constant-cache line `line` of `bank` is covered by a kcache set.
 *
 * The sets are kept sorted by bank and address.  Four sets are considered on
 * Evergreen and later, two otherwise.  An existing set is reused when it
 * already covers the line, or grown from one to two lines when the line is
 * directly adjacent; otherwise a new one-line set is inserted at the sorted
 * position or placed in the first free set.
 *
 * \param bc          bytecode being assembled (gfx_level is read).
 * \param kcache      array of kcache sets to update.
 * \param bank        constant buffer index of the line.
 * \param line        line number within the bank.
 * \param index_mode  index mode stored in a newly created set.
 * \return 0 on success, -ENOMEM when no set is available or a set is in a
 *         mode that cannot be extended.
 */
static int r600_bytecode_alloc_kcache_line(struct r600_bytecode *bc,
		struct r600_bytecode_kcache *kcache,
		unsigned bank, unsigned line, unsigned index_mode)
{
	const int kcache_banks = bc->gfx_level >= EVERGREEN ? 4 : 2;
	int i;

	for (i = 0; i < kcache_banks; i++) {
		struct r600_bytecode_kcache *set = &kcache[i];
		int d;

		if (!set->mode) {
			/* free kcache set - use it */
			r600_kcache_set_lock_1(set, bank, line, index_mode);
			return 0;
		}

		if (set->bank < bank)
			continue;

		if (set->bank > bank ||
		    (set->bank == bank && set->addr > line + 1)) {
			/* try to insert new line */
			if (kcache[kcache_banks - 1].mode) {
				/* all sets are in use */
				return -ENOMEM;
			}

			memmove(&kcache[i + 1], &kcache[i],
				(kcache_banks - i - 1) * sizeof(struct r600_bytecode_kcache));
			r600_kcache_set_lock_1(set, bank, line, index_mode);
			return 0;
		}

		d = line - set->addr;

		switch (d) {
		case -1:
			set->addr--;
			if (set->mode == V_SQ_CF_KCACHE_LOCK_2) {
				/* we are prepending the line to the current set,
				 * discarding the existing second line,
				 * so we'll have to insert line+2 after it */
				line += 2;
				continue;
			}
			if (set->mode == V_SQ_CF_KCACHE_LOCK_1) {
				set->mode = V_SQ_CF_KCACHE_LOCK_2;
				return 0;
			}
			/* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
			return -ENOMEM;
		case 1:
			set->mode = V_SQ_CF_KCACHE_LOCK_2;
			return 0;
		case 0:
			return 0;
		default:
			/* not adjacent to this set - look at the next one */
			break;
		}
	}
	return -ENOMEM;
}

/**
 * Reserve kcache lines for every kcache-backed source of one instruction.
 *
 * Each of the three source operands with a selector of 512 or above is
 * translated into a (bank, line) pair and passed to
 * r600_bytecode_alloc_kcache_line().
 *
 * \param bc      bytecode being assembled.
 * \param kcache  array of kcache sets to update.
 * \param alu     instruction whose sources are inspected.
 * \return 0 on success, otherwise the first error from the line allocator.
 */
static int r600_bytecode_alloc_inst_kcache_lines(struct r600_bytecode *bc,
		struct r600_bytecode_kcache *kcache,
		struct r600_bytecode_alu *alu)
{
	int s;

	for (s = 0; s < 3; s++) {
		const unsigned sel = alu->src[s].sel;
		unsigned bank, line, index_mode;
		int r;

		if (sel < 512)
			continue;

		bank = alu->src[s].kc_bank;
		assert(bank < R600_MAX_HW_CONST_BUFFERS);
		line = (sel - 512) >> 4;
		/* V_SQ_CF_INDEX_0 / V_SQ_CF_INDEX_NONE */
		index_mode = alu->src[s].kc_rel ? 1 : 0;

		r = r600_bytecode_alloc_kcache_line(bc, kcache, bank, line, index_mode);
		if (r)
			return r;
	}
	return 0;
}

/**
 * Rewrite kcache-backed source selectors relative to the locked kcache sets.
 *
 * For each of the three sources with a selector of 512 or above, the sets
 * are searched in order for one that has the operand's bank and covers its
 * line; the selector then becomes the offset inside that set plus the base
 * selector of the set.  A source for which no set matches is left unchanged.
 *
 * \param alu     instruction to patch.
 * \param kcache  the four kcache sets of the clause.
 * \return 0 on success, -ENOMEM when a set in NOP or LOCK_LOOP_INDEX mode is
 *         reached before a match is found.
 */
static int r600_bytecode_assign_kcache_banks(
		struct r600_bytecode_alu *alu,
		struct r600_bytecode_kcache * kcache)
{
	/* first selector value of kcache set 0..3 */
	static const unsigned int base[] = {128, 160, 256, 288};
	int s, k;

	/* Alter the src operands to refer to the kcache. */
	for (s = 0; s < 3; ++s) {
		unsigned int sel = alu->src[s].sel;
		unsigned int line;

		if (sel < 512)
			continue;

		sel -= 512;
		line = sel >> 4;

		for (k = 0; k < 4; ++k) {
			const struct r600_bytecode_kcache *set = &kcache[k];

			if (set->mode == V_SQ_CF_KCACHE_NOP ||
			    set->mode == V_SQ_CF_KCACHE_LOCK_LOOP_INDEX) {
				R600_ERR("unexpected kcache line mode\n");
				return -ENOMEM;
			}

			if (set->bank == alu->src[s].kc_bank &&
			    set->addr <= line &&
			    line < set->addr + set->mode) {
				alu->src[s].sel = sel - (set->addr << 4);
				alu->src[s].sel += base[k];
				break;
			}
		}
	}
	return 0;
}

1131bf215546Sopenharmony_ci 1132bf215546Sopenharmony_cistatic int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, 1133bf215546Sopenharmony_ci struct r600_bytecode_alu *alu, 1134bf215546Sopenharmony_ci unsigned type) 1135bf215546Sopenharmony_ci{ 1136bf215546Sopenharmony_ci struct r600_bytecode_kcache kcache_sets[4]; 1137bf215546Sopenharmony_ci struct r600_bytecode_kcache *kcache = kcache_sets; 1138bf215546Sopenharmony_ci int r; 1139bf215546Sopenharmony_ci 1140bf215546Sopenharmony_ci memcpy(kcache, bc->cf_last->kcache, 4 * sizeof(struct r600_bytecode_kcache)); 1141bf215546Sopenharmony_ci 1142bf215546Sopenharmony_ci if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) { 1143bf215546Sopenharmony_ci /* can't alloc, need to start new clause */ 1144bf215546Sopenharmony_ci 1145bf215546Sopenharmony_ci /* Make sure the CF ends with an "last" instruction when 1146bf215546Sopenharmony_ci * we split an ALU group because of a new CF */ 1147bf215546Sopenharmony_ci if (!list_is_empty(&bc->cf_last->alu)) { 1148bf215546Sopenharmony_ci struct r600_bytecode_alu *last_submitted = 1149bf215546Sopenharmony_ci list_last_entry(&bc->cf_last->alu, struct r600_bytecode_alu, list); 1150bf215546Sopenharmony_ci last_submitted->last = 1; 1151bf215546Sopenharmony_ci } 1152bf215546Sopenharmony_ci 1153bf215546Sopenharmony_ci if ((r = r600_bytecode_add_cf(bc))) { 1154bf215546Sopenharmony_ci return r; 1155bf215546Sopenharmony_ci } 1156bf215546Sopenharmony_ci bc->cf_last->op = type; 1157bf215546Sopenharmony_ci 1158bf215546Sopenharmony_ci /* retry with the new clause */ 1159bf215546Sopenharmony_ci kcache = bc->cf_last->kcache; 1160bf215546Sopenharmony_ci if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) { 1161bf215546Sopenharmony_ci /* can't alloc again- should never happen */ 
1162bf215546Sopenharmony_ci return r; 1163bf215546Sopenharmony_ci } 1164bf215546Sopenharmony_ci } else { 1165bf215546Sopenharmony_ci /* update kcache sets */ 1166bf215546Sopenharmony_ci memcpy(bc->cf_last->kcache, kcache, 4 * sizeof(struct r600_bytecode_kcache)); 1167bf215546Sopenharmony_ci } 1168bf215546Sopenharmony_ci 1169bf215546Sopenharmony_ci /* if we actually used more than 2 kcache sets, or have relative indexing - use ALU_EXTENDED on eg+ */ 1170bf215546Sopenharmony_ci if (kcache[2].mode != V_SQ_CF_KCACHE_NOP || 1171bf215546Sopenharmony_ci kcache[0].index_mode || kcache[1].index_mode || kcache[2].index_mode || kcache[3].index_mode) { 1172bf215546Sopenharmony_ci if (bc->gfx_level < EVERGREEN) 1173bf215546Sopenharmony_ci return -ENOMEM; 1174bf215546Sopenharmony_ci bc->cf_last->eg_alu_extended = 1; 1175bf215546Sopenharmony_ci } 1176bf215546Sopenharmony_ci 1177bf215546Sopenharmony_ci return 0; 1178bf215546Sopenharmony_ci} 1179bf215546Sopenharmony_ci
/**
 * Append one instruction group made of `max_slots` NOP instructions.
 *
 * NOP number i uses channel (i & 3) for both its first source and its
 * destination; only the final NOP has `last` set, which closes the group.
 *
 * \param bc         bytecode being assembled.
 * \param max_slots  number of NOPs to emit.
 * \return 0 on success, otherwise the error from r600_bytecode_add_alu().
 */
static int insert_nop_r6xx(struct r600_bytecode *bc, int max_slots)
{
	struct r600_bytecode_alu alu;
	int r, i;

	for (i = 0; i < max_slots; i++) {
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP0_NOP;
		alu.src[0].chan = i & 3;
		alu.dst.chan = i & 3;
		alu.last = (i == max_slots - 1);
		r = r600_bytecode_add_alu(bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/**
 * Load the AR register from the GPR bc->ar_reg / bc->ar_chan using
 * MOVA_GPR_INT.
 *
 * Does nothing when bc->ar_loaded is already set.  When the load is needed
 * for a source operand (`for_src`) and the current clause is not close to
 * its size limit, a group of four NOPs is emitted in front of the MOVA.
 *
 * \param bc       bytecode being assembled.
 * \param for_src  true when AR is needed by a relatively addressed source.
 * \return 0 on success, otherwise the error from emitting the NOP group or
 *         the MOVA instruction; bc->ar_loaded is only set on success.
 */
static int load_ar_r6xx(struct r600_bytecode *bc, bool for_src)
{
	struct r600_bytecode_alu alu;
	int r;

	if (bc->ar_loaded)
		return 0;

	/* hack to avoid making MOVA the last instruction in the clause */
	if ((bc->cf_last->ndw>>1) >= 110)
		bc->force_add_cf = 1;
	else if (for_src) {
		/* Propagate a failure instead of silently dropping it, and only
		 * count the NOP group once it has really been emitted. */
		r = insert_nop_r6xx(bc, 4);
		if (r)
			return r;
		bc->nalu_groups++;
	}

	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP1_MOVA_GPR_INT;
	alu.src[0].sel = bc->ar_reg;
	alu.src[0].chan = bc->ar_chan;
	alu.last = 1;
	alu.index_mode = INDEX_MODE_LOOP;
	r = r600_bytecode_add_alu(bc, &alu);
	if (r)
		return r;

	/* no requirement to set uses waterfall on MOVA_GPR_INT */
	bc->ar_loaded = 1;
	return 0;
}

/**
 * Load the AR register from the GPR bc->ar_reg / bc->ar_chan.
 *
 * When bc->ar_handling is set the work is delegated to load_ar_r6xx().
 * Otherwise, unless bc->ar_loaded is already set, a MOVA_INT instruction is
 * emitted and the current clause is flagged with r6xx_uses_waterfall.
 *
 * \param bc       bytecode being assembled.
 * \param for_src  forwarded to load_ar_r6xx(); unused on the MOVA_INT path.
 * \return 0 on success, otherwise the error from the instruction emitter.
 */
int r600_load_ar(struct r600_bytecode *bc, bool for_src)
{
	struct r600_bytecode_alu alu;
	int r;

	if (bc->ar_handling)
		return load_ar_r6xx(bc, for_src);

	if (bc->ar_loaded)
		return 0;

	/* hack to avoid making MOVA the last instruction in the clause */
	if ((bc->cf_last->ndw>>1) >= 110)
		bc->force_add_cf = 1;

	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP1_MOVA_INT;
	alu.src[0].sel = bc->ar_reg;
	alu.src[0].chan = bc->ar_chan;
	alu.last = 1;
	r = r600_bytecode_add_alu(bc, &alu);
	if (r)
		return r;

	bc->cf_last->r6xx_uses_waterfall = 1;
	bc->ar_loaded = 1;
	return 0;
}

1259bf215546Sopenharmony_ci 1260bf215546Sopenharmony_ciint r600_bytecode_add_alu_type(struct r600_bytecode *bc, 1261bf215546Sopenharmony_ci const struct r600_bytecode_alu *alu, unsigned type) 1262bf215546Sopenharmony_ci{ 1263bf215546Sopenharmony_ci struct r600_bytecode_alu *nalu = r600_bytecode_alu(); 1264bf215546Sopenharmony_ci struct r600_bytecode_alu *lalu; 1265bf215546Sopenharmony_ci int i, r; 1266bf215546Sopenharmony_ci 1267bf215546Sopenharmony_ci if (!nalu) 1268bf215546Sopenharmony_ci return -ENOMEM; 1269bf215546Sopenharmony_ci memcpy(nalu, alu, sizeof(struct r600_bytecode_alu)); 1270bf215546Sopenharmony_ci 1271bf215546Sopenharmony_ci if (alu->is_op3) { 1272bf215546Sopenharmony_ci /* will fail later since alu does not support it. 
*/ 1273bf215546Sopenharmony_ci assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs); 1274bf215546Sopenharmony_ci } 1275bf215546Sopenharmony_ci 1276bf215546Sopenharmony_ci if (bc->cf_last != NULL && bc->cf_last->op != type) { 1277bf215546Sopenharmony_ci /* check if we could add it anyway */ 1278bf215546Sopenharmony_ci if ((bc->cf_last->op == CF_OP_ALU && type == CF_OP_ALU_PUSH_BEFORE) || 1279bf215546Sopenharmony_ci (bc->cf_last->op == CF_OP_ALU_PUSH_BEFORE && type == CF_OP_ALU)) { 1280bf215546Sopenharmony_ci LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) { 1281bf215546Sopenharmony_ci if (lalu->execute_mask) { 1282bf215546Sopenharmony_ci bc->force_add_cf = 1; 1283bf215546Sopenharmony_ci break; 1284bf215546Sopenharmony_ci } 1285bf215546Sopenharmony_ci type = CF_OP_ALU_PUSH_BEFORE; 1286bf215546Sopenharmony_ci } 1287bf215546Sopenharmony_ci } else 1288bf215546Sopenharmony_ci bc->force_add_cf = 1; 1289bf215546Sopenharmony_ci } 1290bf215546Sopenharmony_ci 1291bf215546Sopenharmony_ci /* cf can contains only alu or only vtx or only tex */ 1292bf215546Sopenharmony_ci if (bc->cf_last == NULL || bc->force_add_cf) { 1293bf215546Sopenharmony_ci if (bc->cf_last && bc->cf_last->curr_bs_head) 1294bf215546Sopenharmony_ci bc->cf_last->curr_bs_head->last = 1; 1295bf215546Sopenharmony_ci r = r600_bytecode_add_cf(bc); 1296bf215546Sopenharmony_ci if (r) { 1297bf215546Sopenharmony_ci free(nalu); 1298bf215546Sopenharmony_ci return r; 1299bf215546Sopenharmony_ci } 1300bf215546Sopenharmony_ci } 1301bf215546Sopenharmony_ci bc->cf_last->op = type; 1302bf215546Sopenharmony_ci 1303bf215546Sopenharmony_ci /* Load index register if required */ 1304bf215546Sopenharmony_ci if (bc->gfx_level >= EVERGREEN) { 1305bf215546Sopenharmony_ci for (i = 0; i < 3; i++) 1306bf215546Sopenharmony_ci if (nalu->src[i].kc_bank && nalu->src[i].kc_rel) 1307bf215546Sopenharmony_ci egcm_load_index_reg(bc, 0, true); 1308bf215546Sopenharmony_ci } 1309bf215546Sopenharmony_ci 1310bf215546Sopenharmony_ci /* 
Check AR usage and load it if required */ 1311bf215546Sopenharmony_ci for (i = 0; i < 3; i++) 1312bf215546Sopenharmony_ci if (nalu->src[i].rel && !bc->ar_loaded) 1313bf215546Sopenharmony_ci r600_load_ar(bc, true); 1314bf215546Sopenharmony_ci 1315bf215546Sopenharmony_ci if (nalu->dst.rel && !bc->ar_loaded) 1316bf215546Sopenharmony_ci r600_load_ar(bc, false); 1317bf215546Sopenharmony_ci 1318bf215546Sopenharmony_ci /* Setup the kcache for this ALU instruction. This will start a new 1319bf215546Sopenharmony_ci * ALU clause if needed. */ 1320bf215546Sopenharmony_ci if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) { 1321bf215546Sopenharmony_ci free(nalu); 1322bf215546Sopenharmony_ci return r; 1323bf215546Sopenharmony_ci } 1324bf215546Sopenharmony_ci 1325bf215546Sopenharmony_ci if (!bc->cf_last->curr_bs_head) { 1326bf215546Sopenharmony_ci bc->cf_last->curr_bs_head = nalu; 1327bf215546Sopenharmony_ci } 1328bf215546Sopenharmony_ci /* number of gpr == the last gpr used in any alu */ 1329bf215546Sopenharmony_ci for (i = 0; i < 3; i++) { 1330bf215546Sopenharmony_ci if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) { 1331bf215546Sopenharmony_ci bc->ngpr = nalu->src[i].sel + 1; 1332bf215546Sopenharmony_ci } 1333bf215546Sopenharmony_ci if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) 1334bf215546Sopenharmony_ci r600_bytecode_special_constants(nalu->src[i].value, 1335bf215546Sopenharmony_ci &nalu->src[i].sel); 1336bf215546Sopenharmony_ci } 1337bf215546Sopenharmony_ci if (nalu->dst.write && nalu->dst.sel >= bc->ngpr) { 1338bf215546Sopenharmony_ci bc->ngpr = nalu->dst.sel + 1; 1339bf215546Sopenharmony_ci } 1340bf215546Sopenharmony_ci list_addtail(&nalu->list, &bc->cf_last->alu); 1341bf215546Sopenharmony_ci /* each alu use 2 dwords */ 1342bf215546Sopenharmony_ci bc->cf_last->ndw += 2; 1343bf215546Sopenharmony_ci bc->ndw += 2; 1344bf215546Sopenharmony_ci 1345bf215546Sopenharmony_ci /* process cur ALU instructions for bank swizzle */ 1346bf215546Sopenharmony_ci if 
(nalu->last) { 1347bf215546Sopenharmony_ci uint32_t literal[4]; 1348bf215546Sopenharmony_ci unsigned nliteral; 1349bf215546Sopenharmony_ci struct r600_bytecode_alu *slots[5]; 1350bf215546Sopenharmony_ci int max_slots = bc->gfx_level == CAYMAN ? 4 : 5; 1351bf215546Sopenharmony_ci r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); 1352bf215546Sopenharmony_ci if (r) 1353bf215546Sopenharmony_ci return r; 1354bf215546Sopenharmony_ci 1355bf215546Sopenharmony_ci if (bc->cf_last->prev_bs_head) { 1356bf215546Sopenharmony_ci struct r600_bytecode_alu *cur_prev_head = bc->cf_last->prev_bs_head; 1357bf215546Sopenharmony_ci r = merge_inst_groups(bc, slots, cur_prev_head); 1358bf215546Sopenharmony_ci if (r) 1359bf215546Sopenharmony_ci return r; 1360bf215546Sopenharmony_ci if (cur_prev_head != bc->cf_last->prev_bs_head) 1361bf215546Sopenharmony_ci bc->nalu_groups--; 1362bf215546Sopenharmony_ci } 1363bf215546Sopenharmony_ci 1364bf215546Sopenharmony_ci if (bc->cf_last->prev_bs_head) { 1365bf215546Sopenharmony_ci r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head); 1366bf215546Sopenharmony_ci if (r) 1367bf215546Sopenharmony_ci return r; 1368bf215546Sopenharmony_ci } 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_ci r = check_and_set_bank_swizzle(bc, slots); 1371bf215546Sopenharmony_ci if (r) 1372bf215546Sopenharmony_ci return r; 1373bf215546Sopenharmony_ci 1374bf215546Sopenharmony_ci for (i = 0, nliteral = 0; i < max_slots; i++) { 1375bf215546Sopenharmony_ci if (slots[i]) { 1376bf215546Sopenharmony_ci r = r600_bytecode_alu_nliterals(slots[i], literal, &nliteral); 1377bf215546Sopenharmony_ci if (r) 1378bf215546Sopenharmony_ci return r; 1379bf215546Sopenharmony_ci } 1380bf215546Sopenharmony_ci } 1381bf215546Sopenharmony_ci bc->cf_last->ndw += align(nliteral, 2); 1382bf215546Sopenharmony_ci 1383bf215546Sopenharmony_ci /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots) 1384bf215546Sopenharmony_ci * worst case */ 
1385bf215546Sopenharmony_ci if ((bc->cf_last->ndw >> 1) >= 120) { 1386bf215546Sopenharmony_ci bc->force_add_cf = 1; 1387bf215546Sopenharmony_ci } 1388bf215546Sopenharmony_ci 1389bf215546Sopenharmony_ci bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head; 1390bf215546Sopenharmony_ci bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head; 1391bf215546Sopenharmony_ci bc->cf_last->curr_bs_head = NULL; 1392bf215546Sopenharmony_ci 1393bf215546Sopenharmony_ci bc->nalu_groups++; 1394bf215546Sopenharmony_ci 1395bf215546Sopenharmony_ci if (bc->r6xx_nop_after_rel_dst) { 1396bf215546Sopenharmony_ci for (int i = 0; i < max_slots; ++i) { 1397bf215546Sopenharmony_ci if (slots[i] && slots[i]->dst.rel) { 1398bf215546Sopenharmony_ci insert_nop_r6xx(bc, max_slots); 1399bf215546Sopenharmony_ci bc->nalu_groups++; 1400bf215546Sopenharmony_ci break; 1401bf215546Sopenharmony_ci } 1402bf215546Sopenharmony_ci } 1403bf215546Sopenharmony_ci } 1404bf215546Sopenharmony_ci } 1405bf215546Sopenharmony_ci 1406bf215546Sopenharmony_ci /* Might need to insert spill write ops after current clause */ 1407bf215546Sopenharmony_ci if (nalu->last && bc->n_pending_outputs) { 1408bf215546Sopenharmony_ci while (bc->n_pending_outputs) { 1409bf215546Sopenharmony_ci r = r600_bytecode_add_output(bc, &bc->pending_outputs[--bc->n_pending_outputs]); 1410bf215546Sopenharmony_ci if (r) 1411bf215546Sopenharmony_ci return r; 1412bf215546Sopenharmony_ci } 1413bf215546Sopenharmony_ci } 1414bf215546Sopenharmony_ci 1415bf215546Sopenharmony_ci return 0; 1416bf215546Sopenharmony_ci} 1417bf215546Sopenharmony_ci 1418bf215546Sopenharmony_ciint r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu) 1419bf215546Sopenharmony_ci{ 1420bf215546Sopenharmony_ci return r600_bytecode_add_alu_type(bc, alu, CF_OP_ALU); 1421bf215546Sopenharmony_ci} 1422bf215546Sopenharmony_ci 1423bf215546Sopenharmony_cistatic unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode *bc) 
1424bf215546Sopenharmony_ci{ 1425bf215546Sopenharmony_ci switch (bc->gfx_level) { 1426bf215546Sopenharmony_ci case R600: 1427bf215546Sopenharmony_ci return 8; 1428bf215546Sopenharmony_ci 1429bf215546Sopenharmony_ci case R700: 1430bf215546Sopenharmony_ci case EVERGREEN: 1431bf215546Sopenharmony_ci case CAYMAN: 1432bf215546Sopenharmony_ci return 16; 1433bf215546Sopenharmony_ci 1434bf215546Sopenharmony_ci default: 1435bf215546Sopenharmony_ci R600_ERR("Unknown gfx level %d.\n", bc->gfx_level); 1436bf215546Sopenharmony_ci return 8; 1437bf215546Sopenharmony_ci } 1438bf215546Sopenharmony_ci} 1439bf215546Sopenharmony_ci 1440bf215546Sopenharmony_cistatic inline boolean last_inst_was_not_vtx_fetch(struct r600_bytecode *bc) 1441bf215546Sopenharmony_ci{ 1442bf215546Sopenharmony_ci return !((r600_isa_cf(bc->cf_last->op)->flags & CF_FETCH) && 1443bf215546Sopenharmony_ci bc->cf_last->op != CF_OP_GDS && 1444bf215546Sopenharmony_ci (bc->gfx_level == CAYMAN || 1445bf215546Sopenharmony_ci bc->cf_last->op != CF_OP_TEX)); 1446bf215546Sopenharmony_ci} 1447bf215546Sopenharmony_ci 1448bf215546Sopenharmony_cistatic int r600_bytecode_add_vtx_internal(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx, 1449bf215546Sopenharmony_ci bool use_tc) 1450bf215546Sopenharmony_ci{ 1451bf215546Sopenharmony_ci struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx(); 1452bf215546Sopenharmony_ci int r; 1453bf215546Sopenharmony_ci 1454bf215546Sopenharmony_ci if (!nvtx) 1455bf215546Sopenharmony_ci return -ENOMEM; 1456bf215546Sopenharmony_ci memcpy(nvtx, vtx, sizeof(struct r600_bytecode_vtx)); 1457bf215546Sopenharmony_ci 1458bf215546Sopenharmony_ci /* Load index register if required */ 1459bf215546Sopenharmony_ci if (bc->gfx_level >= EVERGREEN) { 1460bf215546Sopenharmony_ci if (vtx->buffer_index_mode) 1461bf215546Sopenharmony_ci egcm_load_index_reg(bc, vtx->buffer_index_mode - 1, false); 1462bf215546Sopenharmony_ci } 1463bf215546Sopenharmony_ci 1464bf215546Sopenharmony_ci /* cf can contains only alu 
or only vtx or only tex */ 1465bf215546Sopenharmony_ci if (bc->cf_last == NULL || 1466bf215546Sopenharmony_ci last_inst_was_not_vtx_fetch(bc) || 1467bf215546Sopenharmony_ci bc->force_add_cf) { 1468bf215546Sopenharmony_ci r = r600_bytecode_add_cf(bc); 1469bf215546Sopenharmony_ci if (r) { 1470bf215546Sopenharmony_ci free(nvtx); 1471bf215546Sopenharmony_ci return r; 1472bf215546Sopenharmony_ci } 1473bf215546Sopenharmony_ci switch (bc->gfx_level) { 1474bf215546Sopenharmony_ci case R600: 1475bf215546Sopenharmony_ci case R700: 1476bf215546Sopenharmony_ci bc->cf_last->op = CF_OP_VTX; 1477bf215546Sopenharmony_ci break; 1478bf215546Sopenharmony_ci case EVERGREEN: 1479bf215546Sopenharmony_ci if (use_tc) 1480bf215546Sopenharmony_ci bc->cf_last->op = CF_OP_TEX; 1481bf215546Sopenharmony_ci else 1482bf215546Sopenharmony_ci bc->cf_last->op = CF_OP_VTX; 1483bf215546Sopenharmony_ci break; 1484bf215546Sopenharmony_ci case CAYMAN: 1485bf215546Sopenharmony_ci bc->cf_last->op = CF_OP_TEX; 1486bf215546Sopenharmony_ci break; 1487bf215546Sopenharmony_ci default: 1488bf215546Sopenharmony_ci R600_ERR("Unknown gfx level %d.\n", bc->gfx_level); 1489bf215546Sopenharmony_ci free(nvtx); 1490bf215546Sopenharmony_ci return -EINVAL; 1491bf215546Sopenharmony_ci } 1492bf215546Sopenharmony_ci } 1493bf215546Sopenharmony_ci list_addtail(&nvtx->list, &bc->cf_last->vtx); 1494bf215546Sopenharmony_ci /* each fetch use 4 dwords */ 1495bf215546Sopenharmony_ci bc->cf_last->ndw += 4; 1496bf215546Sopenharmony_ci bc->ndw += 4; 1497bf215546Sopenharmony_ci if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) 1498bf215546Sopenharmony_ci bc->force_add_cf = 1; 1499bf215546Sopenharmony_ci 1500bf215546Sopenharmony_ci bc->ngpr = MAX2(bc->ngpr, vtx->src_gpr + 1); 1501bf215546Sopenharmony_ci bc->ngpr = MAX2(bc->ngpr, vtx->dst_gpr + 1); 1502bf215546Sopenharmony_ci 1503bf215546Sopenharmony_ci return 0; 1504bf215546Sopenharmony_ci} 1505bf215546Sopenharmony_ci 1506bf215546Sopenharmony_ciint 
r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx) 1507bf215546Sopenharmony_ci{ 1508bf215546Sopenharmony_ci return r600_bytecode_add_vtx_internal(bc, vtx, false); 1509bf215546Sopenharmony_ci} 1510bf215546Sopenharmony_ci 1511bf215546Sopenharmony_ciint r600_bytecode_add_vtx_tc(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx) 1512bf215546Sopenharmony_ci{ 1513bf215546Sopenharmony_ci return r600_bytecode_add_vtx_internal(bc, vtx, true); 1514bf215546Sopenharmony_ci} 1515bf215546Sopenharmony_ci 1516bf215546Sopenharmony_ciint r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex) 1517bf215546Sopenharmony_ci{ 1518bf215546Sopenharmony_ci struct r600_bytecode_tex *ntex = r600_bytecode_tex(); 1519bf215546Sopenharmony_ci int r; 1520bf215546Sopenharmony_ci 1521bf215546Sopenharmony_ci if (!ntex) 1522bf215546Sopenharmony_ci return -ENOMEM; 1523bf215546Sopenharmony_ci memcpy(ntex, tex, sizeof(struct r600_bytecode_tex)); 1524bf215546Sopenharmony_ci 1525bf215546Sopenharmony_ci /* Load index register if required */ 1526bf215546Sopenharmony_ci if (bc->gfx_level >= EVERGREEN) { 1527bf215546Sopenharmony_ci if (tex->sampler_index_mode || tex->resource_index_mode) 1528bf215546Sopenharmony_ci egcm_load_index_reg(bc, 1, false); 1529bf215546Sopenharmony_ci } 1530bf215546Sopenharmony_ci 1531bf215546Sopenharmony_ci /* we can't fetch data und use it as texture lookup address in the same TEX clause */ 1532bf215546Sopenharmony_ci if (bc->cf_last != NULL && 1533bf215546Sopenharmony_ci bc->cf_last->op == CF_OP_TEX) { 1534bf215546Sopenharmony_ci struct r600_bytecode_tex *ttex; 1535bf215546Sopenharmony_ci LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) { 1536bf215546Sopenharmony_ci if (ttex->dst_gpr == ntex->src_gpr && 1537bf215546Sopenharmony_ci (ttex->dst_sel_x < 4 || ttex->dst_sel_y < 4 || 1538bf215546Sopenharmony_ci ttex->dst_sel_z < 4 || ttex->dst_sel_w < 4)) { 1539bf215546Sopenharmony_ci bc->force_add_cf = 1; 
1540bf215546Sopenharmony_ci break; 1541bf215546Sopenharmony_ci } 1542bf215546Sopenharmony_ci } 1543bf215546Sopenharmony_ci /* vtx instrs get inserted after tex, so make sure we aren't moving the tex 1544bf215546Sopenharmony_ci * before (say) the instr fetching the texcoord. 1545bf215546Sopenharmony_ci */ 1546bf215546Sopenharmony_ci if (!list_is_empty(&bc->cf_last->vtx)) 1547bf215546Sopenharmony_ci bc->force_add_cf = 1; 1548bf215546Sopenharmony_ci 1549bf215546Sopenharmony_ci /* slight hack to make gradients always go into same cf */ 1550bf215546Sopenharmony_ci if (ntex->op == FETCH_OP_SET_GRADIENTS_H) 1551bf215546Sopenharmony_ci bc->force_add_cf = 1; 1552bf215546Sopenharmony_ci } 1553bf215546Sopenharmony_ci 1554bf215546Sopenharmony_ci /* cf can contains only alu or only vtx or only tex */ 1555bf215546Sopenharmony_ci if (bc->cf_last == NULL || 1556bf215546Sopenharmony_ci bc->cf_last->op != CF_OP_TEX || 1557bf215546Sopenharmony_ci bc->force_add_cf) { 1558bf215546Sopenharmony_ci r = r600_bytecode_add_cf(bc); 1559bf215546Sopenharmony_ci if (r) { 1560bf215546Sopenharmony_ci free(ntex); 1561bf215546Sopenharmony_ci return r; 1562bf215546Sopenharmony_ci } 1563bf215546Sopenharmony_ci bc->cf_last->op = CF_OP_TEX; 1564bf215546Sopenharmony_ci } 1565bf215546Sopenharmony_ci if (ntex->src_gpr >= bc->ngpr) { 1566bf215546Sopenharmony_ci bc->ngpr = ntex->src_gpr + 1; 1567bf215546Sopenharmony_ci } 1568bf215546Sopenharmony_ci if (ntex->dst_gpr >= bc->ngpr) { 1569bf215546Sopenharmony_ci bc->ngpr = ntex->dst_gpr + 1; 1570bf215546Sopenharmony_ci } 1571bf215546Sopenharmony_ci list_addtail(&ntex->list, &bc->cf_last->tex); 1572bf215546Sopenharmony_ci /* each texture fetch use 4 dwords */ 1573bf215546Sopenharmony_ci bc->cf_last->ndw += 4; 1574bf215546Sopenharmony_ci bc->ndw += 4; 1575bf215546Sopenharmony_ci if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) 1576bf215546Sopenharmony_ci bc->force_add_cf = 1; 1577bf215546Sopenharmony_ci return 0; 
1578bf215546Sopenharmony_ci} 1579bf215546Sopenharmony_ci 1580bf215546Sopenharmony_ciint r600_bytecode_add_gds(struct r600_bytecode *bc, const struct r600_bytecode_gds *gds) 1581bf215546Sopenharmony_ci{ 1582bf215546Sopenharmony_ci struct r600_bytecode_gds *ngds = r600_bytecode_gds(); 1583bf215546Sopenharmony_ci int r; 1584bf215546Sopenharmony_ci 1585bf215546Sopenharmony_ci if (ngds == NULL) 1586bf215546Sopenharmony_ci return -ENOMEM; 1587bf215546Sopenharmony_ci memcpy(ngds, gds, sizeof(struct r600_bytecode_gds)); 1588bf215546Sopenharmony_ci 1589bf215546Sopenharmony_ci if (bc->gfx_level >= EVERGREEN) { 1590bf215546Sopenharmony_ci if (gds->uav_index_mode) 1591bf215546Sopenharmony_ci egcm_load_index_reg(bc, gds->uav_index_mode - 1, false); 1592bf215546Sopenharmony_ci } 1593bf215546Sopenharmony_ci 1594bf215546Sopenharmony_ci if (bc->cf_last == NULL || 1595bf215546Sopenharmony_ci bc->cf_last->op != CF_OP_GDS || 1596bf215546Sopenharmony_ci bc->force_add_cf) { 1597bf215546Sopenharmony_ci r = r600_bytecode_add_cf(bc); 1598bf215546Sopenharmony_ci if (r) { 1599bf215546Sopenharmony_ci free(ngds); 1600bf215546Sopenharmony_ci return r; 1601bf215546Sopenharmony_ci } 1602bf215546Sopenharmony_ci bc->cf_last->op = CF_OP_GDS; 1603bf215546Sopenharmony_ci } 1604bf215546Sopenharmony_ci 1605bf215546Sopenharmony_ci list_addtail(&ngds->list, &bc->cf_last->gds); 1606bf215546Sopenharmony_ci bc->cf_last->ndw += 4; /* each GDS uses 4 dwords */ 1607bf215546Sopenharmony_ci if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) 1608bf215546Sopenharmony_ci bc->force_add_cf = 1; 1609bf215546Sopenharmony_ci return 0; 1610bf215546Sopenharmony_ci} 1611bf215546Sopenharmony_ci 1612bf215546Sopenharmony_ciint r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op) 1613bf215546Sopenharmony_ci{ 1614bf215546Sopenharmony_ci int r; 1615bf215546Sopenharmony_ci 1616bf215546Sopenharmony_ci /* Emit WAIT_ACK before control flow to ensure pending writes are always acked. 
*/ 1617bf215546Sopenharmony_ci if (op != CF_OP_WAIT_ACK && op != CF_OP_MEM_SCRATCH) 1618bf215546Sopenharmony_ci r600_bytecode_wait_acks(bc); 1619bf215546Sopenharmony_ci 1620bf215546Sopenharmony_ci r = r600_bytecode_add_cf(bc); 1621bf215546Sopenharmony_ci if (r) 1622bf215546Sopenharmony_ci return r; 1623bf215546Sopenharmony_ci 1624bf215546Sopenharmony_ci bc->cf_last->cond = V_SQ_CF_COND_ACTIVE; 1625bf215546Sopenharmony_ci bc->cf_last->op = op; 1626bf215546Sopenharmony_ci return 0; 1627bf215546Sopenharmony_ci} 1628bf215546Sopenharmony_ci 1629bf215546Sopenharmony_ciint cm_bytecode_add_cf_end(struct r600_bytecode *bc) 1630bf215546Sopenharmony_ci{ 1631bf215546Sopenharmony_ci return r600_bytecode_add_cfinst(bc, CF_OP_CF_END); 1632bf215546Sopenharmony_ci} 1633bf215546Sopenharmony_ci 1634bf215546Sopenharmony_ci/* common to all 3 families */ 1635bf215546Sopenharmony_cistatic int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id) 1636bf215546Sopenharmony_ci{ 1637bf215546Sopenharmony_ci if (r600_isa_fetch(vtx->op)->flags & FF_MEM) 1638bf215546Sopenharmony_ci return r700_bytecode_fetch_mem_build(bc, vtx, id); 1639bf215546Sopenharmony_ci bc->bytecode[id] = S_SQ_VTX_WORD0_VTX_INST(r600_isa_fetch_opcode(bc->isa->hw_class, vtx->op)) | 1640bf215546Sopenharmony_ci S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | 1641bf215546Sopenharmony_ci S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | 1642bf215546Sopenharmony_ci S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | 1643bf215546Sopenharmony_ci S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x); 1644bf215546Sopenharmony_ci if (bc->gfx_level < CAYMAN) 1645bf215546Sopenharmony_ci bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); 1646bf215546Sopenharmony_ci id++; 1647bf215546Sopenharmony_ci bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | 1648bf215546Sopenharmony_ci S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) | 1649bf215546Sopenharmony_ci S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) | 
1650bf215546Sopenharmony_ci S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) | 1651bf215546Sopenharmony_ci S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) | 1652bf215546Sopenharmony_ci S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) | 1653bf215546Sopenharmony_ci S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) | 1654bf215546Sopenharmony_ci S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) | 1655bf215546Sopenharmony_ci S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | 1656bf215546Sopenharmony_ci S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); 1657bf215546Sopenharmony_ci bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)| 1658bf215546Sopenharmony_ci S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian); 1659bf215546Sopenharmony_ci if (bc->gfx_level >= EVERGREEN) 1660bf215546Sopenharmony_ci bc->bytecode[id] |= ((vtx->buffer_index_mode & 0x3) << 21); // S_SQ_VTX_WORD2_BIM(vtx->buffer_index_mode); 1661bf215546Sopenharmony_ci if (bc->gfx_level < CAYMAN) 1662bf215546Sopenharmony_ci bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1); 1663bf215546Sopenharmony_ci id++; 1664bf215546Sopenharmony_ci bc->bytecode[id++] = 0; 1665bf215546Sopenharmony_ci return 0; 1666bf215546Sopenharmony_ci} 1667bf215546Sopenharmony_ci 1668bf215546Sopenharmony_ci/* common to all 3 families */ 1669bf215546Sopenharmony_cistatic int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id) 1670bf215546Sopenharmony_ci{ 1671bf215546Sopenharmony_ci bc->bytecode[id] = S_SQ_TEX_WORD0_TEX_INST( 1672bf215546Sopenharmony_ci r600_isa_fetch_opcode(bc->isa->hw_class, tex->op)) | 1673bf215546Sopenharmony_ci EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) | 1674bf215546Sopenharmony_ci S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) | 1675bf215546Sopenharmony_ci S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) | 1676bf215546Sopenharmony_ci S_SQ_TEX_WORD0_SRC_REL(tex->src_rel); 1677bf215546Sopenharmony_ci if (bc->gfx_level >= EVERGREEN) 1678bf215546Sopenharmony_ci bc->bytecode[id] |= ((tex->sampler_index_mode & 0x3) << 
27) | // S_SQ_TEX_WORD0_SIM(tex->sampler_index_mode); 1679bf215546Sopenharmony_ci ((tex->resource_index_mode & 0x3) << 25); // S_SQ_TEX_WORD0_RIM(tex->resource_index_mode) 1680bf215546Sopenharmony_ci id++; 1681bf215546Sopenharmony_ci bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) | 1682bf215546Sopenharmony_ci S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) | 1683bf215546Sopenharmony_ci S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) | 1684bf215546Sopenharmony_ci S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) | 1685bf215546Sopenharmony_ci S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) | 1686bf215546Sopenharmony_ci S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) | 1687bf215546Sopenharmony_ci S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) | 1688bf215546Sopenharmony_ci S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) | 1689bf215546Sopenharmony_ci S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) | 1690bf215546Sopenharmony_ci S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) | 1691bf215546Sopenharmony_ci S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w); 1692bf215546Sopenharmony_ci bc->bytecode[id++] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) | 1693bf215546Sopenharmony_ci S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) | 1694bf215546Sopenharmony_ci S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) | 1695bf215546Sopenharmony_ci S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) | 1696bf215546Sopenharmony_ci S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) | 1697bf215546Sopenharmony_ci S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) | 1698bf215546Sopenharmony_ci S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) | 1699bf215546Sopenharmony_ci S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w); 1700bf215546Sopenharmony_ci bc->bytecode[id++] = 0; 1701bf215546Sopenharmony_ci return 0; 1702bf215546Sopenharmony_ci} 1703bf215546Sopenharmony_ci 1704bf215546Sopenharmony_ci/* r600 only, r700/eg bits in r700_asm.c */ 1705bf215546Sopenharmony_cistatic int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id) 1706bf215546Sopenharmony_ci{ 
1707bf215546Sopenharmony_ci unsigned opcode = r600_isa_alu_opcode(bc->isa->hw_class, alu->op); 1708bf215546Sopenharmony_ci 1709bf215546Sopenharmony_ci /* don't replace gpr by pv or ps for destination register */ 1710bf215546Sopenharmony_ci bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | 1711bf215546Sopenharmony_ci S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | 1712bf215546Sopenharmony_ci S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | 1713bf215546Sopenharmony_ci S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | 1714bf215546Sopenharmony_ci S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | 1715bf215546Sopenharmony_ci S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) | 1716bf215546Sopenharmony_ci S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | 1717bf215546Sopenharmony_ci S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | 1718bf215546Sopenharmony_ci S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) | 1719bf215546Sopenharmony_ci S_SQ_ALU_WORD0_PRED_SEL(alu->pred_sel) | 1720bf215546Sopenharmony_ci S_SQ_ALU_WORD0_LAST(alu->last); 1721bf215546Sopenharmony_ci 1722bf215546Sopenharmony_ci if (alu->is_op3) { 1723bf215546Sopenharmony_ci assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs); 1724bf215546Sopenharmony_ci bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | 1725bf215546Sopenharmony_ci S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | 1726bf215546Sopenharmony_ci S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | 1727bf215546Sopenharmony_ci S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | 1728bf215546Sopenharmony_ci S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | 1729bf215546Sopenharmony_ci S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) | 1730bf215546Sopenharmony_ci S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | 1731bf215546Sopenharmony_ci S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | 1732bf215546Sopenharmony_ci S_SQ_ALU_WORD1_OP3_ALU_INST(opcode) | 1733bf215546Sopenharmony_ci S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle); 1734bf215546Sopenharmony_ci } else { 1735bf215546Sopenharmony_ci bc->bytecode[id++] = 
S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | 1736bf215546Sopenharmony_ci S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | 1737bf215546Sopenharmony_ci S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | 1738bf215546Sopenharmony_ci S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | 1739bf215546Sopenharmony_ci S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | 1740bf215546Sopenharmony_ci S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | 1741bf215546Sopenharmony_ci S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | 1742bf215546Sopenharmony_ci S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) | 1743bf215546Sopenharmony_ci S_SQ_ALU_WORD1_OP2_ALU_INST(opcode) | 1744bf215546Sopenharmony_ci S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | 1745bf215546Sopenharmony_ci S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->execute_mask) | 1746bf215546Sopenharmony_ci S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->update_pred); 1747bf215546Sopenharmony_ci } 1748bf215546Sopenharmony_ci return 0; 1749bf215546Sopenharmony_ci} 1750bf215546Sopenharmony_ci 1751bf215546Sopenharmony_cistatic void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf) 1752bf215546Sopenharmony_ci{ 1753bf215546Sopenharmony_ci *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); 1754bf215546Sopenharmony_ci *bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) | 1755bf215546Sopenharmony_ci S_SQ_CF_WORD1_BARRIER(1) | 1756bf215546Sopenharmony_ci S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)| 1757bf215546Sopenharmony_ci S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program); 1758bf215546Sopenharmony_ci} 1759bf215546Sopenharmony_ci 1760bf215546Sopenharmony_ci/* common for r600/r700 - eg in eg_asm.c */ 1761bf215546Sopenharmony_cistatic int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) 1762bf215546Sopenharmony_ci{ 1763bf215546Sopenharmony_ci unsigned id = cf->id; 1764bf215546Sopenharmony_ci const struct cf_op_info *cfop = r600_isa_cf(cf->op); 1765bf215546Sopenharmony_ci unsigned opcode = r600_isa_cf_opcode(bc->isa->hw_class, cf->op); 
1766bf215546Sopenharmony_ci 1767bf215546Sopenharmony_ci 1768bf215546Sopenharmony_ci if (cf->op == CF_NATIVE) { 1769bf215546Sopenharmony_ci bc->bytecode[id++] = cf->isa[0]; 1770bf215546Sopenharmony_ci bc->bytecode[id++] = cf->isa[1]; 1771bf215546Sopenharmony_ci } else if (cfop->flags & CF_ALU) { 1772bf215546Sopenharmony_ci bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | 1773bf215546Sopenharmony_ci S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) | 1774bf215546Sopenharmony_ci S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) | 1775bf215546Sopenharmony_ci S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank); 1776bf215546Sopenharmony_ci 1777bf215546Sopenharmony_ci bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(opcode) | 1778bf215546Sopenharmony_ci S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) | 1779bf215546Sopenharmony_ci S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | 1780bf215546Sopenharmony_ci S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | 1781bf215546Sopenharmony_ci S_SQ_CF_ALU_WORD1_BARRIER(1) | 1782bf215546Sopenharmony_ci S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->gfx_level == R600 ? 
cf->r6xx_uses_waterfall : 0) | 1783bf215546Sopenharmony_ci S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); 1784bf215546Sopenharmony_ci } else if (cfop->flags & CF_FETCH) { 1785bf215546Sopenharmony_ci if (bc->gfx_level == R700) 1786bf215546Sopenharmony_ci r700_bytecode_cf_vtx_build(&bc->bytecode[id], cf); 1787bf215546Sopenharmony_ci else 1788bf215546Sopenharmony_ci r600_bytecode_cf_vtx_build(&bc->bytecode[id], cf); 1789bf215546Sopenharmony_ci } else if (cfop->flags & CF_EXP) { 1790bf215546Sopenharmony_ci bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | 1791bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | 1792bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | 1793bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) | 1794bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr); 1795bf215546Sopenharmony_ci bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | 1796bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | 1797bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | 1798bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | 1799bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | 1800bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) | 1801bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) | 1802bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program); 1803bf215546Sopenharmony_ci } else if (cfop->flags & CF_MEM) { 1804bf215546Sopenharmony_ci bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | 1805bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | 1806bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | 
1807bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) | 1808bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr); 1809bf215546Sopenharmony_ci bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | 1810bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) | 1811bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) | 1812bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program) | 1813bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) | 1814bf215546Sopenharmony_ci S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask); 1815bf215546Sopenharmony_ci } else { 1816bf215546Sopenharmony_ci bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); 1817bf215546Sopenharmony_ci bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) | 1818bf215546Sopenharmony_ci S_SQ_CF_WORD1_BARRIER(1) | 1819bf215546Sopenharmony_ci S_SQ_CF_WORD1_COND(cf->cond) | 1820bf215546Sopenharmony_ci S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) | 1821bf215546Sopenharmony_ci S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program); 1822bf215546Sopenharmony_ci } 1823bf215546Sopenharmony_ci return 0; 1824bf215546Sopenharmony_ci} 1825bf215546Sopenharmony_ci

/*
 * Lay out and encode the whole shader into bc->bytecode.
 *
 * Pass 1 assigns a dword address to every CF clause. Clause bodies start
 * right after the CF instruction area (bc->cf_last->id + 2), fetch clauses
 * are rounded up to a multiple of four dwords, and bc->ndw ends up as the
 * end of the last clause.
 * Pass 2 encodes each CF instruction and then the instructions that hang
 * off it (ALU + literals, VTX, GDS, TEX).
 *
 * Any previous bc->bytecode buffer is freed and replaced. bc->cf_last is
 * dereferenced unconditionally, so the CF list is expected to be non-empty.
 *
 * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, -EINVAL
 * for an unknown gfx level, or the error code of the failing encoder.
 */
int r600_bytecode_build(struct r600_bytecode *bc)
{
	struct r600_bytecode_cf *cf;
	struct r600_bytecode_alu *alu;
	struct r600_bytecode_vtx *vtx;
	struct r600_bytecode_tex *tex;
	struct r600_bytecode_gds *gds;
	uint32_t literal[4];
	unsigned nliteral;
	unsigned addr;
	int i, r;

	if (!bc->nstack) { // If not 0, Stack_size already provided by llvm
		if (bc->stack.max_entries)
			bc->nstack = bc->stack.max_entries;
		else if (bc->type == PIPE_SHADER_VERTEX ||
			 bc->type == PIPE_SHADER_TESS_EVAL ||
			 bc->type == PIPE_SHADER_TESS_CTRL)
			bc->nstack = 1;
	}

	/* first path compute addr of each CF block */
	/* addr start after all the CF instructions */
	addr = bc->cf_last->id + 2;
	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
		if (r600_isa_cf(cf->op)->flags & CF_FETCH) {
			/* round up to the next multiple of 4 dwords */
			addr += 3;
			addr &= 0xFFFFFFFCUL;
		}
		cf->addr = addr;
		addr += cf->ndw;
		bc->ndw = cf->addr + cf->ndw;
	}
	free(bc->bytecode);
	bc->bytecode = calloc(4, bc->ndw);
	if (bc->bytecode == NULL)
		return -ENOMEM;
	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
		const struct cf_op_info *cfop = r600_isa_cf(cf->op);
		addr = cf->addr;
		if (bc->gfx_level >= EVERGREEN)
			r = eg_bytecode_cf_build(bc, cf);
		else
			r = r600_bytecode_cf_build(bc, cf);
		if (r)
			return r;
		if (cfop->flags & CF_ALU) {
			nliteral = 0;
			memset(literal, 0, sizeof(literal));
			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
				r = r600_bytecode_alu_nliterals(alu, literal, &nliteral);
				if (r)
					return r;
				r600_bytecode_alu_adjust_literals(alu, literal, nliteral);
				r600_bytecode_assign_kcache_banks(alu, cf->kcache);

				switch(bc->gfx_level) {
				case R600:
					r = r600_bytecode_alu_build(bc, alu, addr);
					break;
				case R700:
					r = r700_bytecode_alu_build(bc, alu, addr);
					break;
				case EVERGREEN:
				case CAYMAN:
					r = eg_bytecode_alu_build(bc, alu, addr);
					break;
				default:
					R600_ERR("unknown gfx level %d.\n", bc->gfx_level);
					return -EINVAL;
				}
				if (r)
					return r;
				addr += 2;
				if (alu->last) {
					/* literals collected since the previous `last`
					 * follow it, padded to an even dword count */
					for (i = 0; i < align(nliteral, 2); ++i) {
						bc->bytecode[addr++] = literal[i];
					}
					nliteral = 0;
					memset(literal, 0, sizeof(literal));
				}
			}
		} else if (cf->op == CF_OP_VTX) {
			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
				r = r600_bytecode_vtx_build(bc, vtx, addr);
				if (r)
					return r;
				addr += 4;
			}
		} else if (cf->op == CF_OP_GDS) {
			assert(bc->gfx_level >= EVERGREEN);
			LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) {
				r = eg_bytecode_gds_build(bc, gds, addr);
				if (r)
					return r;
				addr += 4;
			}
		} else if (cf->op == CF_OP_TEX) {
			/* a TEX clause may carry vertex fetches too; they are
			 * emitted ahead of the texture fetches */
			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
				assert(bc->gfx_level >= EVERGREEN);
				r = r600_bytecode_vtx_build(bc, vtx, addr);
				if (r)
					return r;
				addr += 4;
			}
			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
				r = r600_bytecode_tex_build(bc, tex, addr);
				if (r)
					return r;
				addr += 4;
			}
		}
	}
	return 0;
}

/*
 * Release everything owned by bc: the encoded bytecode buffer, every CF
 * node, and every ALU/TEX/VTX/GDS node hanging off each CF. bc->cf is left
 * as an empty list and bc->bytecode is NULL afterwards.
 */
void r600_bytecode_clear(struct r600_bytecode *bc)
{
	struct r600_bytecode_cf *cf = NULL, *next_cf;

	free(bc->bytecode);
	bc->bytecode = NULL;

	LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
		struct r600_bytecode_alu *alu = NULL, *next_alu;
		struct r600_bytecode_tex *tex = NULL, *next_tex;
		/* cf->vtx holds r600_bytecode_vtx nodes (see r600_bytecode_build),
		 * so the cursor must use that type for the list offset */
		struct r600_bytecode_vtx *vtx = NULL, *next_vtx;
		struct r600_bytecode_gds *gds = NULL, *next_gds;

		LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
			free(alu);
		}

		list_inithead(&cf->alu);

		LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) {
			free(tex);
		}

		list_inithead(&cf->tex);

		LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) {
			free(vtx);
		}

		list_inithead(&cf->vtx);

		LIST_FOR_EACH_ENTRY_SAFE(gds, next_gds, &cf->gds, list) {
			free(gds);
		}

		list_inithead(&cf->gds);

		free(cf);
	}

	/* reset the list head itself instead of going through the loop cursor */
	list_inithead(&bc->cf);
}

/*
 * Print one swizzle selector to stderr as a single character
 * (0-3 -> x y z w, 4 -> 0, 5 -> 1, 7 -> _). Returns the fprintf result.
 */
static int print_swizzle(unsigned swz)
{
	const char * swzchars = "xyzw01?_";
	assert(swz<8 && swz != 6);
	/* stay inside the table even when assertions are compiled out */
	return fprintf(stderr, "%c", swz < 8 ? swzchars[swz] : '?');
}

/*
 * Print a register/constant index to stderr, with brackets when the access
 * is relative or when the caller asks for them, and with the "+AR"/"+AL"
 * suffix selected by index_mode for relative accesses.
 * Returns the number of characters written.
 */
static int print_sel(unsigned sel, unsigned rel, unsigned index_mode,
		unsigned need_brackets)
{
	int o = 0;
	if (rel && index_mode >= 5 && sel < 128)
		o += fprintf(stderr, "G");
	if (rel || need_brackets) {
		o += fprintf(stderr, "[");
	}
	o += fprintf(stderr, "%d", sel);
	if (rel) {
		if (index_mode == 0 || index_mode == 6)
			o += fprintf(stderr, "+AR");
		else if (index_mode == 4)
			o += fprintf(stderr, "+AL");
	}
	if (rel || need_brackets) {
		o += fprintf(stderr, "]");
	}
	return o;
}

/*
 * Print the destination operand of an ALU instruction to stderr:
 * "R<n>" or "T<n>" when the instruction writes a result, "__" otherwise,
 * followed by ".<chan>". Returns the number of characters written.
 */
static int print_dst(struct r600_bytecode_alu *alu)
{
	int o = 0;
	unsigned sel = alu->dst.sel;
	char reg_char = 'R';
	/* Same split as print_src: selectors 124..127 are shown as T0..T3. */
	if (sel >= 128 - 4) { /* clause temporary gpr */
		sel -= 128 - 4;
		reg_char = 'T';
	}

	if (alu_writes(alu)) {
		o += fprintf(stderr, "%c", reg_char);
		/* print the rebased index so that it matches reg_char */
		o += print_sel(sel, alu->dst.rel, alu->index_mode, 0);
	} else {
		o += fprintf(stderr, "__");
	}
	o += fprintf(stderr, ".");
	o += print_swizzle(alu->dst.chan);
	return o;
}

/*
 * Print source operand `idx` of an ALU instruction to stderr.
 *
 * The selector is classified by range (GPR, clause temporary, kcache
 * window, constant file, parameter) or, for the remaining values, matched
 * against the inline-constant / special-source enumerators. Negation and
 * absolute-value modifiers are rendered as "-" and "|...|".
 * Returns the number of characters written.
 */
static int print_src(struct r600_bytecode_alu *alu, unsigned idx)
{
	int o = 0;
	struct r600_bytecode_alu_src *src = &alu->src[idx];
	unsigned sel = src->sel, need_sel = 1, need_chan = 1, need_brackets = 0;

	if (src->neg)
		o += fprintf(stderr,"-");
	if (src->abs)
		o += fprintf(stderr,"|");

	if (sel < 128 - 4) {
		o += fprintf(stderr, "R");
	} else if (sel < 128) {
		o += fprintf(stderr, "T");
		sel -= 128 - 4;
	} else if (sel < 160) {
		o += fprintf(stderr, "KC0");
		need_brackets = 1;
		sel -= 128;
	} else if (sel < 192) {
		o += fprintf(stderr, "KC1");
		need_brackets = 1;
		sel -= 160;
	} else if (sel >= 512) {
		o += fprintf(stderr, "C%d", src->kc_bank);
		need_brackets = 1;
		sel -= 512;
	} else if (sel >= 448) {
		o += fprintf(stderr, "Param");
		sel -= 448;
		need_chan = 0;
	} else if (sel >= 288) {
		o += fprintf(stderr, "KC3");
		need_brackets = 1;
		sel -= 288;
	} else if (sel >= 256) {
		o += fprintf(stderr, "KC2");
		need_brackets = 1;
		sel -= 256;
	} else {
		/* special sources: printed by name, no index; a channel only
		 * where the case below turns need_chan back on */
		need_sel = 0;
		need_chan = 0;
		switch (sel) {
		case EG_V_SQ_ALU_SRC_LDS_DIRECT_A:
			o += fprintf(stderr, "LDS_A[0x%08X]", src->value);
			break;
		case EG_V_SQ_ALU_SRC_LDS_DIRECT_B:
			o += fprintf(stderr, "LDS_B[0x%08X]", src->value);
			break;
		case EG_V_SQ_ALU_SRC_LDS_OQ_A:
			o += fprintf(stderr, "LDS_OQ_A");
			need_chan = 1;
			break;
		case EG_V_SQ_ALU_SRC_LDS_OQ_B:
			o += fprintf(stderr, "LDS_OQ_B");
			need_chan = 1;
			break;
		case EG_V_SQ_ALU_SRC_LDS_OQ_A_POP:
			o += fprintf(stderr, "LDS_OQ_A_POP");
			need_chan = 1;
			break;
		case EG_V_SQ_ALU_SRC_LDS_OQ_B_POP:
			o += fprintf(stderr, "LDS_OQ_B_POP");
			need_chan = 1;
			break;
		case EG_V_SQ_ALU_SRC_TIME_LO:
			o += fprintf(stderr, "TIME_LO");
			break;
		case EG_V_SQ_ALU_SRC_TIME_HI:
			o += fprintf(stderr, "TIME_HI");
			break;
		case EG_V_SQ_ALU_SRC_SE_ID:
			o += fprintf(stderr, "SE_ID");
			break;
		case EG_V_SQ_ALU_SRC_SIMD_ID:
			o += fprintf(stderr, "SIMD_ID");
			break;
		case EG_V_SQ_ALU_SRC_HW_WAVE_ID:
			o += fprintf(stderr, "HW_WAVE_ID");
			break;
		case V_SQ_ALU_SRC_PS:
			o += fprintf(stderr, "PS");
			break;
		case V_SQ_ALU_SRC_PV:
			o += fprintf(stderr, "PV");
			need_chan = 1;
			break;
		case V_SQ_ALU_SRC_LITERAL:
			o += fprintf(stderr, "[0x%08X %f]", src->value, u_bitcast_u2f(src->value));
			break;
		case V_SQ_ALU_SRC_0_5:
			o += fprintf(stderr, "0.5");
			break;
		case V_SQ_ALU_SRC_M_1_INT:
			o += fprintf(stderr, "-1");
			break;
		case V_SQ_ALU_SRC_1_INT:
			o += fprintf(stderr, "1");
			break;
		case V_SQ_ALU_SRC_1:
			o += fprintf(stderr, "1.0");
			break;
		case V_SQ_ALU_SRC_0:
			o += fprintf(stderr, "0");
			break;
		default:
			o += fprintf(stderr, "??IMM_%d", sel);
			break;
		}
	}

	if (need_sel)
		o += print_sel(sel, src->rel, alu->index_mode, need_brackets);

	if (need_chan) {
		o += fprintf(stderr, ".");
		o += print_swizzle(src->chan);
	}

	if (src->abs)
		o += fprintf(stderr,"|");

	return o;
}

/*
 * Pad the current stderr line from column p up to column c by writing the
 * pad string once per missing column. Returns the number of characters
 * written (0 when p >= c).
 */
static int print_indent(int p, int c)
{
	int o = 0;
	while (p++ < c)
		o += fprintf(stderr, " ");
	return o;
}

/*
 * Dump a human-readable disassembly of bc to stderr.
 *
 * For every CF instruction one header line is printed (raw dwords, opcode
 * name and the fields relevant to its kind), followed by one line per ALU,
 * TEX, VTX and GDS instruction attached to it. Raw dwords are read from
 * bc->bytecode, so the shader must already have been encoded. A function
 * static counter numbers the shaders dumped so far.
 */
void r600_bytecode_disasm(struct r600_bytecode *bc)
{
	const char *index_mode[] = {"CF_INDEX_NONE", "CF_INDEX_0", "CF_INDEX_1"};
	static int index = 0;
	struct r600_bytecode_cf *cf = NULL;
	struct r600_bytecode_alu *alu = NULL;
	struct r600_bytecode_vtx *vtx = NULL;
	struct r600_bytecode_tex *tex = NULL;
	struct r600_bytecode_gds *gds = NULL;

	unsigned i, id, ngr = 0, last;
	uint32_t literal[4];
	unsigned nliteral;
	char chip = '6';

	switch (bc->gfx_level) {
	case R700:
		chip = '7';
		break;
	case EVERGREEN:
		chip = 'E';
		break;
	case CAYMAN:
		chip = 'C';
		break;
	case R600:
	default:
		chip = '6';
		break;
	}
	fprintf(stderr, "bytecode %d dw -- %d gprs -- %d nstack -------------\n",
		bc->ndw, bc->ngpr, bc->nstack);
	fprintf(stderr, "shader %d -- %c\n", index++, chip);

	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
		id = cf->id;
		if (cf->op == CF_NATIVE) {
			fprintf(stderr, "%04d %08X %08X CF_NATIVE\n", id, bc->bytecode[id],
					bc->bytecode[id + 1]);
		} else {
			const struct cf_op_info *cfop = r600_isa_cf(cf->op);
			if (cfop->flags & CF_ALU) {
				if (cf->eg_alu_extended) {
					fprintf(stderr, "%04d %08X %08X %s\n", id, bc->bytecode[id],
							bc->bytecode[id + 1], "ALU_EXT");
					id += 2;
				}
				fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
						bc->bytecode[id + 1], cfop->name);
				fprintf(stderr, "%d @%d ", cf->ndw / 2, cf->addr);
				for (i = 0; i < 4; ++i) {
					if (cf->kcache[i].mode) {
						int c_start = (cf->kcache[i].addr << 4);
						int c_end = c_start + (cf->kcache[i].mode << 4);
						/* NOTE(review): index_mode[] has three entries and is
						 * indexed without a range check - confirm the field
						 * cannot exceed 2 */
						fprintf(stderr, "KC%d[CB%d:%d-%d%s%s] ",
							i, cf->kcache[i].bank, c_start, c_end,
							cf->kcache[i].index_mode ? " " : "",
							cf->kcache[i].index_mode ? index_mode[cf->kcache[i].index_mode] : "");
					}
				}
				fprintf(stderr, "\n");
			} else if (cfop->flags & CF_FETCH) {
				fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
						bc->bytecode[id + 1], cfop->name);
				fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
				if (cf->vpm)
					fprintf(stderr, "VPM ");
				if (cf->end_of_program)
					fprintf(stderr, "EOP ");
				fprintf(stderr, "\n");

			} else if (cfop->flags & CF_EXP) {
				int o = 0;
				const char *exp_type[] = {"PIXEL", "POS ", "PARAM"};
				o += fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
						bc->bytecode[id + 1], cfop->name);
				o += print_indent(o, 43);
				/* NOTE(review): exp_type[] has three entries and is indexed
				 * without a range check - confirm output.type <= 2 here */
				o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
				if (cf->output.burst_count > 1) {
					o += fprintf(stderr, "%d-%d ", cf->output.array_base,
							cf->output.array_base + cf->output.burst_count - 1);

					o += print_indent(o, 55);
					o += fprintf(stderr, "R%d-%d.", cf->output.gpr,
							cf->output.gpr + cf->output.burst_count - 1);
				} else {
					o += fprintf(stderr, "%d ", cf->output.array_base);
					o += print_indent(o, 55);
					o += fprintf(stderr, "R%d.", cf->output.gpr);
				}

				o += print_swizzle(cf->output.swizzle_x);
				o += print_swizzle(cf->output.swizzle_y);
				o += print_swizzle(cf->output.swizzle_z);
				o += print_swizzle(cf->output.swizzle_w);

				print_indent(o, 67);

				fprintf(stderr, " ES:%X ", cf->output.elem_size);
				if (cf->mark)
					fprintf(stderr, "MARK ");
				if (!cf->barrier)
					fprintf(stderr, "NO_BARRIER ");
				if (cf->end_of_program)
					fprintf(stderr, "EOP ");
				fprintf(stderr, "\n");
			} else if (r600_isa_cf(cf->op)->flags & CF_MEM) {
				int o = 0;
				const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
						"WRITE_IND_ACK"};
				o += fprintf(stderr, "%04d %08X %08X %s ", id,
						bc->bytecode[id], bc->bytecode[id + 1], cfop->name);
				o += print_indent(o, 43);
				o += fprintf(stderr, "%s ", exp_type[cf->output.type]);

				if (r600_isa_cf(cf->op)->flags & CF_RAT) {
					o += fprintf(stderr, "RAT%d", cf->rat.id);
					if (cf->rat.index_mode) {
						o += fprintf(stderr, "[IDX%d]", cf->rat.index_mode - 1);
					}
					o += fprintf(stderr, " INST: %d ", cf->rat.inst);
				}

				if (cf->output.burst_count > 1) {
					o += fprintf(stderr, "%d-%d ", cf->output.array_base,
							cf->output.array_base + cf->output.burst_count - 1);
					o += print_indent(o, 55);
					o += fprintf(stderr, "R%d-%d.", cf->output.gpr,
							cf->output.gpr + cf->output.burst_count - 1);
				} else {
					o += fprintf(stderr, "%d ", cf->output.array_base);
					o += print_indent(o, 55);
					o += fprintf(stderr, "R%d.", cf->output.gpr);
				}
				/* component mask rendered as a swizzle: enabled channels by
				 * letter, masked channels as '_' */
				for (i = 0; i < 4; ++i) {
					if (cf->output.comp_mask & (1 << i))
						o += print_swizzle(i);
					else
						o += print_swizzle(7);
				}

				if (cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND ||
				    cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND)
					o += fprintf(stderr, " R%d", cf->output.index_gpr);

				o += print_indent(o, 67);

				fprintf(stderr, " ES:%i ", cf->output.elem_size);
				if (cf->output.array_size != 0xFFF)
					fprintf(stderr, "AS:%i ", cf->output.array_size);
				if (cf->mark)
					fprintf(stderr, "MARK ");
				if (!cf->barrier)
					fprintf(stderr, "NO_BARRIER ");
				if (cf->end_of_program)
					fprintf(stderr, "EOP ");

				if (cf->output.mark)
					fprintf(stderr, "MARK ");

				fprintf(stderr, "\n");
			} else {
				fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
						bc->bytecode[id + 1], cfop->name);
				fprintf(stderr, "@%d ", cf->cf_addr);
				if (cf->cond)
					fprintf(stderr, "CND:%X ", cf->cond);
				if (cf->pop_count)
					fprintf(stderr, "POP:%X ", cf->pop_count);
				if (cf->count && (cfop->flags & CF_EMIT))
					fprintf(stderr, "STREAM%d ", cf->count);
				if (cf->vpm)
					fprintf(stderr, "VPM ");
				if (cf->end_of_program)
					fprintf(stderr, "EOP ");
				fprintf(stderr, "\n");
			}
		}

		/* clause body: walk the dwords starting at the clause address */
		id = cf->addr;
		nliteral = 0;
		last = 1;
		LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
			const char *omod_str[] = {"","*2","*4","/2"};
			const struct alu_op_info *aop = r600_isa_alu(alu->op);
			int o = 0;

			r600_bytecode_alu_nliterals(alu, literal, &nliteral);
			o += fprintf(stderr, " %04d %08X %08X ", id, bc->bytecode[id], bc->bytecode[id+1]);
			/* the group number is printed on the first instruction after
			 * a `last` marker only */
			if (last)
				o += fprintf(stderr, "%4d ", ++ngr);
			else
				o += fprintf(stderr, " ");
			o += fprintf(stderr, "%c%c %c ", alu->execute_mask ? 'M':' ',
					alu->update_pred ? 'P':' ',
					alu->pred_sel ? alu->pred_sel==2 ? '0':'1':' ');

			o += fprintf(stderr, "%s%s%s ", aop->name,
					omod_str[alu->omod], alu->dst.clamp ? "_sat":"");

			o += print_indent(o,60);
			o += print_dst(alu);
			for (i = 0; i < aop->src_count; ++i) {
				o += fprintf(stderr, i == 0 ? ", ": ", ");
				o += print_src(alu, i);
			}

			if (alu->bank_swizzle) {
				o += print_indent(o,75);
				o += fprintf(stderr, " BS:%d", alu->bank_swizzle);
			}

			fprintf(stderr, "\n");
			id += 2;

			if (alu->last) {
				for (i = 0; i < nliteral; i++, id++) {
					o = fprintf(stderr, " %04d %08X", id, bc->bytecode[id]);
					print_indent(o, 60);
					/* reinterpret the dword via u_bitcast_u2f rather than
					 * reading the uint32_t buffer through a float pointer */
					fprintf(stderr, " %f (%d)\n", u_bitcast_u2f(bc->bytecode[id]),
							bc->bytecode[id]);
				}
				/* skip the padding dword after an odd literal count */
				id += nliteral & 1;
				nliteral = 0;
			}
			last = alu->last;
		}

		LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
			int o = 0;
			o += fprintf(stderr, " %04d %08X %08X %08X ", id, bc->bytecode[id],
					bc->bytecode[id + 1], bc->bytecode[id + 2]);

			o += fprintf(stderr, "%s ", r600_isa_fetch(tex->op)->name);

			o += print_indent(o, 50);

			o += fprintf(stderr, "R%d.", tex->dst_gpr);
			o += print_swizzle(tex->dst_sel_x);
			o += print_swizzle(tex->dst_sel_y);
			o += print_swizzle(tex->dst_sel_z);
			o += print_swizzle(tex->dst_sel_w);

			o += fprintf(stderr, ", R%d.", tex->src_gpr);
			o += print_swizzle(tex->src_sel_x);
			o += print_swizzle(tex->src_sel_y);
			o += print_swizzle(tex->src_sel_z);
			o += print_swizzle(tex->src_sel_w);

			o += fprintf(stderr, ", RID:%d", tex->resource_id);
			o += fprintf(stderr, ", SID:%d ", tex->sampler_id);

			if (tex->sampler_index_mode)
				fprintf(stderr, "SQ_%s ", index_mode[tex->sampler_index_mode]);

			if (tex->lod_bias)
				fprintf(stderr, "LB:%d ", tex->lod_bias);

			fprintf(stderr, "CT:%c%c%c%c ",
					tex->coord_type_x ? 'N' : 'U',
					tex->coord_type_y ? 'N' : 'U',
					tex->coord_type_z ? 'N' : 'U',
					tex->coord_type_w ? 'N' : 'U');

			if (tex->offset_x)
				fprintf(stderr, "OX:%d ", tex->offset_x);
			if (tex->offset_y)
				fprintf(stderr, "OY:%d ", tex->offset_y);
			if (tex->offset_z)
				fprintf(stderr, "OZ:%d ", tex->offset_z);

			id += 4;
			fprintf(stderr, "\n");
		}

		LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
			int o = 0;
			const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
			o += fprintf(stderr, " %04d %08X %08X %08X ", id, bc->bytecode[id],
					bc->bytecode[id + 1], bc->bytecode[id + 2]);

			o += fprintf(stderr, "%s ", r600_isa_fetch(vtx->op)->name);

			o += print_indent(o, 50);

			o += fprintf(stderr, "R%d.", vtx->dst_gpr);
			o += print_swizzle(vtx->dst_sel_x);
			o += print_swizzle(vtx->dst_sel_y);
			o += print_swizzle(vtx->dst_sel_z);
			o += print_swizzle(vtx->dst_sel_w);

			o += fprintf(stderr, ", R%d.", vtx->src_gpr);
			o += print_swizzle(vtx->src_sel_x);
			if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
				o += print_swizzle(vtx->src_sel_y);

			if (vtx->offset)
				fprintf(stderr, " +%db", vtx->offset);

			o += print_indent(o, 55);

			fprintf(stderr, ", RID:%d ", vtx->buffer_id);

			/* NOTE(review): fetch_type[] has three entries and is indexed
			 * without a range check - confirm vtx->fetch_type <= 2 */
			fprintf(stderr, "%s ", fetch_type[vtx->fetch_type]);

			if (bc->gfx_level < CAYMAN && vtx->mega_fetch_count)
				fprintf(stderr, "MFC:%d ", vtx->mega_fetch_count);

			if (bc->gfx_level >= EVERGREEN && vtx->buffer_index_mode)
				fprintf(stderr, "SQ_%s ", index_mode[vtx->buffer_index_mode]);

			if (r600_isa_fetch(vtx->op)->flags & FF_MEM) {
				if (vtx->uncached)
					fprintf(stderr, "UNCACHED ");
				if (vtx->indexed)
					fprintf(stderr, "INDEXED:%d ", vtx->indexed);

				fprintf(stderr, "ELEM_SIZE:%d ", vtx->elem_size);
				if (vtx->burst_count)
					fprintf(stderr, "BURST_COUNT:%d ", vtx->burst_count);
				fprintf(stderr, "ARRAY_BASE:%d ", vtx->array_base);
				fprintf(stderr, "ARRAY_SIZE:%d ", vtx->array_size);
			}

			fprintf(stderr, "UCF:%d ", vtx->use_const_fields);
			fprintf(stderr, "FMT(DTA:%d ", vtx->data_format);
			fprintf(stderr, "NUM:%d ", vtx->num_format_all);
			fprintf(stderr, "COMP:%d ", vtx->format_comp_all);
			fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all);

			id += 4;
		}

		LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) {
			int o = 0;
			o += fprintf(stderr, " %04d %08X %08X %08X ", id, bc->bytecode[id],
					bc->bytecode[id + 1], bc->bytecode[id + 2]);

			o += fprintf(stderr, "%s ", r600_isa_fetch(gds->op)->name);

			if (gds->op != FETCH_OP_TF_WRITE) {
				o += fprintf(stderr, "R%d.", gds->dst_gpr);
				o += print_swizzle(gds->dst_sel_x);
				o += print_swizzle(gds->dst_sel_y);
				o += print_swizzle(gds->dst_sel_z);
				o += print_swizzle(gds->dst_sel_w);
			}

			o += fprintf(stderr, ", R%d.", gds->src_gpr);
			o += print_swizzle(gds->src_sel_x);
			o += print_swizzle(gds->src_sel_y);
			o += print_swizzle(gds->src_sel_z);

			if (gds->op != FETCH_OP_TF_WRITE) {
				o += fprintf(stderr, ", R%d.", gds->src_gpr2);
			}
			if (gds->alloc_consume) {
				o += fprintf(stderr, " UAV: %d", gds->uav_id);
				if (gds->uav_index_mode)
					o += fprintf(stderr, "[%s]", index_mode[gds->uav_index_mode]);
			}
			fprintf(stderr, "\n");
			id += 4;
		}
	}

	fprintf(stderr, "--------------------------------------\n");
}

2538bf215546Sopenharmony_civoid r600_vertex_data_type(enum pipe_format pformat, 2539bf215546Sopenharmony_ci
unsigned *format, 2540bf215546Sopenharmony_ci unsigned *num_format, unsigned *format_comp, unsigned *endian) 2541bf215546Sopenharmony_ci{ 2542bf215546Sopenharmony_ci const struct util_format_description *desc; 2543bf215546Sopenharmony_ci unsigned i; 2544bf215546Sopenharmony_ci 2545bf215546Sopenharmony_ci *format = 0; 2546bf215546Sopenharmony_ci *num_format = 0; 2547bf215546Sopenharmony_ci *format_comp = 0; 2548bf215546Sopenharmony_ci *endian = ENDIAN_NONE; 2549bf215546Sopenharmony_ci 2550bf215546Sopenharmony_ci if (pformat == PIPE_FORMAT_R11G11B10_FLOAT) { 2551bf215546Sopenharmony_ci *format = FMT_10_11_11_FLOAT; 2552bf215546Sopenharmony_ci *endian = r600_endian_swap(32); 2553bf215546Sopenharmony_ci return; 2554bf215546Sopenharmony_ci } 2555bf215546Sopenharmony_ci 2556bf215546Sopenharmony_ci if (pformat == PIPE_FORMAT_B5G6R5_UNORM) { 2557bf215546Sopenharmony_ci *format = FMT_5_6_5; 2558bf215546Sopenharmony_ci *endian = r600_endian_swap(16); 2559bf215546Sopenharmony_ci return; 2560bf215546Sopenharmony_ci } 2561bf215546Sopenharmony_ci 2562bf215546Sopenharmony_ci if (pformat == PIPE_FORMAT_B5G5R5A1_UNORM) { 2563bf215546Sopenharmony_ci *format = FMT_1_5_5_5; 2564bf215546Sopenharmony_ci *endian = r600_endian_swap(16); 2565bf215546Sopenharmony_ci return; 2566bf215546Sopenharmony_ci } 2567bf215546Sopenharmony_ci 2568bf215546Sopenharmony_ci if (pformat == PIPE_FORMAT_A1B5G5R5_UNORM) { 2569bf215546Sopenharmony_ci *format = FMT_5_5_5_1; 2570bf215546Sopenharmony_ci return; 2571bf215546Sopenharmony_ci } 2572bf215546Sopenharmony_ci 2573bf215546Sopenharmony_ci desc = util_format_description(pformat); 2574bf215546Sopenharmony_ci if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { 2575bf215546Sopenharmony_ci goto out_unknown; 2576bf215546Sopenharmony_ci } 2577bf215546Sopenharmony_ci 2578bf215546Sopenharmony_ci /* Find the first non-VOID channel. 
*/ 2579bf215546Sopenharmony_ci for (i = 0; i < 4; i++) { 2580bf215546Sopenharmony_ci if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 2581bf215546Sopenharmony_ci break; 2582bf215546Sopenharmony_ci } 2583bf215546Sopenharmony_ci } 2584bf215546Sopenharmony_ci 2585bf215546Sopenharmony_ci *endian = r600_endian_swap(desc->channel[i].size); 2586bf215546Sopenharmony_ci 2587bf215546Sopenharmony_ci switch (desc->channel[i].type) { 2588bf215546Sopenharmony_ci /* Half-floats, floats, ints */ 2589bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_FLOAT: 2590bf215546Sopenharmony_ci switch (desc->channel[i].size) { 2591bf215546Sopenharmony_ci case 16: 2592bf215546Sopenharmony_ci switch (desc->nr_channels) { 2593bf215546Sopenharmony_ci case 1: 2594bf215546Sopenharmony_ci *format = FMT_16_FLOAT; 2595bf215546Sopenharmony_ci break; 2596bf215546Sopenharmony_ci case 2: 2597bf215546Sopenharmony_ci *format = FMT_16_16_FLOAT; 2598bf215546Sopenharmony_ci break; 2599bf215546Sopenharmony_ci case 3: 2600bf215546Sopenharmony_ci case 4: 2601bf215546Sopenharmony_ci *format = FMT_16_16_16_16_FLOAT; 2602bf215546Sopenharmony_ci break; 2603bf215546Sopenharmony_ci } 2604bf215546Sopenharmony_ci break; 2605bf215546Sopenharmony_ci case 32: 2606bf215546Sopenharmony_ci switch (desc->nr_channels) { 2607bf215546Sopenharmony_ci case 1: 2608bf215546Sopenharmony_ci *format = FMT_32_FLOAT; 2609bf215546Sopenharmony_ci break; 2610bf215546Sopenharmony_ci case 2: 2611bf215546Sopenharmony_ci *format = FMT_32_32_FLOAT; 2612bf215546Sopenharmony_ci break; 2613bf215546Sopenharmony_ci case 3: 2614bf215546Sopenharmony_ci *format = FMT_32_32_32_FLOAT; 2615bf215546Sopenharmony_ci break; 2616bf215546Sopenharmony_ci case 4: 2617bf215546Sopenharmony_ci *format = FMT_32_32_32_32_FLOAT; 2618bf215546Sopenharmony_ci break; 2619bf215546Sopenharmony_ci } 2620bf215546Sopenharmony_ci break; 2621bf215546Sopenharmony_ci default: 2622bf215546Sopenharmony_ci goto out_unknown; 2623bf215546Sopenharmony_ci } 2624bf215546Sopenharmony_ci 
break; 2625bf215546Sopenharmony_ci /* Unsigned ints */ 2626bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_UNSIGNED: 2627bf215546Sopenharmony_ci /* Signed ints */ 2628bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_SIGNED: 2629bf215546Sopenharmony_ci switch (desc->channel[i].size) { 2630bf215546Sopenharmony_ci case 4: 2631bf215546Sopenharmony_ci switch (desc->nr_channels) { 2632bf215546Sopenharmony_ci case 2: 2633bf215546Sopenharmony_ci *format = FMT_4_4; 2634bf215546Sopenharmony_ci break; 2635bf215546Sopenharmony_ci case 4: 2636bf215546Sopenharmony_ci *format = FMT_4_4_4_4; 2637bf215546Sopenharmony_ci break; 2638bf215546Sopenharmony_ci } 2639bf215546Sopenharmony_ci break; 2640bf215546Sopenharmony_ci case 8: 2641bf215546Sopenharmony_ci switch (desc->nr_channels) { 2642bf215546Sopenharmony_ci case 1: 2643bf215546Sopenharmony_ci *format = FMT_8; 2644bf215546Sopenharmony_ci break; 2645bf215546Sopenharmony_ci case 2: 2646bf215546Sopenharmony_ci *format = FMT_8_8; 2647bf215546Sopenharmony_ci break; 2648bf215546Sopenharmony_ci case 3: 2649bf215546Sopenharmony_ci case 4: 2650bf215546Sopenharmony_ci *format = FMT_8_8_8_8; 2651bf215546Sopenharmony_ci break; 2652bf215546Sopenharmony_ci } 2653bf215546Sopenharmony_ci break; 2654bf215546Sopenharmony_ci case 10: 2655bf215546Sopenharmony_ci if (desc->nr_channels != 4) 2656bf215546Sopenharmony_ci goto out_unknown; 2657bf215546Sopenharmony_ci 2658bf215546Sopenharmony_ci *format = FMT_2_10_10_10; 2659bf215546Sopenharmony_ci break; 2660bf215546Sopenharmony_ci case 16: 2661bf215546Sopenharmony_ci switch (desc->nr_channels) { 2662bf215546Sopenharmony_ci case 1: 2663bf215546Sopenharmony_ci *format = FMT_16; 2664bf215546Sopenharmony_ci break; 2665bf215546Sopenharmony_ci case 2: 2666bf215546Sopenharmony_ci *format = FMT_16_16; 2667bf215546Sopenharmony_ci break; 2668bf215546Sopenharmony_ci case 3: 2669bf215546Sopenharmony_ci case 4: 2670bf215546Sopenharmony_ci *format = FMT_16_16_16_16; 2671bf215546Sopenharmony_ci break; 
2672bf215546Sopenharmony_ci } 2673bf215546Sopenharmony_ci break; 2674bf215546Sopenharmony_ci case 32: 2675bf215546Sopenharmony_ci switch (desc->nr_channels) { 2676bf215546Sopenharmony_ci case 1: 2677bf215546Sopenharmony_ci *format = FMT_32; 2678bf215546Sopenharmony_ci break; 2679bf215546Sopenharmony_ci case 2: 2680bf215546Sopenharmony_ci *format = FMT_32_32; 2681bf215546Sopenharmony_ci break; 2682bf215546Sopenharmony_ci case 3: 2683bf215546Sopenharmony_ci *format = FMT_32_32_32; 2684bf215546Sopenharmony_ci break; 2685bf215546Sopenharmony_ci case 4: 2686bf215546Sopenharmony_ci *format = FMT_32_32_32_32; 2687bf215546Sopenharmony_ci break; 2688bf215546Sopenharmony_ci } 2689bf215546Sopenharmony_ci break; 2690bf215546Sopenharmony_ci default: 2691bf215546Sopenharmony_ci goto out_unknown; 2692bf215546Sopenharmony_ci } 2693bf215546Sopenharmony_ci break; 2694bf215546Sopenharmony_ci default: 2695bf215546Sopenharmony_ci goto out_unknown; 2696bf215546Sopenharmony_ci } 2697bf215546Sopenharmony_ci 2698bf215546Sopenharmony_ci if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 2699bf215546Sopenharmony_ci *format_comp = 1; 2700bf215546Sopenharmony_ci } 2701bf215546Sopenharmony_ci 2702bf215546Sopenharmony_ci *num_format = 0; 2703bf215546Sopenharmony_ci if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED || 2704bf215546Sopenharmony_ci desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 2705bf215546Sopenharmony_ci if (!desc->channel[i].normalized) { 2706bf215546Sopenharmony_ci if (desc->channel[i].pure_integer) 2707bf215546Sopenharmony_ci *num_format = 1; 2708bf215546Sopenharmony_ci else 2709bf215546Sopenharmony_ci *num_format = 2; 2710bf215546Sopenharmony_ci } 2711bf215546Sopenharmony_ci } 2712bf215546Sopenharmony_ci return; 2713bf215546Sopenharmony_ciout_unknown: 2714bf215546Sopenharmony_ci R600_ERR("unsupported vertex format %s\n", util_format_name(pformat)); 2715bf215546Sopenharmony_ci} 2716bf215546Sopenharmony_ci 2717bf215546Sopenharmony_civoid 
*r600_create_vertex_fetch_shader(struct pipe_context *ctx, 2718bf215546Sopenharmony_ci unsigned count, 2719bf215546Sopenharmony_ci const struct pipe_vertex_element *elements) 2720bf215546Sopenharmony_ci{ 2721bf215546Sopenharmony_ci struct r600_context *rctx = (struct r600_context *)ctx; 2722bf215546Sopenharmony_ci struct r600_bytecode bc; 2723bf215546Sopenharmony_ci struct r600_bytecode_vtx vtx; 2724bf215546Sopenharmony_ci const struct util_format_description *desc; 2725bf215546Sopenharmony_ci unsigned fetch_resource_start = rctx->b.gfx_level >= EVERGREEN ? 0 : 160; 2726bf215546Sopenharmony_ci unsigned format, num_format, format_comp, endian; 2727bf215546Sopenharmony_ci uint32_t *bytecode; 2728bf215546Sopenharmony_ci int i, j, r, fs_size; 2729bf215546Sopenharmony_ci struct r600_fetch_shader *shader; 2730bf215546Sopenharmony_ci unsigned no_sb = rctx->screen->b.debug_flags & DBG_NO_SB || 2731bf215546Sopenharmony_ci (rctx->screen->b.debug_flags & DBG_NIR); 2732bf215546Sopenharmony_ci unsigned sb_disasm = !no_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM); 2733bf215546Sopenharmony_ci 2734bf215546Sopenharmony_ci assert(count < 32); 2735bf215546Sopenharmony_ci 2736bf215546Sopenharmony_ci memset(&bc, 0, sizeof(bc)); 2737bf215546Sopenharmony_ci r600_bytecode_init(&bc, rctx->b.gfx_level, rctx->b.family, 2738bf215546Sopenharmony_ci rctx->screen->has_compressed_msaa_texturing); 2739bf215546Sopenharmony_ci 2740bf215546Sopenharmony_ci bc.isa = rctx->isa; 2741bf215546Sopenharmony_ci 2742bf215546Sopenharmony_ci for (i = 0; i < count; i++) { 2743bf215546Sopenharmony_ci if (elements[i].instance_divisor > 1) { 2744bf215546Sopenharmony_ci if (rctx->b.gfx_level == CAYMAN) { 2745bf215546Sopenharmony_ci for (j = 0; j < 4; j++) { 2746bf215546Sopenharmony_ci struct r600_bytecode_alu alu; 2747bf215546Sopenharmony_ci memset(&alu, 0, sizeof(alu)); 2748bf215546Sopenharmony_ci alu.op = ALU_OP2_MULHI_UINT; 2749bf215546Sopenharmony_ci alu.src[0].sel = 0; 2750bf215546Sopenharmony_ci 
alu.src[0].chan = 3; 2751bf215546Sopenharmony_ci alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2752bf215546Sopenharmony_ci alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; 2753bf215546Sopenharmony_ci alu.dst.sel = i + 1; 2754bf215546Sopenharmony_ci alu.dst.chan = j; 2755bf215546Sopenharmony_ci alu.dst.write = j == 3; 2756bf215546Sopenharmony_ci alu.last = j == 3; 2757bf215546Sopenharmony_ci if ((r = r600_bytecode_add_alu(&bc, &alu))) { 2758bf215546Sopenharmony_ci r600_bytecode_clear(&bc); 2759bf215546Sopenharmony_ci return NULL; 2760bf215546Sopenharmony_ci } 2761bf215546Sopenharmony_ci } 2762bf215546Sopenharmony_ci } else { 2763bf215546Sopenharmony_ci struct r600_bytecode_alu alu; 2764bf215546Sopenharmony_ci memset(&alu, 0, sizeof(alu)); 2765bf215546Sopenharmony_ci alu.op = ALU_OP2_MULHI_UINT; 2766bf215546Sopenharmony_ci alu.src[0].sel = 0; 2767bf215546Sopenharmony_ci alu.src[0].chan = 3; 2768bf215546Sopenharmony_ci alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2769bf215546Sopenharmony_ci alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; 2770bf215546Sopenharmony_ci alu.dst.sel = i + 1; 2771bf215546Sopenharmony_ci alu.dst.chan = 3; 2772bf215546Sopenharmony_ci alu.dst.write = 1; 2773bf215546Sopenharmony_ci alu.last = 1; 2774bf215546Sopenharmony_ci if ((r = r600_bytecode_add_alu(&bc, &alu))) { 2775bf215546Sopenharmony_ci r600_bytecode_clear(&bc); 2776bf215546Sopenharmony_ci return NULL; 2777bf215546Sopenharmony_ci } 2778bf215546Sopenharmony_ci } 2779bf215546Sopenharmony_ci } 2780bf215546Sopenharmony_ci } 2781bf215546Sopenharmony_ci 2782bf215546Sopenharmony_ci for (i = 0; i < count; i++) { 2783bf215546Sopenharmony_ci r600_vertex_data_type(elements[i].src_format, 2784bf215546Sopenharmony_ci &format, &num_format, &format_comp, &endian); 2785bf215546Sopenharmony_ci 2786bf215546Sopenharmony_ci desc = util_format_description(elements[i].src_format); 2787bf215546Sopenharmony_ci 2788bf215546Sopenharmony_ci if (elements[i].src_offset > 65535) { 
2789bf215546Sopenharmony_ci r600_bytecode_clear(&bc); 2790bf215546Sopenharmony_ci R600_ERR("too big src_offset: %u\n", elements[i].src_offset); 2791bf215546Sopenharmony_ci return NULL; 2792bf215546Sopenharmony_ci } 2793bf215546Sopenharmony_ci 2794bf215546Sopenharmony_ci memset(&vtx, 0, sizeof(vtx)); 2795bf215546Sopenharmony_ci vtx.buffer_id = elements[i].vertex_buffer_index + fetch_resource_start; 2796bf215546Sopenharmony_ci vtx.fetch_type = elements[i].instance_divisor ? SQ_VTX_FETCH_INSTANCE_DATA : SQ_VTX_FETCH_VERTEX_DATA; 2797bf215546Sopenharmony_ci vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0; 2798bf215546Sopenharmony_ci vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0; 2799bf215546Sopenharmony_ci vtx.mega_fetch_count = 0x1F; 2800bf215546Sopenharmony_ci vtx.dst_gpr = i + 1; 2801bf215546Sopenharmony_ci vtx.dst_sel_x = desc->swizzle[0]; 2802bf215546Sopenharmony_ci vtx.dst_sel_y = desc->swizzle[1]; 2803bf215546Sopenharmony_ci vtx.dst_sel_z = desc->swizzle[2]; 2804bf215546Sopenharmony_ci vtx.dst_sel_w = desc->swizzle[3]; 2805bf215546Sopenharmony_ci vtx.data_format = format; 2806bf215546Sopenharmony_ci vtx.num_format_all = num_format; 2807bf215546Sopenharmony_ci vtx.format_comp_all = format_comp; 2808bf215546Sopenharmony_ci vtx.offset = elements[i].src_offset; 2809bf215546Sopenharmony_ci vtx.endian = endian; 2810bf215546Sopenharmony_ci 2811bf215546Sopenharmony_ci if ((r = r600_bytecode_add_vtx(&bc, &vtx))) { 2812bf215546Sopenharmony_ci r600_bytecode_clear(&bc); 2813bf215546Sopenharmony_ci return NULL; 2814bf215546Sopenharmony_ci } 2815bf215546Sopenharmony_ci } 2816bf215546Sopenharmony_ci 2817bf215546Sopenharmony_ci r600_bytecode_add_cfinst(&bc, CF_OP_RET); 2818bf215546Sopenharmony_ci 2819bf215546Sopenharmony_ci if ((r = r600_bytecode_build(&bc))) { 2820bf215546Sopenharmony_ci r600_bytecode_clear(&bc); 2821bf215546Sopenharmony_ci return NULL; 2822bf215546Sopenharmony_ci } 2823bf215546Sopenharmony_ci 2824bf215546Sopenharmony_ci if 
(rctx->screen->b.debug_flags & DBG_FS) { 2825bf215546Sopenharmony_ci fprintf(stderr, "--------------------------------------------------------------\n"); 2826bf215546Sopenharmony_ci fprintf(stderr, "Vertex elements state:\n"); 2827bf215546Sopenharmony_ci for (i = 0; i < count; i++) { 2828bf215546Sopenharmony_ci fprintf(stderr, " "); 2829bf215546Sopenharmony_ci util_dump_vertex_element(stderr, elements+i); 2830bf215546Sopenharmony_ci fprintf(stderr, "\n"); 2831bf215546Sopenharmony_ci } 2832bf215546Sopenharmony_ci 2833bf215546Sopenharmony_ci if (!sb_disasm) { 2834bf215546Sopenharmony_ci r600_bytecode_disasm(&bc); 2835bf215546Sopenharmony_ci 2836bf215546Sopenharmony_ci fprintf(stderr, "______________________________________________________________\n"); 2837bf215546Sopenharmony_ci } else { 2838bf215546Sopenharmony_ci r600_sb_bytecode_process(rctx, &bc, NULL, 1 /*dump*/, 0 /*optimize*/); 2839bf215546Sopenharmony_ci } 2840bf215546Sopenharmony_ci } 2841bf215546Sopenharmony_ci 2842bf215546Sopenharmony_ci fs_size = bc.ndw*4; 2843bf215546Sopenharmony_ci 2844bf215546Sopenharmony_ci /* Allocate the CSO. 
*/ 2845bf215546Sopenharmony_ci shader = CALLOC_STRUCT(r600_fetch_shader); 2846bf215546Sopenharmony_ci if (!shader) { 2847bf215546Sopenharmony_ci r600_bytecode_clear(&bc); 2848bf215546Sopenharmony_ci return NULL; 2849bf215546Sopenharmony_ci } 2850bf215546Sopenharmony_ci 2851bf215546Sopenharmony_ci u_suballocator_alloc(&rctx->allocator_fetch_shader, fs_size, 256, 2852bf215546Sopenharmony_ci &shader->offset, 2853bf215546Sopenharmony_ci (struct pipe_resource**)&shader->buffer); 2854bf215546Sopenharmony_ci if (!shader->buffer) { 2855bf215546Sopenharmony_ci r600_bytecode_clear(&bc); 2856bf215546Sopenharmony_ci FREE(shader); 2857bf215546Sopenharmony_ci return NULL; 2858bf215546Sopenharmony_ci } 2859bf215546Sopenharmony_ci 2860bf215546Sopenharmony_ci bytecode = r600_buffer_map_sync_with_rings 2861bf215546Sopenharmony_ci (&rctx->b, shader->buffer, 2862bf215546Sopenharmony_ci PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED | RADEON_MAP_TEMPORARY); 2863bf215546Sopenharmony_ci bytecode += shader->offset / 4; 2864bf215546Sopenharmony_ci 2865bf215546Sopenharmony_ci if (R600_BIG_ENDIAN) { 2866bf215546Sopenharmony_ci for (i = 0; i < fs_size / 4; ++i) { 2867bf215546Sopenharmony_ci bytecode[i] = util_cpu_to_le32(bc.bytecode[i]); 2868bf215546Sopenharmony_ci } 2869bf215546Sopenharmony_ci } else { 2870bf215546Sopenharmony_ci memcpy(bytecode, bc.bytecode, fs_size); 2871bf215546Sopenharmony_ci } 2872bf215546Sopenharmony_ci rctx->b.ws->buffer_unmap(rctx->b.ws, shader->buffer->buf); 2873bf215546Sopenharmony_ci 2874bf215546Sopenharmony_ci r600_bytecode_clear(&bc); 2875bf215546Sopenharmony_ci return shader; 2876bf215546Sopenharmony_ci} 2877bf215546Sopenharmony_ci 2878bf215546Sopenharmony_civoid r600_bytecode_alu_read(struct r600_bytecode *bc, 2879bf215546Sopenharmony_ci struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1) 2880bf215546Sopenharmony_ci{ 2881bf215546Sopenharmony_ci /* WORD0 */ 2882bf215546Sopenharmony_ci alu->src[0].sel = G_SQ_ALU_WORD0_SRC0_SEL(word0); 
2883bf215546Sopenharmony_ci alu->src[0].rel = G_SQ_ALU_WORD0_SRC0_REL(word0); 2884bf215546Sopenharmony_ci alu->src[0].chan = G_SQ_ALU_WORD0_SRC0_CHAN(word0); 2885bf215546Sopenharmony_ci alu->src[0].neg = G_SQ_ALU_WORD0_SRC0_NEG(word0); 2886bf215546Sopenharmony_ci alu->src[1].sel = G_SQ_ALU_WORD0_SRC1_SEL(word0); 2887bf215546Sopenharmony_ci alu->src[1].rel = G_SQ_ALU_WORD0_SRC1_REL(word0); 2888bf215546Sopenharmony_ci alu->src[1].chan = G_SQ_ALU_WORD0_SRC1_CHAN(word0); 2889bf215546Sopenharmony_ci alu->src[1].neg = G_SQ_ALU_WORD0_SRC1_NEG(word0); 2890bf215546Sopenharmony_ci alu->index_mode = G_SQ_ALU_WORD0_INDEX_MODE(word0); 2891bf215546Sopenharmony_ci alu->pred_sel = G_SQ_ALU_WORD0_PRED_SEL(word0); 2892bf215546Sopenharmony_ci alu->last = G_SQ_ALU_WORD0_LAST(word0); 2893bf215546Sopenharmony_ci 2894bf215546Sopenharmony_ci /* WORD1 */ 2895bf215546Sopenharmony_ci alu->bank_swizzle = G_SQ_ALU_WORD1_BANK_SWIZZLE(word1); 2896bf215546Sopenharmony_ci if (alu->bank_swizzle) 2897bf215546Sopenharmony_ci alu->bank_swizzle_force = alu->bank_swizzle; 2898bf215546Sopenharmony_ci alu->dst.sel = G_SQ_ALU_WORD1_DST_GPR(word1); 2899bf215546Sopenharmony_ci alu->dst.rel = G_SQ_ALU_WORD1_DST_REL(word1); 2900bf215546Sopenharmony_ci alu->dst.chan = G_SQ_ALU_WORD1_DST_CHAN(word1); 2901bf215546Sopenharmony_ci alu->dst.clamp = G_SQ_ALU_WORD1_CLAMP(word1); 2902bf215546Sopenharmony_ci if (G_SQ_ALU_WORD1_ENCODING(word1)) /*ALU_DWORD1_OP3*/ 2903bf215546Sopenharmony_ci { 2904bf215546Sopenharmony_ci alu->is_op3 = 1; 2905bf215546Sopenharmony_ci alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1); 2906bf215546Sopenharmony_ci alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1); 2907bf215546Sopenharmony_ci alu->src[2].chan = G_SQ_ALU_WORD1_OP3_SRC2_CHAN(word1); 2908bf215546Sopenharmony_ci alu->src[2].neg = G_SQ_ALU_WORD1_OP3_SRC2_NEG(word1); 2909bf215546Sopenharmony_ci alu->op = r600_isa_alu_by_opcode(bc->isa, 2910bf215546Sopenharmony_ci G_SQ_ALU_WORD1_OP3_ALU_INST(word1), /* is_op3 = */ 1); 
2911bf215546Sopenharmony_ci 2912bf215546Sopenharmony_ci } 2913bf215546Sopenharmony_ci else /*ALU_DWORD1_OP2*/ 2914bf215546Sopenharmony_ci { 2915bf215546Sopenharmony_ci alu->src[0].abs = G_SQ_ALU_WORD1_OP2_SRC0_ABS(word1); 2916bf215546Sopenharmony_ci alu->src[1].abs = G_SQ_ALU_WORD1_OP2_SRC1_ABS(word1); 2917bf215546Sopenharmony_ci alu->op = r600_isa_alu_by_opcode(bc->isa, 2918bf215546Sopenharmony_ci G_SQ_ALU_WORD1_OP2_ALU_INST(word1), /* is_op3 = */ 0); 2919bf215546Sopenharmony_ci alu->omod = G_SQ_ALU_WORD1_OP2_OMOD(word1); 2920bf215546Sopenharmony_ci alu->dst.write = G_SQ_ALU_WORD1_OP2_WRITE_MASK(word1); 2921bf215546Sopenharmony_ci alu->update_pred = G_SQ_ALU_WORD1_OP2_UPDATE_PRED(word1); 2922bf215546Sopenharmony_ci alu->execute_mask = 2923bf215546Sopenharmony_ci G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1); 2924bf215546Sopenharmony_ci } 2925bf215546Sopenharmony_ci} 2926bf215546Sopenharmony_ci 2927bf215546Sopenharmony_ci#if 0 2928bf215546Sopenharmony_civoid r600_bytecode_export_read(struct r600_bytecode *bc, 2929bf215546Sopenharmony_ci struct r600_bytecode_output *output, uint32_t word0, uint32_t word1) 2930bf215546Sopenharmony_ci{ 2931bf215546Sopenharmony_ci output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0); 2932bf215546Sopenharmony_ci output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0); 2933bf215546Sopenharmony_ci output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0); 2934bf215546Sopenharmony_ci output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0); 2935bf215546Sopenharmony_ci 2936bf215546Sopenharmony_ci output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1); 2937bf215546Sopenharmony_ci output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1); 2938bf215546Sopenharmony_ci output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1); 2939bf215546Sopenharmony_ci output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1); 2940bf215546Sopenharmony_ci output->burst_count = 
G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1); 2941bf215546Sopenharmony_ci output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1); 2942bf215546Sopenharmony_ci output->op = r600_isa_cf_by_opcode(bc->isa, 2943bf215546Sopenharmony_ci G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1), 0); 2944bf215546Sopenharmony_ci output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1); 2945bf215546Sopenharmony_ci output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1); 2946bf215546Sopenharmony_ci output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1); 2947bf215546Sopenharmony_ci} 2948bf215546Sopenharmony_ci#endif 2949