1/* 2 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Jonathan Marek <jonathan@marek.ca> 25 */ 26 27#include <assert.h> 28#include <stdint.h> 29#include <stdio.h> 30#include <stdlib.h> 31#include <string.h> 32 33#include "ir2/instr-a2xx.h" 34#include "fd2_program.h" 35#include "ir2.h" 36 37enum ir2_src_type { 38 IR2_SRC_SSA, 39 IR2_SRC_REG, 40 IR2_SRC_INPUT, 41 IR2_SRC_CONST, 42}; 43 44struct ir2_src { 45 /* num can mean different things 46 * ssa: index of instruction 47 * reg: index in ctx->reg array 48 * input: index in ctx->input array 49 * const: constant index (C0, C1, etc) 50 */ 51 uint16_t num; 52 uint8_t swizzle; 53 enum ir2_src_type type : 2; 54 uint8_t abs : 1; 55 uint8_t negate : 1; 56 uint8_t : 4; 57}; 58 59struct ir2_reg_component { 60 uint8_t c : 3; /* assigned x/y/z/w (7=dont write, for fetch instr) */ 61 bool alloc : 1; /* is it currently allocated */ 62 uint8_t ref_count; /* for ra */ 63}; 64 65struct ir2_reg { 66 uint8_t idx; /* assigned hardware register */ 67 uint8_t ncomp; 68 69 uint8_t loop_depth; 70 bool initialized; 71 /* block_idx to free on (-1 = free on ref_count==0) */ 72 int block_idx_free; 73 struct ir2_reg_component comp[4]; 74}; 75 76struct ir2_instr { 77 unsigned idx; 78 79 unsigned block_idx; 80 81 enum { 82 IR2_NONE, 83 IR2_FETCH, 84 IR2_ALU, 85 IR2_CF, 86 } type : 2; 87 88 /* instruction needs to be emitted (for scheduling) */ 89 bool need_emit : 1; 90 91 /* predicate value - (usually) same for entire block */ 92 uint8_t pred : 2; 93 94 /* src */ 95 uint8_t src_count; 96 struct ir2_src src[4]; 97 98 /* dst */ 99 bool is_ssa; 100 union { 101 struct ir2_reg ssa; 102 struct ir2_reg *reg; 103 }; 104 105 /* type-specific */ 106 union { 107 struct { 108 instr_fetch_opc_t opc : 5; 109 union { 110 struct { 111 uint8_t const_idx; 112 uint8_t const_idx_sel; 113 } vtx; 114 struct { 115 bool is_cube : 1; 116 bool is_rect : 1; 117 uint8_t samp_id; 118 } tex; 119 }; 120 } fetch; 121 struct { 122 /* store possible opcs, then we can choose vector/scalar instr */ 123 instr_scalar_opc_t scalar_opc : 6; 124 instr_vector_opc_t vector_opc : 5; 125 /* same as nir */ 126 uint8_t write_mask : 4; 127 bool saturate : 1; 128 129 /* export idx (-1 no export) */ 130 int8_t export; 131 132 /* for scalarized 2 src instruction */ 133 uint8_t src1_swizzle; 134 } alu; 135 struct { 136 /* jmp dst block_idx */ 137 uint8_t block_idx; 138 } cf; 139 }; 140}; 141 142struct ir2_sched_instr { 143 uint32_t reg_state[8]; 144 struct ir2_instr *instr, *instr_s; 145}; 146 147struct ir2_context { 148 struct fd2_shader_stateobj *so; 149 150 unsigned block_idx, pred_idx; 151 uint8_t pred; 152 bool block_has_jump[64]; 153 154 unsigned loop_last_block[64]; 155 unsigned loop_depth; 156 157 nir_shader *nir; 158 159 /* ssa index of position output */ 160 struct ir2_src position; 161 162 /* to translate SSA ids to instruction ids */ 163 int16_t ssa_map[1024]; 164 165 struct ir2_shader_info *info; 166 struct ir2_frag_linkage *f; 167 168 int prev_export; 169 170 /* RA state */ 171 struct ir2_reg *live_regs[64]; 172 uint32_t reg_state[256 / 32]; /* 64*4 bits */ 173 174 /* inputs */ 175 struct ir2_reg input[16 + 1]; /* 16 + param */ 176 177 /* non-ssa regs */ 178 struct ir2_reg reg[64]; 179 unsigned reg_count; 180 181 struct ir2_instr instr[0x300]; 182 unsigned instr_count; 183 184 struct ir2_sched_instr instr_sched[0x180]; 185 unsigned instr_sched_count; 186}; 187 188void assemble(struct ir2_context *ctx, bool binning); 189 190void ir2_nir_compile(struct ir2_context *ctx, bool binning); 191bool ir2_nir_lower_scalar(nir_shader *shader); 192 193void ra_count_refs(struct ir2_context *ctx); 194void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, 195 bool export, uint8_t export_writemask); 196void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr); 197void ra_block_free(struct ir2_context *ctx, unsigned block); 198 199void cp_src(struct ir2_context *ctx); 200void cp_export(struct ir2_context *ctx); 201 202/* utils */ 203enum { 204 IR2_SWIZZLE_Y = 1 << 0, 205 IR2_SWIZZLE_Z = 2 << 0, 206 IR2_SWIZZLE_W = 3 << 0, 207 208 IR2_SWIZZLE_ZW = 2 << 0 | 2 << 2, 209 210 IR2_SWIZZLE_YXW = 1 << 0 | 3 << 2 | 1 << 4, 211 212 IR2_SWIZZLE_XXXX = 0 << 0 | 3 << 2 | 2 << 4 | 1 << 6, 213 IR2_SWIZZLE_YYYY = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6, 214 IR2_SWIZZLE_ZZZZ = 2 << 0 | 1 << 2 | 0 << 4 | 3 << 6, 215 IR2_SWIZZLE_WWWW = 3 << 0 | 2 << 2 | 1 << 4 | 0 << 6, 216 IR2_SWIZZLE_WYWW = 3 << 0 | 0 << 2 | 1 << 4 | 0 << 6, 217 IR2_SWIZZLE_XYXY = 0 << 0 | 0 << 2 | 2 << 4 | 2 << 6, 218 IR2_SWIZZLE_ZZXY = 2 << 0 | 1 << 2 | 2 << 4 | 2 << 6, 219 IR2_SWIZZLE_YXZZ = 1 << 0 | 3 << 2 | 0 << 4 | 3 << 6, 220}; 221 222#define compile_error(ctx, args...) \ 223 ({ \ 224 printf(args); \ 225 assert(0); \ 226 }) 227 228static inline struct ir2_src 229ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type) 230{ 231 return (struct ir2_src){.num = num, .swizzle = swizzle, .type = type}; 232} 233 234/* ir2_assemble uses it .. */ 235struct ir2_src ir2_zero(struct ir2_context *ctx); 236 237#define ir2_foreach_instr(it, ctx) \ 238 for (struct ir2_instr *it = (ctx)->instr; ({ \ 239 while (it != &(ctx)->instr[(ctx)->instr_count] && \ 240 it->type == IR2_NONE) \ 241 it++; \ 242 it != &(ctx)->instr[(ctx)->instr_count]; \ 243 }); \ 244 it++) 245 246#define ir2_foreach_live_reg(it, ctx) \ 247 for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({ \ 248 while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL) \ 249 __ptr++; \ 250 __ptr != &(ctx)->live_regs[64] ? (it = *__ptr) : NULL; \ 251 }); \ 252 it++) 253 254#define ir2_foreach_avail(it) \ 255 for (struct ir2_instr **__instrp = avail, *it; \ 256 it = *__instrp, __instrp != &avail[avail_count]; __instrp++) 257 258#define ir2_foreach_src(it, instr) \ 259 for (struct ir2_src *it = instr->src; it != &instr->src[instr->src_count]; \ 260 it++) 261 262/* mask for register allocation 263 * 64 registers with 4 components each = 256 bits 264 */ 265/* typedef struct { 266 uint64_t data[4]; 267} regmask_t; */ 268 269static inline bool 270mask_isset(uint32_t *mask, unsigned num) 271{ 272 return !!(mask[num / 32] & 1 << num % 32); 273} 274 275static inline void 276mask_set(uint32_t *mask, unsigned num) 277{ 278 mask[num / 32] |= 1 << num % 32; 279} 280 281static inline void 282mask_unset(uint32_t *mask, unsigned num) 283{ 284 mask[num / 32] &= ~(1 << num % 32); 285} 286 287static inline unsigned 288mask_reg(uint32_t *mask, unsigned num) 289{ 290 return mask[num / 8] >> num % 8 * 4 & 0xf; 291} 292 293static inline bool 294is_export(struct ir2_instr *instr) 295{ 296 return instr->type == IR2_ALU && instr->alu.export >= 0; 297} 298 299static inline instr_alloc_type_t 300export_buf(unsigned num) 301{ 302 return num < 32 ? SQ_PARAMETER_PIXEL : num >= 62 ? SQ_POSITION : SQ_MEMORY; 303} 304 305/* component c for channel i */ 306static inline unsigned 307swiz_set(unsigned c, unsigned i) 308{ 309 return ((c - i) & 3) << i * 2; 310} 311 312/* get swizzle in channel i */ 313static inline unsigned 314swiz_get(unsigned swiz, unsigned i) 315{ 316 return ((swiz >> i * 2) + i) & 3; 317} 318 319static inline unsigned 320swiz_merge(unsigned swiz0, unsigned swiz1) 321{ 322 unsigned swiz = 0; 323 for (int i = 0; i < 4; i++) 324 swiz |= swiz_set(swiz_get(swiz0, swiz_get(swiz1, i)), i); 325 return swiz; 326} 327 328static inline void 329swiz_merge_p(uint8_t *swiz0, unsigned swiz1) 330{ 331 unsigned swiz = 0; 332 for (int i = 0; i < 4; i++) 333 swiz |= swiz_set(swiz_get(*swiz0, swiz_get(swiz1, i)), i); 334 *swiz0 = swiz; 335} 336 337static inline struct ir2_reg * 338get_reg(struct ir2_instr *instr) 339{ 340 return instr->is_ssa ? &instr->ssa : instr->reg; 341} 342 343static inline struct ir2_reg * 344get_reg_src(struct ir2_context *ctx, struct ir2_src *src) 345{ 346 switch (src->type) { 347 case IR2_SRC_INPUT: 348 return &ctx->input[src->num]; 349 case IR2_SRC_SSA: 350 return &ctx->instr[src->num].ssa; 351 case IR2_SRC_REG: 352 return &ctx->reg[src->num]; 353 default: 354 return NULL; 355 } 356} 357 358/* gets a ncomp value for the dst */ 359static inline unsigned 360dst_ncomp(struct ir2_instr *instr) 361{ 362 if (instr->is_ssa) 363 return instr->ssa.ncomp; 364 365 if (instr->type == IR2_FETCH) 366 return instr->reg->ncomp; 367 368 assert(instr->type == IR2_ALU); 369 370 unsigned ncomp = 0; 371 for (int i = 0; i < instr->reg->ncomp; i++) 372 ncomp += !!(instr->alu.write_mask & 1 << i); 373 return ncomp; 374} 375 376/* gets a ncomp value for the src registers */ 377static inline unsigned 378src_ncomp(struct ir2_instr *instr) 379{ 380 if (instr->type == IR2_FETCH) { 381 switch (instr->fetch.opc) { 382 case VTX_FETCH: 383 return 1; 384 case TEX_FETCH: 385 return instr->fetch.tex.is_cube ? 3 : 2; 386 case TEX_SET_TEX_LOD: 387 return 1; 388 default: 389 assert(0); 390 } 391 } 392 393 switch (instr->alu.scalar_opc) { 394 case PRED_SETEs ... KILLONEs: 395 return 1; 396 default: 397 break; 398 } 399 400 switch (instr->alu.vector_opc) { 401 case DOT2ADDv: 402 return 2; 403 case DOT3v: 404 return 3; 405 case DOT4v: 406 case CUBEv: 407 case PRED_SETE_PUSHv: 408 return 4; 409 default: 410 return dst_ncomp(instr); 411 } 412} 413