1#include <float.h> 2#include "pipe/p_context.h" 3#include "pipe/p_defines.h" 4#include "pipe/p_state.h" 5#include "util/u_dynarray.h" 6#include "util/u_inlines.h" 7#include "util/u_debug.h" 8#include "util/u_memory.h" 9 10#include "pipe/p_shader_tokens.h" 11#include "tgsi/tgsi_parse.h" 12#include "tgsi/tgsi_util.h" 13#include "tgsi/tgsi_dump.h" 14#include "tgsi/tgsi_ureg.h" 15 16#include "nouveau_debug.h" 17#include "nv_object.xml.h" 18#include "nv30/nv30-40_3d.xml.h" 19#include "nv30/nvfx_shader.h" 20#include "nv30/nv30_state.h" 21 22struct nvfx_fpc { 23 struct nv30_fragprog *fp; 24 25 unsigned max_temps; 26 unsigned long long r_temps; 27 unsigned long long r_temps_discard; 28 struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS]; 29 struct nvfx_reg r_input[PIPE_MAX_SHADER_INPUTS]; 30 struct nvfx_reg *r_temp; 31 32 int num_regs; 33 34 unsigned inst_offset; 35 unsigned have_const; 36 unsigned is_nv4x; 37 38 struct util_dynarray imm_data; 39 40 struct nvfx_reg* r_imm; 41 unsigned nr_imm; 42 43 struct util_dynarray if_stack; 44 //struct util_dynarray loop_stack; 45 struct util_dynarray label_relocs; 46}; 47 48static inline struct nvfx_reg 49temp(struct nvfx_fpc *fpc) 50{ 51 int idx = __builtin_ctzll(~fpc->r_temps); 52 53 if (idx >= fpc->max_temps) { 54 NOUVEAU_ERR("out of temps!!\n"); 55 return nvfx_reg(NVFXSR_TEMP, 0); 56 } 57 58 fpc->r_temps |= (1ULL << idx); 59 fpc->r_temps_discard |= (1ULL << idx); 60 return nvfx_reg(NVFXSR_TEMP, idx); 61} 62 63static inline void 64release_temps(struct nvfx_fpc *fpc) 65{ 66 fpc->r_temps &= ~fpc->r_temps_discard; 67 fpc->r_temps_discard = 0ULL; 68} 69 70static inline struct nvfx_reg 71nvfx_fp_imm(struct nvfx_fpc *fpc, float a, float b, float c, float d) 72{ 73 float v[4] = {a, b, c, d}; 74 int idx = fpc->imm_data.size >> 4; 75 76 memcpy(util_dynarray_grow(&fpc->imm_data, float, 4), v, 4 * sizeof(float)); 77 return nvfx_reg(NVFXSR_IMM, idx); 78} 79 80static void 81grow_insns(struct nvfx_fpc *fpc, int size) 82{ 83 struct nv30_fragprog *fp = fpc->fp; 84 85 fp->insn_len += size; 86 fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); 87} 88 89static void 90emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_src src) 91{ 92 struct nv30_fragprog *fp = fpc->fp; 93 uint32_t *hw = &fp->insn[fpc->inst_offset]; 94 uint32_t sr = 0; 95 96 switch (src.reg.type) { 97 case NVFXSR_INPUT: 98 sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); 99 hw[0] |= (src.reg.index << NVFX_FP_OP_INPUT_SRC_SHIFT); 100 break; 101 case NVFXSR_OUTPUT: 102 sr |= NVFX_FP_REG_SRC_HALF; 103 FALLTHROUGH; 104 case NVFXSR_TEMP: 105 sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); 106 sr |= (src.reg.index << NVFX_FP_REG_SRC_SHIFT); 107 break; 108 case NVFXSR_IMM: 109 if (!fpc->have_const) { 110 grow_insns(fpc, 4); 111 hw = &fp->insn[fpc->inst_offset]; 112 fpc->have_const = 1; 113 } 114 115 memcpy(&fp->insn[fpc->inst_offset + 4], 116 (float*)fpc->imm_data.data + src.reg.index * 4, 117 sizeof(uint32_t) * 4); 118 119 sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); 120 break; 121 case NVFXSR_CONST: 122 if (!fpc->have_const) { 123 grow_insns(fpc, 4); 124 hw = &fp->insn[fpc->inst_offset]; 125 fpc->have_const = 1; 126 } 127 128 { 129 struct nv30_fragprog_data *fpd; 130 131 fp->consts = realloc(fp->consts, ++fp->nr_consts * 132 sizeof(*fpd)); 133 fpd = &fp->consts[fp->nr_consts - 1]; 134 fpd->offset = fpc->inst_offset + 4; 135 fpd->index = src.reg.index; 136 memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); 137 } 138 139 sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); 140 break; 141 case NVFXSR_NONE: 142 sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); 143 break; 144 default: 145 assert(0); 146 } 147 148 if (src.negate) 149 sr |= NVFX_FP_REG_NEGATE; 150 151 if (src.abs) 152 hw[1] |= (1 << (29 + pos)); 153 154 sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) | 155 (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) | 156 (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) | 157 (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT)); 158 159 hw[pos + 1] |= sr; 160} 161 162static void 163emit_dst(struct nvfx_fpc *fpc, struct nvfx_reg dst) 164{ 165 struct nv30_fragprog *fp = fpc->fp; 166 uint32_t *hw = &fp->insn[fpc->inst_offset]; 167 168 switch (dst.type) { 169 case NVFXSR_OUTPUT: 170 if (dst.index == 1) 171 fp->fp_control |= 0x0000000e; 172 else { 173 hw[0] |= NVFX_FP_OP_OUT_REG_HALF; 174 dst.index <<= 1; 175 } 176 FALLTHROUGH; 177 case NVFXSR_TEMP: 178 if (fpc->num_regs < (dst.index + 1)) 179 fpc->num_regs = dst.index + 1; 180 break; 181 case NVFXSR_NONE: 182 hw[0] |= (1 << 30); 183 break; 184 default: 185 assert(0); 186 } 187 188 hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT); 189} 190 191static void 192nvfx_fp_emit(struct nvfx_fpc *fpc, struct nvfx_insn insn) 193{ 194 struct nv30_fragprog *fp = fpc->fp; 195 uint32_t *hw; 196 197 fpc->inst_offset = fp->insn_len; 198 fpc->have_const = 0; 199 grow_insns(fpc, 4); 200 hw = &fp->insn[fpc->inst_offset]; 201 memset(hw, 0, sizeof(uint32_t) * 4); 202 203 if (insn.op == NVFX_FP_OP_OPCODE_KIL) 204 fp->fp_control |= NV30_3D_FP_CONTROL_USES_KIL; 205 hw[0] |= (insn.op << NVFX_FP_OP_OPCODE_SHIFT); 206 hw[0] |= (insn.mask << NVFX_FP_OP_OUTMASK_SHIFT); 207 hw[2] |= (insn.scale << NVFX_FP_OP_DST_SCALE_SHIFT); 208 209 if (insn.sat) 210 hw[0] |= NVFX_FP_OP_OUT_SAT; 211 212 if (insn.cc_update) 213 hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE; 214 hw[1] |= (insn.cc_test << NVFX_FP_OP_COND_SHIFT); 215 hw[1] |= ((insn.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) | 216 (insn.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | 217 (insn.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | 218 (insn.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT)); 219 220 if(insn.unit >= 0) 221 { 222 hw[0] |= (insn.unit << NVFX_FP_OP_TEX_UNIT_SHIFT); 223 } 224 225 emit_dst(fpc, insn.dst); 226 emit_src(fpc, 0, insn.src[0]); 227 emit_src(fpc, 1, insn.src[1]); 228 emit_src(fpc, 2, insn.src[2]); 229} 230 231#define arith(s,o,d,m,s0,s1,s2) \ 232 nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, -1, \ 233 (d), (m), (s0), (s1), (s2)) 234 235#define tex(s,o,u,d,m,s0,s1,s2) \ 236 nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, (u), \ 237 (d), (m), (s0), none, none) 238 239/* IF src.x != 0, as TGSI specifies */ 240static void 241nv40_fp_if(struct nvfx_fpc *fpc, struct nvfx_src src) 242{ 243 const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); 244 struct nvfx_insn insn = arith(0, MOV, none.reg, NVFX_FP_MASK_X, src, none, none); 245 uint32_t *hw; 246 insn.cc_update = 1; 247 nvfx_fp_emit(fpc, insn); 248 249 fpc->inst_offset = fpc->fp->insn_len; 250 grow_insns(fpc, 4); 251 hw = &fpc->fp->insn[fpc->inst_offset]; 252 /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ 253 hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) | 254 NV40_FP_OP_OUT_NONE | 255 (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT); 256 /* Use .xxxx swizzle so that we check only src[0].x*/ 257 hw[1] = (0 << NVFX_FP_OP_COND_SWZ_X_SHIFT) | 258 (0 << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | 259 (0 << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | 260 (0 << NVFX_FP_OP_COND_SWZ_W_SHIFT) | 261 (NVFX_FP_OP_COND_NE << NVFX_FP_OP_COND_SHIFT); 262 hw[2] = 0; /* | NV40_FP_OP_OPCODE_IS_BRANCH | else_offset */ 263 hw[3] = 0; /* | endif_offset */ 264 util_dynarray_append(&fpc->if_stack, unsigned, fpc->inst_offset); 265} 266 267/* IF src.x != 0, as TGSI specifies */ 268static void 269nv40_fp_cal(struct nvfx_fpc *fpc, unsigned target) 270{ 271 struct nvfx_relocation reloc; 272 uint32_t *hw; 273 fpc->inst_offset = fpc->fp->insn_len; 274 grow_insns(fpc, 4); 275 hw = &fpc->fp->insn[fpc->inst_offset]; 276 /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ 277 hw[0] = (NV40_FP_OP_BRA_OPCODE_CAL << NVFX_FP_OP_OPCODE_SHIFT); 278 /* Use .xxxx swizzle so that we check only src[0].x*/ 279 hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) | 280 (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); 281 hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */ 282 hw[3] = 0; 283 reloc.target = target; 284 reloc.location = fpc->inst_offset + 2; 285 util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); 286} 287 288static void 289nv40_fp_ret(struct nvfx_fpc *fpc) 290{ 291 uint32_t *hw; 292 fpc->inst_offset = fpc->fp->insn_len; 293 grow_insns(fpc, 4); 294 hw = &fpc->fp->insn[fpc->inst_offset]; 295 /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ 296 hw[0] = (NV40_FP_OP_BRA_OPCODE_RET << NVFX_FP_OP_OPCODE_SHIFT); 297 /* Use .xxxx swizzle so that we check only src[0].x*/ 298 hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) | 299 (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); 300 hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */ 301 hw[3] = 0; 302} 303 304static void 305nv40_fp_rep(struct nvfx_fpc *fpc, unsigned count, unsigned target) 306{ 307 struct nvfx_relocation reloc; 308 uint32_t *hw; 309 fpc->inst_offset = fpc->fp->insn_len; 310 grow_insns(fpc, 4); 311 hw = &fpc->fp->insn[fpc->inst_offset]; 312 /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ 313 hw[0] = (NV40_FP_OP_BRA_OPCODE_REP << NVFX_FP_OP_OPCODE_SHIFT) | 314 NV40_FP_OP_OUT_NONE | 315 (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT); 316 /* Use .xxxx swizzle so that we check only src[0].x*/ 317 hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) | 318 (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); 319 hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | 320 (count << NV40_FP_OP_REP_COUNT1_SHIFT) | 321 (count << NV40_FP_OP_REP_COUNT2_SHIFT) | 322 (count << NV40_FP_OP_REP_COUNT3_SHIFT); 323 hw[3] = 0; /* | end_offset */ 324 reloc.target = target; 325 reloc.location = fpc->inst_offset + 3; 326 util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); 327 //util_dynarray_append(&fpc->loop_stack, unsigned, target); 328} 329 330#if 0 331/* documentation only */ 332/* warning: this only works forward, and probably only if not inside any IF */ 333static void 334nv40_fp_bra(struct nvfx_fpc *fpc, unsigned target) 335{ 336 struct nvfx_relocation reloc; 337 uint32_t *hw; 338 fpc->inst_offset = fpc->fp->insn_len; 339 grow_insns(fpc, 4); 340 hw = &fpc->fp->insn[fpc->inst_offset]; 341 /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ 342 hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) | 343 NV40_FP_OP_OUT_NONE | 344 (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT); 345 /* Use .xxxx swizzle so that we check only src[0].x*/ 346 hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) | 347 (NVFX_FP_OP_COND_FL << NVFX_FP_OP_COND_SHIFT); 348 hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | else_offset */ 349 hw[3] = 0; /* | endif_offset */ 350 reloc.target = target; 351 reloc.location = fpc->inst_offset + 2; 352 util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); 353 reloc.target = target; 354 reloc.location = fpc->inst_offset + 3; 355 util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); 356} 357#endif 358 359static void 360nv40_fp_brk(struct nvfx_fpc *fpc) 361{ 362 uint32_t *hw; 363 fpc->inst_offset = fpc->fp->insn_len; 364 grow_insns(fpc, 4); 365 hw = &fpc->fp->insn[fpc->inst_offset]; 366 /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ 367 hw[0] = (NV40_FP_OP_BRA_OPCODE_BRK << NVFX_FP_OP_OPCODE_SHIFT) | 368 NV40_FP_OP_OUT_NONE; 369 /* Use .xxxx swizzle so that we check only src[0].x*/ 370 hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) | 371 (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); 372 hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; 373 hw[3] = 0; 374} 375 376static inline struct nvfx_src 377tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc) 378{ 379 struct nvfx_src src; 380 381 switch (fsrc->Register.File) { 382 case TGSI_FILE_INPUT: 383 src.reg = fpc->r_input[fsrc->Register.Index]; 384 break; 385 case TGSI_FILE_CONSTANT: 386 src.reg = nvfx_reg(NVFXSR_CONST, fsrc->Register.Index); 387 break; 388 case TGSI_FILE_IMMEDIATE: 389 assert(fsrc->Register.Index < fpc->nr_imm); 390 src.reg = fpc->r_imm[fsrc->Register.Index]; 391 break; 392 case TGSI_FILE_TEMPORARY: 393 src.reg = fpc->r_temp[fsrc->Register.Index]; 394 break; 395 /* NV40 fragprog result regs are just temps, so this is simple */ 396 case TGSI_FILE_OUTPUT: 397 src.reg = fpc->r_result[fsrc->Register.Index]; 398 break; 399 default: 400 NOUVEAU_ERR("bad src file\n"); 401 src.reg.index = 0; 402 src.reg.type = 0; 403 break; 404 } 405 406 src.abs = fsrc->Register.Absolute; 407 src.negate = fsrc->Register.Negate; 408 src.swz[0] = fsrc->Register.SwizzleX; 409 src.swz[1] = fsrc->Register.SwizzleY; 410 src.swz[2] = fsrc->Register.SwizzleZ; 411 src.swz[3] = fsrc->Register.SwizzleW; 412 src.indirect = 0; 413 src.indirect_reg = 0; 414 src.indirect_swz = 0; 415 return src; 416} 417 418static inline struct nvfx_reg 419tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) { 420 switch (fdst->Register.File) { 421 case TGSI_FILE_OUTPUT: 422 return fpc->r_result[fdst->Register.Index]; 423 case TGSI_FILE_TEMPORARY: 424 return fpc->r_temp[fdst->Register.Index]; 425 case TGSI_FILE_NULL: 426 return nvfx_reg(NVFXSR_NONE, 0); 427 default: 428 NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); 429 return nvfx_reg(NVFXSR_NONE, 0); 430 } 431} 432 433static inline int 434tgsi_mask(uint tgsi) 435{ 436 int mask = 0; 437 438 if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X; 439 if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y; 440 if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z; 441 if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W; 442 return mask; 443} 444 445static bool 446nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc, 447 const struct tgsi_full_instruction *finst) 448{ 449 const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); 450 struct nvfx_insn insn; 451 struct nvfx_src src[3], tmp; 452 struct nvfx_reg dst; 453 int mask, sat, unit = 0; 454 int ai = -1, ci = -1, ii = -1; 455 int i; 456 457 if (finst->Instruction.Opcode == TGSI_OPCODE_END) 458 return true; 459 460 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { 461 const struct tgsi_full_src_register *fsrc; 462 463 fsrc = &finst->Src[i]; 464 if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { 465 src[i] = tgsi_src(fpc, fsrc); 466 } 467 } 468 469 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { 470 const struct tgsi_full_src_register *fsrc; 471 472 fsrc = &finst->Src[i]; 473 474 switch (fsrc->Register.File) { 475 case TGSI_FILE_INPUT: 476 if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG && (0 477 || fsrc->Register.SwizzleX == PIPE_SWIZZLE_W 478 || fsrc->Register.SwizzleY == PIPE_SWIZZLE_W 479 || fsrc->Register.SwizzleZ == PIPE_SWIZZLE_W 480 || fsrc->Register.SwizzleW == PIPE_SWIZZLE_W 481 )) { 482 /* hardware puts 0 in fogcoord.w, but GL/Gallium want 1 there */ 483 struct nvfx_src addend = nvfx_src(nvfx_fp_imm(fpc, 0, 0, 0, 1)); 484 addend.swz[0] = fsrc->Register.SwizzleX; 485 addend.swz[1] = fsrc->Register.SwizzleY; 486 addend.swz[2] = fsrc->Register.SwizzleZ; 487 addend.swz[3] = fsrc->Register.SwizzleW; 488 src[i] = nvfx_src(temp(fpc)); 489 nvfx_fp_emit(fpc, arith(0, ADD, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), addend, none)); 490 } else if (ai == -1 || ai == fsrc->Register.Index) { 491 ai = fsrc->Register.Index; 492 src[i] = tgsi_src(fpc, fsrc); 493 } else { 494 src[i] = nvfx_src(temp(fpc)); 495 nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none)); 496 } 497 break; 498 case TGSI_FILE_CONSTANT: 499 if ((ci == -1 && ii == -1) || 500 ci == fsrc->Register.Index) { 501 ci = fsrc->Register.Index; 502 src[i] = tgsi_src(fpc, fsrc); 503 } else { 504 src[i] = nvfx_src(temp(fpc)); 505 nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none)); 506 } 507 break; 508 case TGSI_FILE_IMMEDIATE: 509 if ((ci == -1 && ii == -1) || 510 ii == fsrc->Register.Index) { 511 ii = fsrc->Register.Index; 512 src[i] = tgsi_src(fpc, fsrc); 513 } else { 514 src[i] = nvfx_src(temp(fpc)); 515 nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none)); 516 } 517 break; 518 case TGSI_FILE_TEMPORARY: 519 /* handled above */ 520 break; 521 case TGSI_FILE_SAMPLER: 522 unit = fsrc->Register.Index; 523 break; 524 case TGSI_FILE_OUTPUT: 525 break; 526 default: 527 NOUVEAU_ERR("bad src file\n"); 528 return false; 529 } 530 } 531 532 dst = tgsi_dst(fpc, &finst->Dst[0]); 533 mask = tgsi_mask(finst->Dst[0].Register.WriteMask); 534 sat = finst->Instruction.Saturate; 535 536 switch (finst->Instruction.Opcode) { 537 case TGSI_OPCODE_ADD: 538 nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none)); 539 break; 540 case TGSI_OPCODE_CEIL: 541 tmp = nvfx_src(temp(fpc)); 542 nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, neg(src[0]), none, none)); 543 nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, neg(tmp), none, none)); 544 break; 545 case TGSI_OPCODE_CMP: 546 insn = arith(0, MOV, none.reg, mask, src[0], none, none); 547 insn.cc_update = 1; 548 nvfx_fp_emit(fpc, insn); 549 550 insn = arith(sat, MOV, dst, mask, src[2], none, none); 551 insn.cc_test = NVFX_COND_GE; 552 nvfx_fp_emit(fpc, insn); 553 554 insn = arith(sat, MOV, dst, mask, src[1], none, none); 555 insn.cc_test = NVFX_COND_LT; 556 nvfx_fp_emit(fpc, insn); 557 break; 558 case TGSI_OPCODE_COS: 559 nvfx_fp_emit(fpc, arith(sat, COS, dst, mask, src[0], none, none)); 560 break; 561 case TGSI_OPCODE_DDX: 562 if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) { 563 tmp = nvfx_src(temp(fpc)); 564 nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none)); 565 nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none)); 566 nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none)); 567 nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none)); 568 } else { 569 nvfx_fp_emit(fpc, arith(sat, DDX, dst, mask, src[0], none, none)); 570 } 571 break; 572 case TGSI_OPCODE_DDY: 573 if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) { 574 tmp = nvfx_src(temp(fpc)); 575 nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none)); 576 nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none)); 577 nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none)); 578 nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none)); 579 } else { 580 nvfx_fp_emit(fpc, arith(sat, DDY, dst, mask, src[0], none, none)); 581 } 582 break; 583 case TGSI_OPCODE_DP2: 584 tmp = nvfx_src(temp(fpc)); 585 nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], src[1], none)); 586 nvfx_fp_emit(fpc, arith(0, ADD, dst, mask, swz(tmp, X, X, X, X), swz(tmp, Y, Y, Y, Y), none)); 587 break; 588 case TGSI_OPCODE_DP3: 589 nvfx_fp_emit(fpc, arith(sat, DP3, dst, mask, src[0], src[1], none)); 590 break; 591 case TGSI_OPCODE_DP4: 592 nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none)); 593 break; 594 case TGSI_OPCODE_DST: 595 nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none)); 596 break; 597 case TGSI_OPCODE_EX2: 598 nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, src[0], none, none)); 599 break; 600 case TGSI_OPCODE_FLR: 601 nvfx_fp_emit(fpc, arith(sat, FLR, dst, mask, src[0], none, none)); 602 break; 603 case TGSI_OPCODE_FRC: 604 nvfx_fp_emit(fpc, arith(sat, FRC, dst, mask, src[0], none, none)); 605 break; 606 case TGSI_OPCODE_KILL: 607 nvfx_fp_emit(fpc, arith(0, KIL, none.reg, 0, none, none, none)); 608 break; 609 case TGSI_OPCODE_KILL_IF: 610 insn = arith(0, MOV, none.reg, NVFX_FP_MASK_ALL, src[0], none, none); 611 insn.cc_update = 1; 612 nvfx_fp_emit(fpc, insn); 613 614 insn = arith(0, KIL, none.reg, 0, none, none, none); 615 insn.cc_test = NVFX_COND_LT; 616 nvfx_fp_emit(fpc, insn); 617 break; 618 case TGSI_OPCODE_LG2: 619 nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none)); 620 break; 621 case TGSI_OPCODE_LIT: 622 if(!fpc->is_nv4x) 623 nvfx_fp_emit(fpc, arith(sat, LIT_NV30, dst, mask, src[0], none, none)); 624 else { 625 /* we use FLT_MIN, so that log2 never gives -infinity, and thus multiplication by 626 * specular 0 always gives 0, so that ex2 gives 1, to satisfy the 0^0 = 1 requirement 627 * 628 * NOTE: if we start using half precision, we might need an fp16 FLT_MIN here instead 629 */ 630 struct nvfx_src maxs = nvfx_src(nvfx_fp_imm(fpc, 0, FLT_MIN, 0, 0)); 631 tmp = nvfx_src(temp(fpc)); 632 if (ci>= 0 || ii >= 0) { 633 nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, maxs, none, none)); 634 maxs = tmp; 635 } 636 nvfx_fp_emit(fpc, arith(0, MAX, tmp.reg, NVFX_FP_MASK_Y | NVFX_FP_MASK_W, swz(src[0], X, X, X, Y), swz(maxs, X, X, Y, Y), none)); 637 nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), none, none)); 638 nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), swz(src[0], W, W, W, W), none)); 639 nvfx_fp_emit(fpc, arith(sat, LITEX2_NV40, dst, mask, swz(tmp, Y, Y, W, W), none, none)); 640 } 641 break; 642 case TGSI_OPCODE_LRP: 643 if(!fpc->is_nv4x) 644 nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2])); 645 else { 646 tmp = nvfx_src(temp(fpc)); 647 nvfx_fp_emit(fpc, arith(0, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2])); 648 nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], tmp)); 649 } 650 break; 651 case TGSI_OPCODE_MAD: 652 nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], src[2])); 653 break; 654 case TGSI_OPCODE_MAX: 655 nvfx_fp_emit(fpc, arith(sat, MAX, dst, mask, src[0], src[1], none)); 656 break; 657 case TGSI_OPCODE_MIN: 658 nvfx_fp_emit(fpc, arith(sat, MIN, dst, mask, src[0], src[1], none)); 659 break; 660 case TGSI_OPCODE_MOV: 661 nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, src[0], none, none)); 662 break; 663 case TGSI_OPCODE_MUL: 664 nvfx_fp_emit(fpc, arith(sat, MUL, dst, mask, src[0], src[1], none)); 665 break; 666 case TGSI_OPCODE_NOP: 667 break; 668 case TGSI_OPCODE_POW: 669 if(!fpc->is_nv4x) 670 nvfx_fp_emit(fpc, arith(sat, POW_NV30, dst, mask, src[0], src[1], none)); 671 else { 672 tmp = nvfx_src(temp(fpc)); 673 nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none)); 674 nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none)); 675 nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, swz(tmp, X, X, X, X), none, none)); 676 } 677 break; 678 case TGSI_OPCODE_RCP: 679 nvfx_fp_emit(fpc, arith(sat, RCP, dst, mask, src[0], none, none)); 680 break; 681 case TGSI_OPCODE_RSQ: 682 if(!fpc->is_nv4x) 683 nvfx_fp_emit(fpc, arith(sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none)); 684 else { 685 tmp = nvfx_src(temp(fpc)); 686 insn = arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, abs(swz(src[0], X, X, X, X)), none, none); 687 insn.scale = NVFX_FP_OP_DST_SCALE_INV_2X; 688 nvfx_fp_emit(fpc, insn); 689 nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none)); 690 } 691 break; 692 case TGSI_OPCODE_SEQ: 693 nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none)); 694 break; 695 case TGSI_OPCODE_SGE: 696 nvfx_fp_emit(fpc, arith(sat, SGE, dst, mask, src[0], src[1], none)); 697 break; 698 case TGSI_OPCODE_SGT: 699 nvfx_fp_emit(fpc, arith(sat, SGT, dst, mask, src[0], src[1], none)); 700 break; 701 case TGSI_OPCODE_SIN: 702 nvfx_fp_emit(fpc, arith(sat, SIN, dst, mask, src[0], none, none)); 703 break; 704 case TGSI_OPCODE_SLE: 705 nvfx_fp_emit(fpc, arith(sat, SLE, dst, mask, src[0], src[1], none)); 706 break; 707 case TGSI_OPCODE_SLT: 708 nvfx_fp_emit(fpc, arith(sat, SLT, dst, mask, src[0], src[1], none)); 709 break; 710 case TGSI_OPCODE_SNE: 711 nvfx_fp_emit(fpc, arith(sat, SNE, dst, mask, src[0], src[1], none)); 712 break; 713 case TGSI_OPCODE_SSG: 714 { 715 struct nvfx_src minones = swz(nvfx_src(nvfx_fp_imm(fpc, -1, -1, -1, -1)), X, X, X, X); 716 717 insn = arith(sat, MOV, dst, mask, src[0], none, none); 718 insn.cc_update = 1; 719 nvfx_fp_emit(fpc, insn); 720 721 insn = arith(0, STR, dst, mask, none, none, none); 722 insn.cc_test = NVFX_COND_GT; 723 nvfx_fp_emit(fpc, insn); 724 725 if(!sat) { 726 insn = arith(0, MOV, dst, mask, minones, none, none); 727 insn.cc_test = NVFX_COND_LT; 728 nvfx_fp_emit(fpc, insn); 729 } 730 break; 731 } 732 case TGSI_OPCODE_TEX: 733 nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none)); 734 break; 735 case TGSI_OPCODE_TRUNC: 736 tmp = nvfx_src(temp(fpc)); 737 insn = arith(0, MOV, none.reg, mask, src[0], none, none); 738 insn.cc_update = 1; 739 nvfx_fp_emit(fpc, insn); 740 741 nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, abs(src[0]), none, none)); 742 nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, tmp, none, none)); 743 744 insn = arith(sat, MOV, dst, mask, neg(tmp), none, none); 745 insn.cc_test = NVFX_COND_LT; 746 nvfx_fp_emit(fpc, insn); 747 break; 748 case TGSI_OPCODE_TXB: 749 nvfx_fp_emit(fpc, tex(sat, TXB, unit, dst, mask, src[0], none, none)); 750 break; 751 case TGSI_OPCODE_TXL: 752 if(fpc->is_nv4x) 753 nvfx_fp_emit(fpc, tex(sat, TXL_NV40, unit, dst, mask, src[0], none, none)); 754 else /* unsupported on nv30, use TEX and hope they like it */ 755 nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none)); 756 break; 757 case TGSI_OPCODE_TXP: 758 nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none)); 759 break; 760 761 case TGSI_OPCODE_IF: 762 // MOVRC0 R31 (TR0.xyzw), R<src>: 763 // IF (NE.xxxx) ELSE <else> END <end> 764 if(!fpc->is_nv4x) 765 goto nv3x_cflow; 766 nv40_fp_if(fpc, src[0]); 767 break; 768 769 case TGSI_OPCODE_ELSE: 770 { 771 uint32_t *hw; 772 if(!fpc->is_nv4x) 773 goto nv3x_cflow; 774 assert(util_dynarray_contains(&fpc->if_stack, unsigned)); 775 hw = &fpc->fp->insn[util_dynarray_top(&fpc->if_stack, unsigned)]; 776 hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len; 777 break; 778 } 779 780 case TGSI_OPCODE_ENDIF: 781 { 782 uint32_t *hw; 783 if(!fpc->is_nv4x) 784 goto nv3x_cflow; 785 assert(util_dynarray_contains(&fpc->if_stack, unsigned)); 786 hw = &fpc->fp->insn[util_dynarray_pop(&fpc->if_stack, unsigned)]; 787 if(!hw[2]) 788 hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len; 789 hw[3] = fpc->fp->insn_len; 790 break; 791 } 792 793 case TGSI_OPCODE_BGNSUB: 794 case TGSI_OPCODE_ENDSUB: 795 /* nothing to do here */ 796 break; 797 798 case TGSI_OPCODE_CAL: 799 if(!fpc->is_nv4x) 800 goto nv3x_cflow; 801 nv40_fp_cal(fpc, finst->Label.Label); 802 break; 803 804 case TGSI_OPCODE_RET: 805 if(!fpc->is_nv4x) 806 goto nv3x_cflow; 807 nv40_fp_ret(fpc); 808 break; 809 810 case TGSI_OPCODE_BGNLOOP: 811 if(!fpc->is_nv4x) 812 goto nv3x_cflow; 813 /* TODO: we should support using two nested REPs to allow a > 255 iteration count */ 814 nv40_fp_rep(fpc, 255, finst->Label.Label); 815 break; 816 817 case TGSI_OPCODE_ENDLOOP: 818 break; 819 820 case TGSI_OPCODE_BRK: 821 if(!fpc->is_nv4x) 822 goto nv3x_cflow; 823 nv40_fp_brk(fpc); 824 break; 825 826 case TGSI_OPCODE_CONT: 827 { 828 static int warned = 0; 829 if(!warned) { 830 NOUVEAU_ERR("Sorry, the continue keyword is not implemented: ignoring it.\n"); 831 warned = 1; 832 } 833 break; 834 } 835 836 default: 837 NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); 838 return false; 839 } 840 841out: 842 release_temps(fpc); 843 return true; 844nv3x_cflow: 845 { 846 static int warned = 0; 847 if(!warned) { 848 NOUVEAU_ERR( 849 "Sorry, control flow instructions are not supported in hardware on nv3x: ignoring them\n" 850 "If rendering is incorrect, try to disable GLSL support in the application.\n"); 851 warned = 1; 852 } 853 } 854 goto out; 855} 856 857static bool 858nvfx_fragprog_parse_decl_input(struct nvfx_fpc *fpc, 859 const struct tgsi_full_declaration *fdec) 860{ 861 unsigned idx = fdec->Range.First; 862 unsigned hw; 863 864 switch (fdec->Semantic.Name) { 865 case TGSI_SEMANTIC_POSITION: 866 hw = NVFX_FP_OP_INPUT_SRC_POSITION; 867 break; 868 case TGSI_SEMANTIC_COLOR: 869 hw = NVFX_FP_OP_INPUT_SRC_COL0 + fdec->Semantic.Index; 870 break; 871 case TGSI_SEMANTIC_FOG: 872 hw = NVFX_FP_OP_INPUT_SRC_FOGC; 873 break; 874 case TGSI_SEMANTIC_FACE: 875 hw = NV40_FP_OP_INPUT_SRC_FACING; 876 break; 877 case TGSI_SEMANTIC_TEXCOORD: 878 assert(fdec->Semantic.Index < 8); 879 fpc->fp->texcoord[fdec->Semantic.Index] = fdec->Semantic.Index; 880 fpc->fp->texcoords |= (1 << fdec->Semantic.Index); 881 fpc->fp->vp_or |= (0x00004000 << fdec->Semantic.Index); 882 hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.Index); 883 break; 884 case TGSI_SEMANTIC_GENERIC: 885 case TGSI_SEMANTIC_PCOORD: 886 /* will be assigned to remaining TC slots later */ 887 return true; 888 default: 889 assert(0); 890 return false; 891 } 892 893 fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw); 894 return true; 895} 896 897static bool 898nvfx_fragprog_assign_generic(struct nvfx_fpc *fpc, 899 const struct tgsi_full_declaration *fdec) 900{ 901 unsigned num_texcoords = fpc->is_nv4x ? 10 : 8; 902 unsigned idx = fdec->Range.First; 903 unsigned hw; 904 905 switch (fdec->Semantic.Name) { 906 case TGSI_SEMANTIC_GENERIC: 907 case TGSI_SEMANTIC_PCOORD: 908 for (hw = 0; hw < num_texcoords; hw++) { 909 if (fpc->fp->texcoord[hw] == 0xffff) { 910 if (hw <= 7) { 911 fpc->fp->texcoords |= (0x1 << hw); 912 fpc->fp->vp_or |= (0x00004000 << hw); 913 } else { 914 fpc->fp->vp_or |= (0x00001000 << (hw - 8)); 915 } 916 if (fdec->Semantic.Name == TGSI_SEMANTIC_PCOORD) { 917 fpc->fp->texcoord[hw] = 0xfffe; 918 fpc->fp->point_sprite_control |= (0x00000100 << hw); 919 } else { 920 fpc->fp->texcoord[hw] = fdec->Semantic.Index + 8; 921 } 922 hw = NVFX_FP_OP_INPUT_SRC_TC(hw); 923 fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw); 924 return true; 925 } 926 } 927 return false; 928 default: 929 return true; 930 } 931} 932 933static bool 934nvfx_fragprog_parse_decl_output(struct nvfx_fpc *fpc, 935 const struct tgsi_full_declaration *fdec) 936{ 937 unsigned idx = fdec->Range.First; 938 unsigned hw; 939 940 switch (fdec->Semantic.Name) { 941 case TGSI_SEMANTIC_POSITION: 942 hw = 1; 943 break; 944 case TGSI_SEMANTIC_COLOR: 945 hw = ~0; 946 switch (fdec->Semantic.Index) { 947 case 0: hw = 0; break; 948 case 1: hw = 2; break; 949 case 2: hw = 3; break; 950 case 3: hw = 4; break; 951 } 952 if(hw > ((fpc->is_nv4x) ? 4 : 2)) { 953 NOUVEAU_ERR("bad rcol index\n"); 954 return false; 955 } 956 break; 957 default: 958 NOUVEAU_ERR("bad output semantic\n"); 959 return false; 960 } 961 962 fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw); 963 fpc->r_temps |= (1ULL << hw); 964 return true; 965} 966 967static bool 968nvfx_fragprog_prepare(struct nvfx_fpc *fpc) 969{ 970 struct tgsi_parse_context p; 971 int high_temp = -1, i; 972 973 fpc->r_imm = CALLOC(fpc->fp->info.immediate_count, sizeof(struct nvfx_reg)); 974 975 tgsi_parse_init(&p, fpc->fp->pipe.tokens); 976 while (!tgsi_parse_end_of_tokens(&p)) { 977 const union tgsi_full_token *tok = &p.FullToken; 978 979 tgsi_parse_token(&p); 980 switch(tok->Token.Type) { 981 case TGSI_TOKEN_TYPE_DECLARATION: 982 { 983 const struct tgsi_full_declaration *fdec; 984 fdec = &p.FullToken.FullDeclaration; 985 switch (fdec->Declaration.File) { 986 case TGSI_FILE_INPUT: 987 if (!nvfx_fragprog_parse_decl_input(fpc, fdec)) 988 goto out_err; 989 break; 990 case TGSI_FILE_OUTPUT: 991 if (!nvfx_fragprog_parse_decl_output(fpc, fdec)) 992 goto out_err; 993 break; 994 case TGSI_FILE_TEMPORARY: 995 if (fdec->Range.Last > high_temp) { 996 high_temp = 997 fdec->Range.Last; 998 } 999 break; 1000 default: 1001 break; 1002 } 1003 } 1004 break; 1005 case TGSI_TOKEN_TYPE_IMMEDIATE: 1006 { 1007 struct tgsi_full_immediate *imm; 1008 1009 imm = &p.FullToken.FullImmediate; 1010 assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); 1011 assert(fpc->nr_imm < fpc->fp->info.immediate_count); 1012 1013 fpc->r_imm[fpc->nr_imm++] = nvfx_fp_imm(fpc, imm->u[0].Float, imm->u[1].Float, imm->u[2].Float, imm->u[3].Float); 1014 break; 1015 } 1016 default: 1017 break; 1018 } 1019 } 1020 tgsi_parse_free(&p); 1021 1022 tgsi_parse_init(&p, fpc->fp->pipe.tokens); 1023 while (!tgsi_parse_end_of_tokens(&p)) { 1024 const struct tgsi_full_declaration *fdec; 1025 tgsi_parse_token(&p); 1026 switch(p.FullToken.Token.Type) { 1027 case TGSI_TOKEN_TYPE_DECLARATION: 1028 fdec = &p.FullToken.FullDeclaration; 1029 switch (fdec->Declaration.File) { 1030 case TGSI_FILE_INPUT: 1031 if (!nvfx_fragprog_assign_generic(fpc, fdec)) 1032 goto out_err; 1033 break; 1034 default: 1035 break; 1036 } 1037 break; 1038 default: 1039 break; 1040 } 1041 } 1042 tgsi_parse_free(&p); 1043 1044 if (++high_temp) { 1045 fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg)); 1046 for (i = 0; i < high_temp; i++) 1047 fpc->r_temp[i] = temp(fpc); 1048 fpc->r_temps_discard = 0ULL; 1049 } 1050 1051 return true; 1052 1053out_err: 1054 FREE(fpc->r_temp); 1055 fpc->r_temp = NULL; 1056 1057 tgsi_parse_free(&p); 1058 return false; 1059} 1060 1061DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", false) 1062 1063void 1064_nvfx_fragprog_translate(uint16_t oclass, struct nv30_fragprog *fp) 1065{ 1066 struct tgsi_parse_context parse; 1067 struct nvfx_fpc *fpc = NULL; 1068 struct util_dynarray insns; 1069 1070 fp->translated = false; 1071 fp->point_sprite_control = 0; 1072 fp->vp_or = 0; 1073 1074 fpc = CALLOC_STRUCT(nvfx_fpc); 1075 if (!fpc) 1076 goto out_err; 1077 1078 fpc->is_nv4x = (oclass >= NV40_3D_CLASS) ? ~0 : 0; 1079 fpc->max_temps = fpc->is_nv4x ? 48 : 32; 1080 fpc->fp = fp; 1081 fpc->num_regs = 2; 1082 memset(fp->texcoord, 0xff, sizeof(fp->texcoord)); 1083 1084 if (fp->info.properties[TGSI_PROPERTY_FS_COORD_ORIGIN]) 1085 fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED; 1086 if (fp->info.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER]) 1087 fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_CENTER_INTEGER; 1088 if (fp->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) 1089 fp->rt_enable |= NV30_3D_RT_ENABLE_MRT; 1090 1091 if (!nvfx_fragprog_prepare(fpc)) 1092 goto out_err; 1093 1094 tgsi_parse_init(&parse, fp->pipe.tokens); 1095 util_dynarray_init(&insns, NULL); 1096 1097 while (!tgsi_parse_end_of_tokens(&parse)) { 1098 tgsi_parse_token(&parse); 1099 1100 switch (parse.FullToken.Token.Type) { 1101 case TGSI_TOKEN_TYPE_INSTRUCTION: 1102 { 1103 const struct tgsi_full_instruction *finst; 1104 1105 util_dynarray_append(&insns, unsigned, fp->insn_len); 1106 finst = &parse.FullToken.FullInstruction; 1107 if (!nvfx_fragprog_parse_instruction(fpc, finst)) 1108 goto out_err; 1109 } 1110 break; 1111 default: 1112 break; 1113 } 1114 } 1115 util_dynarray_append(&insns, unsigned, fp->insn_len); 1116 1117 for(unsigned i = 0; i < fpc->label_relocs.size; i += sizeof(struct nvfx_relocation)) 1118 { 1119 struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)fpc->label_relocs.data + i); 1120 fp->insn[label_reloc->location] |= ((unsigned*)insns.data)[label_reloc->target]; 1121 } 1122 util_dynarray_fini(&insns); 1123 1124 if(!fpc->is_nv4x) 1125 fp->fp_control |= (fpc->num_regs-1)/2; 1126 else 1127 fp->fp_control |= fpc->num_regs << NV40_3D_FP_CONTROL_TEMP_COUNT__SHIFT; 1128 1129 /* Terminate final instruction */ 1130 if(fp->insn) 1131 fp->insn[fpc->inst_offset] |= 0x00000001; 1132 1133 /* Append NOP + END instruction for branches to the end of the program */ 1134 fpc->inst_offset = fp->insn_len; 1135 grow_insns(fpc, 4); 1136 fp->insn[fpc->inst_offset + 0] = 0x00000001; 1137 fp->insn[fpc->inst_offset + 1] = 0x00000000; 1138 fp->insn[fpc->inst_offset + 2] = 0x00000000; 1139 fp->insn[fpc->inst_offset + 3] = 0x00000000; 1140 1141 if(debug_get_option_nvfx_dump_fp()) 1142 { 1143 debug_printf("\n"); 1144 tgsi_dump(fp->pipe.tokens, 0); 1145 1146 debug_printf("\n%s fragment program:\n", fpc->is_nv4x ? "nv4x" : "nv3x"); 1147 for (unsigned i = 0; i < fp->insn_len; i += 4) 1148 debug_printf("%3u: %08x %08x %08x %08x\n", i >> 2, fp->insn[i], fp->insn[i + 1], fp->insn[i + 2], fp->insn[i + 3]); 1149 debug_printf("\n"); 1150 } 1151 1152 fp->translated = true; 1153 1154out: 1155 tgsi_parse_free(&parse); 1156 if (fpc) 1157 { 1158 FREE(fpc->r_temp); 1159 FREE(fpc->r_imm); 1160 util_dynarray_fini(&fpc->if_stack); 1161 util_dynarray_fini(&fpc->label_relocs); 1162 util_dynarray_fini(&fpc->imm_data); 1163 //util_dynarray_fini(&fpc->loop_stack); 1164 FREE(fpc); 1165 } 1166 1167 return; 1168 1169out_err: 1170 _debug_printf("Error: failed to compile this fragment program:\n"); 1171 tgsi_dump(fp->pipe.tokens, 0); 1172 goto out; 1173} 1174