1bf215546Sopenharmony_ci#include <float.h> 2bf215546Sopenharmony_ci#include "pipe/p_context.h" 3bf215546Sopenharmony_ci#include "pipe/p_defines.h" 4bf215546Sopenharmony_ci#include "pipe/p_state.h" 5bf215546Sopenharmony_ci#include "util/u_dynarray.h" 6bf215546Sopenharmony_ci#include "util/u_inlines.h" 7bf215546Sopenharmony_ci#include "util/u_debug.h" 8bf215546Sopenharmony_ci#include "util/u_memory.h" 9bf215546Sopenharmony_ci 10bf215546Sopenharmony_ci#include "pipe/p_shader_tokens.h" 11bf215546Sopenharmony_ci#include "tgsi/tgsi_parse.h" 12bf215546Sopenharmony_ci#include "tgsi/tgsi_util.h" 13bf215546Sopenharmony_ci#include "tgsi/tgsi_dump.h" 14bf215546Sopenharmony_ci#include "tgsi/tgsi_ureg.h" 15bf215546Sopenharmony_ci 16bf215546Sopenharmony_ci#include "nouveau_debug.h" 17bf215546Sopenharmony_ci#include "nv_object.xml.h" 18bf215546Sopenharmony_ci#include "nv30/nv30-40_3d.xml.h" 19bf215546Sopenharmony_ci#include "nv30/nvfx_shader.h" 20bf215546Sopenharmony_ci#include "nv30/nv30_state.h" 21bf215546Sopenharmony_ci 22bf215546Sopenharmony_cistruct nvfx_fpc { 23bf215546Sopenharmony_ci struct nv30_fragprog *fp; 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci unsigned max_temps; 26bf215546Sopenharmony_ci unsigned long long r_temps; 27bf215546Sopenharmony_ci unsigned long long r_temps_discard; 28bf215546Sopenharmony_ci struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS]; 29bf215546Sopenharmony_ci struct nvfx_reg r_input[PIPE_MAX_SHADER_INPUTS]; 30bf215546Sopenharmony_ci struct nvfx_reg *r_temp; 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci int num_regs; 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci unsigned inst_offset; 35bf215546Sopenharmony_ci unsigned have_const; 36bf215546Sopenharmony_ci unsigned is_nv4x; 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci struct util_dynarray imm_data; 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci struct nvfx_reg* r_imm; 41bf215546Sopenharmony_ci unsigned nr_imm; 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci struct util_dynarray if_stack; 44bf215546Sopenharmony_ci //struct util_dynarray loop_stack; 45bf215546Sopenharmony_ci struct util_dynarray label_relocs; 46bf215546Sopenharmony_ci}; 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_cistatic inline struct nvfx_reg 49bf215546Sopenharmony_citemp(struct nvfx_fpc *fpc) 50bf215546Sopenharmony_ci{ 51bf215546Sopenharmony_ci int idx = __builtin_ctzll(~fpc->r_temps); 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci if (idx >= fpc->max_temps) { 54bf215546Sopenharmony_ci NOUVEAU_ERR("out of temps!!\n"); 55bf215546Sopenharmony_ci return nvfx_reg(NVFXSR_TEMP, 0); 56bf215546Sopenharmony_ci } 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci fpc->r_temps |= (1ULL << idx); 59bf215546Sopenharmony_ci fpc->r_temps_discard |= (1ULL << idx); 60bf215546Sopenharmony_ci return nvfx_reg(NVFXSR_TEMP, idx); 61bf215546Sopenharmony_ci} 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_cistatic inline void 64bf215546Sopenharmony_cirelease_temps(struct nvfx_fpc *fpc) 65bf215546Sopenharmony_ci{ 66bf215546Sopenharmony_ci fpc->r_temps &= ~fpc->r_temps_discard; 67bf215546Sopenharmony_ci fpc->r_temps_discard = 0ULL; 68bf215546Sopenharmony_ci} 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_cistatic inline struct nvfx_reg 71bf215546Sopenharmony_cinvfx_fp_imm(struct nvfx_fpc *fpc, float a, float b, float c, float d) 72bf215546Sopenharmony_ci{ 73bf215546Sopenharmony_ci float v[4] = {a, b, c, d}; 74bf215546Sopenharmony_ci int idx = fpc->imm_data.size >> 4; 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci memcpy(util_dynarray_grow(&fpc->imm_data, float, 4), v, 4 * sizeof(float)); 77bf215546Sopenharmony_ci return nvfx_reg(NVFXSR_IMM, idx); 78bf215546Sopenharmony_ci} 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_cistatic void 81bf215546Sopenharmony_cigrow_insns(struct nvfx_fpc *fpc, int size) 82bf215546Sopenharmony_ci{ 83bf215546Sopenharmony_ci struct nv30_fragprog *fp = fpc->fp; 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci fp->insn_len += size; 86bf215546Sopenharmony_ci fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); 87bf215546Sopenharmony_ci} 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_cistatic void 90bf215546Sopenharmony_ciemit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_src src) 91bf215546Sopenharmony_ci{ 92bf215546Sopenharmony_ci struct nv30_fragprog *fp = fpc->fp; 93bf215546Sopenharmony_ci uint32_t *hw = &fp->insn[fpc->inst_offset]; 94bf215546Sopenharmony_ci uint32_t sr = 0; 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_ci switch (src.reg.type) { 97bf215546Sopenharmony_ci case NVFXSR_INPUT: 98bf215546Sopenharmony_ci sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); 99bf215546Sopenharmony_ci hw[0] |= (src.reg.index << NVFX_FP_OP_INPUT_SRC_SHIFT); 100bf215546Sopenharmony_ci break; 101bf215546Sopenharmony_ci case NVFXSR_OUTPUT: 102bf215546Sopenharmony_ci sr |= NVFX_FP_REG_SRC_HALF; 103bf215546Sopenharmony_ci FALLTHROUGH; 104bf215546Sopenharmony_ci case NVFXSR_TEMP: 105bf215546Sopenharmony_ci sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); 106bf215546Sopenharmony_ci sr |= (src.reg.index << NVFX_FP_REG_SRC_SHIFT); 107bf215546Sopenharmony_ci break; 108bf215546Sopenharmony_ci case NVFXSR_IMM: 109bf215546Sopenharmony_ci if (!fpc->have_const) { 110bf215546Sopenharmony_ci grow_insns(fpc, 4); 111bf215546Sopenharmony_ci hw = &fp->insn[fpc->inst_offset]; 112bf215546Sopenharmony_ci fpc->have_const = 1; 113bf215546Sopenharmony_ci } 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci memcpy(&fp->insn[fpc->inst_offset + 4], 116bf215546Sopenharmony_ci (float*)fpc->imm_data.data + src.reg.index * 4, 117bf215546Sopenharmony_ci sizeof(uint32_t) * 4); 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ci sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); 120bf215546Sopenharmony_ci break; 121bf215546Sopenharmony_ci case NVFXSR_CONST: 122bf215546Sopenharmony_ci if (!fpc->have_const) { 123bf215546Sopenharmony_ci grow_insns(fpc, 4); 124bf215546Sopenharmony_ci hw = &fp->insn[fpc->inst_offset]; 125bf215546Sopenharmony_ci fpc->have_const = 1; 126bf215546Sopenharmony_ci } 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci { 129bf215546Sopenharmony_ci struct nv30_fragprog_data *fpd; 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci fp->consts = realloc(fp->consts, ++fp->nr_consts * 132bf215546Sopenharmony_ci sizeof(*fpd)); 133bf215546Sopenharmony_ci fpd = &fp->consts[fp->nr_consts - 1]; 134bf215546Sopenharmony_ci fpd->offset = fpc->inst_offset + 4; 135bf215546Sopenharmony_ci fpd->index = src.reg.index; 136bf215546Sopenharmony_ci memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); 137bf215546Sopenharmony_ci } 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); 140bf215546Sopenharmony_ci break; 141bf215546Sopenharmony_ci case NVFXSR_NONE: 142bf215546Sopenharmony_ci sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); 143bf215546Sopenharmony_ci break; 144bf215546Sopenharmony_ci default: 145bf215546Sopenharmony_ci assert(0); 146bf215546Sopenharmony_ci } 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci if (src.negate) 149bf215546Sopenharmony_ci sr |= NVFX_FP_REG_NEGATE; 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci if (src.abs) 152bf215546Sopenharmony_ci hw[1] |= (1 << (29 + pos)); 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) | 155bf215546Sopenharmony_ci (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) | 156bf215546Sopenharmony_ci (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) | 157bf215546Sopenharmony_ci (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT)); 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci hw[pos + 1] |= sr; 160bf215546Sopenharmony_ci} 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_cistatic void 163bf215546Sopenharmony_ciemit_dst(struct nvfx_fpc *fpc, struct nvfx_reg dst) 164bf215546Sopenharmony_ci{ 165bf215546Sopenharmony_ci struct nv30_fragprog *fp = fpc->fp; 166bf215546Sopenharmony_ci uint32_t *hw = &fp->insn[fpc->inst_offset]; 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci switch (dst.type) { 169bf215546Sopenharmony_ci case NVFXSR_OUTPUT: 170bf215546Sopenharmony_ci if (dst.index == 1) 171bf215546Sopenharmony_ci fp->fp_control |= 0x0000000e; 172bf215546Sopenharmony_ci else { 173bf215546Sopenharmony_ci hw[0] |= NVFX_FP_OP_OUT_REG_HALF; 174bf215546Sopenharmony_ci dst.index <<= 1; 175bf215546Sopenharmony_ci } 176bf215546Sopenharmony_ci FALLTHROUGH; 177bf215546Sopenharmony_ci case NVFXSR_TEMP: 178bf215546Sopenharmony_ci if (fpc->num_regs < (dst.index + 1)) 179bf215546Sopenharmony_ci fpc->num_regs = dst.index + 1; 180bf215546Sopenharmony_ci break; 181bf215546Sopenharmony_ci case NVFXSR_NONE: 182bf215546Sopenharmony_ci hw[0] |= (1 << 30); 183bf215546Sopenharmony_ci break; 184bf215546Sopenharmony_ci default: 185bf215546Sopenharmony_ci assert(0); 186bf215546Sopenharmony_ci } 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT); 189bf215546Sopenharmony_ci} 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_cistatic void 192bf215546Sopenharmony_cinvfx_fp_emit(struct nvfx_fpc *fpc, struct nvfx_insn insn) 193bf215546Sopenharmony_ci{ 194bf215546Sopenharmony_ci struct nv30_fragprog *fp = fpc->fp; 195bf215546Sopenharmony_ci uint32_t *hw; 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci fpc->inst_offset = fp->insn_len; 198bf215546Sopenharmony_ci fpc->have_const = 0; 199bf215546Sopenharmony_ci grow_insns(fpc, 4); 200bf215546Sopenharmony_ci hw = &fp->insn[fpc->inst_offset]; 201bf215546Sopenharmony_ci memset(hw, 0, sizeof(uint32_t) * 4); 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci if (insn.op == NVFX_FP_OP_OPCODE_KIL) 204bf215546Sopenharmony_ci fp->fp_control |= NV30_3D_FP_CONTROL_USES_KIL; 205bf215546Sopenharmony_ci hw[0] |= (insn.op << NVFX_FP_OP_OPCODE_SHIFT); 206bf215546Sopenharmony_ci hw[0] |= (insn.mask << NVFX_FP_OP_OUTMASK_SHIFT); 207bf215546Sopenharmony_ci hw[2] |= (insn.scale << NVFX_FP_OP_DST_SCALE_SHIFT); 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci if (insn.sat) 210bf215546Sopenharmony_ci hw[0] |= NVFX_FP_OP_OUT_SAT; 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci if (insn.cc_update) 213bf215546Sopenharmony_ci hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE; 214bf215546Sopenharmony_ci hw[1] |= (insn.cc_test << NVFX_FP_OP_COND_SHIFT); 215bf215546Sopenharmony_ci hw[1] |= ((insn.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) | 216bf215546Sopenharmony_ci (insn.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | 217bf215546Sopenharmony_ci (insn.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | 218bf215546Sopenharmony_ci (insn.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT)); 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci if(insn.unit >= 0) 221bf215546Sopenharmony_ci { 222bf215546Sopenharmony_ci hw[0] |= (insn.unit << NVFX_FP_OP_TEX_UNIT_SHIFT); 223bf215546Sopenharmony_ci } 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci emit_dst(fpc, insn.dst); 226bf215546Sopenharmony_ci emit_src(fpc, 0, insn.src[0]); 227bf215546Sopenharmony_ci emit_src(fpc, 1, insn.src[1]); 228bf215546Sopenharmony_ci emit_src(fpc, 2, insn.src[2]); 229bf215546Sopenharmony_ci} 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci#define arith(s,o,d,m,s0,s1,s2) \ 232bf215546Sopenharmony_ci nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, -1, \ 233bf215546Sopenharmony_ci (d), (m), (s0), (s1), (s2)) 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci#define tex(s,o,u,d,m,s0,s1,s2) \ 236bf215546Sopenharmony_ci nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, (u), \ 237bf215546Sopenharmony_ci (d), (m), (s0), none, none) 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci/* IF src.x != 0, as TGSI specifies */ 240bf215546Sopenharmony_cistatic void 241bf215546Sopenharmony_cinv40_fp_if(struct nvfx_fpc *fpc, struct nvfx_src src) 242bf215546Sopenharmony_ci{ 243bf215546Sopenharmony_ci const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); 244bf215546Sopenharmony_ci struct nvfx_insn insn = arith(0, MOV, none.reg, NVFX_FP_MASK_X, src, none, none); 245bf215546Sopenharmony_ci uint32_t *hw; 246bf215546Sopenharmony_ci insn.cc_update = 1; 247bf215546Sopenharmony_ci nvfx_fp_emit(fpc, insn); 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci fpc->inst_offset = fpc->fp->insn_len; 250bf215546Sopenharmony_ci grow_insns(fpc, 4); 251bf215546Sopenharmony_ci hw = &fpc->fp->insn[fpc->inst_offset]; 252bf215546Sopenharmony_ci /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ 253bf215546Sopenharmony_ci hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) | 254bf215546Sopenharmony_ci NV40_FP_OP_OUT_NONE | 255bf215546Sopenharmony_ci (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT); 256bf215546Sopenharmony_ci /* Use .xxxx swizzle so that we check only src[0].x*/ 257bf215546Sopenharmony_ci hw[1] = (0 << NVFX_FP_OP_COND_SWZ_X_SHIFT) | 258bf215546Sopenharmony_ci (0 << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | 259bf215546Sopenharmony_ci (0 << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | 260bf215546Sopenharmony_ci (0 << NVFX_FP_OP_COND_SWZ_W_SHIFT) | 261bf215546Sopenharmony_ci (NVFX_FP_OP_COND_NE << NVFX_FP_OP_COND_SHIFT); 262bf215546Sopenharmony_ci hw[2] = 0; /* | NV40_FP_OP_OPCODE_IS_BRANCH | else_offset */ 263bf215546Sopenharmony_ci hw[3] = 0; /* | endif_offset */ 264bf215546Sopenharmony_ci util_dynarray_append(&fpc->if_stack, unsigned, fpc->inst_offset); 265bf215546Sopenharmony_ci} 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci/* IF src.x != 0, as TGSI specifies */ 268bf215546Sopenharmony_cistatic void 269bf215546Sopenharmony_cinv40_fp_cal(struct nvfx_fpc *fpc, unsigned target) 270bf215546Sopenharmony_ci{ 271bf215546Sopenharmony_ci struct nvfx_relocation reloc; 272bf215546Sopenharmony_ci uint32_t *hw; 273bf215546Sopenharmony_ci fpc->inst_offset = fpc->fp->insn_len; 274bf215546Sopenharmony_ci grow_insns(fpc, 4); 275bf215546Sopenharmony_ci hw = &fpc->fp->insn[fpc->inst_offset]; 276bf215546Sopenharmony_ci /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ 277bf215546Sopenharmony_ci hw[0] = (NV40_FP_OP_BRA_OPCODE_CAL << NVFX_FP_OP_OPCODE_SHIFT); 278bf215546Sopenharmony_ci /* Use .xxxx swizzle so that we check only src[0].x*/ 279bf215546Sopenharmony_ci hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) | 280bf215546Sopenharmony_ci (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); 281bf215546Sopenharmony_ci hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */ 282bf215546Sopenharmony_ci hw[3] = 0; 283bf215546Sopenharmony_ci reloc.target = target; 284bf215546Sopenharmony_ci reloc.location = fpc->inst_offset + 2; 285bf215546Sopenharmony_ci util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); 286bf215546Sopenharmony_ci} 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_cistatic void 289bf215546Sopenharmony_cinv40_fp_ret(struct nvfx_fpc *fpc) 290bf215546Sopenharmony_ci{ 291bf215546Sopenharmony_ci uint32_t *hw; 292bf215546Sopenharmony_ci fpc->inst_offset = fpc->fp->insn_len; 293bf215546Sopenharmony_ci grow_insns(fpc, 4); 294bf215546Sopenharmony_ci hw = &fpc->fp->insn[fpc->inst_offset]; 295bf215546Sopenharmony_ci /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ 296bf215546Sopenharmony_ci hw[0] = (NV40_FP_OP_BRA_OPCODE_RET << NVFX_FP_OP_OPCODE_SHIFT); 297bf215546Sopenharmony_ci /* Use .xxxx swizzle so that we check only src[0].x*/ 298bf215546Sopenharmony_ci hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) | 299bf215546Sopenharmony_ci (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); 300bf215546Sopenharmony_ci hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */ 301bf215546Sopenharmony_ci hw[3] = 0; 302bf215546Sopenharmony_ci} 303bf215546Sopenharmony_ci 304bf215546Sopenharmony_cistatic void 305bf215546Sopenharmony_cinv40_fp_rep(struct nvfx_fpc *fpc, unsigned count, unsigned target) 306bf215546Sopenharmony_ci{ 307bf215546Sopenharmony_ci struct nvfx_relocation reloc; 308bf215546Sopenharmony_ci uint32_t *hw; 309bf215546Sopenharmony_ci fpc->inst_offset = fpc->fp->insn_len; 310bf215546Sopenharmony_ci grow_insns(fpc, 4); 311bf215546Sopenharmony_ci hw = &fpc->fp->insn[fpc->inst_offset]; 312bf215546Sopenharmony_ci /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ 313bf215546Sopenharmony_ci hw[0] = (NV40_FP_OP_BRA_OPCODE_REP << NVFX_FP_OP_OPCODE_SHIFT) | 314bf215546Sopenharmony_ci NV40_FP_OP_OUT_NONE | 315bf215546Sopenharmony_ci (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT); 316bf215546Sopenharmony_ci /* Use .xxxx swizzle so that we check only src[0].x*/ 317bf215546Sopenharmony_ci hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) | 318bf215546Sopenharmony_ci (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); 319bf215546Sopenharmony_ci hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | 320bf215546Sopenharmony_ci (count << NV40_FP_OP_REP_COUNT1_SHIFT) | 321bf215546Sopenharmony_ci (count << NV40_FP_OP_REP_COUNT2_SHIFT) | 322bf215546Sopenharmony_ci (count << NV40_FP_OP_REP_COUNT3_SHIFT); 323bf215546Sopenharmony_ci hw[3] = 0; /* | end_offset */ 324bf215546Sopenharmony_ci reloc.target = target; 325bf215546Sopenharmony_ci reloc.location = fpc->inst_offset + 3; 326bf215546Sopenharmony_ci util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); 327bf215546Sopenharmony_ci //util_dynarray_append(&fpc->loop_stack, unsigned, target); 328bf215546Sopenharmony_ci} 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci#if 0 331bf215546Sopenharmony_ci/* documentation only */ 332bf215546Sopenharmony_ci/* warning: this only works forward, and probably only if not inside any IF */ 333bf215546Sopenharmony_cistatic void 334bf215546Sopenharmony_cinv40_fp_bra(struct nvfx_fpc *fpc, unsigned target) 335bf215546Sopenharmony_ci{ 336bf215546Sopenharmony_ci struct nvfx_relocation reloc; 337bf215546Sopenharmony_ci uint32_t *hw; 338bf215546Sopenharmony_ci fpc->inst_offset = fpc->fp->insn_len; 339bf215546Sopenharmony_ci grow_insns(fpc, 4); 340bf215546Sopenharmony_ci hw = &fpc->fp->insn[fpc->inst_offset]; 341bf215546Sopenharmony_ci /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ 342bf215546Sopenharmony_ci hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) | 343bf215546Sopenharmony_ci NV40_FP_OP_OUT_NONE | 344bf215546Sopenharmony_ci (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT); 345bf215546Sopenharmony_ci /* Use .xxxx swizzle so that we check only src[0].x*/ 346bf215546Sopenharmony_ci hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) | 347bf215546Sopenharmony_ci (NVFX_FP_OP_COND_FL << NVFX_FP_OP_COND_SHIFT); 348bf215546Sopenharmony_ci hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | else_offset */ 349bf215546Sopenharmony_ci hw[3] = 0; /* | endif_offset */ 350bf215546Sopenharmony_ci reloc.target = target; 351bf215546Sopenharmony_ci reloc.location = fpc->inst_offset + 2; 352bf215546Sopenharmony_ci util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); 353bf215546Sopenharmony_ci reloc.target = target; 354bf215546Sopenharmony_ci reloc.location = fpc->inst_offset + 3; 355bf215546Sopenharmony_ci util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); 356bf215546Sopenharmony_ci} 357bf215546Sopenharmony_ci#endif 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_cistatic void 360bf215546Sopenharmony_cinv40_fp_brk(struct nvfx_fpc *fpc) 361bf215546Sopenharmony_ci{ 362bf215546Sopenharmony_ci uint32_t *hw; 363bf215546Sopenharmony_ci fpc->inst_offset = fpc->fp->insn_len; 364bf215546Sopenharmony_ci grow_insns(fpc, 4); 365bf215546Sopenharmony_ci hw = &fpc->fp->insn[fpc->inst_offset]; 366bf215546Sopenharmony_ci /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ 367bf215546Sopenharmony_ci hw[0] = (NV40_FP_OP_BRA_OPCODE_BRK << NVFX_FP_OP_OPCODE_SHIFT) | 368bf215546Sopenharmony_ci NV40_FP_OP_OUT_NONE; 369bf215546Sopenharmony_ci /* Use .xxxx swizzle so that we check only src[0].x*/ 370bf215546Sopenharmony_ci hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) | 371bf215546Sopenharmony_ci (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); 372bf215546Sopenharmony_ci hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; 373bf215546Sopenharmony_ci hw[3] = 0; 374bf215546Sopenharmony_ci} 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_cistatic inline struct nvfx_src 377bf215546Sopenharmony_citgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc) 378bf215546Sopenharmony_ci{ 379bf215546Sopenharmony_ci struct nvfx_src src; 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_ci switch (fsrc->Register.File) { 382bf215546Sopenharmony_ci case TGSI_FILE_INPUT: 383bf215546Sopenharmony_ci src.reg = fpc->r_input[fsrc->Register.Index]; 384bf215546Sopenharmony_ci break; 385bf215546Sopenharmony_ci case TGSI_FILE_CONSTANT: 386bf215546Sopenharmony_ci src.reg = nvfx_reg(NVFXSR_CONST, fsrc->Register.Index); 387bf215546Sopenharmony_ci break; 388bf215546Sopenharmony_ci case TGSI_FILE_IMMEDIATE: 389bf215546Sopenharmony_ci assert(fsrc->Register.Index < fpc->nr_imm); 390bf215546Sopenharmony_ci src.reg = fpc->r_imm[fsrc->Register.Index]; 391bf215546Sopenharmony_ci break; 392bf215546Sopenharmony_ci case TGSI_FILE_TEMPORARY: 393bf215546Sopenharmony_ci src.reg = fpc->r_temp[fsrc->Register.Index]; 394bf215546Sopenharmony_ci break; 395bf215546Sopenharmony_ci /* NV40 fragprog result regs are just temps, so this is simple */ 396bf215546Sopenharmony_ci case TGSI_FILE_OUTPUT: 397bf215546Sopenharmony_ci src.reg = fpc->r_result[fsrc->Register.Index]; 398bf215546Sopenharmony_ci break; 399bf215546Sopenharmony_ci default: 400bf215546Sopenharmony_ci NOUVEAU_ERR("bad src file\n"); 401bf215546Sopenharmony_ci src.reg.index = 0; 402bf215546Sopenharmony_ci src.reg.type = 0; 403bf215546Sopenharmony_ci break; 404bf215546Sopenharmony_ci } 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_ci src.abs = fsrc->Register.Absolute; 407bf215546Sopenharmony_ci src.negate = fsrc->Register.Negate; 408bf215546Sopenharmony_ci src.swz[0] = fsrc->Register.SwizzleX; 409bf215546Sopenharmony_ci src.swz[1] = fsrc->Register.SwizzleY; 410bf215546Sopenharmony_ci src.swz[2] = fsrc->Register.SwizzleZ; 411bf215546Sopenharmony_ci src.swz[3] = fsrc->Register.SwizzleW; 412bf215546Sopenharmony_ci src.indirect = 0; 413bf215546Sopenharmony_ci src.indirect_reg = 0; 414bf215546Sopenharmony_ci src.indirect_swz = 0; 415bf215546Sopenharmony_ci return src; 416bf215546Sopenharmony_ci} 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_cistatic inline struct nvfx_reg 419bf215546Sopenharmony_citgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) { 420bf215546Sopenharmony_ci switch (fdst->Register.File) { 421bf215546Sopenharmony_ci case TGSI_FILE_OUTPUT: 422bf215546Sopenharmony_ci return fpc->r_result[fdst->Register.Index]; 423bf215546Sopenharmony_ci case TGSI_FILE_TEMPORARY: 424bf215546Sopenharmony_ci return fpc->r_temp[fdst->Register.Index]; 425bf215546Sopenharmony_ci case TGSI_FILE_NULL: 426bf215546Sopenharmony_ci return nvfx_reg(NVFXSR_NONE, 0); 427bf215546Sopenharmony_ci default: 428bf215546Sopenharmony_ci NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); 429bf215546Sopenharmony_ci return nvfx_reg(NVFXSR_NONE, 0); 430bf215546Sopenharmony_ci } 431bf215546Sopenharmony_ci} 432bf215546Sopenharmony_ci 433bf215546Sopenharmony_cistatic inline int 434bf215546Sopenharmony_citgsi_mask(uint tgsi) 435bf215546Sopenharmony_ci{ 436bf215546Sopenharmony_ci int mask = 0; 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_ci if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X; 439bf215546Sopenharmony_ci if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y; 440bf215546Sopenharmony_ci if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z; 441bf215546Sopenharmony_ci if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W; 442bf215546Sopenharmony_ci return mask; 443bf215546Sopenharmony_ci} 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_cistatic bool 446bf215546Sopenharmony_cinvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc, 447bf215546Sopenharmony_ci const struct tgsi_full_instruction *finst) 448bf215546Sopenharmony_ci{ 449bf215546Sopenharmony_ci const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); 450bf215546Sopenharmony_ci struct nvfx_insn insn; 451bf215546Sopenharmony_ci struct nvfx_src src[3], tmp; 452bf215546Sopenharmony_ci struct nvfx_reg dst; 453bf215546Sopenharmony_ci int mask, sat, unit = 0; 454bf215546Sopenharmony_ci int ai = -1, ci = -1, ii = -1; 455bf215546Sopenharmony_ci int i; 456bf215546Sopenharmony_ci 457bf215546Sopenharmony_ci if (finst->Instruction.Opcode == TGSI_OPCODE_END) 458bf215546Sopenharmony_ci return true; 459bf215546Sopenharmony_ci 460bf215546Sopenharmony_ci for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { 461bf215546Sopenharmony_ci const struct tgsi_full_src_register *fsrc; 462bf215546Sopenharmony_ci 463bf215546Sopenharmony_ci fsrc = &finst->Src[i]; 464bf215546Sopenharmony_ci if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { 465bf215546Sopenharmony_ci src[i] = tgsi_src(fpc, fsrc); 466bf215546Sopenharmony_ci } 467bf215546Sopenharmony_ci } 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { 470bf215546Sopenharmony_ci const struct tgsi_full_src_register *fsrc; 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_ci fsrc = &finst->Src[i]; 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_ci switch (fsrc->Register.File) { 475bf215546Sopenharmony_ci case TGSI_FILE_INPUT: 476bf215546Sopenharmony_ci if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG && (0 477bf215546Sopenharmony_ci || fsrc->Register.SwizzleX == PIPE_SWIZZLE_W 478bf215546Sopenharmony_ci || fsrc->Register.SwizzleY == PIPE_SWIZZLE_W 479bf215546Sopenharmony_ci || fsrc->Register.SwizzleZ == PIPE_SWIZZLE_W 480bf215546Sopenharmony_ci || fsrc->Register.SwizzleW == PIPE_SWIZZLE_W 481bf215546Sopenharmony_ci )) { 482bf215546Sopenharmony_ci /* hardware puts 0 in fogcoord.w, but GL/Gallium want 1 there */ 483bf215546Sopenharmony_ci struct nvfx_src addend = nvfx_src(nvfx_fp_imm(fpc, 0, 0, 0, 1)); 484bf215546Sopenharmony_ci addend.swz[0] = fsrc->Register.SwizzleX; 485bf215546Sopenharmony_ci addend.swz[1] = fsrc->Register.SwizzleY; 486bf215546Sopenharmony_ci addend.swz[2] = fsrc->Register.SwizzleZ; 487bf215546Sopenharmony_ci addend.swz[3] = fsrc->Register.SwizzleW; 488bf215546Sopenharmony_ci src[i] = nvfx_src(temp(fpc)); 489bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, ADD, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), addend, none)); 490bf215546Sopenharmony_ci } else if (ai == -1 || ai == fsrc->Register.Index) { 491bf215546Sopenharmony_ci ai = fsrc->Register.Index; 492bf215546Sopenharmony_ci src[i] = tgsi_src(fpc, fsrc); 493bf215546Sopenharmony_ci } else { 494bf215546Sopenharmony_ci src[i] = nvfx_src(temp(fpc)); 495bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none)); 496bf215546Sopenharmony_ci } 497bf215546Sopenharmony_ci break; 498bf215546Sopenharmony_ci case TGSI_FILE_CONSTANT: 499bf215546Sopenharmony_ci if ((ci == -1 && ii == -1) || 500bf215546Sopenharmony_ci ci == fsrc->Register.Index) { 501bf215546Sopenharmony_ci ci = fsrc->Register.Index; 502bf215546Sopenharmony_ci src[i] = tgsi_src(fpc, fsrc); 503bf215546Sopenharmony_ci } else { 504bf215546Sopenharmony_ci src[i] = nvfx_src(temp(fpc)); 505bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none)); 506bf215546Sopenharmony_ci } 507bf215546Sopenharmony_ci break; 508bf215546Sopenharmony_ci case TGSI_FILE_IMMEDIATE: 509bf215546Sopenharmony_ci if ((ci == -1 && ii == -1) || 510bf215546Sopenharmony_ci ii == fsrc->Register.Index) { 511bf215546Sopenharmony_ci ii = fsrc->Register.Index; 512bf215546Sopenharmony_ci src[i] = tgsi_src(fpc, fsrc); 513bf215546Sopenharmony_ci } else { 514bf215546Sopenharmony_ci src[i] = nvfx_src(temp(fpc)); 515bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none)); 516bf215546Sopenharmony_ci } 517bf215546Sopenharmony_ci break; 518bf215546Sopenharmony_ci case TGSI_FILE_TEMPORARY: 519bf215546Sopenharmony_ci /* handled above */ 520bf215546Sopenharmony_ci break; 521bf215546Sopenharmony_ci case TGSI_FILE_SAMPLER: 522bf215546Sopenharmony_ci unit = fsrc->Register.Index; 523bf215546Sopenharmony_ci break; 524bf215546Sopenharmony_ci case TGSI_FILE_OUTPUT: 525bf215546Sopenharmony_ci break; 526bf215546Sopenharmony_ci default: 527bf215546Sopenharmony_ci NOUVEAU_ERR("bad src file\n"); 528bf215546Sopenharmony_ci return false; 529bf215546Sopenharmony_ci } 530bf215546Sopenharmony_ci } 531bf215546Sopenharmony_ci 532bf215546Sopenharmony_ci dst = tgsi_dst(fpc, &finst->Dst[0]); 533bf215546Sopenharmony_ci mask = tgsi_mask(finst->Dst[0].Register.WriteMask); 534bf215546Sopenharmony_ci sat = finst->Instruction.Saturate; 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci switch (finst->Instruction.Opcode) { 537bf215546Sopenharmony_ci case TGSI_OPCODE_ADD: 538bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none)); 539bf215546Sopenharmony_ci break; 540bf215546Sopenharmony_ci case TGSI_OPCODE_CEIL: 541bf215546Sopenharmony_ci tmp = nvfx_src(temp(fpc)); 542bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, neg(src[0]), none, none)); 543bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, neg(tmp), none, none)); 544bf215546Sopenharmony_ci break; 545bf215546Sopenharmony_ci case TGSI_OPCODE_CMP: 546bf215546Sopenharmony_ci insn = arith(0, MOV, none.reg, mask, src[0], none, none); 547bf215546Sopenharmony_ci insn.cc_update = 1; 548bf215546Sopenharmony_ci nvfx_fp_emit(fpc, insn); 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_ci insn = arith(sat, MOV, dst, mask, src[2], none, none); 551bf215546Sopenharmony_ci insn.cc_test = NVFX_COND_GE; 552bf215546Sopenharmony_ci nvfx_fp_emit(fpc, insn); 553bf215546Sopenharmony_ci 554bf215546Sopenharmony_ci insn = arith(sat, MOV, dst, mask, src[1], none, none); 555bf215546Sopenharmony_ci insn.cc_test = NVFX_COND_LT; 556bf215546Sopenharmony_ci nvfx_fp_emit(fpc, insn); 557bf215546Sopenharmony_ci break; 558bf215546Sopenharmony_ci case TGSI_OPCODE_COS: 559bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, COS, dst, mask, src[0], none, none)); 560bf215546Sopenharmony_ci break; 561bf215546Sopenharmony_ci case TGSI_OPCODE_DDX: 562bf215546Sopenharmony_ci if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) { 563bf215546Sopenharmony_ci tmp = nvfx_src(temp(fpc)); 564bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none)); 565bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none)); 566bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none)); 567bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none)); 568bf215546Sopenharmony_ci } else { 569bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, DDX, dst, mask, src[0], none, none)); 570bf215546Sopenharmony_ci } 571bf215546Sopenharmony_ci break; 572bf215546Sopenharmony_ci case TGSI_OPCODE_DDY: 573bf215546Sopenharmony_ci if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) { 574bf215546Sopenharmony_ci tmp = nvfx_src(temp(fpc)); 575bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none)); 576bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none)); 577bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none)); 578bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none)); 579bf215546Sopenharmony_ci } else { 580bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, DDY, dst, mask, src[0], none, none)); 581bf215546Sopenharmony_ci } 582bf215546Sopenharmony_ci break; 583bf215546Sopenharmony_ci case TGSI_OPCODE_DP2: 584bf215546Sopenharmony_ci tmp = nvfx_src(temp(fpc)); 585bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], src[1], none)); 586bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, ADD, dst, mask, swz(tmp, X, X, X, X), swz(tmp, Y, Y, Y, Y), none)); 587bf215546Sopenharmony_ci break; 588bf215546Sopenharmony_ci case TGSI_OPCODE_DP3: 589bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, DP3, dst, mask, src[0], src[1], none)); 590bf215546Sopenharmony_ci break; 591bf215546Sopenharmony_ci case TGSI_OPCODE_DP4: 592bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none)); 593bf215546Sopenharmony_ci break; 594bf215546Sopenharmony_ci case TGSI_OPCODE_DST: 595bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none)); 596bf215546Sopenharmony_ci break; 597bf215546Sopenharmony_ci case TGSI_OPCODE_EX2: 598bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, src[0], none, none)); 599bf215546Sopenharmony_ci break; 600bf215546Sopenharmony_ci case TGSI_OPCODE_FLR: 601bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, FLR, dst, mask, src[0], none, none)); 602bf215546Sopenharmony_ci break; 603bf215546Sopenharmony_ci case TGSI_OPCODE_FRC: 604bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, FRC, dst, mask, src[0], none, none)); 605bf215546Sopenharmony_ci break; 606bf215546Sopenharmony_ci case TGSI_OPCODE_KILL: 607bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, KIL, none.reg, 0, none, none, none)); 608bf215546Sopenharmony_ci break; 609bf215546Sopenharmony_ci case TGSI_OPCODE_KILL_IF: 610bf215546Sopenharmony_ci insn = arith(0, MOV, none.reg, NVFX_FP_MASK_ALL, src[0], none, none); 611bf215546Sopenharmony_ci insn.cc_update = 1; 612bf215546Sopenharmony_ci nvfx_fp_emit(fpc, insn); 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ci insn = arith(0, KIL, none.reg, 0, none, none, none); 615bf215546Sopenharmony_ci insn.cc_test = NVFX_COND_LT; 616bf215546Sopenharmony_ci nvfx_fp_emit(fpc, insn); 617bf215546Sopenharmony_ci break; 618bf215546Sopenharmony_ci case TGSI_OPCODE_LG2: 619bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none)); 620bf215546Sopenharmony_ci break; 621bf215546Sopenharmony_ci case TGSI_OPCODE_LIT: 622bf215546Sopenharmony_ci if(!fpc->is_nv4x) 623bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, LIT_NV30, dst, mask, src[0], none, none)); 624bf215546Sopenharmony_ci else { 625bf215546Sopenharmony_ci /* we use FLT_MIN, so that log2 never gives -infinity, and thus multiplication by 626bf215546Sopenharmony_ci * specular 0 always gives 0, so that ex2 gives 1, to satisfy the 0^0 = 1 requirement 627bf215546Sopenharmony_ci * 628bf215546Sopenharmony_ci * NOTE: if we start using half precision, we might need an fp16 FLT_MIN here instead 629bf215546Sopenharmony_ci */ 630bf215546Sopenharmony_ci struct nvfx_src maxs = nvfx_src(nvfx_fp_imm(fpc, 0, FLT_MIN, 0, 0)); 631bf215546Sopenharmony_ci tmp = nvfx_src(temp(fpc)); 632bf215546Sopenharmony_ci if (ci>= 0 || ii >= 0) { 633bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, maxs, none, none)); 634bf215546Sopenharmony_ci maxs = tmp; 635bf215546Sopenharmony_ci } 636bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, MAX, tmp.reg, NVFX_FP_MASK_Y | NVFX_FP_MASK_W, swz(src[0], X, X, X, Y), swz(maxs, X, X, Y, Y), none)); 637bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), none, none)); 638bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), swz(src[0], W, W, W, W), none)); 639bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, LITEX2_NV40, dst, mask, swz(tmp, Y, Y, W, W), none, none)); 640bf215546Sopenharmony_ci } 641bf215546Sopenharmony_ci break; 642bf215546Sopenharmony_ci case TGSI_OPCODE_LRP: 643bf215546Sopenharmony_ci if(!fpc->is_nv4x) 644bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2])); 645bf215546Sopenharmony_ci else { 646bf215546Sopenharmony_ci tmp = nvfx_src(temp(fpc)); 647bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2])); 648bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], tmp)); 649bf215546Sopenharmony_ci } 650bf215546Sopenharmony_ci break; 651bf215546Sopenharmony_ci case TGSI_OPCODE_MAD: 652bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], src[2])); 653bf215546Sopenharmony_ci break; 654bf215546Sopenharmony_ci case TGSI_OPCODE_MAX: 655bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, MAX, dst, mask, src[0], src[1], none)); 656bf215546Sopenharmony_ci break; 657bf215546Sopenharmony_ci case TGSI_OPCODE_MIN: 658bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, MIN, dst, mask, src[0], src[1], none)); 659bf215546Sopenharmony_ci break; 660bf215546Sopenharmony_ci case TGSI_OPCODE_MOV: 661bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, src[0], none, none)); 662bf215546Sopenharmony_ci break; 663bf215546Sopenharmony_ci case TGSI_OPCODE_MUL: 664bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, MUL, dst, mask, src[0], src[1], none)); 665bf215546Sopenharmony_ci break; 666bf215546Sopenharmony_ci case TGSI_OPCODE_NOP: 667bf215546Sopenharmony_ci break; 668bf215546Sopenharmony_ci case TGSI_OPCODE_POW: 669bf215546Sopenharmony_ci if(!fpc->is_nv4x) 670bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, POW_NV30, dst, mask, src[0], src[1], none)); 671bf215546Sopenharmony_ci else { 672bf215546Sopenharmony_ci tmp = nvfx_src(temp(fpc)); 673bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none)); 674bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none)); 675bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, swz(tmp, X, X, X, X), none, none)); 676bf215546Sopenharmony_ci } 677bf215546Sopenharmony_ci break; 678bf215546Sopenharmony_ci case TGSI_OPCODE_RCP: 679bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, RCP, dst, mask, src[0], none, none)); 680bf215546Sopenharmony_ci break; 681bf215546Sopenharmony_ci case TGSI_OPCODE_RSQ: 682bf215546Sopenharmony_ci if(!fpc->is_nv4x) 683bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none)); 684bf215546Sopenharmony_ci else { 685bf215546Sopenharmony_ci tmp = nvfx_src(temp(fpc)); 686bf215546Sopenharmony_ci insn = arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, abs(swz(src[0], X, X, X, X)), none, none); 687bf215546Sopenharmony_ci insn.scale = NVFX_FP_OP_DST_SCALE_INV_2X; 688bf215546Sopenharmony_ci nvfx_fp_emit(fpc, insn); 689bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none)); 690bf215546Sopenharmony_ci } 691bf215546Sopenharmony_ci break; 692bf215546Sopenharmony_ci case TGSI_OPCODE_SEQ: 693bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none)); 694bf215546Sopenharmony_ci break; 695bf215546Sopenharmony_ci case TGSI_OPCODE_SGE: 696bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, SGE, dst, mask, src[0], src[1], none)); 697bf215546Sopenharmony_ci break; 698bf215546Sopenharmony_ci case TGSI_OPCODE_SGT: 699bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, SGT, dst, mask, src[0], src[1], none)); 700bf215546Sopenharmony_ci break; 701bf215546Sopenharmony_ci case TGSI_OPCODE_SIN: 702bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, SIN, dst, mask, src[0], none, none)); 703bf215546Sopenharmony_ci break; 704bf215546Sopenharmony_ci case TGSI_OPCODE_SLE: 705bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, SLE, dst, mask, src[0], src[1], none)); 706bf215546Sopenharmony_ci break; 707bf215546Sopenharmony_ci case TGSI_OPCODE_SLT: 708bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, SLT, dst, mask, src[0], src[1], none)); 709bf215546Sopenharmony_ci break; 710bf215546Sopenharmony_ci case TGSI_OPCODE_SNE: 711bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, SNE, dst, mask, src[0], src[1], none)); 712bf215546Sopenharmony_ci break; 713bf215546Sopenharmony_ci case TGSI_OPCODE_SSG: 714bf215546Sopenharmony_ci { 715bf215546Sopenharmony_ci struct nvfx_src minones = swz(nvfx_src(nvfx_fp_imm(fpc, -1, -1, -1, -1)), X, X, X, X); 716bf215546Sopenharmony_ci 717bf215546Sopenharmony_ci insn = arith(sat, MOV, dst, mask, src[0], none, none); 718bf215546Sopenharmony_ci insn.cc_update = 1; 719bf215546Sopenharmony_ci nvfx_fp_emit(fpc, insn); 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_ci insn = arith(0, STR, dst, mask, none, none, none); 722bf215546Sopenharmony_ci insn.cc_test = NVFX_COND_GT; 723bf215546Sopenharmony_ci nvfx_fp_emit(fpc, insn); 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci if(!sat) { 726bf215546Sopenharmony_ci insn = arith(0, MOV, dst, mask, minones, none, none); 727bf215546Sopenharmony_ci insn.cc_test = NVFX_COND_LT; 728bf215546Sopenharmony_ci nvfx_fp_emit(fpc, insn); 729bf215546Sopenharmony_ci } 730bf215546Sopenharmony_ci break; 731bf215546Sopenharmony_ci } 732bf215546Sopenharmony_ci case TGSI_OPCODE_TEX: 733bf215546Sopenharmony_ci nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none)); 734bf215546Sopenharmony_ci break; 735bf215546Sopenharmony_ci case TGSI_OPCODE_TRUNC: 736bf215546Sopenharmony_ci tmp = nvfx_src(temp(fpc)); 737bf215546Sopenharmony_ci insn = arith(0, MOV, none.reg, mask, src[0], none, none); 738bf215546Sopenharmony_ci insn.cc_update = 1; 739bf215546Sopenharmony_ci nvfx_fp_emit(fpc, insn); 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, abs(src[0]), none, none)); 742bf215546Sopenharmony_ci nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, tmp, none, none)); 743bf215546Sopenharmony_ci 744bf215546Sopenharmony_ci insn = arith(sat, MOV, dst, mask, neg(tmp), none, none); 745bf215546Sopenharmony_ci insn.cc_test = NVFX_COND_LT; 746bf215546Sopenharmony_ci nvfx_fp_emit(fpc, insn); 747bf215546Sopenharmony_ci break; 748bf215546Sopenharmony_ci case TGSI_OPCODE_TXB: 749bf215546Sopenharmony_ci nvfx_fp_emit(fpc, tex(sat, TXB, unit, dst, mask, src[0], none, none)); 750bf215546Sopenharmony_ci break; 751bf215546Sopenharmony_ci case TGSI_OPCODE_TXL: 752bf215546Sopenharmony_ci if(fpc->is_nv4x) 753bf215546Sopenharmony_ci nvfx_fp_emit(fpc, tex(sat, TXL_NV40, unit, dst, mask, src[0], none, none)); 754bf215546Sopenharmony_ci else /* unsupported on nv30, use TEX and hope they like it */ 755bf215546Sopenharmony_ci nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none)); 756bf215546Sopenharmony_ci break; 757bf215546Sopenharmony_ci case TGSI_OPCODE_TXP: 758bf215546Sopenharmony_ci nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none)); 759bf215546Sopenharmony_ci break; 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_ci case TGSI_OPCODE_IF: 762bf215546Sopenharmony_ci // MOVRC0 R31 (TR0.xyzw), R<src>: 763bf215546Sopenharmony_ci // IF (NE.xxxx) ELSE <else> END <end> 764bf215546Sopenharmony_ci if(!fpc->is_nv4x) 765bf215546Sopenharmony_ci goto nv3x_cflow; 766bf215546Sopenharmony_ci nv40_fp_if(fpc, src[0]); 767bf215546Sopenharmony_ci break; 768bf215546Sopenharmony_ci 769bf215546Sopenharmony_ci case TGSI_OPCODE_ELSE: 770bf215546Sopenharmony_ci { 771bf215546Sopenharmony_ci uint32_t *hw; 772bf215546Sopenharmony_ci if(!fpc->is_nv4x) 773bf215546Sopenharmony_ci goto nv3x_cflow; 774bf215546Sopenharmony_ci assert(util_dynarray_contains(&fpc->if_stack, unsigned)); 775bf215546Sopenharmony_ci hw = &fpc->fp->insn[util_dynarray_top(&fpc->if_stack, unsigned)]; 776bf215546Sopenharmony_ci hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len; 777bf215546Sopenharmony_ci break; 778bf215546Sopenharmony_ci } 779bf215546Sopenharmony_ci 780bf215546Sopenharmony_ci case TGSI_OPCODE_ENDIF: 781bf215546Sopenharmony_ci { 782bf215546Sopenharmony_ci uint32_t *hw; 783bf215546Sopenharmony_ci if(!fpc->is_nv4x) 784bf215546Sopenharmony_ci goto nv3x_cflow; 785bf215546Sopenharmony_ci assert(util_dynarray_contains(&fpc->if_stack, unsigned)); 786bf215546Sopenharmony_ci hw = &fpc->fp->insn[util_dynarray_pop(&fpc->if_stack, unsigned)]; 787bf215546Sopenharmony_ci if(!hw[2]) 788bf215546Sopenharmony_ci hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len; 789bf215546Sopenharmony_ci hw[3] = fpc->fp->insn_len; 790bf215546Sopenharmony_ci break; 791bf215546Sopenharmony_ci } 792bf215546Sopenharmony_ci 793bf215546Sopenharmony_ci case TGSI_OPCODE_BGNSUB: 794bf215546Sopenharmony_ci case TGSI_OPCODE_ENDSUB: 795bf215546Sopenharmony_ci /* nothing to do here */ 796bf215546Sopenharmony_ci break; 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_ci case TGSI_OPCODE_CAL: 799bf215546Sopenharmony_ci if(!fpc->is_nv4x) 800bf215546Sopenharmony_ci goto nv3x_cflow; 801bf215546Sopenharmony_ci nv40_fp_cal(fpc, finst->Label.Label); 802bf215546Sopenharmony_ci break; 803bf215546Sopenharmony_ci 804bf215546Sopenharmony_ci case TGSI_OPCODE_RET: 805bf215546Sopenharmony_ci if(!fpc->is_nv4x) 806bf215546Sopenharmony_ci goto nv3x_cflow; 807bf215546Sopenharmony_ci nv40_fp_ret(fpc); 808bf215546Sopenharmony_ci break; 809bf215546Sopenharmony_ci 810bf215546Sopenharmony_ci case TGSI_OPCODE_BGNLOOP: 811bf215546Sopenharmony_ci if(!fpc->is_nv4x) 812bf215546Sopenharmony_ci goto nv3x_cflow; 813bf215546Sopenharmony_ci /* TODO: we should support using two nested REPs to allow a > 255 iteration count */ 814bf215546Sopenharmony_ci nv40_fp_rep(fpc, 255, finst->Label.Label); 815bf215546Sopenharmony_ci break; 816bf215546Sopenharmony_ci 817bf215546Sopenharmony_ci case TGSI_OPCODE_ENDLOOP: 818bf215546Sopenharmony_ci break; 819bf215546Sopenharmony_ci 820bf215546Sopenharmony_ci case TGSI_OPCODE_BRK: 821bf215546Sopenharmony_ci if(!fpc->is_nv4x) 822bf215546Sopenharmony_ci goto nv3x_cflow; 823bf215546Sopenharmony_ci nv40_fp_brk(fpc); 824bf215546Sopenharmony_ci break; 825bf215546Sopenharmony_ci 826bf215546Sopenharmony_ci case TGSI_OPCODE_CONT: 827bf215546Sopenharmony_ci { 828bf215546Sopenharmony_ci static int warned = 0; 829bf215546Sopenharmony_ci if(!warned) { 830bf215546Sopenharmony_ci NOUVEAU_ERR("Sorry, the continue keyword is not implemented: ignoring it.\n"); 831bf215546Sopenharmony_ci warned = 1; 832bf215546Sopenharmony_ci } 833bf215546Sopenharmony_ci break; 834bf215546Sopenharmony_ci } 835bf215546Sopenharmony_ci 836bf215546Sopenharmony_ci default: 837bf215546Sopenharmony_ci NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); 838bf215546Sopenharmony_ci return false; 839bf215546Sopenharmony_ci } 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_ciout: 842bf215546Sopenharmony_ci release_temps(fpc); 843bf215546Sopenharmony_ci return true; 844bf215546Sopenharmony_cinv3x_cflow: 845bf215546Sopenharmony_ci { 846bf215546Sopenharmony_ci static int warned = 0; 847bf215546Sopenharmony_ci if(!warned) { 848bf215546Sopenharmony_ci NOUVEAU_ERR( 849bf215546Sopenharmony_ci "Sorry, control flow instructions are not supported in hardware on nv3x: ignoring them\n" 850bf215546Sopenharmony_ci "If rendering is incorrect, try to disable GLSL support in the application.\n"); 851bf215546Sopenharmony_ci warned = 1; 852bf215546Sopenharmony_ci } 853bf215546Sopenharmony_ci } 854bf215546Sopenharmony_ci goto out; 855bf215546Sopenharmony_ci} 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_cistatic bool 858bf215546Sopenharmony_cinvfx_fragprog_parse_decl_input(struct nvfx_fpc *fpc, 859bf215546Sopenharmony_ci const struct tgsi_full_declaration *fdec) 860bf215546Sopenharmony_ci{ 861bf215546Sopenharmony_ci unsigned idx = fdec->Range.First; 862bf215546Sopenharmony_ci unsigned hw; 863bf215546Sopenharmony_ci 864bf215546Sopenharmony_ci switch (fdec->Semantic.Name) { 865bf215546Sopenharmony_ci case TGSI_SEMANTIC_POSITION: 866bf215546Sopenharmony_ci hw = NVFX_FP_OP_INPUT_SRC_POSITION; 867bf215546Sopenharmony_ci break; 868bf215546Sopenharmony_ci case TGSI_SEMANTIC_COLOR: 869bf215546Sopenharmony_ci hw = NVFX_FP_OP_INPUT_SRC_COL0 + fdec->Semantic.Index; 870bf215546Sopenharmony_ci break; 871bf215546Sopenharmony_ci case TGSI_SEMANTIC_FOG: 872bf215546Sopenharmony_ci hw = NVFX_FP_OP_INPUT_SRC_FOGC; 873bf215546Sopenharmony_ci break; 874bf215546Sopenharmony_ci case TGSI_SEMANTIC_FACE: 875bf215546Sopenharmony_ci hw = NV40_FP_OP_INPUT_SRC_FACING; 876bf215546Sopenharmony_ci break; 877bf215546Sopenharmony_ci case TGSI_SEMANTIC_TEXCOORD: 878bf215546Sopenharmony_ci assert(fdec->Semantic.Index < 8); 879bf215546Sopenharmony_ci fpc->fp->texcoord[fdec->Semantic.Index] = fdec->Semantic.Index; 880bf215546Sopenharmony_ci fpc->fp->texcoords |= (1 << fdec->Semantic.Index); 881bf215546Sopenharmony_ci fpc->fp->vp_or |= (0x00004000 << fdec->Semantic.Index); 882bf215546Sopenharmony_ci hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.Index); 883bf215546Sopenharmony_ci break; 884bf215546Sopenharmony_ci case TGSI_SEMANTIC_GENERIC: 885bf215546Sopenharmony_ci case TGSI_SEMANTIC_PCOORD: 886bf215546Sopenharmony_ci /* will be assigned to remaining TC slots later */ 887bf215546Sopenharmony_ci return true; 888bf215546Sopenharmony_ci default: 889bf215546Sopenharmony_ci assert(0); 890bf215546Sopenharmony_ci return false; 891bf215546Sopenharmony_ci } 892bf215546Sopenharmony_ci 893bf215546Sopenharmony_ci fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw); 894bf215546Sopenharmony_ci return true; 895bf215546Sopenharmony_ci} 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_cistatic bool 898bf215546Sopenharmony_cinvfx_fragprog_assign_generic(struct nvfx_fpc *fpc, 899bf215546Sopenharmony_ci const struct tgsi_full_declaration *fdec) 900bf215546Sopenharmony_ci{ 901bf215546Sopenharmony_ci unsigned num_texcoords = fpc->is_nv4x ? 10 : 8; 902bf215546Sopenharmony_ci unsigned idx = fdec->Range.First; 903bf215546Sopenharmony_ci unsigned hw; 904bf215546Sopenharmony_ci 905bf215546Sopenharmony_ci switch (fdec->Semantic.Name) { 906bf215546Sopenharmony_ci case TGSI_SEMANTIC_GENERIC: 907bf215546Sopenharmony_ci case TGSI_SEMANTIC_PCOORD: 908bf215546Sopenharmony_ci for (hw = 0; hw < num_texcoords; hw++) { 909bf215546Sopenharmony_ci if (fpc->fp->texcoord[hw] == 0xffff) { 910bf215546Sopenharmony_ci if (hw <= 7) { 911bf215546Sopenharmony_ci fpc->fp->texcoords |= (0x1 << hw); 912bf215546Sopenharmony_ci fpc->fp->vp_or |= (0x00004000 << hw); 913bf215546Sopenharmony_ci } else { 914bf215546Sopenharmony_ci fpc->fp->vp_or |= (0x00001000 << (hw - 8)); 915bf215546Sopenharmony_ci } 916bf215546Sopenharmony_ci if (fdec->Semantic.Name == TGSI_SEMANTIC_PCOORD) { 917bf215546Sopenharmony_ci fpc->fp->texcoord[hw] = 0xfffe; 918bf215546Sopenharmony_ci fpc->fp->point_sprite_control |= (0x00000100 << hw); 919bf215546Sopenharmony_ci } else { 920bf215546Sopenharmony_ci fpc->fp->texcoord[hw] = fdec->Semantic.Index + 8; 921bf215546Sopenharmony_ci } 922bf215546Sopenharmony_ci hw = NVFX_FP_OP_INPUT_SRC_TC(hw); 923bf215546Sopenharmony_ci fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw); 924bf215546Sopenharmony_ci return true; 925bf215546Sopenharmony_ci } 926bf215546Sopenharmony_ci } 927bf215546Sopenharmony_ci return false; 928bf215546Sopenharmony_ci default: 929bf215546Sopenharmony_ci return true; 930bf215546Sopenharmony_ci } 931bf215546Sopenharmony_ci} 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_cistatic bool 934bf215546Sopenharmony_cinvfx_fragprog_parse_decl_output(struct nvfx_fpc *fpc, 935bf215546Sopenharmony_ci const struct tgsi_full_declaration *fdec) 936bf215546Sopenharmony_ci{ 937bf215546Sopenharmony_ci unsigned idx = fdec->Range.First; 938bf215546Sopenharmony_ci unsigned hw; 939bf215546Sopenharmony_ci 940bf215546Sopenharmony_ci switch (fdec->Semantic.Name) { 941bf215546Sopenharmony_ci case TGSI_SEMANTIC_POSITION: 942bf215546Sopenharmony_ci hw = 1; 943bf215546Sopenharmony_ci break; 944bf215546Sopenharmony_ci case TGSI_SEMANTIC_COLOR: 945bf215546Sopenharmony_ci hw = ~0; 946bf215546Sopenharmony_ci switch (fdec->Semantic.Index) { 947bf215546Sopenharmony_ci case 0: hw = 0; break; 948bf215546Sopenharmony_ci case 1: hw = 2; break; 949bf215546Sopenharmony_ci case 2: hw = 3; break; 950bf215546Sopenharmony_ci case 3: hw = 4; break; 951bf215546Sopenharmony_ci } 952bf215546Sopenharmony_ci if(hw > ((fpc->is_nv4x) ? 4 : 2)) { 953bf215546Sopenharmony_ci NOUVEAU_ERR("bad rcol index\n"); 954bf215546Sopenharmony_ci return false; 955bf215546Sopenharmony_ci } 956bf215546Sopenharmony_ci break; 957bf215546Sopenharmony_ci default: 958bf215546Sopenharmony_ci NOUVEAU_ERR("bad output semantic\n"); 959bf215546Sopenharmony_ci return false; 960bf215546Sopenharmony_ci } 961bf215546Sopenharmony_ci 962bf215546Sopenharmony_ci fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw); 963bf215546Sopenharmony_ci fpc->r_temps |= (1ULL << hw); 964bf215546Sopenharmony_ci return true; 965bf215546Sopenharmony_ci} 966bf215546Sopenharmony_ci 967bf215546Sopenharmony_cistatic bool 968bf215546Sopenharmony_cinvfx_fragprog_prepare(struct nvfx_fpc *fpc) 969bf215546Sopenharmony_ci{ 970bf215546Sopenharmony_ci struct tgsi_parse_context p; 971bf215546Sopenharmony_ci int high_temp = -1, i; 972bf215546Sopenharmony_ci 973bf215546Sopenharmony_ci fpc->r_imm = CALLOC(fpc->fp->info.immediate_count, sizeof(struct nvfx_reg)); 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_ci tgsi_parse_init(&p, fpc->fp->pipe.tokens); 976bf215546Sopenharmony_ci while (!tgsi_parse_end_of_tokens(&p)) { 977bf215546Sopenharmony_ci const union tgsi_full_token *tok = &p.FullToken; 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci tgsi_parse_token(&p); 980bf215546Sopenharmony_ci switch(tok->Token.Type) { 981bf215546Sopenharmony_ci case TGSI_TOKEN_TYPE_DECLARATION: 982bf215546Sopenharmony_ci { 983bf215546Sopenharmony_ci const struct tgsi_full_declaration *fdec; 984bf215546Sopenharmony_ci fdec = &p.FullToken.FullDeclaration; 985bf215546Sopenharmony_ci switch (fdec->Declaration.File) { 986bf215546Sopenharmony_ci case TGSI_FILE_INPUT: 987bf215546Sopenharmony_ci if (!nvfx_fragprog_parse_decl_input(fpc, fdec)) 988bf215546Sopenharmony_ci goto out_err; 989bf215546Sopenharmony_ci break; 990bf215546Sopenharmony_ci case TGSI_FILE_OUTPUT: 991bf215546Sopenharmony_ci if (!nvfx_fragprog_parse_decl_output(fpc, fdec)) 992bf215546Sopenharmony_ci goto out_err; 993bf215546Sopenharmony_ci break; 994bf215546Sopenharmony_ci case TGSI_FILE_TEMPORARY: 995bf215546Sopenharmony_ci if (fdec->Range.Last > high_temp) { 996bf215546Sopenharmony_ci high_temp = 997bf215546Sopenharmony_ci fdec->Range.Last; 998bf215546Sopenharmony_ci } 999bf215546Sopenharmony_ci break; 1000bf215546Sopenharmony_ci default: 1001bf215546Sopenharmony_ci break; 1002bf215546Sopenharmony_ci } 1003bf215546Sopenharmony_ci } 1004bf215546Sopenharmony_ci break; 1005bf215546Sopenharmony_ci case TGSI_TOKEN_TYPE_IMMEDIATE: 1006bf215546Sopenharmony_ci { 1007bf215546Sopenharmony_ci struct tgsi_full_immediate *imm; 1008bf215546Sopenharmony_ci 1009bf215546Sopenharmony_ci imm = &p.FullToken.FullImmediate; 1010bf215546Sopenharmony_ci assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); 1011bf215546Sopenharmony_ci assert(fpc->nr_imm < fpc->fp->info.immediate_count); 1012bf215546Sopenharmony_ci 1013bf215546Sopenharmony_ci fpc->r_imm[fpc->nr_imm++] = nvfx_fp_imm(fpc, imm->u[0].Float, imm->u[1].Float, imm->u[2].Float, imm->u[3].Float); 1014bf215546Sopenharmony_ci break; 1015bf215546Sopenharmony_ci } 1016bf215546Sopenharmony_ci default: 1017bf215546Sopenharmony_ci break; 1018bf215546Sopenharmony_ci } 1019bf215546Sopenharmony_ci } 1020bf215546Sopenharmony_ci tgsi_parse_free(&p); 1021bf215546Sopenharmony_ci 1022bf215546Sopenharmony_ci tgsi_parse_init(&p, fpc->fp->pipe.tokens); 1023bf215546Sopenharmony_ci while (!tgsi_parse_end_of_tokens(&p)) { 1024bf215546Sopenharmony_ci const struct tgsi_full_declaration *fdec; 1025bf215546Sopenharmony_ci tgsi_parse_token(&p); 1026bf215546Sopenharmony_ci switch(p.FullToken.Token.Type) { 1027bf215546Sopenharmony_ci case TGSI_TOKEN_TYPE_DECLARATION: 1028bf215546Sopenharmony_ci fdec = &p.FullToken.FullDeclaration; 1029bf215546Sopenharmony_ci switch (fdec->Declaration.File) { 1030bf215546Sopenharmony_ci case TGSI_FILE_INPUT: 1031bf215546Sopenharmony_ci if (!nvfx_fragprog_assign_generic(fpc, fdec)) 1032bf215546Sopenharmony_ci goto out_err; 1033bf215546Sopenharmony_ci break; 1034bf215546Sopenharmony_ci default: 1035bf215546Sopenharmony_ci break; 1036bf215546Sopenharmony_ci } 1037bf215546Sopenharmony_ci break; 1038bf215546Sopenharmony_ci default: 1039bf215546Sopenharmony_ci break; 1040bf215546Sopenharmony_ci } 1041bf215546Sopenharmony_ci } 1042bf215546Sopenharmony_ci tgsi_parse_free(&p); 1043bf215546Sopenharmony_ci 1044bf215546Sopenharmony_ci if (++high_temp) { 1045bf215546Sopenharmony_ci fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg)); 1046bf215546Sopenharmony_ci for (i = 0; i < high_temp; i++) 1047bf215546Sopenharmony_ci fpc->r_temp[i] = temp(fpc); 1048bf215546Sopenharmony_ci fpc->r_temps_discard = 0ULL; 1049bf215546Sopenharmony_ci } 1050bf215546Sopenharmony_ci 1051bf215546Sopenharmony_ci return true; 1052bf215546Sopenharmony_ci 1053bf215546Sopenharmony_ciout_err: 1054bf215546Sopenharmony_ci FREE(fpc->r_temp); 1055bf215546Sopenharmony_ci fpc->r_temp = NULL; 1056bf215546Sopenharmony_ci 1057bf215546Sopenharmony_ci tgsi_parse_free(&p); 1058bf215546Sopenharmony_ci return false; 1059bf215546Sopenharmony_ci} 1060bf215546Sopenharmony_ci 1061bf215546Sopenharmony_ciDEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", false) 1062bf215546Sopenharmony_ci 1063bf215546Sopenharmony_civoid 1064bf215546Sopenharmony_ci_nvfx_fragprog_translate(uint16_t oclass, struct nv30_fragprog *fp) 1065bf215546Sopenharmony_ci{ 1066bf215546Sopenharmony_ci struct tgsi_parse_context parse; 1067bf215546Sopenharmony_ci struct nvfx_fpc *fpc = NULL; 1068bf215546Sopenharmony_ci struct util_dynarray insns; 1069bf215546Sopenharmony_ci 1070bf215546Sopenharmony_ci fp->translated = false; 1071bf215546Sopenharmony_ci fp->point_sprite_control = 0; 1072bf215546Sopenharmony_ci fp->vp_or = 0; 1073bf215546Sopenharmony_ci 1074bf215546Sopenharmony_ci fpc = CALLOC_STRUCT(nvfx_fpc); 1075bf215546Sopenharmony_ci if (!fpc) 1076bf215546Sopenharmony_ci goto out_err; 1077bf215546Sopenharmony_ci 1078bf215546Sopenharmony_ci fpc->is_nv4x = (oclass >= NV40_3D_CLASS) ? ~0 : 0; 1079bf215546Sopenharmony_ci fpc->max_temps = fpc->is_nv4x ? 48 : 32; 1080bf215546Sopenharmony_ci fpc->fp = fp; 1081bf215546Sopenharmony_ci fpc->num_regs = 2; 1082bf215546Sopenharmony_ci memset(fp->texcoord, 0xff, sizeof(fp->texcoord)); 1083bf215546Sopenharmony_ci 1084bf215546Sopenharmony_ci if (fp->info.properties[TGSI_PROPERTY_FS_COORD_ORIGIN]) 1085bf215546Sopenharmony_ci fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED; 1086bf215546Sopenharmony_ci if (fp->info.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER]) 1087bf215546Sopenharmony_ci fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_CENTER_INTEGER; 1088bf215546Sopenharmony_ci if (fp->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) 1089bf215546Sopenharmony_ci fp->rt_enable |= NV30_3D_RT_ENABLE_MRT; 1090bf215546Sopenharmony_ci 1091bf215546Sopenharmony_ci if (!nvfx_fragprog_prepare(fpc)) 1092bf215546Sopenharmony_ci goto out_err; 1093bf215546Sopenharmony_ci 1094bf215546Sopenharmony_ci tgsi_parse_init(&parse, fp->pipe.tokens); 1095bf215546Sopenharmony_ci util_dynarray_init(&insns, NULL); 1096bf215546Sopenharmony_ci 1097bf215546Sopenharmony_ci while (!tgsi_parse_end_of_tokens(&parse)) { 1098bf215546Sopenharmony_ci tgsi_parse_token(&parse); 1099bf215546Sopenharmony_ci 1100bf215546Sopenharmony_ci switch (parse.FullToken.Token.Type) { 1101bf215546Sopenharmony_ci case TGSI_TOKEN_TYPE_INSTRUCTION: 1102bf215546Sopenharmony_ci { 1103bf215546Sopenharmony_ci const struct tgsi_full_instruction *finst; 1104bf215546Sopenharmony_ci 1105bf215546Sopenharmony_ci util_dynarray_append(&insns, unsigned, fp->insn_len); 1106bf215546Sopenharmony_ci finst = &parse.FullToken.FullInstruction; 1107bf215546Sopenharmony_ci if (!nvfx_fragprog_parse_instruction(fpc, finst)) 1108bf215546Sopenharmony_ci goto out_err; 1109bf215546Sopenharmony_ci } 1110bf215546Sopenharmony_ci break; 1111bf215546Sopenharmony_ci default: 1112bf215546Sopenharmony_ci break; 1113bf215546Sopenharmony_ci } 1114bf215546Sopenharmony_ci } 1115bf215546Sopenharmony_ci util_dynarray_append(&insns, unsigned, fp->insn_len); 1116bf215546Sopenharmony_ci 1117bf215546Sopenharmony_ci for(unsigned i = 0; i < fpc->label_relocs.size; i += sizeof(struct nvfx_relocation)) 1118bf215546Sopenharmony_ci { 1119bf215546Sopenharmony_ci struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)fpc->label_relocs.data + i); 1120bf215546Sopenharmony_ci fp->insn[label_reloc->location] |= ((unsigned*)insns.data)[label_reloc->target]; 1121bf215546Sopenharmony_ci } 1122bf215546Sopenharmony_ci util_dynarray_fini(&insns); 1123bf215546Sopenharmony_ci 1124bf215546Sopenharmony_ci if(!fpc->is_nv4x) 1125bf215546Sopenharmony_ci fp->fp_control |= (fpc->num_regs-1)/2; 1126bf215546Sopenharmony_ci else 1127bf215546Sopenharmony_ci fp->fp_control |= fpc->num_regs << NV40_3D_FP_CONTROL_TEMP_COUNT__SHIFT; 1128bf215546Sopenharmony_ci 1129bf215546Sopenharmony_ci /* Terminate final instruction */ 1130bf215546Sopenharmony_ci if(fp->insn) 1131bf215546Sopenharmony_ci fp->insn[fpc->inst_offset] |= 0x00000001; 1132bf215546Sopenharmony_ci 1133bf215546Sopenharmony_ci /* Append NOP + END instruction for branches to the end of the program */ 1134bf215546Sopenharmony_ci fpc->inst_offset = fp->insn_len; 1135bf215546Sopenharmony_ci grow_insns(fpc, 4); 1136bf215546Sopenharmony_ci fp->insn[fpc->inst_offset + 0] = 0x00000001; 1137bf215546Sopenharmony_ci fp->insn[fpc->inst_offset + 1] = 0x00000000; 1138bf215546Sopenharmony_ci fp->insn[fpc->inst_offset + 2] = 0x00000000; 1139bf215546Sopenharmony_ci fp->insn[fpc->inst_offset + 3] = 0x00000000; 1140bf215546Sopenharmony_ci 1141bf215546Sopenharmony_ci if(debug_get_option_nvfx_dump_fp()) 1142bf215546Sopenharmony_ci { 1143bf215546Sopenharmony_ci debug_printf("\n"); 1144bf215546Sopenharmony_ci tgsi_dump(fp->pipe.tokens, 0); 1145bf215546Sopenharmony_ci 1146bf215546Sopenharmony_ci debug_printf("\n%s fragment program:\n", fpc->is_nv4x ? "nv4x" : "nv3x"); 1147bf215546Sopenharmony_ci for (unsigned i = 0; i < fp->insn_len; i += 4) 1148bf215546Sopenharmony_ci debug_printf("%3u: %08x %08x %08x %08x\n", i >> 2, fp->insn[i], fp->insn[i + 1], fp->insn[i + 2], fp->insn[i + 3]); 1149bf215546Sopenharmony_ci debug_printf("\n"); 1150bf215546Sopenharmony_ci } 1151bf215546Sopenharmony_ci 1152bf215546Sopenharmony_ci fp->translated = true; 1153bf215546Sopenharmony_ci 1154bf215546Sopenharmony_ciout: 1155bf215546Sopenharmony_ci tgsi_parse_free(&parse); 1156bf215546Sopenharmony_ci if (fpc) 1157bf215546Sopenharmony_ci { 1158bf215546Sopenharmony_ci FREE(fpc->r_temp); 1159bf215546Sopenharmony_ci FREE(fpc->r_imm); 1160bf215546Sopenharmony_ci util_dynarray_fini(&fpc->if_stack); 1161bf215546Sopenharmony_ci util_dynarray_fini(&fpc->label_relocs); 1162bf215546Sopenharmony_ci util_dynarray_fini(&fpc->imm_data); 1163bf215546Sopenharmony_ci //util_dynarray_fini(&fpc->loop_stack); 1164bf215546Sopenharmony_ci FREE(fpc); 1165bf215546Sopenharmony_ci } 1166bf215546Sopenharmony_ci 1167bf215546Sopenharmony_ci return; 1168bf215546Sopenharmony_ci 1169bf215546Sopenharmony_ciout_err: 1170bf215546Sopenharmony_ci _debug_printf("Error: failed to compile this fragment program:\n"); 1171bf215546Sopenharmony_ci tgsi_dump(fp->pipe.tokens, 0); 1172bf215546Sopenharmony_ci goto out; 1173bf215546Sopenharmony_ci} 1174